avoid reading file to ram in ox.net.save_url

This commit is contained in:
j 2018-12-29 11:36:47 +01:00
parent e72d5bb6c1
commit aaf30c35a0

View file

@@ -8,6 +8,11 @@ import os
 import re
 import struct
+try:
+    import requests
+    USE_REQUESTS = True
+except:
+    USE_REQUESTS = False
 from six import BytesIO, PY2
 from six.moves import urllib
 from chardet.universaldetector import UniversalDetector
@@ -117,9 +122,17 @@ def save_url(url, filename, overwrite=False):
     dirname = os.path.dirname(filename)
     if dirname and not os.path.exists(dirname):
         os.makedirs(dirname)
-    data = read_url(url)
-    with open(filename, 'wb') as f:
-        f.write(data)
+    headers = DEFAULT_HEADERS.copy()
+    if USE_REQUESTS:
+        r = requests.get(url, headers=headers, stream=True)
+        with open(filename, 'wb') as f:
+            for chunk in r.iter_content(chunk_size=1024):
+                if chunk:  # filter out keep-alive new chunks
+                    f.write(chunk)
+    else:
+        data = read_url(url)
+        with open(filename, 'wb') as f:
+            f.write(data)

 def _get_size(url):
     req = urllib.request.Request(url, headers=DEFAULT_HEADERS.copy())