From aaf30c35a00ba044dceb44942735ea6f83fd9011 Mon Sep 17 00:00:00 2001
From: j
Date: Sat, 29 Dec 2018 11:36:47 +0100
Subject: [PATCH] avoid reading file to ram in ox.net.save_url

---
 ox/net.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/ox/net.py b/ox/net.py
index 02c7156..2a5e71b 100644
--- a/ox/net.py
+++ b/ox/net.py
@@ -8,6 +8,11 @@
 import os
 import re
 import struct
+try:
+    import requests
+    USE_REQUESTS = True
+except:
+    USE_REQUESTS = False
 from six import BytesIO, PY2
 from six.moves import urllib
 from chardet.universaldetector import UniversalDetector
@@ -117,9 +122,17 @@ def save_url(url, filename, overwrite=False):
     dirname = os.path.dirname(filename)
     if dirname and not os.path.exists(dirname):
         os.makedirs(dirname)
-    data = read_url(url)
-    with open(filename, 'wb') as f:
-        f.write(data)
+    headers = DEFAULT_HEADERS.copy()
+    if USE_REQUESTS:
+        r = requests.get(url, headers=headers, stream=True)
+        with open(filename, 'wb') as f:
+            for chunk in r.iter_content(chunk_size=1024):
+                if chunk: # filter out keep-alive new chunks
+                    f.write(chunk)
+    else:
+        data = read_url(url)
+        with open(filename, 'wb') as f:
+            f.write(data)
 
 def _get_size(url):
     req = urllib.request.Request(url, headers=DEFAULT_HEADERS.copy())
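
Note on the approach: the requests branch added above uses the library's
standard streaming pattern (stream=True plus iter_content), so the response
body is written to disk chunk by chunk instead of being buffered in RAM.
A minimal standalone sketch of that pattern follows; the DEFAULT_HEADERS
value, the function name and the example URL are placeholders chosen for
illustration, not part of ox.net or of this patch:

    import os
    import requests

    # Stand-in for ox.net's DEFAULT_HEADERS (assumed value, for illustration only).
    DEFAULT_HEADERS = {'User-Agent': 'Mozilla/5.0'}

    def save_url_streaming(url, filename, chunk_size=1024):
        """Download url to filename without reading the whole body into RAM."""
        dirname = os.path.dirname(filename)
        if dirname and not os.path.exists(dirname):
            os.makedirs(dirname)
        # stream=True defers fetching the body; iter_content yields it in chunks.
        r = requests.get(url, headers=DEFAULT_HEADERS.copy(), stream=True)
        with open(filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=chunk_size):
                if chunk:  # skip keep-alive chunks, as in the patch
                    f.write(chunk)

    if __name__ == '__main__':
        # Placeholder URL and path; substitute real values when trying this out.
        save_url_streaming('https://example.com/large.bin', '/tmp/large.bin')

Newer versions of requests also allow the response to be used as a context
manager ("with requests.get(..., stream=True) as r:"), which releases the
connection even if the write loop raises.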