requests is always required now

parent adad3be419
commit bf34774533

2 changed files with 23 additions and 49 deletions
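Because the try/except import fallback (USE_REQUESTS) is removed in both files, requests becomes a hard, import-time dependency. A minimal sketch of the effect for callers, illustrative only and not part of the commit (it assumes the package imports as ox.cache):

try:
    import ox.cache  # now imports requests unconditionally
except ImportError as e:
    # e.g. "No module named 'requests'" in an environment without requests;
    # previously this would have fallen back to the urllib code path instead.
    print('requests is required:', e)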
ox/cache.py (50 changed lines)

@@ -12,12 +12,8 @@ import zlib
 from io import BytesIO
 import urllib
-try:
-    import requests
-    USE_REQUESTS = True
-    requests_session = requests.Session()
-except:
-    USE_REQUESTS = False
+
+import requests
 
 from .utils import json
 from .file import makedirs
@@ -27,6 +23,7 @@ from .net import DEFAULT_HEADERS, detect_encoding
 
 
 cache_timeout = 30*24*60*60 # default is 30 days
+requests_session = requests.Session()
 
 COMPRESS_TYPES = (
     'text/html',
@@ -100,35 +97,20 @@ def read_url(url, data=None, headers=None, timeout=cache_timeout, valid=None, un
     result = store.get(url, data, headers, timeout)
     url_headers = {}
     if not result:
-        if USE_REQUESTS:
-            if headers is None:
-                headers = DEFAULT_HEADERS.copy()
-            if data:
-                r = requests_session.post(url, data=data, headers=headers)
-            else:
-                r = requests_session.get(url, headers=headers)
-            for key in r.headers:
-                url_headers[key.lower()] = r.headers[key]
-            result = r.content
-            url_headers['Status'] = "%s" % r.status_code
-            if not valid or valid(result, url_headers):
-                store.set(url, post_data=data, data=result, headers=url_headers)
-            else:
-                raise InvalidResult(result, url_headers)
-        else:
-            try:
-                url_headers, result = net.read_url(url, data, headers, return_headers=True)
-            except urllib.error.HTTPError as e:
-                e.headers['Status'] = "%s" % e.code
-                for key in e.headers:
-                    url_headers[key.lower()] = e.headers[key]
-                result = e.read()
-                if url_headers.get('content-encoding', None) == 'gzip':
-                    result = gzip.GzipFile(fileobj=BytesIO(result)).read()
-            if not valid or valid(result, url_headers):
-                store.set(url, post_data=data, data=result, headers=url_headers)
-            else:
-                raise InvalidResult(result, url_headers)
+        if headers is None:
+            headers = DEFAULT_HEADERS.copy()
+        if data:
+            r = requests_session.post(url, data=data, headers=headers)
+        else:
+            r = requests_session.get(url, headers=headers)
+        for key in r.headers:
+            url_headers[key.lower()] = r.headers[key]
+        result = r.content
+        url_headers['Status'] = "%s" % r.status_code
+        if not valid or valid(result, url_headers):
+            store.set(url, post_data=data, data=result, headers=url_headers)
+        else:
+            raise InvalidResult(result, url_headers)
     if unicode:
         ctype = url_headers.get('content-type', '').lower()
         if 'charset' in ctype:
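After this hunk, read_url always goes through the shared requests_session; the urllib/net.read_url branch is gone. The sketch below shows one way the valid callback and InvalidResult interact with the new path. It is illustrative only and assumes the module imports as ox.cache and that InvalidResult is the exception class raised above.

import ox.cache

def looks_ok(result, headers):
    # As in the hunk: 'valid' receives (result, url_headers); response header
    # keys are lowercased and url_headers['Status'] holds str(r.status_code).
    return headers.get('Status') == '200'

try:
    page = ox.cache.read_url('https://example.com/', valid=looks_ok)
except ox.cache.InvalidResult:
    # Raised when valid() returns a falsy value; nothing is written to the cache.
    page = None

The module-level requests.Session() added in the earlier hunk also means repeated fetches share one connection pool rather than opening a fresh connection per call.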
ox/net.py (22 changed lines)
@@ -8,11 +8,8 @@ import os
 import re
 import struct
 
-try:
-    import requests
-    USE_REQUESTS = True
-except:
-    USE_REQUESTS = False
+import requests
+
 from io import BytesIO
 import urllib
 from chardet.universaldetector import UniversalDetector
@@ -119,16 +116,11 @@ def save_url(url, filename, overwrite=False):
     if dirname and not os.path.exists(dirname):
         os.makedirs(dirname)
     headers = DEFAULT_HEADERS.copy()
-    if USE_REQUESTS:
-        r = requests.get(url, headers=headers, stream=True)
-        with open(filename, 'wb') as f:
-            for chunk in r.iter_content(chunk_size=1024):
-                if chunk: # filter out keep-alive new chunks
-                    f.write(chunk)
-    else:
-        data = read_url(url)
-        with open(filename, 'wb') as f:
-            f.write(data)
+    r = requests.get(url, headers=headers, stream=True)
+    with open(filename, 'wb') as f:
+        for chunk in r.iter_content(chunk_size=1024):
+            if chunk: # filter out keep-alive new chunks
+                f.write(chunk)
 
 def _get_size(url):
     req = urllib.request.Request(url, headers=DEFAULT_HEADERS.copy())
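save_url now always streams the download with requests and writes it in 1024-byte chunks; the read-whole-body-via-read_url fallback is gone. A usage sketch, illustrative only, assuming the module imports as ox.net:

import ox.net

# Streams the response to disk via requests.get(..., stream=True), creating
# parent directories as needed (per the hunk above).
ox.net.save_url('https://example.com/some/file.bin', '/tmp/ox-downloads/file.bin')

Streaming with iter_content keeps memory use flat for large files, unlike the removed fallback, which read the entire body into memory before writing it out.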