requests is always required now

j 2023-07-27 18:07:49 +02:00
parent adad3be419
commit bf34774533
2 changed files with 23 additions and 49 deletions
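
In short: both modules previously guarded the import and fell back to urllib when requests was missing; now the import is unconditional and the USE_REQUESTS branches are gone. A paraphrased sketch of the removed pattern and its replacement (taken from the diff below):

    # Before: requests was optional; a module-level flag recorded whether it imported.
    try:
        import requests
        USE_REQUESTS = True
    except:
        USE_REQUESTS = False

    # After: requests is a hard dependency, imported unconditionally,
    # and callers no longer branch on USE_REQUESTS.
    import requests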


@@ -12,12 +12,8 @@ import zlib
 from io import BytesIO
 import urllib
-try:
-    import requests
-    USE_REQUESTS = True
-    requests_session = requests.Session()
-except:
-    USE_REQUESTS = False
+import requests
 from .utils import json
 from .file import makedirs
@@ -27,6 +23,7 @@ from .net import DEFAULT_HEADERS, detect_encoding
 cache_timeout = 30*24*60*60 # default is 30 days
+requests_session = requests.Session()
 COMPRESS_TYPES = (
     'text/html',
@@ -100,35 +97,20 @@ def read_url(url, data=None, headers=None, timeout=cache_timeout, valid=None, un
     result = store.get(url, data, headers, timeout)
     url_headers = {}
     if not result:
-        if USE_REQUESTS:
-            if headers is None:
-                headers = DEFAULT_HEADERS.copy()
-            if data:
-                r = requests_session.post(url, data=data, headers=headers)
-            else:
-                r = requests_session.get(url, headers=headers)
-            for key in r.headers:
-                url_headers[key.lower()] = r.headers[key]
-            result = r.content
-            url_headers['Status'] = "%s" % r.status_code
-            if not valid or valid(result, url_headers):
-                store.set(url, post_data=data, data=result, headers=url_headers)
-            else:
-                raise InvalidResult(result, url_headers)
-        else:
-            try:
-                url_headers, result = net.read_url(url, data, headers, return_headers=True)
-            except urllib.error.HTTPError as e:
-                e.headers['Status'] = "%s" % e.code
-                for key in e.headers:
-                    url_headers[key.lower()] = e.headers[key]
-                result = e.read()
-                if url_headers.get('content-encoding', None) == 'gzip':
-                    result = gzip.GzipFile(fileobj=BytesIO(result)).read()
-            if not valid or valid(result, url_headers):
-                store.set(url, post_data=data, data=result, headers=url_headers)
-            else:
-                raise InvalidResult(result, url_headers)
+        if headers is None:
+            headers = DEFAULT_HEADERS.copy()
+        if data:
+            r = requests_session.post(url, data=data, headers=headers)
+        else:
+            r = requests_session.get(url, headers=headers)
+        for key in r.headers:
+            url_headers[key.lower()] = r.headers[key]
+        result = r.content
+        url_headers['Status'] = "%s" % r.status_code
+        if not valid or valid(result, url_headers):
+            store.set(url, post_data=data, data=result, headers=url_headers)
+        else:
+            raise InvalidResult(result, url_headers)
     if unicode:
         ctype = url_headers.get('content-type', '').lower()
         if 'charset' in ctype:
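
With the urllib fallback gone, read_url() always fetches through the shared requests_session. A minimal usage sketch, assuming the module is importable as ox.cache (the package path is an assumption; the diff only shows relative imports). The valid callback receives (result, url_headers) and, when it returns False, read_url() raises InvalidResult instead of caching, per the hunk above:

    from ox.cache import read_url  # module path assumed

    # Only accept (and cache) responses that came back with HTTP 200;
    # the 'Status' header is set from r.status_code in the code above.
    def only_200(result, headers):
        return headers.get('Status') == '200'

    html = read_url('https://example.com/', valid=only_200)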


@@ -8,11 +8,8 @@ import os
 import re
 import struct
-try:
-    import requests
-    USE_REQUESTS = True
-except:
-    USE_REQUESTS = False
+import requests
 from io import BytesIO
 import urllib
 from chardet.universaldetector import UniversalDetector
@@ -119,16 +116,11 @@ def save_url(url, filename, overwrite=False):
     if dirname and not os.path.exists(dirname):
         os.makedirs(dirname)
     headers = DEFAULT_HEADERS.copy()
-    if USE_REQUESTS:
-        r = requests.get(url, headers=headers, stream=True)
-        with open(filename, 'wb') as f:
-            for chunk in r.iter_content(chunk_size=1024):
-                if chunk: # filter out keep-alive new chunks
-                    f.write(chunk)
-    else:
-        data = read_url(url)
-        with open(filename, 'wb') as f:
-            f.write(data)
+    r = requests.get(url, headers=headers, stream=True)
+    with open(filename, 'wb') as f:
+        for chunk in r.iter_content(chunk_size=1024):
+            if chunk: # filter out keep-alive new chunks
+                f.write(chunk)
 
 def _get_size(url):
     req = urllib.request.Request(url, headers=DEFAULT_HEADERS.copy())
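
In the second file, save_url() now always streams the download via requests rather than falling back to read_url(). A minimal usage sketch under the same import-path assumption (ox.net is inferred from "from .net import ..." in the first file, not shown in this diff):

    from ox.net import save_url  # module path assumed

    # Streams the response to disk in 1 KB chunks; parent directories are
    # created by the unchanged os.makedirs() call above the hunk.
    save_url('https://example.com/data.json', '/tmp/example/data.json')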