Compare commits
No commits in common. "16f1c35875da0a628706deb13ee9707a22377d78" and "4feacb4a97aafd442cc9ec77162b1b7efaa83182" have entirely different histories.
16f1c35875 ... 4feacb4a97
3 changed files with 45 additions and 31 deletions
ox/api.py (73 lines changed)

@@ -4,7 +4,6 @@
 from __future__ import print_function
 from types import MethodType
 import gzip
-import mimetypes
 import os
 import shutil
 import sys
@@ -14,7 +13,6 @@ from six.moves import http_cookiejar as cookielib
 from six import BytesIO, PY2
 from six.moves import urllib
 from six.moves.urllib.parse import urlparse
-import requests

 from . import __version__
 from .utils import json
@@ -39,13 +37,12 @@ class API(object):
             self._cj = cj
         else:
             self._cj = cookielib.CookieJar()

-        self._requests_session = requests.Session()
-        self._requests_session.cookies = self._cj
-        self._requests_session.headers = {
-            'User-Agent': '%s/%s' % (self.__name__, self.__version__),
-            'Accept-Encoding': 'gzip, deflate',
-        }
+        self._opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self._cj),
+                                                   urllib.request.HTTPHandler(debuglevel=self.debuglevel))
+        self._opener.addheaders = [
+            ('User-Agent', '%s/%s' % (self.__name__, self.__version__))
+        ]
         self.url = url
         r = self._request('api', {'docs': True})
         self._properties = r['data']['actions']
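The rewritten constructor builds a shared urllib opener carrying the cookie jar and User-Agent, then introspects the endpoint (the 'api' action with docs enabled) and, further down, attaches every advertised action as a method. A hypothetical usage sketch; the URL and the find action are illustrative, not taken from this diff:

from ox.api import API

api = API('https://example.com/api/')  # builds the opener, fetches action docs
print(sorted(api._properties))         # actions the endpoint advertises
# info = api.find({'query': {}})       # each action becomes a method like this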
@@ -79,12 +76,26 @@ class API(object):
         method.func_name = action
         self._add_method(method, action)

-    def _json_request(self, url, data, files=None):
+    def _json_request(self, url, form):
         result = {}
         try:
-            request = self._requests_session.post(url, data=data, files=files)
-            result = request.json()
-            return result
+            body = form.body()
+            if PY2:
+                if not isinstance(url, bytes):
+                    url = url.encode('utf-8')
+                request = urllib.request.Request(url)
+                request.add_data(body)
+            else:
+                request = urllib.request.Request(url, data=body, method='POST')
+            request.add_header('Content-Type', form.get_content_type())
+            request.add_header('Content-Length', str(len(body)))
+            request.add_header('Accept-Encoding', 'gzip, deflate')
+            f = self._opener.open(request)
+            result = f.read()
+            if f.headers.get('content-encoding', None) == 'gzip':
+                result = gzip.GzipFile(fileobj=BytesIO(result)).read()
+            result = result.decode('utf-8')
+            return json.loads(result)
         except urllib.error.HTTPError as e:
             if self.DEBUG:
                 import webbrowser
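Two things worth noting in the rewritten _json_request: the PY2 branch exists because Python 2's urllib2.Request attaches the payload after construction via add_data() and accepts no method keyword, while Python 3 takes data= and method= (the latter since 3.3) in the constructor; and, unlike requests, a urllib opener does not transparently decompress gzip bodies, so the reply is inflated by hand. A standard-library round trip of that decode path (standalone illustration, not part of the diff):

import gzip
from io import BytesIO

# Simulate a gzip-encoded JSON response body...
raw = gzip.compress(b'{"status": {"code": 200}}')

# ...and decode it exactly the way _json_request does.
decoded = gzip.GzipFile(fileobj=BytesIO(raw)).read()
assert decoded == b'{"status": {"code": 200}}'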
@@ -114,15 +125,17 @@ class API(object):
                 raise

     def _request(self, action, data=None):
-        form = {
-            'action': action
-        }
+        form = MultiPartForm()
+        form.add_field('action', action)
         if data:
-            form['data'] = json.dumps(data)
+            form.add_field('data', json.dumps(data))
         return self._json_request(self.url, form)

     def get_url(self, url):
-        return self._requests_session.get(url).content
+        request = urllib.request.Request(url, method='GET')
+        f = self._opener.open(request)
+        result = f.read()
+        return result

     def save_url(self, url, filename, overwrite=False):
         chunk_size = 16 * 1024
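A portability note on get_url(): under six.moves, urllib.request.Request resolves to urllib2.Request on Python 2, which has no method keyword, so passing method='GET' would raise TypeError there; on Python 3 it is accepted but also redundant, since GET is already the default whenever data is None. A quick check of that default (assumes Python 3):

from six.moves import urllib

req = urllib.request.Request('https://example.com/')
print(req.get_method())  # 'GET': the default when no data is attached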
@@ -130,15 +143,21 @@ class API(object):
         dirname = os.path.dirname(filename)
         if dirname and not os.path.exists(dirname):
             os.makedirs(dirname)
+        request = urllib.request.Request(url, method='GET')
         tmpname = filename + '.tmp'
         with open(tmpname, 'wb') as fd:
-            r = self._requests_session.get(url)
-            for chunk in iter(lambda: r.read(chunk_size), b''):
+            u = self._opener.open(request)
+            for chunk in iter(lambda: u.read(chunk_size), b''):
                 fd.write(chunk)
         shutil.move(tmpname, filename)

     def upload_chunks(self, url, filename, data=None, silent=False):
-        data = self._json_request(url, data)
+        form = MultiPartForm()
+        if data:
+            for key in data:
+                form.add_field(key, data[key])
+        data = self._json_request(url, form)

         def full_url(path):
             if path.startswith('/'):
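save_url() now streams with the two-argument iter(callable, sentinel) form: u.read(chunk_size) is called repeatedly until it returns the empty-bytes sentinel at end of stream, so the download is written to disk in 16 KiB pieces rather than buffered whole. A self-contained demonstration of the idiom:

from io import BytesIO

chunk_size = 16 * 1024
src = BytesIO(b'x' * 100000)  # stands in for the opened HTTP response

chunks = list(iter(lambda: src.read(chunk_size), b''))
assert sum(len(c) for c in chunks) == 100000
assert all(0 < len(c) <= chunk_size for c in chunks)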
@@ -159,20 +178,16 @@ class API(object):
         resume_offset = 0
         chunk = f.read(CHUNK_SIZE)
         fname = os.path.basename(filename)
-        mime_type = mimetypes.guess_type(fname)[0] or 'application/octet-stream'
         if not isinstance(fname, bytes):
             fname = fname.encode('utf-8')
         while chunk:
-            meta = {
-                'offset': str(done)
-            }
+            form = MultiPartForm()
+            form.add_file('chunk', fname, chunk)
             if len(chunk) < CHUNK_SIZE or f.tell() == fsize:
-                meta['done'] = '1'
-            files = [
-                ('chunk', (fname, chunk, mime_type))
-            ]
+                form.add_field('done', '1')
+            form.add_field('offset', str(done))
             try:
-                data = self._json_request(uploadUrl, meta, files=files)
+                data = self._json_request(uploadUrl, form)
             except KeyboardInterrupt:
                 if not slient:
                     print("\ninterrupted by user.")
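The requests calls above are replaced throughout by a MultiPartForm object that this diff never defines; in python-ox it is provided by the library's form module (ox/form.py, in the spirit of the classic urllib2 multipart recipe). Below is a minimal sketch of just the interface these hunks exercise: add_field(), add_file(), get_content_type(), and body(). The rendering details are illustrative assumptions, not the library's exact code:

# Illustrative sketch only: enough of a MultiPartForm to satisfy the calls
# made in api.py (the shipped helper differs in detail).
import mimetypes
import uuid


class MultiPartForm(object):
    """Accumulate fields and files, then render a multipart/form-data body."""

    def __init__(self):
        self.fields = []   # (name, value) pairs
        self.files = []    # (name, filename, mimetype, data) tuples
        self.boundary = uuid.uuid4().hex  # random, unlikely to occur in data

    def get_content_type(self):
        return 'multipart/form-data; boundary=%s' % self.boundary

    def add_field(self, name, value):
        self.fields.append((name, value))

    def add_file(self, name, filename, data):
        if isinstance(filename, bytes):
            filename = filename.decode('utf-8')
        mimetype = mimetypes.guess_type(filename)[0] or 'application/octet-stream'
        self.files.append((name, filename, mimetype, data))

    def body(self):
        # Render each part separated by the boundary, closing with boundary--.
        def to_bytes(s):
            return s if isinstance(s, bytes) else str(s).encode('utf-8')
        sep = to_bytes('--' + self.boundary)
        lines = []
        for name, value in self.fields:
            lines += [sep,
                      to_bytes('Content-Disposition: form-data; name="%s"' % name),
                      b'',
                      to_bytes(value)]
        for name, filename, mimetype, data in self.files:
            lines += [sep,
                      to_bytes('Content-Disposition: form-data; name="%s"; filename="%s"'
                               % (name, filename)),
                      to_bytes('Content-Type: %s' % mimetype),
                      b'',
                      to_bytes(data)]
        lines += [sep + b'--', b'']
        return b'\r\n'.join(lines)

Since _json_request derives Content-Type from get_content_type() and Content-Length from len(body()), any object exposing these four methods would satisfy the new code paths.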
ox/web/imdb.py

@@ -552,7 +552,7 @@ class Imdb(SiteParser):
         metadata = self.get_page_data('keywords')
         keywords = get_keywords(metadata)
         if keywords:
-            self['keyword'] = keywords
+            self['keywords'] = keywords

         if 'summary' not in self and 'storyline' in self:
             self['summary'] = self.pop('storyline')
requirements.txt

@@ -1,4 +1,3 @@
 chardet
 six>=1.5.2
 lxml
-requests