use six to support python 2 and 3
This commit is contained in:
parent 1b1dcf1c58
commit d4d09b56b6

28 changed files with 1730 additions and 1678 deletions
|
@ -3,28 +3,32 @@
|
||||||
# GPL 2011
|
# GPL 2011
|
||||||
__version__ = '2.1.1'
|
__version__ = '2.1.1'
|
||||||
|
|
||||||
import cache
|
from . import cache
|
||||||
import js
|
from . import js
|
||||||
import jsonc
|
from . import jsonc
|
||||||
import net
|
from . import net
|
||||||
import srt
|
from . import srt
|
||||||
import utils
|
from . import utils
|
||||||
|
|
||||||
from api import *
|
from .api import *
|
||||||
from file import *
|
from .file import *
|
||||||
from form import *
|
from .form import *
|
||||||
from format import *
|
from .format import *
|
||||||
from geo import *
|
from .geo import *
|
||||||
from html import *
|
from .html import *
|
||||||
#image depends on PIL, not easy enough to install on osx
|
#image depends on PIL, not easy enough to install on osx
|
||||||
try:
|
try:
|
||||||
from image import *
|
from .image import *
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
from location import *
|
from .location import *
|
||||||
from movie import *
|
from .movie import *
|
||||||
from normalize import *
|
from .normalize import *
|
||||||
from oembed import *
|
from .oembed import *
|
||||||
from text import *
|
from .text import *
|
||||||
from torrent import *
|
#currently broken in python3
|
||||||
from fixunicode import *
|
try:
|
||||||
|
from .torrent import *
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
from .fixunicode import *
|
||||||
|
|
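Note: the `ox/__init__.py` hunks above replace Python 2's implicit relative imports with explicit ones and guard the Python 3-broken submodule with a try/except. A minimal sketch of that pattern for a package `__init__.py`, using the submodule names from this package; it only runs inside such a package, and `except ImportError` is used here instead of the bare `except` in the commit:

```python
# package __init__.py sketch: explicit relative imports work on Python 2 and 3
from __future__ import absolute_import

from . import cache      # Python 2-only spelling was: import cache
from .api import *        # Python 2-only spelling was: from api import *

try:
    # optional submodule that may not import on Python 3 yet
    from .torrent import *
except ImportError:
    pass
```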
14
ox/api.py
14
ox/api.py
|
@ -3,10 +3,10 @@
|
||||||
# GPL 2011
|
# GPL 2011
|
||||||
from __future__ import with_statement
|
from __future__ import with_statement
|
||||||
|
|
||||||
import cookielib
|
from six.moves import http_cookiejar as cookielib
|
||||||
import gzip
|
import gzip
|
||||||
import StringIO
|
from six import BytesIO
|
||||||
import urllib2
|
from six.moves import urllib
|
||||||
from types import MethodType
|
from types import MethodType
|
||||||
|
|
||||||
from . import __version__
|
from . import __version__
|
||||||
|
@ -29,8 +29,8 @@ class API(object):
|
||||||
self._cj = cj
|
self._cj = cj
|
||||||
else:
|
else:
|
||||||
self._cj = cookielib.CookieJar()
|
self._cj = cookielib.CookieJar()
|
||||||
self._opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self._cj),
|
self._opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self._cj),
|
||||||
urllib2.HTTPHandler(debuglevel=self.debuglevel))
|
urllib.request.HTTPHandler(debuglevel=self.debuglevel))
|
||||||
self._opener.addheaders = [
|
self._opener.addheaders = [
|
||||||
('User-Agent', '%s/%s' % (self.__name__, self.__version__))
|
('User-Agent', '%s/%s' % (self.__name__, self.__version__))
|
||||||
]
|
]
|
||||||
|
@ -64,7 +64,7 @@ class API(object):
|
||||||
result = {}
|
result = {}
|
||||||
try:
|
try:
|
||||||
body = str(form)
|
body = str(form)
|
||||||
request = urllib2.Request(str(url))
|
request = urllib.request.Request(str(url))
|
||||||
request.add_header('Content-type', form.get_content_type())
|
request.add_header('Content-type', form.get_content_type())
|
||||||
request.add_header('Content-Length', str(len(body)))
|
request.add_header('Content-Length', str(len(body)))
|
||||||
request.add_header('Accept-Encoding', 'gzip, deflate')
|
request.add_header('Accept-Encoding', 'gzip, deflate')
|
||||||
|
@ -75,7 +75,7 @@ class API(object):
|
||||||
result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()
|
result = gzip.GzipFile(fileobj=BytesIO(result)).read()
|
||||||
result = result.decode('utf-8')
|
result = result.decode('utf-8')
|
||||||
return json.loads(result)
|
return json.loads(result)
|
||||||
except urllib2.HTTPError, e:
|
except urllib.error.HTTPError as e:
|
||||||
if self.DEBUG:
|
if self.DEBUG:
|
||||||
import webbrowser
|
import webbrowser
|
||||||
if e.code >= 500:
|
if e.code >= 500:
|
||||||
|
|
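Note: the `ox/api.py` hunks above swap cookielib/urllib2 for their six.moves equivalents and move to the Python 3 `except ... as e` syntax. A hedged sketch of that combination, not code from this commit; the URL is a placeholder:

```python
from six.moves import http_cookiejar as cookielib
from six.moves import urllib

cj = cookielib.CookieJar()
opener = urllib.request.build_opener(
    urllib.request.HTTPCookieProcessor(cj),
    urllib.request.HTTPHandler(debuglevel=0))
try:
    response = opener.open('http://example.com/')    # placeholder URL
except urllib.error.HTTPError as e:                  # Python 2-only spelling was: except HTTPError, e
    print(e.code)
```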
59
ox/cache.py
59
ox/cache.py
|
@ -1,24 +1,22 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
# GPL 2011
|
# GPL 2011
|
||||||
from __future__ import with_statement
|
from __future__ import with_statement, print_function
|
||||||
|
|
||||||
import gzip
|
import gzip
|
||||||
import zlib
|
import zlib
|
||||||
import hashlib
|
import hashlib
|
||||||
import os
|
import os
|
||||||
import StringIO
|
from six import BytesIO
|
||||||
import time
|
import time
|
||||||
import urlparse
|
from six.moves import urllib
|
||||||
import urllib2
|
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
|
||||||
import chardet
|
from .utils import json
|
||||||
from utils import json
|
|
||||||
from .file import makedirs
|
from .file import makedirs
|
||||||
|
|
||||||
import net
|
from . import net
|
||||||
from net import DEFAULT_HEADERS, detect_encoding
|
from .net import DEFAULT_HEADERS, detect_encoding
|
||||||
|
|
||||||
cache_timeout = 30*24*60*60 # default is 30 days
|
cache_timeout = 30*24*60*60 # default is 30 days
|
||||||
|
|
||||||
|
@ -69,7 +67,7 @@ class InvalidResult(Exception):
|
||||||
self.headers = headers
|
self.headers = headers
|
||||||
|
|
||||||
def _fix_unicode_url(url):
|
def _fix_unicode_url(url):
|
||||||
if isinstance(url, unicode):
|
if not isinstance(url, bytes):
|
||||||
url = url.encode('utf-8')
|
url = url.encode('utf-8')
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
@ -83,24 +81,30 @@ def read_url(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout, val
|
||||||
if this function fails, InvalidResult will be raised; deal with it in your code
|
if this function fails, InvalidResult will be raised; deal with it in your code
|
||||||
'''
|
'''
|
||||||
if net.DEBUG:
|
if net.DEBUG:
|
||||||
print 'ox.cache.read_url', url
|
print('ox.cache.read_url', url)
|
||||||
#FIXME: send last-modified / etag from cache and only update if needed
|
#FIXME: send last-modified / etag from cache and only update if needed
|
||||||
url = _fix_unicode_url(url)
|
#url = _fix_unicode_url(url)
|
||||||
result = store.get(url, data, headers, timeout)
|
result = store.get(url, data, headers, timeout)
|
||||||
|
url_headers = {}
|
||||||
if not result:
|
if not result:
|
||||||
try:
|
try:
|
||||||
url_headers, result = net.read_url(url, data, headers, return_headers=True)
|
url_headers, result = net.read_url(url, data, headers, return_headers=True)
|
||||||
except urllib2.HTTPError, e:
|
except urllib.error.HTTPError as e:
|
||||||
e.headers['Status'] = "%s" % e.code
|
e.headers['Status'] = "%s" % e.code
|
||||||
url_headers = dict(e.headers)
|
for key in e.headers:
|
||||||
|
url_headers[key.lower()] = e.headers[key]
|
||||||
result = e.read()
|
result = e.read()
|
||||||
if url_headers.get('content-encoding', None) == 'gzip':
|
if url_headers.get('content-encoding', None) == 'gzip':
|
||||||
result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()
|
result = gzip.GzipFile(fileobj=BytesIO(result)).read()
|
||||||
if not valid or valid(result, url_headers):
|
if not valid or valid(result, url_headers):
|
||||||
store.set(url, post_data=data, data=result, headers=url_headers)
|
store.set(url, post_data=data, data=result, headers=url_headers)
|
||||||
else:
|
else:
|
||||||
raise InvalidResult(result, url_headers)
|
raise InvalidResult(result, url_headers)
|
||||||
if unicode:
|
if unicode:
|
||||||
|
ctype = url_headers.get('content-type', '').lower()
|
||||||
|
if 'charset' in ctype:
|
||||||
|
encoding = ctype.split('charset=')[-1]
|
||||||
|
else:
|
||||||
encoding = detect_encoding(result)
|
encoding = detect_encoding(result)
|
||||||
if not encoding:
|
if not encoding:
|
||||||
encoding = 'latin-1'
|
encoding = 'latin-1'
|
||||||
|
@ -143,9 +147,8 @@ class SQLiteCache(Cache):
|
||||||
self.create()
|
self.create()
|
||||||
|
|
||||||
def connect(self):
|
def connect(self):
|
||||||
conn = sqlite3.connect(self.db, timeout=10)
|
self.conn = sqlite3.connect(self.db, timeout=10)
|
||||||
conn.text_factory = str
|
return self.conn
|
||||||
return conn
|
|
||||||
|
|
||||||
def create(self):
|
def create(self):
|
||||||
conn = self.connect()
|
conn = self.connect()
|
||||||
|
@ -177,9 +180,9 @@ class SQLiteCache(Cache):
|
||||||
if timeout == 0:
|
if timeout == 0:
|
||||||
return r
|
return r
|
||||||
if data:
|
if data:
|
||||||
url_hash = hashlib.sha1(url + '?' + data).hexdigest()
|
url_hash = hashlib.sha1((url + '?' + data).encode('utf-8')).hexdigest()
|
||||||
else:
|
else:
|
||||||
url_hash = hashlib.sha1(url).hexdigest()
|
url_hash = hashlib.sha1(url.encode('utf-8')).hexdigest()
|
||||||
|
|
||||||
conn = self.connect()
|
conn = self.connect()
|
||||||
c = conn.cursor()
|
c = conn.cursor()
|
||||||
|
@ -210,11 +213,11 @@ class SQLiteCache(Cache):
|
||||||
|
|
||||||
def set(self, url, post_data, data, headers):
|
def set(self, url, post_data, data, headers):
|
||||||
if post_data:
|
if post_data:
|
||||||
url_hash = hashlib.sha1(url + '?' + post_data).hexdigest()
|
url_hash = hashlib.sha1((url + '?' + post_data).encode('utf-8')).hexdigest()
|
||||||
else:
|
else:
|
||||||
url_hash = hashlib.sha1(url).hexdigest()
|
url_hash = hashlib.sha1(url.encode('utf-8')).hexdigest()
|
||||||
|
|
||||||
domain = ".".join(urlparse.urlparse(url)[1].split('.')[-2:])
|
domain = ".".join(urllib.parse.urlparse(url)[1].split('.')[-2:])
|
||||||
|
|
||||||
conn = self.connect()
|
conn = self.connect()
|
||||||
c = conn.cursor()
|
c = conn.cursor()
|
||||||
|
@ -266,11 +269,11 @@ class FileCache(Cache):
|
||||||
return r
|
return r
|
||||||
|
|
||||||
if data:
|
if data:
|
||||||
url_hash = hashlib.sha1(url + '?' + data).hexdigest()
|
url_hash = hashlib.sha1((url + '?' + data).encode('utf-8')).hexdigest()
|
||||||
else:
|
else:
|
||||||
url_hash = hashlib.sha1(url).hexdigest()
|
url_hash = hashlib.sha1(url.encode('utf-8')).hexdigest()
|
||||||
|
|
||||||
domain = ".".join(urlparse.urlparse(url)[1].split('.')[-2:])
|
domain = ".".join(urllib.parse.urlparse(url)[1].split('.')[-2:])
|
||||||
prefix, i, f = self.files(domain, url_hash)
|
prefix, i, f = self.files(domain, url_hash)
|
||||||
if os.path.exists(i):
|
if os.path.exists(i):
|
||||||
with open(i) as _i:
|
with open(i) as _i:
|
||||||
|
@ -295,11 +298,11 @@ class FileCache(Cache):
|
||||||
|
|
||||||
def set(self, url, post_data, data, headers):
|
def set(self, url, post_data, data, headers):
|
||||||
if post_data:
|
if post_data:
|
||||||
url_hash = hashlib.sha1(url + '?' + post_data).hexdigest()
|
url_hash = hashlib.sha1((url + '?' + post_data).encode('utf-8')).hexdigest()
|
||||||
else:
|
else:
|
||||||
url_hash = hashlib.sha1(url).hexdigest()
|
url_hash = hashlib.sha1(url.encode('utf-8')).hexdigest()
|
||||||
|
|
||||||
domain = ".".join(urlparse.urlparse(url)[1].split('.')[-2:])
|
domain = ".".join(urllib.parse.urlparse(url)[1].split('.')[-2:])
|
||||||
prefix, i, f = self.files(domain, url_hash)
|
prefix, i, f = self.files(domain, url_hash)
|
||||||
makedirs(prefix)
|
makedirs(prefix)
|
||||||
|
|
||||||
|
|
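Note: several `ox/cache.py` hunks above encode URLs before hashing, because hashlib on Python 3 only accepts bytes. A small standalone sketch of that pattern; the helper name and URL are illustrative, not the module's actual code:

```python
import hashlib

def url_hash(url, data=None):
    # hashlib.sha1() requires bytes on Python 3, so encode the text key first
    key = url + '?' + data if data else url
    return hashlib.sha1(key.encode('utf-8')).hexdigest()

print(url_hash('http://example.com/', 'q=1'))   # placeholder URL and query
```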
18
ox/file.py
18
ox/file.py
|
@ -1,7 +1,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
# GPL 2008
|
# GPL 2008
|
||||||
from __future__ import division, with_statement
|
from __future__ import division, with_statement, print_function
|
||||||
import os
|
import os
|
||||||
import hashlib
|
import hashlib
|
||||||
import re
|
import re
|
||||||
|
@ -10,7 +10,7 @@ import struct
|
||||||
import subprocess
|
import subprocess
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
|
||||||
from ox.utils import json
|
from .utils import json
|
||||||
|
|
||||||
__all__ = ['sha1sum', 'oshash', 'avinfo', 'makedirs']
|
__all__ = ['sha1sum', 'oshash', 'avinfo', 'makedirs']
|
||||||
|
|
||||||
|
@ -283,19 +283,19 @@ def makedirs(path):
|
||||||
if not os.path.exists(path):
|
if not os.path.exists(path):
|
||||||
try:
|
try:
|
||||||
os.makedirs(path)
|
os.makedirs(path)
|
||||||
except OSError, e:
|
except OSError as e:
|
||||||
if e.errno != 17:
|
if e.errno != 17:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def copy_file(source, target, verbose=False):
|
def copy_file(source, target, verbose=False):
|
||||||
if verbose:
|
if verbose:
|
||||||
print 'copying', source, 'to', target
|
print('copying', source, 'to', target)
|
||||||
write_path(target)
|
write_path(target)
|
||||||
shutil.copyfile(source, target)
|
shutil.copyfile(source, target)
|
||||||
|
|
||||||
def read_file(file, verbose=False):
|
def read_file(file, verbose=False):
|
||||||
if verbose:
|
if verbose:
|
||||||
print 'reading', file
|
print('reading', file)
|
||||||
f = open(file)
|
f = open(file)
|
||||||
data = f.read()
|
data = f.read()
|
||||||
f.close()
|
f.close()
|
||||||
|
@ -303,14 +303,14 @@ def read_file(file, verbose=False):
|
||||||
|
|
||||||
def read_json(file, verbose=False):
|
def read_json(file, verbose=False):
|
||||||
if verbose:
|
if verbose:
|
||||||
print 'reading', file
|
print('reading', file)
|
||||||
with open(file) as fd:
|
with open(file) as fd:
|
||||||
data = json.load(fd)
|
data = json.load(fd)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def write_file(file, data, verbose=False):
|
def write_file(file, data, verbose=False):
|
||||||
if verbose:
|
if verbose:
|
||||||
print 'writing', file
|
print('writing', file)
|
||||||
write_path(file)
|
write_path(file)
|
||||||
f = open(file, 'w')
|
f = open(file, 'w')
|
||||||
f.write(data)
|
f.write(data)
|
||||||
|
@ -319,7 +319,7 @@ def write_file(file, data, verbose=False):
|
||||||
|
|
||||||
def write_image(file, image, verbose=False):
|
def write_image(file, image, verbose=False):
|
||||||
if verbose:
|
if verbose:
|
||||||
print 'writing', file
|
print('writing', file)
|
||||||
write_path(file)
|
write_path(file)
|
||||||
image.save(file)
|
image.save(file)
|
||||||
|
|
||||||
|
@ -329,7 +329,7 @@ def write_json(file, data, ensure_ascii=True, indent=0, sort_keys=False, verbose
|
||||||
|
|
||||||
def write_link(source, target, verbose=False):
|
def write_link(source, target, verbose=False):
|
||||||
if verbose:
|
if verbose:
|
||||||
print 'linking', source, 'to', target
|
print('linking', source, 'to', target)
|
||||||
write_path(target)
|
write_path(target)
|
||||||
if os.path.exists(target):
|
if os.path.exists(target):
|
||||||
os.unlink(target)
|
os.unlink(target)
|
||||||
|
|
|
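Note: `ox/file.py` above mostly converts print statements to the print() function and `except OSError, e` to `except OSError as e`. For reference, a form that runs unchanged on both interpreters; the function name is illustrative, the errno check mirrors the original:

```python
from __future__ import print_function
import os

def makedirs_quiet(path, verbose=False):
    if verbose:
        print('creating', path)      # print() works on Python 2 and 3
    try:
        os.makedirs(path)
    except OSError as e:             # Python 2-only spelling was: except OSError, e
        if e.errno != 17:            # 17 == EEXIST, directory already there
            raise
```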
@ -2,13 +2,16 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# from http://blog.lumino.so/2012/08/20/fix-unicode-mistakes-with-python/
|
# from http://blog.lumino.so/2012/08/20/fix-unicode-mistakes-with-python/
|
||||||
# MIT
|
# MIT
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import unicodedata
|
import unicodedata
|
||||||
|
|
||||||
|
from six import unichr
|
||||||
|
|
||||||
__all__ = ['fix_bad_unicode']
|
__all__ = ['fix_bad_unicode']
|
||||||
|
|
||||||
def fix_bad_unicode(text):
|
def fix_bad_unicode(text):
|
||||||
u"""
|
"""
|
||||||
Something you will find all over the place, in real-world text, is text
|
Something you will find all over the place, in real-world text, is text
|
||||||
that's mistakenly encoded as utf-8, decoded in some ugly format like
|
that's mistakenly encoded as utf-8, decoded in some ugly format like
|
||||||
latin-1 or even Windows codepage 1252, and encoded as utf-8 again.
|
latin-1 or even Windows codepage 1252, and encoded as utf-8 again.
|
||||||
|
@ -26,52 +29,53 @@ def fix_bad_unicode(text):
|
||||||
auto-decode bytes for you -- then it would just create the problems it's
|
auto-decode bytes for you -- then it would just create the problems it's
|
||||||
supposed to fix.
|
supposed to fix.
|
||||||
|
|
||||||
>>> print fix_bad_unicode(u'único')
|
>>> fix_bad_unicode(u'único')
|
||||||
único
|
'único'
|
||||||
|
|
||||||
|
>>> fix_bad_unicode('This text is fine already :þ')
|
||||||
|
'This text is fine already :þ'
|
||||||
|
|
||||||
>>> print fix_bad_unicode(u'This text is fine already :þ')
|
|
||||||
This text is fine already :þ
|
|
||||||
|
|
||||||
Because these characters often come from Microsoft products, we allow
|
Because these characters often come from Microsoft products, we allow
|
||||||
for the possibility that we get not just Unicode characters 128-255, but
|
for the possibility that we get not just Unicode characters 128-255, but
|
||||||
also Windows's conflicting idea of what characters 128-160 are.
|
also Windows's conflicting idea of what characters 128-160 are.
|
||||||
|
|
||||||
>>> print fix_bad_unicode(u'This — should be an em dash')
|
>>> fix_bad_unicode('This — should be an em dash')
|
||||||
This — should be an em dash
|
'This — should be an em dash'
|
||||||
|
|
||||||
We might have to deal with both Windows characters and raw control
|
We might have to deal with both Windows characters and raw control
|
||||||
characters at the same time, especially when dealing with characters like
|
characters at the same time, especially when dealing with characters like
|
||||||
\x81 that have no mapping in Windows.
|
\x81 that have no mapping in Windows.
|
||||||
|
|
||||||
>>> print fix_bad_unicode(u'This text is sad .â\x81”.')
|
>>> fix_bad_unicode('This text is sad .â\x81”.')
|
||||||
This text is sad .⁔.
|
'This text is sad .⁔.'
|
||||||
|
|
||||||
This function even fixes multiple levels of badness:
|
This function even fixes multiple levels of badness:
|
||||||
|
|
||||||
>>> wtf = u'\xc3\xa0\xc2\xb2\xc2\xa0_\xc3\xa0\xc2\xb2\xc2\xa0'
|
>>> wtf = '\xc3\xa0\xc2\xb2\xc2\xa0_\xc3\xa0\xc2\xb2\xc2\xa0'
|
||||||
>>> print fix_bad_unicode(wtf)
|
>>> fix_bad_unicode(wtf)
|
||||||
ಠ_ಠ
|
'ಠ_ಠ'
|
||||||
|
|
||||||
However, it has safeguards against fixing sequences of letters and
|
However, it has safeguards against fixing sequences of letters and
|
||||||
punctuation that can occur in valid text:
|
punctuation that can occur in valid text:
|
||||||
|
|
||||||
>>> print fix_bad_unicode(u'not such a fan of Charlotte Brontë…”')
|
>>> fix_bad_unicode('not such a fan of Charlotte Brontë…”')
|
||||||
not such a fan of Charlotte Brontë…”
|
'not such a fan of Charlotte Brontë…”'
|
||||||
|
|
||||||
Cases of genuine ambiguity can sometimes be addressed by finding other
|
Cases of genuine ambiguity can sometimes be addressed by finding other
|
||||||
characters that are not double-encoding, and expecting the encoding to
|
characters that are not double-encoding, and expecting the encoding to
|
||||||
be consistent:
|
be consistent:
|
||||||
|
|
||||||
>>> print fix_bad_unicode(u'AHÅ™, the new sofa from IKEA®')
|
>>> fix_bad_unicode('AHÅ™, the new sofa from IKEA®')
|
||||||
AHÅ™, the new sofa from IKEA®
|
'AHÅ™, the new sofa from IKEA®'
|
||||||
|
|
||||||
Finally, we handle the case where the text is in a single-byte encoding
|
Finally, we handle the case where the text is in a single-byte encoding
|
||||||
that was intended as Windows-1252 all along but read as Latin-1:
|
that was intended as Windows-1252 all along but read as Latin-1:
|
||||||
|
|
||||||
>>> print fix_bad_unicode(u'This text was never Unicode at all\x85')
|
>>> fix_bad_unicode('This text was never Unicode at all\x85')
|
||||||
This text was never Unicode at all…
|
'This text was never Unicode at all…'
|
||||||
"""
|
"""
|
||||||
if not isinstance(text, unicode):
|
if not isinstance(text, str):
|
||||||
raise TypeError("This isn't even decoded into Unicode yet. "
|
raise TypeError("This isn't even decoded into Unicode yet. "
|
||||||
"Decode it first.")
|
"Decode it first.")
|
||||||
if len(text) == 0:
|
if len(text) == 0:
|
||||||
|
@ -118,7 +122,7 @@ def reinterpret_windows1252_as_utf8(wrongtext):
|
||||||
altered_bytes.append(char.encode('WINDOWS_1252'))
|
altered_bytes.append(char.encode('WINDOWS_1252'))
|
||||||
else:
|
else:
|
||||||
altered_bytes.append(char.encode('latin-1', 'replace'))
|
altered_bytes.append(char.encode('latin-1', 'replace'))
|
||||||
return ''.join(altered_bytes).decode('utf-8', 'replace')
|
return b''.join(altered_bytes).decode('utf-8', 'replace')
|
||||||
|
|
||||||
|
|
||||||
def reinterpret_latin1_as_windows1252(wrongtext):
|
def reinterpret_latin1_as_windows1252(wrongtext):
|
||||||
|
@ -130,7 +134,7 @@ def reinterpret_latin1_as_windows1252(wrongtext):
|
||||||
|
|
||||||
|
|
||||||
def text_badness(text):
|
def text_badness(text):
|
||||||
u'''
|
'''
|
||||||
Look for red flags that text is encoded incorrectly:
|
Look for red flags that text is encoded incorrectly:
|
||||||
|
|
||||||
Obvious problems:
|
Obvious problems:
|
||||||
|
@ -147,12 +151,12 @@ def text_badness(text):
|
||||||
- Improbable single-byte characters, such as ƒ or ¬
|
- Improbable single-byte characters, such as ƒ or ¬
|
||||||
- Letters in somewhat rare scripts
|
- Letters in somewhat rare scripts
|
||||||
'''
|
'''
|
||||||
assert isinstance(text, unicode)
|
assert isinstance(text, str)
|
||||||
errors = 0
|
errors = 0
|
||||||
very_weird_things = 0
|
very_weird_things = 0
|
||||||
weird_things = 0
|
weird_things = 0
|
||||||
prev_letter_script = None
|
prev_letter_script = None
|
||||||
for pos in xrange(len(text)):
|
for pos in range(len(text)):
|
||||||
char = text[pos]
|
char = text[pos]
|
||||||
index = ord(char)
|
index = ord(char)
|
||||||
if index < 256:
|
if index < 256:
|
||||||
|
@ -241,7 +245,7 @@ WINDOWS_1252_GREMLINS = [
|
||||||
]
|
]
|
||||||
|
|
||||||
# a list of Unicode characters that might appear in Windows-1252 text
|
# a list of Unicode characters that might appear in Windows-1252 text
|
||||||
WINDOWS_1252_CODEPOINTS = range(256) + WINDOWS_1252_GREMLINS
|
WINDOWS_1252_CODEPOINTS = list(range(256)) + WINDOWS_1252_GREMLINS
|
||||||
|
|
||||||
# Rank the characters typically represented by a single byte -- that is, in
|
# Rank the characters typically represented by a single byte -- that is, in
|
||||||
# Latin-1 or Windows-1252 -- by how weird it would be to see them in running
|
# Latin-1 or Windows-1252 -- by how weird it would be to see them in running
|
||||||
|
@ -286,7 +290,7 @@ SINGLE_BYTE_WEIRDNESS = (
|
||||||
# letters. We'll need it often.
|
# letters. We'll need it often.
|
||||||
SINGLE_BYTE_LETTERS = [
|
SINGLE_BYTE_LETTERS = [
|
||||||
unicodedata.category(unichr(i)).startswith('L')
|
unicodedata.category(unichr(i)).startswith('L')
|
||||||
for i in xrange(256)
|
for i in range(256)
|
||||||
]
|
]
|
||||||
|
|
||||||
# A table telling us how to interpret the first word of a letter's Unicode
|
# A table telling us how to interpret the first word of a letter's Unicode
|
||||||
|
|
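Note: the `fixunicode.py` hunks above replace `xrange` with `range`, pull `unichr` from six, and join byte strings with `b''`. The table below is essentially the snippet from the diff, repeated as a runnable sketch:

```python
import unicodedata
from six import unichr   # Python 3 has no builtin unichr; six provides one name for both

# True for byte values 0-255 whose Latin-1 character is a letter
SINGLE_BYTE_LETTERS = [
    unicodedata.category(unichr(i)).startswith('L')
    for i in range(256)   # xrange() no longer exists on Python 3
]

print(sum(SINGLE_BYTE_LETTERS), 'of 256 single-byte values are letters')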
21
ox/form.py
21
ox/form.py
|
@ -1,17 +1,34 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
|
# GPL 2014
|
||||||
|
from __future__ import with_statement, print_function
|
||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
import mimetools
|
|
||||||
import mimetypes
|
import mimetypes
|
||||||
|
import random
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['MultiPartForm']
|
__all__ = ['MultiPartForm']
|
||||||
|
|
||||||
|
# from /usr/lib/python3.4/email/generator.py
|
||||||
|
# Helper used by Generator._make_boundary
|
||||||
|
_width = len(repr(sys.maxsize-1))
|
||||||
|
_fmt = '%%0%dd' % _width
|
||||||
|
|
||||||
|
def _make_boundary():
|
||||||
|
# Craft a random boundary.
|
||||||
|
token = random.randrange(sys.maxsize)
|
||||||
|
boundary = ('=' * 15) + (_fmt % token) + '=='
|
||||||
|
return boundary
|
||||||
|
|
||||||
class MultiPartForm(object):
|
class MultiPartForm(object):
|
||||||
"""Accumulate the data to be used when posting a form."""
|
"""Accumulate the data to be used when posting a form."""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.form_fields = []
|
self.form_fields = []
|
||||||
self.files = []
|
self.files = []
|
||||||
self.boundary = mimetools.choose_boundary()
|
self.boundary = _make_boundary()
|
||||||
return
|
return
|
||||||
|
|
||||||
def get_content_type(self):
|
def get_content_type(self):
|
||||||
|
|
|
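Note: `ox/form.py` above drops the removed `mimetools` module and builds a multipart boundary by hand, copied (per the comment in the diff) from Python 3.4's email.generator. A standalone sketch of that replacement, using only the standard library:

```python
import random
import sys

_width = len(repr(sys.maxsize - 1))
_fmt = '%%0%dd' % _width

def make_boundary():
    # mimetools.choose_boundary() is gone on Python 3; craft a random boundary instead
    token = random.randrange(sys.maxsize)
    return ('=' * 15) + (_fmt % token) + '=='

print(make_boundary())
```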
@ -20,7 +20,7 @@ def toAZ(num):
|
||||||
>>> toAZ(1234567890)
|
>>> toAZ(1234567890)
|
||||||
'CYWOQVJ'
|
'CYWOQVJ'
|
||||||
"""
|
"""
|
||||||
if num < 1: raise ValueError, "must supply a positive integer"
|
if num < 1: raise ValueError("must supply a positive integer")
|
||||||
digits = string.ascii_uppercase
|
digits = string.ascii_uppercase
|
||||||
az = ''
|
az = ''
|
||||||
while num != 0:
|
while num != 0:
|
||||||
|
@ -62,7 +62,7 @@ def to26(q):
|
||||||
>>> to26(347485647)
|
>>> to26(347485647)
|
||||||
'BDGKMAP'
|
'BDGKMAP'
|
||||||
"""
|
"""
|
||||||
if q < 0: raise ValueError, "must supply a positive integer"
|
if q < 0: raise ValueError("must supply a positive integer")
|
||||||
base26 = string.ascii_uppercase
|
base26 = string.ascii_uppercase
|
||||||
converted = []
|
converted = []
|
||||||
while q != 0:
|
while q != 0:
|
||||||
|
@ -119,7 +119,7 @@ def to32(q):
|
||||||
ValueError: must supply a positive integer
|
ValueError: must supply a positive integer
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if q < 0: raise ValueError, "must supply a positive integer"
|
if q < 0: raise ValueError("must supply a positive integer")
|
||||||
letters = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
|
letters = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
|
||||||
converted = []
|
converted = []
|
||||||
while q != 0:
|
while q != 0:
|
||||||
|
@ -206,7 +206,7 @@ def to36(q):
|
||||||
...
|
...
|
||||||
ValueError: must supply a positive integer
|
ValueError: must supply a positive integer
|
||||||
"""
|
"""
|
||||||
if q < 0: raise ValueError, "must supply a positive integer"
|
if q < 0: raise ValueError("must supply a positive integer")
|
||||||
letters = "0123456789abcdefghijklmnopqrstuvwxyz"
|
letters = "0123456789abcdefghijklmnopqrstuvwxyz"
|
||||||
converted = []
|
converted = []
|
||||||
while q != 0:
|
while q != 0:
|
||||||
|
|
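Note: the `ox/format.py` hunks above only touch exception syntax: `raise ValueError, "msg"` is Python 2 only, while `raise ValueError("msg")` works on both. A sketch of the same base-26 conversion with the fixed raise; this is an illustration of the idea, not necessarily the module's exact algorithm:

```python
import string

def to26(q):
    # upper-case base-26 representation of a non-negative integer
    if q < 0:
        raise ValueError("must supply a positive integer")   # Python 2-only form: raise ValueError, "..."
    letters = string.ascii_uppercase
    converted = []
    while True:
        q, r = divmod(q, 26)
        converted.insert(0, letters[r])
        if q == 0:
            break
    return ''.join(converted)

print(to26(347485647))   # 'BDGKMAP', matching the doctest in the diff above
```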
14
ox/html.py
14
ox/html.py
|
@ -1,9 +1,11 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
# GPL 2008
|
# GPL 2008
|
||||||
|
import sys
|
||||||
import re
|
import re
|
||||||
import string
|
import string
|
||||||
from htmlentitydefs import name2codepoint
|
from six.moves.html_entities import name2codepoint
|
||||||
|
from six import unichr
|
||||||
|
|
||||||
|
|
||||||
# Configuration for add_links() function
|
# Configuration for add_links() function
|
||||||
|
@ -23,7 +25,8 @@ link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+')
|
||||||
html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE)
|
html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE)
|
||||||
hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL)
|
hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL)
|
||||||
trailing_empty_content_re = re.compile(r'(?:<p>(?: |\s|<br \/>)*?</p>\s*)+\Z')
|
trailing_empty_content_re = re.compile(r'(?:<p>(?: |\s|<br \/>)*?</p>\s*)+\Z')
|
||||||
del x # Temporary variable
|
if sys.version_info[0] == 2:
|
||||||
|
del x # Temporary variable
|
||||||
|
|
||||||
def escape(html):
|
def escape(html):
|
||||||
'''
|
'''
|
||||||
|
@ -146,12 +149,9 @@ def decode_html(html):
|
||||||
>>> decode_html('Anniversary of Daoud's Republic')
|
>>> decode_html('Anniversary of Daoud's Republic')
|
||||||
u"Anniversary of Daoud's Republic"
|
u"Anniversary of Daoud's Republic"
|
||||||
"""
|
"""
|
||||||
if type(html) != unicode:
|
if isinstance(html, bytes):
|
||||||
html = unicode(html)[:]
|
html = html.decode('utf-8')
|
||||||
if type(html) is unicode:
|
|
||||||
uchr = unichr
|
uchr = unichr
|
||||||
else:
|
|
||||||
uchr = lambda value: value > 255 and unichr(value) or chr(value)
|
|
||||||
def entitydecode(match, uchr=uchr):
|
def entitydecode(match, uchr=uchr):
|
||||||
entity = match.group(1)
|
entity = match.group(1)
|
||||||
if entity == '#x80':
|
if entity == '#x80':
|
||||||
|
|
|
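Note: `ox/html.py` above pulls `name2codepoint` and `unichr` from six instead of the Python 2-only `htmlentitydefs` module. A small entity decoder using those imports; the helper name and test string are illustrative, not the module's `decode_html`:

```python
import re
from six import unichr
from six.moves.html_entities import name2codepoint   # htmlentitydefs is Python 2 only

def decode_entities(html):
    # replace &amp;, &#39;, &#x27;, &eacute;, ... with the corresponding characters
    def entitydecode(match):
        entity = match.group(1)
        if entity[:2].lower() == '#x':
            return unichr(int(entity[2:], 16))
        if entity.startswith('#'):
            return unichr(int(entity[1:]))
        cp = name2codepoint.get(entity)
        return unichr(cp) if cp else match.group(0)   # leave unknown entities untouched
    return re.sub(r'&(#?[xX]?\w+);', entitydecode, html)

print(decode_entities('Daoud&#39;s caf&eacute; &amp; bar'))
```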
@ -1,10 +1,10 @@
|
||||||
#!/usr/bin/python
|
#!/usr/bin/python
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
from __future__ import with_statement
|
from __future__ import with_statement, print_function
|
||||||
|
|
||||||
from js import minify
|
from .js import minify
|
||||||
from utils import json
|
from .utils import json
|
||||||
|
|
||||||
|
|
||||||
def load(f):
|
def load(f):
|
||||||
|
@ -14,7 +14,7 @@ def loads(source):
|
||||||
try:
|
try:
|
||||||
minified = minify(source)
|
minified = minify(source)
|
||||||
return json.loads(minified)
|
return json.loads(minified)
|
||||||
except json.JSONDecodeError, e:
|
except json.JSONDecodeError as e:
|
||||||
s = minified.split('\n')
|
s = minified.split('\n')
|
||||||
context = s[e.lineno-1][max(0, e.colno-1):e.colno+30]
|
context = s[e.lineno-1][max(0, e.colno-1):e.colno+30]
|
||||||
msg = e.msg + ' at ' + context
|
msg = e.msg + ' at ' + context
|
||||||
|
|
|
@ -9,9 +9,9 @@ import os
|
||||||
import re
|
import re
|
||||||
import unicodedata
|
import unicodedata
|
||||||
|
|
||||||
from normalize import normalize_name
|
from .normalize import normalize_name
|
||||||
from text import get_sort_name, find_re
|
from .text import get_sort_name, find_re
|
||||||
from file import EXTENSIONS
|
from .file import EXTENSIONS
|
||||||
|
|
||||||
__all__ = ['parse_movie_path', 'create_movie_path', 'get_oxid']
|
__all__ = ['parse_movie_path', 'create_movie_path', 'get_oxid']
|
||||||
|
|
||||||
|
|
51
ox/net.py
51
ox/net.py
|
@ -1,13 +1,13 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
# GPL 2008
|
# GPL 2008
|
||||||
|
from __future__ import with_statement, print_function
|
||||||
import os
|
import os
|
||||||
import gzip
|
import gzip
|
||||||
import re
|
import re
|
||||||
import StringIO
|
from six import BytesIO
|
||||||
import struct
|
import struct
|
||||||
import urllib
|
from six.moves import urllib
|
||||||
import urllib2
|
|
||||||
|
|
||||||
from chardet.universaldetector import UniversalDetector
|
from chardet.universaldetector import UniversalDetector
|
||||||
|
|
||||||
|
@ -26,7 +26,7 @@ def status(url, data=None, headers=DEFAULT_HEADERS):
|
||||||
try:
|
try:
|
||||||
f = open_url(url, data, headers)
|
f = open_url(url, data, headers)
|
||||||
s = f.code
|
s = f.code
|
||||||
except urllib2.HTTPError, e:
|
except urllib.error.HTTPError as e:
|
||||||
s = e.code
|
s = e.code
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
@ -42,46 +42,59 @@ def get_headers(url, data=None, headers=DEFAULT_HEADERS):
|
||||||
f.headers['Status'] = "%s" % f.code
|
f.headers['Status'] = "%s" % f.code
|
||||||
headers = f.headers
|
headers = f.headers
|
||||||
f.close()
|
f.close()
|
||||||
except urllib2.HTTPError, e:
|
except urllib.error.HTTPError as e:
|
||||||
e.headers['Status'] = "%s" % e.code
|
e.headers['Status'] = "%s" % e.code
|
||||||
headers = e.headers
|
headers = e.headers
|
||||||
return dict(headers)
|
return dict(headers)
|
||||||
|
|
||||||
def open_url(url, data=None, headers=DEFAULT_HEADERS):
|
def open_url(url, data=None, headers=DEFAULT_HEADERS):
|
||||||
|
if isinstance(url, bytes):
|
||||||
|
url = url.decode('utf-8')
|
||||||
url = url.replace(' ', '%20')
|
url = url.replace(' ', '%20')
|
||||||
req = urllib2.Request(url, data, headers)
|
req = urllib.request.Request(url, data, headers)
|
||||||
return urllib2.urlopen(req)
|
return urllib.request.urlopen(req)
|
||||||
|
|
||||||
def read_url(url, data=None, headers=DEFAULT_HEADERS, return_headers=False, unicode=False):
|
def read_url(url, data=None, headers=DEFAULT_HEADERS, return_headers=False, unicode=False):
|
||||||
if DEBUG:
|
if DEBUG:
|
||||||
print 'ox.net.read_url', url
|
print('ox.net.read_url', url)
|
||||||
f = open_url(url, data, headers)
|
f = open_url(url, data, headers)
|
||||||
result = f.read()
|
result = f.read()
|
||||||
f.close()
|
f.close()
|
||||||
if f.headers.get('content-encoding', None) == 'gzip':
|
if f.headers.get('content-encoding', None) == 'gzip':
|
||||||
result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()
|
result = gzip.GzipFile(fileobj=BytesIO(result)).read()
|
||||||
if unicode:
|
if unicode:
|
||||||
|
ctype = f.headers.get('content-type', '').lower()
|
||||||
|
if 'charset' in ctype:
|
||||||
|
encoding = ctype.split('charset=')[-1]
|
||||||
|
else:
|
||||||
encoding = detect_encoding(result)
|
encoding = detect_encoding(result)
|
||||||
if not encoding:
|
if not encoding:
|
||||||
encoding = 'latin-1'
|
encoding = 'latin-1'
|
||||||
result = result.decode(encoding)
|
result = result.decode(encoding)
|
||||||
if return_headers:
|
if return_headers:
|
||||||
f.headers['Status'] = "%s" % f.code
|
f.headers['Status'] = "%s" % f.code
|
||||||
return dict(f.headers), result
|
headers = {}
|
||||||
|
for key in f.headers:
|
||||||
|
headers[key.lower()] = f.headers[key]
|
||||||
|
return headers, result
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def detect_encoding(data):
|
def detect_encoding(data):
|
||||||
data_lower = data.lower()
|
data_lower = data.lower().decode('utf-8', 'ignore')
|
||||||
charset = re.compile('content="text/html; charset=(.*?)"').findall(data)
|
charset = re.compile('content="text/html; charset=(.*?)"').findall(data_lower)
|
||||||
if not charset:
|
if not charset:
|
||||||
charset = re.compile('meta charset="(.*?)"').findall(data)
|
charset = re.compile('meta charset="(.*?)"').findall(data_lower)
|
||||||
if charset:
|
if charset:
|
||||||
return charset[0].lower()
|
return charset[0].lower()
|
||||||
detector = UniversalDetector()
|
detector = UniversalDetector()
|
||||||
for line in data.split('\n'):
|
p = 0
|
||||||
detector.feed(line)
|
l = len(data)
|
||||||
|
s = 1024
|
||||||
|
while p < l:
|
||||||
|
detector.feed(data[p:p+s])
|
||||||
if detector.done:
|
if detector.done:
|
||||||
break
|
break
|
||||||
|
p += s
|
||||||
detector.close()
|
detector.close()
|
||||||
return detector.result['encoding']
|
return detector.result['encoding']
|
||||||
|
|
||||||
|
@ -97,9 +110,9 @@ def save_url(url, filename, overwrite=False):
|
||||||
|
|
||||||
def oshash(url):
|
def oshash(url):
|
||||||
def get_size(url):
|
def get_size(url):
|
||||||
req = urllib2.Request(url, headers=DEFAULT_HEADERS.copy())
|
req = urllib.request.Request(url, headers=DEFAULT_HEADERS.copy())
|
||||||
req.get_method = lambda : 'HEAD'
|
req.get_method = lambda : 'HEAD'
|
||||||
u = urllib2.urlopen(req)
|
u = urllib.request.urlopen(req)
|
||||||
if u.code != 200 or not 'Content-Length' in u.headers:
|
if u.code != 200 or not 'Content-Length' in u.headers:
|
||||||
raise IOError
|
raise IOError
|
||||||
return int(u.headers['Content-Length'])
|
return int(u.headers['Content-Length'])
|
||||||
|
@ -107,8 +120,8 @@ def oshash(url):
|
||||||
def get_range(url, start, end):
|
def get_range(url, start, end):
|
||||||
headers = DEFAULT_HEADERS.copy()
|
headers = DEFAULT_HEADERS.copy()
|
||||||
headers['Range'] = 'bytes=%s-%s' % (start, end)
|
headers['Range'] = 'bytes=%s-%s' % (start, end)
|
||||||
req = urllib2.Request(url, headers=headers)
|
req = urllib.request.Request(url, headers=headers)
|
||||||
u = urllib2.urlopen(req)
|
u = urllib.request.urlopen(req)
|
||||||
return u.read()
|
return u.read()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
|
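Note: `ox/net.py` above now feeds chardet's UniversalDetector in fixed-size chunks instead of splitting the response on newlines, which would misbehave on bytes under Python 3. A compact sketch of that loop, assuming `data` is a bytes object; the sample input is made up:

```python
from chardet.universaldetector import UniversalDetector

def detect_encoding(data, chunk_size=1024):
    # feed bytes to chardet in chunks and stop as soon as it is confident
    detector = UniversalDetector()
    for p in range(0, len(data), chunk_size):
        detector.feed(data[p:p + chunk_size])
        if detector.done:
            break
    detector.close()
    return detector.result['encoding']

sample = (u'caf\u00e9 ' * 400).encode('utf-8')
print(detect_encoding(sample))
```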
@ -1,9 +1,10 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# ci:si:et:sw=4:sts=4:ts=4
|
# ci:si:et:sw=4:sts=4:ts=4
|
||||||
import re
|
import re
|
||||||
from text import find_re
|
|
||||||
import cache
|
from . import cache
|
||||||
from utils import json, ET
|
from .text import find_re
|
||||||
|
from .utils import json, ET
|
||||||
|
|
||||||
def get_embed_code(url, maxwidth=None, maxheight=None):
|
def get_embed_code(url, maxwidth=None, maxheight=None):
|
||||||
embed = {}
|
embed = {}
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
from __future__ import with_statement, division
|
from __future__ import with_statement, division, print_function
|
||||||
import chardet
|
import chardet
|
||||||
import re
|
import re
|
||||||
import codecs
|
import codecs
|
||||||
|
@ -71,7 +71,7 @@ def load(filename, offset=0):
|
||||||
try:
|
try:
|
||||||
data = unicode(data, 'latin-1')
|
data = unicode(data, 'latin-1')
|
||||||
except:
|
except:
|
||||||
print "failed to detect encoding, giving up"
|
print("failed to detect encoding, giving up")
|
||||||
return srt
|
return srt
|
||||||
|
|
||||||
data = data.replace('\r\n', '\n')
|
data = data.replace('\r\n', '\n')
|
||||||
|
|
|
@ -6,7 +6,7 @@ from threading import Event
|
||||||
from hashlib import sha1
|
from hashlib import sha1
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from bencode import bencode, bdecode
|
from .bencode import bencode, bdecode
|
||||||
|
|
||||||
__all__ = ['create_torrent', 'get_info_hash', 'get_torrent_info', 'get_files', 'get_torrent_size']
|
__all__ = ['create_torrent', 'get_info_hash', 'get_torrent_info', 'get_files', 'get_torrent_size']
|
||||||
|
|
||||||
|
@ -24,9 +24,8 @@ def get_info_hash(torrentFile):
|
||||||
return sha1(bencode(info)).hexdigest()
|
return sha1(bencode(info)).hexdigest()
|
||||||
|
|
||||||
def get_torrent_info(data=None, file=None):
|
def get_torrent_info(data=None, file=None):
|
||||||
from bencode import bencode
|
|
||||||
if file:
|
if file:
|
||||||
if isinstance(file, unicode):
|
if not isinstance(file, bytes):
|
||||||
file = file.encode('utf-8')
|
file = file.encode('utf-8')
|
||||||
with open(file, 'rb') as f:
|
with open(file, 'rb') as f:
|
||||||
data = f.read()
|
data = f.read()
|
||||||
|
@ -36,7 +35,7 @@ def get_torrent_info(data=None, file=None):
|
||||||
metainfo = bdecode(data)
|
metainfo = bdecode(data)
|
||||||
info = metainfo['info']
|
info = metainfo['info']
|
||||||
piece_length = info['piece length']
|
piece_length = info['piece length']
|
||||||
if info.has_key('length'):
|
if 'length' in info:
|
||||||
# let's assume we just have one file
|
# let's assume we just have one file
|
||||||
file_length = info['length']
|
file_length = info['length']
|
||||||
else:
|
else:
|
||||||
|
|
|
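Note: the `ox/torrent` hunk above replaces `dict.has_key()`, which was removed in Python 3, with the `in` operator, and checks for bytes rather than `unicode`. A small illustration on a dict shaped like a torrent's info section; the values are placeholders:

```python
# illustrative metainfo fragment, not a real torrent
info = {'piece length': 262144, 'length': 734003200, 'name': 'example.avi'}

if 'length' in info:          # Python 2-only form was: info.has_key('length')
    file_length = info['length']
else:
    # multi-file torrents carry a 'files' list instead of a single 'length'
    file_length = sum(f['length'] for f in info['files'])

print(file_length)
```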
@ -2,8 +2,8 @@
|
||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
__version__ = '1.0.0'
|
__version__ = '1.0.0'
|
||||||
|
|
||||||
import imdb
|
from . import imdb
|
||||||
import wikipedia
|
from . import wikipedia
|
||||||
import google
|
from . import google
|
||||||
import piratecinema
|
from . import piratecinema
|
||||||
import oxdb
|
from . import oxdb
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
import re
|
import re
|
||||||
import time
|
|
||||||
|
|
||||||
from ox import strip_tags, find_re
|
from ox import strip_tags, find_re
|
||||||
from ox.cache import read_url
|
from ox.cache import read_url
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
import re
|
import re
|
||||||
from urllib import quote
|
from six.moves.urllib.parse import quote
|
||||||
|
|
||||||
from ox import find_re, strip_tags, decode_html
|
from ox import find_re, strip_tags, decode_html
|
||||||
from ox.cache import read_url
|
from ox.cache import read_url
|
||||||
|
|
|
@ -1,14 +1,11 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
from datetime import datetime
|
|
||||||
from urllib import urlencode
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from ox import find_re, strip_tags, decode_html
|
from ox import find_re, strip_tags
|
||||||
from ox.cache import read_url
|
from ox.cache import read_url
|
||||||
from ox.net import open_url
|
|
||||||
|
|
||||||
def get_data(id, language='en'):
|
def get_data(id, language='en'):
|
||||||
if language == 'de':
|
if language == 'de':
|
||||||
|
@ -57,7 +54,7 @@ def backup(filename):
|
||||||
data = json.load(f)
|
data = json.load(f)
|
||||||
else:
|
else:
|
||||||
data = {}
|
data = {}
|
||||||
start = ids and max(map(int, data)) or 1
|
start = max(map(int, data)) or 1
|
||||||
for i in range(start, 11872):
|
for i in range(start, 11872):
|
||||||
info = get_data(i)
|
info = get_data(i)
|
||||||
if info:
|
if info:
|
||||||
|
|
|
@ -5,7 +5,7 @@ import re
|
||||||
import ox.cache
|
import ox.cache
|
||||||
from ox.cache import read_url
|
from ox.cache import read_url
|
||||||
from ox.html import strip_tags
|
from ox.html import strip_tags
|
||||||
from ox.text import find_re, remove_special_characters
|
from ox.text import find_re
|
||||||
|
|
||||||
import imdb
|
import imdb
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
import re
|
import re
|
||||||
from urllib import unquote
|
from six.moves.urllib.parse import unquote
|
||||||
from ox.cache import read_url
|
from ox.cache import read_url
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,17 +1,17 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
import re
|
import re
|
||||||
import urllib
|
|
||||||
|
from six.moves import urllib
|
||||||
import ox
|
import ox
|
||||||
from ox import strip_tags, decode_html
|
from ox import strip_tags, decode_html
|
||||||
from ox.utils import json
|
|
||||||
from ox.cache import read_url
|
from ox.cache import read_url
|
||||||
|
|
||||||
|
|
||||||
def find(query, timeout=ox.cache.cache_timeout):
|
def find(query, timeout=ox.cache.cache_timeout):
|
||||||
if isinstance(query, unicode):
|
if not isinstance(query, bytes):
|
||||||
query = query.encode('utf-8')
|
query = query.encode('utf-8')
|
||||||
params = urllib.urlencode({'q': query})
|
params = urllib.parse.urlencode({'q': query})
|
||||||
url = 'http://duckduckgo.com/html/?' + params
|
url = 'http://duckduckgo.com/html/?' + params
|
||||||
data = read_url(url, timeout=timeout).decode('utf-8')
|
data = read_url(url, timeout=timeout).decode('utf-8')
|
||||||
results = []
|
results = []
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
import re
|
import re
|
||||||
import urllib
|
from six.moves import urllib
|
||||||
|
|
||||||
import ox
|
import ox
|
||||||
from ox import strip_tags, decode_html
|
from ox import strip_tags, decode_html
|
||||||
|
@ -13,9 +13,9 @@ def read_url(url, data=None, headers=ox.net.DEFAULT_HEADERS, timeout=DEFAULT_TIM
|
||||||
return ox.cache.read_url(url, data, headers, timeout, unicode=True)
|
return ox.cache.read_url(url, data, headers, timeout, unicode=True)
|
||||||
|
|
||||||
def quote_plus(s):
|
def quote_plus(s):
|
||||||
if not isinstance(s, str):
|
if not isinstance(s, bytes):
|
||||||
s = s.encode('utf-8')
|
s = s.encode('utf-8')
|
||||||
return urllib.quote_plus(s)
|
return urllib.parse.quote_plus(s)
|
||||||
|
|
||||||
def find(query, max_results=DEFAULT_MAX_RESULTS, timeout=DEFAULT_TIMEOUT):
|
def find(query, max_results=DEFAULT_MAX_RESULTS, timeout=DEFAULT_TIMEOUT):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -1,23 +1,27 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
import urllib
|
from __future__ import print_function
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
import unicodedata
|
import unicodedata
|
||||||
|
|
||||||
import ox
|
from six.moves import urllib
|
||||||
from ox import find_re, strip_tags
|
from six import string_types
|
||||||
import ox.cache
|
|
||||||
|
|
||||||
from siteparser import SiteParser
|
|
||||||
import duckduckgo
|
|
||||||
|
|
||||||
|
from .. import find_re, strip_tags, decode_html
|
||||||
|
from .. import cache
|
||||||
|
|
||||||
|
|
||||||
|
from . siteparser import SiteParser
|
||||||
|
from . import duckduckgo
|
||||||
from ..utils import datetime
|
from ..utils import datetime
|
||||||
from ..geo import normalize_country_name
|
from ..geo import normalize_country_name
|
||||||
|
|
||||||
def read_url(url, data=None, headers=ox.cache.DEFAULT_HEADERS, timeout=ox.cache.cache_timeout, valid=None, unicode=False):
|
def read_url(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None, unicode=False):
|
||||||
headers = headers.copy()
|
headers = headers.copy()
|
||||||
return ox.cache.read_url(url, data, headers, timeout, unicode=unicode)
|
return cache.read_url(url, data, headers, timeout, unicode=unicode)
|
||||||
|
|
||||||
def get_url(id):
|
def get_url(id):
|
||||||
return "http://www.imdb.com/title/tt%s/" % id
|
return "http://www.imdb.com/title/tt%s/" % id
|
||||||
|
@ -49,7 +53,7 @@ class Imdb(SiteParser):
|
||||||
'page': 'business',
|
'page': 'business',
|
||||||
're': [
|
're': [
|
||||||
'<h5>Budget</h5>\s*?\$(.*?)<br',
|
'<h5>Budget</h5>\s*?\$(.*?)<br',
|
||||||
lambda data: find_re(ox.decode_html(data).replace(',', ''), '\d+')
|
lambda data: find_re(decode_html(data).replace(',', ''), '\d+')
|
||||||
],
|
],
|
||||||
'type': 'int'
|
'type': 'int'
|
||||||
},
|
},
|
||||||
|
@ -211,7 +215,7 @@ class Imdb(SiteParser):
|
||||||
'page': 'releaseinfo',
|
'page': 'releaseinfo',
|
||||||
're': [
|
're': [
|
||||||
'<td class="release_date">(.*?)</td>',
|
'<td class="release_date">(.*?)</td>',
|
||||||
ox.strip_tags,
|
strip_tags,
|
||||||
],
|
],
|
||||||
'type': 'list'
|
'type': 'list'
|
||||||
},
|
},
|
||||||
|
@ -326,7 +330,7 @@ class Imdb(SiteParser):
|
||||||
|
|
||||||
if 'alternativeTitles' in self:
|
if 'alternativeTitles' in self:
|
||||||
if len(self['alternativeTitles']) == 2 and \
|
if len(self['alternativeTitles']) == 2 and \
|
||||||
isinstance(self['alternativeTitles'][0], basestring):
|
isinstance(self['alternativeTitles'][0], string_types):
|
||||||
self['alternativeTitles'] = [self['alternativeTitles']]
|
self['alternativeTitles'] = [self['alternativeTitles']]
|
||||||
|
|
||||||
#normalize country names
|
#normalize country names
|
||||||
|
@ -472,7 +476,7 @@ class Imdb(SiteParser):
|
||||||
if c:
|
if c:
|
||||||
alt[title].append(c)
|
alt[title].append(c)
|
||||||
self['alternativeTitles'] = []
|
self['alternativeTitles'] = []
|
||||||
for t in sorted(alt, lambda a, b: cmp(sorted(alt[a]), sorted(alt[b]))):
|
for t in sorted(alt, key=lambda a: sorted(alt[a])):
|
||||||
if alt[t]:
|
if alt[t]:
|
||||||
countries = sorted([normalize_country_name(c) or c for c in alt[t]])
|
countries = sorted([normalize_country_name(c) or c for c in alt[t]])
|
||||||
self['alternativeTitles'].append((t, countries))
|
self['alternativeTitles'].append((t, countries))
|
||||||
|
@ -492,7 +496,7 @@ class Imdb(SiteParser):
|
||||||
if 'votes' in self: self['votes'] = self['votes'].replace(',', '')
|
if 'votes' in self: self['votes'] = self['votes'].replace(',', '')
|
||||||
|
|
||||||
if 'cast' in self:
|
if 'cast' in self:
|
||||||
if isinstance(self['cast'][0], basestring):
|
if isinstance(self['cast'][0], string_types):
|
||||||
self['cast'] = [self['cast']]
|
self['cast'] = [self['cast']]
|
||||||
self['actor'] = [c[0] for c in self['cast']]
|
self['actor'] = [c[0] for c in self['cast']]
|
||||||
def cleanup_character(c):
|
def cleanup_character(c):
|
||||||
|
@ -503,10 +507,12 @@ class Imdb(SiteParser):
|
||||||
|
|
||||||
if 'connections' in self:
|
if 'connections' in self:
|
||||||
cc={}
|
cc={}
|
||||||
if len(self['connections']) == 3 and isinstance(self['connections'][0], basestring):
|
if len(self['connections']) == 3 and isinstance(self['connections'][0], string_types):
|
||||||
self['connections'] = [self['connections']]
|
self['connections'] = [self['connections']]
|
||||||
for rel, data, _ in self['connections']:
|
for rel, data, _ in self['connections']:
|
||||||
#cc[unicode(rel)] = re.compile('<a href="/title/tt(\d{7})/">(.*?)</a>').findall(data)
|
if isinstance(rel, bytes):
|
||||||
|
rel = rel.decode('utf-8')
|
||||||
|
#cc[rel] = re.compile('<a href="/title/tt(\d{7})/">(.*?)</a>').findall(data)
|
||||||
def get_conn(c):
|
def get_conn(c):
|
||||||
r = {
|
r = {
|
||||||
'id': c[0],
|
'id': c[0],
|
||||||
|
@ -516,14 +522,14 @@ class Imdb(SiteParser):
|
||||||
if len(description) == 2 and description[-1].strip() != '-':
|
if len(description) == 2 and description[-1].strip() != '-':
|
||||||
r['description'] = description[-1].strip()
|
r['description'] = description[-1].strip()
|
||||||
return r
|
return r
|
||||||
cc[unicode(rel)] = map(get_conn, re.compile('<a href="/title/tt(\d{7})/">(.*?)</a>(.*?)<\/div', re.DOTALL).findall(data))
|
cc[rel] = list(map(get_conn, re.compile('<a href="/title/tt(\d{7})/">(.*?)</a>(.*?)<\/div', re.DOTALL).findall(data)))
|
||||||
|
|
||||||
|
|
||||||
self['connections'] = cc
|
self['connections'] = cc
|
||||||
|
|
||||||
for key in ('country', 'genre'):
|
for key in ('country', 'genre'):
|
||||||
if key in self:
|
if key in self:
|
||||||
self[key] = filter(lambda x: x.lower() != 'home', self[key])
|
self[key] = list(filter(lambda x: x.lower() != 'home', self[key]))
|
||||||
#0092999
|
#0092999
|
||||||
if '_director' in self:
|
if '_director' in self:
|
||||||
if 'series' in self or 'isSeries' in self:
|
if 'series' in self or 'isSeries' in self:
|
||||||
|
@ -590,8 +596,8 @@ class Imdb(SiteParser):
|
||||||
if key in self:
|
if key in self:
|
||||||
if isinstance(self[key][0], list):
|
if isinstance(self[key][0], list):
|
||||||
self[key] = [i[0] for i in self[key] if i]
|
self[key] = [i[0] for i in self[key] if i]
|
||||||
self[key] = sorted(list(set(self[key])),
|
self[key] = sorted(list(set(self[key])), key=lambda a: self[key].index(a))
|
||||||
lambda a, b: self[key].index(a) - self[key].index(b))
|
|
||||||
|
|
||||||
if 'budget' in self and 'gross' in self:
|
if 'budget' in self and 'gross' in self:
|
||||||
self['profit'] = self['gross'] - self['budget']
|
self['profit'] = self['gross'] - self['budget']
|
||||||
|
@ -655,7 +661,7 @@ def get_movie_by_title(title, timeout=-1):
|
||||||
u'0866567'
|
u'0866567'
|
||||||
'''
|
'''
|
||||||
params = {'s':'tt','q': title}
|
params = {'s':'tt','q': title}
|
||||||
if isinstance(title, unicode):
|
if not isinstance(title, bytes):
|
||||||
try:
|
try:
|
||||||
params['q'] = unicodedata.normalize('NFKC', params['q']).encode('latin-1')
|
params['q'] = unicodedata.normalize('NFKC', params['q']).encode('latin-1')
|
||||||
except:
|
except:
|
||||||
|
@ -731,7 +737,7 @@ def get_movie_id(title, director='', year='', timeout=-1):
|
||||||
if year:
|
if year:
|
||||||
params['q'] = u'"%s (%s)" %s' % (title, year, director)
|
params['q'] = u'"%s (%s)" %s' % (title, year, director)
|
||||||
google_query = "site:imdb.com %s" % params['q']
|
google_query = "site:imdb.com %s" % params['q']
|
||||||
if isinstance(params['q'], unicode):
|
if not isinstance(params['q'], bytes):
|
||||||
try:
|
try:
|
||||||
params['q'] = unicodedata.normalize('NFKC', params['q']).encode('latin-1')
|
params['q'] = unicodedata.normalize('NFKC', params['q']).encode('latin-1')
|
||||||
except:
|
except:
|
||||||
|
@ -775,7 +781,7 @@ def get_movie_poster(imdbId):
|
||||||
info = ImdbCombined(imdbId)
|
info = ImdbCombined(imdbId)
|
||||||
if 'posterId' in info:
|
if 'posterId' in info:
|
||||||
url = "http://www.imdb.com/media/rm%s/tt%s" % (info['posterId'], imdbId)
|
url = "http://www.imdb.com/media/rm%s/tt%s" % (info['posterId'], imdbId)
|
||||||
data = read_url(url)
|
data = read_url(url).decode('utf-8', 'ignore')
|
||||||
poster = find_re(data, 'img.*?id="primary-img".*?src="(.*?)"')
|
poster = find_re(data, 'img.*?id="primary-img".*?src="(.*?)"')
|
||||||
return poster
|
return poster
|
||||||
elif 'series' in info:
|
elif 'series' in info:
|
||||||
|
@ -787,11 +793,11 @@ def get_episodes(imdbId, season=None):
|
||||||
url = 'http://www.imdb.com/title/tt%s/episodes' % imdbId
|
url = 'http://www.imdb.com/title/tt%s/episodes' % imdbId
|
||||||
if season:
|
if season:
|
||||||
url += '?season=%d' % season
|
url += '?season=%d' % season
|
||||||
data = ox.cache.read_url(url)
|
data = cache.read_url(url)
|
||||||
for e in re.compile('<div data-const="tt(\d{7})".*?>.*?<div>S(\d+), Ep(\d+)<\/div>\n<\/div>', re.DOTALL).findall(data):
|
for e in re.compile('<div data-const="tt(\d{7})".*?>.*?<div>S(\d+), Ep(\d+)<\/div>\n<\/div>', re.DOTALL).findall(data):
|
||||||
episodes['S%02dE%02d' %(int(e[1]), int(e[2]))] = e[0]
|
episodes['S%02dE%02d' %(int(e[1]), int(e[2]))] = e[0]
|
||||||
else:
|
else:
|
||||||
data = ox.cache.read_url(url)
|
data = cache.read_url(url)
|
||||||
match = re.compile('<strong>Season (\d+)</strong>').findall(data)
|
match = re.compile('<strong>Season (\d+)</strong>').findall(data)
|
||||||
if match:
|
if match:
|
||||||
for season in range(1, int(match[0]) + 1):
|
for season in range(1, int(match[0]) + 1):
|
||||||
|
@ -800,7 +806,7 @@ def get_episodes(imdbId, season=None):
|
||||||
|
|
||||||
def max_votes():
|
def max_votes():
|
||||||
url = 'http://www.imdb.com/search/title?num_votes=500000,&sort=num_votes,desc'
|
url = 'http://www.imdb.com/search/title?num_votes=500000,&sort=num_votes,desc'
|
||||||
data = ox.cache.read_url(url)
|
data = cache.read_url(url)
|
||||||
votes = max([int(v.replace(',', ''))
|
votes = max([int(v.replace(',', ''))
|
||||||
for v in re.compile('<td class="sort_col">([\d,]+)</td>').findall(data)])
|
for v in re.compile('<td class="sort_col">([\d,]+)</td>').findall(data)])
|
||||||
return votes
|
return votes
|
||||||
|
@ -810,6 +816,6 @@ def guess(title, director='', timeout=-1):
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import json
|
import json
|
||||||
print json.dumps(Imdb('0306414'), indent=2)
|
print(json.dumps(Imdb('0306414'), indent=2))
|
||||||
#print json.dumps(Imdb('0133093'), indent=2)
|
#print json.dumps(Imdb('0133093'), indent=2)
|
||||||
|
|
||||||
|
|
|
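Note: two `imdb.py` hunks above replace `cmp`-style sort arguments, removed in Python 3, with `key=` functions, and wrap `map`/`filter` in `list()` since both return iterators on Python 3. A sketch of both patterns on placeholder data:

```python
# placeholder data, only to show the sorting and filtering changes
alt = {'Title A': ['France', 'Italy'], 'Title B': ['Canada']}

# Python 2-only form was: sorted(alt, lambda a, b: cmp(sorted(alt[a]), sorted(alt[b])))
titles = sorted(alt, key=lambda a: sorted(alt[a]))

countries = ['France', 'Home', 'Italy']
# filter() returns an iterator on Python 3, so materialize it when a list is needed
countries = list(filter(lambda x: x.lower() != 'home', countries))

print(titles, countries)
```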
@ -1,5 +1,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from ox.net import read_url
|
from ox.net import read_url
|
||||||
|
|
||||||
|
@ -13,5 +15,5 @@ def get_poster_url(id):
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print get_poster_url('0749451')
|
print(get_poster_url('0749451'))
|
||||||
|
|
||||||
|
|
|
@ -2,22 +2,24 @@
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from six import string_types
|
||||||
|
|
||||||
from ..cache import read_url
|
from ..cache import read_url
|
||||||
from .. import strip_tags, decode_html
|
from .. import decode_html
|
||||||
from ..utils import datetime
|
from ..utils import datetime
|
||||||
|
|
||||||
|
|
||||||
def cleanup(key, data, data_type):
|
def cleanup(key, data, data_type):
|
||||||
if data:
|
if data:
|
||||||
if isinstance(data[0], basestring):
|
if isinstance(data[0], string_types):
|
||||||
#FIXME: some types need strip_tags
|
#FIXME: some types need strip_tags
|
||||||
#data = [strip_tags(decode_html(p)).strip() for p in data]
|
#data = [strip_tags(decode_html(p)).strip() for p in data]
|
||||||
data = [decode_html(p).strip() for p in data]
|
data = [decode_html(p).strip() for p in data]
|
||||||
elif isinstance(data[0], list) or isinstance(data[0], tuple):
|
elif isinstance(data[0], list) or isinstance(data[0], tuple):
|
||||||
data = [cleanup(key, p, data_type) for p in data]
|
data = [cleanup(key, p, data_type) for p in data]
|
||||||
while len(data) == 1 and not isinstance(data, basestring):
|
while len(data) == 1 and not isinstance(data, string_types):
|
||||||
data = data[0]
|
data = data[0]
|
||||||
if data_type == 'list' and isinstance(data, basestring):
|
if data_type == 'list' and isinstance(data, string_types):
|
||||||
data = [data, ]
|
data = [data, ]
|
||||||
elif data_type != 'list':
|
elif data_type != 'list':
|
||||||
data = ''
|
data = ''
|
||||||
|
@ -40,7 +42,7 @@ class SiteParser(dict):
|
||||||
for key in self.regex:
|
for key in self.regex:
|
||||||
url = self.get_url(self.regex[key]['page'])
|
url = self.get_url(self.regex[key]['page'])
|
||||||
data = self.read_url(url, timeout)
|
data = self.read_url(url, timeout)
|
||||||
if isinstance(self.regex[key]['re'], basestring):
|
if isinstance(self.regex[key]['re'], string_types):
|
||||||
data = re.compile(self.regex[key]['re'], re.DOTALL).findall(data)
|
data = re.compile(self.regex[key]['re'], re.DOTALL).findall(data)
|
||||||
data = cleanup(key, data, self.regex[key]['type'])
|
data = cleanup(key, data, self.regex[key]['type'])
|
||||||
elif callable(self.regex[key]['re']):
|
elif callable(self.regex[key]['re']):
|
||||||
|
@ -51,7 +53,7 @@ class SiteParser(dict):
|
||||||
f = r
|
f = r
|
||||||
else:
|
else:
|
||||||
f = re.compile(r, re.DOTALL).findall
|
f = re.compile(r, re.DOTALL).findall
|
||||||
if isinstance(data, basestring):
|
if isinstance(data, string_types):
|
||||||
data = f(data)
|
data = f(data)
|
||||||
else:
|
else:
|
||||||
data = [f(d) for d in data]
|
data = [f(d) for d in data]
|
||||||
|
|
|
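Note: `siteparser.py` above swaps `basestring`, which only exists on Python 2, for `six.string_types` in its isinstance checks. A minimal sketch; the helper name is illustrative:

```python
from six import string_types

def as_list(data):
    # six.string_types covers str/unicode on Python 2 and str on Python 3
    if isinstance(data, string_types):
        return [data]
    return list(data)

print(as_list('single value'), as_list(['a', 'b']))
```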
@ -1,11 +1,14 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from urllib import urlencode
|
|
||||||
|
from six.moves import urllib
|
||||||
|
|
||||||
from ox.utils import json
|
from ox.utils import json
|
||||||
from ox.cache import read_url
|
from ox.cache import read_url
|
||||||
from ox import find_re, decode_html
|
from ox import find_re
|
||||||
|
|
||||||
|
|
||||||
def get_id(url):
|
def get_id(url):
|
||||||
|
@ -138,11 +141,11 @@ def get_allmovie_id(wikipedia_url):
|
||||||
def find(query, max_results=10):
|
def find(query, max_results=10):
|
||||||
query = {'action': 'query', 'list':'search', 'format': 'json',
|
query = {'action': 'query', 'list':'search', 'format': 'json',
|
||||||
'srlimit': max_results, 'srwhat': 'text', 'srsearch': query.encode('utf-8')}
|
'srlimit': max_results, 'srwhat': 'text', 'srsearch': query.encode('utf-8')}
|
||||||
url = "http://en.wikipedia.org/w/api.php?" + urlencode(query)
|
url = "http://en.wikipedia.org/w/api.php?" + urllib.parse.urlencode(query)
|
||||||
data = read_url(url)
|
data = read_url(url)
|
||||||
if not data:
|
if not data:
|
||||||
data = read_url(url, timeout=0)
|
data = read_url(url, timeout=0)
|
||||||
result = json.loads(data)
|
result = json.loads(data.decode('utf-8'))
|
||||||
results = []
|
results = []
|
||||||
if result and 'query' in result:
|
if result and 'query' in result:
|
||||||
for r in result['query']['search']:
|
for r in result['query']['search']:
|
||||||
|
|
5
setup.py
5
setup.py
|
@ -36,15 +36,16 @@ setup(
|
||||||
download_url="http://code.0x2620.org/python-ox/download",
|
download_url="http://code.0x2620.org/python-ox/download",
|
||||||
license="GPLv3",
|
license="GPLv3",
|
||||||
packages=['ox', 'ox.django', 'ox.django.api', 'ox.torrent', 'ox.web'],
|
packages=['ox', 'ox.django', 'ox.django.api', 'ox.torrent', 'ox.web'],
|
||||||
install_requires=['chardet', 'feedparser'],
|
install_requires=['six', 'chardet', 'feedparser'],
|
||||||
keywords = [
|
keywords = [
|
||||||
],
|
],
|
||||||
classifiers = [
|
classifiers = [
|
||||||
'Operating System :: OS Independent',
|
'Operating System :: OS Independent',
|
||||||
'Programming Language :: Python',
|
'Programming Language :: Python',
|
||||||
'Programming Language :: Python :: 2',
|
'Programming Language :: Python :: 2',
|
||||||
'Programming Language :: Python :: 2.6',
|
|
||||||
'Programming Language :: Python :: 2.7',
|
'Programming Language :: Python :: 2.7',
|
||||||
|
'Programming Language :: Python :: 3',
|
||||||
|
'Programming Language :: Python :: 3.4',
|
||||||
'Topic :: Software Development :: Libraries :: Python Modules',
|
'Topic :: Software Development :: Libraries :: Python Modules',
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|