use six to support python 2 and 3

2014-09-30 21:04:46 +02:00 · 2014-09-30 21:04:46 +02:00 · d4d09b56b6
commit d4d09b56b6
parent 1b1dcf1c58
28 changed files with 1730 additions and 1678 deletions
--- a/ox/init.py
+++ b/ox/init.py
@ -3,28 +3,32 @@
 # GPL 2011
 __version__ = '2.1.1'
-import cache
+from . import cache
-import js
+from . import js
-import jsonc
+from . import jsonc
-import net
+from . import net
-import srt
+from . import srt
-import utils
+from . import utils
-from api import *
+from .api import *
-from file import *
+from .file import *
-from form import *
+from .form import *
-from format import *
+from .format import *
-from geo import *
+from .geo import *
-from html import *
+from .html import *
 #image depends on PIL, not easy enough to install on osx
 try:
-    from image import *
+    from .image import *
 except:
    pass
-from location import *
+from .location import *
-from movie import *
+from .movie import *
-from normalize import *
+from .normalize import *
-from oembed import *
+from .oembed import *
-from text import *
+from .text import *
-from torrent import *
+#currently broken in python3
-from fixunicode import *
+try:
    from .torrent import *
 except:
    pass
 from .fixunicode import *
--- a/ox/api.py
+++ b/ox/api.py
@ -3,10 +3,10 @@
 # GPL 2011
 from __future__ import with_statement
-import cookielib
+from six.moves import http_cookiejar as cookielib
 import gzip
-import StringIO
+from six import StringIO
-import urllib2
+from six.moves import urllib
 from types import MethodType
 from . import __version__
@ -29,8 +29,8 @@ class API(object):
            self._cj = cj
        else:
            self._cj = cookielib.CookieJar()
-        self._opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self._cj),
+        self._opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self._cj),
-                                            urllib2.HTTPHandler(debuglevel=self.debuglevel))
+                                            urllib.HTTPHandler(debuglevel=self.debuglevel))
        self._opener.addheaders = [
            ('User-Agent', '%s/%s' % (self.__name__, self.__version__))
        ]
@ -64,7 +64,7 @@ class API(object):
        result = {}
        try:
            body = str(form)
-            request = urllib2.Request(str(url))
+            request = urllib.reuqest.Request(str(url))
            request.add_header('Content-type', form.get_content_type())
            request.add_header('Content-Length', str(len(body)))
            request.add_header('Accept-Encoding', 'gzip, deflate')
@ -75,7 +75,7 @@ class API(object):
                result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()
            result = result.decode('utf-8')
            return json.loads(result)
-        except urllib2.HTTPError, e:
+        except urllib.error.HTTPError as e:
            if self.DEBUG:
                import webbrowser
                if e.code >= 500:
--- a/ox/cache.py
+++ b/ox/cache.py
@ -1,24 +1,22 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 # GPL 2011
-from __future__ import with_statement
+from __future__ import with_statement, print_function
 import gzip
 import zlib
 import hashlib
 import os
-import StringIO
+from six import BytesIO
 import time
-import urlparse
+from six.moves import urllib
 import urllib2
 import sqlite3
-import chardet
+from .utils import json
 from utils import json
 from .file import makedirs
-import net
+from . import net
-from net import DEFAULT_HEADERS, detect_encoding
+from .net import DEFAULT_HEADERS, detect_encoding
 cache_timeout = 30*24*60*60 # default is 30 days
@ -69,7 +67,7 @@ class InvalidResult(Exception):
        self.headers = headers
 def _fix_unicode_url(url):
-    if isinstance(url, unicode):
+    if not isinstance(url, bytes):
        url = url.encode('utf-8')
    return url
@ -83,24 +81,30 @@ def read_url(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout, val
                  if this function fails, InvalidResult will be raised; deal with it in your code
    '''
    if net.DEBUG:
-        print 'ox.cache.read_url', url
+        print('ox.cache.read_url', url)
    #FIXME: send last-modified / etag from cache and only update if needed
-    url = _fix_unicode_url(url)
+    #url = _fix_unicode_url(url)
    result = store.get(url, data, headers, timeout)
    url_headers = {}
    if not result:
        try:
            url_headers, result = net.read_url(url, data, headers, return_headers=True)
-        except urllib2.HTTPError, e:
+        except urllib.error.HTTPError as e:
            e.headers['Status'] = "%s" % e.code
-            url_headers = dict(e.headers)
+            for key in e.headers:
                url_headers[key.lower()] = e.headers[key]
            result = e.read()
            if url_headers.get('content-encoding', None) == 'gzip':
-                result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()
+                result = gzip.GzipFile(fileobj=BytesIO(result)).read()
        if not valid or valid(result, url_headers):
            store.set(url, post_data=data, data=result, headers=url_headers)
        else:
            raise InvalidResult(result, url_headers)
    if unicode:
        ctype = url_headers.get('content-type', '').lower()
        if 'charset' in ctype:
            encoding = ctype.split('charset=')[-1]
        else:
            encoding = detect_encoding(result)
        if not encoding:
            encoding = 'latin-1'
@ -143,9 +147,8 @@ class SQLiteCache(Cache):
        self.create()
    def connect(self):
-        conn = sqlite3.connect(self.db, timeout=10)
+        self.conn = sqlite3.connect(self.db, timeout=10)
-        conn.text_factory = str
+        return self.conn
        return conn
    def create(self):
        conn = self.connect()
@ -177,9 +180,9 @@ class SQLiteCache(Cache):
        if timeout == 0:
            return r
        if data:
-            url_hash = hashlib.sha1(url + '?' + data).hexdigest()
+            url_hash = hashlib.sha1((url + '?' + data).encode('utf-8')).hexdigest()
        else:
-            url_hash = hashlib.sha1(url).hexdigest()
+            url_hash = hashlib.sha1(url.encode('utf-8')).hexdigest()
        conn = self.connect()
        c = conn.cursor()
@ -210,11 +213,11 @@ class SQLiteCache(Cache):
    def set(self, url, post_data, data, headers):
        if post_data:
-            url_hash = hashlib.sha1(url + '?' + post_data).hexdigest()
+            url_hash = hashlib.sha1((url + '?' + post_data).encode('utf-8')).hexdigest()
        else:
-            url_hash = hashlib.sha1(url).hexdigest()
+            url_hash = hashlib.sha1(url.encode('utf-8')).hexdigest()
-        domain = ".".join(urlparse.urlparse(url)[1].split('.')[-2:])
+        domain = ".".join(urllib.parse.urlparse(url)[1].split('.')[-2:])
        conn = self.connect()
        c = conn.cursor()
@ -266,11 +269,11 @@ class FileCache(Cache):
            return r
        if data:
-            url_hash = hashlib.sha1(url + '?' + data).hexdigest()
+            url_hash = hashlib.sha1((url + '?' + data).encode('utf-8')).hexdigest()
        else:
-            url_hash = hashlib.sha1(url).hexdigest()
+            url_hash = hashlib.sha1(url.encode('utf-8')).hexdigest()
-        domain = ".".join(urlparse.urlparse(url)[1].split('.')[-2:])
+        domain = ".".join(urllib.parse.urlparse(url)[1].split('.')[-2:])
        prefix, i, f = self.files(domain, url_hash)
        if os.path.exists(i):
            with open(i) as _i:
@ -295,11 +298,11 @@ class FileCache(Cache):
    def set(self, url, post_data, data, headers):
        if post_data:
-            url_hash = hashlib.sha1(url + '?' + post_data).hexdigest()
+            url_hash = hashlib.sha1((url + '?' + post_data).encode('utf-8')).hexdigest()
        else:
-            url_hash = hashlib.sha1(url).hexdigest()
+            url_hash = hashlib.sha1(url.encode('utf-8')).hexdigest()
-        domain = ".".join(urlparse.urlparse(url)[1].split('.')[-2:])
+        domain = ".".join(urllib.parse.urlparse(url)[1].split('.')[-2:])
        prefix, i, f = self.files(domain, url_hash)
        makedirs(prefix)
--- a/ox/file.py
+++ b/ox/file.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 # GPL 2008
-from __future__ import division, with_statement
+from __future__ import division, with_statement, print_function
 import os
 import hashlib
 import re
@ -10,7 +10,7 @@ import struct
 import subprocess
 import sqlite3
-from ox.utils import json
+from .utils import json
 __all__ = ['sha1sum', 'oshash', 'avinfo', 'makedirs']
@ -283,19 +283,19 @@ def makedirs(path):
    if not os.path.exists(path):
        try:
            os.makedirs(path)
-        except OSError, e:
+        except OSError as e:
            if e.errno != 17:
                raise
 def copy_file(source, target, verbose=False):
    if verbose:
-        print 'copying', source, 'to', target
+        print('copying', source, 'to', target)
    write_path(target)
    shutil.copyfile(source, target)
 def read_file(file, verbose=False):
    if verbose:
-        print 'reading', file
+        print('reading', file)
    f = open(file)
    data = f.read()
    f.close()
@ -303,14 +303,14 @@ def read_file(file, verbose=False):
 def read_json(file, verbose=False):
    if verbose:
-        print 'reading', file
+        print('reading', file)
    with open(file) as fd:
        data = json.load(fd)
    return data
 def write_file(file, data, verbose=False):
    if verbose:
-        print 'writing', file
+        print('writing', file)
    write_path(file)
    f = open(file, 'w')
    f.write(data)
@ -319,7 +319,7 @@ def write_file(file, data, verbose=False):
 def write_image(file, image, verbose=False):
    if verbose:
-        print 'writing', file
+        print('writing', file)
    write_path(file)
    image.save(file)
@ -329,7 +329,7 @@ def write_json(file, data, ensure_ascii=True, indent=0, sort_keys=False, verbose
 def write_link(source, target, verbose=False):
    if verbose:
-        print 'linking', source, 'to', target
+        print('linking', source, 'to', target)
    write_path(target)
    if os.path.exists(target):
        os.unlink(target)
--- a/ox/fixunicode.py
+++ b/ox/fixunicode.py
@ -2,13 +2,16 @@
 # -*- coding: utf-8 -*-
 # from http://blog.lumino.so/2012/08/20/fix-unicode-mistakes-with-python/
 # MIT
 from __future__ import print_function
 import unicodedata
 from six import unichr
 __all__ = ['fix_bad_unicode']
 def fix_bad_unicode(text):
-    u"""
+    """
    Something you will find all over the place, in real-world text, is text
    that's mistakenly encoded as utf-8, decoded in some ugly format like
    latin-1 or even Windows codepage 1252, and encoded as utf-8 again.
@ -26,52 +29,53 @@ def fix_bad_unicode(text):
    auto-decode bytes for you -- then it would just create the problems it's
    supposed to fix.
-        >>> print fix_bad_unicode(u'Ãºnico')
+        >>> fix_bad_unicode(u'Ãºnico')
-        único
+        'único'
        >>> fix_bad_unicode('This text is fine already :þ')
        'This text is fine already :þ'
        >>> print fix_bad_unicode(u'This text is fine already :þ')
        This text is fine already :þ
    Because these characters often come from Microsoft products, we allow
    for the possibility that we get not just Unicode characters 128-255, but
    also Windows's conflicting idea of what characters 128-160 are.
-        >>> print fix_bad_unicode(u'This â€” should be an em dash')
+        >>> fix_bad_unicode('This â€” should be an em dash')
-        This — should be an em dash
+        'This — should be an em dash'
    We might have to deal with both Windows characters and raw control
    characters at the same time, especially when dealing with characters like
    \x81 that have no mapping in Windows.
-        >>> print fix_bad_unicode(u'This text is sad .â\x81”.')
+        >>> fix_bad_unicode('This text is sad .â\x81”.')
-        This text is sad .⁔.
+        'This text is sad .⁔.'
    This function even fixes multiple levels of badness:
-        >>> wtf = u'\xc3\xa0\xc2\xb2\xc2\xa0_\xc3\xa0\xc2\xb2\xc2\xa0'
+        >>> wtf = '\xc3\xa0\xc2\xb2\xc2\xa0_\xc3\xa0\xc2\xb2\xc2\xa0'
-        >>> print fix_bad_unicode(wtf)
+        >>> fix_bad_unicode(wtf)
-        ಠ_ಠ
+        'ಠ_ಠ'
    However, it has safeguards against fixing sequences of letters and
    punctuation that can occur in valid text:
-        >>> print fix_bad_unicode(u'not such a fan of Charlotte Brontë…”')
+        >>> fix_bad_unicode('not such a fan of Charlotte Brontë…”')
-        not such a fan of Charlotte Brontë…”
+        'not such a fan of Charlotte Brontë…”'
    Cases of genuine ambiguity can sometimes be addressed by finding other
    characters that are not double-encoding, and expecting the encoding to
    be consistent:
-        >>> print fix_bad_unicode(u'AHÅ™, the new sofa from IKEA®')
+        >>> fix_bad_unicode('AHÅ™, the new sofa from IKEA®')
-        AHÅ™, the new sofa from IKEA®
+        'AHÅ™, the new sofa from IKEA®'
    Finally, we handle the case where the text is in a single-byte encoding
    that was intended as Windows-1252 all along but read as Latin-1:
-        >>> print fix_bad_unicode(u'This text was never Unicode at all\x85')
+        >>> fix_bad_unicode('This text was never Unicode at all\x85')
-        This text was never Unicode at all…
+        'This text was never Unicode at all…'
    """
-    if not isinstance(text, unicode):
+    if not isinstance(text, str):
        raise TypeError("This isn't even decoded into Unicode yet. "
                        "Decode it first.")
    if len(text) == 0:
@ -118,7 +122,7 @@ def reinterpret_windows1252_as_utf8(wrongtext):
            altered_bytes.append(char.encode('WINDOWS_1252'))
        else:
            altered_bytes.append(char.encode('latin-1', 'replace'))
-    return ''.join(altered_bytes).decode('utf-8', 'replace')
+    return b''.join(altered_bytes).decode('utf-8', 'replace')
 def reinterpret_latin1_as_windows1252(wrongtext):
@ -130,7 +134,7 @@ def reinterpret_latin1_as_windows1252(wrongtext):
 def text_badness(text):
-    u'''
+    '''
    Look for red flags that text is encoded incorrectly:
    Obvious problems:
@ -147,12 +151,12 @@ def text_badness(text):
    - Improbable single-byte characters, such as ƒ or ¬
    - Letters in somewhat rare scripts
    '''
-    assert isinstance(text, unicode)
+    assert isinstance(text, str)
    errors = 0
    very_weird_things = 0
    weird_things = 0
    prev_letter_script = None
-    for pos in xrange(len(text)):
+    for pos in range(len(text)):
        char = text[pos]
        index = ord(char)
        if index < 256:
@ -241,7 +245,7 @@ WINDOWS_1252_GREMLINS = [
 ]
 # a list of Unicode characters that might appear in Windows-1252 text
-WINDOWS_1252_CODEPOINTS = range(256) + WINDOWS_1252_GREMLINS
+WINDOWS_1252_CODEPOINTS = list(range(256)) + WINDOWS_1252_GREMLINS
 # Rank the characters typically represented by a single byte -- that is, in
 # Latin-1 or Windows-1252 -- by how weird it would be to see them in running
@ -286,7 +290,7 @@ SINGLE_BYTE_WEIRDNESS = (
 # letters. We'll need it often.
 SINGLE_BYTE_LETTERS = [
    unicodedata.category(unichr(i)).startswith('L')
-    for i in xrange(256)
+    for i in range(256)
 ]
 # A table telling us how to interpret the first word of a letter's Unicode
--- a/ox/form.py
+++ b/ox/form.py
@ -1,17 +1,34 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 # GPL 2014
 from __future__ import with_statement, print_function
 import itertools
 import mimetools
 import mimetypes
 import random
 import sys
 __all__ = ['MultiPartForm']
 # from /usr/lib/python3.4/email/generator.py
 # Helper used by Generator._make_boundary
 _width = len(repr(sys.maxsize-1))
 _fmt = '%%0%dd' % _width
 def _make_boundary():
    # Craft a random boundary.
    token = random.randrange(sys.maxsize)
    boundary = ('=' * 15) + (_fmt % token) + '=='
    return boundary
 class MultiPartForm(object):
    """Accumulate the data to be used when posting a form."""
    def __init__(self):
        self.form_fields = []
        self.files = []
-        self.boundary = mimetools.choose_boundary()
+        self.boundary = _make_boundary()
        return
    def get_content_type(self):
--- a/ox/format.py
+++ b/ox/format.py
@ -20,7 +20,7 @@ def toAZ(num):
    >>> toAZ(1234567890)
    'CYWOQVJ'
    """
-    if num < 1: raise ValueError, "must supply a positive integer"
+    if num < 1: raise ValueError("must supply a positive integer")
    digits = string.ascii_uppercase
    az = ''
    while num != 0:
@ -62,7 +62,7 @@ def to26(q):
    >>> to26(347485647)
    'BDGKMAP'
    """
-    if q < 0: raise ValueError, "must supply a positive integer"
+    if q < 0: raise ValueError("must supply a positive integer")
    base26 = string.ascii_uppercase
    converted = []
    while q != 0:
@ -119,7 +119,7 @@ def to32(q):
    ValueError: must supply a positive integer
    """
-    if q < 0: raise ValueError, "must supply a positive integer"
+    if q < 0: raise ValueError("must supply a positive integer")
    letters = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
    converted = []
    while q != 0:
@ -206,7 +206,7 @@ def to36(q):
        ...
    ValueError: must supply a positive integer
    """
-    if q < 0: raise ValueError, "must supply a positive integer"
+    if q < 0: raise ValueError("must supply a positive integer")
    letters = "0123456789abcdefghijklmnopqrstuvwxyz"
    converted = []
    while q != 0:
--- a/ox/geo.py
+++ b/ox/geo.py
--- a/ox/html.py
+++ b/ox/html.py
@ -1,9 +1,11 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 # GPL 2008
 import sys
 import re
 import string
-from htmlentitydefs import name2codepoint
+from six.moves.html_entities import name2codepoint
 from six import unichr
 # Configuration for add_links() function
@ -23,7 +25,8 @@ link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+')
 html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE)
 hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL)
 trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\Z')
-del x # Temporary variable
+if sys.version[0] == 2:
    del x # Temporary variable
 def escape(html):
    '''
@ -146,12 +149,9 @@ def decode_html(html):
    >>> decode_html('Anniversary of Daoud&apos;s Republic')
    u"Anniversary of Daoud's Republic"
    """
-    if type(html) != unicode:
+    if isinstance(html, bytes):
-        html = unicode(html)[:]
+        html = html.decode('utf-8')
    if type(html) is unicode:
    uchr = unichr
    else:
        uchr = lambda value: value > 255 and unichr(value) or chr(value)
    def entitydecode(match, uchr=uchr):
        entity = match.group(1)
        if entity == '#x80':
--- a/ox/jsonc.py
+++ b/ox/jsonc.py
@ -1,10 +1,10 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-from __future__ import with_statement
+from __future__ import with_statement, print_function
-from js import minify
+from .js import minify
-from utils import json
+from .utils import json
 def load(f):
@ -14,7 +14,7 @@ def loads(source):
    try:
        minified = minify(source)
        return json.loads(minified)
-    except json.JSONDecodeError, e:
+    except json.JSONDecodeError as e:
        s = minified.split('\n')
        context = s[e.lineno-1][max(0, e.colno-1):e.colno+30]
        msg = e.msg + ' at ' + context
--- a/ox/movie.py
+++ b/ox/movie.py
@ -9,9 +9,9 @@ import os
 import re
 import unicodedata
-from normalize import normalize_name
+from .normalize import normalize_name
-from text import get_sort_name, find_re
+from .text import get_sort_name, find_re
-from file import EXTENSIONS
+from .file import EXTENSIONS
 __all__ = ['parse_movie_path', 'create_movie_path', 'get_oxid']
--- a/ox/net.py
+++ b/ox/net.py
@ -1,13 +1,13 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 # GPL 2008
 from __future__ import with_statement, print_function
 import os
 import gzip
 import re
-import StringIO
+from six import BytesIO
 import struct
-import urllib
+from six.moves import urllib
 import urllib2
 from chardet.universaldetector import UniversalDetector
@ -26,7 +26,7 @@ def status(url, data=None, headers=DEFAULT_HEADERS):
    try:
        f = open_url(url, data, headers)
        s = f.code
-    except urllib2.HTTPError, e:
+    except urllib.error.HTTPError as e:
        s = e.code
    return s
@ -42,46 +42,59 @@ def get_headers(url, data=None, headers=DEFAULT_HEADERS):
        f.headers['Status'] = "%s" % f.code
        headers = f.headers
        f.close()
-    except urllib2.HTTPError, e:
+    except urllib.error.HTTPError as e:
        e.headers['Status'] = "%s" % e.code
        headers = e.headers
    return dict(headers)
 def open_url(url, data=None, headers=DEFAULT_HEADERS):
    if isinstance(url, bytes):
        url = url.decode('utf-8')
    url = url.replace(' ', '%20')
-    req = urllib2.Request(url, data, headers)
+    req = urllib.request.Request(url, data, headers)
-    return urllib2.urlopen(req)
+    return urllib.request.urlopen(req)
 def read_url(url, data=None, headers=DEFAULT_HEADERS, return_headers=False, unicode=False):
    if DEBUG:
-        print 'ox.net.read_url', url
+        print('ox.net.read_url', url)
    f = open_url(url, data, headers)
    result = f.read()
    f.close()
    if f.headers.get('content-encoding', None) == 'gzip':
-        result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()
+        result = gzip.GzipFile(fileobj=BytesIO(result)).read()
    if unicode:
        ctype = f.headers.get('content-type', '').lower()
        if 'charset' in ctype:
            encoding = ctype.split('charset=')[-1]
        else:
            encoding = detect_encoding(result)
        if not encoding:
            encoding = 'latin-1'
        result = result.decode(encoding)
    if return_headers:
        f.headers['Status'] = "%s" % f.code
-        return dict(f.headers), result
+        headers = {}
        for key in f.headers:
            headers[key.lower()] = f.headers[key]
        return headers, result
    return result
 def detect_encoding(data):
-    data_lower = data.lower()
+    data_lower = data.lower().decode('utf-8', 'ignore')
-    charset = re.compile('content="text/html; charset=(.*?)"').findall(data)
+    charset = re.compile('content="text/html; charset=(.*?)"').findall(data_lower)
    if not charset:
-        charset = re.compile('meta charset="(.*?)"').findall(data)
+        charset = re.compile('meta charset="(.*?)"').findall(data_lower)
    if charset:
        return charset[0].lower()
    detector = UniversalDetector()
-    for line in data.split('\n'):
+    p = 0
-        detector.feed(line)
+    l = len(data)
    s = 1024
    while p < l:
        detector.feed(data[p:p+s])
        if detector.done:
            break
        p += s
    detector.close()
    return detector.result['encoding']
@ -97,9 +110,9 @@ def save_url(url, filename, overwrite=False):
 def oshash(url):
    def get_size(url):
-        req = urllib2.Request(url, headers=DEFAULT_HEADERS.copy())
+        req = urllib.request.Request(url, headers=DEFAULT_HEADERS.copy())
        req.get_method = lambda : 'HEAD'
-        u = urllib2.urlopen(req)
+        u = urllib.request.urlopen(req)
        if u.code != 200 or not 'Content-Length' in u.headers:
            raise IOError
        return int(u.headers['Content-Length'])
@ -107,8 +120,8 @@ def oshash(url):
    def get_range(url, start, end):
        headers = DEFAULT_HEADERS.copy()
        headers['Range'] = 'bytes=%s-%s' % (start, end)
-        req = urllib2.Request(url, headers=headers)
+        req = urllib.request.Request(url, headers=headers)
-        u = urllib2.urlopen(req)
+        u = urllib.request.urlopen(req)
        return u.read() 
    try:
--- a/ox/oembed.py
+++ b/ox/oembed.py
@ -1,9 +1,10 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 import re
-from text import find_re
+
-import cache
+from . import cache
-from utils import json, ET
+from .text import find_re
 from .utils import json, ET
 def get_embed_code(url, maxwidth=None, maxheight=None):
    embed = {}
--- a/ox/srt.py
+++ b/ox/srt.py
@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-from __future__ import with_statement, division
+from __future__ import with_statement, division, print_function
 import chardet
 import re
 import codecs
@ -71,7 +71,7 @@ def load(filename, offset=0):
        try:
            data = unicode(data, 'latin-1')
        except:
-            print "failed to detect encoding, giving up"
+            print("failed to detect encoding, giving up")
            return srt
    data = data.replace('\r\n', '\n')
--- a/ox/torrent/init.py
+++ b/ox/torrent/init.py
@ -6,7 +6,7 @@ from threading import Event
 from hashlib import sha1
 import os
-from bencode import bencode, bdecode
+from .bencode import bencode, bdecode
 __all__ = ['create_torrent', 'get_info_hash', 'get_torrent_info', 'get_files', 'get_torrent_size']
@ -24,9 +24,8 @@ def get_info_hash(torrentFile):
    return sha1(bencode(info)).hexdigest()
 def get_torrent_info(data=None, file=None):
    from bencode import bencode
    if file:
-        if isinstance(file, unicode):
+        if not isinstance(file, bytes):
            file = file.encode('utf-8')
        with open(file, 'rb') as f:
            data = f.read()
@ -36,7 +35,7 @@ def get_torrent_info(data=None, file=None):
    metainfo = bdecode(data)
    info = metainfo['info']
    piece_length = info['piece length']
-    if info.has_key('length'):
+    if 'length' in info:
        # let's assume we just have one file
        file_length = info['length']
    else:
--- a/ox/web/init.py
+++ b/ox/web/init.py
@ -2,8 +2,8 @@
 # encoding: utf-8
 __version__ = '1.0.0'
-import imdb
+from . import imdb
-import wikipedia
+from . import wikipedia
-import google
+from . import google
-import piratecinema
+from . import piratecinema
-import oxdb
+from . import oxdb
--- a/ox/web/allmovie.py
+++ b/ox/web/allmovie.py
@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 import re
 import time
 from ox import strip_tags, find_re
 from ox.cache import read_url
--- a/ox/web/amazon.py
+++ b/ox/web/amazon.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 import re
-from urllib import quote
+from six.moves.urllib.parse import quote
 from ox import find_re, strip_tags, decode_html
 from ox.cache import read_url
--- a/ox/web/arsenalberlin.py
+++ b/ox/web/arsenalberlin.py
@ -1,14 +1,11 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 from datetime import datetime
 from urllib import urlencode
 import json
 import os
 import re
-from ox import find_re, strip_tags, decode_html
+from ox import find_re, strip_tags
 from ox.cache import read_url
 from ox.net import open_url
 def get_data(id, language='en'):
    if language == 'de':
@ -57,7 +54,7 @@ def backup(filename):
            data = json.load(f)
    else:
        data = {}
-    start = ids and max(map(int, data)) or 1
+    start = max(map(int, data)) or 1
    for i in range(start, 11872):
        info = get_data(i)
        if info:
--- a/ox/web/criterion.py
+++ b/ox/web/criterion.py
@ -5,7 +5,7 @@ import re
 import ox.cache
 from ox.cache import read_url
 from ox.html import strip_tags
-from ox.text import find_re, remove_special_characters
+from ox.text import find_re
 import imdb
--- a/ox/web/dailymotion.py
+++ b/ox/web/dailymotion.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 import re
-from urllib import unquote
+from six.moves.urllib.parse import unquote
 from ox.cache import read_url
--- a/ox/web/duckduckgo.py
+++ b/ox/web/duckduckgo.py
@ -1,17 +1,17 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 import re
-import urllib
+
 from six.moves import urllib
 import ox
 from ox import strip_tags, decode_html
 from ox.utils import json
 from ox.cache import read_url
 def find(query, timeout=ox.cache.cache_timeout):
-    if isinstance(query, unicode):
+    if not isinstance(query, bytes):
        query = query.encode('utf-8')
-    params = urllib.urlencode({'q': query})
+    params = urllib.parse.urlencode({'q': query})
    url = 'http://duckduckgo.com/html/?' + params
    data = read_url(url, timeout=timeout).decode('utf-8')
    results = []
--- a/ox/web/google.py
+++ b/ox/web/google.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 import re
-import urllib
+from six.moves import urllib
 import ox
 from ox import strip_tags, decode_html
@ -13,9 +13,9 @@ def read_url(url, data=None, headers=ox.net.DEFAULT_HEADERS, timeout=DEFAULT_TIM
    return ox.cache.read_url(url, data, headers, timeout, unicode=True)
 def quote_plus(s):
-    if not isinstance(s, str):
+    if not isinstance(s, bytes):
        s = s.encode('utf-8')
-    return urllib.quote_plus(s)
+    return urllib.parse.quote_plus(s)
 def find(query, max_results=DEFAULT_MAX_RESULTS, timeout=DEFAULT_TIMEOUT):
    """
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@ -1,23 +1,27 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-import urllib
+from __future__ import print_function
 import re
 import time
 import unicodedata
-import ox
+from six.moves import urllib
-from ox import find_re, strip_tags
+from six import string_types
 import ox.cache
 from siteparser import SiteParser
 import duckduckgo
 from .. import find_re, strip_tags, decode_html
 from .. import cache
 from . siteparser import SiteParser
 from . import duckduckgo
 from ..utils import datetime
 from ..geo import normalize_country_name
-def read_url(url, data=None, headers=ox.cache.DEFAULT_HEADERS, timeout=ox.cache.cache_timeout, valid=None, unicode=False):
+def read_url(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None, unicode=False):
    headers = headers.copy()
-    return ox.cache.read_url(url, data, headers, timeout, unicode=unicode)
+    return cache.read_url(url, data, headers, timeout, unicode=unicode)
 def get_url(id):
    return "http://www.imdb.com/title/tt%s/" % id
@ -49,7 +53,7 @@ class Imdb(SiteParser):
            'page': 'business',
            're': [
                '<h5>Budget</h5>\s*?\$(.*?)<br',
-                lambda data: find_re(ox.decode_html(data).replace(',', ''), '\d+')
+                lambda data: find_re(decode_html(data).replace(',', ''), '\d+')
            ],
            'type': 'int'
        },
@ -211,7 +215,7 @@ class Imdb(SiteParser):
            'page': 'releaseinfo',
            're': [
                '<td class="release_date">(.*?)</td>',
-                ox.strip_tags,
+                strip_tags,
            ],
            'type': 'list'
        },
@ -326,7 +330,7 @@ class Imdb(SiteParser):
        if 'alternativeTitles' in self:
            if len(self['alternativeTitles']) == 2 and \
-               isinstance(self['alternativeTitles'][0], basestring):
+               isinstance(self['alternativeTitles'][0], string_types):
               self['alternativeTitles'] = [self['alternativeTitles']]
        #normalize country names
@ -472,7 +476,7 @@ class Imdb(SiteParser):
                            if c:
                                alt[title].append(c)
            self['alternativeTitles'] = []
-            for t in sorted(alt, lambda a, b: cmp(sorted(alt[a]), sorted(alt[b]))):
+            for t in sorted(alt, key=lambda a: sorted(alt[a])):
                if alt[t]:
                    countries = sorted([normalize_country_name(c) or c for c in alt[t]])
                    self['alternativeTitles'].append((t, countries))
@ -492,7 +496,7 @@ class Imdb(SiteParser):
        if 'votes' in self: self['votes'] = self['votes'].replace(',', '')
        if 'cast' in self:
-            if isinstance(self['cast'][0], basestring):
+            if isinstance(self['cast'][0], string_types):
                self['cast'] = [self['cast']]
            self['actor'] = [c[0] for c in self['cast']]
            def cleanup_character(c):
@ -503,10 +507,12 @@ class Imdb(SiteParser):
        if 'connections' in self:
            cc={}
-            if len(self['connections']) == 3 and isinstance(self['connections'][0], basestring):
+            if len(self['connections']) == 3 and isinstance(self['connections'][0], string_types):
                self['connections'] = [self['connections']]
            for rel, data, _ in self['connections']:
-                #cc[unicode(rel)] = re.compile('<a href="/title/tt(\d{7})/">(.*?)</a>').findall(data)
+                if isinstance(rel, bytes):
                    rel = rel.decode('utf-8')
                #cc[rel] = re.compile('<a href="/title/tt(\d{7})/">(.*?)</a>').findall(data)
                def get_conn(c):
                    r = {
                        'id': c[0],
@ -516,14 +522,14 @@ class Imdb(SiteParser):
                    if len(description) == 2 and description[-1].strip() != '-':
                        r['description'] = description[-1].strip()
                    return r
-                cc[unicode(rel)] = map(get_conn, re.compile('<a href="/title/tt(\d{7})/">(.*?)</a>(.*?)<\/div', re.DOTALL).findall(data))
+                cc[rel] = list(map(get_conn, re.compile('<a href="/title/tt(\d{7})/">(.*?)</a>(.*?)<\/div', re.DOTALL).findall(data)))
            self['connections'] = cc
        for key in ('country', 'genre'):
            if key in self:
-                self[key] = filter(lambda x: x.lower() != 'home', self[key])
+                self[key] = list(filter(lambda x: x.lower() != 'home', self[key]))
        #0092999
        if '_director' in self:
            if 'series' in self or 'isSeries' in self:
@ -590,8 +596,8 @@ class Imdb(SiteParser):
            if key in self:
                if isinstance(self[key][0], list):
                    self[key] = [i[0] for i in self[key] if i]
-                self[key] = sorted(list(set(self[key])),
+                self[key] = sorted(list(set(self[key])), key=lambda a: self[key].index(a))
-                                   lambda a, b: self[key].index(a) - self[key].index(b))
+
        if 'budget' in self and 'gross' in self:
            self['profit'] = self['gross'] - self['budget']
@ -655,7 +661,7 @@ def get_movie_by_title(title, timeout=-1):
    u'0866567'
    '''
    params = {'s':'tt','q': title}
-    if isinstance(title, unicode):
+    if not isinstance(title, bytes):
        try:
            params['q'] = unicodedata.normalize('NFKC', params['q']).encode('latin-1')
        except:
@ -731,7 +737,7 @@ def get_movie_id(title, director='', year='', timeout=-1):
    if year:
        params['q'] = u'"%s (%s)" %s' % (title, year, director)
    google_query = "site:imdb.com %s" % params['q']
-    if isinstance(params['q'], unicode):
+    if not isinstance(params['q'], bytes):
        try:
            params['q'] = unicodedata.normalize('NFKC', params['q']).encode('latin-1')
        except:
@ -775,7 +781,7 @@ def get_movie_poster(imdbId):
    info = ImdbCombined(imdbId)
    if 'posterId' in info:
        url = "http://www.imdb.com/media/rm%s/tt%s" % (info['posterId'], imdbId)
-        data = read_url(url)
+        data = read_url(url).decode('utf-8', 'ignore')
        poster = find_re(data, 'img.*?id="primary-img".*?src="(.*?)"')
        return poster
    elif 'series' in info:
@ -787,11 +793,11 @@ def get_episodes(imdbId, season=None):
    url = 'http://www.imdb.com/title/tt%s/episodes' % imdbId
    if season:
        url += '?season=%d' % season
-        data = ox.cache.read_url(url)
+        data = cache.read_url(url)
        for e in re.compile('<div data-const="tt(\d{7})".*?>.*?<div>S(\d+), Ep(\d+)<\/div>\n<\/div>', re.DOTALL).findall(data):
            episodes['S%02dE%02d' %(int(e[1]), int(e[2]))] = e[0]
    else:
-        data = ox.cache.read_url(url)
+        data = cache.read_url(url)
        match = re.compile('<strong>Season (\d+)</strong>').findall(data)
        if match:
            for season in range(1, int(match[0]) + 1):
@ -800,7 +806,7 @@ def get_episodes(imdbId, season=None):
 def max_votes():
    '''
    Return the largest vote count listed on the imdb title search
    that is sorted by number of votes (descending).

    Raises ValueError if the page contains no "sort_col" cells.
    '''
    url = 'http://www.imdb.com/search/title?num_votes=500000,&sort=num_votes,desc'
    # ask the cache for decoded text: on python 3 the raw response is bytes
    # and a str pattern can not be matched against bytes
    data = cache.read_url(url, unicode=True)
    counts = re.compile(r'<td class="sort_col">([\d,]+)</td>').findall(data)
    # counts look like "1,234,567", drop the thousands separators
    return max(int(count.replace(',', '')) for count in counts)
@ -810,6 +816,6 @@ def guess(title, director='', timeout=-1):
 if __name__ == "__main__":
    import json
-    print json.dumps(Imdb('0306414'), indent=2)
+    print(json.dumps(Imdb('0306414'), indent=2))
    #print json.dumps(Imdb('0133093'), indent=2)
--- a/ox/web/piratecinema.py
+++ b/ox/web/piratecinema.py
@ -1,5 +1,7 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 from __future__ import print_function
 import re
 from ox.net import read_url
@ -13,5 +15,5 @@ def get_poster_url(id):
    return ''
 if __name__ == '__main__':
-    print get_poster_url('0749451')
+    print(get_poster_url('0749451'))
--- a/ox/web/siteparser.py
+++ b/ox/web/siteparser.py
@ -2,22 +2,24 @@
 # vi:si:et:sw=4:sts=4:ts=4
 import re
 from six import string_types
 from ..cache import read_url
-from .. import strip_tags, decode_html
+from .. import decode_html
 from ..utils import datetime
 def cleanup(key, data, data_type):
    if data:
-        if isinstance(data[0], basestring):
+        if isinstance(data[0], string_types):
            #FIXME: some types need strip_tags
            #data = [strip_tags(decode_html(p)).strip() for p in data]
            data = [decode_html(p).strip() for p in data]
        elif isinstance(data[0], list) or isinstance(data[0], tuple):
            data = [cleanup(key, p, data_type) for p in data]
-        while len(data) == 1 and not isinstance(data, basestring):
+        while len(data) == 1 and not isinstance(data, string_types):
            data = data[0]
-        if data_type == 'list' and isinstance(data, basestring):
+        if data_type == 'list' and isinstance(data, string_types):
            data = [data, ]
    elif data_type != 'list':
        data = ''
@ -40,7 +42,7 @@ class SiteParser(dict):
        for key in self.regex:
            url = self.get_url(self.regex[key]['page'])
            data = self.read_url(url, timeout)
-            if isinstance(self.regex[key]['re'], basestring):
+            if isinstance(self.regex[key]['re'], string_types):
                data = re.compile(self.regex[key]['re'], re.DOTALL).findall(data)
                data = cleanup(key, data, self.regex[key]['type'])
            elif callable(self.regex[key]['re']):
@ -51,7 +53,7 @@ class SiteParser(dict):
                        f = r
                    else:
                        f = re.compile(r, re.DOTALL).findall
-                    if isinstance(data, basestring):
+                    if isinstance(data, string_types):
                        data = f(data)
                    else:
                        data = [f(d) for d in data]
--- a/ox/web/wikipedia.py
+++ b/ox/web/wikipedia.py
@ -1,11 +1,14 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 from __future__ import print_function
 import re
-from urllib import urlencode
+
 from six.moves import urllib
 from ox.utils import json
 from ox.cache import read_url
-from ox import find_re, decode_html
+from ox import find_re
 def get_id(url):
@ -138,11 +141,11 @@ def get_allmovie_id(wikipedia_url):
 def find(query, max_results=10):
    query = {'action': 'query', 'list':'search', 'format': 'json',
             'srlimit': max_results, 'srwhat': 'text', 'srsearch': query.encode('utf-8')}
-    url = "http://en.wikipedia.org/w/api.php?" + urlencode(query)
+    url = "http://en.wikipedia.org/w/api.php?" + urllib.parse.urlencode(query)
    data = read_url(url)
    if not data:
        data  = read_url(url, timeout=0)
-    result = json.loads(data)
+    result = json.loads(data.decode('utf-8'))
    results = []
    if result and 'query' in result:
        for r in result['query']['search']:
--- a/setup.py
+++ b/setup.py
@ -36,15 +36,16 @@ setup(
    download_url="http://code.0x2620.org/python-ox/download",
    license="GPLv3",
    packages=['ox', 'ox.django', 'ox.django.api', 'ox.torrent', 'ox.web'],
-    install_requires=['chardet', 'feedparser'],
+    install_requires=['six', 'chardet', 'feedparser'],
    keywords = [
    ],
    classifiers = [
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.6',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.4',
        'Topic :: Software Development :: Libraries :: Python Modules',
    ],
 )