use six to support python 2 and 3

j 2014-09-30 21:04:46 +02:00
parent 1b1dcf1c58
commit d4d09b56b6
28 changed files with 1730 additions and 1678 deletions
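
The porting pattern applied throughout is the same: route the modules that were renamed or moved in Python 3 through six. A minimal sketch of the idioms this commit relies on (the URL and header values here are only illustrative, not taken from the code):

from six import BytesIO, string_types
from six.moves import http_cookiejar as cookielib
from six.moves import urllib

req = urllib.request.Request('http://example.com/', headers={'User-Agent': 'ox'})
try:
    data = urllib.request.urlopen(req).read()        # bytes on both Python 2 and 3
except urllib.error.HTTPError as e:                  # 'as e' syntax works on 2.6+ and 3.x
    data = e.read()
params = urllib.parse.urlencode({'q': 'query'})      # urllib/urllib2/urlparse merged under urllib.parse
body = BytesIO(data)                                 # binary buffer for gzip/struct work
is_text = isinstance('abc', string_types)            # replaces basestring checks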


@ -3,28 +3,32 @@
# GPL 2011
__version__ = '2.1.1'
import cache
import js
import jsonc
import net
import srt
import utils
from . import cache
from . import js
from . import jsonc
from . import net
from . import srt
from . import utils
from api import *
from file import *
from form import *
from format import *
from geo import *
from html import *
from .api import *
from .file import *
from .form import *
from .format import *
from .geo import *
from .html import *
#image depends on PIL, not easy enough to install on osx
try:
from image import *
from .image import *
except:
pass
from location import *
from movie import *
from normalize import *
from oembed import *
from text import *
from torrent import *
from fixunicode import *
from .location import *
from .movie import *
from .normalize import *
from .oembed import *
from .text import *
#currently broken in python3
try:
from .torrent import *
except:
pass
from .fixunicode import *


@ -3,10 +3,10 @@
# GPL 2011
from __future__ import with_statement
import cookielib
from six.moves import http_cookiejar as cookielib
import gzip
import StringIO
import urllib2
from six import BytesIO
from six.moves import urllib
from types import MethodType
from . import __version__
@ -29,8 +29,8 @@ class API(object):
self._cj = cj
else:
self._cj = cookielib.CookieJar()
self._opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self._cj),
urllib2.HTTPHandler(debuglevel=self.debuglevel))
self._opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self._cj),
urllib.request.HTTPHandler(debuglevel=self.debuglevel))
self._opener.addheaders = [
('User-Agent', '%s/%s' % (self.__name__, self.__version__))
]
@ -64,7 +64,7 @@ class API(object):
result = {}
try:
body = str(form)
request = urllib2.Request(str(url))
request = urllib.request.Request(str(url))
request.add_header('Content-type', form.get_content_type())
request.add_header('Content-Length', str(len(body)))
request.add_header('Accept-Encoding', 'gzip, deflate')
@ -75,7 +75,7 @@ class API(object):
result = gzip.GzipFile(fileobj=BytesIO(result)).read()
result = result.decode('utf-8')
return json.loads(result)
except urllib2.HTTPError, e:
except urllib.error.HTTPError as e:
if self.DEBUG:
import webbrowser
if e.code >= 500:
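
Under six, the Python 2 urllib2 names split into urllib.request and urllib.error, and gzip-compressed responses have to be wrapped in a binary buffer. A rough sketch of the request/error path used here (the URL is illustrative):

import gzip
from six import BytesIO
from six.moves import urllib

try:
    f = urllib.request.urlopen('http://example.com/api/')
except urllib.error.HTTPError as e:    # replaces "except urllib2.HTTPError, e:"
    f = e                              # HTTPError can be read like a response
result = f.read()                      # always bytes
if f.headers.get('content-encoding') == 'gzip':
    # gzip needs a binary file object; StringIO.StringIO no longer fits on Python 3
    result = gzip.GzipFile(fileobj=BytesIO(result)).read()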


@ -1,24 +1,22 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# GPL 2011
from __future__ import with_statement
from __future__ import with_statement, print_function
import gzip
import zlib
import hashlib
import os
import StringIO
from six import BytesIO
import time
import urlparse
import urllib2
from six.moves import urllib
import sqlite3
import chardet
from utils import json
from .utils import json
from .file import makedirs
import net
from net import DEFAULT_HEADERS, detect_encoding
from . import net
from .net import DEFAULT_HEADERS, detect_encoding
cache_timeout = 30*24*60*60 # default is 30 days
@ -69,7 +67,7 @@ class InvalidResult(Exception):
self.headers = headers
def _fix_unicode_url(url):
if isinstance(url, unicode):
if not isinstance(url, bytes):
url = url.encode('utf-8')
return url
@ -83,25 +81,31 @@ def read_url(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout, val
if this function fails, InvalidResult will be raised; deal with it in your code
'''
if net.DEBUG:
print 'ox.cache.read_url', url
print('ox.cache.read_url', url)
#FIXME: send last-modified / etag from cache and only update if needed
url = _fix_unicode_url(url)
#url = _fix_unicode_url(url)
result = store.get(url, data, headers, timeout)
url_headers = {}
if not result:
try:
url_headers, result = net.read_url(url, data, headers, return_headers=True)
except urllib2.HTTPError, e:
except urllib.error.HTTPError as e:
e.headers['Status'] = "%s" % e.code
url_headers = dict(e.headers)
for key in e.headers:
url_headers[key.lower()] = e.headers[key]
result = e.read()
if url_headers.get('content-encoding', None) == 'gzip':
result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()
result = gzip.GzipFile(fileobj=BytesIO(result)).read()
if not valid or valid(result, url_headers):
store.set(url, post_data=data, data=result, headers=url_headers)
else:
raise InvalidResult(result, url_headers)
if unicode:
encoding = detect_encoding(result)
ctype = url_headers.get('content-type', '').lower()
if 'charset' in ctype:
encoding = ctype.split('charset=')[-1]
else:
encoding = detect_encoding(result)
if not encoding:
encoding = 'latin-1'
result = result.decode(encoding)
@ -143,9 +147,8 @@ class SQLiteCache(Cache):
self.create()
def connect(self):
conn = sqlite3.connect(self.db, timeout=10)
conn.text_factory = str
return conn
self.conn = sqlite3.connect(self.db, timeout=10)
return self.conn
def create(self):
conn = self.connect()
@ -177,9 +180,9 @@ class SQLiteCache(Cache):
if timeout == 0:
return r
if data:
url_hash = hashlib.sha1(url + '?' + data).hexdigest()
url_hash = hashlib.sha1((url + '?' + data).encode('utf-8')).hexdigest()
else:
url_hash = hashlib.sha1(url).hexdigest()
url_hash = hashlib.sha1(url.encode('utf-8')).hexdigest()
conn = self.connect()
c = conn.cursor()
@ -210,11 +213,11 @@ class SQLiteCache(Cache):
def set(self, url, post_data, data, headers):
if post_data:
url_hash = hashlib.sha1(url + '?' + post_data).hexdigest()
url_hash = hashlib.sha1((url + '?' + post_data).encode('utf-8')).hexdigest()
else:
url_hash = hashlib.sha1(url).hexdigest()
url_hash = hashlib.sha1(url.encode('utf-8')).hexdigest()
domain = ".".join(urlparse.urlparse(url)[1].split('.')[-2:])
domain = ".".join(urllib.parse.urlparse(url)[1].split('.')[-2:])
conn = self.connect()
c = conn.cursor()
@ -266,11 +269,11 @@ class FileCache(Cache):
return r
if data:
url_hash = hashlib.sha1(url + '?' + data).hexdigest()
url_hash = hashlib.sha1((url + '?' + data).encode('utf-8')).hexdigest()
else:
url_hash = hashlib.sha1(url).hexdigest()
url_hash = hashlib.sha1(url.encode('utf-8')).hexdigest()
domain = ".".join(urlparse.urlparse(url)[1].split('.')[-2:])
domain = ".".join(urllib.parse.urlparse(url)[1].split('.')[-2:])
prefix, i, f = self.files(domain, url_hash)
if os.path.exists(i):
with open(i) as _i:
@ -295,11 +298,11 @@ class FileCache(Cache):
def set(self, url, post_data, data, headers):
if post_data:
url_hash = hashlib.sha1(url + '?' + post_data).hexdigest()
url_hash = hashlib.sha1((url + '?' + post_data).encode('utf-8')).hexdigest()
else:
url_hash = hashlib.sha1(url).hexdigest()
url_hash = hashlib.sha1(url.encode('utf-8')).hexdigest()
domain = ".".join(urlparse.urlparse(url)[1].split('.')[-2:])
domain = ".".join(urllib.parse.urlparse(url)[1].split('.')[-2:])
prefix, i, f = self.files(domain, url_hash)
makedirs(prefix)
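
The repeated .encode('utf-8') calls above exist because hashlib in Python 3 only accepts bytes. The pattern, as a standalone sketch (the helper name is illustrative, not part of the module):

import hashlib

def cache_key(url, post_data=None):
    raw = url + '?' + post_data if post_data else url
    # sha1() raises TypeError on Python 3 if given str, so encode first
    return hashlib.sha1(raw.encode('utf-8')).hexdigest()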


@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# GPL 2008
from __future__ import division, with_statement
from __future__ import division, with_statement, print_function
import os
import hashlib
import re
@ -10,7 +10,7 @@ import struct
import subprocess
import sqlite3
from ox.utils import json
from .utils import json
__all__ = ['sha1sum', 'oshash', 'avinfo', 'makedirs']
@ -283,19 +283,19 @@ def makedirs(path):
if not os.path.exists(path):
try:
os.makedirs(path)
except OSError, e:
except OSError as e:
if e.errno != 17:
raise
def copy_file(source, target, verbose=False):
if verbose:
print 'copying', source, 'to', target
print('copying', source, 'to', target)
write_path(target)
shutil.copyfile(source, target)
def read_file(file, verbose=False):
if verbose:
print 'reading', file
print('reading', file)
f = open(file)
data = f.read()
f.close()
@ -303,14 +303,14 @@ def read_file(file, verbose=False):
def read_json(file, verbose=False):
if verbose:
print 'reading', file
print('reading', file)
with open(file) as fd:
data = json.load(fd)
return data
def write_file(file, data, verbose=False):
if verbose:
print 'writing', file
print('writing', file)
write_path(file)
f = open(file, 'w')
f.write(data)
@ -319,7 +319,7 @@ def write_file(file, data, verbose=False):
def write_image(file, image, verbose=False):
if verbose:
print 'writing', file
print('writing', file)
write_path(file)
image.save(file)
@ -329,7 +329,7 @@ def write_json(file, data, ensure_ascii=True, indent=0, sort_keys=False, verbose
def write_link(source, target, verbose=False):
if verbose:
print 'linking', source, 'to', target
print('linking', source, 'to', target)
write_path(target)
if os.path.exists(target):
os.unlink(target)


@ -2,13 +2,16 @@
# -*- coding: utf-8 -*-
# from http://blog.lumino.so/2012/08/20/fix-unicode-mistakes-with-python/
# MIT
from __future__ import print_function
import unicodedata
from six import unichr
__all__ = ['fix_bad_unicode']
def fix_bad_unicode(text):
u"""
"""
Something you will find all over the place, in real-world text, is text
that's mistakenly encoded as utf-8, decoded in some ugly format like
latin-1 or even Windows codepage 1252, and encoded as utf-8 again.
@ -26,52 +29,53 @@ def fix_bad_unicode(text):
auto-decode bytes for you -- then it would just create the problems it's
supposed to fix.
>>> print fix_bad_unicode(u'único')
único
>>> fix_bad_unicode(u'único')
'único'
>>> fix_bad_unicode('This text is fine already :þ')
'This text is fine already :þ'
>>> print fix_bad_unicode(u'This text is fine already :þ')
This text is fine already :þ
Because these characters often come from Microsoft products, we allow
for the possibility that we get not just Unicode characters 128-255, but
also Windows's conflicting idea of what characters 128-160 are.
>>> print fix_bad_unicode(u'This — should be an em dash')
This should be an em dash
>>> fix_bad_unicode('This — should be an em dash')
'This — should be an em dash'
We might have to deal with both Windows characters and raw control
characters at the same time, especially when dealing with characters like
\x81 that have no mapping in Windows.
>>> print fix_bad_unicode(u'This text is sad .â\x81”.')
This text is sad ..
>>> fix_bad_unicode('This text is sad .â\x81”.')
'This text is sad .⁔.'
This function even fixes multiple levels of badness:
>>> wtf = u'\xc3\xa0\xc2\xb2\xc2\xa0_\xc3\xa0\xc2\xb2\xc2\xa0'
>>> print fix_bad_unicode(wtf)
ಠ_ಠ
>>> wtf = '\xc3\xa0\xc2\xb2\xc2\xa0_\xc3\xa0\xc2\xb2\xc2\xa0'
>>> fix_bad_unicode(wtf)
'ಠ_ಠ'
However, it has safeguards against fixing sequences of letters and
punctuation that can occur in valid text:
>>> print fix_bad_unicode(u'not such a fan of Charlotte Brontë…”')
not such a fan of Charlotte Brontë
>>> fix_bad_unicode('not such a fan of Charlotte Brontë…”')
'not such a fan of Charlotte Brontë…”'
Cases of genuine ambiguity can sometimes be addressed by finding other
characters that are not double-encoding, and expecting the encoding to
be consistent:
>>> print fix_bad_unicode(u'AHÅ™, the new sofa from IKEA®')
AHÅ, the new sofa from IKEA®
>>> fix_bad_unicode('AHÅ™, the new sofa from IKEA®')
'AHÅ™, the new sofa from IKEA®'
Finally, we handle the case where the text is in a single-byte encoding
that was intended as Windows-1252 all along but read as Latin-1:
>>> print fix_bad_unicode(u'This text was never Unicode at all\x85')
This text was never Unicode at all
>>> fix_bad_unicode('This text was never Unicode at all\x85')
'This text was never Unicode at all…'
"""
if not isinstance(text, unicode):
if not isinstance(text, str):
raise TypeError("This isn't even decoded into Unicode yet. "
"Decode it first.")
if len(text) == 0:
@ -118,7 +122,7 @@ def reinterpret_windows1252_as_utf8(wrongtext):
altered_bytes.append(char.encode('WINDOWS_1252'))
else:
altered_bytes.append(char.encode('latin-1', 'replace'))
return ''.join(altered_bytes).decode('utf-8', 'replace')
return b''.join(altered_bytes).decode('utf-8', 'replace')
def reinterpret_latin1_as_windows1252(wrongtext):
@ -130,7 +134,7 @@ def reinterpret_latin1_as_windows1252(wrongtext):
def text_badness(text):
u'''
'''
Look for red flags that text is encoded incorrectly:
Obvious problems:
@ -147,12 +151,12 @@ def text_badness(text):
- Improbable single-byte characters, such as ƒ or ¬
- Letters in somewhat rare scripts
'''
assert isinstance(text, unicode)
assert isinstance(text, str)
errors = 0
very_weird_things = 0
weird_things = 0
prev_letter_script = None
for pos in xrange(len(text)):
for pos in range(len(text)):
char = text[pos]
index = ord(char)
if index < 256:
@ -241,7 +245,7 @@ WINDOWS_1252_GREMLINS = [
]
# a list of Unicode characters that might appear in Windows-1252 text
WINDOWS_1252_CODEPOINTS = range(256) + WINDOWS_1252_GREMLINS
WINDOWS_1252_CODEPOINTS = list(range(256)) + WINDOWS_1252_GREMLINS
# Rank the characters typically represented by a single byte -- that is, in
# Latin-1 or Windows-1252 -- by how weird it would be to see them in running
@ -286,7 +290,7 @@ SINGLE_BYTE_WEIRDNESS = (
# letters. We'll need it often.
SINGLE_BYTE_LETTERS = [
unicodedata.category(unichr(i)).startswith('L')
for i in xrange(256)
for i in range(256)
]
# A table telling us how to interpret the first word of a letter's Unicode


@ -1,17 +1,34 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# GPL 2014
from __future__ import with_statement, print_function
import itertools
import mimetools
import mimetypes
import random
import sys
__all__ = ['MultiPartForm']
# from /usr/lib/python3.4/email/generator.py
# Helper used by Generator._make_boundary
_width = len(repr(sys.maxsize-1))
_fmt = '%%0%dd' % _width
def _make_boundary():
# Craft a random boundary.
token = random.randrange(sys.maxsize)
boundary = ('=' * 15) + (_fmt % token) + '=='
return boundary
class MultiPartForm(object):
"""Accumulate the data to be used when posting a form."""
def __init__(self):
self.form_fields = []
self.files = []
self.boundary = mimetools.choose_boundary()
self.boundary = _make_boundary()
return
def get_content_type(self):
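
mimetools (and its choose_boundary()) no longer exists in Python 3, which is why the boundary is now generated locally, following the helper from email.generator. Roughly what a generated boundary looks like on a 64-bit build (the digits are random, this one is made up):

boundary = _make_boundary()
# e.g. '===============8035703946145235704==' : fifteen '=' signs,
# a zero-padded random token below sys.maxsize, then '=='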


@ -20,7 +20,7 @@ def toAZ(num):
>>> toAZ(1234567890)
'CYWOQVJ'
"""
if num < 1: raise ValueError, "must supply a positive integer"
if num < 1: raise ValueError("must supply a positive integer")
digits = string.ascii_uppercase
az = ''
while num != 0:
@ -62,7 +62,7 @@ def to26(q):
>>> to26(347485647)
'BDGKMAP'
"""
if q < 0: raise ValueError, "must supply a positive integer"
if q < 0: raise ValueError("must supply a positive integer")
base26 = string.ascii_uppercase
converted = []
while q != 0:
@ -119,7 +119,7 @@ def to32(q):
ValueError: must supply a positive integer
"""
if q < 0: raise ValueError, "must supply a positive integer"
if q < 0: raise ValueError("must supply a positive integer")
letters = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
converted = []
while q != 0:
@ -206,7 +206,7 @@ def to36(q):
...
ValueError: must supply a positive integer
"""
if q < 0: raise ValueError, "must supply a positive integer"
if q < 0: raise ValueError("must supply a positive integer")
letters = "0123456789abcdefghijklmnopqrstuvwxyz"
converted = []
while q != 0:

ox/geo.py: 2957 changed lines (diff too large to display)


@ -1,9 +1,11 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# GPL 2008
import sys
import re
import string
from htmlentitydefs import name2codepoint
from six.moves.html_entities import name2codepoint
from six import unichr
# Configuration for add_links() function
@ -23,7 +25,8 @@ link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+')
html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE)
hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL)
trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\Z')
del x # Temporary variable
if sys.version_info[0] == 2:
del x # Temporary variable
def escape(html):
'''
@ -146,12 +149,9 @@ def decode_html(html):
>>> decode_html('Anniversary of Daoud&apos;s Republic')
u"Anniversary of Daoud's Republic"
"""
if type(html) != unicode:
html = unicode(html)[:]
if type(html) is unicode:
uchr = unichr
else:
uchr = lambda value: value > 255 and unichr(value) or chr(value)
if isinstance(html, bytes):
html = html.decode('utf-8')
uchr = unichr
def entitydecode(match, uchr=uchr):
entity = match.group(1)
if entity == '#x80':


@ -1,10 +1,10 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import with_statement
from __future__ import with_statement, print_function
from js import minify
from utils import json
from .js import minify
from .utils import json
def load(f):
@ -14,7 +14,7 @@ def loads(source):
try:
minified = minify(source)
return json.loads(minified)
except json.JSONDecodeError, e:
except json.JSONDecodeError as e:
s = minified.split('\n')
context = s[e.lineno-1][max(0, e.colno-1):e.colno+30]
msg = e.msg + ' at ' + context


@ -9,9 +9,9 @@ import os
import re
import unicodedata
from normalize import normalize_name
from text import get_sort_name, find_re
from file import EXTENSIONS
from .normalize import normalize_name
from .text import get_sort_name, find_re
from .file import EXTENSIONS
__all__ = ['parse_movie_path', 'create_movie_path', 'get_oxid']


@ -1,13 +1,13 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# GPL 2008
from __future__ import with_statement, print_function
import os
import gzip
import re
import StringIO
from six import BytesIO
import struct
import urllib
import urllib2
from six.moves import urllib
from chardet.universaldetector import UniversalDetector
@ -26,7 +26,7 @@ def status(url, data=None, headers=DEFAULT_HEADERS):
try:
f = open_url(url, data, headers)
s = f.code
except urllib2.HTTPError, e:
except urllib.error.HTTPError as e:
s = e.code
return s
@ -42,46 +42,59 @@ def get_headers(url, data=None, headers=DEFAULT_HEADERS):
f.headers['Status'] = "%s" % f.code
headers = f.headers
f.close()
except urllib2.HTTPError, e:
except urllib.error.HTTPError as e:
e.headers['Status'] = "%s" % e.code
headers = e.headers
return dict(headers)
def open_url(url, data=None, headers=DEFAULT_HEADERS):
if isinstance(url, bytes):
url = url.decode('utf-8')
url = url.replace(' ', '%20')
req = urllib2.Request(url, data, headers)
return urllib2.urlopen(req)
req = urllib.request.Request(url, data, headers)
return urllib.request.urlopen(req)
def read_url(url, data=None, headers=DEFAULT_HEADERS, return_headers=False, unicode=False):
if DEBUG:
print 'ox.net.read_url', url
print('ox.net.read_url', url)
f = open_url(url, data, headers)
result = f.read()
f.close()
if f.headers.get('content-encoding', None) == 'gzip':
result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()
result = gzip.GzipFile(fileobj=BytesIO(result)).read()
if unicode:
encoding = detect_encoding(result)
ctype = f.headers.get('content-type', '').lower()
if 'charset' in ctype:
encoding = ctype.split('charset=')[-1]
else:
encoding = detect_encoding(result)
if not encoding:
encoding = 'latin-1'
result = result.decode(encoding)
if return_headers:
f.headers['Status'] = "%s" % f.code
return dict(f.headers), result
headers = {}
for key in f.headers:
headers[key.lower()] = f.headers[key]
return headers, result
return result
def detect_encoding(data):
data_lower = data.lower()
charset = re.compile('content="text/html; charset=(.*?)"').findall(data)
data_lower = data.lower().decode('utf-8', 'ignore')
charset = re.compile('content="text/html; charset=(.*?)"').findall(data_lower)
if not charset:
charset = re.compile('meta charset="(.*?)"').findall(data)
charset = re.compile('meta charset="(.*?)"').findall(data_lower)
if charset:
return charset[0].lower()
detector = UniversalDetector()
for line in data.split('\n'):
detector.feed(line)
p = 0
l = len(data)
s = 1024
while p < l:
detector.feed(data[p:p+s])
if detector.done:
break
p += s
detector.close()
return detector.result['encoding']
@ -97,9 +110,9 @@ def save_url(url, filename, overwrite=False):
def oshash(url):
def get_size(url):
req = urllib2.Request(url, headers=DEFAULT_HEADERS.copy())
req = urllib.request.Request(url, headers=DEFAULT_HEADERS.copy())
req.get_method = lambda : 'HEAD'
u = urllib2.urlopen(req)
u = urllib.request.urlopen(req)
if u.code != 200 or not 'Content-Length' in u.headers:
raise IOError
return int(u.headers['Content-Length'])
@ -107,8 +120,8 @@ def oshash(url):
def get_range(url, start, end):
headers = DEFAULT_HEADERS.copy()
headers['Range'] = 'bytes=%s-%s' % (start, end)
req = urllib2.Request(url, headers=headers)
u = urllib2.urlopen(req)
req = urllib.request.Request(url, headers=headers)
u = urllib.request.urlopen(req)
return u.read()
try:


@ -1,9 +1,10 @@
# -*- coding: utf-8 -*-
# ci:si:et:sw=4:sts=4:ts=4
import re
from text import find_re
import cache
from utils import json, ET
from . import cache
from .text import find_re
from .utils import json, ET
def get_embed_code(url, maxwidth=None, maxheight=None):
embed = {}


@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import with_statement, division
from __future__ import with_statement, division, print_function
import chardet
import re
import codecs
@ -71,7 +71,7 @@ def load(filename, offset=0):
try:
data = unicode(data, 'latin-1')
except:
print "failed to detect encoding, giving up"
print("failed to detect encoding, giving up")
return srt
data = data.replace('\r\n', '\n')


@ -6,7 +6,7 @@ from threading import Event
from hashlib import sha1
import os
from bencode import bencode, bdecode
from .bencode import bencode, bdecode
__all__ = ['create_torrent', 'get_info_hash', 'get_torrent_info', 'get_files', 'get_torrent_size']
@ -24,9 +24,8 @@ def get_info_hash(torrentFile):
return sha1(bencode(info)).hexdigest()
def get_torrent_info(data=None, file=None):
from bencode import bencode
if file:
if isinstance(file, unicode):
if not isinstance(file, bytes):
file = file.encode('utf-8')
with open(file, 'rb') as f:
data = f.read()
@ -36,7 +35,7 @@ def get_torrent_info(data=None, file=None):
metainfo = bdecode(data)
info = metainfo['info']
piece_length = info['piece length']
if info.has_key('length'):
if 'length' in info:
# let's assume we just have one file
file_length = info['length']
else:


@ -2,8 +2,8 @@
# encoding: utf-8
__version__ = '1.0.0'
import imdb
import wikipedia
import google
import piratecinema
import oxdb
from . import imdb
from . import wikipedia
from . import google
from . import piratecinema
from . import oxdb


@ -1,7 +1,6 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
import time
from ox import strip_tags, find_re
from ox.cache import read_url


@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from urllib import quote
from six.moves.urllib.parse import quote
from ox import find_re, strip_tags, decode_html
from ox.cache import read_url


@ -1,14 +1,11 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from datetime import datetime
from urllib import urlencode
import json
import os
import re
from ox import find_re, strip_tags, decode_html
from ox import find_re, strip_tags
from ox.cache import read_url
from ox.net import open_url
def get_data(id, language='en'):
if language == 'de':
@ -57,7 +54,7 @@ def backup(filename):
data = json.load(f)
else:
data = {}
start = ids and max(map(int, data)) or 1
start = max(map(int, data)) or 1
for i in range(start, 11872):
info = get_data(i)
if info:


@ -5,7 +5,7 @@ import re
import ox.cache
from ox.cache import read_url
from ox.html import strip_tags
from ox.text import find_re, remove_special_characters
from ox.text import find_re
import imdb


@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from urllib import unquote
from six.moves.urllib.parse import unquote
from ox.cache import read_url


@ -1,17 +1,17 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
import urllib
from six.moves import urllib
import ox
from ox import strip_tags, decode_html
from ox.utils import json
from ox.cache import read_url
def find(query, timeout=ox.cache.cache_timeout):
if isinstance(query, unicode):
if not isinstance(query, bytes):
query = query.encode('utf-8')
params = urllib.urlencode({'q': query})
params = urllib.parse.urlencode({'q': query})
url = 'http://duckduckgo.com/html/?' + params
data = read_url(url, timeout=timeout).decode('utf-8')
results = []


@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
import urllib
from six.moves import urllib
import ox
from ox import strip_tags, decode_html
@ -13,9 +13,9 @@ def read_url(url, data=None, headers=ox.net.DEFAULT_HEADERS, timeout=DEFAULT_TIM
return ox.cache.read_url(url, data, headers, timeout, unicode=True)
def quote_plus(s):
if not isinstance(s, str):
if not isinstance(s, bytes):
s = s.encode('utf-8')
return urllib.quote_plus(s)
return urllib.parse.quote_plus(s)
def find(query, max_results=DEFAULT_MAX_RESULTS, timeout=DEFAULT_TIMEOUT):
"""


@ -1,23 +1,27 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import urllib
from __future__ import print_function
import re
import time
import unicodedata
import ox
from ox import find_re, strip_tags
import ox.cache
from six.moves import urllib
from six import string_types
from siteparser import SiteParser
import duckduckgo
from .. import find_re, strip_tags, decode_html
from .. import cache
from . siteparser import SiteParser
from . import duckduckgo
from ..utils import datetime
from ..geo import normalize_country_name
def read_url(url, data=None, headers=ox.cache.DEFAULT_HEADERS, timeout=ox.cache.cache_timeout, valid=None, unicode=False):
def read_url(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None, unicode=False):
headers = headers.copy()
return ox.cache.read_url(url, data, headers, timeout, unicode=unicode)
return cache.read_url(url, data, headers, timeout, unicode=unicode)
def get_url(id):
return "http://www.imdb.com/title/tt%s/" % id
@ -49,7 +53,7 @@ class Imdb(SiteParser):
'page': 'business',
're': [
'<h5>Budget</h5>\s*?\$(.*?)<br',
lambda data: find_re(ox.decode_html(data).replace(',', ''), '\d+')
lambda data: find_re(decode_html(data).replace(',', ''), '\d+')
],
'type': 'int'
},
@ -211,7 +215,7 @@ class Imdb(SiteParser):
'page': 'releaseinfo',
're': [
'<td class="release_date">(.*?)</td>',
ox.strip_tags,
strip_tags,
],
'type': 'list'
},
@ -326,7 +330,7 @@ class Imdb(SiteParser):
if 'alternativeTitles' in self:
if len(self['alternativeTitles']) == 2 and \
isinstance(self['alternativeTitles'][0], basestring):
isinstance(self['alternativeTitles'][0], string_types):
self['alternativeTitles'] = [self['alternativeTitles']]
#normalize country names
@ -472,7 +476,7 @@ class Imdb(SiteParser):
if c:
alt[title].append(c)
self['alternativeTitles'] = []
for t in sorted(alt, lambda a, b: cmp(sorted(alt[a]), sorted(alt[b]))):
for t in sorted(alt, key=lambda a: sorted(alt[a])):
if alt[t]:
countries = sorted([normalize_country_name(c) or c for c in alt[t]])
self['alternativeTitles'].append((t, countries))
@ -492,7 +496,7 @@ class Imdb(SiteParser):
if 'votes' in self: self['votes'] = self['votes'].replace(',', '')
if 'cast' in self:
if isinstance(self['cast'][0], basestring):
if isinstance(self['cast'][0], string_types):
self['cast'] = [self['cast']]
self['actor'] = [c[0] for c in self['cast']]
def cleanup_character(c):
@ -503,10 +507,12 @@ class Imdb(SiteParser):
if 'connections' in self:
cc={}
if len(self['connections']) == 3 and isinstance(self['connections'][0], basestring):
if len(self['connections']) == 3 and isinstance(self['connections'][0], string_types):
self['connections'] = [self['connections']]
for rel, data, _ in self['connections']:
#cc[unicode(rel)] = re.compile('<a href="/title/tt(\d{7})/">(.*?)</a>').findall(data)
if isinstance(rel, bytes):
rel = rel.decode('utf-8')
#cc[rel] = re.compile('<a href="/title/tt(\d{7})/">(.*?)</a>').findall(data)
def get_conn(c):
r = {
'id': c[0],
@ -516,14 +522,14 @@ class Imdb(SiteParser):
if len(description) == 2 and description[-1].strip() != '-':
r['description'] = description[-1].strip()
return r
cc[unicode(rel)] = map(get_conn, re.compile('<a href="/title/tt(\d{7})/">(.*?)</a>(.*?)<\/div', re.DOTALL).findall(data))
cc[rel] = list(map(get_conn, re.compile('<a href="/title/tt(\d{7})/">(.*?)</a>(.*?)<\/div', re.DOTALL).findall(data)))
self['connections'] = cc
for key in ('country', 'genre'):
if key in self:
self[key] = filter(lambda x: x.lower() != 'home', self[key])
self[key] = list(filter(lambda x: x.lower() != 'home', self[key]))
#0092999
if '_director' in self:
if 'series' in self or 'isSeries' in self:
@ -590,8 +596,8 @@ class Imdb(SiteParser):
if key in self:
if isinstance(self[key][0], list):
self[key] = [i[0] for i in self[key] if i]
self[key] = sorted(list(set(self[key])),
lambda a, b: self[key].index(a) - self[key].index(b))
self[key] = sorted(list(set(self[key])), key=lambda a: self[key].index(a))
if 'budget' in self and 'gross' in self:
self['profit'] = self['gross'] - self['budget']
@ -655,7 +661,7 @@ def get_movie_by_title(title, timeout=-1):
u'0866567'
'''
params = {'s':'tt','q': title}
if isinstance(title, unicode):
if not isinstance(title, bytes):
try:
params['q'] = unicodedata.normalize('NFKC', params['q']).encode('latin-1')
except:
@ -731,7 +737,7 @@ def get_movie_id(title, director='', year='', timeout=-1):
if year:
params['q'] = u'"%s (%s)" %s' % (title, year, director)
google_query = "site:imdb.com %s" % params['q']
if isinstance(params['q'], unicode):
if not isinstance(params['q'], bytes):
try:
params['q'] = unicodedata.normalize('NFKC', params['q']).encode('latin-1')
except:
@ -775,7 +781,7 @@ def get_movie_poster(imdbId):
info = ImdbCombined(imdbId)
if 'posterId' in info:
url = "http://www.imdb.com/media/rm%s/tt%s" % (info['posterId'], imdbId)
data = read_url(url)
data = read_url(url).decode('utf-8', 'ignore')
poster = find_re(data, 'img.*?id="primary-img".*?src="(.*?)"')
return poster
elif 'series' in info:
@ -787,11 +793,11 @@ def get_episodes(imdbId, season=None):
url = 'http://www.imdb.com/title/tt%s/episodes' % imdbId
if season:
url += '?season=%d' % season
data = ox.cache.read_url(url)
data = cache.read_url(url)
for e in re.compile('<div data-const="tt(\d{7})".*?>.*?<div>S(\d+), Ep(\d+)<\/div>\n<\/div>', re.DOTALL).findall(data):
episodes['S%02dE%02d' %(int(e[1]), int(e[2]))] = e[0]
else:
data = ox.cache.read_url(url)
data = cache.read_url(url)
match = re.compile('<strong>Season (\d+)</strong>').findall(data)
if match:
for season in range(1, int(match[0]) + 1):
@ -800,7 +806,7 @@ def get_episodes(imdbId, season=None):
def max_votes():
url = 'http://www.imdb.com/search/title?num_votes=500000,&sort=num_votes,desc'
data = ox.cache.read_url(url)
data = cache.read_url(url)
votes = max([int(v.replace(',', ''))
for v in re.compile('<td class="sort_col">([\d,]+)</td>').findall(data)])
return votes
@ -810,6 +816,6 @@ def guess(title, director='', timeout=-1):
if __name__ == "__main__":
import json
print json.dumps(Imdb('0306414'), indent=2)
print(json.dumps(Imdb('0306414'), indent=2))
#print json.dumps(Imdb('0133093'), indent=2)
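
Two of the changes above replace comparator functions with key functions, since sorted() in Python 3 no longer accepts a cmp argument. A minimal before/after (the alt mapping is a made-up example):

alt = {'Titel': ['de', 'at'], 'Title': ['us']}
# Python 2 only:
#   sorted(alt, lambda a, b: cmp(sorted(alt[a]), sorted(alt[b])))
# Python 2 and 3:
sorted(alt, key=lambda a: sorted(alt[a]))   # ['Titel', 'Title'] since ['at', 'de'] < ['us']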


@ -1,5 +1,7 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import print_function
import re
from ox.net import read_url
@ -13,5 +15,5 @@ def get_poster_url(id):
return ''
if __name__ == '__main__':
print get_poster_url('0749451')
print(get_poster_url('0749451'))


@ -2,22 +2,24 @@
# vi:si:et:sw=4:sts=4:ts=4
import re
from six import string_types
from ..cache import read_url
from .. import strip_tags, decode_html
from .. import decode_html
from ..utils import datetime
def cleanup(key, data, data_type):
if data:
if isinstance(data[0], basestring):
if isinstance(data[0], string_types):
#FIXME: some types need strip_tags
#data = [strip_tags(decode_html(p)).strip() for p in data]
data = [decode_html(p).strip() for p in data]
elif isinstance(data[0], list) or isinstance(data[0], tuple):
data = [cleanup(key, p, data_type) for p in data]
while len(data) == 1 and not isinstance(data, basestring):
while len(data) == 1 and not isinstance(data, string_types):
data = data[0]
if data_type == 'list' and isinstance(data, basestring):
if data_type == 'list' and isinstance(data, string_types):
data = [data, ]
elif data_type != 'list':
data = ''
@ -40,7 +42,7 @@ class SiteParser(dict):
for key in self.regex:
url = self.get_url(self.regex[key]['page'])
data = self.read_url(url, timeout)
if isinstance(self.regex[key]['re'], basestring):
if isinstance(self.regex[key]['re'], string_types):
data = re.compile(self.regex[key]['re'], re.DOTALL).findall(data)
data = cleanup(key, data, self.regex[key]['type'])
elif callable(self.regex[key]['re']):
@ -51,7 +53,7 @@ class SiteParser(dict):
f = r
else:
f = re.compile(r, re.DOTALL).findall
if isinstance(data, basestring):
if isinstance(data, string_types):
data = f(data)
else:
data = [f(d) for d in data]
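
basestring is gone in Python 3; six.string_types gives an isinstance() target that covers str and unicode on Python 2 and plain str on Python 3. A tiny sketch (the helper is illustrative, not part of siteparser):

from six import string_types

def as_list(value):
    # wrap a single string in a list, leave lists/tuples alone
    return [value] if isinstance(value, string_types) else list(value)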


@ -1,11 +1,14 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import print_function
import re
from urllib import urlencode
from six.moves import urllib
from ox.utils import json
from ox.cache import read_url
from ox import find_re, decode_html
from ox import find_re
def get_id(url):
@ -138,11 +141,11 @@ def get_allmovie_id(wikipedia_url):
def find(query, max_results=10):
query = {'action': 'query', 'list':'search', 'format': 'json',
'srlimit': max_results, 'srwhat': 'text', 'srsearch': query.encode('utf-8')}
url = "http://en.wikipedia.org/w/api.php?" + urlencode(query)
url = "http://en.wikipedia.org/w/api.php?" + urllib.parse.urlencode(query)
data = read_url(url)
if not data:
data = read_url(url, timeout=0)
result = json.loads(data)
result = json.loads(data.decode('utf-8'))
results = []
if result and 'query' in result:
for r in result['query']['search']:


@ -36,15 +36,16 @@ setup(
download_url="http://code.0x2620.org/python-ox/download",
license="GPLv3",
packages=['ox', 'ox.django', 'ox.django.api', 'ox.torrent', 'ox.web'],
install_requires=['chardet', 'feedparser'],
install_requires=['six', 'chardet', 'feedparser'],
keywords = [
],
classifiers = [
'Operating System :: OS Independent',
'Programming Language :: Python',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.4',
'Topic :: Software Development :: Libraries :: Python Modules',
],
)