diff --git a/ox/fixunicode.py b/ox/fixunicode.py
index b49e277..b649a58 100644
--- a/ox/fixunicode.py
+++ b/ox/fixunicode.py
@@ -6,7 +6,7 @@ from __future__ import print_function
 
 import unicodedata
 
-from six import unichr, PY3
+from six import unichr, PY2
 
 __all__ = ['fix_bad_unicode']
 
@@ -151,10 +151,10 @@ def text_badness(text):
     - Improbable single-byte characters, such as ƒ or ¬
     - Letters in somewhat rare scripts
     '''
-    if PY3:
-        assert isinstance(text, str)
-    else:
+    if PY2:
         assert isinstance(text, unicode)
+    else:
+        assert isinstance(text, str)
     errors = 0
     very_weird_things = 0
     weird_things = 0
diff --git a/ox/html.py b/ox/html.py
index 13524fd..7154e21 100644
--- a/ox/html.py
+++ b/ox/html.py
@@ -117,7 +117,7 @@ def clean_html(text):
         * Removes stuff like "<p>&nbsp;&nbsp;</p>", but only if it's at the
           bottom of the text.
     """
-    from text import normalize_newlines
+    from .text import normalize_newlines
     text = normalize_newlines(text)
     text = re.sub(r'<(/?)\s*b\s*>', '<\\1strong>', text)
     text = re.sub(r'<(/?)\s*i\s*>', '<\\1em>', text)
diff --git a/ox/net.py b/ox/net.py
index e125133..46ef0e1 100644
--- a/ox/net.py
+++ b/ox/net.py
@@ -8,7 +8,7 @@ import os
 import re
 import struct
 
-from six import BytesIO, PY3
+from six import BytesIO, PY2
 from six.moves import urllib
 
 from chardet.universaldetector import UniversalDetector
@@ -52,14 +52,14 @@ def get_json(url, data=None, headers=DEFAULT_HEADERS):
     return json.loads(read_url(url, data, headers).decode('utf-8'))
 
 def open_url(url, data=None, headers=DEFAULT_HEADERS):
-    if PY3:
-        if isinstance(url, bytes):
-            url = url.decode('utf-8')
-    else:
+    if PY2:
         if not isinstance(url, bytes):
             url = url.encode('utf-8')
+    else:
+        if isinstance(url, bytes):
+            url = url.decode('utf-8')
     url = url.replace(' ', '%20')
-    if data and PY3 and not isinstance(data, bytes):
+    if data and not PY2 and not isinstance(data, bytes):
         data = data.encode('utf-8')
     req = urllib.request.Request(url, data, headers)
     return urllib.request.urlopen(req)