diff --git a/ox/html.py b/ox/html.py index 642dd55..815f657 100644 --- a/ox/html.py +++ b/ox/html.py @@ -4,7 +4,7 @@ import re import string from six.moves.html_entities import name2codepoint -from six import unichr, PY2 +from six import unichr, PY2, string_types # Configuration for add_links() function @@ -34,7 +34,7 @@ def escape(html): >>> escape('html "test" & ') 'html "test" & <brothers>' ''' - if not isinstance(html, basestring): + if not isinstance(html, string_types): html = str(html) return html.replace('&', '&').replace('<', '<').replace('>', '>').replace('"', '"').replace("'", ''') diff --git a/ox/normalize.py b/ox/normalize.py index e3876f3..128f33c 100644 --- a/ox/normalize.py +++ b/ox/normalize.py @@ -4,6 +4,9 @@ import re import unicodedata +from six import string_types + + _articles = ('the', 'la', 'a', 'die', 'der', 'le', 'el', "l'", 'il', 'das', 'les', 'o', 'ein', 'i', 'un', 'los', 'de', 'an', 'una', 'las', 'eine', 'den', 'gli', 'het', 'os', 'lo', @@ -95,7 +98,7 @@ def normalize_imdbid(imdbId): >>> normalize_imdbid('tt0159206') '0159206' """ - if isinstance(imdbId, basestring): + if isinstance(imdbId, string_types): imdbId = re.sub('.*(\d{7}).*', '\\1', imdbId) elif isinstance(imdbId, int): imdbId = "%07d" % imdbId diff --git a/ox/web/archive.py b/ox/web/archive.py index 52ac487..0c733c3 100644 --- a/ox/web/archive.py +++ b/ox/web/archive.py @@ -3,6 +3,8 @@ from .. import cache from ..utils import json +from six import string_types + def get_id(url): return url.split("/")[-1] @@ -19,7 +21,7 @@ def get_data(id): data[key] = details['metadata'][key] if isinstance(data[key], list): data[key] = data[key][0] - if isinstance(data[key], basestring): + if isinstance(data[key], string_types): data[key] = data[key].strip() if data[key][0] == '[' and data[key][-1] == ']': data[key] = data[key][1:-1] diff --git a/ox/web/wikipedia.py b/ox/web/wikipedia.py index 0e9d8de..beacdac 100644 --- a/ox/web/wikipedia.py +++ b/ox/web/wikipedia.py @@ -5,6 +5,7 @@ from __future__ import print_function import re from six.moves import urllib +from six import string_types from ox.utils import json from ox.cache import read_url @@ -68,7 +69,7 @@ def get_movie_data(wikipedia_url): value = value.split('
') if value: if key in filmbox: - if isinstance(value, list) and isinstance(filmbox[key], basestring): + if isinstance(value, list) and isinstance(filmbox[key], string_types): filmbox[key] = [filmbox[key]] + value else: filmbox[key] += value