Use six to support Python 2 and 3

2014-09-30 21:04:46 +02:00 · 2014-09-30 21:04:46 +02:00 · d4d09b56b6
commit d4d09b56b6
parent 1b1dcf1c58
28 changed files with 1730 additions and 1678 deletions
--- a/ox/web/siteparser.py
+++ b/ox/web/siteparser.py
@@ -2,22 +2,24 @@
 # vi:si:et:sw=4:sts=4:ts=4
 import re

+from six import string_types
+
 from ..cache import read_url
-from .. import strip_tags, decode_html
+from .. import decode_html
 from ..utils import datetime


 def cleanup(key, data, data_type):
    if data:
-        if isinstance(data[0], basestring):
+        if isinstance(data[0], string_types):
            #FIXME: some types need strip_tags
            #data = [strip_tags(decode_html(p)).strip() for p in data]
            data = [decode_html(p).strip() for p in data]
        elif isinstance(data[0], list) or isinstance(data[0], tuple):
            data = [cleanup(key, p, data_type) for p in data]
-        while len(data) == 1 and not isinstance(data, basestring):
+        while len(data) == 1 and not isinstance(data, string_types):
            data = data[0]
-        if data_type == 'list' and isinstance(data, basestring):
+        if data_type == 'list' and isinstance(data, string_types):
            data = [data, ]
    elif data_type != 'list':
        data = ''
@@ -40,7 +42,7 @@ class SiteParser(dict):
        for key in self.regex:
            url = self.get_url(self.regex[key]['page'])
            data = self.read_url(url, timeout)
-            if isinstance(self.regex[key]['re'], basestring):
+            if isinstance(self.regex[key]['re'], string_types):
                data = re.compile(self.regex[key]['re'], re.DOTALL).findall(data)
                data = cleanup(key, data, self.regex[key]['type'])
            elif callable(self.regex[key]['re']):
@@ -51,7 +53,7 @@ class SiteParser(dict):
                        f = r
                    else:
                        f = re.compile(r, re.DOTALL).findall
-                    if isinstance(data, basestring):
+                    if isinstance(data, string_types):
                        data = f(data)
                    else:
                        data = [f(d) for d in data]