drop six and python2 support

This commit is contained in:
j 2023-07-27 13:07:13 +02:00
commit adad3be419
31 changed files with 54 additions and 426 deletions

View file

@@ -2,7 +2,7 @@
 # vi:si:et:sw=4:sts=4:ts=4
 from __future__ import print_function
 import re
-from six.moves.urllib.parse import quote
+from urllib.parse import quote
 from ox import find_re, strip_tags, decode_html
 from ox.cache import read_url

View file

@@ -2,7 +2,6 @@ from __future__ import print_function
 import json
 import re
-from six import text_type
 from ox.cache import read_url
 HEADERS = {
@@ -17,9 +16,9 @@ USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7) '
 USER_AGENT += 'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 Safari/534.48.3'
 def get_movie_data(title, director):
-if isinstance(title, text_type):
+if isinstance(title, str):
 title = title.encode('utf-8')
-if isinstance(director, text_type):
+if isinstance(director, str):
 director = director.encode('utf-8')
 data = {}
 # itunes section (preferred source for link)

View file

@@ -3,8 +3,6 @@
 from .. import cache
 from ..utils import json
-from six import string_types
 def get_id(url):
 return url.split("/")[-1]
@@ -21,7 +19,7 @@ def get_data(id):
 data[key] = details['metadata'][key]
 if isinstance(data[key], list):
 data[key] = data[key][0]
-if isinstance(data[key], string_types):
+if isinstance(data[key], str):
 data[key] = data[key].strip()
 if data[key][0] == '[' and data[key][-1] == ']':
 data[key] = data[key][1:-1]

View file

@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 import re
-from six.moves.urllib.parse import unquote
+from urllib.parse import unquote
 from ox.cache import read_url

View file

@@ -2,7 +2,7 @@
 # vi:si:et:sw=4:sts=4:ts=4
 import re
-from six.moves import urllib
+import urllib
 import ox
 from ox import strip_tags, decode_html
 from ox.cache import read_url

View file

@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 import re
-from six.moves import urllib
+import urllib
 import ox
 from ox import strip_tags, decode_html

View file

@@ -7,8 +7,7 @@ import re
 import time
 import unicodedata
-from six.moves.urllib.parse import urlencode
-from six import string_types
+from urllib.parse import urlencode
 from .. import find_re, strip_tags, decode_html
 from .. import cache
@@ -449,7 +448,7 @@ class Imdb(SiteParser):
 if 'alternativeTitles' in self:
 if len(self['alternativeTitles']) == 2 and \
-isinstance(self['alternativeTitles'][0], string_types):
+isinstance(self['alternativeTitles'][0], str):
 self['alternativeTitles'] = [self['alternativeTitles']]
 for key in ('country', 'genre', 'language', 'sound', 'color'):
@@ -514,7 +513,7 @@ class Imdb(SiteParser):
 self['sound'] = list(sorted(set(self['sound'])))
 if 'cast' in self:
-if isinstance(self['cast'][0], string_types):
+if isinstance(self['cast'][0], str):
 self['cast'] = [self['cast']]
 self['actor'] = [c[0] for c in self['cast']]
 def cleanup_character(c):

View file

@@ -2,7 +2,7 @@
 # encoding: utf-8
 from __future__ import print_function
 import re
-from six.moves.urllib.parse import urlencode
+from urllib.parse import urlencode
 from ox.cache import read_url
 from ox.html import decode_html, strip_tags

View file

@@ -2,7 +2,7 @@
 # vi:si:et:sw=4:sts=4:ts=4
 import re
-from six.moves.urllib.parse import quote
+from urllib.parse import quote
 from lxml.html import document_fromstring
 from ox.cache import read_url

View file

@@ -4,8 +4,6 @@ import re
 import json
 from multiprocessing.pool import ThreadPool
-from six import string_types
 from ..cache import read_url
 from .. import decode_html
 from ..utils import datetime
@@ -13,15 +11,15 @@ from ..utils import datetime
 def cleanup(key, data, data_type):
 if data:
-if isinstance(data[0], string_types):
+if isinstance(data[0], str):
 #FIXME: some types need strip_tags
 #data = [strip_tags(decode_html(p)).strip() for p in data]
 data = [decode_html(p).strip() for p in data]
 elif isinstance(data[0], list) or isinstance(data[0], tuple):
 data = [cleanup(key, p, data_type) for p in data]
-while len(data) == 1 and not isinstance(data, string_types):
+while len(data) == 1 and not isinstance(data, str):
 data = data[0]
-if data_type == 'list' and isinstance(data, string_types):
+if data_type == 'list' and isinstance(data, str):
 data = [data, ]
 elif data_type != 'list':
 data = ''
@@ -49,7 +47,7 @@ class SiteParser(dict):
 for key in self.regex:
 url = self.get_url(self.regex[key]['page'])
 data = self.read_url(url, timeout)
-if isinstance(self.regex[key]['re'], string_types):
+if isinstance(self.regex[key]['re'], str):
 data = re.compile(self.regex[key]['re'], re.DOTALL).findall(data)
 data = cleanup(key, data, self.regex[key]['type'])
 elif callable(self.regex[key]['re']):
@@ -60,7 +58,7 @@ class SiteParser(dict):
 f = r
 else:
 f = re.compile(r, re.DOTALL).findall
-if isinstance(data, string_types):
+if isinstance(data, str):
 data = f(data)
 else:
 data = [f(d) for d in data]

View file

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-from six.moves import urllib
+import urllib
 import lxml.html
 import ox

View file

@@ -3,7 +3,7 @@
 from datetime import datetime
 import re
-from six.moves.urllib.parse import quote
+from urllib.parse import quote
 from ox import find_re, cache, strip_tags, decode_html, get_torrent_info, normalize_newlines
 from ox.normalize import normalize_imdbid

View file

@@ -2,7 +2,7 @@
 # vi:si:et:sw=4:sts=4:ts=4
 import re
 from datetime import datetime
-from six.moves.urllib.parse import quote
+from urllib.parse import quote
 import lxml.html
 import ox

View file

@@ -4,8 +4,7 @@ from __future__ import print_function
 import re
-from six.moves import urllib
-from six import string_types
+import urllib
 from ox.utils import json
 from ox.cache import read_url
@@ -69,7 +68,7 @@ def get_movie_data(wikipedia_url):
 value = value.split('<br>')
 if value:
 if key in filmbox:
-if isinstance(value, list) and isinstance(filmbox[key], string_types):
+if isinstance(value, list) and isinstance(filmbox[key], str):
 filmbox[key] = [filmbox[key]] + value
 else:
 filmbox[key] += value

View file

@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-from six.moves.urllib.parse import quote, unquote_plus
-from six.moves import urllib
-from six.moves import http_cookiejar as cookielib
+from urllib.parse import quote, unquote_plus
+import urllib
+from http import cookiejar as cookielib
 import re
 from xml.dom.minidom import parseString
 import json