drop six and python2 support

This commit is contained in:
j 2023-07-27 13:07:13 +02:00
parent 955b4a4e9b
commit adad3be419
31 changed files with 54 additions and 426 deletions

View file

@ -10,10 +10,10 @@ import shutil
import sys
import time
from six.moves import http_cookiejar as cookielib
from six import BytesIO, PY2
from six.moves import urllib
from six.moves.urllib.parse import urlparse
from http import cookiejar as cookielib
from io import BytesIO
import urllib
from urllib.parse import urlparse
import requests
from . import __version__
@ -56,9 +56,6 @@ class API(object):
def _add_method(self, method, name):
if name is None:
name = method.func_name
if PY2:
setattr(self, name, MethodType(method, self, type(self)))
else:
setattr(self, name, MethodType(method, self))
def _add_action(self, action):
@ -73,9 +70,6 @@ class API(object):
return self._request(action, kw)
if 'doc' in self._properties[action]:
method.__doc__ = self._properties[action]['doc']
if PY2:
method.func_name = str(action)
else:
method.func_name = action
self._add_method(method, action)

View file

@ -10,9 +10,8 @@ import sqlite3
import time
import zlib
from six import BytesIO
from six.moves import urllib
from six import PY2
from io import BytesIO
import urllib
try:
import requests
USE_REQUESTS = True
@ -242,8 +241,6 @@ class SQLiteCache(Cache):
elif value == 'data':
if row[1] == 1:
r = zlib.decompress(r)
elif PY2:
r = str(r)
break
c.close()

View file

@ -6,7 +6,6 @@ from __future__ import print_function
import unicodedata
from six import unichr, text_type
__all__ = ['fix_bad_unicode']
@ -151,7 +150,7 @@ def text_badness(text):
- Improbable single-byte characters, such as ƒ or ¬
- Letters in somewhat rare scripts
'''
assert isinstance(text, text_type)
assert isinstance(text, str)
errors = 0
very_weird_things = 0
weird_things = 0
@ -289,7 +288,7 @@ SINGLE_BYTE_WEIRDNESS = (
# Pre-cache the Unicode data saying which of these first 256 characters are
# letters. We'll need it often.
SINGLE_BYTE_LETTERS = [
unicodedata.category(unichr(i)).startswith('L')
unicodedata.category(chr(i)).startswith('L')
for i in range(256)
]

View file

@ -9,8 +9,6 @@ import os
import hashlib
import sys
from six import PY2
__all__ = ['MultiPartForm']
@ -63,8 +61,6 @@ class MultiPartForm(object):
def __str__(self):
body = self.body()
if not PY2:
body = body.decode('utf-8')
return body
def body(self):

View file

@ -4,8 +4,6 @@ import math
import re
import string
from six import text_type
def toAZ(num):
"""
Converts an integer to bijective base 26 string using A-Z
@ -238,7 +236,7 @@ def int_value(strValue, default=u''):
u''
"""
try:
val = re.compile('(\d+)').findall(text_type(strValue).strip())[0]
val = re.compile('(\d+)').findall(str(strValue).strip())[0]
except:
val = default
return val
@ -255,7 +253,7 @@ def float_value(strValue, default=u''):
u''
"""
try:
val = re.compile('([\d.]+)').findall(text_type(strValue).strip())[0]
val = re.compile('([\d.]+)').findall(str(strValue).strip())[0]
except:
val = default
return val

View file

@ -3,8 +3,7 @@
# GPL 2008
import re
import string
from six.moves.html_entities import name2codepoint
from six import unichr, PY2, string_types
from html.entities import name2codepoint
letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
@ -26,8 +25,7 @@ link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+')
html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE)
hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL)
trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\Z')
if PY2:
del x # Temporary variable
def escape(html):
'''
@ -36,7 +34,7 @@ def escape(html):
>>> escape('html "test" & <brothers>')
'html &quot;test&quot; &amp; &lt;brothers&gt;'
'''
if not isinstance(html, string_types):
if not isinstance(html, str):
html = str(html)
return html.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&apos;')
@ -155,7 +153,7 @@ def decode_html(html):
"""
if isinstance(html, bytes):
html = html.decode('utf-8')
uchr = unichr
uchr = chr
def entitydecode(match, uchr=uchr):
entity = match.group(1)

View file

@ -2,19 +2,12 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from six import PY2
from .utils import json
def minify(source, comment=''):
# see https://github.com/douglascrockford/JSMin/blob/master/README
def get_next_non_whitespace_token():
pass
# python2 performance with unicode string is terrible
if PY2:
if isinstance(source, unicode): # pylint: disable=undefined-variable
source = source.encode('utf-8')
if isinstance(comment, unicode): # pylint: disable=undefined-variable
comment = comment.encode('utf-8')
tokens = tokenize(source)
length = len(tokens)
minified = '/*' + comment + '*/' if comment else ''

View file

@ -13,8 +13,8 @@ try:
USE_REQUESTS = True
except:
USE_REQUESTS = False
from six import BytesIO, PY2
from six.moves import urllib
from io import BytesIO
import urllib
from chardet.universaldetector import UniversalDetector
@ -59,14 +59,10 @@ def get_json(url, data=None, headers=None):
def open_url(url, data=None, headers=None):
if headers is None:
headers = DEFAULT_HEADERS.copy()
if PY2:
if not isinstance(url, bytes):
url = url.encode('utf-8')
else:
if isinstance(url, bytes):
url = url.decode('utf-8')
url = url.replace(' ', '%20')
if data and not PY2 and not isinstance(data, bytes):
if data and not isinstance(data, bytes):
data = data.encode('utf-8')
req = urllib.request.Request(url, data, headers)
return urllib.request.urlopen(req)

View file

@ -4,8 +4,6 @@
import re
import unicodedata
from six import string_types
_articles = ('the', 'la', 'a', 'die', 'der', 'le', 'el',
"l'", 'il', 'das', 'les', 'o', 'ein', 'i', 'un', 'los', 'de',
@ -103,7 +101,7 @@ def normalize_imdbid(imdbId):
>>> normalize_imdbid('tt0159206')
'0159206'
"""
if isinstance(imdbId, string_types):
if isinstance(imdbId, str):
imdbId = re.sub('.*(\d{7}).*', '\\1', imdbId)
elif isinstance(imdbId, int):
imdbId = "%07d" % imdbId

View file

@ -5,7 +5,6 @@ import codecs
import re
import chardet
from six import PY2
import ox
@ -24,9 +23,6 @@ def _detect_encoding(fp):
# go to beginning of file and get the first 4 bytes
oldFP = fp.tell()
fp.seek(0)
if PY2:
(byte1, byte2, byte3, byte4) = [ord(b) for b in fp.read(4)]
else:
(byte1, byte2, byte3, byte4) = fp.read(4)
# try bom detection using 4 bytes, 3 bytes, or 2 bytes

View file

@ -1,11 +1,13 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# GPL 2008
import gzip
import math
import re
import unicodedata
from io import BytesIO
from six.moves import reduce
from functools import reduce
ARTICLES = list(set([
# def sg, def pl, indef sg, indef pl (each m/f/n)
@ -646,8 +648,6 @@ def phone2numeric(phone):
return letters.sub(char2number, phone)
def compress_string(s):
import gzip
from six import BytesIO
zbuf = BytesIO()
zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuf)
zfile.write(s)

View file

@ -5,12 +5,8 @@
from threading import Event
from hashlib import sha1
import os
from six import PY2
if PY2:
from .bencode import bencode, bdecode
else:
from .bencode3 import bencode, bdecode
from .bencode3 import bencode, bdecode
__all__ = ['create_torrent', 'get_info_hash', 'get_torrent_info', 'get_files', 'get_torrent_size']

View file

@ -1,321 +0,0 @@
# Written by Petru Paler, Uoti Urpala, Ross Cohen and John Hoffman
# see LICENSE.txt for license information
from __future__ import print_function
from types import IntType, LongType, StringType, ListType, TupleType, DictType
try:
from types import BooleanType
except ImportError:
BooleanType = None
try:
from types import UnicodeType
except ImportError:
UnicodeType = None
from cStringIO import StringIO
def decode_int(x, f):
    """Decode the bencoded integer (``i<number>e``) that starts at ``x[f]``.

    Args:
        x: the bencoded data.
        f: index of the leading ``i``.

    Returns:
        ``(value, next_index)`` where ``next_index`` points just past the
        terminating ``e``.

    Raises:
        ValueError: there is no terminating ``e``, the text between ``i``
            and ``e`` is not an integer, a negative number starts with
            ``0`` (e.g. ``-0``), or a number has a leading zero (``03``).
    """
    f += 1
    newf = x.index('e', f)
    # int() returns arbitrary-precision values by itself, so the former
    # bare-except fallback to long() was redundant.
    n = int(x[f:newf])
    if x[f] == '-':
        if x[f + 1] == '0':
            raise ValueError
    elif x[f] == '0' and newf != f + 1:
        raise ValueError
    return (n, newf + 1)
def decode_string(x, f):
    """Decode the bencoded string (``<length>:<payload>``) starting at ``x[f]``.

    Args:
        x: the bencoded data.
        f: index of the first character of the length prefix.

    Returns:
        ``(payload, next_index)``. The payload is taken with a slice, so a
        payload shorter than the declared length is returned truncated and
        ``next_index`` may then lie beyond ``len(x)``.

    Raises:
        ValueError: there is no ``:``, the length is not an integer, is
            negative, or has a leading zero (``02``).
    """
    colon = x.index(':', f)
    # int() handles arbitrarily large values; no long() fallback needed.
    n = int(x[f:colon])
    if n < 0:
        # a negative length would move the returned index backwards
        raise ValueError
    if x[f] == '0' and colon != f + 1:
        raise ValueError
    colon += 1
    return (x[colon:colon + n], colon + n)
def decode_unicode(x, f):
    """Decode a bencoded string that is preceded by one marker character.

    The character at ``x[f]`` is skipped, the string that follows is read
    with decode_string() and its payload is decoded as UTF-8.

    Returns:
        ``(text, next_index)``.
    """
    s, f = decode_string(x, f + 1)
    return (s.decode('UTF-8'), f)
def decode_list(x, f):
    """Decode the bencoded list (``l<items>e``) that starts at ``x[f]``.

    Each item is dispatched through ``decode_func`` on its first character.

    Returns:
        ``(items, next_index)`` where ``next_index`` points just past the
        closing ``e``.

    Raises:
        IndexError: the data ends before the closing ``e``.
        KeyError: an item starts with a character ``decode_func`` does not know.
    """
    r, f = [], f + 1
    while x[f] != 'e':
        v, f = decode_func[x[f]](x, f)
        r.append(v)
    return (r, f + 1)
def decode_dict(x, f):
    """Decode the bencoded dictionary (``d<key><value>...e``) at ``x[f]``.

    Keys are always read with decode_string(); values are dispatched through
    ``decode_func`` on their first character.

    Returns:
        ``(mapping, next_index)`` where ``next_index`` points just past the
        closing ``e``.

    Raises:
        IndexError: the data ends before the closing ``e``.
        KeyError: a value starts with a character ``decode_func`` does not know.
        ValueError: a key is not a valid bencoded string.
    """
    r, f = {}, f + 1
    # Key order is not checked here (the former `lastkey >= k` test is
    # disabled), so unsorted keys are accepted and a repeated key simply
    # keeps the value seen last.
    while x[f] != 'e':
        k, f = decode_string(x, f)
        r[k], f = decode_func[x[f]](x, f)
    return (r, f + 1)
# Dispatch table: first character of a bencoded value -> decoder function.
decode_func = {
    'l': decode_list,
    'd': decode_dict,
    'i': decode_int,
}
# A string starts with its decimal length, so every digit selects decode_string.
decode_func.update(dict.fromkeys('0123456789', decode_string))
# decode_unicode is not registered:
#decode_func['u'] = decode_unicode
def bdecode(x, sloppy=1):
    """Decode one bencoded value from the start of ``x``.

    Args:
        x: the bencoded data.
        sloppy: when true (the default) anything following the first
            complete value is ignored; when false the value must span all
            of ``x``.

    Returns:
        The decoded value.

    Raises:
        ValueError: ``x`` is empty or malformed, or, in strict mode, the
            decoded value does not end exactly at ``len(x)``.
    """
    try:
        r, end = decode_func[x[0]](x, 0)
    except (IndexError, KeyError, ValueError):
        raise ValueError("bad bencoded data")
    if not sloppy and end != len(x):
        raise ValueError("bad bencoded data")
    return r
def test_bdecode():
    """Check bdecode() against well-formed and malformed bencoded input.

    bdecode() ignores trailing data unless called with ``sloppy=0``, and
    decode_dict() does not enforce key ordering, so the cases are grouped
    by the mode in which they are expected to be rejected.
    """
    def rejects(data, sloppy):
        # True when bdecode() refuses `data` with ValueError.
        try:
            bdecode(data, sloppy)
        except ValueError:
            return True
        return False

    # Malformed in itself: rejected in both modes.
    malformed = (
        'ie', 'i341foo382e', 'i-0e', 'i123', '', 'i03e',
        '35208734823ljdahflajhdf', '02:xy',
        'l', 'relwjhrlewjh', 'l01:ae', 'l0:',
        'd', 'd3:fooe', 'di1e0:e', 'd0:0:', 'd0:',
    )
    for data in malformed:
        assert rejects(data, 1), data
        assert rejects(data, 0), data

    # A complete value followed by extra data, or a string shorter than its
    # declared length: only strict mode rejects these.
    trailing = (
        '0:0:', 'i6easd', '2:abfdjslhfld', 'leanfdldjfh', 'defoobar',
        '9999:x',
    )
    for data in trailing:
        assert rejects(data, 0), data
        assert not rejects(data, 1), data

    assert bdecode('i4e') == 4
    assert bdecode('i0e') == 0
    assert bdecode('i123456789e') == 123456789
    assert bdecode('i-10e') == -10
    assert bdecode('0:') == ''
    assert bdecode('3:abc') == 'abc'
    assert bdecode('10:1234567890') == '1234567890'
    assert bdecode('le') == []
    assert bdecode('l0:0:0:e') == ['', '', '']
    assert bdecode('li1ei2ei3ee') == [1, 2, 3]
    assert bdecode('l3:asd2:xye') == ['asd', 'xy']
    assert bdecode('ll5:Alice3:Bobeli2ei3eee') == [['Alice', 'Bob'], [2, 3]]
    assert bdecode('de') == {}
    assert bdecode('d3:agei25e4:eyes4:bluee') == {'age': 25, 'eyes': 'blue'}
    assert bdecode('d8:spam.mp3d6:author5:Alice6:lengthi100000eee') == {'spam.mp3': {'author': 'Alice', 'length': 100000}}

    # Key ordering is not enforced: unsorted and repeated keys decode.
    assert bdecode('d1:b0:1:a0:e', 0) == {'a': '', 'b': ''}
    assert bdecode('d1:a0:1:a0:e', 0) == {'a': ''}
# Shared sentinel stored on every Bencached instance; encode_bencached()
# compares against it before trusting the wrapped data.
bencached_marker = []

class Bencached:
    """Wrapper around data that is already bencoded.

    encode_bencached() appends ``bencoded`` to the output unchanged.
    """

    def __init__(self, s):
        self.marker = bencached_marker
        self.bencoded = s

BencachedType = type(Bencached('')) # insufficient, but good as a filter
def encode_bencached(x, r):
    """Append the pre-encoded data held by ``x`` to the output list ``r``.

    Raises:
        AssertionError: ``x.marker`` does not compare equal to
            ``bencached_marker``.
    """
    assert x.marker == bencached_marker
    r.append(x.bencoded)
def encode_int(x, r):
    """Append the bencoded form of integer ``x`` (``i<number>e``) to list ``r``."""
    r.extend(('i', str(x), 'e'))
def encode_bool(x, r):
    """Append boolean ``x`` to list ``r``, encoded as the integer ``int(x)``."""
    encode_int(int(x), r)
def encode_string(x, r):
    """Append the bencoded form of string ``x`` (``<length>:<payload>``) to list ``r``."""
    r.extend((str(len(x)), ':', x))
def encode_unicode(x, r):
    """Append text ``x`` to list ``r`` as a bencoded string of its UTF-8 encoding.

    No type marker is written (the ``u`` prefix is disabled), so the output
    is the same as for a plain string.
    """
    #r.append('u')
    encode_string(x.encode('UTF-8'), r)
def encode_list(x, r):
    """Append the bencoded form of sequence ``x`` (``l<items>e``) to list ``r``.

    Each item is encoded by the function registered in ``encode_func`` for
    its exact type.

    Raises:
        KeyError: an item has a type that is not registered in ``encode_func``.
    """
    r.append('l')
    for e in x:
        encode_func[type(e)](e, r)
    r.append('e')
def encode_dict(x, r):
    """Append the bencoded form of dict ``x`` (``d<key><value>...e``) to list ``r``.

    Entries are written in sorted key order. Keys are written as bencoded
    strings; values are encoded by the function registered in
    ``encode_func`` for their exact type.

    Raises:
        TypeError: a key has no length (e.g. an integer key).
        KeyError: a value has a type that is not registered in ``encode_func``.
    """
    r.append('d')
    # sorted() works whether items() returns a list or a view; calling
    # .sort() on the result of items() only works for a list.
    for k, v in sorted(x.items()):
        r.extend((str(len(k)), ':', k))
        encode_func[type(v)](v, r)
    r.append('e')
# Dispatch table: exact type of a value -> encoder function.
encode_func = {}
encode_func[BencachedType] = encode_bencached
encode_func[IntType] = encode_int
encode_func[LongType] = encode_int
encode_func[StringType] = encode_string
encode_func[ListType] = encode_list
encode_func[TupleType] = encode_list
encode_func[DictType] = encode_dict
# BooleanType / UnicodeType are None when `types` does not provide them.
if BooleanType:
    encode_func[BooleanType] = encode_bool
if UnicodeType:
    encode_func[UnicodeType] = encode_unicode
def bencode(x):
    """Return the bencoded form of ``x`` as a string.

    The encoder is selected from ``encode_func`` by the exact type of ``x``
    (and, recursively, of every contained value).

    Raises:
        AssertionError: ``x`` or a value inside it cannot be encoded; a
            diagnostic line is printed first.
    """
    r = []
    try:
        encode_func[type(x)](x, r)
    except Exception:
        # `except Exception` lets KeyboardInterrupt/SystemExit propagate,
        # and the explicit raise keeps the AssertionError contract even
        # when asserts are stripped with -O.
        print("*** error *** could not encode type %s (value: %s)" % (type(x), x))
        raise AssertionError("could not encode type %s" % (type(x),))
    return ''.join(r)
def test_bencode():
    """Check bencode() output for integers, strings, lists and dicts."""
    assert bencode(4) == 'i4e'
    assert bencode(0) == 'i0e'
    assert bencode(-10) == 'i-10e'
    assert bencode(12345678901234567890) == 'i12345678901234567890e'
    assert bencode('') == '0:'
    assert bencode('abc') == '3:abc'
    assert bencode('1234567890') == '10:1234567890'
    assert bencode([]) == 'le'
    assert bencode([1, 2, 3]) == 'li1ei2ei3ee'
    assert bencode([['Alice', 'Bob'], [2, 3]]) == 'll5:Alice3:Bobeli2ei3eee'
    assert bencode({}) == 'de'
    assert bencode({'age': 25, 'eyes': 'blue'}) == 'd3:agei25e4:eyes4:bluee'
    assert bencode({'spam.mp3': {'author': 'Alice', 'length': 100000}}) == 'd8:spam.mp3d6:author5:Alice6:lengthi100000eee'
    # A non-string key must be refused. The failure branch lives in `else`
    # so that it is not swallowed by the `except AssertionError` below.
    try:
        bencode({1: 'foo'})
    except AssertionError:
        pass
    else:
        raise AssertionError("bencode() accepted a non-string dict key")
# psyco is optional: when it can be imported, bdecode and bencode are bound
# with it; when it cannot, the ImportError is ignored and nothing changes.
try:
    import psyco
    psyco.bind(bdecode)
    psyco.bind(bencode)
except ImportError:
    pass

View file

@ -8,11 +8,7 @@ from hashlib import sha1 as sha
from copy import copy
import re
from six import PY2
if PY2:
from .bencode import bencode
else:
from .bencode3 import bencode
from .bencode3 import bencode
from threading import Event
from time import time
from traceback import print_exc

View file

@ -2,7 +2,7 @@
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import print_function
import re
from six.moves.urllib.parse import quote
from urllib.parse import quote
from ox import find_re, strip_tags, decode_html
from ox.cache import read_url

View file

@ -2,7 +2,6 @@ from __future__ import print_function
import json
import re
from six import text_type
from ox.cache import read_url
HEADERS = {
@ -17,9 +16,9 @@ USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7) '
USER_AGENT += 'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 Safari/534.48.3'
def get_movie_data(title, director):
if isinstance(title, text_type):
if isinstance(title, str):
title = title.encode('utf-8')
if isinstance(director, text_type):
if isinstance(director, str):
director = director.encode('utf-8')
data = {}
# itunes section (preferred source for link)

View file

@ -3,8 +3,6 @@
from .. import cache
from ..utils import json
from six import string_types
def get_id(url):
    """Return the part of ``url`` after its last ``/`` (the whole string if there is none)."""
    return url.split("/")[-1]
@ -21,7 +19,7 @@ def get_data(id):
data[key] = details['metadata'][key]
if isinstance(data[key], list):
data[key] = data[key][0]
if isinstance(data[key], string_types):
if isinstance(data[key], str):
data[key] = data[key].strip()
if data[key][0] == '[' and data[key][-1] == ']':
data[key] = data[key][1:-1]

View file

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from six.moves.urllib.parse import unquote
from urllib.parse import unquote
from ox.cache import read_url

View file

@ -2,7 +2,7 @@
# vi:si:et:sw=4:sts=4:ts=4
import re
from six.moves import urllib
import urllib
import ox
from ox import strip_tags, decode_html
from ox.cache import read_url

View file

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from six.moves import urllib
import urllib
import ox
from ox import strip_tags, decode_html

View file

@ -7,8 +7,7 @@ import re
import time
import unicodedata
from six.moves.urllib.parse import urlencode
from six import string_types
from urllib.parse import urlencode
from .. import find_re, strip_tags, decode_html
from .. import cache
@ -449,7 +448,7 @@ class Imdb(SiteParser):
if 'alternativeTitles' in self:
if len(self['alternativeTitles']) == 2 and \
isinstance(self['alternativeTitles'][0], string_types):
isinstance(self['alternativeTitles'][0], str):
self['alternativeTitles'] = [self['alternativeTitles']]
for key in ('country', 'genre', 'language', 'sound', 'color'):
@ -514,7 +513,7 @@ class Imdb(SiteParser):
self['sound'] = list(sorted(set(self['sound'])))
if 'cast' in self:
if isinstance(self['cast'][0], string_types):
if isinstance(self['cast'][0], str):
self['cast'] = [self['cast']]
self['actor'] = [c[0] for c in self['cast']]
def cleanup_character(c):

View file

@ -2,7 +2,7 @@
# encoding: utf-8
from __future__ import print_function
import re
from six.moves.urllib.parse import urlencode
from urllib.parse import urlencode
from ox.cache import read_url
from ox.html import decode_html, strip_tags

View file

@ -2,7 +2,7 @@
# vi:si:et:sw=4:sts=4:ts=4
import re
from six.moves.urllib.parse import quote
from urllib.parse import quote
from lxml.html import document_fromstring
from ox.cache import read_url

View file

@ -4,8 +4,6 @@ import re
import json
from multiprocessing.pool import ThreadPool
from six import string_types
from ..cache import read_url
from .. import decode_html
from ..utils import datetime
@ -13,15 +11,15 @@ from ..utils import datetime
def cleanup(key, data, data_type):
if data:
if isinstance(data[0], string_types):
if isinstance(data[0], str):
#FIXME: some types need strip_tags
#data = [strip_tags(decode_html(p)).strip() for p in data]
data = [decode_html(p).strip() for p in data]
elif isinstance(data[0], list) or isinstance(data[0], tuple):
data = [cleanup(key, p, data_type) for p in data]
while len(data) == 1 and not isinstance(data, string_types):
while len(data) == 1 and not isinstance(data, str):
data = data[0]
if data_type == 'list' and isinstance(data, string_types):
if data_type == 'list' and isinstance(data, str):
data = [data, ]
elif data_type != 'list':
data = ''
@ -49,7 +47,7 @@ class SiteParser(dict):
for key in self.regex:
url = self.get_url(self.regex[key]['page'])
data = self.read_url(url, timeout)
if isinstance(self.regex[key]['re'], string_types):
if isinstance(self.regex[key]['re'], str):
data = re.compile(self.regex[key]['re'], re.DOTALL).findall(data)
data = cleanup(key, data, self.regex[key]['type'])
elif callable(self.regex[key]['re']):
@ -60,7 +58,7 @@ class SiteParser(dict):
f = r
else:
f = re.compile(r, re.DOTALL).findall
if isinstance(data, string_types):
if isinstance(data, str):
data = f(data)
else:
data = [f(d) for d in data]

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from six.moves import urllib
import urllib
import lxml.html
import ox

View file

@ -3,7 +3,7 @@
from datetime import datetime
import re
from six.moves.urllib.parse import quote
from urllib.parse import quote
from ox import find_re, cache, strip_tags, decode_html, get_torrent_info, normalize_newlines
from ox.normalize import normalize_imdbid

View file

@ -2,7 +2,7 @@
# vi:si:et:sw=4:sts=4:ts=4
import re
from datetime import datetime
from six.moves.urllib.parse import quote
from urllib.parse import quote
import lxml.html
import ox

View file

@ -4,8 +4,7 @@ from __future__ import print_function
import re
from six.moves import urllib
from six import string_types
import urllib
from ox.utils import json
from ox.cache import read_url
@ -69,7 +68,7 @@ def get_movie_data(wikipedia_url):
value = value.split('<br>')
if value:
if key in filmbox:
if isinstance(value, list) and isinstance(filmbox[key], string_types):
if isinstance(value, list) and isinstance(filmbox[key], str):
filmbox[key] = [filmbox[key]] + value
else:
filmbox[key] += value

View file

@ -1,8 +1,8 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from six.moves.urllib.parse import quote, unquote_plus
from six.moves import urllib
from six.moves import http_cookiejar as cookielib
from urllib.parse import quote, unquote_plus
import urllib
from http import cookiejar as cookielib
import re
from xml.dom.minidom import parseString
import json

View file

@ -1,4 +1,3 @@
chardet
six>=1.5.2
lxml
requests

View file

@ -50,7 +50,7 @@ setup(
url="https://code.0x2620.org/0x2620/python-ox",
license="GPLv3",
packages=['ox', 'ox.torrent', 'ox.web'],
install_requires=['six>=1.5.2', 'chardet'],
install_requires=['chardet'],
keywords=[
],
classifiers=[