From adad3be419c89b43856bd1b1be53ddb0214c9178 Mon Sep 17 00:00:00 2001 From: j Date: Thu, 27 Jul 2023 13:07:13 +0200 Subject: [PATCH 1/7] drop six and python2 support --- ox/api.py | 18 +-- ox/cache.py | 7 +- ox/fixunicode.py | 5 +- ox/form.py | 4 - ox/format.py | 6 +- ox/html.py | 10 +- ox/js.py | 7 - ox/net.py | 14 +- ox/normalize.py | 4 +- ox/srt.py | 6 +- ox/text.py | 6 +- ox/torrent/__init__.py | 6 +- ox/torrent/bencode.py | 321 ------------------------------------- ox/torrent/makemetafile.py | 6 +- ox/web/amazon.py | 2 +- ox/web/apple.py | 5 +- ox/web/archive.py | 4 +- ox/web/dailymotion.py | 2 +- ox/web/duckduckgo.py | 2 +- ox/web/google.py | 2 +- ox/web/imdb.py | 7 +- ox/web/itunes.py | 2 +- ox/web/metacritic.py | 2 +- ox/web/siteparser.py | 12 +- ox/web/startpage.py | 2 +- ox/web/thepiratebay.py | 2 +- ox/web/twitter.py | 2 +- ox/web/wikipedia.py | 5 +- ox/web/youtube.py | 6 +- requirements.txt | 1 - setup.py | 2 +- 31 files changed, 54 insertions(+), 426 deletions(-) delete mode 100644 ox/torrent/bencode.py diff --git a/ox/api.py b/ox/api.py index 639fec0..88af34f 100644 --- a/ox/api.py +++ b/ox/api.py @@ -10,10 +10,10 @@ import shutil import sys import time -from six.moves import http_cookiejar as cookielib -from six import BytesIO, PY2 -from six.moves import urllib -from six.moves.urllib.parse import urlparse +from http import cookiejar as cookielib +from io import BytesIO +import urllib +from urllib.parse import urlparse import requests from . import __version__ @@ -56,10 +56,7 @@ class API(object): def _add_method(self, method, name): if name is None: name = method.func_name - if PY2: - setattr(self, name, MethodType(method, self, type(self))) - else: - setattr(self, name, MethodType(method, self)) + setattr(self, name, MethodType(method, self)) def _add_action(self, action): def method(self, *args, **kw): @@ -73,10 +70,7 @@ class API(object): return self._request(action, kw) if 'doc' in self._properties[action]: method.__doc__ = self._properties[action]['doc'] - if PY2: - method.func_name = str(action) - else: - method.func_name = action + method.func_name = action self._add_method(method, action) def _json_request(self, url, data, files=None): diff --git a/ox/cache.py b/ox/cache.py index ba41574..097825c 100644 --- a/ox/cache.py +++ b/ox/cache.py @@ -10,9 +10,8 @@ import sqlite3 import time import zlib -from six import BytesIO -from six.moves import urllib -from six import PY2 +from io import BytesIO +import urllib try: import requests USE_REQUESTS = True @@ -242,8 +241,6 @@ class SQLiteCache(Cache): elif value == 'data': if row[1] == 1: r = zlib.decompress(r) - elif PY2: - r = str(r) break c.close() diff --git a/ox/fixunicode.py b/ox/fixunicode.py index d3a162d..e0386c6 100644 --- a/ox/fixunicode.py +++ b/ox/fixunicode.py @@ -6,7 +6,6 @@ from __future__ import print_function import unicodedata -from six import unichr, text_type __all__ = ['fix_bad_unicode'] @@ -151,7 +150,7 @@ def text_badness(text): - Improbable single-byte characters, such as ƒ or ¬ - Letters in somewhat rare scripts ''' - assert isinstance(text, text_type) + assert isinstance(text, str) errors = 0 very_weird_things = 0 weird_things = 0 @@ -289,7 +288,7 @@ SINGLE_BYTE_WEIRDNESS = ( # Pre-cache the Unicode data saying which of these first 256 characters are # letters. We'll need it often. 
SINGLE_BYTE_LETTERS = [ - unicodedata.category(unichr(i)).startswith('L') + unicodedata.category(chr(i)).startswith('L') for i in range(256) ] diff --git a/ox/form.py b/ox/form.py index faa1551..1a182c2 100644 --- a/ox/form.py +++ b/ox/form.py @@ -9,8 +9,6 @@ import os import hashlib import sys -from six import PY2 - __all__ = ['MultiPartForm'] @@ -63,8 +61,6 @@ class MultiPartForm(object): def __str__(self): body = self.body() - if not PY2: - body = body.decode('utf-8') return body def body(self): diff --git a/ox/format.py b/ox/format.py index ad18c31..f11a50c 100644 --- a/ox/format.py +++ b/ox/format.py @@ -4,8 +4,6 @@ import math import re import string -from six import text_type - def toAZ(num): """ Converts an integer to bijective base 26 string using A-Z @@ -238,7 +236,7 @@ def int_value(strValue, default=u''): u'' """ try: - val = re.compile('(\d+)').findall(text_type(strValue).strip())[0] + val = re.compile('(\d+)').findall(str(strValue).strip())[0] except: val = default return val @@ -255,7 +253,7 @@ def float_value(strValue, default=u''): u'' """ try: - val = re.compile('([\d.]+)').findall(text_type(strValue).strip())[0] + val = re.compile('([\d.]+)').findall(str(strValue).strip())[0] except: val = default return val diff --git a/ox/html.py b/ox/html.py index 73234ea..5fcc6c1 100644 --- a/ox/html.py +++ b/ox/html.py @@ -3,8 +3,7 @@ # GPL 2008 import re import string -from six.moves.html_entities import name2codepoint -from six import unichr, PY2, string_types +from html.entities import name2codepoint letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' @@ -26,8 +25,7 @@ link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+') html_gunk_re = re.compile(r'(?:<br clear="all">
|<\/i>|<\/b>|<\/em>|<\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE) hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL) trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>
\s*)+\Z') -if PY2: - del x # Temporary variable + def escape(html): ''' >>> escape('html "test" & <brothers>') 'html &quot;test&quot; &amp; &lt;brothers&gt;' ''' - if not isinstance(html, string_types): + if not isinstance(html, str): html = str(html) return html.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;') @@ -155,7 +153,7 @@ def decode_html(html): """ if isinstance(html, bytes): html = html.decode('utf-8') - uchr = unichr + uchr = chr def entitydecode(match, uchr=uchr): entity = match.group(1) diff --git a/ox/js.py b/ox/js.py index 2f419bd..9e9f1cb 100644 --- a/ox/js.py +++ b/ox/js.py @@ -2,19 +2,12 @@ # -*- coding: utf-8 -*- # vi:si:et:sw=4:sts=4:ts=4 -from six import PY2 from .utils import json def minify(source, comment=''): # see https://github.com/douglascrockford/JSMin/blob/master/README def get_next_non_whitespace_token(): pass - # python2 performance with unicode string is terrible - if PY2: - if isinstance(source, unicode): # pylint: disable=undefined-variable - source = source.encode('utf-8') - if isinstance(comment, unicode): # pylint: disable=undefined-variable - comment = comment.encode('utf-8') tokens = tokenize(source) length = len(tokens) minified = '/*' + comment + '*/' if comment else '' diff --git a/ox/net.py b/ox/net.py index 3a07d91..17a56d4 100644 --- a/ox/net.py +++ b/ox/net.py @@ -13,8 +13,8 @@ try: USE_REQUESTS = True except: USE_REQUESTS = False -from six import BytesIO, PY2 -from six.moves import urllib +from io import BytesIO +import urllib from chardet.universaldetector import UniversalDetector @@ -59,14 +59,10 @@ def get_json(url, data=None, headers=None): def open_url(url, data=None, headers=None): if headers is None: headers = DEFAULT_HEADERS.copy() - if PY2: - if not isinstance(url, bytes): - url = url.encode('utf-8') - else: - if isinstance(url, bytes): - url = url.decode('utf-8') + if isinstance(url, bytes): + url = url.decode('utf-8') url = url.replace(' ', '%20') - if data and not PY2 and not isinstance(data, bytes): + if data and not isinstance(data, bytes): data = data.encode('utf-8') req = urllib.request.Request(url, data, headers) return urllib.request.urlopen(req) diff --git a/ox/normalize.py b/ox/normalize.py index dea40ae..4ee9293 100644 --- a/ox/normalize.py +++ b/ox/normalize.py @@ -4,8 +4,6 @@ import re import unicodedata -from six import string_types - _articles = ('the', 'la', 'a', 'die', 'der', 'le', 'el', "l'", 'il', 'das', 'les', 'o', 'ein', 'i', 'un', 'los', 'de', @@ -103,7 +101,7 @@ def normalize_imdbid(imdbId): >>> normalize_imdbid('tt0159206') '0159206' """ - if isinstance(imdbId, string_types): + if isinstance(imdbId, str): imdbId = re.sub('.*(\d{7}).*', '\\1', imdbId) elif isinstance(imdbId, int): imdbId = "%07d" % imdbId diff --git a/ox/srt.py b/ox/srt.py index c29ae8b..464c08e 100644 --- a/ox/srt.py +++ b/ox/srt.py @@ -5,7 +5,6 @@ import codecs import re import chardet -from six import PY2 import ox @@ -24,10 +23,7 @@ def _detect_encoding(fp): # go to beginning of file and get the first 4 bytes oldFP = fp.tell() fp.seek(0) - if PY2: - (byte1, byte2, byte3, byte4) = [ord(b) for b in fp.read(4)] - else: - (byte1, byte2, byte3, byte4) = fp.read(4) + (byte1, byte2, byte3, byte4) = fp.read(4) # try bom detection using 4 bytes, 3 bytes, or 2 bytes bomDetection = bomDict.get((byte1, byte2, byte3, byte4)) diff --git a/ox/text.py b/ox/text.py index 282afa2..a967092 100644 --- a/ox/text.py +++ b/ox/text.py @@ -1,11 +1,13 @@ # -*- coding: utf-8 -*- # vi:si:et:sw=4:sts=4:ts=4 # GPL 2008 
+import gzip import math import re import unicodedata +from io import BytesIO -from six.moves import reduce +from functools import reduce ARTICLES = list(set([ # def sg, def pl, indef sg, indef pl (each m/f/n) @@ -646,8 +648,6 @@ def phone2numeric(phone): return letters.sub(char2number, phone) def compress_string(s): - import gzip - from six import BytesIO zbuf = BytesIO() zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuf) zfile.write(s) diff --git a/ox/torrent/__init__.py b/ox/torrent/__init__.py index a250215..9c399fe 100644 --- a/ox/torrent/__init__.py +++ b/ox/torrent/__init__.py @@ -5,12 +5,8 @@ from threading import Event from hashlib import sha1 import os -from six import PY2 -if PY2: - from .bencode import bencode, bdecode -else: - from .bencode3 import bencode, bdecode +from .bencode3 import bencode, bdecode __all__ = ['create_torrent', 'get_info_hash', 'get_torrent_info', 'get_files', 'get_torrent_size'] diff --git a/ox/torrent/bencode.py b/ox/torrent/bencode.py deleted file mode 100644 index b586001..0000000 --- a/ox/torrent/bencode.py +++ /dev/null @@ -1,321 +0,0 @@ -# Written by Petru Paler, Uoti Urpala, Ross Cohen and John Hoffman -# see LICENSE.txt for license information -from __future__ import print_function - -from types import IntType, LongType, StringType, ListType, TupleType, DictType -try: - from types import BooleanType -except ImportError: - BooleanType = None -try: - from types import UnicodeType -except ImportError: - UnicodeType = None -from cStringIO import StringIO - -def decode_int(x, f): - f += 1 - newf = x.index('e', f) - try: - n = int(x[f:newf]) - except: - n = long(x[f:newf]) - if x[f] == '-': - if x[f + 1] == '0': - raise ValueError - elif x[f] == '0' and newf != f+1: - raise ValueError - return (n, newf+1) - -def decode_string(x, f): - colon = x.index(':', f) - try: - n = int(x[f:colon]) - except (OverflowError, ValueError): - n = long(x[f:colon]) - if x[f] == '0' and colon != f+1: - raise ValueError - colon += 1 - return (x[colon:colon+n], colon+n) - -def decode_unicode(x, f): - s, f = decode_string(x, f+1) - return (s.decode('UTF-8'),f) - -def decode_list(x, f): - r, f = [], f+1 - while x[f] != 'e': - v, f = decode_func[x[f]](x, f) - r.append(v) - return (r, f + 1) - -def decode_dict(x, f): - r, f = {}, f+1 - lastkey = None - while x[f] != 'e': - k, f = decode_string(x, f) - # why is this needed - # if lastkey >= k: - # raise ValueError - lastkey = k - r[k], f = decode_func[x[f]](x, f) - return (r, f + 1) - -decode_func = {} -decode_func['l'] = decode_list -decode_func['d'] = decode_dict -decode_func['i'] = decode_int -decode_func['0'] = decode_string -decode_func['1'] = decode_string -decode_func['2'] = decode_string -decode_func['3'] = decode_string -decode_func['4'] = decode_string -decode_func['5'] = decode_string -decode_func['6'] = decode_string -decode_func['7'] = decode_string -decode_func['8'] = decode_string -decode_func['9'] = decode_string -#decode_func['u'] = decode_unicode - -def bdecode(x, sloppy = 1): - try: - r, l = decode_func[x[0]](x, 0) -# except (IndexError, KeyError): - except (IndexError, KeyError, ValueError): - raise ValueError("bad bencoded data") - if not sloppy and l != len(x): - raise ValueError("bad bencoded data") - return r - -def test_bdecode(): - try: - bdecode('0:0:') - assert 0 - except ValueError: - pass - try: - bdecode('ie') - assert 0 - except ValueError: - pass - try: - bdecode('i341foo382e') - assert 0 - except ValueError: - pass - assert bdecode('i4e') == 4 - assert bdecode('i0e') == 0 - assert 
bdecode('i123456789e') == 123456789 - assert bdecode('i-10e') == -10 - try: - bdecode('i-0e') - assert 0 - except ValueError: - pass - try: - bdecode('i123') - assert 0 - except ValueError: - pass - try: - bdecode('') - assert 0 - except ValueError: - pass - try: - bdecode('i6easd') - assert 0 - except ValueError: - pass - try: - bdecode('35208734823ljdahflajhdf') - assert 0 - except ValueError: - pass - try: - bdecode('2:abfdjslhfld') - assert 0 - except ValueError: - pass - assert bdecode('0:') == '' - assert bdecode('3:abc') == 'abc' - assert bdecode('10:1234567890') == '1234567890' - try: - bdecode('02:xy') - assert 0 - except ValueError: - pass - try: - bdecode('l') - assert 0 - except ValueError: - pass - assert bdecode('le') == [] - try: - bdecode('leanfdldjfh') - assert 0 - except ValueError: - pass - assert bdecode('l0:0:0:e') == ['', '', ''] - try: - bdecode('relwjhrlewjh') - assert 0 - except ValueError: - pass - assert bdecode('li1ei2ei3ee') == [1, 2, 3] - assert bdecode('l3:asd2:xye') == ['asd', 'xy'] - assert bdecode('ll5:Alice3:Bobeli2ei3eee') == [['Alice', 'Bob'], [2, 3]] - try: - bdecode('d') - assert 0 - except ValueError: - pass - try: - bdecode('defoobar') - assert 0 - except ValueError: - pass - assert bdecode('de') == {} - assert bdecode('d3:agei25e4:eyes4:bluee') == {'age': 25, 'eyes': 'blue'} - assert bdecode('d8:spam.mp3d6:author5:Alice6:lengthi100000eee') == {'spam.mp3': {'author': 'Alice', 'length': 100000}} - try: - bdecode('d3:fooe') - assert 0 - except ValueError: - pass - try: - bdecode('di1e0:e') - assert 0 - except ValueError: - pass - try: - bdecode('d1:b0:1:a0:e') - assert 0 - except ValueError: - pass - try: - bdecode('d1:a0:1:a0:e') - assert 0 - except ValueError: - pass - try: - bdecode('i03e') - assert 0 - except ValueError: - pass - try: - bdecode('l01:ae') - assert 0 - except ValueError: - pass - try: - bdecode('9999:x') - assert 0 - except ValueError: - pass - try: - bdecode('l0:') - assert 0 - except ValueError: - pass - try: - bdecode('d0:0:') - assert 0 - except ValueError: - pass - try: - bdecode('d0:') - assert 0 - except ValueError: - pass - -bencached_marker = [] - -class Bencached: - def __init__(self, s): - self.marker = bencached_marker - self.bencoded = s - -BencachedType = type(Bencached('')) # insufficient, but good as a filter - -def encode_bencached(x,r): - assert x.marker == bencached_marker - r.append(x.bencoded) - -def encode_int(x,r): - r.extend(('i',str(x),'e')) - -def encode_bool(x,r): - encode_int(int(x),r) - -def encode_string(x,r): - r.extend((str(len(x)),':',x)) - -def encode_unicode(x,r): - #r.append('u') - encode_string(x.encode('UTF-8'),r) - -def encode_list(x,r): - r.append('l') - for e in x: - encode_func[type(e)](e, r) - r.append('e') - -def encode_dict(x,r): - r.append('d') - ilist = x.items() - ilist.sort() - for k,v in ilist: - r.extend((str(len(k)),':',k)) - encode_func[type(v)](v, r) - r.append('e') - -encode_func = {} -encode_func[BencachedType] = encode_bencached -encode_func[IntType] = encode_int -encode_func[LongType] = encode_int -encode_func[StringType] = encode_string -encode_func[ListType] = encode_list -encode_func[TupleType] = encode_list -encode_func[DictType] = encode_dict -if BooleanType: - encode_func[BooleanType] = encode_bool -if UnicodeType: - encode_func[UnicodeType] = encode_unicode - -def bencode(x): - r = [] - try: - encode_func[type(x)](x, r) - except: - print("*** error *** could not encode type %s (value: %s)" % (type(x), x)) - assert 0 - return ''.join(r) - -def test_bencode(): - assert 
bencode(4) == 'i4e' - assert bencode(0) == 'i0e' - assert bencode(-10) == 'i-10e' - assert bencode(12345678901234567890) == 'i12345678901234567890e' - assert bencode('') == '0:' - assert bencode('abc') == '3:abc' - assert bencode('1234567890') == '10:1234567890' - assert bencode([]) == 'le' - assert bencode([1, 2, 3]) == 'li1ei2ei3ee' - assert bencode([['Alice', 'Bob'], [2, 3]]) == 'll5:Alice3:Bobeli2ei3eee' - assert bencode({}) == 'de' - assert bencode({'age': 25, 'eyes': 'blue'}) == 'd3:agei25e4:eyes4:bluee' - assert bencode({'spam.mp3': {'author': 'Alice', 'length': 100000}}) == 'd8:spam.mp3d6:author5:Alice6:lengthi100000eee' - try: - bencode({1: 'foo'}) - assert 0 - except AssertionError: - pass - - -try: - import psyco - psyco.bind(bdecode) - psyco.bind(bencode) -except ImportError: - pass diff --git a/ox/torrent/makemetafile.py b/ox/torrent/makemetafile.py index 31d6ebe..c2db27a 100644 --- a/ox/torrent/makemetafile.py +++ b/ox/torrent/makemetafile.py @@ -8,11 +8,7 @@ from hashlib import sha1 as sha from copy import copy import re -from six import PY2 -if PY2: - from .bencode import bencode -else: - from .bencode3 import bencode +from .bencode3 import bencode from threading import Event from time import time from traceback import print_exc diff --git a/ox/web/amazon.py b/ox/web/amazon.py index 19a72c7..d721d5c 100644 --- a/ox/web/amazon.py +++ b/ox/web/amazon.py @@ -2,7 +2,7 @@ # vi:si:et:sw=4:sts=4:ts=4 from __future__ import print_function import re -from six.moves.urllib.parse import quote +from urllib.parse import quote from ox import find_re, strip_tags, decode_html from ox.cache import read_url diff --git a/ox/web/apple.py b/ox/web/apple.py index 099d6cb..84abba0 100644 --- a/ox/web/apple.py +++ b/ox/web/apple.py @@ -2,7 +2,6 @@ from __future__ import print_function import json import re -from six import text_type from ox.cache import read_url HEADERS = { @@ -17,9 +16,9 @@ USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7) ' USER_AGENT += 'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 Safari/534.48.3' def get_movie_data(title, director): - if isinstance(title, text_type): + if isinstance(title, str): title = title.encode('utf-8') - if isinstance(director, text_type): + if isinstance(director, str): director = director.encode('utf-8') data = {} # itunes section (preferred source for link) diff --git a/ox/web/archive.py b/ox/web/archive.py index 0c733c3..3e7ab47 100644 --- a/ox/web/archive.py +++ b/ox/web/archive.py @@ -3,8 +3,6 @@ from .. 
import cache from ..utils import json -from six import string_types - def get_id(url): return url.split("/")[-1] @@ -21,7 +19,7 @@ def get_data(id): data[key] = details['metadata'][key] if isinstance(data[key], list): data[key] = data[key][0] - if isinstance(data[key], string_types): + if isinstance(data[key], str): data[key] = data[key].strip() if data[key][0] == '[' and data[key][-1] == ']': data[key] = data[key][1:-1] diff --git a/ox/web/dailymotion.py b/ox/web/dailymotion.py index 0ec8d86..851b728 100644 --- a/ox/web/dailymotion.py +++ b/ox/web/dailymotion.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # vi:si:et:sw=4:sts=4:ts=4 import re -from six.moves.urllib.parse import unquote +from urllib.parse import unquote from ox.cache import read_url diff --git a/ox/web/duckduckgo.py b/ox/web/duckduckgo.py index b4b3494..35c0602 100644 --- a/ox/web/duckduckgo.py +++ b/ox/web/duckduckgo.py @@ -2,7 +2,7 @@ # vi:si:et:sw=4:sts=4:ts=4 import re -from six.moves import urllib +import urllib import ox from ox import strip_tags, decode_html from ox.cache import read_url diff --git a/ox/web/google.py b/ox/web/google.py index 0842d01..01bb7ce 100644 --- a/ox/web/google.py +++ b/ox/web/google.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # vi:si:et:sw=4:sts=4:ts=4 import re -from six.moves import urllib +import urllib import ox from ox import strip_tags, decode_html diff --git a/ox/web/imdb.py b/ox/web/imdb.py index 96454f7..4b08cab 100644 --- a/ox/web/imdb.py +++ b/ox/web/imdb.py @@ -7,8 +7,7 @@ import re import time import unicodedata -from six.moves.urllib.parse import urlencode -from six import string_types +from urllib.parse import urlencode from .. import find_re, strip_tags, decode_html from .. import cache @@ -449,7 +448,7 @@ class Imdb(SiteParser): if 'alternativeTitles' in self: if len(self['alternativeTitles']) == 2 and \ - isinstance(self['alternativeTitles'][0], string_types): + isinstance(self['alternativeTitles'][0], str): self['alternativeTitles'] = [self['alternativeTitles']] for key in ('country', 'genre', 'language', 'sound', 'color'): @@ -514,7 +513,7 @@ class Imdb(SiteParser): self['sound'] = list(sorted(set(self['sound']))) if 'cast' in self: - if isinstance(self['cast'][0], string_types): + if isinstance(self['cast'][0], str): self['cast'] = [self['cast']] self['actor'] = [c[0] for c in self['cast']] def cleanup_character(c): diff --git a/ox/web/itunes.py b/ox/web/itunes.py index f599099..bb85952 100644 --- a/ox/web/itunes.py +++ b/ox/web/itunes.py @@ -2,7 +2,7 @@ # encoding: utf-8 from __future__ import print_function import re -from six.moves.urllib.parse import urlencode +from urllib.parse import urlencode from ox.cache import read_url from ox.html import decode_html, strip_tags diff --git a/ox/web/metacritic.py b/ox/web/metacritic.py index 2ecded5..8c59998 100644 --- a/ox/web/metacritic.py +++ b/ox/web/metacritic.py @@ -2,7 +2,7 @@ # vi:si:et:sw=4:sts=4:ts=4 import re -from six.moves.urllib.parse import quote +from urllib.parse import quote from lxml.html import document_fromstring from ox.cache import read_url diff --git a/ox/web/siteparser.py b/ox/web/siteparser.py index b8b78f8..b260be6 100644 --- a/ox/web/siteparser.py +++ b/ox/web/siteparser.py @@ -4,8 +4,6 @@ import re import json from multiprocessing.pool import ThreadPool -from six import string_types - from ..cache import read_url from .. 
import decode_html from ..utils import datetime @@ -13,15 +11,15 @@ from ..utils import datetime def cleanup(key, data, data_type): if data: - if isinstance(data[0], string_types): + if isinstance(data[0], str): #FIXME: some types need strip_tags #data = [strip_tags(decode_html(p)).strip() for p in data] data = [decode_html(p).strip() for p in data] elif isinstance(data[0], list) or isinstance(data[0], tuple): data = [cleanup(key, p, data_type) for p in data] - while len(data) == 1 and not isinstance(data, string_types): + while len(data) == 1 and not isinstance(data, str): data = data[0] - if data_type == 'list' and isinstance(data, string_types): + if data_type == 'list' and isinstance(data, str): data = [data, ] elif data_type != 'list': data = '' @@ -49,7 +47,7 @@ class SiteParser(dict): for key in self.regex: url = self.get_url(self.regex[key]['page']) data = self.read_url(url, timeout) - if isinstance(self.regex[key]['re'], string_types): + if isinstance(self.regex[key]['re'], str): data = re.compile(self.regex[key]['re'], re.DOTALL).findall(data) data = cleanup(key, data, self.regex[key]['type']) elif callable(self.regex[key]['re']): @@ -60,7 +58,7 @@ class SiteParser(dict): f = r else: f = re.compile(r, re.DOTALL).findall - if isinstance(data, string_types): + if isinstance(data, str): data = f(data) else: data = [f(d) for d in data] diff --git a/ox/web/startpage.py b/ox/web/startpage.py index ca18437..83e92f9 100644 --- a/ox/web/startpage.py +++ b/ox/web/startpage.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # vi:si:et:sw=4:sts=4:ts=4 -from six.moves import urllib +import urllib import lxml.html import ox diff --git a/ox/web/thepiratebay.py b/ox/web/thepiratebay.py index e9a6445..cbbdf56 100644 --- a/ox/web/thepiratebay.py +++ b/ox/web/thepiratebay.py @@ -3,7 +3,7 @@ from datetime import datetime import re -from six.moves.urllib.parse import quote +from urllib.parse import quote from ox import find_re, cache, strip_tags, decode_html, get_torrent_info, normalize_newlines from ox.normalize import normalize_imdbid diff --git a/ox/web/twitter.py b/ox/web/twitter.py index fa33bfc..619c458 100644 --- a/ox/web/twitter.py +++ b/ox/web/twitter.py @@ -2,7 +2,7 @@ # vi:si:et:sw=4:sts=4:ts=4 import re from datetime import datetime -from six.moves.urllib.parse import quote +from urllib.parse import quote import lxml.html import ox diff --git a/ox/web/wikipedia.py b/ox/web/wikipedia.py index de8b064..5d86655 100644 --- a/ox/web/wikipedia.py +++ b/ox/web/wikipedia.py @@ -4,8 +4,7 @@ from __future__ import print_function import re -from six.moves import urllib -from six import string_types +import urllib from ox.utils import json from ox.cache import read_url @@ -69,7 +68,7 @@ def get_movie_data(wikipedia_url): value = value.split('
<br>') if value: if key in filmbox: - if isinstance(value, list) and isinstance(filmbox[key], string_types): + if isinstance(value, list) and isinstance(filmbox[key], str): filmbox[key] = [filmbox[key]] + value else: filmbox[key] += value diff --git a/ox/web/youtube.py b/ox/web/youtube.py index 805f716..0f59b80 100644 --- a/ox/web/youtube.py +++ b/ox/web/youtube.py @@ -1,8 +1,8 @@ # -*- coding: utf-8 -*- # vi:si:et:sw=4:sts=4:ts=4 -from six.moves.urllib.parse import quote, unquote_plus -from six.moves import urllib -from six.moves import http_cookiejar as cookielib +from urllib.parse import quote, unquote_plus +import urllib +from http import cookiejar as cookielib import re from xml.dom.minidom import parseString import json diff --git a/requirements.txt b/requirements.txt index 4e7d966..f30c448 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ chardet -six>=1.5.2 lxml requests diff --git a/setup.py b/setup.py index e5948ea..e76a325 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,7 @@ setup( url="https://code.0x2620.org/0x2620/python-ox", license="GPLv3", packages=['ox', 'ox.torrent', 'ox.web'], - install_requires=['six>=1.5.2', 'chardet'], + install_requires=['chardet'], keywords=[ ], classifiers=[ From bf347745337509ac38f67bdd07f5851cad209136 Mon Sep 17 00:00:00 2001 From: j Date: Thu, 27 Jul 2023 18:07:49 +0200 Subject: [PATCH 2/7] requests is always required now --- ox/cache.py | 50 ++++++++++++++++---------------------------------- ox/net.py | 22 +++++++--------------- 2 files changed, 23 insertions(+), 49 deletions(-) diff --git a/ox/cache.py b/ox/cache.py index 097825c..b5ead82 100644 --- a/ox/cache.py +++ b/ox/cache.py @@ -12,12 +12,8 @@ import zlib from io import BytesIO import urllib -try: - import requests - USE_REQUESTS = True - requests_session = requests.Session() -except: - USE_REQUESTS = False +import requests + from .utils import json from .file import makedirs @@ -27,6 +23,7 @@ cache_timeout = 30*24*60*60 # default is 30 days +requests_session = requests.Session() COMPRESS_TYPES = ( 'text/html', @@ -100,35 +97,20 @@ def read_url(url, data=None, headers=None, timeout=cache_timeout, valid=None, un result = store.get(url, data, headers, timeout) url_headers = {} if not result: - if USE_REQUESTS: - if headers is None: - headers = DEFAULT_HEADERS.copy() - if data: - r = requests_session.post(url, data=data, headers=headers) - else: - r = requests_session.get(url, headers=headers) - for key in r.headers: - url_headers[key.lower()] = r.headers[key] - result = r.content - url_headers['Status'] = "%s" % r.status_code - if not valid or valid(result, url_headers): - store.set(url, post_data=data, data=result, headers=url_headers) - else: - raise InvalidResult(result, url_headers) + if headers is None: + headers = DEFAULT_HEADERS.copy() + if data: + r = requests_session.post(url, data=data, headers=headers) else: - try: - url_headers, result = net.read_url(url, data, headers, return_headers=True) - except urllib.error.HTTPError as e: - e.headers['Status'] = "%s" % e.code - for key in e.headers: - url_headers[key.lower()] = e.headers[key] - result = e.read() - if url_headers.get('content-encoding', None) == 'gzip': - result = gzip.GzipFile(fileobj=BytesIO(result)).read() - if not valid or valid(result, url_headers): - store.set(url, post_data=data, data=result, headers=url_headers) - else: - raise InvalidResult(result, url_headers) + r = requests_session.get(url, headers=headers) + for key in r.headers: + 
url_headers[key.lower()] = r.headers[key] + result = r.content + url_headers['Status'] = "%s" % r.status_code + if not valid or valid(result, url_headers): + store.set(url, post_data=data, data=result, headers=url_headers) + else: + raise InvalidResult(result, url_headers) if unicode: ctype = url_headers.get('content-type', '').lower() if 'charset' in ctype: diff --git a/ox/net.py b/ox/net.py index 17a56d4..4d58bad 100644 --- a/ox/net.py +++ b/ox/net.py @@ -8,11 +8,8 @@ import os import re import struct -try: - import requests - USE_REQUESTS = True -except: - USE_REQUESTS = False +import requests + from io import BytesIO import urllib from chardet.universaldetector import UniversalDetector @@ -119,16 +116,11 @@ def save_url(url, filename, overwrite=False): if dirname and not os.path.exists(dirname): os.makedirs(dirname) headers = DEFAULT_HEADERS.copy() - if USE_REQUESTS: - r = requests.get(url, headers=headers, stream=True) - with open(filename, 'wb') as f: - for chunk in r.iter_content(chunk_size=1024): - if chunk: # filter out keep-alive new chunks - f.write(chunk) - else: - data = read_url(url) - with open(filename, 'wb') as f: - f.write(data) + r = requests.get(url, headers=headers, stream=True) + with open(filename, 'wb') as f: + for chunk in r.iter_content(chunk_size=1024): + if chunk: # filter out keep-alive new chunks + f.write(chunk) def _get_size(url): req = urllib.request.Request(url, headers=DEFAULT_HEADERS.copy()) From a0d5c793ebc2537458ecbd226b5b6d8b06b78267 Mon Sep 17 00:00:00 2001 From: j Date: Thu, 27 Jul 2023 18:21:21 +0200 Subject: [PATCH 3/7] use CaseInsensitiveDict --- ox/cache.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ox/cache.py b/ox/cache.py index b5ead82..3954ea7 100644 --- a/ox/cache.py +++ b/ox/cache.py @@ -13,6 +13,7 @@ import zlib from io import BytesIO import urllib import requests +from requests.structures import CaseInsensitiveDict from .utils import json @@ -65,7 +66,7 @@ def get_headers(url, data=None, headers=None, timeout=cache_timeout): if not url_headers: url_headers = net.get_headers(url, data, headers) store.set(url, data, -1, url_headers) - return url_headers + return CaseInsensitiveDict(url_headers) def get_json(url, data=None, headers=None, timeout=cache_timeout): return json.loads(read_url(url, data, headers, timeout).decode('utf-8')) @@ -261,6 +262,8 @@ class SQLiteCache(Cache): data = zlib.compress(data) else: compressed = 0 + if isinstance(data, str): + data = data.encode("utf-8") data = sqlite3.Binary(data) #fixme: this looks wrong From 6b4a307e2308edcd97584bb0fec19eb371032824 Mon Sep 17 00:00:00 2001 From: j Date: Thu, 27 Jul 2023 18:35:06 +0200 Subject: [PATCH 4/7] python3 does not need L --- ox/format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ox/format.py b/ox/format.py index f11a50c..9090cf0 100644 --- a/ox/format.py +++ b/ox/format.py @@ -106,7 +106,7 @@ def to32(q): >>> to32(555306645) 'GHJKMN' - >>> to32(800197332334559L) + >>> to32(800197332334559) 'PQRSTVWXYZ' >>> to32(32) From d03a6b120d63b49eae2af04908645b07fadc910a Mon Sep 17 00:00:00 2001 From: j Date: Thu, 27 Jul 2023 18:35:33 +0200 Subject: [PATCH 5/7] fix sanitize_fragment('\ufeff') --- ox/html.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ox/html.py b/ox/html.py index 5fcc6c1..5286116 100644 --- a/ox/html.py +++ b/ox/html.py @@ -440,7 +440,12 @@ def sanitize_fragment(html): if not html.strip(): return html import lxml.html - body = lxml.html.document_fromstring(html).find('body') + try: + 
body = lxml.html.document_fromstring(html).find('body') + except lxml.etree.ParserError as e: + if e.args and e.args[0] == 'Document is empty': + return html + raise e html = lxml.html.tostring(body, encoding='utf-8')[6:-7].decode('utf-8') if html.startswith('

<p>') and html.endswith('</p>
'): html = html[3:-4] From 99e221095ba8d5bcdc3f6310d39a8f5417a77911 Mon Sep 17 00:00:00 2001 From: j Date: Thu, 27 Jul 2023 18:12:13 +0200 Subject: [PATCH 6/7] get rid of u string literal --- ox/format.py | 16 +++---- ox/html.py | 94 ++++++++++++++++++++--------------------- ox/text.py | 54 +++++++++++------------ ox/web/allmovie.py | 8 ++-- ox/web/arsenalberlin.py | 12 +++--- ox/web/criterion.py | 8 ++-- ox/web/flixter.py | 4 +- 7 files changed, 98 insertions(+), 98 deletions(-) diff --git a/ox/format.py b/ox/format.py index 9090cf0..83756c1 100644 --- a/ox/format.py +++ b/ox/format.py @@ -224,16 +224,16 @@ def to36(q): def from36(q): return int(q, 36) -def int_value(strValue, default=u''): +def int_value(strValue, default=''): """ >>> int_value('abc23') - u'23' + '23' >>> int_value(' abc23') - u'23' + '23' >>> int_value('ab') - u'' + '' """ try: val = re.compile('(\d+)').findall(str(strValue).strip())[0] @@ -241,16 +241,16 @@ val = default return val -def float_value(strValue, default=u''): +def float_value(strValue, default=''): """ >>> float_value('abc23.4') - u'23.4' + '23.4' >>> float_value(' abc23.4') - u'23.4' + '23.4' >>> float_value('ab') - u'' + '' """ try: val = re.compile('([\d.]+)').findall(str(strValue).strip())[0] except: val = default return val diff --git a/ox/html.py b/ox/html.py index 5286116..8666713 100644 --- a/ox/html.py +++ b/ox/html.py @@ -145,11 +145,11 @@ charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?') def decode_html(html): """ >>> decode_html('me &amp; you and &#36;&#38;%') - u'me & you and $&%' + 'me & you and $&%' >>> decode_html('&#x80;') - u'\u20ac' + '\u20ac' >>> decode_html('Anniversary of Daoud&#39;s Republic') - u"Anniversary of Daoud's Republic" + "Anniversary of Daoud's Republic" """ if isinstance(html, bytes): html = html.decode('utf-8') @@ -158,7 +158,7 @@ def decode_html(html): def entitydecode(match, uchr=uchr): entity = match.group(1) if entity == '#x80': - return u'€' + return '€' elif entity.startswith('#x'): return uchr(int(entity[2:], 16)) elif entity.startswith('#'): @@ -169,7 +169,7 @@ def decode_html(html): return "'" else: return match.group(0) - return charrefpat.sub(entitydecode, html).replace(u'\xa0', ' ') + return charrefpat.sub(entitydecode, html).replace('\xa0', ' ') def highlight(text, query, hlClass="hl"): """ @@ -187,51 +187,51 @@ def escape_html(value): ''' - >>> escape_html(u'<script>alert()</script>') - u'&lt;script&gt;alert()&lt;/script&gt;' >>> sanitize_html("'foo' < 'bar' && \\"foo\\" > \\"bar\\"") - u'\\'foo\\' &lt; \\'bar\\' &amp;&amp; "foo" &gt; "bar"' + '\\'foo\\' &lt; \\'bar\\' &amp;&amp; "foo" &gt; "bar"' >>> sanitize_html('<b>foo</b>') - u'<b>foo</b>' + '<b>foo</b>' >>> sanitize_html('<b>foo</b></b>') - u'<b>foo</b>' + '<b>foo</b>' >>> sanitize_html('Anniversary of Daoud&#39;s Republic') - u"Anniversary of Daoud's Republic" + "Anniversary of Daoud's Republic" >>> sanitize_html('') - u'' + '' >>> sanitize_html(' ') - u' ' + ' ' >>> sanitize_html(' ') # canonicalised to a space: okay, I suppose - u' ' + ' ' >>> sanitize_html('\u00a0') # also nbsp - u' ' + ' ' ''' if not tags: valid_url = '^((https?:\/\/|\/|mailto:).*?)' @@ -412,24 +412,24 @@ def sanitize_fragment(html): are quoted, etc. Does not strip potentially-malicious HTML: use sanitize_html() for that. - >>> sanitize_fragment(u'') - u'' - >>> sanitize_fragment(u'') - u'' - >>> sanitize_fragment(u'


<br><br/></br>') - u'<br><br>
' - >>> sanitize_fragment(u'
foo') - u'foo' - >>> sanitize_fragment(u'') - u'' - >>> sanitize_fragment(u' ') - u' ' - >>> sanitize_fragment(u' ') - u'\\xa0' - >>> sanitize_fragment(u'\\u00a0') # nbsp - u'\\xa0' - >>> sanitize_fragment(u'\\ufeff') # zero-width no-break space - u'\\ufeff' + >>> sanitize_fragment('') + '' + >>> sanitize_fragment('') + '' + >>> sanitize_fragment('


<br><br/></br>') + '<br><br>
' + >>> sanitize_fragment('foo') + 'foo' + >>> sanitize_fragment('') + '' + >>> sanitize_fragment(' ') + ' ' + >>> sanitize_fragment(' ') + '\\xa0' + >>> sanitize_fragment('\\u00a0') # nbsp + '\\xa0' + >>> sanitize_fragment('\\ufeff') # zero-width no-break space + '\\ufeff' ''' ''' diff --git a/ox/text.py b/ox/text.py index a967092..d650262 100644 --- a/ox/text.py +++ b/ox/text.py @@ -475,10 +475,10 @@ def wrap(text, width): def wrap_string(string, length=80, separator='\n', balance=False): ''' - >>> wrap_string(u"Anticonstitutionellement, Paris s'eveille", 16) - u"Anticonstitution\\nellement, Paris \\ns'eveille" + >>> wrap_string("Anticonstitutionellement, Paris s'eveille", 16) + "Anticonstitution\\nellement, Paris \\ns'eveille" >>> wrap_string(u'All you can eat', 12, '\\n', True) - u'All you \\ncan eat' + 'All you \\ncan eat' ''' words = string.split(' ') if balance: @@ -493,20 +493,20 @@ def wrap_string(string, length=80, separator='\n', balance=False): break lines = [''] for word in words: - if len(lines[len(lines) - 1] + word + u' ') <= length + 1: + if len(lines[len(lines) - 1] + word + ' ') <= length + 1: # word fits in current line - lines[len(lines) - 1] += word + u' ' + lines[len(lines) - 1] += word + ' ' else: if len(word) <= length: # word fits in next line - lines.append(word + u' ') + lines.append(word + ' ') else: # word is longer than line position = length - len(lines[len(lines) - 1]) lines[len(lines) - 1] += word[0:position] for i in range(position, len(word), length): lines.append(word[i:i+length]) - lines[len(lines) - 1] += u' ' + lines[len(lines) - 1] += ' ' return separator.join(lines).strip() def truncate_string(string, length, padding='...', position='right'): @@ -578,14 +578,14 @@ def get_valid_filename(s): def get_text_list(list_, last_word='or'): """ - >>> get_text_list([u'a', u'b', u'c', u'd']) - u'a, b, c or d' - >>> get_text_list([u'a', u'b', u'c'], 'and') - u'a, b and c' - >>> get_text_list([u'a', u'b'], 'and') - u'a and b' - >>> get_text_list([u'a']) - u'a' + >>> get_text_list(['a', 'b', 'c', 'd']) + 'a, b, c or d' + >>> get_text_list(['a', 'b', 'c'], 'and') + 'a, b and c' + >>> get_text_list(['a', 'b'], 'and') + 'a and b' + >>> get_text_list(['a']) + 'a' >>> get_text_list([]) '' """ @@ -593,24 +593,24 @@ def get_text_list(list_, last_word='or'): return '' if len(list_) == 1: return list_[0] - return u'%s %s %s' % (u', '.join([i for i in list_][:-1]), last_word, list_[-1]) + return '%s %s %s' % (', '.join([i for i in list_][:-1]), last_word, list_[-1]) def get_list_text(text, last_word='or'): """ - >>> get_list_text(u'a, b, c or d') - [u'a', u'b', u'c', u'd'] - >>> get_list_text(u'a, b and c', u'and') - [u'a', u'b', u'c'] - >>> get_list_text(u'a and b', u'and') - [u'a', u'b'] - >>> get_list_text(u'a') - [u'a'] - >>> get_list_text(u'') + >>> get_list_text('a, b, c or d') + ['a', 'b', 'c', 'd'] + >>> get_list_text('a, b and c', 'and') + ['a', 'b', 'c'] + >>> get_list_text('a and b', 'and') + ['a', 'b'] + >>> get_list_text('a') + ['a'] + >>> get_list_text('') [] """ list_ = [] if text: - list_ = text.split(u', ') + list_ = text.split(', ') if list_: i = len(list_)-1 last = list_[i].split(last_word) @@ -682,7 +682,7 @@ def words(text): return [re.sub("(([.!?:-_]|'s)$)", '', x) for x in text] def sort_string(string): - string = string.replace(u'Æ', 'AE').replace(u'Ø', 'O').replace(u'Þ', 'Th') + string = string.replace('Æ', 'AE').replace('Ø', 'O').replace('Þ', 'Th') # pad numbered titles string = re.sub('(\d),(\d{3})', '\\1\\2', string) diff --git 
a/ox/web/allmovie.py b/ox/web/allmovie.py index fdb7a46..c94c438 100644 --- a/ox/web/allmovie.py +++ b/ox/web/allmovie.py @@ -13,13 +13,13 @@ def get_id(url): def get_data(id): ''' >>> get_data('129689')['cast'][1][1] - u'Marianne' + 'Marianne' >>> get_data('129689')['credits'][0][0] - u'Jean-Luc Godard' + 'Jean-Luc Godard' >>> get_data('129689')['posters'][0] - u'http://image.allmusic.com/00/adg/cov200/dru800/u812/u81260bbffr.jpg' + 'http://image.allmusic.com/00/adg/cov200/dru800/u812/u81260bbffr.jpg' >>> get_data('129689')['rating'] - u'4.5' + '4.5' ''' if id.startswith('http'): id = get_id(id) diff --git a/ox/web/arsenalberlin.py b/ox/web/arsenalberlin.py index e5a0dd2..ca77b5e 100644 --- a/ox/web/arsenalberlin.py +++ b/ox/web/arsenalberlin.py @@ -19,18 +19,18 @@ def get_data(id, language='en'): if 'Willkommen in der Datenbank des Arsenal' in html: return None data = {} - data[u'id'] = id - data[u'url'] = url + data['id'] = id + data['url'] = url m = re.compile('

<h1>(.*?)</h1>
').findall(html) if m: - data[u'title'] = m[0] + data['title'] = m[0] m = re.compile("Director: (.*?)").findall(html) if m: - data[u'director'] = m[0] + data['director'] = m[0] m = re.compile("caUI.initImageScroller\(\[\{url:'(.*?)'").findall(html) if m: - data[u'image'] = m[0] + data['image'] = m[0] units = re.compile("
<div class=\"unit\">(.*?)</div>
", re.DOTALL).findall(html) for x in map(re.compile('(.*?): (.*)', re.DOTALL).findall, units): @@ -43,7 +43,7 @@ def get_data(id, language='en'): else: data[key] = strip_tags(data[key]) if "running time (minutes)" in data: - data[u'runtime'] = float(data.pop("running time (minutes)").replace(',', '.')) * 60 + data['runtime'] = float(data.pop("running time (minutes)").replace(',', '.')) * 60 for key in ('year', 'length in metres', 'forum participation year', 'number of reels'): if key in data and data[key].isdigit(): data[key] = int(data[key]) diff --git a/ox/web/criterion.py b/ox/web/criterion.py index d7914be..67d4a8a 100644 --- a/ox/web/criterion.py +++ b/ox/web/criterion.py @@ -19,13 +19,13 @@ def get_url(id): def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False): ''' >>> get_data('1333').get('imdbId') - u'0060304' + '0060304' >>> get_data('236')['posters'][0] - u'http://s3.amazonaws.com/criterion-production/release_images/1586/ThirdManReplace.jpg' + 'http://s3.amazonaws.com/criterion-production/release_images/1586/ThirdManReplace.jpg' >>> get_data('786')['posters'][0] - u'http://s3.amazonaws.com/criterion-production/product_images/185/343_box_348x490.jpg' + 'http://s3.amazonaws.com/criterion-production/product_images/185/343_box_348x490.jpg' ''' data = { "id": id, @@ -39,7 +39,7 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False): data["number"] = find_re(html, "Spine #(\d+)") data["title"] = decode_html(find_re(html, "

<h1 class=\"header__primarytitle\">(.*?)</h1>
")) - data["title"] = data["title"].split(u' \u2014 The Television Version')[0].strip() + data["title"] = data["title"].split(' \u2014 The Television Version')[0].strip() results = find_re(html, '
<ul class="film-meta-list">(.*?)</ul>
') info = re.compile('
<li itemprop="(.*?)".*?>(.*?)</li>', re.DOTALL).findall(results) info = {k: strip_tags(v).strip() for k, v in info} diff --git a/ox/web/flixter.py b/ox/web/flixter.py index e6d6a0a..d713208 100644 --- a/ox/web/flixter.py +++ b/ox/web/flixter.py @@ -58,10 +58,10 @@ def get_data(id, timeout=-1): def get_id(url=None, imdb=None): ''' >>> get_id(imdb='0133093') - u'the-matrix' + 'the-matrix' #>>> get_id(imdb='0060304') - #u'2-or-3-things-i-know-about-her' + #'2-or-3-things-i-know-about-her' ''' if imdb: i = ImdbCombined(imdb) From 2bac617dc98d6906751bdcd1a0d7964b5f8fe72a Mon Sep 17 00:00:00 2001 From: j Date: Thu, 27 Jul 2023 18:40:38 +0200 Subject: [PATCH 7/7] update to version 3 --- ox/__init__.py | 2 +- setup.py | 23 ++++++++++++----------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/ox/__init__.py b/ox/__init__.py index 98402fb..4620e8f 100644 --- a/ox/__init__.py +++ b/ox/__init__.py @@ -5,7 +5,7 @@ try: from . import __version __version__ = __version.VERSION except: - __version__ = '2.3.x' + __version__ = '3.0.x' from . import cache from . import js diff --git a/setup.py b/setup.py index e76a325..b1d1de5 100644 --- a/setup.py +++ b/setup.py @@ -7,9 +7,10 @@ try: except: from distutils.core import setup -def get_revision(): +def get_git_version(): import subprocess - return subprocess.check_output(['git', 'rev-list', 'HEAD', '--count']).decode().strip() + version = subprocess.check_output(['git', 'describe', '--tags']).decode().strip().replace('-', '.') + return '.'.join((version.split('.') + ['0'])[:3]) def get_version(): import os @@ -18,9 +19,8 @@ def get_version(): __version = os.path.join(os.path.dirname(__file__), 'ox/__version.py') changelog = os.path.join(os.path.dirname(__file__), 'debian/changelog') if os.path.exists(_git): - rev = get_revision() - if rev: - version = "2.3.%s" % rev + version = get_git_version() + if version: with open(__version, 'w') as fd: fd.write('VERSION="%s"' % version) return version @@ -37,8 +37,8 @@ def get_version(): f.close() rev = re.compile('\d+\.\d+\.(\d+)').findall(head) if rev: - return '2.3.%s' % rev[0] - return '2.3.x' + return '3.0.%s' % rev[0] + return '3.0.x' setup( @@ -56,11 +56,12 @@ setup( classifiers=[ 'Operating System :: OS Independent', 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', 'Topic :: Software Development :: Libraries :: Python Modules', ], )
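A few notes on the series follow; the code sketches are illustrative and not part of any commit.

Note on patch 1: every removed six shim has a direct stdlib equivalent on Python 3, which is what the patch substitutes throughout. A minimal summary of the mapping (runnable on Python 3 only):

    # Python 3 replacements for the six imports dropped in patch 1
    from http import cookiejar as cookielib   # six.moves.http_cookiejar
    from io import BytesIO                    # six.BytesIO
    from urllib.parse import urlparse, quote  # six.moves.urllib.parse
    from html.entities import name2codepoint  # six.moves.html_entities
    from functools import reduce              # six.moves.reduce

    # six.text_type and six.string_types both collapse to str,
    # six.unichr collapses to chr
    assert isinstance('ox', str)
    assert chr(0x20AC) == '\u20ac'

One caveat: urllib.request is a submodule, and the bare "import urllib" used in ox/net.py and elsewhere here does not import it. It appears to work in practice because another import (e.g. requests) loads urllib.request first, but the explicit form is the safe one:

    import urllib.request  # rather than relying on "import urllib" alone

    req = urllib.request.Request('http://example.com', None, {})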
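Note on patch 2: with the urllib fallback gone, downloads go through requests exclusively, and save_url streams the response to disk. A condensed, standalone sketch of the code path the patch keeps (the User-Agent value is a placeholder for ox's DEFAULT_HEADERS):

    import os
    import requests

    def save_url(url, filename, chunk_size=1024):
        dirname = os.path.dirname(filename)
        if dirname and not os.path.exists(dirname):
            os.makedirs(dirname)
        r = requests.get(url, headers={'User-Agent': 'python-ox'}, stream=True)
        with open(filename, 'wb') as f:
            # iter_content keeps large downloads out of memory
            for chunk in r.iter_content(chunk_size=chunk_size):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)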
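Note on patch 3: HTTP header names are case-insensitive, but the cached headers come back from SQLite as a plain dict keyed however the server sent them. Wrapping the return value of get_headers() in requests' CaseInsensitiveDict means callers no longer have to guess the casing:

    from requests.structures import CaseInsensitiveDict

    headers = CaseInsensitiveDict({'Content-Type': 'text/html; charset=utf-8'})
    assert headers['content-type'] == 'text/html; charset=utf-8'
    assert headers.get('CONTENT-TYPE') == headers['Content-Type']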
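Note on patch 5: lxml raises ParserError('Document is empty') when the input contains no parseable element at all, a lone BOM like '\ufeff' being one such case, and that exception used to escape from sanitize_fragment(). The patch catches exactly that message and returns the fragment unchanged. The failure in isolation:

    import lxml.etree
    import lxml.html

    try:
        lxml.html.document_fromstring('\ufeff')
    except lxml.etree.ParserError as e:
        print(e)  # Document is empty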
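Note on patch 7: get_git_version() switches from counting revisions to deriving the version from the most recent tag. Traced by hand on a hypothetical "git describe --tags" output:

    # a commit 42 revisions after tag 3.0.0 describes as e.g.:
    described = '3.0.0-42-gdeadbee'
    version = described.replace('-', '.')                 # '3.0.0.42.gdeadbee'
    version = '.'.join((version.split('.') + ['0'])[:3])  # first three fields
    print(version)                                        # '3.0.0'

    # the ['0'] pad also normalizes a two-part tag: '3.0' -> '3.0.0'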