Clean up pylint errors and Python 2/3 compatibility issues
This commit is contained in:
parent
4e7898ae57
commit
77f8876fca
20 changed files with 232 additions and 197 deletions
|
@ -101,7 +101,7 @@ class API(object):
|
||||||
result = result.decode('utf-8')
|
result = result.decode('utf-8')
|
||||||
result = json.loads(result)
|
result = json.loads(result)
|
||||||
except:
|
except:
|
||||||
result = {'status':{}}
|
result = {'status': {}}
|
||||||
result['status']['code'] = e.code
|
result['status']['code'] = e.code
|
||||||
result['status']['text'] = str(e)
|
result['status']['text'] = str(e)
|
||||||
return result
|
return result
|
||||||
|
|
39
ox/file.py
39
ox/file.py
|
@ -131,25 +131,25 @@ def oshash(filename, cached=True):
|
||||||
if filesize < 65536:
|
if filesize < 65536:
|
||||||
for x in range(int(filesize/bytesize)):
|
for x in range(int(filesize/bytesize)):
|
||||||
buffer = f.read(bytesize)
|
buffer = f.read(bytesize)
|
||||||
(l_value,)= struct.unpack(longlongformat, buffer)
|
(l_value,) = struct.unpack(longlongformat, buffer)
|
||||||
hash += l_value
|
hash += l_value
|
||||||
hash = hash & 0xFFFFFFFFFFFFFFFF #to remain as 64bit number
|
hash = hash & 0xFFFFFFFFFFFFFFFF # to remain as 64bit number
|
||||||
else:
|
else:
|
||||||
for x in range(int(65536/bytesize)):
|
for x in range(int(65536/bytesize)):
|
||||||
buffer = f.read(bytesize)
|
buffer = f.read(bytesize)
|
||||||
(l_value,)= struct.unpack(longlongformat, buffer)
|
(l_value,) = struct.unpack(longlongformat, buffer)
|
||||||
hash += l_value
|
hash += l_value
|
||||||
hash = hash & 0xFFFFFFFFFFFFFFFF #to remain as 64bit number
|
hash = hash & 0xFFFFFFFFFFFFFFFF # to remain as 64bit number
|
||||||
f.seek(max(0,filesize-65536),0)
|
f.seek(max(0, filesize-65536), 0)
|
||||||
for x in range(int(65536/bytesize)):
|
for x in range(int(65536/bytesize)):
|
||||||
buffer = f.read(bytesize)
|
buffer = f.read(bytesize)
|
||||||
(l_value,)= struct.unpack(longlongformat, buffer)
|
(l_value,) = struct.unpack(longlongformat, buffer)
|
||||||
hash += l_value
|
hash += l_value
|
||||||
hash = hash & 0xFFFFFFFFFFFFFFFF
|
hash = hash & 0xFFFFFFFFFFFFFFFF
|
||||||
f.close()
|
f.close()
|
||||||
returnedhash = "%016x" % hash
|
returnedhash = "%016x" % hash
|
||||||
return returnedhash
|
return returnedhash
|
||||||
except(IOError):
|
except IOError:
|
||||||
return "IOError"
|
return "IOError"
|
||||||
|
|
||||||
def avinfo(filename, cached=True):
|
def avinfo(filename, cached=True):
|
||||||
|
@ -160,23 +160,25 @@ def avinfo(filename, cached=True):
|
||||||
return ffprobe(filename)
|
return ffprobe(filename)
|
||||||
ffmpeg2theora = cmd('ffmpeg2theora')
|
ffmpeg2theora = cmd('ffmpeg2theora')
|
||||||
p = subprocess.Popen([ffmpeg2theora], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
p = subprocess.Popen([ffmpeg2theora], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
info, error = p.communicate()
|
stdout, error = p.communicate()
|
||||||
version = info.split('\n')[0].split(' - ')[0].split(' ')[-1]
|
stdout = stdout.decode('utf-8')
|
||||||
|
version = stdout.split('\n')[0].split(' - ')[0].split(' ')[-1]
|
||||||
if version < '0.27':
|
if version < '0.27':
|
||||||
raise EnvironmentError('version of ffmpeg2theora needs to be 0.27 or later, found %s' % version)
|
raise EnvironmentError('version of ffmpeg2theora needs to be 0.27 or later, found %s' % version)
|
||||||
p = subprocess.Popen([ffmpeg2theora, '--info', filename],
|
p = subprocess.Popen([ffmpeg2theora, '--info', filename],
|
||||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
info, error = p.communicate()
|
stdout, error = p.communicate()
|
||||||
try:
|
try:
|
||||||
info = json.loads(info)
|
info = json.loads(stdout)
|
||||||
except:
|
except:
|
||||||
#remove metadata, can be broken
|
# remove metadata, can be broken
|
||||||
|
stdout = stdout.decode('utf-8')
|
||||||
reg = re.compile('"metadata": {.*?},', re.DOTALL)
|
reg = re.compile('"metadata": {.*?},', re.DOTALL)
|
||||||
info = re.sub(reg, '', info)
|
stdout = re.sub(reg, '', stdout)
|
||||||
info = json.loads(info)
|
info = json.loads(stdout)
|
||||||
if 'video' in info:
|
if 'video' in info:
|
||||||
for v in info['video']:
|
for v in info['video']:
|
||||||
if not 'display_aspect_ratio' in v and 'width' in v:
|
if 'display_aspect_ratio' not in v and 'width' in v:
|
||||||
v['display_aspect_ratio'] = '%d:%d' % (v['width'], v['height'])
|
v['display_aspect_ratio'] = '%d:%d' % (v['width'], v['height'])
|
||||||
v['pixel_aspect_ratio'] = '1:1'
|
v['pixel_aspect_ratio'] = '1:1'
|
||||||
if len(info.get('audio', [])) > 1:
|
if len(info.get('audio', [])) > 1:
|
||||||
|
@ -189,6 +191,7 @@ def avinfo(filename, cached=True):
|
||||||
ffmpeg = cmd('ffmpeg')
|
ffmpeg = cmd('ffmpeg')
|
||||||
p = subprocess.Popen([ffmpeg, '-i', filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
p = subprocess.Popen([ffmpeg, '-i', filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
stdout, stderr = p.communicate()
|
stdout, stderr = p.communicate()
|
||||||
|
stderr = stderr.decode('utf-8')
|
||||||
languages = [re.compile('\((.+?)\):').findall(l) for l in stderr.split('\n') if 'Stream' in l and 'Audio' in l]
|
languages = [re.compile('\((.+?)\):').findall(l) for l in stderr.split('\n') if 'Stream' in l and 'Audio' in l]
|
||||||
if len(languages) == len(info['audio']):
|
if len(languages) == len(info['audio']):
|
||||||
for i, stream in enumerate(info['audio']):
|
for i, stream in enumerate(info['audio']):
|
||||||
|
@ -278,16 +281,16 @@ def ffprobe(filename):
|
||||||
info[s['codec_type']].append(stream)
|
info[s['codec_type']].append(stream)
|
||||||
else:
|
else:
|
||||||
pass
|
pass
|
||||||
#print s
|
# print s
|
||||||
for v in info['video']:
|
for v in info['video']:
|
||||||
k = 'display_aspect_ratio'
|
k = 'display_aspect_ratio'
|
||||||
if not k in v and 'width' in v \
|
if k not in v and 'width' in v \
|
||||||
or (k in v and v[k] == '0:1'):
|
or (k in v and v[k] == '0:1'):
|
||||||
v[k] = '%d:%d' % (v['width'], v['height'])
|
v[k] = '%d:%d' % (v['width'], v['height'])
|
||||||
v['pixel_aspect_ratio'] = '1:1'
|
v['pixel_aspect_ratio'] = '1:1'
|
||||||
info['oshash'] = oshash(filename)
|
info['oshash'] = oshash(filename)
|
||||||
info['path'] = filename
|
info['path'] = filename
|
||||||
if not 'size' in info:
|
if 'size' not in info:
|
||||||
info['size'] = os.path.getsize(filename)
|
info['size'] = os.path.getsize(filename)
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
|
@ -6,7 +6,7 @@ from __future__ import print_function
|
||||||
|
|
||||||
import unicodedata
|
import unicodedata
|
||||||
|
|
||||||
from six import unichr, PY2
|
from six import unichr, text_type
|
||||||
|
|
||||||
__all__ = ['fix_bad_unicode']
|
__all__ = ['fix_bad_unicode']
|
||||||
|
|
||||||
|
@ -151,10 +151,7 @@ def text_badness(text):
|
||||||
- Improbable single-byte characters, such as ƒ or ¬
|
- Improbable single-byte characters, such as ƒ or ¬
|
||||||
- Letters in somewhat rare scripts
|
- Letters in somewhat rare scripts
|
||||||
'''
|
'''
|
||||||
if PY2:
|
assert isinstance(text, text_type)
|
||||||
assert isinstance(text, unicode)
|
|
||||||
else:
|
|
||||||
assert isinstance(text, str)
|
|
||||||
errors = 0
|
errors = 0
|
||||||
very_weird_things = 0
|
very_weird_things = 0
|
||||||
weird_things = 0
|
weird_things = 0
|
||||||
|
|
45
ox/format.py
45
ox/format.py
|
@ -4,6 +4,7 @@ import math
|
||||||
import re
|
import re
|
||||||
import string
|
import string
|
||||||
|
|
||||||
|
from six import text_type
|
||||||
|
|
||||||
def toAZ(num):
|
def toAZ(num):
|
||||||
"""
|
"""
|
||||||
|
@ -20,7 +21,8 @@ def toAZ(num):
|
||||||
>>> toAZ(1234567890)
|
>>> toAZ(1234567890)
|
||||||
'CYWOQVJ'
|
'CYWOQVJ'
|
||||||
"""
|
"""
|
||||||
if num < 1: raise ValueError("must supply a positive integer")
|
if num < 1:
|
||||||
|
raise ValueError("must supply a positive integer")
|
||||||
digits = string.ascii_uppercase
|
digits = string.ascii_uppercase
|
||||||
az = ''
|
az = ''
|
||||||
while num != 0:
|
while num != 0:
|
||||||
|
@ -30,7 +32,7 @@ def toAZ(num):
|
||||||
az = digits[r] + az
|
az = digits[r] + az
|
||||||
return az
|
return az
|
||||||
|
|
||||||
encode_base26=toAZ
|
encode_base26 = toAZ
|
||||||
|
|
||||||
def fromAZ(num):
|
def fromAZ(num):
|
||||||
"""
|
"""
|
||||||
|
@ -45,7 +47,7 @@ def fromAZ(num):
|
||||||
>>> fromAZ('FOO')
|
>>> fromAZ('FOO')
|
||||||
4461
|
4461
|
||||||
"""
|
"""
|
||||||
num = num.replace('-','')
|
num = num.replace('-', '')
|
||||||
digits = string.ascii_uppercase
|
digits = string.ascii_uppercase
|
||||||
r = 0
|
r = 0
|
||||||
for exp, char in enumerate(reversed(num)):
|
for exp, char in enumerate(reversed(num)):
|
||||||
|
@ -64,7 +66,8 @@ def to26(q):
|
||||||
>>> to26(347485647)
|
>>> to26(347485647)
|
||||||
'BDGKMAP'
|
'BDGKMAP'
|
||||||
"""
|
"""
|
||||||
if q < 0: raise ValueError("must supply a positive integer")
|
if q < 0:
|
||||||
|
raise ValueError("must supply a positive integer")
|
||||||
base26 = string.ascii_uppercase
|
base26 = string.ascii_uppercase
|
||||||
converted = []
|
converted = []
|
||||||
while q != 0:
|
while q != 0:
|
||||||
|
@ -73,7 +76,7 @@ def to26(q):
|
||||||
converted.insert(0, l)
|
converted.insert(0, l)
|
||||||
return "".join(converted) or 'A'
|
return "".join(converted) or 'A'
|
||||||
|
|
||||||
decode_base26=toAZ
|
decode_base26 = toAZ
|
||||||
|
|
||||||
def from26(q):
|
def from26(q):
|
||||||
"""
|
"""
|
||||||
|
@ -82,7 +85,7 @@ def from26(q):
|
||||||
0
|
0
|
||||||
"""
|
"""
|
||||||
base26 = string.ascii_uppercase
|
base26 = string.ascii_uppercase
|
||||||
q = q.replace('-','')
|
q = q.replace('-', '')
|
||||||
r = 0
|
r = 0
|
||||||
for i in q:
|
for i in q:
|
||||||
r = r * 26 + base26.index(i.upper())
|
r = r * 26 + base26.index(i.upper())
|
||||||
|
@ -123,7 +126,8 @@ def to32(q):
|
||||||
ValueError: must supply a positive integer
|
ValueError: must supply a positive integer
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if q < 0: raise ValueError("must supply a positive integer")
|
if q < 0:
|
||||||
|
raise ValueError("must supply a positive integer")
|
||||||
letters = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
|
letters = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
|
||||||
converted = []
|
converted = []
|
||||||
while q != 0:
|
while q != 0:
|
||||||
|
@ -188,7 +192,7 @@ def from32(q):
|
||||||
'Z': 31,
|
'Z': 31,
|
||||||
}
|
}
|
||||||
base32 = ('0123456789' + string.ascii_uppercase)[:32]
|
base32 = ('0123456789' + string.ascii_uppercase)[:32]
|
||||||
q = q.replace('-','')
|
q = q.replace('-', '')
|
||||||
q = ''.join([base32[_32map[i.upper()]] for i in q])
|
q = ''.join([base32[_32map[i.upper()]] for i in q])
|
||||||
return int(q, 32)
|
return int(q, 32)
|
||||||
|
|
||||||
|
@ -210,7 +214,8 @@ def to36(q):
|
||||||
...
|
...
|
||||||
ValueError: must supply a positive integer
|
ValueError: must supply a positive integer
|
||||||
"""
|
"""
|
||||||
if q < 0: raise ValueError("must supply a positive integer")
|
if q < 0:
|
||||||
|
raise ValueError("must supply a positive integer")
|
||||||
letters = "0123456789abcdefghijklmnopqrstuvwxyz"
|
letters = "0123456789abcdefghijklmnopqrstuvwxyz"
|
||||||
converted = []
|
converted = []
|
||||||
while q != 0:
|
while q != 0:
|
||||||
|
@ -233,7 +238,7 @@ def int_value(strValue, default=u''):
|
||||||
u''
|
u''
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
val = re.compile('(\d+)').findall(unicode(strValue).strip())[0]
|
val = re.compile('(\d+)').findall(text_type(strValue).strip())[0]
|
||||||
except:
|
except:
|
||||||
val = default
|
val = default
|
||||||
return val
|
return val
|
||||||
|
@ -250,7 +255,7 @@ def float_value(strValue, default=u''):
|
||||||
u''
|
u''
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
val = re.compile('([\d.]+)').findall(unicode(strValue).strip())[0]
|
val = re.compile('([\d.]+)').findall(text_type(strValue).strip())[0]
|
||||||
except:
|
except:
|
||||||
val = default
|
val = default
|
||||||
return val
|
return val
|
||||||
|
@ -286,7 +291,7 @@ def format_number(number, longName, shortName):
|
||||||
n = number / math.pow(1024, i + 1)
|
n = number / math.pow(1024, i + 1)
|
||||||
return '%s %s%s' % (format_thousands('%.*f' % (i, n)), prefix[i], shortName)
|
return '%s %s%s' % (format_thousands('%.*f' % (i, n)), prefix[i], shortName)
|
||||||
|
|
||||||
def format_thousands(number, separator = ','):
|
def format_thousands(number, separator=','):
|
||||||
"""
|
"""
|
||||||
Return the number with separators (1,000,000)
|
Return the number with separators (1,000,000)
|
||||||
|
|
||||||
|
@ -318,13 +323,13 @@ def format_pixels(number):
|
||||||
def format_currency(amount, currency="$"):
|
def format_currency(amount, currency="$"):
|
||||||
if amount:
|
if amount:
|
||||||
temp = "%.2f" % amount
|
temp = "%.2f" % amount
|
||||||
profile=re.compile(r"(\d)(\d\d\d[.,])")
|
profile = re.compile(r"(\d)(\d\d\d[.,])")
|
||||||
while 1:
|
while 1:
|
||||||
temp, count = re.subn(profile,r"\1,\2",temp)
|
temp, count = re.subn(profile, r"\1,\2", temp)
|
||||||
if not count:
|
if not count:
|
||||||
break
|
break
|
||||||
if temp.startswith('-'):
|
if temp.startswith('-'):
|
||||||
return "-"+ currency + temp[1:-3]
|
return "-" + currency + temp[1:-3]
|
||||||
return currency + temp[:-3]
|
return currency + temp[:-3]
|
||||||
else:
|
else:
|
||||||
return ""
|
return ""
|
||||||
|
@ -339,7 +344,8 @@ def plural(amount, unit, plural='s'):
|
||||||
if abs(amount) != 1:
|
if abs(amount) != 1:
|
||||||
if plural == 's':
|
if plural == 's':
|
||||||
unit = unit + plural
|
unit = unit + plural
|
||||||
else: unit = plural
|
else:
|
||||||
|
unit = plural
|
||||||
return "%s %s" % (format_thousands(amount), unit)
|
return "%s %s" % (format_thousands(amount), unit)
|
||||||
|
|
||||||
def format_duration(ms, verbosity=0, years=True, hours=True, milliseconds=True):
|
def format_duration(ms, verbosity=0, years=True, hours=True, milliseconds=True):
|
||||||
|
@ -396,7 +402,7 @@ def format_duration(ms, verbosity=0, years=True, hours=True, milliseconds=True):
|
||||||
if milliseconds:
|
if milliseconds:
|
||||||
durations.append("%sms" % ms)
|
durations.append("%sms" % ms)
|
||||||
else:
|
else:
|
||||||
durations = [plural(d, 'day'), plural(h,'hour'),
|
durations = [plural(d, 'day'), plural(h, 'hour'),
|
||||||
plural(m, 'minute'), plural(s, 'second')]
|
plural(m, 'minute'), plural(s, 'second')]
|
||||||
if years:
|
if years:
|
||||||
durations.insert(0, plural(y, 'year'))
|
durations.insert(0, plural(y, 'year'))
|
||||||
|
@ -434,7 +440,7 @@ def parse_timecode(string):
|
||||||
'''
|
'''
|
||||||
timecode = 0
|
timecode = 0
|
||||||
for i, v in enumerate(list(reversed(string.split(':')))[:4]):
|
for i, v in enumerate(list(reversed(string.split(':')))[:4]):
|
||||||
timecode += float(v) * ( 86400 if i == 3 else pow(60, i))
|
timecode += float(v) * (86400 if i == 3 else pow(60, i))
|
||||||
return timecode
|
return timecode
|
||||||
|
|
||||||
def ms2runtime(ms, shortenLong=False):
|
def ms2runtime(ms, shortenLong=False):
|
||||||
|
@ -482,7 +488,8 @@ def time2ms(timeString):
|
||||||
p = timeString.split(':')
|
p = timeString.split(':')
|
||||||
for i in range(len(p)):
|
for i in range(len(p)):
|
||||||
_p = p[i]
|
_p = p[i]
|
||||||
if _p.endswith('.'): _p =_p[:-1]
|
if _p.endswith('.'):
|
||||||
|
_p = _p[:-1]
|
||||||
ms = ms * 60 + float(_p)
|
ms = ms * 60 + float(_p)
|
||||||
return int(ms * 1000)
|
return int(ms * 1000)
|
||||||
|
|
||||||
|
|
28
ox/html.py
28
ox/html.py
|
@ -18,8 +18,8 @@ DOTS = ['·', '*', '\xe2\x80\xa2', '•', '•', '•']
|
||||||
|
|
||||||
unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)')
|
unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)')
|
||||||
word_split_re = re.compile(r'(\s+)')
|
word_split_re = re.compile(r'(\s+)')
|
||||||
punctuation_re = re.compile('^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % \
|
punctuation_re = re.compile('^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % (
|
||||||
('|'.join([re.escape(x) for x in LEADING_PUNCTUATION]),
|
'|'.join([re.escape(x) for x in LEADING_PUNCTUATION]),
|
||||||
'|'.join([re.escape(x) for x in TRAILING_PUNCTUATION])))
|
'|'.join([re.escape(x) for x in TRAILING_PUNCTUATION])))
|
||||||
simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
|
simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
|
||||||
link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+')
|
link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+')
|
||||||
|
@ -83,20 +83,22 @@ def add_links(text, trim_url_limit=None, nofollow=False):
|
||||||
|
|
||||||
If nofollow is True, the URLs in link text will get a rel="nofollow" attribute.
|
If nofollow is True, the URLs in link text will get a rel="nofollow" attribute.
|
||||||
"""
|
"""
|
||||||
trim_url = lambda x, limit=trim_url_limit: limit is not None and (x[:limit] + (len(x) >=limit and '...' or '')) or x
|
trim_url = lambda x, limit=trim_url_limit: limit is not None and (x[:limit] + (len(x) >= limit and '...' or '')) or x
|
||||||
words = word_split_re.split(text)
|
words = word_split_re.split(text)
|
||||||
nofollow_attr = nofollow and ' rel="nofollow"' or ''
|
nofollow_attr = nofollow and ' rel="nofollow"' or ''
|
||||||
for i, word in enumerate(words):
|
for i, word in enumerate(words):
|
||||||
match = punctuation_re.match(word)
|
match = punctuation_re.match(word)
|
||||||
if match:
|
if match:
|
||||||
lead, middle, trail = match.groups()
|
lead, middle, trail = match.groups()
|
||||||
if middle.startswith('www.') or ('@' not in middle and not middle.startswith('http://') and \
|
if middle.startswith('www.') or ('@' not in middle and not middle.startswith('http://') and
|
||||||
len(middle) > 0 and middle[0] in letters + string.digits and \
|
len(middle) > 0 and middle[0] in letters + string.digits and
|
||||||
(middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com'))):
|
(middle.endswith('.org') or
|
||||||
|
middle.endswith('.net') or
|
||||||
|
middle.endswith('.com'))):
|
||||||
middle = '<a href="http://%s"%s>%s</a>' % (middle, nofollow_attr, trim_url(middle))
|
middle = '<a href="http://%s"%s>%s</a>' % (middle, nofollow_attr, trim_url(middle))
|
||||||
if middle.startswith('http://') or middle.startswith('https://'):
|
if middle.startswith('http://') or middle.startswith('https://'):
|
||||||
middle = '<a href="%s"%s>%s</a>' % (middle, nofollow_attr, trim_url(middle))
|
middle = '<a href="%s"%s>%s</a>' % (middle, nofollow_attr, trim_url(middle))
|
||||||
if '@' in middle and not middle.startswith('www.') and not ':' in middle \
|
if '@' in middle and not middle.startswith('www.') and ':' not in middle \
|
||||||
and simple_email_re.match(middle):
|
and simple_email_re.match(middle):
|
||||||
middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
|
middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
|
||||||
if lead + middle + trail != word:
|
if lead + middle + trail != word:
|
||||||
|
@ -127,6 +129,7 @@ def clean_html(text):
|
||||||
# Trim stupid HTML such as <br clear="all">.
|
# Trim stupid HTML such as <br clear="all">.
|
||||||
text = html_gunk_re.sub('', text)
|
text = html_gunk_re.sub('', text)
|
||||||
# Convert hard-coded bullets into HTML unordered lists.
|
# Convert hard-coded bullets into HTML unordered lists.
|
||||||
|
|
||||||
def replace_p_tags(match):
|
def replace_p_tags(match):
|
||||||
s = match.group().replace('</p>', '</li>')
|
s = match.group().replace('</p>', '</li>')
|
||||||
for d in DOTS:
|
for d in DOTS:
|
||||||
|
@ -153,6 +156,7 @@ def decode_html(html):
|
||||||
if isinstance(html, bytes):
|
if isinstance(html, bytes):
|
||||||
html = html.decode('utf-8')
|
html = html.decode('utf-8')
|
||||||
uchr = unichr
|
uchr = unichr
|
||||||
|
|
||||||
def entitydecode(match, uchr=uchr):
|
def entitydecode(match, uchr=uchr):
|
||||||
entity = match.group(1)
|
entity = match.group(1)
|
||||||
if entity == '#x80':
|
if entity == '#x80':
|
||||||
|
@ -328,15 +332,14 @@ def sanitize_html(html, tags=None, global_attributes=[]):
|
||||||
|
|
||||||
for tag in tags:
|
for tag in tags:
|
||||||
valid_attributes[tag['name']] = tag.get('required', []) \
|
valid_attributes[tag['name']] = tag.get('required', []) \
|
||||||
+ tag.get('optional', []) \
|
+ tag.get('optional', []) + global_attributes
|
||||||
+ global_attributes
|
|
||||||
required_attributes[tag['name']] = tag.get('required', [])
|
required_attributes[tag['name']] = tag.get('required', [])
|
||||||
validation[tag['name']] = tag.get('validation', {})
|
validation[tag['name']] = tag.get('validation', {})
|
||||||
|
|
||||||
if '[]' in validation:
|
if '[]' in validation:
|
||||||
html = re.sub(
|
html = re.sub(
|
||||||
re.compile('\[((https?:\/\/|\/).+?) (.+?)\]', re.IGNORECASE),
|
re.compile('\[((https?:\/\/|\/).+?) (.+?)\]', re.IGNORECASE),
|
||||||
'<a href="\\1">\\3</a>', html);
|
'<a href="\\1">\\3</a>', html)
|
||||||
|
|
||||||
parts = split_tags(html)
|
parts = split_tags(html)
|
||||||
for i, part in enumerate(parts):
|
for i, part in enumerate(parts):
|
||||||
|
@ -351,7 +354,7 @@ def sanitize_html(html, tags=None, global_attributes=[]):
|
||||||
a = attr_re.findall(attributes)
|
a = attr_re.findall(attributes)
|
||||||
attrs = dict(a)
|
attrs = dict(a)
|
||||||
|
|
||||||
if not closing and not name in non_closing_tags:
|
if not closing and name not in non_closing_tags:
|
||||||
level += 1
|
level += 1
|
||||||
|
|
||||||
if not attrs and attributes or name not in valid_tags:
|
if not attrs and attributes or name not in valid_tags:
|
||||||
|
@ -361,7 +364,7 @@ def sanitize_html(html, tags=None, global_attributes=[]):
|
||||||
for key in set(attrs) - set(valid_attributes[name]):
|
for key in set(attrs) - set(valid_attributes[name]):
|
||||||
del attrs[key]
|
del attrs[key]
|
||||||
for key in required_attributes[tag['name']]:
|
for key in required_attributes[tag['name']]:
|
||||||
if not key in attrs:
|
if key not in attrs:
|
||||||
valid = False
|
valid = False
|
||||||
|
|
||||||
if valid:
|
if valid:
|
||||||
|
@ -395,6 +398,7 @@ def sanitize_html(html, tags=None, global_attributes=[]):
|
||||||
|
|
||||||
def split_tags(string):
|
def split_tags(string):
|
||||||
tags = []
|
tags = []
|
||||||
|
|
||||||
def collect(match):
|
def collect(match):
|
||||||
tags.append(match.group(0))
|
tags.append(match.group(0))
|
||||||
return '\0'
|
return '\0'
|
||||||
|
|
|
@ -208,7 +208,7 @@ def langTo3Code(lang):
|
||||||
if lang:
|
if lang:
|
||||||
lang = langEnglishName(lang)
|
lang = langEnglishName(lang)
|
||||||
if lang:
|
if lang:
|
||||||
lang=lang.lower()
|
lang = lang.lower()
|
||||||
for l in _iso639_languages:
|
for l in _iso639_languages:
|
||||||
if l[0].lower() == lang:
|
if l[0].lower() == lang:
|
||||||
return l[3]
|
return l[3]
|
||||||
|
@ -218,7 +218,7 @@ def langTo2Code(lang):
|
||||||
if lang:
|
if lang:
|
||||||
lang = langEnglishName(lang)
|
lang = langEnglishName(lang)
|
||||||
if lang:
|
if lang:
|
||||||
lang=lang.lower()
|
lang = lang.lower()
|
||||||
for l in _iso639_languages:
|
for l in _iso639_languages:
|
||||||
if l[0].lower() == lang:
|
if l[0].lower() == lang:
|
||||||
return l[2]
|
return l[2]
|
||||||
|
|
22
ox/js.py
22
ox/js.py
|
@ -11,9 +11,9 @@ def minify(source, comment=''):
|
||||||
pass
|
pass
|
||||||
# python2 performance with unicode string is terrible
|
# python2 performance with unicode string is terrible
|
||||||
if PY2:
|
if PY2:
|
||||||
if isinstance(source, unicode):
|
if isinstance(source, unicode): # pylint: disable=undefined-variable
|
||||||
source = source.encode('utf-8')
|
source = source.encode('utf-8')
|
||||||
if isinstance(comment, unicode):
|
if isinstance(comment, unicode): # pylint: disable=undefined-variable
|
||||||
comment = comment.encode('utf-8')
|
comment = comment.encode('utf-8')
|
||||||
tokens = tokenize(source)
|
tokens = tokenize(source)
|
||||||
length = len(tokens)
|
length = len(tokens)
|
||||||
|
@ -30,20 +30,20 @@ def minify(source, comment=''):
|
||||||
# numbers or strings or unary operators or grouping operators
|
# numbers or strings or unary operators or grouping operators
|
||||||
# with a single newline, otherwise remove it
|
# with a single newline, otherwise remove it
|
||||||
if prevToken and nextToken\
|
if prevToken and nextToken\
|
||||||
and (prevToken['type'] in ['identifier', 'number', 'string']\
|
and (prevToken['type'] in ['identifier', 'number', 'string']
|
||||||
or prevToken['value'] in ['++', '--', ')', ']', '}'])\
|
or prevToken['value'] in ['++', '--', ')', ']', '}']) \
|
||||||
and (nextToken['type'] in ['identifier', 'number', 'string']\
|
and (nextToken['type'] in ['identifier', 'number', 'string']
|
||||||
or nextToken['value'] in ['+', '-', '++', '--', '~', '!', '(', '[', '{']):
|
or nextToken['value'] in ['+', '-', '++', '--', '~', '!', '(', '[', '{']):
|
||||||
minified += '\n'
|
minified += '\n'
|
||||||
elif token['type'] == 'whitespace':
|
elif token['type'] == 'whitespace':
|
||||||
# replace whitespace between two tokens that are identifiers or
|
# replace whitespace between two tokens that are identifiers or
|
||||||
# numbers, or between a token that ends with "+" or "-" and one that
|
# numbers, or between a token that ends with "+" or "-" and one that
|
||||||
# begins with "+" or "-", with a single space, otherwise remove it
|
# begins with "+" or "-", with a single space, otherwise remove it
|
||||||
if prevToken and nextToken\
|
if prevToken and nextToken \
|
||||||
and ((prevToken['type'] in ['identifier', 'number']\
|
and ((prevToken['type'] in ['identifier', 'number'] and
|
||||||
and nextToken['type'] in ['identifier', 'number'])
|
nextToken['type'] in ['identifier', 'number']) or
|
||||||
or (prevToken['value'] in ['+', '-', '++', '--']
|
(prevToken['value'] in ['+', '-', '++', '--'] and
|
||||||
and nextToken['value'] in ['+', '-', '++', '--'])):
|
nextToken['value'] in ['+', '-', '++', '--'])):
|
||||||
minified += ' '
|
minified += ' '
|
||||||
elif token['type'] != 'comment':
|
elif token['type'] != 'comment':
|
||||||
# remove comments and leave all other tokens untouched
|
# remove comments and leave all other tokens untouched
|
||||||
|
@ -178,7 +178,7 @@ def tokenize(source):
|
||||||
'value': value
|
'value': value
|
||||||
})
|
})
|
||||||
if type == 'comment':
|
if type == 'comment':
|
||||||
lines = value.split('\n');
|
lines = value.split('\n')
|
||||||
column = len(lines[-1])
|
column = len(lines[-1])
|
||||||
line += len(lines) - 1
|
line += len(lines) - 1
|
||||||
elif type == 'linebreak':
|
elif type == 'linebreak':
|
||||||
|
|
|
@ -23,11 +23,11 @@ def loads(source):
|
||||||
try:
|
try:
|
||||||
m = re.search(r'line (\d+) column (\d+)', msg)
|
m = re.search(r'line (\d+) column (\d+)', msg)
|
||||||
if m:
|
if m:
|
||||||
(lineno, colno) = map(int, m.groups())
|
(lineno, colno) = [int(n) for n in m.groups()]
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
if lineno and colno:
|
if lineno and colno:
|
||||||
s = minified.split('\n')
|
s = minified.split('\n')
|
||||||
context = s[lineno-1][max(0, colno-30):colno+30]
|
context = s[lineno-1][max(0, colno-30):colno+30]
|
||||||
msg += ' at:\n\n %s\n %s\033[1m^\033[0m' %(context, ' ' * (colno - max(0, colno-30) - 2))
|
msg += ' at:\n\n %s\n %s\033[1m^\033[0m' % (context, ' ' * (colno - max(0, colno-30) - 2))
|
||||||
raise ValueError(msg)
|
raise ValueError(msg)
|
||||||
|
|
|
@ -18,7 +18,8 @@ _articles = ('the', 'la', 'a', 'die', 'der', 'le', 'el',
|
||||||
_articlesDict = dict([(x, x) for x in _articles])
|
_articlesDict = dict([(x, x) for x in _articles])
|
||||||
_spArticles = []
|
_spArticles = []
|
||||||
for article in _articles:
|
for article in _articles:
|
||||||
if article[-1] not in ("'", '-'): article += ' '
|
if article[-1] not in ("'", '-'):
|
||||||
|
article += ' '
|
||||||
_spArticles.append(article)
|
_spArticles.append(article)
|
||||||
|
|
||||||
_noarticles = (
|
_noarticles = (
|
||||||
|
@ -50,8 +51,10 @@ def canonical_title(title):
|
||||||
'Los Angeles Plays Itself'
|
'Los Angeles Plays Itself'
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
if _articlesDict.has_key(title.split(', ')[-1].lower()): return title
|
if title.split(', ')[-1].lower() in _articlesDict:
|
||||||
except IndexError: pass
|
return title
|
||||||
|
except IndexError:
|
||||||
|
pass
|
||||||
ltitle = title.lower()
|
ltitle = title.lower()
|
||||||
for start in _noarticles:
|
for start in _noarticles:
|
||||||
if ltitle.startswith(start):
|
if ltitle.startswith(start):
|
||||||
|
@ -60,7 +63,8 @@ def canonical_title(title):
|
||||||
if ltitle.startswith(article):
|
if ltitle.startswith(article):
|
||||||
lart = len(article)
|
lart = len(article)
|
||||||
title = '%s, %s' % (title[lart:], title[:lart])
|
title = '%s, %s' % (title[lart:], title[:lart])
|
||||||
if article[-1] == ' ': title = title[:-1]
|
if article[-1] == ' ':
|
||||||
|
title = title[:-1]
|
||||||
break
|
break
|
||||||
## XXX: an attempt using a dictionary lookup.
|
## XXX: an attempt using a dictionary lookup.
|
||||||
##for artSeparator in (' ', "'", '-'):
|
##for artSeparator in (' ', "'", '-'):
|
||||||
|
@ -82,9 +86,10 @@ def normalize_title(title):
|
||||||
'The Movie Title'
|
'The Movie Title'
|
||||||
"""
|
"""
|
||||||
stitle = title.split(', ')
|
stitle = title.split(', ')
|
||||||
if len(stitle) > 1 and _articlesDict.has_key(stitle[-1].lower()):
|
if len(stitle) > 1 and stitle[-1].lower() in _articlesDict:
|
||||||
sep = ' '
|
sep = ' '
|
||||||
if stitle[-1][-1] in ("'", '-'): sep = ''
|
if stitle[-1][-1] in ("'", '-'):
|
||||||
|
sep = ''
|
||||||
title = '%s%s%s' % (stitle[-1], sep, ', '.join(stitle[:-1]))
|
title = '%s%s%s' % (stitle[-1], sep, ', '.join(stitle[:-1]))
|
||||||
return title
|
return title
|
||||||
|
|
||||||
|
@ -139,7 +144,8 @@ def canonical_name(name):
|
||||||
# Don't convert names already in the canonical format.
|
# Don't convert names already in the canonical format.
|
||||||
if name in ('Unknown Director', ):
|
if name in ('Unknown Director', ):
|
||||||
return name
|
return name
|
||||||
if name.find(', ') != -1: return name
|
if name.find(', ') != -1:
|
||||||
|
return name
|
||||||
sname = name.split(' ')
|
sname = name.split(' ')
|
||||||
snl = len(sname)
|
snl = len(sname)
|
||||||
if snl == 2:
|
if snl == 2:
|
||||||
|
@ -147,11 +153,14 @@ def canonical_name(name):
|
||||||
name = '%s, %s' % (sname[1], sname[0])
|
name = '%s, %s' % (sname[1], sname[0])
|
||||||
elif snl > 2:
|
elif snl > 2:
|
||||||
lsname = [x.lower() for x in sname]
|
lsname = [x.lower() for x in sname]
|
||||||
if snl == 3: _indexes = (0, snl-2)
|
if snl == 3:
|
||||||
else: _indexes = (0, snl-2, snl-3)
|
_indexes = (0, snl-2)
|
||||||
|
else:
|
||||||
|
_indexes = (0, snl-2, snl-3)
|
||||||
# Check for common surname prefixes at the beginning and near the end.
|
# Check for common surname prefixes at the beginning and near the end.
|
||||||
for index in _indexes:
|
for index in _indexes:
|
||||||
if lsname[index] not in _sname_suffixes: continue
|
if lsname[index] not in _sname_suffixes:
|
||||||
|
continue
|
||||||
try:
|
try:
|
||||||
# Build the surname.
|
# Build the surname.
|
||||||
surn = '%s %s' % (sname[index], sname[index+1])
|
surn = '%s %s' % (sname[index], sname[index+1])
|
||||||
|
@ -194,11 +203,12 @@ def normalize_name(name):
|
||||||
|
|
||||||
def normalize_path(path):
|
def normalize_path(path):
|
||||||
path = path.replace(':', '_').replace('/', '_')
|
path = path.replace(':', '_').replace('/', '_')
|
||||||
if path.endswith('.'): path = path[:-1] + '_'
|
if path.endswith('.'):
|
||||||
|
path = path[:-1] + '_'
|
||||||
return path
|
return path
|
||||||
|
|
||||||
def strip_accents(s):
|
def strip_accents(s):
|
||||||
if isinstance(s, str):
|
if isinstance(s, str):
|
||||||
s = unicode(s)
|
s = s.decode('utf-8')
|
||||||
return ''.join((c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn'))
|
return ''.join((c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn'))
|
||||||
|
|
||||||
|
|
|
@ -6,13 +6,16 @@ from . import cache
|
||||||
from .text import find_re
|
from .text import find_re
|
||||||
from .utils import json, ET
|
from .utils import json, ET
|
||||||
|
|
||||||
|
|
||||||
def get_embed_code(url, maxwidth=None, maxheight=None):
|
def get_embed_code(url, maxwidth=None, maxheight=None):
|
||||||
embed = {}
|
embed = {}
|
||||||
header = cache.get_headers(url)
|
header = cache.get_headers(url)
|
||||||
if header.get('content-type', '').startswith('text/html'):
|
if header.get('content-type', '').startswith('text/html'):
|
||||||
html = cache.read_url(url)
|
html = cache.read_url(url)
|
||||||
json_oembed = filter(lambda l: 'json+oembed' in l, re.compile('<link.*?>').findall(html))
|
links = re.compile('<link.*?>').findall(html)
|
||||||
xml_oembed = filter(lambda l: 'xml+oembed' in l, re.compile('<link.*?>').findall(html))
|
json_oembed = [l for l in links if 'json+oembed' in l]
|
||||||
|
xml_oembed = [l for l in links if 'xml+oembed' in l]
|
||||||
|
|
||||||
if json_oembed:
|
if json_oembed:
|
||||||
oembed_url = find_re(json_oembed[0], 'href="(.*?)"')
|
oembed_url = find_re(json_oembed[0], 'href="(.*?)"')
|
||||||
if maxwidth:
|
if maxwidth:
|
||||||
|
@ -21,7 +24,7 @@ def get_embed_code(url, maxwidth=None, maxheight=None):
|
||||||
oembed_url += '&maxheight=%d' % maxheight
|
oembed_url += '&maxheight=%d' % maxheight
|
||||||
embed = json.loads(cache.read_url(oembed_url))
|
embed = json.loads(cache.read_url(oembed_url))
|
||||||
elif xml_oembed:
|
elif xml_oembed:
|
||||||
oembed_url = find_re(json_oembed[0], 'href="(.*?)"')
|
oembed_url = find_re(xml_oembed[0], 'href="(.*?)"')
|
||||||
if maxwidth:
|
if maxwidth:
|
||||||
oembed_url += '&maxwidth=%d' % maxwidth
|
oembed_url += '&maxwidth=%d' % maxwidth
|
||||||
if maxheight:
|
if maxheight:
|
||||||
|
|
|
@ -14,8 +14,8 @@ else:
|
||||||
|
|
||||||
__all__ = ['create_torrent', 'get_info_hash', 'get_torrent_info', 'get_files', 'get_torrent_size']
|
__all__ = ['create_torrent', 'get_info_hash', 'get_torrent_info', 'get_files', 'get_torrent_size']
|
||||||
|
|
||||||
def create_torrent(file, url, params = {}, flag = Event(),
|
def create_torrent(file, url, params={}, flag=Event(),
|
||||||
progress = lambda x: None, progress_percent = 1):
|
progress=lambda x: None, progress_percent=1):
|
||||||
"Creates a torrent for a given file, using url as tracker url"
|
"Creates a torrent for a given file, using url as tracker url"
|
||||||
from .makemetafile import make_meta_file
|
from .makemetafile import make_meta_file
|
||||||
return make_meta_file(file, url, params, flag, progress, progress_percent)
|
return make_meta_file(file, url, params, flag, progress, progress_percent)
|
||||||
|
|
|
@ -11,7 +11,7 @@ def _decode_int(data):
|
||||||
"""
|
"""
|
||||||
data = data[1:]
|
data = data[1:]
|
||||||
end = data.index(b'e')
|
end = data.index(b'e')
|
||||||
return int(data[:end],10), data[end+1:]
|
return int(data[:end], 10), data[end+1:]
|
||||||
|
|
||||||
def _decode_str(data):
|
def _decode_str(data):
|
||||||
"""
|
"""
|
||||||
|
@ -19,9 +19,9 @@ def _decode_str(data):
|
||||||
return string, remaining data
|
return string, remaining data
|
||||||
"""
|
"""
|
||||||
start = data.index(b':')
|
start = data.index(b':')
|
||||||
l = int(data[:start].decode(),10)
|
l = int(data[:start].decode(), 10)
|
||||||
if l <= 0:
|
if l <= 0:
|
||||||
raise Exception('invalid string size: %d'%d)
|
raise Exception('invalid string size: %d' % l)
|
||||||
start += 1
|
start += 1
|
||||||
ret = bytes(data[start:start+l])
|
ret = bytes(data[start:start+l])
|
||||||
data = data[start+l:]
|
data = data[start+l:]
|
||||||
|
@ -67,45 +67,45 @@ def _decode(data):
|
||||||
elif ch.isdigit():
|
elif ch.isdigit():
|
||||||
return _decode_str(data)
|
return _decode_str(data)
|
||||||
else:
|
else:
|
||||||
raise Exception('could not deserialize data: %s'%data)
|
raise Exception('could not deserialize data: %s' % data)
|
||||||
|
|
||||||
def bdecode(data):
|
def bdecode(data):
|
||||||
"""
|
"""
|
||||||
decode a bytearray
|
decode a bytearray
|
||||||
return deserialized object
|
return deserialized object
|
||||||
"""
|
"""
|
||||||
obj , data = _decode(data)
|
obj, data = _decode(data)
|
||||||
if len(data) > 0:
|
if len(data) > 0:
|
||||||
raise Exception('failed to deserialize, extra data: %s'%data)
|
raise Exception('failed to deserialize, extra data: %s' % data)
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
def _encode_str(s,buff):
|
def _encode_str(s, buff):
|
||||||
"""
|
"""
|
||||||
encode string to a buffer
|
encode string to a buffer
|
||||||
"""
|
"""
|
||||||
s = bytearray(s)
|
s = bytearray(s)
|
||||||
l = len(s)
|
l = len(s)
|
||||||
buff.append(bytearray(str(l)+':','utf-8'))
|
buff.append(bytearray(str(l)+':', 'utf-8'))
|
||||||
buff.append(s)
|
buff.append(s)
|
||||||
|
|
||||||
def _encode_int(i,buff):
|
def _encode_int(i, buff):
|
||||||
"""
|
"""
|
||||||
encode integer to a buffer
|
encode integer to a buffer
|
||||||
"""
|
"""
|
||||||
buff.append(b'i')
|
buff.append(b'i')
|
||||||
buff.append(bytearray(str(i),'ascii'))
|
buff.append(bytearray(str(i), 'ascii'))
|
||||||
buff.append(b'e')
|
buff.append(b'e')
|
||||||
|
|
||||||
def _encode_list(l,buff):
|
def _encode_list(l, buff):
|
||||||
"""
|
"""
|
||||||
encode list of elements to a buffer
|
encode list of elements to a buffer
|
||||||
"""
|
"""
|
||||||
buff.append(b'l')
|
buff.append(b'l')
|
||||||
for i in l:
|
for i in l:
|
||||||
_encode(i,buff)
|
_encode(i, buff)
|
||||||
buff.append(b'e')
|
buff.append(b'e')
|
||||||
|
|
||||||
def _encode_dict(d,buff):
|
def _encode_dict(d, buff):
|
||||||
"""
|
"""
|
||||||
encode dict
|
encode dict
|
||||||
"""
|
"""
|
||||||
|
@ -113,30 +113,30 @@ def _encode_dict(d,buff):
|
||||||
l = list(d.keys())
|
l = list(d.keys())
|
||||||
l.sort()
|
l.sort()
|
||||||
for k in l:
|
for k in l:
|
||||||
_encode(str(k),buff)
|
_encode(str(k), buff)
|
||||||
_encode(d[k],buff)
|
_encode(d[k], buff)
|
||||||
buff.append(b'e')
|
buff.append(b'e')
|
||||||
|
|
||||||
def _encode(obj,buff):
|
def _encode(obj, buff):
|
||||||
"""
|
"""
|
||||||
encode element obj to a buffer buff
|
encode element obj to a buffer buff
|
||||||
"""
|
"""
|
||||||
if isinstance(obj,str):
|
if isinstance(obj, str):
|
||||||
_encode_str(bytearray(obj,'utf-8'),buff)
|
_encode_str(bytearray(obj, 'utf-8'), buff)
|
||||||
elif isinstance(obj,bytes):
|
elif isinstance(obj, bytes):
|
||||||
_encode_str(bytearray(obj),buff)
|
_encode_str(bytearray(obj), buff)
|
||||||
elif isinstance(obj,bytearray):
|
elif isinstance(obj, bytearray):
|
||||||
_encode_str(obj,buff)
|
_encode_str(obj, buff)
|
||||||
elif str(obj).isdigit():
|
elif str(obj).isdigit():
|
||||||
_encode_int(obj,buff)
|
_encode_int(obj, buff)
|
||||||
elif isinstance(obj,list):
|
elif isinstance(obj, list):
|
||||||
_encode_list(obj,buff)
|
_encode_list(obj, buff)
|
||||||
elif hasattr(obj,'keys') and hasattr(obj,'values'):
|
elif hasattr(obj, 'keys') and hasattr(obj, 'values'):
|
||||||
_encode_dict(obj,buff)
|
_encode_dict(obj, buff)
|
||||||
elif str(obj) in ['True','False']:
|
elif str(obj) in ['True', 'False']:
|
||||||
_encode_int(int(obj and '1' or '0'),buff)
|
_encode_int(int(obj and '1' or '0'), buff)
|
||||||
else:
|
else:
|
||||||
raise Exception('non serializable object: %s'%obj)
|
raise Exception('non serializable object: %s' % obj)
|
||||||
|
|
||||||
|
|
||||||
def bencode(obj):
|
def bencode(obj):
|
||||||
|
@ -144,7 +144,7 @@ def bencode(obj):
|
||||||
bencode element, return bytearray
|
bencode element, return bytearray
|
||||||
"""
|
"""
|
||||||
buff = []
|
buff = []
|
||||||
_encode(obj,buff)
|
_encode(obj, buff)
|
||||||
ret = bytearray()
|
ret = bytearray()
|
||||||
for ba in buff:
|
for ba in buff:
|
||||||
ret += ba
|
ret += ba
|
||||||
|
|
|
@ -2,6 +2,7 @@ from __future__ import print_function
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from six import text_type
|
||||||
from ox.cache import read_url
|
from ox.cache import read_url
|
||||||
|
|
||||||
HEADERS = {
|
HEADERS = {
|
||||||
|
@ -16,9 +17,9 @@ USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7) '
|
||||||
USER_AGENT += 'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 Safari/534.48.3'
|
USER_AGENT += 'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 Safari/534.48.3'
|
||||||
|
|
||||||
def get_movie_data(title, director):
|
def get_movie_data(title, director):
|
||||||
if isinstance(title, unicode):
|
if isinstance(title, text_type):
|
||||||
title = title.encode('utf-8')
|
title = title.encode('utf-8')
|
||||||
if isinstance(director, unicode):
|
if isinstance(director, text_type):
|
||||||
director = director.encode('utf-8')
|
director = director.encode('utf-8')
|
||||||
data = {}
|
data = {}
|
||||||
# itunes section (preferred source for link)
|
# itunes section (preferred source for link)
|
||||||
|
@ -45,7 +46,7 @@ def get_movie_data(title, director):
|
||||||
results = js['results']
|
results = js['results']
|
||||||
if results:
|
if results:
|
||||||
url = host + results[0]['location']
|
url = host + results[0]['location']
|
||||||
if not 'link' in data:
|
if 'link' not in data:
|
||||||
data['link'] = url
|
data['link'] = url
|
||||||
headers = {
|
headers = {
|
||||||
'User-Agent': USER_AGENT
|
'User-Agent': USER_AGENT
|
||||||
|
|
|
@ -7,7 +7,7 @@ import time
|
||||||
from ox import strip_tags, find_re
|
from ox import strip_tags, find_re
|
||||||
from ox.cache import read_url
|
from ox.cache import read_url
|
||||||
|
|
||||||
import google
|
from . import google
|
||||||
|
|
||||||
|
|
||||||
def get_show_url(title):
|
def get_show_url(title):
|
||||||
|
|
|
@ -28,22 +28,32 @@ def get_show_url(title):
|
||||||
def get_data(url):
|
def get_data(url):
|
||||||
data = read_url(url, unicode=True)
|
data = read_url(url, unicode=True)
|
||||||
doc = document_fromstring(data)
|
doc = document_fromstring(data)
|
||||||
score = filter(lambda s: s.attrib.get('property') == 'v:average',
|
score = [s for s in doc.xpath('//span[@class="score_value"]')
|
||||||
doc.xpath('//span[@class="score_value"]'))
|
if s.attrib.get('property') == 'v:average']
|
||||||
if score:
|
if score:
|
||||||
score = int(score[0].text)
|
score = int(score[0].text)
|
||||||
else:
|
else:
|
||||||
score = -1
|
score = -1
|
||||||
authors = [a.text
|
authors = [
|
||||||
for a in doc.xpath('//div[@class="review_content"]//div[@class="author"]//a')]
|
a.text
|
||||||
sources = [d.text
|
for a in doc.xpath('//div[@class="review_content"]//div[@class="author"]//a')
|
||||||
for d in doc.xpath('//div[@class="review_content"]//div[@class="source"]/a')]
|
]
|
||||||
reviews = [d.text
|
sources = [
|
||||||
for d in doc.xpath('//div[@class="review_content"]//div[@class="review_body"]')]
|
d.text
|
||||||
scores = [int(d.text.strip())
|
for d in doc.xpath('//div[@class="review_content"]//div[@class="source"]/a')
|
||||||
for d in doc.xpath('//div[@class="review_content"]//div[contains(@class, "critscore")]')]
|
]
|
||||||
urls = [a.attrib['href']
|
reviews = [
|
||||||
for a in doc.xpath('//div[@class="review_content"]//a[contains(@class, "external")]')]
|
d.text
|
||||||
|
for d in doc.xpath('//div[@class="review_content"]//div[@class="review_body"]')
|
||||||
|
]
|
||||||
|
scores = [
|
||||||
|
int(d.text.strip())
|
||||||
|
for d in doc.xpath('//div[@class="review_content"]//div[contains(@class, "critscore")]')
|
||||||
|
]
|
||||||
|
urls = [
|
||||||
|
a.attrib['href']
|
||||||
|
for a in doc.xpath('//div[@class="review_content"]//a[contains(@class, "external")]')
|
||||||
|
]
|
||||||
|
|
||||||
metacritics = []
|
metacritics = []
|
||||||
for i in range(len(authors)):
|
for i in range(len(authors)):
|
||||||
|
|
|
@ -32,7 +32,7 @@ def get_data(url):
|
||||||
r['summary'] = get_og(data, 'description')
|
r['summary'] = get_og(data, 'description')
|
||||||
|
|
||||||
meter = re.compile('<span id="all-critics-meter" class="meter(.*?)">(.*?)</span>').findall(data)
|
meter = re.compile('<span id="all-critics-meter" class="meter(.*?)">(.*?)</span>').findall(data)
|
||||||
meter = filter(lambda m: m[1].isdigit(), meter)
|
meter = [m for m in meter if m[1].isdigit()]
|
||||||
if meter:
|
if meter:
|
||||||
r['tomatometer'] = meter[0][1]
|
r['tomatometer'] = meter[0][1]
|
||||||
r['rating'] = find_re(data, 'Average Rating: <span>([\d.]+)/10</span>')
|
r['rating'] = find_re(data, 'Average Rating: <span>([\d.]+)/10</span>')
|
||||||
|
|
|
@ -95,7 +95,7 @@ def format_subsection(string):
|
||||||
'ussports': 'US-Sports',
|
'ussports': 'US-Sports',
|
||||||
'wunderbar': 'wunderBAR'
|
'wunderbar': 'wunderBAR'
|
||||||
}
|
}
|
||||||
if subsection.has_key(string):
|
if string in subsection:
|
||||||
return subsection[string].replace(u'\xc3', 'ae')
|
return subsection[string].replace(u'\xc3', 'ae')
|
||||||
return string[:1].upper() + string[1:]
|
return string[:1].upper() + string[1:]
|
||||||
|
|
||||||
|
@ -219,8 +219,8 @@ def archive_news():
|
||||||
else:
|
else:
|
||||||
dMax = days[m]
|
dMax = days[m]
|
||||||
for d in range(dMax, 0, -1):
|
for d in range(dMax, 0, -1):
|
||||||
print('getNews(%d, %d, %d)' % (y, m, d))
|
print('get_news(%d, %d, %d)' % (y, m, d))
|
||||||
news = getNews(y, m ,d)
|
news = get_news(y, m, d)
|
||||||
for new in news:
|
for new in news:
|
||||||
dirname = archivePath + '/' + new['date'][0:4] + '/' + new['date'][5:7] + new['date'][8:10] + '/' + new['date'][11:13] + new['date'][14:16]
|
dirname = archivePath + '/' + new['date'][0:4] + '/' + new['date'][5:7] + new['date'][8:10] + '/' + new['date'][11:13] + new['date'][14:16]
|
||||||
if not os.path.exists(dirname):
|
if not os.path.exists(dirname):
|
||||||
|
@ -230,7 +230,7 @@ def archive_news():
|
||||||
else:
|
else:
|
||||||
filename = dirname + '/' + new['url'] + '.json'
|
filename = dirname + '/' + new['url'] + '.json'
|
||||||
if not os.path.exists(filename) or True:
|
if not os.path.exists(filename) or True:
|
||||||
data = json.dumps(new, ensure_ascii = False)
|
data = json.dumps(new, ensure_ascii=False)
|
||||||
f = open(filename, 'w')
|
f = open(filename, 'w')
|
||||||
f.write(data)
|
f.write(data)
|
||||||
f.close()
|
f.close()
|
||||||
|
@ -253,7 +253,7 @@ def archive_news():
|
||||||
string = strings[3]
|
string = strings[3]
|
||||||
if len(strings) == 6:
|
if len(strings) == 6:
|
||||||
string += '/' + strings[4]
|
string += '/' + strings[4]
|
||||||
if not count.has_key(string):
|
if string not in count:
|
||||||
count[string] = {'count': 1, 'string': '%s %s http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (new['date'], new['date'], new['section'].lower(), y, int(datetime(y, m, d).strftime('%j')))}
|
count[string] = {'count': 1, 'string': '%s %s http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (new['date'], new['date'], new['section'].lower(), y, int(datetime(y, m, d).strftime('%j')))}
|
||||||
else:
|
else:
|
||||||
count[string] = {'count': count[string]['count'] + 1, 'string': '%s %s' % (new['date'], count[string]['string'][17:])}
|
count[string] = {'count': count[string]['count'] + 1, 'string': '%s %s' % (new['date'], count[string]['string'][17:])}
|
||||||
|
@ -269,12 +269,12 @@ if __name__ == '__main__':
|
||||||
# spiegel = Spiegel(2008, 8)
|
# spiegel = Spiegel(2008, 8)
|
||||||
# print(spiegel.getContents())
|
# print(spiegel.getContents())
|
||||||
# news = News(2001, 9, 10)
|
# news = News(2001, 9, 10)
|
||||||
# output(news.getNews())
|
# output(news.get_news())
|
||||||
'''
|
'''
|
||||||
x = []
|
x = []
|
||||||
for d in range(10, 30):
|
for d in range(10, 30):
|
||||||
print('2/%d' % d)
|
print('2/%d' % d)
|
||||||
news = getNews(2008, 2, d)
|
news = get_news(2008, 2, d)
|
||||||
for new in news:
|
for new in news:
|
||||||
strings = new['url'].split('/')
|
strings = new['url'].split('/')
|
||||||
string = format_section(strings[3])
|
string = format_section(strings[3])
|
||||||
|
|
|
@ -27,15 +27,15 @@ def video_url(youtubeId, format='mp4', timeout=cache_timeout):
|
||||||
"""
|
"""
|
||||||
fmt = None
|
fmt = None
|
||||||
if format == '4k':
|
if format == '4k':
|
||||||
fmt=38
|
fmt = 38
|
||||||
elif format == '1080p':
|
elif format == '1080p':
|
||||||
fmt=37
|
fmt = 37
|
||||||
elif format == '720p':
|
elif format == '720p':
|
||||||
fmt=22
|
fmt = 22
|
||||||
elif format == 'mp4':
|
elif format == 'mp4':
|
||||||
fmt=18
|
fmt = 18
|
||||||
elif format == 'high':
|
elif format == 'high':
|
||||||
fmt=35
|
fmt = 35
|
||||||
elif format == 'webm':
|
elif format == 'webm':
|
||||||
streams = videos(youtubeId, 'webm')
|
streams = videos(youtubeId, 'webm')
|
||||||
return streams[max(streams.keys())]['url']
|
return streams[max(streams.keys())]['url']
|
||||||
|
|
Loading…
Reference in a new issue