vi:si:et:sw=4:sts=4:ts=4
parent dafe20aa04
commit 4a6e2702b4
11 changed files with 921 additions and 921 deletions
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# vi:si:et:sw=2:sts=2:ts=2
+# vi:si:et:sw=4:sts=4:ts=4
# 2008

from hashes import *
@@ -11,7 +11,7 @@ import cache

#only works if BitTornado is installed
try:
    from torrent import *
except:
    pass

oxutils/cache.py | 154
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# vi:si:et:sw=2:sts=2:ts=2
+# vi:si:et:sw=4:sts=4:ts=4
# 2008
import gzip
import StringIO
@@ -19,99 +19,99 @@ from net import DEFAULT_HEADERS, getEncoding
cache_timeout = 30*24*60*60 # default is 30 days

def status(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
    '''
    >>> status('http://google.com')
    200
    >>> status('http://google.com/mysearch')
    404
    '''
    headers = getHeaders(url, data, headers)
    return int(headers['status'])

def exists(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
    '''
    >>> exists('http://google.com')
    True
    >>> exists('http://google.com/mysearch')
    False
    '''
    s = status(url, data, headers, timeout)
    if s >= 200 and s < 400:
        return True
    return False

def getHeaders(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
    url_cache_file = "%s.headers" % getUrlCacheFile(url, data, headers)
    url_headers = loadUrlCache(url_cache_file, timeout)
    if url_headers:
        url_headers = simplejson.loads(url_headers)
    else:
        url_headers = net.getHeaders(url, data, headers)
        saveUrlHeaders(url_cache_file, url_headers)
    return url_headers

def getUrl(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
    url_cache_file = getUrlCacheFile(url, data, headers)
    result = loadUrlCache(url_cache_file, timeout)
    if not result:
        try:
            url_headers, result = net.getUrl(url, data, headers, returnHeaders=True)
        except urllib2.HTTPError, e:
            e.headers['Status'] = "%s" % e.code
            url_headers = dict(e.headers)
            result = e.read()
            if url_headers.get('content-encoding', None) == 'gzip':
                result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()
        saveUrlCache(url_cache_file, result, url_headers)
    return result

def getUrlUnicode(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout, _getUrl=getUrl):
    data = _getUrl(url, data, headers, timeout)
    encoding = getEncoding(data)
    if not encoding:
        encoding = 'latin-1'
    return unicode(data, encoding)

def getCacheBase():
    'cache base is eather ~/.ox/cache or can set via env variable oxCACHE'
    return os.environ.get('oxCACHE', os.path.expanduser('~/.ox/cache'))

def getUrlCacheFile(url, data=None, headers=DEFAULT_HEADERS):
    if data:
        url_hash = sha.sha(url + '?' + data).hexdigest()
    else:
        url_hash = sha.sha(url).hexdigest()
    domain = ".".join(urlparse.urlparse(url)[1].split('.')[-2:])
    return os.path.join(getCacheBase(), domain, url_hash[:2], url_hash[2:4], url_hash[4:6], url_hash)

def loadUrlCache(url_cache_file, timeout=cache_timeout):
    if timeout == 0:
        return None
    if os.path.exists(url_cache_file):
        ctime = os.stat(url_cache_file).st_ctime
        now = time.mktime(time.localtime())
        file_age = now-ctime
        if timeout < 0 or file_age < timeout:
            f = open(url_cache_file)
            data = f.read()
            f.close()
            return data
    return None

def saveUrlCache(url_cache_file, data, headers):
    folder = os.path.dirname(url_cache_file)
    if not os.path.exists(folder):
        os.makedirs(folder)
    f = open(url_cache_file, 'w')
    f.write(data)
    f.close()
    saveUrlHeaders("%s.headers" % url_cache_file, headers)

def saveUrlHeaders(url_cache_file, headers):
    folder = os.path.dirname(url_cache_file)
    if not os.path.exists(folder):
        os.makedirs(folder)
    f = open(url_cache_file, 'w')
    f.write(simplejson.dumps(headers))
    f.close()

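A minimal usage sketch for the cache layer above (assuming the package imports as oxutils.cache; URL and timeout are illustrative):

    from oxutils import cache

    # first call fetches over HTTP and stores body + headers under getCacheBase();
    # repeat calls within the timeout are answered from disk
    page = cache.getUrlUnicode('http://example.com/', timeout=60*60)
    ok = cache.exists('http://example.com/')
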
@@ -1,208 +1,207 @@
# -*- coding: utf-8 -*-
-# vi:si:et:sw=2:sts=2:ts=2
+# vi:si:et:sw=4:sts=4:ts=4
import math
import re

def to36(q):
    """
    Converts an integer to base 36 (a useful scheme for human-sayable IDs).

    >>> to36(35)
    'z'
    >>> to36(119292)
    '2k1o'
    >>> int(to36(939387374), 36)
    939387374
    >>> to36(0)
    '0'
    >>> to36(-393)
    Traceback (most recent call last):
    ...
    ValueError: must supply a positive integer
    """
    if q < 0: raise ValueError, "must supply a positive integer"
    letters = "0123456789abcdefghijklmnopqrstuvwxyz"
    converted = []
    while q != 0:
        q, r = divmod(q, 36)
        converted.insert(0, letters[r])
    return "".join(converted) or '0'

def from36(q):
    return int(q, 36)

def intValue(strValue, default=''):
    try:
        val = re.compile('(\d+)').findall(unicode(strValue).strip())[0]
    except:
        val = default
    return val

def test_intValue():
    assert intValue('abc23') == '23'
    assert intValue(' abc23') == '23'
    assert intValue(' abc') == ''

def floatValue(strValue, default=''):
    try:
        val = re.compile('([\d.]+)').findall(unicode(strValue).strip())[0]
    except:
        val = default
    return val

def test_floatValue():
-    print "floatValue"
    assert floatValue('abc23.4') == '23.4'
    assert floatValue(' abc23.4') == '23.4'
    assert floatValue(' abc') == ''

def formatNumber(number, longName, shortName):
    """
    Return the number in a human-readable format (23 KB, 23.4 MB, 23.42 GB)

    >>> formatNumber(123, 'Byte', 'B')
    '123 Bytes'

    >>> formatNumber(1234, 'Byte', 'B')
    '1 KB'

    >>> formatNumber(1234567, 'Byte', 'B')
    '1.2 MB'

    >>> formatNumber(1234567890, 'Byte', 'B')
    '1.15 GB'

    >>> formatNumber(1234567890123456789, 'Byte', 'B')
    '1,096.5166 PB'

    """
    if number < 1024:
        return '%s %s%s' % (formatThousands(number), longName, number != 1 and 's' or '')
    prefix = ['K', 'M', 'G', 'T', 'P']
    for i in range(5):
        if number < math.pow(1024, i + 2) or i == 4:
            n = number / math.pow(1024, i + 1)
            return '%s %s%s' % (formatThousands('%.*f' % (i, n)), prefix[i], shortName)

def formatThousands(number, separator = ','):
    """
    Return the number with separators (1,000,000)

    >>> formatThousands(1)
    '1'
    >>> formatThousands(1000)
    '1,000'
    >>> formatThousands(1000000)
    '1,000,000'
    """
    string = str(number).split('.')
    l = []
    for i, character in enumerate(reversed(string[0])):
        if i and (not (i % 3)):
            l.insert(0, separator)
        l.insert(0, character)
    string[0] = ''.join(l)
    return '.'.join(string)

def formatBits(number):
    return formatNumber(number, 'bit', 'b')

def formatBytes(number):
    return formatNumber(number, 'byte', 'B')

def formatPixels(number):
    return formatNumber(number, 'pixel', 'px')

def plural(amount, unit, plural='s'):
    '''
    >>> plural(1, 'unit')
    '1 unit'
    >>> plural(2, 'unit')
    '2 units'
    '''
    if abs(amount) != 1:
        if plural == 's':
            unit = unit + plural
        else: unit = plural
    return "%s %s" % (formatThousands(amount), unit)

def ms2runtime(ms):
    '''
    >>> ms2runtime(5000)
    '5 seconds'
    >>> ms2runtime(500000)
    '8 minutes 20 seconds'
    >>> ms2runtime(50000000)
    '13 hours 53 minutes 20 seconds'
    >>> ms2runtime(50000000-20000)
    '13 hours 53 minutes'
    '''
    seconds = int(ms / 1000)
    years = 0
    days = 0
    hours = 0
    minutes = 0
    if seconds >= 60:
        minutes = int(seconds / 60)
        seconds = seconds % 60
        if minutes >= 60:
            hours = int(minutes / 60)
            minutes = minutes % 60
            if hours >= 24:
                days = int(hours / 24)
                hours = hours % 24
                if days >= 365:
                    years = int(days / 365)
                    days = days % 365
    runtimeString = (plural(years, 'year'), plural(days, 'day'),
        plural(hours,'hour'), plural(minutes, 'minute'), plural(seconds, 'second'))
    runtimeString = filter(lambda x: not x.startswith('0'), runtimeString)
    return " ".join(runtimeString).strip()

def ms2playtime(ms):
    '''
    >>> ms2playtime(5000)
    '00:05'
    >>> ms2playtime(500000)
    '08:20'
    >>> ms2playtime(50000000)
    '13:53:20'
    '''
    it = int(ms / 1000)
    ms = ms - it*1000
    ss = it % 60
    mm = ((it-ss)/60) % 60
    hh = ((it-(mm*60)-ss)/3600) % 60
    if hh:
        playtime= "%02d:%02d:%02d" % (hh, mm, ss)
    else:
        playtime= "%02d:%02d" % (mm, ss)
    return playtime

def ms2time(ms):
    '''
    >>> ms2time(44592123)
    '12:23:12.123'
    '''
    it = int(ms / 1000)
    ms = ms - it*1000
    ss = it % 60
    mm = ((it-ss)/60) % 60
    hh = ((it-(mm*60)-ss)/3600) % 60
    return "%d:%02d:%02d.%03d" % (hh, mm, ss, ms)

def time2ms(timeString):
    '''
    >>> time2ms('12:23:12.123')
    44592123
    '''
    ms = 0.0
    p = timeString.split(':')
    for i in range(len(p)):
        ms = ms * 60 + float(p[i])
    return int(ms * 1000)

def shiftTime(offset, timeString):
    newTime = time2ms(timeString) + offset
    return ms2time(newTime)

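A quick sketch of the formatting helpers above (assuming the module imports as oxutils.format); the expected values follow from the doctests and the 1024-based math in formatNumber:

    from oxutils.format import to36, from36, formatBytes, ms2playtime

    assert from36(to36(939387374)) == 939387374   # base-36 round trip
    assert formatBytes(1234567890) == '1.15 GB'   # 1234567890 / 1024**3 ~ 1.1498, 2 decimals at GB
    assert ms2playtime(500000) == '08:20'         # 500 s -> 8 min 20 s
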
@@ -1,17 +1,17 @@
# -*- coding: utf-8 -*-
-# vi:si:et:sw=2:sts=2:ts=2
+# vi:si:et:sw=4:sts=4:ts=4
# GPL written 2008 by j@pad.ma
import sha
import os

def sha1sum(filename):
    sha1 = sha.new()
    file=open(filename)
    buffer=file.read(4096)
    while buffer:
        sha1.update(buffer)
        buffer=file.read(4096)
    file.close()
    return sha1.hexdigest()

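The sha module used above is the pre-hashlib API; a sketch of the same chunked digest with the stdlib hashlib module (not part of this changeset):

    import hashlib

    def sha1sum_hashlib(filename):
        # mirrors sha1sum() above: 4096-byte chunks, hex digest
        h = hashlib.sha1()
        f = open(filename, 'rb')
        buffer = f.read(4096)
        while buffer:
            h.update(buffer)
            buffer = f.read(4096)
        f.close()
        return h.hexdigest()
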
oxutils/html.py | 234
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# vi:si:et:sw=2:sts=2:ts=2
+# vi:si:et:sw=4:sts=4:ts=4
# GPL written 2008 by j@pad.ma
import re
import string
@@ -26,147 +26,147 @@ trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\
del x # Temporary variable

def escape(html):
    '''
    Returns the given HTML with ampersands, quotes and carets encoded

    >>> escape('html "test" & <brothers>')
    'html &quot;test&quot; &amp; &lt;brothers&gt;'
    '''
    if not isinstance(html, basestring):
        html = str(html)
    return html.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&apos;')

def linebreaks(value):
    '''
    Converts newlines into <p> and <br />
    '''
    value = re.sub(r'\r\n|\r|\n', '\n', value) # normalize newlines
    paras = re.split('\n{2,}', value)
    paras = ['<p>%s</p>' % p.strip().replace('\n', '<br />') for p in paras]
    return '\n\n'.join(paras)

def stripTags(value):
    """
    Returns the given HTML with all tags stripped

    >>> stripTags('some <h2>title</h2> <script>asdfasdf</script>')
    'some title asdfasdf'
    """
    return re.sub(r'<[^>]*?>', '', value)

def stripSpacesBetweenTags(value):
    "Returns the given HTML with spaces between tags normalized to a single space"
    return re.sub(r'>\s+<', '> <', value)

def stripEntities(value):
    "Returns the given HTML with all entities (&something;) stripped"
    return re.sub(r'&(?:\w+|#\d);', '', value)

def fixAmpersands(value):
    "Returns the given HTML with all unencoded ampersands encoded correctly"
    return unencoded_ampersands_re.sub('&amp;', value)

def urlize(text, trim_url_limit=None, nofollow=False):
    """
    Converts any URLs in text into clickable links. Works on http://, https:// and
    www. links. Links can have trailing punctuation (periods, commas, close-parens)
    and leading punctuation (opening parens) and it'll still do the right thing.

    If trim_url_limit is not None, the URLs in link text will be limited to
    trim_url_limit characters.

    If nofollow is True, the URLs in link text will get a rel="nofollow" attribute.
    """
    trim_url = lambda x, limit=trim_url_limit: limit is not None and (x[:limit] + (len(x) >=limit and '...' or '')) or x
    words = word_split_re.split(text)
    nofollow_attr = nofollow and ' rel="nofollow"' or ''
    for i, word in enumerate(words):
        match = punctuation_re.match(word)
        if match:
            lead, middle, trail = match.groups()
            if middle.startswith('www.') or ('@' not in middle and not middle.startswith('http://') and \
                len(middle) > 0 and middle[0] in string.letters + string.digits and \
                (middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com'))):
                middle = '<a href="http://%s"%s>%s</a>' % (middle, nofollow_attr, trim_url(middle))
            if middle.startswith('http://') or middle.startswith('https://'):
                middle = '<a href="%s"%s>%s</a>' % (middle, nofollow_attr, trim_url(middle))
            if '@' in middle and not middle.startswith('www.') and not ':' in middle \
                and simple_email_re.match(middle):
                middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
            if lead + middle + trail != word:
                words[i] = lead + middle + trail
    return ''.join(words)

def cleanHtml(text):
    """
    Cleans the given HTML. Specifically, it does the following:
        * Converts <b> and <i> to <strong> and <em>.
        * Encodes all ampersands correctly.
        * Removes all "target" attributes from <a> tags.
        * Removes extraneous HTML, such as presentational tags that open and
          immediately close and <br clear="all">.
        * Converts hard-coded bullets into HTML unordered lists.
        * Removes stuff like "<p>&nbsp;</p>", but only if it's at the
          bottom of the text.
    """
    from text import normalizeNewlines
    text = normalizeNewlines(text)
    text = re.sub(r'<(/?)\s*b\s*>', '<\\1strong>', text)
    text = re.sub(r'<(/?)\s*i\s*>', '<\\1em>', text)
    text = fixAmpersands(text)
    # Remove all target="" attributes from <a> tags.
    text = link_target_attribute_re.sub('\\1', text)
    # Trim stupid HTML such as <br clear="all">.
    text = html_gunk_re.sub('', text)
    # Convert hard-coded bullets into HTML unordered lists.
    def replace_p_tags(match):
        s = match.group().replace('</p>', '</li>')
        for d in DOTS:
            s = s.replace('<p>%s' % d, '<li>')
        return '<ul>\n%s\n</ul>' % s
    text = hard_coded_bullets_re.sub(replace_p_tags, text)
    # Remove stuff like "<p>&nbsp;</p>", but only if it's at the bottom of the text.
    text = trailing_empty_content_re.sub('', text)
    return text

# This pattern matches a character entity reference (a decimal numeric
# references, a hexadecimal numeric reference, or a named reference).
charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?')

def decodeHtml(html):
    """
    >>> decodeHtml('me &amp; you and $&%')
    u'me & you and $&%'
    """
    if type(html) != unicode:
        html = unicode(html)[:]
    if type(html) is unicode:
        uchr = unichr
    else:
        uchr = lambda value: value > 255 and unichr(value) or chr(value)
    def entitydecode(match, uchr=uchr):
        entity = match.group(1)
        if entity.startswith('#x'):
            return uchr(int(entity[2:], 16))
        elif entity.startswith('#'):
            return uchr(int(entity[1:]))
        elif entity in name2codepoint:
            return uchr(name2codepoint[entity])
        else:
            return match.group(0)
    return charrefpat.sub(entitydecode, html).replace(u'\xa0', ' ')

def highlight(text, query, hlClass="hl"):
    """
    >>> highlight('me & you and $&%', 'and')
    'me & you <span class="hl">and</span> $&%'
    """
    if query:
        text = text.replace('<br />', '|')
        query = re.escape(query).replace('\ ', '.')
        m = re.compile("(%s)" % query, re.IGNORECASE).findall(text)
        for i in m:
            text = re.sub("(%s)" % re.escape(i).replace('\ ', '.'), '<span class="%s">\\1</span>' % hlClass, text)
        text = text.replace('|', '<br />')
    return text

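A small round-trip sketch for the helpers above (assuming the module imports as oxutils.html):

    from oxutils.html import escape, decodeHtml, stripTags

    s = escape('html "test" & <brothers>')   # -> 'html &quot;test&quot; &amp; &lt;brothers&gt;'
    assert decodeHtml(s) == u'html "test" & <brothers>'
    assert stripTags('some <h2>title</h2>') == 'some title'
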
oxutils/lang.py | 436
@@ -1,236 +1,236 @@
# -*- coding: utf-8 -*-
-# vi:si:et:sw=2:sts=2:ts=2
+# vi:si:et:sw=4:sts=4:ts=4


_iso639_languages = [
    ("Unknown", "", "", "und"),
    ("Afar", "", "aa", "aar"),
    ("Abkhazian", "", "ab", "abk"),
    ("Afrikaans", "", "af", "afr"),
    ("Akan", "", "ak", "aka"),
    ("Albanian", "", "sq", "sqi"),
    ("Amharic", "", "am", "amh"),
    ("Arabic", "", "ar", "ara"),
    ("Aragonese", "", "an", "arg"),
    ("Armenian", "", "hy", "hye"),
    ("Assamese", "", "as", "asm"),
    ("Avaric", "", "av", "ava"),
    ("Avestan", "", "ae", "ave"),
    ("Aymara", "", "ay", "aym"),
    ("Azerbaijani", "", "az", "aze"),
    ("Bashkir", "", "ba", "bak"),
    ("Bambara", "", "bm", "bam"),
    ("Basque", "", "eu", "eus"),
    ("Belarusian", "", "be", "bel"),
    ("Bengali", "", "bn", "ben"),
    ("Bihari", "", "bh", "bih"),
    ("Bislama", "", "bi", "bis"),
    ("Bosnian", "", "bs", "bos"),
    ("Breton", "", "br", "bre"),
    ("Bulgarian", "", "bg", "bul"),
    ("Burmese", "", "my", "mya"),
    ("Catalan", "", "ca", "cat"),
    ("Chamorro", "", "ch", "cha"),
    ("Chechen", "", "ce", "che"),
    ("Chinese", "", "zh", "zho"),
    ("Church Slavic", "", "cu", "chu"),
    ("Chuvash", "", "cv", "chv"),
    ("Cornish", "", "kw", "cor"),
    ("Corsican", "", "co", "cos"),
    ("Cree", "", "cr", "cre"),
    ("Czech", "", "cs", "ces"),
    ("Danish", "Dansk", "da", "dan"),
    ("Divehi", "", "dv", "div"),
    ("Dutch", "Nederlands", "nl", "nld"),
    ("Dzongkha", "", "dz", "dzo"),
    ("English", "English", "en", "eng"),
    ("Esperanto", "", "eo", "epo"),
    ("Estonian", "", "et", "est"),
    ("Ewe", "", "ee", "ewe"),
    ("Faroese", "", "fo", "fao"),
    ("Fijian", "", "fj", "fij"),
    ("Finnish", "Suomi", "fi", "fin"),
    ("French", "Francais", "fr", "fra"),
    ("Western Frisian", "", "fy", "fry"),
    ("Fulah", "", "ff", "ful"),
    ("Georgian", "", "ka", "kat"),
    ("German", "Deutsch", "de", "deu"),
    ("Gaelic (Scots)", "", "gd", "gla"),
    ("Irish", "", "ga", "gle"),
    ("Galician", "", "gl", "glg"),
    ("Manx", "", "gv", "glv"),
    ("Greek, Modern", "", "el", "ell"),
    ("Guarani", "", "gn", "grn"),
    ("Gujarati", "", "gu", "guj"),
    ("Haitian", "", "ht", "hat"),
    ("Hausa", "", "ha", "hau"),
    ("Hebrew", "", "he", "heb"),
    ("Herero", "", "hz", "her"),
    ("Hindi", "", "hi", "hin"),
    ("Hiri Motu", "", "ho", "hmo"),
    ("Hungarian", "Magyar", "hu", "hun"),
    ("Igbo", "", "ig", "ibo"),
    ("Icelandic", "Islenska", "is", "isl"),
    ("Ido", "", "io", "ido"),
    ("Sichuan Yi", "", "ii", "iii"),
    ("Inuktitut", "", "iu", "iku"),
    ("Interlingue", "", "ie", "ile"),
    ("Interlingua", "", "ia", "ina"),
    ("Indonesian", "", "id", "ind"),
    ("Inupiaq", "", "ik", "ipk"),
    ("Italian", "Italiano", "it", "ita"),
    ("Javanese", "", "jv", "jav"),
    ("Japanese", "", "ja", "jpn"),
    ("Kalaallisut (Greenlandic)", "", "kl", "kal"),
    ("Kannada", "", "kn", "kan"),
    ("Kashmiri", "", "ks", "kas"),
    ("Kanuri", "", "kr", "kau"),
    ("Kazakh", "", "kk", "kaz"),
    ("Central Khmer", "", "km", "khm"),
    ("Kikuyu", "", "ki", "kik"),
    ("Kinyarwanda", "", "rw", "kin"),
    ("Kirghiz", "", "ky", "kir"),
    ("Komi", "", "kv", "kom"),
    ("Kongo", "", "kg", "kon"),
    ("Korean", "", "ko", "kor"),
    ("Kuanyama", "", "kj", "kua"),
    ("Kurdish", "", "ku", "kur"),
    ("Lao", "", "lo", "lao"),
    ("Latin", "", "la", "lat"),
    ("Latvian", "", "lv", "lav"),
    ("Limburgan", "", "li", "lim"),
    ("Lingala", "", "ln", "lin"),
    ("Lithuanian", "", "lt", "lit"),
    ("Luxembourgish", "", "lb", "ltz"),
    ("Luba-Katanga", "", "lu", "lub"),
    ("Ganda", "", "lg", "lug"),
    ("Macedonian", "", "mk", "mkd"),
    ("Marshallese", "", "mh", "mah"),
    ("Malayalam", "", "ml", "mal"),
    ("Maori", "", "mi", "mri"),
    ("Marathi", "", "mr", "mar"),
    ("Malay", "", "ms", "msa"),
    ("Malagasy", "", "mg", "mlg"),
    ("Maltese", "", "mt", "mlt"),
    ("Moldavian", "", "mo", "mol"),
    ("Mongolian", "", "mn", "mon"),
    ("Nauru", "", "na", "nau"),
    ("Navajo", "", "nv", "nav"),
    ("Ndebele, South", "", "nr", "nbl"),
    ("Ndebele, North", "", "nd", "nde"),
    ("Ndonga", "", "ng", "ndo"),
    ("Nepali", "", "ne", "nep"),
    ("Norwegian Nynorsk", "", "nn", "nno"),
    ("Norwegian Bokmål", "", "nb", "nob"),
    ("Norwegian", "Norsk", "no", "nor"),
    ("Chichewa; Nyanja", "", "ny", "nya"),
    ("Occitan (post 1500); Provençal", "", "oc", "oci"),
    ("Ojibwa", "", "oj", "oji"),
    ("Oriya", "", "or", "ori"),
    ("Oromo", "", "om", "orm"),
    ("Ossetian; Ossetic", "", "os", "oss"),
    ("Panjabi", "", "pa", "pan"),
    ("Persian", "", "fa", "fas"),
    ("Pali", "", "pi", "pli"),
    ("Polish", "", "pl", "pol"),
    ("Portuguese", "Portugues", "pt", "por"),
    ("Pushto", "", "ps", "pus"),
    ("Quechua", "", "qu", "que"),
    ("Romansh", "", "rm", "roh"),
    ("Romanian", "", "ro", "ron"),
    ("Rundi", "", "rn", "run"),
    ("Russian", "", "ru", "rus"),
    ("Sango", "", "sg", "sag"),
    ("Sanskrit", "", "sa", "san"),
    ("Serbian", "", "sr", "srp"),
    ("Croatian", "Hrvatski", "hr", "hrv"),
    ("Sinhala", "", "si", "sin"),
    ("Slovak", "", "sk", "slk"),
    ("Slovenian", "", "sl", "slv"),
    ("Northern Sami", "", "se", "sme"),
    ("Samoan", "", "sm", "smo"),
    ("Shona", "", "sn", "sna"),
    ("Sindhi", "", "sd", "snd"),
    ("Somali", "", "so", "som"),
    ("Sotho, Southern", "", "st", "sot"),
    ("Spanish", "Espanol", "es", "spa"),
    ("Sardinian", "", "sc", "srd"),
    ("Swati", "", "ss", "ssw"),
    ("Sundanese", "", "su", "sun"),
    ("Swahili", "", "sw", "swa"),
    ("Swedish", "Svenska", "sv", "swe"),
    ("Tahitian", "", "ty", "tah"),
    ("Tamil", "", "ta", "tam"),
    ("Tatar", "", "tt", "tat"),
    ("Telugu", "", "te", "tel"),
    ("Tajik", "", "tg", "tgk"),
    ("Tagalog", "", "tl", "tgl"),
    ("Thai", "", "th", "tha"),
    ("Tibetan", "", "bo", "bod"),
    ("Tigrinya", "", "ti", "tir"),
    ("Tonga (Tonga Islands)", "", "to", "ton"),
    ("Tswana", "", "tn", "tsn"),
    ("Tsonga", "", "ts", "tso"),
    ("Turkmen", "", "tk", "tuk"),
    ("Turkish", "", "tr", "tur"),
    ("Twi", "", "tw", "twi"),
    ("Uighur", "", "ug", "uig"),
    ("Ukrainian", "", "uk", "ukr"),
    ("Urdu", "", "ur", "urd"),
    ("Uzbek", "", "uz", "uzb"),
    ("Venda", "", "ve", "ven"),
    ("Vietnamese", "", "vi", "vie"),
    ("Volapük", "", "vo", "vol"),
    ("Welsh", "", "cy", "cym"),
    ("Walloon", "", "wa", "wln"),
    ("Wolof", "", "wo", "wol"),
    ("Xhosa", "", "xh", "xho"),
    ("Yiddish", "", "yi", "yid"),
    ("Yoruba", "", "yo", "yor"),
    ("Zhuang", "", "za", "zha"),
    ("Zulu", "", "zu", "zul"),
]

def codeToLang(code):
    code = code.lower()
    if len(code) == 2:
        for l in _iso639_languages:
            if l[2] == code:
                return l[0]
    elif len(code) == 3:
        for l in _iso639_languages:
            if l[3] == code:
                return l[0]
    return None

def langTo3Code(lang):
    lang = englishName(lang)
    if lang:
        lang=lang.lower()
        for l in _iso639_languages:
            if l[0].lower() == lang:
                return l[3]
    return None

def langTo2Code(lang):
    lang = englishName(lang)
    if lang:
        lang=lang.lower()
        for l in _iso639_languages:
            if l[0].lower() == lang:
                return l[2]
    return None

def langCode2To3(code):
    langTo3Code(codeToLang(code))

def langCode3To2(code):
    langTo2Code(codeToLang(code))

def englishName(lang):
    lang = lang.lower()
    for l in _iso639_languages:
        if l[1].lower() == lang:
            return l[0]
    return None

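A usage sketch for the lookup helpers above (assuming the module imports as oxutils.lang); note that langTo2Code/langTo3Code resolve names through englishName(), which matches the native-name column of the table:

    from oxutils.lang import codeToLang, langTo2Code, langTo3Code

    assert codeToLang('de') == 'German'
    assert codeToLang('deu') == 'German'
    assert langTo2Code('Deutsch') == 'de'
    assert langTo3Code('Deutsch') == 'deu'
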
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# vi:si:et:sw=2:sts=2:ts=2
+# vi:si:et:sw=4:sts=4:ts=4
import gzip
import StringIO
import urllib
@@ -10,64 +10,64 @@ from chardet.universaldetector import UniversalDetector

# Default headers for HTTP requests.
DEFAULT_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9) Gecko/2008061015 Firefox/3.0',
    'Accept-Encoding': 'gzip'
}

def status(url, data=None, headers=DEFAULT_HEADERS):
    try:
        f = openUrl(url, data, headers)
        s = f.code
    except urllib2.HTTPError, e:
        s = e.code
    return s

def exists(url, data=None, headers=DEFAULT_HEADERS):
    s = status(url, data, headers)
    if s >= 200 and s < 400:
        return True
    return False

def getHeaders(url, data=None, headers=DEFAULT_HEADERS):
    try:
        f = openUrl(url, data, headers)
        f.headers['Status'] = "%s" % f.code
        headers = f.headers
        f.close()
    except urllib2.HTTPError, e:
        e.headers['Status'] = "%s" % e.code
        headers = e.headers
    return dict(headers)

def openUrl(url, data=None, headers=DEFAULT_HEADERS):
    url = url.replace(' ', '%20')
    req = urllib2.Request(url, data, headers)
    return urllib2.urlopen(req)

def getUrl(url, data=None, headers=DEFAULT_HEADERS, returnHeaders=False):
    f = openUrl(url, data, headers)
    data = f.read()
    f.close()
    if f.headers.get('content-encoding', None) == 'gzip':
        data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()
    if returnHeaders:
        f.headers['Status'] = "%s" % f.code
        return dict(f.headers), data
    return data

def getUrlUnicode(url):
    data = getUrl(url)
    encoding = getEncoding(data)
    if not encoding:
        encoding = 'latin-1'
    return unicode(data, encoding)

def getEncoding(data):
    detector = UniversalDetector()
    for line in data.split('\n'):
        detector.feed(line)
        if detector.done:
            break
    detector.close()
    return detector.result['encoding']

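A sketch of the uncached fetch path above (assuming the module imports as oxutils.net; the URL is illustrative and needs network access):

    from oxutils import net

    headers, body = net.getUrl('http://example.com/', returnHeaders=True)
    status_line = headers['Status']                  # e.g. '200'
    text = net.getUrlUnicode('http://example.com/')  # decoded via getEncoding()/chardet
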
@ -1,79 +1,79 @@
|
||||||
# -*- Mode: Python; -*-
|
# -*- Mode: Python; -*-
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=2:sts=2:ts=2
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
import re
|
import re
|
||||||
|
|
||||||
_articles = ('the', 'la', 'a', 'die', 'der', 'le', 'el',
|
_articles = ('the', 'la', 'a', 'die', 'der', 'le', 'el',
|
||||||
"l'", 'il', 'das', 'les', 'o', 'ein', 'i', 'un', 'los', 'de',
|
"l'", 'il', 'das', 'les', 'o', 'ein', 'i', 'un', 'los', 'de',
|
||||||
'an', 'una', 'las', 'eine', 'den', 'gli', 'het', 'os', 'lo',
|
'an', 'una', 'las', 'eine', 'den', 'gli', 'het', 'os', 'lo',
|
||||||
'az', 'det', 'ha-', 'een', 'ang', 'oi', 'ta', 'al-', 'dem',
|
'az', 'det', 'ha-', 'een', 'ang', 'oi', 'ta', 'al-', 'dem',
|
||||||
'mga', 'uno', "un'", 'ett', u'\xcf', 'eines', u'\xc7', 'els',
|
'mga', 'uno', "un'", 'ett', u'\xcf', 'eines', u'\xc7', 'els',
|
||||||
u'\xd4\xef', u'\xcf\xe9')
|
u'\xd4\xef', u'\xcf\xe9')
|
||||||
|
|
||||||
# Articles in a dictionary.
|
# Articles in a dictionary.
|
||||||
_articlesDict = dict([(x, x) for x in _articles])
|
_articlesDict = dict([(x, x) for x in _articles])
|
||||||
_spArticles = []
|
_spArticles = []
|
||||||
for article in _articles:
|
for article in _articles:
|
||||||
if article[-1] not in ("'", '-'): article += ' '
|
if article[-1] not in ("'", '-'): article += ' '
|
||||||
_spArticles.append(article)
|
_spArticles.append(article)
|
||||||
|
|
||||||
def canonicalTitle(title):
|
def canonicalTitle(title):
|
||||||
"""Return the title in the canonic format 'Movie Title, The'.
|
"""Return the title in the canonic format 'Movie Title, The'.
|
||||||
|
|
||||||
>>> canonicalTitle('The Movie Title')
|
>>> canonicalTitle('The Movie Title')
|
||||||
'Movie Title, The'
|
'Movie Title, The'
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
if _articlesDict.has_key(title.split(', ')[-1].lower()): return title
|
if _articlesDict.has_key(title.split(', ')[-1].lower()): return title
|
||||||
except IndexError: pass
|
except IndexError: pass
|
||||||
ltitle = title.lower()
|
ltitle = title.lower()
|
||||||
for article in _spArticles:
|
for article in _spArticles:
|
||||||
if ltitle.startswith(article):
|
if ltitle.startswith(article):
|
||||||
lart = len(article)
|
lart = len(article)
|
||||||
title = '%s, %s' % (title[lart:], title[:lart])
|
title = '%s, %s' % (title[lart:], title[:lart])
|
||||||
if article[-1] == ' ': title = title[:-1]
|
if article[-1] == ' ': title = title[:-1]
|
||||||
break
|
break
|
||||||
## XXX: an attempt using a dictionary lookup.
|
## XXX: an attempt using a dictionary lookup.
|
||||||
##for artSeparator in (' ', "'", '-'):
|
##for artSeparator in (' ', "'", '-'):
|
||||||
## article = _articlesDict.get(ltitle.split(artSeparator)[0])
|
## article = _articlesDict.get(ltitle.split(artSeparator)[0])
|
||||||
## if article is not None:
|
## if article is not None:
|
||||||
## lart = len(article)
|
## lart = len(article)
|
||||||
## # check titles like "una", "I'm Mad" and "L'abbacchio".
|
## # check titles like "una", "I'm Mad" and "L'abbacchio".
|
||||||
## if title[lart:] == '' or (artSeparator != ' ' and
|
## if title[lart:] == '' or (artSeparator != ' ' and
|
||||||
## title[lart:][1] != artSeparator): continue
|
## title[lart:][1] != artSeparator): continue
|
||||||
## title = '%s, %s' % (title[lart:], title[:lart])
|
## title = '%s, %s' % (title[lart:], title[:lart])
|
||||||
## if artSeparator == ' ': title = title[1:]
|
## if artSeparator == ' ': title = title[1:]
|
||||||
## break
|
## break
|
||||||
return title
|
return title
|
||||||
|
|
def normalizeTitle(title):
    """Return the title in the normal "The Title" format.

    >>> normalizeTitle('Movie Title, The')
    'The Movie Title'
    """
    stitle = title.split(', ')
    if len(stitle) > 1 and _articlesDict.has_key(stitle[-1].lower()):
        sep = ' '
        if stitle[-1][-1] in ("'", '-'): sep = ''
        title = '%s%s%s' % (stitle[-1], sep, ', '.join(stitle[:-1]))
    return title

def normalizeImdbId(imdbId):
    """Return 7 digit imdbId.

    >>> normalizeImdbId('http://www.imdb.com/title/tt0159206/')
    '0159206'
    >>> normalizeImdbId(159206)
    '0159206'
    >>> normalizeImdbId('tt0159206')
    '0159206'
    """
    if isinstance(imdbId, basestring):
        imdbId = re.sub('.*(\d{7}).*', '\\1', imdbId)
    elif isinstance(imdbId, int):
        imdbId = "%07d" % imdbId
    return imdbId


# Common suffixes in surnames.

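The two title helpers are inverses for titles that begin with a known article, and normalizeImdbId accepts URLs, bare integers and 'tt'-prefixed ids alike; a small interactive sketch, derived from the doctests above (illustrative only, not part of this commit):

>>> normalizeTitle(canonicalTitle('The Movie Title'))
'The Movie Title'
>>> normalizeImdbId('http://www.imdb.com/title/tt0159206/')
'0159206'
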
336	oxutils/text.py

@@ -1,216 +1,216 @@
# -*- coding: utf-8 -*-
-# vi:si:et:sw=2:sts=2:ts=2
+# vi:si:et:sw=4:sts=4:ts=4
# GPL written 2008 by j@pad.ma
import re


def findRe(string, regexp):
    result = re.compile(regexp, re.DOTALL).findall(string)
    if result:
        return result[0].strip()
    return ''

def findString(string, string0='', string1 = ''):
    """Return the string between string0 and string1.

    If string0 or string1 is left out, beginning or end of string is used.

    >>> findString('i am not there', string1=' not there')
    'i am'

    >>> findString('i am not there', 'i am ', ' there')
    'not'

    >>> findString('i am not there', 'i am not t')
    'here'

    """
    if string0:
        string0 = re.escape(string0)
    else:
        string0 = '^'
    if string1:
        string1 = re.escape(string1)
    else:
        string1 = '$'
    return findRe(string, string0 + '(.*?)' + string1)

# Capitalizes the first letter of a string.
capfirst = lambda x: x and x[0].upper() + x[1:]

def removeSpecialCharacters(text):
    """
    Removes special characters inserted by Word.
    """
    text = text.replace(u'\u2013', '-')
    text = text.replace(u'\u2026O', "'")
    text = text.replace(u'\u2019', "'")
    text = text.replace(u'', "'")
    text = text.replace(u'', "'")
    text = text.replace(u'', "-")
    return text

def wrap(text, width):
    """
    A word-wrap function that preserves existing line breaks and most spaces in
    the text. Expects that existing line breaks are posix newlines (\n).
    See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/148061
    """
    return reduce(lambda line, word, width=width: '%s%s%s' %
                  (line,
                   ' \n'[(len(line[line.rfind('\n')+1:])
                          + len(word.split('\n',1)[0]
                                ) >= width)],
                   word),
                  text.split(' ')
                  )

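The reduce() expression above is compact: it inserts a newline instead of a space before any word that would push the current line past width, while leaving line breaks already present in the text alone. A minimal sketch of the expected behaviour (illustrative, not part of this commit):

>>> wrap('one two three four', 10)
'one two\nthree four'
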
def truncateString(s, num):
    """Truncates a string after a certain number of characters, but ends with a word

    >>> truncateString('Truncates a string after a certain number of chacters, but ends with a word', 23)
    'Truncates a string...'
    >>> truncateString('Truncates a string', 23)
    'Truncates a string'

    """
    length = int(num)
    if len(s) <= length:
        return s
    words = s.split()
    ts = ""
    while words and len(ts) + len(words[0]) < length:
        ts += " " + words.pop(0)
    if words:
        ts += "..."
    return ts.strip()

def trimString(string, num):
    """Truncates a string after a certain number of characters, adding ... at -10 characters

    >>> trimString('Truncates a string after a certain number of chacters', 23)
    'Truncates ...f chacters'
    >>> trimString('Truncates a string', 23)
    'Truncates a string'
    """
    if len(string) > num:
        string = string[:num - 13] + '...' + string[-10:]
    return string

def truncateWords(s, num):
    "Truncates a string after a certain number of words."
    length = int(num)
    words = s.split()
    if len(words) > length:
        words = words[:length]
        if not words[-1].endswith('...'):
            words.append('...')
    return ' '.join(words)

def getValidFilename(s):
    """
    Returns the given string converted to a string that can be used for a clean
    filename. Specifically, leading and trailing spaces are removed;
    all non-filename-safe characters are removed.

    >>> getValidFilename("john's portrait in 2004.jpg")
    'john_s_portrait_in_2004.jpg'
    """
    s = s.strip()
    s = s.replace(' ', '_')
    s = re.sub(r'[^-A-Za-z0-9_.\[\]\ ]', '_', s)
    s = s.replace('__', '_').replace('__', '_')
    return s

def getTextList(list_, last_word='or'):
    """
    >>> getTextList(['a', 'b', 'c', 'd'])
    'a, b, c or d'
    >>> getTextList(['a', 'b', 'c'], 'and')
    'a, b and c'
    >>> getTextList(['a', 'b'], 'and')
    'a and b'
    >>> getTextList(['a'])
    'a'
    >>> getTextList([])
    ''
    """
    if len(list_) == 0: return ''
    if len(list_) == 1: return list_[0]
    return '%s %s %s' % (', '.join([str(i) for i in list_][:-1]), last_word, list_[-1])

def getListText(text, last_word='or'):
    """
    >>> getListText('a, b, c or d')
    ['a', 'b', 'c', 'd']
    >>> getListText('a, b and c', 'and')
    ['a', 'b', 'c']
    >>> getListText('a and b', 'and')
    ['a', 'b']
    >>> getListText('a')
    ['a']
    >>> getListText('')
    []
    """
    list_ = []
    if text:
        list_ = text.split(', ')
        if list_:
            i=len(list_)-1
            last = list_[i].split(last_word)
            if len(last) == 2:
                list_[i] = last[0].strip()
                list_.append(last[1].strip())
    return list_

def normalizeNewlines(text):
    return re.sub(r'\r\n|\r|\n', '\n', text)

def recapitalize(text):
    "Recapitalizes text, placing caps after end-of-sentence punctuation."
-    # capwords = ()
+    #capwords = ()
    text = text.lower()
    capsRE = re.compile(r'(?:^|(?<=[\.\?\!] ))([a-z])')
    text = capsRE.sub(lambda x: x.group(1).upper(), text)
-    # for capword in capwords:
+    #for capword in capwords:
    # capwordRE = re.compile(r'\b%s\b' % capword, re.I)
    # text = capwordRE.sub(capword, text)
    return text

def phone2numeric(phone):
    "Converts a phone number with letters into its numeric equivalent."
    letters = re.compile(r'[A-PR-Y]', re.I)
    char2number = lambda m: {'a': '2', 'c': '2', 'b': '2', 'e': '3',
        'd': '3', 'g': '4', 'f': '3', 'i': '4', 'h': '4', 'k': '5',
        'j': '5', 'm': '6', 'l': '5', 'o': '6', 'n': '6', 'p': '7',
        's': '7', 'r': '7', 'u': '8', 't': '8', 'w': '9', 'v': '8',
        'y': '9', 'x': '9'}.get(m.group(0).lower())
    return letters.sub(char2number, phone)
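
phone2numeric() has no doctest of its own; a hedged sketch of what it is expected to do with a vanity number (illustrative, not part of this commit):

>>> phone2numeric('1-800-FLOWERS')
'1-800-3569377'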

def compressString(s):
    import cStringIO, gzip
    zbuf = cStringIO.StringIO()
    zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuf)
    zfile.write(s)
    zfile.close()
    return zbuf.getvalue()
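
The inverse direction is not provided in this module; a round-trip sketch using only the Python 2 standard library (an assumption, not code from this commit):

>>> import cStringIO, gzip
>>> data = compressString('hello world')
>>> gzip.GzipFile(fileobj=cStringIO.StringIO(data)).read()
'hello world'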

smart_split_re = re.compile('("(?:[^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'(?:[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'|[^\\s]+)')
def smartSplit(text):
    """
    Generator that splits a string by spaces, leaving quoted phrases together.
    Supports both single and double quotes, and supports escaping quotes with
    backslashes. In the output, strings will keep their initial and trailing
    quote marks.
    >>> list(smartSplit('This is "a person\\'s" test.'))
    ['This', 'is', '"a person\\'s"', 'test.']
    """
    for bit in smart_split_re.finditer(text):
        bit = bit.group(0)
        if bit[0] == '"':
            yield '"' + bit[1:-1].replace('\\"', '"').replace('\\\\', '\\') + '"'
        elif bit[0] == "'":
            yield "'" + bit[1:-1].replace("\\'", "'").replace("\\\\", "\\") + "'"
        else:
            yield bit

oxutils/torrent.py

@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
# Written 2007 by j@mailb.org

from threading import Event

@@ -11,50 +12,50 @@ from BitTornado.bencode import bencode, bdecode

def createTorrent(file, url, params = {}, flag = Event(),
                  progress = lambda x: None, progress_percent = 1):
    "Creates a torrent for a given file, using url as tracker url"
    return make_meta_file(file, url, params, flag, progress, progress_percent)

def getInfoHash(torrentFile):
    "Returns Torrent Info Hash from torrent file"
    metainfo_file = open(torrentFile, 'rb')
    metainfo = bdecode(metainfo_file.read())
    info = metainfo['info']
    return sha.sha(bencode(info)).hexdigest().upper()

def getTorrentInfoFromFile(torrentFile):
    f = open(torrentFile, 'rb')
    data = f.read()
    f.close()
    tinfo = getTorrentInfo(data)
    tinfo['timestamp'] = stat(torrentFile).st_ctime
    return tinfo

def getTorrentInfo(data):
    "Returns Torrent Info from torrent file"
    tinfo = {}
    metainfo = bdecode(data)
    info = metainfo['info']
    piece_length = info['piece length']
    if info.has_key('length'):
        # let's assume we just have one file
        file_length = info['length']
    else:
        # let's assume we have a directory structure
        file_length = 0;
        for f in info['files']:
            file_length += f['length']
    for key in info:
        if key != 'pieces':
            tinfo[key] = info[key]
    for key in metainfo:
        if key != 'info':
            tinfo[key] = metainfo[key]
    tinfo['size'] = file_length
    tinfo['hash'] = sha.sha(bencode(info)).hexdigest()
    tinfo['announce'] = metainfo['announce']
    return tinfo

def getTorrentSize(torrentFile):
    "Returns Size of files in torrent file in bytes"
    return getTorrentInfoFromFile(torrentFile)['size']
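
A rough usage sketch for the helpers above, assuming BitTornado is installed and that 'example.torrent' is a local file (both are assumptions, not part of this diff):

# 'example.torrent' is a hypothetical local torrent file
tinfo = getTorrentInfoFromFile('example.torrent')
print tinfo['announce'], tinfo['size'], tinfo['hash']
print getInfoHash('example.torrent')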

46	setup.py

@@ -1,30 +1,30 @@
#!/usr/bin/env python
-# vi:si:et:sw=2:sts=2:ts=2
+# vi:si:et:sw=4:sts=4:ts=4
# encoding: utf-8
from setuptools import setup, find_packages

setup(
    name="oxutils",
    version="0.1",

    description="collection of utils used to work with python",
    author="0x",
    author_email="code@0xdb.org",
    url="http://code.0xdb.org/oxutils",
    download_url="http://code.0xdb.org/oxutils/download",
    license="GPLv3",
    packages=find_packages(),
    zip_safe=False,
    install_requires=[
        'chardet',
    ],
    keywords = [
    ],
    classifiers = [
        'Development Status :: 3 - Alpha',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Topic :: Software Development :: Libraries :: Python Modules',
    ],
)
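
Since the package metadata uses setuptools, a checkout of this tree should install in the usual way, e.g. with `python setup.py install` (or `python setup.py develop` for an in-place development install); this is the generic setuptools workflow rather than anything specific to this commit.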