vi:si:et:sw=4:sts=4:ts=4

This commit is contained in:
j 2008-06-19 11:21:21 +02:00
parent dafe20aa04
commit 4a6e2702b4
11 changed files with 921 additions and 921 deletions

View file

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2 # vi:si:et:sw=4:sts=4:ts=4
# 2008 # 2008
from hashes import * from hashes import *
@ -11,7 +11,7 @@ import cache
#only works if BitTornado is installed #only works if BitTornado is installed
try: try:
from torrent import * from torrent import *
except: except:
pass pass

View file

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2 # vi:si:et:sw=4:sts=4:ts=4
# 2008 # 2008
import gzip import gzip
import StringIO import StringIO
@ -19,99 +19,99 @@ from net import DEFAULT_HEADERS, getEncoding
cache_timeout = 30*24*60*60 # default is 30 days cache_timeout = 30*24*60*60 # default is 30 days
def status(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
    '''
    Return the HTTP status code for url, served from the URL cache
    when a fresh enough entry exists.

    >>> status('http://google.com')
    200
    >>> status('http://google.com/mysearch')
    404
    '''
    # Bug fix: timeout was accepted but never forwarded, so callers could
    # not control cache expiry for status lookups.
    headers = getHeaders(url, data, headers, timeout)
    return int(headers['status'])
def exists(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
    '''
    Report whether url answers with a non-error status (2xx or 3xx).

    >>> exists('http://google.com')
    True
    >>> exists('http://google.com/mysearch')
    False
    '''
    code = status(url, data, headers, timeout)
    return 200 <= code < 400
def getHeaders(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
    '''
    Return the response headers for url as a dict.

    Headers are cached next to the body in a ".headers" sidecar file,
    serialized as JSON; a cache miss triggers a network lookup and
    repopulates the sidecar.
    '''
    url_cache_file = "%s.headers" % getUrlCacheFile(url, data, headers)
    cached = loadUrlCache(url_cache_file, timeout)
    if cached:
        return simplejson.loads(cached)
    url_headers = net.getHeaders(url, data, headers)
    saveUrlHeaders(url_cache_file, url_headers)
    return url_headers
def getUrl(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout): def getUrl(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
url_cache_file = getUrlCacheFile(url, data, headers) url_cache_file = getUrlCacheFile(url, data, headers)
result = loadUrlCache(url_cache_file, timeout) result = loadUrlCache(url_cache_file, timeout)
if not result: if not result:
try: try:
url_headers, result = net.getUrl(url, data, headers, returnHeaders=True) url_headers, result = net.getUrl(url, data, headers, returnHeaders=True)
except urllib2.HTTPError, e: except urllib2.HTTPError, e:
e.headers['Status'] = "%s" % e.code e.headers['Status'] = "%s" % e.code
url_headers = dict(e.headers) url_headers = dict(e.headers)
result = e.read() result = e.read()
if url_headers.get('content-encoding', None) == 'gzip': if url_headers.get('content-encoding', None) == 'gzip':
result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read() result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()
saveUrlCache(url_cache_file, result, url_headers) saveUrlCache(url_cache_file, result, url_headers)
return result return result
def getUrlUnicode(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout, _getUrl=getUrl):
    '''
    Fetch url (via _getUrl, injectable for testing) and decode the body.

    Falls back to latin-1 when no encoding can be detected, since latin-1
    maps every byte and therefore never fails to decode.
    '''
    raw = _getUrl(url, data, headers, timeout)
    return unicode(raw, getEncoding(raw) or 'latin-1')
def getCacheBase():
    '''Return the cache base directory: $oxCACHE if set, else ~/.ox/cache.'''
    return os.environ.get('oxCACHE', os.path.expanduser('~/.ox/cache'))
def getUrlCacheFile(url, data=None, headers=DEFAULT_HEADERS):
    '''
    Map a request to its cache path:
    <base>/<domain>/<hh>/<hh>/<hh>/<sha1>, where the hash covers the URL
    (plus "?data" for POST-style requests) and the nested two-character
    directories keep any single folder small.
    '''
    if data:
        key = url + '?' + data
    else:
        key = url
    url_hash = sha.sha(key).hexdigest()
    domain = ".".join(urlparse.urlparse(url)[1].split('.')[-2:])
    return os.path.join(getCacheBase(), domain,
                        url_hash[:2], url_hash[2:4], url_hash[4:6], url_hash)
def loadUrlCache(url_cache_file, timeout=cache_timeout):
    '''
    Return the cached body from url_cache_file, or None if missing or stale.

    timeout semantics: 0 disables the cache entirely, a negative value
    means "never expires", a positive value is a maximum age in seconds.
    '''
    if timeout == 0:
        return None
    if os.path.exists(url_cache_file):
        # NOTE(review): st_ctime is inode-change time on Unix, not creation
        # time; adequate as a freshness stamp for files written once.
        ctime = os.stat(url_cache_file).st_ctime
        file_age = time.mktime(time.localtime()) - ctime
        if timeout < 0 or file_age < timeout:
            # with-block closes the handle even if read() raises
            # (the original leaked it on error).
            with open(url_cache_file) as f:
                return f.read()
    return None
def saveUrlCache(url_cache_file, data, headers):
    '''
    Write the response body to url_cache_file and its headers to the
    ".headers" sidecar, creating parent directories as needed.
    '''
    folder = os.path.dirname(url_cache_file)
    if not os.path.exists(folder):
        os.makedirs(folder)
    # with-block guarantees the handle is closed even if write() raises
    # (the original leaked it on error).
    with open(url_cache_file, 'w') as f:
        f.write(data)
    saveUrlHeaders("%s.headers" % url_cache_file, headers)
def saveUrlHeaders(url_cache_file, headers):
    '''
    Serialize headers as JSON into url_cache_file, creating parent
    directories as needed.
    '''
    folder = os.path.dirname(url_cache_file)
    if not os.path.exists(folder):
        os.makedirs(folder)
    # with-block guarantees the handle is closed even on a write error
    # (the original leaked it).
    with open(url_cache_file, 'w') as f:
        f.write(simplejson.dumps(headers))

View file

@ -1,208 +1,207 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2 # vi:si:et:sw=4:sts=4:ts=4
import math import math
import re import re
def to36(q):
    """
    Convert a non-negative integer to base 36 (digits 0-9 then a-z),
    a useful scheme for human-sayable IDs.

    >>> to36(35)
    'z'
    >>> to36(119292)
    '2k1o'
    >>> int(to36(939387374), 36)
    939387374
    >>> to36(0)
    '0'
    >>> to36(-393)
    Traceback (most recent call last):
        ...
    ValueError: must supply a positive integer
    """
    if q < 0:
        raise ValueError("must supply a positive integer")
    digits = "0123456789abcdefghijklmnopqrstuvwxyz"
    chunks = []
    while q != 0:
        q, rem = divmod(q, 36)
        chunks.append(digits[rem])
    return "".join(reversed(chunks)) or '0'
def from36(q):
    '''Parse a base-36 string back into an integer (inverse of to36).'''
    return int(q, 36)
def intValue(strValue, default=''):
    '''
    Return the first run of digits in strValue as a string,
    or default when strValue contains no digits.
    '''
    try:
        return re.compile('(\d+)').findall(unicode(strValue).strip())[0]
    except:
        # Deliberate best-effort: any failure (no match, odd input type)
        # falls back to the caller-supplied default.
        return default
def test_intValue():
    '''Sanity checks for intValue.'''
    cases = (('abc23', '23'), (' abc23', '23'), (' abc', ''))
    for given, expected in cases:
        assert intValue(given) == expected
def floatValue(strValue, default=''):
    '''
    Return the first run of digits and dots in strValue as a string,
    or default when nothing numeric is found.
    '''
    try:
        return re.compile('([\d.]+)').findall(unicode(strValue).strip())[0]
    except:
        # Deliberate best-effort: any failure falls back to default.
        return default
def test_floatValue():
    '''Sanity checks for floatValue.'''
    cases = (('abc23.4', '23.4'), (' abc23.4', '23.4'), (' abc', ''))
    for given, expected in cases:
        assert floatValue(given) == expected
def formatNumber(number, longName, shortName):
    """
    Return number in a human-readable form (23 KB, 23.4 MB, 23.42 GB).
    Values below 1024 use longName (pluralized); larger values use a
    binary prefix K/M/G/T/P, shown with i decimal places at prefix
    index i.

    >>> formatNumber(123, 'Byte', 'B')
    '123 Bytes'
    >>> formatNumber(1234, 'Byte', 'B')
    '1 KB'
    >>> formatNumber(1234567, 'Byte', 'B')
    '1.2 MB'
    >>> formatNumber(1234567890, 'Byte', 'B')
    '1.15 GB'
    >>> formatNumber(1234567890123456789, 'Byte', 'B')
    '1,096.5166 PB'
    """
    if number < 1024:
        suffix = number != 1 and 's' or ''
        return '%s %s%s' % (formatThousands(number), longName, suffix)
    prefixes = ['K', 'M', 'G', 'T', 'P']
    for i, prefix in enumerate(prefixes):
        # Last prefix (P) absorbs everything too large for the table.
        if i == 4 or number < math.pow(1024, i + 2):
            scaled = number / math.pow(1024, i + 1)
            return '%s %s%s' % (formatThousands('%.*f' % (i, scaled)),
                                prefix, shortName)
def formatThousands(number, separator=','):
    """
    Return number with thousands separators inserted (1,000,000).
    Any fractional part is left untouched.

    >>> formatThousands(1)
    '1'
    >>> formatThousands(1000)
    '1,000'
    >>> formatThousands(1000000)
    '1,000,000'
    """
    parts = str(number).split('.')
    digits = parts[0]
    groups = []
    # Peel three characters at a time off the right-hand end.
    while len(digits) > 3:
        groups.insert(0, digits[-3:])
        digits = digits[:-3]
    groups.insert(0, digits)
    parts[0] = separator.join(groups)
    return '.'.join(parts)
def formatBits(number):
    '''Format number as a quantity of bits, e.g. '1.2 Mb'.'''
    return formatNumber(number, 'bit', 'b')
def formatBytes(number):
    '''Format number as a quantity of bytes, e.g. '1.2 MB'.'''
    return formatNumber(number, 'byte', 'B')
def formatPixels(number):
    '''Format number as a quantity of pixels, e.g. '1.2 Mpx'.'''
    return formatNumber(number, 'pixel', 'px')
def plural(amount, unit, plural='s'):
    '''
    Format amount with unit, pluralizing when abs(amount) != 1.
    A non-default plural argument replaces the unit entirely
    (e.g. plural(2, 'box', 'boxes') -> '2 boxes').

    >>> plural(1, 'unit')
    '1 unit'
    >>> plural(2, 'unit')
    '2 units'
    '''
    if abs(amount) != 1:
        unit = unit + plural if plural == 's' else plural
    return "%s %s" % (formatThousands(amount), unit)
def ms2runtime(ms):
    '''
    Format a duration in milliseconds as a human-readable runtime,
    dropping components that are zero.

    >>> ms2runtime(5000)
    '5 seconds'
    >>> ms2runtime(500000)
    '8 minutes 20 seconds'
    >>> ms2runtime(50000000)
    '13 hours 53 minutes 20 seconds'
    >>> ms2runtime(50000000-20000)
    '13 hours 53 minutes'
    '''
    seconds = int(ms / 1000)
    years = days = hours = minutes = 0
    if seconds >= 60:
        minutes, seconds = int(seconds / 60), seconds % 60
    if minutes >= 60:
        hours, minutes = int(minutes / 60), minutes % 60
    if hours >= 24:
        days, hours = int(hours / 24), hours % 24
    if days >= 365:
        years, days = int(days / 365), days % 365
    parts = (plural(years, 'year'), plural(days, 'day'),
             plural(hours, 'hour'), plural(minutes, 'minute'),
             plural(seconds, 'second'))
    # Components rendered as "0 ..." are dropped from the output.
    parts = filter(lambda x: not x.startswith('0'), parts)
    return " ".join(parts).strip()
def ms2playtime(ms):
    '''
    Format milliseconds as a playtime clock: MM:SS, or HH:MM:SS once
    the duration reaches an hour.

    >>> ms2playtime(5000)
    '00:05'
    >>> ms2playtime(500000)
    '08:20'
    >>> ms2playtime(50000000)
    '13:53:20'
    '''
    total = int(ms / 1000)
    secs = total % 60
    mins = ((total - secs) / 60) % 60
    hours = ((total - (mins * 60) - secs) / 3600) % 60
    if hours:
        return "%02d:%02d:%02d" % (hours, mins, secs)
    return "%02d:%02d" % (mins, secs)
def ms2time(ms):
    '''
    Format milliseconds as H:MM:SS.mmm.

    >>> ms2time(44592123)
    '12:23:12.123'
    '''
    total = int(ms / 1000)
    millis = ms - total * 1000
    secs = total % 60
    mins = ((total - secs) / 60) % 60
    hours = ((total - (mins * 60) - secs) / 3600) % 60
    return "%d:%02d:%02d.%03d" % (hours, mins, secs, millis)
def time2ms(timeString):
    '''
    Parse a colon-separated [[HH:]MM:]SS[.mmm] string into milliseconds.

    >>> time2ms('12:23:12.123')
    44592123
    '''
    total = 0.0
    for part in timeString.split(':'):
        # Each colon shifts the accumulated value up by a factor of 60.
        total = total * 60 + float(part)
    return int(total * 1000)
def shiftTime(offset, timeString):
    '''Shift timeString by offset milliseconds and re-format it.'''
    return ms2time(time2ms(timeString) + offset)

View file

@ -1,17 +1,17 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2 # vi:si:et:sw=4:sts=4:ts=4
# GPL written 2008 by j@pad.ma # GPL written 2008 by j@pad.ma
import sha import sha
import os import os
def sha1sum(filename):
    '''
    Return the hexadecimal SHA-1 digest of the file at filename.

    Reads in 4 KB chunks so large files are hashed without loading
    them into memory at once.
    '''
    digest = sha.new()
    # 'rb' (the original used text mode) so the digest is byte-exact on
    # every platform; the with-block closes the handle even on error and
    # the renamed locals no longer shadow the file/buffer builtins.
    with open(filename, 'rb') as f:
        chunk = f.read(4096)
        while chunk:
            digest.update(chunk)
            chunk = f.read(4096)
    return digest.hexdigest()

View file

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2 # vi:si:et:sw=4:sts=4:ts=4
# GPL written 2008 by j@pad.ma # GPL written 2008 by j@pad.ma
import re import re
import string import string
@ -26,147 +26,147 @@ trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\
del x # Temporary variable del x # Temporary variable
def escape(html):
    '''
    Return html with ampersands, quotes and angle brackets encoded
    as HTML entities. Non-string input is stringified first.

    >>> escape('html "test" & <brothers>')
    'html &quot;test&quot; &amp; &lt;brothers&gt;'
    '''
    if not isinstance(html, basestring):
        html = str(html)
    # '&' must be replaced first, or the other entities get double-escaped.
    for char, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'),
                         ('"', '&quot;'), ("'", '&#39;')):
        html = html.replace(char, entity)
    return html
def linebreaks(value):
    '''
    Convert newlines to HTML: blank-line-separated chunks become <p>
    paragraphs; single newlines inside a chunk become <br />.
    '''
    normalized = re.sub(r'\r\n|\r|\n', '\n', value)  # normalize newlines
    paragraphs = []
    for chunk in re.split('\n{2,}', normalized):
        paragraphs.append('<p>%s</p>' % chunk.strip().replace('\n', '<br />'))
    return '\n\n'.join(paragraphs)
def stripTags(value):
    """
    Return value with all HTML tags removed; the text between tags
    is kept.

    >>> stripTags('some <h2>title</h2> <script>asdfasdf</script>')
    'some title asdfasdf'
    """
    return re.sub(r'<[^>]*?>', '', value)
def stripSpacesBetweenTags(value):
    '''Collapse whitespace runs between adjacent tags to a single space.'''
    return re.sub(r'>\s+<', '> <', value)
def stripEntities(value):
    "Returns the given HTML with all entities (&something;) stripped"
    # \d+ (the original had a lone \d) so multi-digit numeric entities
    # such as &#123; are stripped as documented, not just single-digit ones.
    return re.sub(r'&(?:\w+|#\d+);', '', value)
def fixAmpersands(value):
    '''Encode stray, unencoded ampersands in value as &amp;.'''
    return unencoded_ampersands_re.sub('&amp;', value)
def urlize(text, trim_url_limit=None, nofollow=False):
    """
    Convert URLs in text into clickable <a> links. Handles http://,
    https:// and www. forms, bare domains ending in .org/.net/.com, and
    e-mail addresses. Leading and trailing punctuation around a candidate
    word is preserved outside the link.

    If trim_url_limit is not None, the visible link text is truncated to
    that many characters (with a '...' suffix).
    If nofollow is True, links get a rel="nofollow" attribute.
    """
    trim_url = lambda x, limit=trim_url_limit: limit is not None and (x[:limit] + (len(x) >= limit and '...' or '')) or x
    words = word_split_re.split(text)
    nofollow_attr = nofollow and ' rel="nofollow"' or ''
    for i, word in enumerate(words):
        match = punctuation_re.match(word)
        if not match:
            continue
        lead, middle, trail = match.groups()
        # Bare domain (www.* or something.org/.net/.com): assume http.
        if middle.startswith('www.') or ('@' not in middle and not middle.startswith('http://') and
                len(middle) > 0 and middle[0] in string.letters + string.digits and
                (middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com'))):
            middle = '<a href="http://%s"%s>%s</a>' % (middle, nofollow_attr, trim_url(middle))
        if middle.startswith('http://') or middle.startswith('https://'):
            middle = '<a href="%s"%s>%s</a>' % (middle, nofollow_attr, trim_url(middle))
        if '@' in middle and not middle.startswith('www.') and not ':' in middle \
                and simple_email_re.match(middle):
            middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
        if lead + middle + trail != word:
            words[i] = lead + middle + trail
    return ''.join(words)
def cleanHtml(text):
    """
    Clean the given HTML. Specifically:
      * Converts <b> and <i> to <strong> and <em>.
      * Encodes all ampersands correctly.
      * Removes all "target" attributes from <a> tags.
      * Removes extraneous HTML, such as presentational tags that open
        and immediately close and <br clear="all">.
      * Converts hard-coded bullets into HTML unordered lists.
      * Removes stuff like "<p>&nbsp;&nbsp;</p>", but only at the bottom
        of the text.
    """
    from text import normalizeNewlines
    text = normalizeNewlines(text)
    text = re.sub(r'<(/?)\s*b\s*>', '<\\1strong>', text)
    text = re.sub(r'<(/?)\s*i\s*>', '<\\1em>', text)
    text = fixAmpersands(text)
    # Remove all target="" attributes from <a> tags.
    text = link_target_attribute_re.sub('\\1', text)
    # Trim stupid HTML such as <br clear="all">.
    text = html_gunk_re.sub('', text)

    def replace_p_tags(match):
        # Turn a run of bullet paragraphs into a <ul> of <li> items.
        s = match.group().replace('</p>', '</li>')
        for d in DOTS:
            s = s.replace('<p>%s' % d, '<li>')
        return '<ul>\n%s\n</ul>' % s
    # Convert hard-coded bullets into HTML unordered lists.
    text = hard_coded_bullets_re.sub(replace_p_tags, text)
    # Remove trailing empty content like "<p>&nbsp;&nbsp;</p>".
    return trailing_empty_content_re.sub('', text)
# Matches a character entity reference: a decimal numeric reference
# (&#123;), a hexadecimal numeric reference (&#x7b;), or a named
# reference (&amp;). The trailing semicolon is optional.
charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?')
def decodeHtml(html):
    """
    Replace HTML character entities in html with the characters they
    name, and no-break spaces (u'\\xa0') with plain spaces.

    >>> decodeHtml('me &amp; you and &#36;&#38;%')
    u'me & you and $&%'
    """
    if type(html) != unicode:
        html = unicode(html)[:]
    if type(html) is unicode:
        uchr = unichr
    else:
        # Byte strings: stay in the 8-bit range where possible.
        uchr = lambda value: value > 255 and unichr(value) or chr(value)

    def entitydecode(match, uchr=uchr):
        entity = match.group(1)
        if entity.startswith('#x'):
            return uchr(int(entity[2:], 16))
        elif entity.startswith('#'):
            return uchr(int(entity[1:]))
        elif entity in name2codepoint:
            return uchr(name2codepoint[entity])
        else:
            # Unknown entity: leave the original text untouched.
            return match.group(0)
    return charrefpat.sub(entitydecode, html).replace(u'\xa0', ' ')
def highlight(text, query, hlClass="hl"):
    """
    Wrap occurrences of query in text in a <span class="hlClass"> element,
    matching case-insensitively while preserving the matched text's case.
    <br /> tags are protected from matching by temporarily swapping them
    for '|'.

    >>> highlight('me &amp; you and &#36;&#38;%', 'and')
    'me &amp; you <span class="hl">and</span> &#36;&#38;%'
    """
    if query:
        text = text.replace('<br />', '|')
        query = re.escape(query).replace('\ ', '.')
        # Single case-insensitive substitution. The original re-ran one
        # substitution per findall() hit, which re-matched inside the
        # markup added by earlier passes (e.g. the letters of "class")
        # and produced nested <span>s when query occurred more than once.
        pattern = re.compile("(%s)" % query, re.IGNORECASE)
        text = pattern.sub('<span class="%s">\\1</span>' % hlClass, text)
        text = text.replace('|', '<br />')
    return text

View file

@ -1,236 +1,236 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2 # vi:si:et:sw=4:sts=4:ts=4
_iso639_languages = [ _iso639_languages = [
("Unknown", "", "", "und"), ("Unknown", "", "", "und"),
("Afar", "", "aa", "aar"), ("Afar", "", "aa", "aar"),
("Abkhazian", "", "ab", "abk"), ("Abkhazian", "", "ab", "abk"),
("Afrikaans", "", "af", "afr"), ("Afrikaans", "", "af", "afr"),
("Akan", "", "ak", "aka"), ("Akan", "", "ak", "aka"),
("Albanian", "", "sq", "sqi"), ("Albanian", "", "sq", "sqi"),
("Amharic", "", "am", "amh"), ("Amharic", "", "am", "amh"),
("Arabic", "", "ar", "ara"), ("Arabic", "", "ar", "ara"),
("Aragonese", "", "an", "arg"), ("Aragonese", "", "an", "arg"),
("Armenian", "", "hy", "hye"), ("Armenian", "", "hy", "hye"),
("Assamese", "", "as", "asm"), ("Assamese", "", "as", "asm"),
("Avaric", "", "av", "ava"), ("Avaric", "", "av", "ava"),
("Avestan", "", "ae", "ave"), ("Avestan", "", "ae", "ave"),
("Aymara", "", "ay", "aym"), ("Aymara", "", "ay", "aym"),
("Azerbaijani", "", "az", "aze"), ("Azerbaijani", "", "az", "aze"),
("Bashkir", "", "ba", "bak"), ("Bashkir", "", "ba", "bak"),
("Bambara", "", "bm", "bam"), ("Bambara", "", "bm", "bam"),
("Basque", "", "eu", "eus"), ("Basque", "", "eu", "eus"),
("Belarusian", "", "be", "bel"), ("Belarusian", "", "be", "bel"),
("Bengali", "", "bn", "ben"), ("Bengali", "", "bn", "ben"),
("Bihari", "", "bh", "bih"), ("Bihari", "", "bh", "bih"),
("Bislama", "", "bi", "bis"), ("Bislama", "", "bi", "bis"),
("Bosnian", "", "bs", "bos"), ("Bosnian", "", "bs", "bos"),
("Breton", "", "br", "bre"), ("Breton", "", "br", "bre"),
("Bulgarian", "", "bg", "bul"), ("Bulgarian", "", "bg", "bul"),
("Burmese", "", "my", "mya"), ("Burmese", "", "my", "mya"),
("Catalan", "", "ca", "cat"), ("Catalan", "", "ca", "cat"),
("Chamorro", "", "ch", "cha"), ("Chamorro", "", "ch", "cha"),
("Chechen", "", "ce", "che"), ("Chechen", "", "ce", "che"),
("Chinese", "", "zh", "zho"), ("Chinese", "", "zh", "zho"),
("Church Slavic", "", "cu", "chu"), ("Church Slavic", "", "cu", "chu"),
("Chuvash", "", "cv", "chv"), ("Chuvash", "", "cv", "chv"),
("Cornish", "", "kw", "cor"), ("Cornish", "", "kw", "cor"),
("Corsican", "", "co", "cos"), ("Corsican", "", "co", "cos"),
("Cree", "", "cr", "cre"), ("Cree", "", "cr", "cre"),
("Czech", "", "cs", "ces"), ("Czech", "", "cs", "ces"),
("Danish", "Dansk", "da", "dan"), ("Danish", "Dansk", "da", "dan"),
("Divehi", "", "dv", "div"), ("Divehi", "", "dv", "div"),
("Dutch", "Nederlands", "nl", "nld"), ("Dutch", "Nederlands", "nl", "nld"),
("Dzongkha", "", "dz", "dzo"), ("Dzongkha", "", "dz", "dzo"),
("English", "English", "en", "eng"), ("English", "English", "en", "eng"),
("Esperanto", "", "eo", "epo"), ("Esperanto", "", "eo", "epo"),
("Estonian", "", "et", "est"), ("Estonian", "", "et", "est"),
("Ewe", "", "ee", "ewe"), ("Ewe", "", "ee", "ewe"),
("Faroese", "", "fo", "fao"), ("Faroese", "", "fo", "fao"),
("Fijian", "", "fj", "fij"), ("Fijian", "", "fj", "fij"),
("Finnish", "Suomi", "fi", "fin"), ("Finnish", "Suomi", "fi", "fin"),
("French", "Francais", "fr", "fra"), ("French", "Francais", "fr", "fra"),
("Western Frisian", "", "fy", "fry"), ("Western Frisian", "", "fy", "fry"),
("Fulah", "", "ff", "ful"), ("Fulah", "", "ff", "ful"),
("Georgian", "", "ka", "kat"), ("Georgian", "", "ka", "kat"),
("German", "Deutsch", "de", "deu"), ("German", "Deutsch", "de", "deu"),
("Gaelic (Scots)", "", "gd", "gla"), ("Gaelic (Scots)", "", "gd", "gla"),
("Irish", "", "ga", "gle"), ("Irish", "", "ga", "gle"),
("Galician", "", "gl", "glg"), ("Galician", "", "gl", "glg"),
("Manx", "", "gv", "glv"), ("Manx", "", "gv", "glv"),
("Greek, Modern", "", "el", "ell"), ("Greek, Modern", "", "el", "ell"),
("Guarani", "", "gn", "grn"), ("Guarani", "", "gn", "grn"),
("Gujarati", "", "gu", "guj"), ("Gujarati", "", "gu", "guj"),
("Haitian", "", "ht", "hat"), ("Haitian", "", "ht", "hat"),
("Hausa", "", "ha", "hau"), ("Hausa", "", "ha", "hau"),
("Hebrew", "", "he", "heb"), ("Hebrew", "", "he", "heb"),
("Herero", "", "hz", "her"), ("Herero", "", "hz", "her"),
("Hindi", "", "hi", "hin"), ("Hindi", "", "hi", "hin"),
("Hiri Motu", "", "ho", "hmo"), ("Hiri Motu", "", "ho", "hmo"),
("Hungarian", "Magyar", "hu", "hun"), ("Hungarian", "Magyar", "hu", "hun"),
("Igbo", "", "ig", "ibo"), ("Igbo", "", "ig", "ibo"),
("Icelandic", "Islenska", "is", "isl"), ("Icelandic", "Islenska", "is", "isl"),
("Ido", "", "io", "ido"), ("Ido", "", "io", "ido"),
("Sichuan Yi", "", "ii", "iii"), ("Sichuan Yi", "", "ii", "iii"),
("Inuktitut", "", "iu", "iku"), ("Inuktitut", "", "iu", "iku"),
("Interlingue", "", "ie", "ile"), ("Interlingue", "", "ie", "ile"),
("Interlingua", "", "ia", "ina"), ("Interlingua", "", "ia", "ina"),
("Indonesian", "", "id", "ind"), ("Indonesian", "", "id", "ind"),
("Inupiaq", "", "ik", "ipk"), ("Inupiaq", "", "ik", "ipk"),
("Italian", "Italiano", "it", "ita"), ("Italian", "Italiano", "it", "ita"),
("Javanese", "", "jv", "jav"), ("Javanese", "", "jv", "jav"),
("Japanese", "", "ja", "jpn"), ("Japanese", "", "ja", "jpn"),
("Kalaallisut (Greenlandic)", "", "kl", "kal"), ("Kalaallisut (Greenlandic)", "", "kl", "kal"),
("Kannada", "", "kn", "kan"), ("Kannada", "", "kn", "kan"),
("Kashmiri", "", "ks", "kas"), ("Kashmiri", "", "ks", "kas"),
("Kanuri", "", "kr", "kau"), ("Kanuri", "", "kr", "kau"),
("Kazakh", "", "kk", "kaz"), ("Kazakh", "", "kk", "kaz"),
("Central Khmer", "", "km", "khm"), ("Central Khmer", "", "km", "khm"),
("Kikuyu", "", "ki", "kik"), ("Kikuyu", "", "ki", "kik"),
("Kinyarwanda", "", "rw", "kin"), ("Kinyarwanda", "", "rw", "kin"),
("Kirghiz", "", "ky", "kir"), ("Kirghiz", "", "ky", "kir"),
("Komi", "", "kv", "kom"), ("Komi", "", "kv", "kom"),
("Kongo", "", "kg", "kon"), ("Kongo", "", "kg", "kon"),
("Korean", "", "ko", "kor"), ("Korean", "", "ko", "kor"),
("Kuanyama", "", "kj", "kua"), ("Kuanyama", "", "kj", "kua"),
("Kurdish", "", "ku", "kur"), ("Kurdish", "", "ku", "kur"),
("Lao", "", "lo", "lao"), ("Lao", "", "lo", "lao"),
("Latin", "", "la", "lat"), ("Latin", "", "la", "lat"),
("Latvian", "", "lv", "lav"), ("Latvian", "", "lv", "lav"),
("Limburgan", "", "li", "lim"), ("Limburgan", "", "li", "lim"),
("Lingala", "", "ln", "lin"), ("Lingala", "", "ln", "lin"),
("Lithuanian", "", "lt", "lit"), ("Lithuanian", "", "lt", "lit"),
("Luxembourgish", "", "lb", "ltz"), ("Luxembourgish", "", "lb", "ltz"),
("Luba-Katanga", "", "lu", "lub"), ("Luba-Katanga", "", "lu", "lub"),
("Ganda", "", "lg", "lug"), ("Ganda", "", "lg", "lug"),
("Macedonian", "", "mk", "mkd"), ("Macedonian", "", "mk", "mkd"),
("Marshallese", "", "mh", "mah"), ("Marshallese", "", "mh", "mah"),
("Malayalam", "", "ml", "mal"), ("Malayalam", "", "ml", "mal"),
("Maori", "", "mi", "mri"), ("Maori", "", "mi", "mri"),
("Marathi", "", "mr", "mar"), ("Marathi", "", "mr", "mar"),
("Malay", "", "ms", "msa"), ("Malay", "", "ms", "msa"),
("Malagasy", "", "mg", "mlg"), ("Malagasy", "", "mg", "mlg"),
("Maltese", "", "mt", "mlt"), ("Maltese", "", "mt", "mlt"),
("Moldavian", "", "mo", "mol"), ("Moldavian", "", "mo", "mol"),
("Mongolian", "", "mn", "mon"), ("Mongolian", "", "mn", "mon"),
("Nauru", "", "na", "nau"), ("Nauru", "", "na", "nau"),
("Navajo", "", "nv", "nav"), ("Navajo", "", "nv", "nav"),
("Ndebele, South", "", "nr", "nbl"), ("Ndebele, South", "", "nr", "nbl"),
("Ndebele, North", "", "nd", "nde"), ("Ndebele, North", "", "nd", "nde"),
("Ndonga", "", "ng", "ndo"), ("Ndonga", "", "ng", "ndo"),
("Nepali", "", "ne", "nep"), ("Nepali", "", "ne", "nep"),
("Norwegian Nynorsk", "", "nn", "nno"), ("Norwegian Nynorsk", "", "nn", "nno"),
("Norwegian Bokmål", "", "nb", "nob"), ("Norwegian Bokmål", "", "nb", "nob"),
("Norwegian", "Norsk", "no", "nor"), ("Norwegian", "Norsk", "no", "nor"),
("Chichewa; Nyanja", "", "ny", "nya"), ("Chichewa; Nyanja", "", "ny", "nya"),
("Occitan (post 1500); Provençal", "", "oc", "oci"), ("Occitan (post 1500); Provençal", "", "oc", "oci"),
("Ojibwa", "", "oj", "oji"), ("Ojibwa", "", "oj", "oji"),
("Oriya", "", "or", "ori"), ("Oriya", "", "or", "ori"),
("Oromo", "", "om", "orm"), ("Oromo", "", "om", "orm"),
("Ossetian; Ossetic", "", "os", "oss"), ("Ossetian; Ossetic", "", "os", "oss"),
("Panjabi", "", "pa", "pan"), ("Panjabi", "", "pa", "pan"),
("Persian", "", "fa", "fas"), ("Persian", "", "fa", "fas"),
("Pali", "", "pi", "pli"), ("Pali", "", "pi", "pli"),
("Polish", "", "pl", "pol"), ("Polish", "", "pl", "pol"),
("Portuguese", "Portugues", "pt", "por"), ("Portuguese", "Portugues", "pt", "por"),
("Pushto", "", "ps", "pus"), ("Pushto", "", "ps", "pus"),
("Quechua", "", "qu", "que"), ("Quechua", "", "qu", "que"),
("Romansh", "", "rm", "roh"), ("Romansh", "", "rm", "roh"),
("Romanian", "", "ro", "ron"), ("Romanian", "", "ro", "ron"),
("Rundi", "", "rn", "run"), ("Rundi", "", "rn", "run"),
("Russian", "", "ru", "rus"), ("Russian", "", "ru", "rus"),
("Sango", "", "sg", "sag"), ("Sango", "", "sg", "sag"),
("Sanskrit", "", "sa", "san"), ("Sanskrit", "", "sa", "san"),
("Serbian", "", "sr", "srp"), ("Serbian", "", "sr", "srp"),
("Croatian", "Hrvatski", "hr", "hrv"), ("Croatian", "Hrvatski", "hr", "hrv"),
("Sinhala", "", "si", "sin"), ("Sinhala", "", "si", "sin"),
("Slovak", "", "sk", "slk"), ("Slovak", "", "sk", "slk"),
("Slovenian", "", "sl", "slv"), ("Slovenian", "", "sl", "slv"),
("Northern Sami", "", "se", "sme"), ("Northern Sami", "", "se", "sme"),
("Samoan", "", "sm", "smo"), ("Samoan", "", "sm", "smo"),
("Shona", "", "sn", "sna"), ("Shona", "", "sn", "sna"),
("Sindhi", "", "sd", "snd"), ("Sindhi", "", "sd", "snd"),
("Somali", "", "so", "som"), ("Somali", "", "so", "som"),
("Sotho, Southern", "", "st", "sot"), ("Sotho, Southern", "", "st", "sot"),
("Spanish", "Espanol", "es", "spa"), ("Spanish", "Espanol", "es", "spa"),
("Sardinian", "", "sc", "srd"), ("Sardinian", "", "sc", "srd"),
("Swati", "", "ss", "ssw"), ("Swati", "", "ss", "ssw"),
("Sundanese", "", "su", "sun"), ("Sundanese", "", "su", "sun"),
("Swahili", "", "sw", "swa"), ("Swahili", "", "sw", "swa"),
("Swedish", "Svenska", "sv", "swe"), ("Swedish", "Svenska", "sv", "swe"),
("Tahitian", "", "ty", "tah"), ("Tahitian", "", "ty", "tah"),
("Tamil", "", "ta", "tam"), ("Tamil", "", "ta", "tam"),
("Tatar", "", "tt", "tat"), ("Tatar", "", "tt", "tat"),
("Telugu", "", "te", "tel"), ("Telugu", "", "te", "tel"),
("Tajik", "", "tg", "tgk"), ("Tajik", "", "tg", "tgk"),
("Tagalog", "", "tl", "tgl"), ("Tagalog", "", "tl", "tgl"),
("Thai", "", "th", "tha"), ("Thai", "", "th", "tha"),
("Tibetan", "", "bo", "bod"), ("Tibetan", "", "bo", "bod"),
("Tigrinya", "", "ti", "tir"), ("Tigrinya", "", "ti", "tir"),
("Tonga (Tonga Islands)", "", "to", "ton"), ("Tonga (Tonga Islands)", "", "to", "ton"),
("Tswana", "", "tn", "tsn"), ("Tswana", "", "tn", "tsn"),
("Tsonga", "", "ts", "tso"), ("Tsonga", "", "ts", "tso"),
("Turkmen", "", "tk", "tuk"), ("Turkmen", "", "tk", "tuk"),
("Turkish", "", "tr", "tur"), ("Turkish", "", "tr", "tur"),
("Twi", "", "tw", "twi"), ("Twi", "", "tw", "twi"),
("Uighur", "", "ug", "uig"), ("Uighur", "", "ug", "uig"),
("Ukrainian", "", "uk", "ukr"), ("Ukrainian", "", "uk", "ukr"),
("Urdu", "", "ur", "urd"), ("Urdu", "", "ur", "urd"),
("Uzbek", "", "uz", "uzb"), ("Uzbek", "", "uz", "uzb"),
("Venda", "", "ve", "ven"), ("Venda", "", "ve", "ven"),
("Vietnamese", "", "vi", "vie"), ("Vietnamese", "", "vi", "vie"),
("Volapük", "", "vo", "vol"), ("Volapük", "", "vo", "vol"),
("Welsh", "", "cy", "cym"), ("Welsh", "", "cy", "cym"),
("Walloon", "", "wa", "wln"), ("Walloon", "", "wa", "wln"),
("Wolof", "", "wo", "wol"), ("Wolof", "", "wo", "wol"),
("Xhosa", "", "xh", "xho"), ("Xhosa", "", "xh", "xho"),
("Yiddish", "", "yi", "yid"), ("Yiddish", "", "yi", "yid"),
("Yoruba", "", "yo", "yor"), ("Yoruba", "", "yo", "yor"),
("Zhuang", "", "za", "zha"), ("Zhuang", "", "za", "zha"),
("Zulu", "", "zu", "zul"), ("Zulu", "", "zu", "zul"),
] ]
def codeToLang(code): def codeToLang(code):
code = code.lower() code = code.lower()
if len(code) == 2: if len(code) == 2:
for l in _iso639_languages: for l in _iso639_languages:
if l[2] == code: if l[2] == code:
return l[0] return l[0]
elif len(code) == 3: elif len(code) == 3:
for l in _iso639_languages: for l in _iso639_languages:
if l[3] == code: if l[3] == code:
return l[0] return l[0]
return None return None
def langTo3Code(lang): def langTo3Code(lang):
lang = englishName(lang) lang = englishName(lang)
if lang: if lang:
lang=lang.lower() lang=lang.lower()
for l in _iso639_languages: for l in _iso639_languages:
if l[0].lower() == lang: if l[0].lower() == lang:
return l[3] return l[3]
return None return None
def langTo2Code(lang): def langTo2Code(lang):
lang = englishName(lang) lang = englishName(lang)
if lang: if lang:
lang=lang.lower() lang=lang.lower()
for l in _iso639_languages: for l in _iso639_languages:
if l[0].lower() == lang: if l[0].lower() == lang:
return l[2] return l[2]
return None return None
def langCode2To3(code): def langCode2To3(code):
langTo3Code(codeToLang(code)) langTo3Code(codeToLang(code))
def langCode3To2(code): def langCode3To2(code):
langTo2Code(codeToLang(code)) langTo2Code(codeToLang(code))
def englishName(lang): def englishName(lang):
lang = lang.lower() lang = lang.lower()
for l in _iso639_languages: for l in _iso639_languages:
if l[1].lower() == lang: if l[1].lower() == lang:
return l[0] return l[0]
return None return None

View file

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2 # vi:si:et:sw=4:sts=4:ts=4
import gzip import gzip
import StringIO import StringIO
import urllib import urllib
@ -10,64 +10,64 @@ from chardet.universaldetector import UniversalDetector
# Default headers for HTTP requests. # Default headers for HTTP requests.
DEFAULT_HEADERS = { DEFAULT_HEADERS = {
'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9) Gecko/2008061015 Firefox/3.0', 'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9) Gecko/2008061015 Firefox/3.0',
'Accept-Encoding': 'gzip' 'Accept-Encoding': 'gzip'
} }
def status(url, data=None, headers=DEFAULT_HEADERS): def status(url, data=None, headers=DEFAULT_HEADERS):
try: try:
f = openUrl(url, data, headers) f = openUrl(url, data, headers)
s = f.code s = f.code
except urllib2.HTTPError, e: except urllib2.HTTPError, e:
s = e.code s = e.code
return s return s
def exists(url, data=None, headers=DEFAULT_HEADERS): def exists(url, data=None, headers=DEFAULT_HEADERS):
s = status(url, data, headers) s = status(url, data, headers)
if s >= 200 and s < 400: if s >= 200 and s < 400:
return True return True
return False return False
def getHeaders(url, data=None, headers=DEFAULT_HEADERS): def getHeaders(url, data=None, headers=DEFAULT_HEADERS):
try: try:
f = openUrl(url, data, headers) f = openUrl(url, data, headers)
f.headers['Status'] = "%s" % f.code f.headers['Status'] = "%s" % f.code
headers = f.headers headers = f.headers
f.close() f.close()
except urllib2.HTTPError, e: except urllib2.HTTPError, e:
e.headers['Status'] = "%s" % e.code e.headers['Status'] = "%s" % e.code
headers = e.headers headers = e.headers
return dict(headers) return dict(headers)
def openUrl(url, data=None, headers=DEFAULT_HEADERS): def openUrl(url, data=None, headers=DEFAULT_HEADERS):
url = url.replace(' ', '%20') url = url.replace(' ', '%20')
req = urllib2.Request(url, data, headers) req = urllib2.Request(url, data, headers)
return urllib2.urlopen(req) return urllib2.urlopen(req)
def getUrl(url, data=None, headers=DEFAULT_HEADERS, returnHeaders=False): def getUrl(url, data=None, headers=DEFAULT_HEADERS, returnHeaders=False):
f = openUrl(url, data, headers) f = openUrl(url, data, headers)
data = f.read() data = f.read()
f.close() f.close()
if f.headers.get('content-encoding', None) == 'gzip': if f.headers.get('content-encoding', None) == 'gzip':
data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read() data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()
if returnHeaders: if returnHeaders:
f.headers['Status'] = "%s" % f.code f.headers['Status'] = "%s" % f.code
return dict(f.headers), data return dict(f.headers), data
return data return data
def getUrlUnicode(url): def getUrlUnicode(url):
data = getUrl(url) data = getUrl(url)
encoding = getEncoding(data) encoding = getEncoding(data)
if not encoding: if not encoding:
encoding = 'latin-1' encoding = 'latin-1'
return unicode(data, encoding) return unicode(data, encoding)
def getEncoding(data): def getEncoding(data):
detector = UniversalDetector() detector = UniversalDetector()
for line in data.split('\n'): for line in data.split('\n'):
detector.feed(line) detector.feed(line)
if detector.done: if detector.done:
break break
detector.close() detector.close()
return detector.result['encoding'] return detector.result['encoding']

View file

@ -1,79 +1,79 @@
# -*- Mode: Python; -*- # -*- Mode: Python; -*-
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2 # vi:si:et:sw=4:sts=4:ts=4
import re import re
_articles = ('the', 'la', 'a', 'die', 'der', 'le', 'el', _articles = ('the', 'la', 'a', 'die', 'der', 'le', 'el',
"l'", 'il', 'das', 'les', 'o', 'ein', 'i', 'un', 'los', 'de', "l'", 'il', 'das', 'les', 'o', 'ein', 'i', 'un', 'los', 'de',
'an', 'una', 'las', 'eine', 'den', 'gli', 'het', 'os', 'lo', 'an', 'una', 'las', 'eine', 'den', 'gli', 'het', 'os', 'lo',
'az', 'det', 'ha-', 'een', 'ang', 'oi', 'ta', 'al-', 'dem', 'az', 'det', 'ha-', 'een', 'ang', 'oi', 'ta', 'al-', 'dem',
'mga', 'uno', "un'", 'ett', u'\xcf', 'eines', u'\xc7', 'els', 'mga', 'uno', "un'", 'ett', u'\xcf', 'eines', u'\xc7', 'els',
u'\xd4\xef', u'\xcf\xe9') u'\xd4\xef', u'\xcf\xe9')
# Articles in a dictionary. # Articles in a dictionary.
_articlesDict = dict([(x, x) for x in _articles]) _articlesDict = dict([(x, x) for x in _articles])
_spArticles = [] _spArticles = []
for article in _articles: for article in _articles:
if article[-1] not in ("'", '-'): article += ' ' if article[-1] not in ("'", '-'): article += ' '
_spArticles.append(article) _spArticles.append(article)
def canonicalTitle(title): def canonicalTitle(title):
"""Return the title in the canonic format 'Movie Title, The'. """Return the title in the canonic format 'Movie Title, The'.
>>> canonicalTitle('The Movie Title') >>> canonicalTitle('The Movie Title')
'Movie Title, The' 'Movie Title, The'
""" """
try: try:
if _articlesDict.has_key(title.split(', ')[-1].lower()): return title if _articlesDict.has_key(title.split(', ')[-1].lower()): return title
except IndexError: pass except IndexError: pass
ltitle = title.lower() ltitle = title.lower()
for article in _spArticles: for article in _spArticles:
if ltitle.startswith(article): if ltitle.startswith(article):
lart = len(article) lart = len(article)
title = '%s, %s' % (title[lart:], title[:lart]) title = '%s, %s' % (title[lart:], title[:lart])
if article[-1] == ' ': title = title[:-1] if article[-1] == ' ': title = title[:-1]
break break
## XXX: an attempt using a dictionary lookup. ## XXX: an attempt using a dictionary lookup.
##for artSeparator in (' ', "'", '-'): ##for artSeparator in (' ', "'", '-'):
## article = _articlesDict.get(ltitle.split(artSeparator)[0]) ## article = _articlesDict.get(ltitle.split(artSeparator)[0])
## if article is not None: ## if article is not None:
## lart = len(article) ## lart = len(article)
## # check titles like "una", "I'm Mad" and "L'abbacchio". ## # check titles like "una", "I'm Mad" and "L'abbacchio".
## if title[lart:] == '' or (artSeparator != ' ' and ## if title[lart:] == '' or (artSeparator != ' ' and
## title[lart:][1] != artSeparator): continue ## title[lart:][1] != artSeparator): continue
## title = '%s, %s' % (title[lart:], title[:lart]) ## title = '%s, %s' % (title[lart:], title[:lart])
## if artSeparator == ' ': title = title[1:] ## if artSeparator == ' ': title = title[1:]
## break ## break
return title return title
def normalizeTitle(title): def normalizeTitle(title):
"""Return the title in the normal "The Title" format. """Return the title in the normal "The Title" format.
>>> normalizeTitle('Movie Title, The') >>> normalizeTitle('Movie Title, The')
'The Movie Title' 'The Movie Title'
""" """
stitle = title.split(', ') stitle = title.split(', ')
if len(stitle) > 1 and _articlesDict.has_key(stitle[-1].lower()): if len(stitle) > 1 and _articlesDict.has_key(stitle[-1].lower()):
sep = ' ' sep = ' '
if stitle[-1][-1] in ("'", '-'): sep = '' if stitle[-1][-1] in ("'", '-'): sep = ''
title = '%s%s%s' % (stitle[-1], sep, ', '.join(stitle[:-1])) title = '%s%s%s' % (stitle[-1], sep, ', '.join(stitle[:-1]))
return title return title
def normalizeImdbId(imdbId): def normalizeImdbId(imdbId):
"""Return 7 digit imdbId. """Return 7 digit imdbId.
>>> normalizeImdbId('http://www.imdb.com/title/tt0159206/') >>> normalizeImdbId('http://www.imdb.com/title/tt0159206/')
'0159206' '0159206'
>>> normalizeImdbId(159206) >>> normalizeImdbId(159206)
'0159206' '0159206'
>>> normalizeImdbId('tt0159206') >>> normalizeImdbId('tt0159206')
'0159206' '0159206'
""" """
if isinstance(imdbId, basestring): if isinstance(imdbId, basestring):
imdbId = re.sub('.*(\d{7}).*', '\\1', imdbId) imdbId = re.sub('.*(\d{7}).*', '\\1', imdbId)
elif isinstance(imdbId, int): elif isinstance(imdbId, int):
imdbId = "%07d" % imdbId imdbId = "%07d" % imdbId
return imdbId return imdbId
# Common suffixes in surnames. # Common suffixes in surnames.

View file

@ -1,216 +1,216 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2 # vi:si:et:sw=4:sts=4:ts=4
# GPL written 2008 by j@pad.ma # GPL written 2008 by j@pad.ma
import re import re
def findRe(string, regexp): def findRe(string, regexp):
result = re.compile(regexp, re.DOTALL).findall(string) result = re.compile(regexp, re.DOTALL).findall(string)
if result: if result:
return result[0].strip() return result[0].strip()
return '' return ''
def findString(string, string0='', string1 = ''): def findString(string, string0='', string1 = ''):
"""Return the string between string0 and string1. """Return the string between string0 and string1.
If string0 or string1 is left out, begining or end of string is used. If string0 or string1 is left out, begining or end of string is used.
>>> findString('i am not there', string1=' not there') >>> findString('i am not there', string1=' not there')
'i am' 'i am'
>>> findString('i am not there', 'i am ', ' there') >>> findString('i am not there', 'i am ', ' there')
'not' 'not'
>>> findString('i am not there', 'i am not t') >>> findString('i am not there', 'i am not t')
'here' 'here'
""" """
if string0: if string0:
string0 = re.escape(string0) string0 = re.escape(string0)
else: else:
string0 = '^' string0 = '^'
if string1: if string1:
string1 = re.escape(string1) string1 = re.escape(string1)
else: else:
string1 = '$' string1 = '$'
return findRegexp(string, string0 + '(.*?)' + string1) return findRegexp(string, string0 + '(.*?)' + string1)
# Capitalizes the first letter of a string. # Capitalizes the first letter of a string.
capfirst = lambda x: x and x[0].upper() + x[1:] capfirst = lambda x: x and x[0].upper() + x[1:]
def removeSpecialCharacters(text): def removeSpecialCharacters(text):
""" """
Removes special characters inserted by Word. Removes special characters inserted by Word.
""" """
text = text.replace(u'\u2013', '-') text = text.replace(u'\u2013', '-')
text = text.replace(u'\u2026O', "'") text = text.replace(u'\u2026O', "'")
text = text.replace(u'\u2019', "'") text = text.replace(u'\u2019', "'")
text = text.replace(u'', "'") text = text.replace(u'', "'")
text = text.replace(u'', "'") text = text.replace(u'', "'")
text = text.replace(u'', "-") text = text.replace(u'', "-")
return text return text
def wrap(text, width): def wrap(text, width):
""" """
A word-wrap function that preserves existing line breaks and most spaces in A word-wrap function that preserves existing line breaks and most spaces in
the text. Expects that existing line breaks are posix newlines (\n). the text. Expects that existing line breaks are posix newlines (\n).
See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/148061 See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/148061
""" """
return reduce(lambda line, word, width=width: '%s%s%s' % return reduce(lambda line, word, width=width: '%s%s%s' %
(line, (line,
' \n'[(len(line[line.rfind('\n')+1:]) ' \n'[(len(line[line.rfind('\n')+1:])
+ len(word.split('\n',1)[0] + len(word.split('\n',1)[0]
) >= width)], ) >= width)],
word), word),
text.split(' ') text.split(' ')
) )
def truncateString(s, num): def truncateString(s, num):
"""Truncates a string after a certain number of chacters, but ends with a word """Truncates a string after a certain number of chacters, but ends with a word
>>> truncateString('Truncates a string after a certain number of chacters, but ends with a word', 23) >>> truncateString('Truncates a string after a certain number of chacters, but ends with a word', 23)
'Truncates a string...' 'Truncates a string...'
>>> truncateString('Truncates a string', 23) >>> truncateString('Truncates a string', 23)
'Truncates a string' 'Truncates a string'
""" """
length = int(num) length = int(num)
if len(s) <= length: if len(s) <= length:
return s return s
words = s.split() words = s.split()
ts = "" ts = ""
while words and len(ts) + len(words[0]) < length: while words and len(ts) + len(words[0]) < length:
ts += " " + words.pop(0) ts += " " + words.pop(0)
if words: if words:
ts += "..." ts += "..."
return ts.strip() return ts.strip()
def trimString(string, num): def trimString(string, num):
"""Truncates a string after a certain number of chacters, adding ... at -10 characters """Truncates a string after a certain number of chacters, adding ... at -10 characters
>>> trimString('Truncates a string after a certain number of chacters', 23) >>> trimString('Truncates a string after a certain number of chacters', 23)
'Truncates ...f chacters' 'Truncates ...f chacters'
>>> trimString('Truncates a string', 23) >>> trimString('Truncates a string', 23)
'Truncates a string' 'Truncates a string'
""" """
if len(string) > num: if len(string) > num:
string = string[:num - 13] + '...' + string[-10:] string = string[:num - 13] + '...' + string[-10:]
return string return string
def truncateWords(s, num): def truncateWords(s, num):
"Truncates a string after a certain number of words." "Truncates a string after a certain number of words."
length = int(num) length = int(num)
words = s.split() words = s.split()
if len(words) > length: if len(words) > length:
words = words[:length] words = words[:length]
if not words[-1].endswith('...'): if not words[-1].endswith('...'):
words.append('...') words.append('...')
return ' '.join(words) return ' '.join(words)
def getValidFilename(s): def getValidFilename(s):
""" """
Returns the given string converted to a string that can be used for a clean Returns the given string converted to a string that can be used for a clean
filename. Specifically, leading and trailing spaces are removed; filename. Specifically, leading and trailing spaces are removed;
all non-filename-safe characters are removed. all non-filename-safe characters are removed.
>>> getValidFilename("john's portrait in 2004.jpg") >>> getValidFilename("john's portrait in 2004.jpg")
'john_s_portrait_in_2004.jpg' 'john_s_portrait_in_2004.jpg'
""" """
s = s.strip() s = s.strip()
s = s.replace(' ', '_') s = s.replace(' ', '_')
s = re.sub(r'[^-A-Za-z0-9_.\[\]\ ]', '_', s) s = re.sub(r'[^-A-Za-z0-9_.\[\]\ ]', '_', s)
s = s.replace('__', '_').replace('__', '_') s = s.replace('__', '_').replace('__', '_')
return s return s
def getTextList(list_, last_word='or'): def getTextList(list_, last_word='or'):
""" """
>>> getTextList(['a', 'b', 'c', 'd']) >>> getTextList(['a', 'b', 'c', 'd'])
'a, b, c or d' 'a, b, c or d'
>>> getTextList(['a', 'b', 'c'], 'and') >>> getTextList(['a', 'b', 'c'], 'and')
'a, b and c' 'a, b and c'
>>> getTextList(['a', 'b'], 'and') >>> getTextList(['a', 'b'], 'and')
'a and b' 'a and b'
>>> getTextList(['a']) >>> getTextList(['a'])
'a' 'a'
>>> getTextList([]) >>> getTextList([])
'' ''
""" """
if len(list_) == 0: return '' if len(list_) == 0: return ''
if len(list_) == 1: return list_[0] if len(list_) == 1: return list_[0]
return '%s %s %s' % (', '.join([str(i) for i in list_][:-1]), last_word, list_[-1]) return '%s %s %s' % (', '.join([str(i) for i in list_][:-1]), last_word, list_[-1])
def getListText(text, last_word='or'): def getListText(text, last_word='or'):
""" """
>>> getListText('a, b, c or d') >>> getListText('a, b, c or d')
['a', 'b', 'c', 'd'] ['a', 'b', 'c', 'd']
>>> getListText('a, b and c', 'and') >>> getListText('a, b and c', 'and')
['a', 'b', 'c'] ['a', 'b', 'c']
>>> getListText('a and b', 'and') >>> getListText('a and b', 'and')
['a', 'b'] ['a', 'b']
>>> getListText('a') >>> getListText('a')
['a'] ['a']
>>> getListText('') >>> getListText('')
[] []
""" """
list_ = [] list_ = []
if text: if text:
list_ = text.split(', ') list_ = text.split(', ')
if list_: if list_:
i=len(list_)-1 i=len(list_)-1
last = list_[i].split(last_word) last = list_[i].split(last_word)
if len(last) == 2: if len(last) == 2:
list_[i] = last[0].strip() list_[i] = last[0].strip()
list_.append(last[1].strip()) list_.append(last[1].strip())
return list_ return list_
def normalizeNewlines(text): def normalizeNewlines(text):
return re.sub(r'\r\n|\r|\n', '\n', text) return re.sub(r'\r\n|\r|\n', '\n', text)
def recapitalize(text): def recapitalize(text):
"Recapitalizes text, placing caps after end-of-sentence punctuation." "Recapitalizes text, placing caps after end-of-sentence punctuation."
# capwords = () #capwords = ()
text = text.lower() text = text.lower()
capsRE = re.compile(r'(?:^|(?<=[\.\?\!] ))([a-z])') capsRE = re.compile(r'(?:^|(?<=[\.\?\!] ))([a-z])')
text = capsRE.sub(lambda x: x.group(1).upper(), text) text = capsRE.sub(lambda x: x.group(1).upper(), text)
# for capword in capwords: #for capword in capwords:
# capwordRE = re.compile(r'\b%s\b' % capword, re.I) # capwordRE = re.compile(r'\b%s\b' % capword, re.I)
# text = capwordRE.sub(capword, text) # text = capwordRE.sub(capword, text)
return text return text
def phone2numeric(phone): def phone2numeric(phone):
"Converts a phone number with letters into its numeric equivalent." "Converts a phone number with letters into its numeric equivalent."
letters = re.compile(r'[A-PR-Y]', re.I) letters = re.compile(r'[A-PR-Y]', re.I)
char2number = lambda m: {'a': '2', 'c': '2', 'b': '2', 'e': '3', char2number = lambda m: {'a': '2', 'c': '2', 'b': '2', 'e': '3',
'd': '3', 'g': '4', 'f': '3', 'i': '4', 'h': '4', 'k': '5', 'd': '3', 'g': '4', 'f': '3', 'i': '4', 'h': '4', 'k': '5',
'j': '5', 'm': '6', 'l': '5', 'o': '6', 'n': '6', 'p': '7', 'j': '5', 'm': '6', 'l': '5', 'o': '6', 'n': '6', 'p': '7',
's': '7', 'r': '7', 'u': '8', 't': '8', 'w': '9', 'v': '8', 's': '7', 'r': '7', 'u': '8', 't': '8', 'w': '9', 'v': '8',
'y': '9', 'x': '9'}.get(m.group(0).lower()) 'y': '9', 'x': '9'}.get(m.group(0).lower())
return letters.sub(char2number, phone) return letters.sub(char2number, phone)
def compressString(s): def compressString(s):
import cStringIO, gzip import cStringIO, gzip
zbuf = cStringIO.StringIO() zbuf = cStringIO.StringIO()
zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuf) zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuf)
zfile.write(s) zfile.write(s)
zfile.close() zfile.close()
return zbuf.getvalue() return zbuf.getvalue()
# Matches either a double-quoted chunk, a single-quoted chunk (each allowing
# backslash-escaped characters inside), or a run of non-whitespace.
smart_split_re = re.compile(
    r'("(?:[^"\\]*(?:\\.[^"\\]*)*)"'
    r"|'(?:[^'\\]*(?:\\.[^'\\]*)*)'"
    r'|[^\s]+)')

def smartSplit(text):
    """
    Generator that splits a string by spaces, leaving quoted phrases together.
    Supports both single and double quotes, and supports escaping quotes with
    backslashes. In the output, strings will keep their initial and trailing
    quote marks.
    >>> list(smartSplit('This is "a person\\'s" test.'))
    ['This', 'is', '"a person\\'s"', 'test.']
    """
    for match in smart_split_re.finditer(text):
        bit = match.group(0)
        if bit[0] == '"':
            # Unescape \" and \\ inside the quotes, keep the quote marks.
            yield '"' + bit[1:-1].replace('\\"', '"').replace('\\\\', '\\') + '"'
        elif bit[0] == "'":
            yield "'" + bit[1:-1].replace("\\'", "'").replace("\\\\", "\\") + "'"
        else:
            yield bit

View file

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# Written 2007 by j@mailb.org # Written 2007 by j@mailb.org
from threading import Event from threading import Event
@ -11,50 +12,50 @@ from BitTornado.bencode import bencode, bdecode
def createTorrent(file, url, params=None, flag=None,
                  progress=lambda x: None, progress_percent=1):
    """Creates a torrent for a given file, using url as tracker url.

    params  -- optional dict of extra metainfo parameters
    flag    -- optional threading.Event used to abort the operation
    progress/progress_percent -- progress callback and reporting mode
    """
    # Bug fix: the defaults were `params={}` and `flag=Event()`, which are
    # evaluated once at definition time and shared by every call — callers
    # mutating params, or setting the shared Event, affected later calls.
    if params is None:
        params = {}
    if flag is None:
        flag = Event()
    return make_meta_file(file, url, params, flag, progress, progress_percent)
def getInfoHash(torrentFile):
    "Returns Torrent Info Hash from torrent file"
    # Fix: the file handle was never closed; close it even if bdecode fails.
    metainfo_file = open(torrentFile, 'rb')
    try:
        metainfo = bdecode(metainfo_file.read())
    finally:
        metainfo_file.close()
    info = metainfo['info']
    # The info hash is the SHA-1 of the bencoded 'info' dict, upper-cased hex.
    return sha.sha(bencode(info)).hexdigest().upper()
def getTorrentInfoFromFile(torrentFile):
    """Return torrent info parsed from *torrentFile*.

    Adds a 'timestamp' key with the file's ctime on top of what
    getTorrentInfo() extracts from the raw data.
    """
    f = open(torrentFile, 'rb')
    try:
        data = f.read()
    finally:
        # Fix: close the handle even if read() raises.
        f.close()
    tinfo = getTorrentInfo(data)
    tinfo['timestamp'] = stat(torrentFile).st_ctime
    return tinfo
def getTorrentInfo(data):
    """Return a dict of torrent metadata parsed from raw bencoded *data*.

    The result contains every key of the metainfo's 'info' dict except the
    bulky 'pieces' blob, every top-level metainfo key except 'info' itself,
    plus computed 'size' (total payload bytes) and 'hash' (hex SHA-1 of the
    bencoded info dict).
    """
    tinfo = {}
    metainfo = bdecode(data)
    info = metainfo['info']
    if 'length' in info:
        # Single-file torrent: 'length' is the payload size directly.
        file_length = info['length']
    else:
        # Multi-file torrent: total up the individual file sizes.
        file_length = sum(f['length'] for f in info['files'])
    for key in info:
        if key != 'pieces':
            tinfo[key] = info[key]
    for key in metainfo:
        if key != 'info':
            tinfo[key] = metainfo[key]
    tinfo['size'] = file_length
    tinfo['hash'] = sha.sha(bencode(info)).hexdigest()
    tinfo['announce'] = metainfo['announce']
    return tinfo
def getTorrentSize(torrentFile):
    "Returns Size of files in torrent file in bytes"
    # Bug fix: getTorrentInfo() expects raw bencoded data, but this was
    # passing it a file path; go through getTorrentInfoFromFile() instead,
    # which reads the file and delegates the parsing.
    return getTorrentInfoFromFile(torrentFile)['size']

View file

@ -1,30 +1,30 @@
#!/usr/bin/env python
# vi:si:et:sw=4:sts=4:ts=4
# encoding: utf-8
# Packaging script for the oxutils helper library.
from setuptools import setup, find_packages

setup(
    name="oxutils",
    version="0.1",
    description="collection of utils used to work with python",
    author="0x",
    author_email="code@0xdb.org",
    url="http://code.0xdb.org/oxutils",
    download_url="http://code.0xdb.org/oxutils/download",
    license="GPLv3",
    packages=find_packages(),
    zip_safe=False,
    install_requires=[
        'chardet',
    ],
    keywords=[
    ],
    classifiers=[
        'Development Status :: 3 - Alpha',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Topic :: Software Development :: Libraries :: Python Modules',
    ],
)