rename oxutils -> oxlib

commit bd88811dea
j, 2008-07-03 11:23:42 +02:00
12 changed files with 11 additions and 11 deletions

oxlib/__init__.py Normal file (+17)

@@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# 2008
from hashes import *
from html import *
from text import *
from format import *
import net
import cache
# only works if BitTornado is installed
try:
    from torrent import *
except ImportError:
    pass
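
A minimal usage sketch (illustration only, not part of the commit), assuming the package is importable under Python 2. The star imports above expose the text, html and format helpers at package level:

import oxlib
oxlib.stripTags('some <b>bold</b> text')  # 'some bold text', via oxlib.html
oxlib.formatBytes(1234567)                # '1.2 MB', via oxlib.format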

oxlib/cache.py Normal file (+117)

@@ -0,0 +1,117 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# 2008
import gzip
import StringIO
import os
import sha
import time
import urlparse
import urllib2
import chardet
import simplejson
import net
from net import DEFAULT_HEADERS, getEncoding
cache_timeout = 30*24*60*60 # default is 30 days
def status(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
'''
>>> status('http://google.com')
200
>>> status('http://google.com/mysearch')
404
'''
headers = getHeaders(url, data, headers)
return int(headers['status'])
def exists(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
'''
>>> exists('http://google.com')
True
>>> exists('http://google.com/mysearch')
False
'''
s = status(url, data, headers, timeout)
if s >= 200 and s < 400:
return True
return False
def getHeaders(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
url_cache_file = "%s.headers" % getUrlCacheFile(url, data, headers)
url_headers = loadUrlCache(url_cache_file, timeout)
if url_headers:
url_headers = simplejson.loads(url_headers)
else:
url_headers = net.getHeaders(url, data, headers)
saveUrlHeaders(url_cache_file, url_headers)
return url_headers
def getUrl(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
url_cache_file = getUrlCacheFile(url, data, headers)
result = loadUrlCache(url_cache_file, timeout)
if not result:
try:
url_headers, result = net.getUrl(url, data, headers, returnHeaders=True)
except urllib2.HTTPError, e:
e.headers['Status'] = "%s" % e.code
url_headers = dict(e.headers)
result = e.read()
if url_headers.get('content-encoding', None) == 'gzip':
result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()
saveUrlCache(url_cache_file, result, url_headers)
return result
def getUrlUnicode(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout, _getUrl=getUrl):
data = _getUrl(url, data, headers, timeout)
encoding = getEncoding(data)
if not encoding:
encoding = 'latin-1'
return unicode(data, encoding)
def getCacheBase():
    'cache base is either ~/.ox/cache or can be set via the env variable oxCACHE'
return os.environ.get('oxCACHE', os.path.expanduser('~/.ox/cache'))
def getUrlCacheFile(url, data=None, headers=DEFAULT_HEADERS):
if data:
url_hash = sha.sha(url + '?' + data).hexdigest()
else:
url_hash = sha.sha(url).hexdigest()
domain = ".".join(urlparse.urlparse(url)[1].split('.')[-2:])
return os.path.join(getCacheBase(), domain, url_hash[:2], url_hash[2:4], url_hash[4:6], url_hash)
def loadUrlCache(url_cache_file, timeout=cache_timeout):
if timeout == 0:
return None
if os.path.exists(url_cache_file):
ctime = os.stat(url_cache_file).st_ctime
now = time.mktime(time.localtime())
file_age = now-ctime
if timeout < 0 or file_age < timeout:
f = open(url_cache_file)
data = f.read()
f.close()
return data
return None
def saveUrlCache(url_cache_file, data, headers):
folder = os.path.dirname(url_cache_file)
if not os.path.exists(folder):
os.makedirs(folder)
f = open(url_cache_file, 'w')
f.write(data)
f.close()
saveUrlHeaders("%s.headers" % url_cache_file, headers)
def saveUrlHeaders(url_cache_file, headers):
folder = os.path.dirname(url_cache_file)
if not os.path.exists(folder):
os.makedirs(folder)
f = open(url_cache_file, 'w')
f.write(simplejson.dumps(headers))
f.close()
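
A sketch of how the cached wrappers might be called (assumes network access; example.com is a placeholder). The first fetch is written under ~/.ox/cache (or $oxCACHE), and repeat calls within cache_timeout are served from disk:

from oxlib import cache
if cache.exists('http://example.com/'):
    html = cache.getUrlUnicode('http://example.com/')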

oxlib/format.py Normal file (+278)

@@ -0,0 +1,278 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import math
import re
def to36(q):
"""
Converts an integer to base 36 (a useful scheme for human-sayable IDs).
>>> to36(35)
'z'
>>> to36(119292)
'2k1o'
>>> int(to36(939387374), 36)
939387374
>>> to36(0)
'0'
>>> to36(-393)
Traceback (most recent call last):
...
ValueError: must supply a positive integer
"""
if q < 0: raise ValueError, "must supply a positive integer"
letters = "0123456789abcdefghijklmnopqrstuvwxyz"
converted = []
while q != 0:
q, r = divmod(q, 36)
converted.insert(0, letters[r])
return "".join(converted) or '0'
def from36(q):
return int(q, 36)
def intValue(strValue, default=''):
try:
val = re.compile('(\d+)').findall(unicode(strValue).strip())[0]
except:
val = default
return val
def test_intValue():
assert intValue('abc23') == '23'
assert intValue(' abc23') == '23'
assert intValue(' abc') == ''
def floatValue(strValue, default=''):
try:
val = re.compile('([\d.]+)').findall(unicode(strValue).strip())[0]
except:
val = default
return val
def test_floatValue():
assert floatValue('abc23.4') == '23.4'
assert floatValue(' abc23.4') == '23.4'
assert floatValue(' abc') == ''
def formatNumber(number, longName, shortName):
"""
Return the number in a human-readable format (23 KB, 23.4 MB, 23.42 GB)
>>> formatNumber(123, 'Byte', 'B')
'123 Bytes'
>>> formatNumber(1234, 'Byte', 'B')
'1 KB'
>>> formatNumber(1234567, 'Byte', 'B')
'1.2 MB'
>>> formatNumber(1234567890, 'Byte', 'B')
'1.15 GB'
>>> formatNumber(1234567890123456789, 'Byte', 'B')
'1,096.5166 PB'
"""
if number < 1024:
return '%s %s%s' % (formatThousands(number), longName, number != 1 and 's' or '')
prefix = ['K', 'M', 'G', 'T', 'P']
for i in range(5):
if number < math.pow(1024, i + 2) or i == 4:
n = number / math.pow(1024, i + 1)
return '%s %s%s' % (formatThousands('%.*f' % (i, n)), prefix[i], shortName)
def formatThousands(number, separator = ','):
"""
Return the number with separators (1,000,000)
>>> formatThousands(1)
'1'
>>> formatThousands(1000)
'1,000'
>>> formatThousands(1000000)
'1,000,000'
"""
string = str(number).split('.')
l = []
for i, character in enumerate(reversed(string[0])):
if i and (not (i % 3)):
l.insert(0, separator)
l.insert(0, character)
string[0] = ''.join(l)
return '.'.join(string)
def formatBits(number):
return formatNumber(number, 'bit', 'b')
def formatBytes(number):
return formatNumber(number, 'byte', 'B')
def formatPixels(number):
return formatNumber(number, 'pixel', 'px')
def formatCurrency(amount, currency="$"):
if amount:
temp = "%.2f" % amount
profile=re.compile(r"(\d)(\d\d\d[.,])")
while 1:
temp, count = re.subn(profile,r"\1,\2",temp)
if not count:
break
if temp.startswith('-'):
return "-"+ currency + temp[1:-3]
return currency + temp[:-3]
else:
return ""
def plural(amount, unit, plural='s'):
'''
>>> plural(1, 'unit')
'1 unit'
>>> plural(2, 'unit')
'2 units'
'''
if abs(amount) != 1:
if plural == 's':
unit = unit + plural
else: unit = plural
return "%s %s" % (formatThousands(amount), unit)
def formatDuration(ms, verbosity=0, years=True, hours=True, milliseconds=True):
'''
verbosity
0: D:HH:MM:SS
1: Dd Hh Mm Ss
2: D days H hours M minutes S seconds
years
True: 366 days are 1 year 1 day
False: 366 days are 366 days
hours
True: 30 seconds are 00:00:30
False: 30 seconds are 00:30
milliseconds
True: always display milliseconds
False: never display milliseconds
>>> formatDuration(1000 * 60 * 60 * 24 * 366)
'1:001:00:00:00.000'
>>> formatDuration(1000 * 60 * 60 * 24 * 366, years=False)
'366:00:00:00.000'
>>> formatDuration(1000 * 60 * 60 * 24 * 365 + 2003, verbosity=2)
'1 year 2 seconds 3 milliseconds'
>>> formatDuration(1000 * 30, hours=False, milliseconds=False)
'00:30'
'''
if years:
y = int(ms / 31536000000)
d = int(ms % 31536000000 / 86400000)
else:
d = int(ms / 86400000)
h = int(ms % 86400000 / 3600000)
m = int(ms % 3600000 / 60000)
s = int(ms % 60000 / 1000)
ms = ms % 1000
if verbosity == 0:
if years and y:
duration = "%d:%03d:%02d:%02d:%02d" % (y, d, h, m, s)
elif d:
duration = "%d:%02d:%02d:%02d" % (d, h, m, s)
elif hours or h:
duration = "%02d:%02d:%02d" % (h, m, s)
else:
duration = "%02d:%02d" % (m, s)
if milliseconds:
duration += ".%03d" % ms
else:
if verbosity == 1:
durations = ["%sd" % d, "%sh" % h, "%sm" % m, "%ss" % s]
if years:
durations.insert(0, "%sy" % y)
if milliseconds:
durations.append("%sms" % ms)
else:
durations = [plural(d, 'day'), plural(h,'hour'),
plural(m, 'minute'), plural(s, 'second')]
if years:
durations.insert(0, plural(y, 'year'))
if milliseconds:
durations.append(plural(ms, 'millisecond'))
durations = filter(lambda x: not x.startswith('0'), durations)
duration = ' '.join(durations)
return duration
def ms2runtime(ms, shortenLong=False):
# deprecated - use formatDuration
'''
>>> ms2runtime(5000)
'5 seconds'
>>> ms2runtime(500000)
'8 minutes 20 seconds'
>>> ms2runtime(50000000)
'13 hours 53 minutes 20 seconds'
>>> ms2runtime(50000000-20000)
'13 hours 53 minutes'
'''
y = int(ms / 31536000000)
d = int(ms % 31536000000 / 86400000)
h = int(ms % 86400000 / 3600000)
m = int(ms % 3600000 / 60000)
s = int(ms % 60000 / 1000)
if shortenLong and y > 0 or d > 99:
runtimeString = ("%sy" % y, "%sd" % d, "%sh" % h, "%sm" % m, "%ss" % s)
else:
runtimeString = (plural(y, 'year'), plural(d, 'day'),
plural(h,'hour'), plural(m, 'minute'), plural(s, 'second'))
runtimeString = filter(lambda x: not x.startswith('0'), runtimeString)
return ' '.join(runtimeString).strip()
def ms2playtime(ms, hours=False):
# deprecated - use formatDuration
'''
>>> ms2playtime(5000)
'00:05'
>>> ms2playtime(500000)
'08:20'
>>> ms2playtime(50000000)
'13:53:20'
'''
d = int(ms / 86400000)
h = int(ms % 86400000 / 3600000)
m = int(ms % 3600000 / 60000)
s = int(ms % 60000 / 1000)
if d:
playtime= "%d:%02d:%02d:%02d" % (d, h, m, s)
elif h or hours:
playtime= "%02d:%02d:%02d" % (h, m, s)
else:
playtime= "%02d:%02d" % (m, s)
return playtime
def ms2time(ms):
# deprecated - use formatDuration
'''
>>> ms2time(44592123)
'12:23:12.123'
'''
it = int(ms / 1000)
ms = ms - it*1000
ss = it % 60
mm = ((it-ss)/60) % 60
hh = ((it-(mm*60)-ss)/3600) % 60
return "%d:%02d:%02d.%03d" % (hh, mm, ss, ms)
def time2ms(timeString):
'''
>>> time2ms('12:23:12.123')
44592123
'''
ms = 0.0
p = timeString.split(':')
for i in range(len(p)):
ms = ms * 60 + float(p[i])
return int(ms * 1000)
def shiftTime(offset, timeString):
newTime = time2ms(timeString) + offset
return ms2time(newTime)
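
Interpreter-style examples of the formatting helpers (the values follow from the code above; assumes the module imports cleanly):

>>> from oxlib.format import formatBytes, formatThousands, formatDuration
>>> formatBytes(1234567)
'1.2 MB'
>>> formatThousands(1234567)
'1,234,567'
>>> formatDuration(90061000)
'1:01:01:01.000'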

oxlib/hashes.py Normal file (+17)

@@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# GPL written 2008 by j@pad.ma
import sha
import os
def sha1sum(filename):
    sha1 = sha.new()
    # read in binary mode, 4 KB at a time, so large files hash correctly on all platforms
    f = open(filename, 'rb')
    buffer = f.read(4096)
    while buffer:
        sha1.update(buffer)
        buffer = f.read(4096)
    f.close()
    return sha1.hexdigest()
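
Example call (the path is hypothetical); since the file is streamed in chunks, this stays memory-friendly for large files:

from oxlib.hashes import sha1sum
digest = sha1sum('/path/to/file')  # hypothetical path; returns a 40-char hex digest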

oxlib/html.py Normal file (+172)

@@ -0,0 +1,172 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# GPL written 2008 by j@pad.ma
import re
import string
from htmlentitydefs import name2codepoint
# Configuration for urlize() function
LEADING_PUNCTUATION = ['(', '<', '&lt;']
TRAILING_PUNCTUATION = ['.', ',', ')', '>', '\n', '&gt;', "'", '"']
# list of possible strings used for bullets in bulleted lists
DOTS = ['&middot;', '*', '\xe2\x80\xa2', '&#149;', '&bull;', '&#8226;']
unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)')
word_split_re = re.compile(r'(\s+)')
punctuation_re = re.compile('^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % \
('|'.join([re.escape(x) for x in LEADING_PUNCTUATION]),
'|'.join([re.escape(x) for x in TRAILING_PUNCTUATION])))
simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+')
html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE)
hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL)
trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\Z')
del x # Temporary variable
def escape(html):
'''
Returns the given HTML with ampersands, quotes and carets encoded
>>> escape('html "test" & <brothers>')
'html &quot;test&quot; &amp; &lt;brothers&gt;'
'''
if not isinstance(html, basestring):
html = str(html)
return html.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;')
def linebreaks(value):
'''
Converts newlines into <p> and <br />
'''
value = re.sub(r'\r\n|\r|\n', '\n', value) # normalize newlines
paras = re.split('\n{2,}', value)
paras = ['<p>%s</p>' % p.strip().replace('\n', '<br />') for p in paras]
return '\n\n'.join(paras)
def stripTags(value):
"""
Returns the given HTML with all tags stripped
>>> stripTags('some <h2>title</h2> <script>asdfasdf</script>')
'some title asdfasdf'
"""
return re.sub(r'<[^>]*?>', '', value)
def stripSpacesBetweenTags(value):
"Returns the given HTML with spaces between tags normalized to a single space"
return re.sub(r'>\s+<', '> <', value)
def stripEntities(value):
"Returns the given HTML with all entities (&something;) stripped"
return re.sub(r'&(?:\w+|#\d);', '', value)
def fixAmpersands(value):
"Returns the given HTML with all unencoded ampersands encoded correctly"
return unencoded_ampersands_re.sub('&amp;', value)
def urlize(text, trim_url_limit=None, nofollow=False):
"""
Converts any URLs in text into clickable links. Works on http://, https:// and
www. links. Links can have trailing punctuation (periods, commas, close-parens)
and leading punctuation (opening parens) and it'll still do the right thing.
If trim_url_limit is not None, the URLs in link text will be limited to
trim_url_limit characters.
If nofollow is True, the URLs in link text will get a rel="nofollow" attribute.
"""
trim_url = lambda x, limit=trim_url_limit: limit is not None and (x[:limit] + (len(x) >=limit and '...' or '')) or x
words = word_split_re.split(text)
nofollow_attr = nofollow and ' rel="nofollow"' or ''
for i, word in enumerate(words):
match = punctuation_re.match(word)
if match:
lead, middle, trail = match.groups()
if middle.startswith('www.') or ('@' not in middle and not middle.startswith('http://') and \
len(middle) > 0 and middle[0] in string.letters + string.digits and \
(middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com'))):
middle = '<a href="http://%s"%s>%s</a>' % (middle, nofollow_attr, trim_url(middle))
if middle.startswith('http://') or middle.startswith('https://'):
middle = '<a href="%s"%s>%s</a>' % (middle, nofollow_attr, trim_url(middle))
if '@' in middle and not middle.startswith('www.') and not ':' in middle \
and simple_email_re.match(middle):
middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
if lead + middle + trail != word:
words[i] = lead + middle + trail
return ''.join(words)
def cleanHtml(text):
"""
Cleans the given HTML. Specifically, it does the following:
* Converts <b> and <i> to <strong> and <em>.
* Encodes all ampersands correctly.
* Removes all "target" attributes from <a> tags.
* Removes extraneous HTML, such as presentational tags that open and
immediately close and <br clear="all">.
* Converts hard-coded bullets into HTML unordered lists.
* Removes stuff like "<p>&nbsp;&nbsp;</p>", but only if it's at the
bottom of the text.
"""
from text import normalizeNewlines
text = normalizeNewlines(text)
text = re.sub(r'<(/?)\s*b\s*>', '<\\1strong>', text)
text = re.sub(r'<(/?)\s*i\s*>', '<\\1em>', text)
text = fixAmpersands(text)
# Remove all target="" attributes from <a> tags.
text = link_target_attribute_re.sub('\\1', text)
# Trim stupid HTML such as <br clear="all">.
text = html_gunk_re.sub('', text)
# Convert hard-coded bullets into HTML unordered lists.
def replace_p_tags(match):
s = match.group().replace('</p>', '</li>')
for d in DOTS:
s = s.replace('<p>%s' % d, '<li>')
return '<ul>\n%s\n</ul>' % s
text = hard_coded_bullets_re.sub(replace_p_tags, text)
# Remove stuff like "<p>&nbsp;&nbsp;</p>", but only if it's at the bottom of the text.
text = trailing_empty_content_re.sub('', text)
return text
# This pattern matches a character entity reference (a decimal numeric
# reference, a hexadecimal numeric reference, or a named reference).
charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?')
def decodeHtml(html):
"""
>>> decodeHtml('me &amp; you and &#36;&#38;%')
u'me & you and $&%'
"""
if type(html) != unicode:
html = unicode(html)[:]
if type(html) is unicode:
uchr = unichr
else:
uchr = lambda value: value > 255 and unichr(value) or chr(value)
def entitydecode(match, uchr=uchr):
entity = match.group(1)
if entity.startswith('#x'):
return uchr(int(entity[2:], 16))
elif entity.startswith('#'):
return uchr(int(entity[1:]))
elif entity in name2codepoint:
return uchr(name2codepoint[entity])
else:
return match.group(0)
return charrefpat.sub(entitydecode, html).replace(u'\xa0', ' ')
def highlight(text, query, hlClass="hl"):
"""
>>> highlight('me &amp; you and &#36;&#38;%', 'and')
'me &amp; you <span class="hl">and</span> &#36;&#38;%'
"""
if query:
text = text.replace('<br />', '|')
query = re.escape(query).replace('\ ', '.')
m = re.compile("(%s)" % query, re.IGNORECASE).findall(text)
for i in m:
text = re.sub("(%s)" % re.escape(i).replace('\ ', '.'), '<span class="%s">\\1</span>' % hlClass, text)
text = text.replace('|', '<br />')
return text
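
A few interpreter-style examples, derived from the functions above:

>>> from oxlib.html import escape, decodeHtml, stripTags
>>> escape('1 < 2')
'1 &lt; 2'
>>> decodeHtml('a &amp; b')
u'a & b'
>>> stripTags('<p>hello</p>')
'hello'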

oxlib/lang.py Normal file (+243)

@@ -0,0 +1,243 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
_iso639_languages = [
("Unknown", "", "", "und"),
("Afar", "", "aa", "aar"),
("Abkhazian", "", "ab", "abk"),
("Afrikaans", "", "af", "afr"),
("Akan", "", "ak", "aka"),
("Albanian", "", "sq", "sqi"),
("Amharic", "", "am", "amh"),
("Arabic", "", "ar", "ara"),
("Aragonese", "", "an", "arg"),
("Armenian", "", "hy", "hye"),
("Assamese", "", "as", "asm"),
("Avaric", "", "av", "ava"),
("Avestan", "", "ae", "ave"),
("Aymara", "", "ay", "aym"),
("Azerbaijani", "", "az", "aze"),
("Bashkir", "", "ba", "bak"),
("Bambara", "", "bm", "bam"),
("Basque", "", "eu", "eus"),
("Belarusian", "", "be", "bel"),
("Bengali", "", "bn", "ben"),
("Bihari", "", "bh", "bih"),
("Bislama", "", "bi", "bis"),
("Bosnian", "", "bs", "bos"),
("Breton", "", "br", "bre"),
("Bulgarian", "", "bg", "bul"),
("Burmese", "", "my", "mya"),
("Catalan", "", "ca", "cat"),
("Chamorro", "", "ch", "cha"),
("Chechen", "", "ce", "che"),
("Chinese", "", "zh", "zho"),
("Church Slavic", "", "cu", "chu"),
("Chuvash", "", "cv", "chv"),
("Cornish", "", "kw", "cor"),
("Corsican", "", "co", "cos"),
("Cree", "", "cr", "cre"),
("Czech", "", "cs", "ces"),
("Danish", "Dansk", "da", "dan"),
("Divehi", "", "dv", "div"),
("Dutch", "Nederlands", "nl", "nld"),
("Dzongkha", "", "dz", "dzo"),
("English", "English", "en", "eng"),
("Esperanto", "", "eo", "epo"),
("Estonian", "", "et", "est"),
("Ewe", "", "ee", "ewe"),
("Faroese", "", "fo", "fao"),
("Fijian", "", "fj", "fij"),
("Finnish", "Suomi", "fi", "fin"),
("French", "Francais", "fr", "fra"),
("Western Frisian", "", "fy", "fry"),
("Fulah", "", "ff", "ful"),
("Georgian", "", "ka", "kat"),
("German", "Deutsch", "de", "deu"),
("Gaelic (Scots)", "", "gd", "gla"),
("Irish", "", "ga", "gle"),
("Galician", "", "gl", "glg"),
("Manx", "", "gv", "glv"),
("Greek, Modern", "", "el", "ell"),
("Guarani", "", "gn", "grn"),
("Gujarati", "", "gu", "guj"),
("Haitian", "", "ht", "hat"),
("Hausa", "", "ha", "hau"),
("Hebrew", "", "he", "heb"),
("Herero", "", "hz", "her"),
("Hindi", "", "hi", "hin"),
("Hiri Motu", "", "ho", "hmo"),
("Hungarian", "Magyar", "hu", "hun"),
("Igbo", "", "ig", "ibo"),
("Icelandic", "Islenska", "is", "isl"),
("Ido", "", "io", "ido"),
("Sichuan Yi", "", "ii", "iii"),
("Inuktitut", "", "iu", "iku"),
("Interlingue", "", "ie", "ile"),
("Interlingua", "", "ia", "ina"),
("Indonesian", "", "id", "ind"),
("Inupiaq", "", "ik", "ipk"),
("Italian", "Italiano", "it", "ita"),
("Javanese", "", "jv", "jav"),
("Japanese", "", "ja", "jpn"),
("Kalaallisut (Greenlandic)", "", "kl", "kal"),
("Kannada", "", "kn", "kan"),
("Kashmiri", "", "ks", "kas"),
("Kanuri", "", "kr", "kau"),
("Kazakh", "", "kk", "kaz"),
("Central Khmer", "", "km", "khm"),
("Kikuyu", "", "ki", "kik"),
("Kinyarwanda", "", "rw", "kin"),
("Kirghiz", "", "ky", "kir"),
("Komi", "", "kv", "kom"),
("Kongo", "", "kg", "kon"),
("Korean", "", "ko", "kor"),
("Kuanyama", "", "kj", "kua"),
("Kurdish", "", "ku", "kur"),
("Lao", "", "lo", "lao"),
("Latin", "", "la", "lat"),
("Latvian", "", "lv", "lav"),
("Limburgan", "", "li", "lim"),
("Lingala", "", "ln", "lin"),
("Lithuanian", "", "lt", "lit"),
("Luxembourgish", "", "lb", "ltz"),
("Luba-Katanga", "", "lu", "lub"),
("Ganda", "", "lg", "lug"),
("Macedonian", "", "mk", "mkd"),
("Marshallese", "", "mh", "mah"),
("Malayalam", "", "ml", "mal"),
("Maori", "", "mi", "mri"),
("Marathi", "", "mr", "mar"),
("Malay", "", "ms", "msa"),
("Malagasy", "", "mg", "mlg"),
("Maltese", "", "mt", "mlt"),
("Moldavian", "", "mo", "mol"),
("Mongolian", "", "mn", "mon"),
("Nauru", "", "na", "nau"),
("Navajo", "", "nv", "nav"),
("Ndebele, South", "", "nr", "nbl"),
("Ndebele, North", "", "nd", "nde"),
("Ndonga", "", "ng", "ndo"),
("Nepali", "", "ne", "nep"),
("Norwegian Nynorsk", "", "nn", "nno"),
("Norwegian Bokmål", "", "nb", "nob"),
("Norwegian", "Norsk", "no", "nor"),
("Chichewa; Nyanja", "", "ny", "nya"),
("Occitan (post 1500); Provençal", "", "oc", "oci"),
("Ojibwa", "", "oj", "oji"),
("Oriya", "", "or", "ori"),
("Oromo", "", "om", "orm"),
("Ossetian; Ossetic", "", "os", "oss"),
("Panjabi", "", "pa", "pan"),
("Persian", "", "fa", "fas"),
("Pali", "", "pi", "pli"),
("Polish", "", "pl", "pol"),
("Portuguese", "Portugues", "pt", "por"),
("Pushto", "", "ps", "pus"),
("Quechua", "", "qu", "que"),
("Romansh", "", "rm", "roh"),
("Romanian", "", "ro", "ron"),
("Rundi", "", "rn", "run"),
("Russian", "", "ru", "rus"),
("Sango", "", "sg", "sag"),
("Sanskrit", "", "sa", "san"),
("Serbian", "", "sr", "srp"),
("Croatian", "Hrvatski", "hr", "hrv"),
("Sinhala", "", "si", "sin"),
("Slovak", "", "sk", "slk"),
("Slovenian", "", "sl", "slv"),
("Northern Sami", "", "se", "sme"),
("Samoan", "", "sm", "smo"),
("Shona", "", "sn", "sna"),
("Sindhi", "", "sd", "snd"),
("Somali", "", "so", "som"),
("Sotho, Southern", "", "st", "sot"),
("Spanish", "Espanol", "es", "spa"),
("Sardinian", "", "sc", "srd"),
("Swati", "", "ss", "ssw"),
("Sundanese", "", "su", "sun"),
("Swahili", "", "sw", "swa"),
("Swedish", "Svenska", "sv", "swe"),
("Tahitian", "", "ty", "tah"),
("Tamil", "", "ta", "tam"),
("Tatar", "", "tt", "tat"),
("Telugu", "", "te", "tel"),
("Tajik", "", "tg", "tgk"),
("Tagalog", "", "tl", "tgl"),
("Thai", "", "th", "tha"),
("Tibetan", "", "bo", "bod"),
("Tigrinya", "", "ti", "tir"),
("Tonga (Tonga Islands)", "", "to", "ton"),
("Tswana", "", "tn", "tsn"),
("Tsonga", "", "ts", "tso"),
("Turkmen", "", "tk", "tuk"),
("Turkish", "", "tr", "tur"),
("Twi", "", "tw", "twi"),
("Uighur", "", "ug", "uig"),
("Ukrainian", "", "uk", "ukr"),
("Urdu", "", "ur", "urd"),
("Uzbek", "", "uz", "uzb"),
("Venda", "", "ve", "ven"),
("Vietnamese", "", "vi", "vie"),
("Volapük", "", "vo", "vol"),
("Welsh", "", "cy", "cym"),
("Walloon", "", "wa", "wln"),
("Wolof", "", "wo", "wol"),
("Xhosa", "", "xh", "xho"),
("Yiddish", "", "yi", "yid"),
("Yoruba", "", "yo", "yor"),
("Zhuang", "", "za", "zha"),
("Zulu", "", "zu", "zul"),
]
def codeToLang(code):
code = code.lower()
if len(code) == 2:
for l in _iso639_languages:
if l[2] == code:
return l[0]
elif len(code) == 3:
for l in _iso639_languages:
if l[3] == code:
return l[0]
return None
def langTo3Code(lang):
lang = englishName(lang)
if lang:
lang=lang.lower()
for l in _iso639_languages:
if l[0].lower() == lang:
return l[3]
return None
def langTo2Code(lang):
lang = englishName(lang)
if lang:
lang=lang.lower()
for l in _iso639_languages:
if l[0].lower() == lang:
return l[2]
return None
def langCode2To3(code):
    return langTo3Code(codeToLang(code))
def langCode3To2(code):
    return langTo2Code(codeToLang(code))
def englishName(lang):
lang = lang.lower()
for l in _iso639_languages:
if l[1].lower() == lang:
return l[0]
return None
def languages2Letter():
languages = []
for l in _iso639_languages:
if l[2]:
languages.append(l[2])
return languages
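
Lookup examples. Note that langTo2Code/langTo3Code resolve via englishName, which matches on the native-name column, so 'Francais' works but 'French' does not:

>>> from oxlib import lang
>>> lang.codeToLang('de')
'German'
>>> lang.langTo2Code('Francais')
'fr'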

oxlib/net.py Normal file (+73)

@@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import gzip
import StringIO
import urllib
import urllib2
from chardet.universaldetector import UniversalDetector
# Default headers for HTTP requests.
DEFAULT_HEADERS = {
'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9) Gecko/2008061015 Firefox/3.0',
'Accept-Encoding': 'gzip'
}
def status(url, data=None, headers=DEFAULT_HEADERS):
try:
f = openUrl(url, data, headers)
s = f.code
except urllib2.HTTPError, e:
s = e.code
return s
def exists(url, data=None, headers=DEFAULT_HEADERS):
s = status(url, data, headers)
if s >= 200 and s < 400:
return True
return False
def getHeaders(url, data=None, headers=DEFAULT_HEADERS):
try:
f = openUrl(url, data, headers)
f.headers['Status'] = "%s" % f.code
headers = f.headers
f.close()
except urllib2.HTTPError, e:
e.headers['Status'] = "%s" % e.code
headers = e.headers
return dict(headers)
def openUrl(url, data=None, headers=DEFAULT_HEADERS):
url = url.replace(' ', '%20')
req = urllib2.Request(url, data, headers)
return urllib2.urlopen(req)
def getUrl(url, data=None, headers=DEFAULT_HEADERS, returnHeaders=False):
f = openUrl(url, data, headers)
data = f.read()
f.close()
if f.headers.get('content-encoding', None) == 'gzip':
data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()
if returnHeaders:
f.headers['Status'] = "%s" % f.code
return dict(f.headers), data
return data
def getUrlUnicode(url):
data = getUrl(url)
encoding = getEncoding(data)
if not encoding:
encoding = 'latin-1'
return unicode(data, encoding)
def getEncoding(data):
detector = UniversalDetector()
for line in data.split('\n'):
detector.feed(line)
if detector.done:
break
detector.close()
return detector.result['encoding']
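
Typical calls (example.com is a placeholder; requests go out with the gzip-capable DEFAULT_HEADERS):

from oxlib import net
headers = net.getHeaders('http://example.com/')  # plain dict, includes a 'Status' key
page = net.getUrlUnicode('http://example.com/')  # decoded via chardet, falling back to latin-1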

oxlib/normalize.py Normal file (+161)

@@ -0,0 +1,161 @@
# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
_articles = ('the', 'la', 'a', 'die', 'der', 'le', 'el',
"l'", 'il', 'das', 'les', 'o', 'ein', 'i', 'un', 'los', 'de',
'an', 'una', 'las', 'eine', 'den', 'gli', 'het', 'os', 'lo',
'az', 'det', 'ha-', 'een', 'ang', 'oi', 'ta', 'al-', 'dem',
'mga', 'uno', "un'", 'ett', u'\xcf', 'eines', u'\xc7', 'els',
u'\xd4\xef', u'\xcf\xe9')
# Articles in a dictionary.
_articlesDict = dict([(x, x) for x in _articles])
_spArticles = []
for article in _articles:
if article[-1] not in ("'", '-'): article += ' '
_spArticles.append(article)
def canonicalTitle(title):
"""Return the title in the canonic format 'Movie Title, The'.
>>> canonicalTitle('The Movie Title')
'Movie Title, The'
"""
try:
if _articlesDict.has_key(title.split(', ')[-1].lower()): return title
except IndexError: pass
ltitle = title.lower()
for article in _spArticles:
if ltitle.startswith(article):
lart = len(article)
title = '%s, %s' % (title[lart:], title[:lart])
if article[-1] == ' ': title = title[:-1]
break
## XXX: an attempt using a dictionary lookup.
##for artSeparator in (' ', "'", '-'):
## article = _articlesDict.get(ltitle.split(artSeparator)[0])
## if article is not None:
## lart = len(article)
## # check titles like "una", "I'm Mad" and "L'abbacchio".
## if title[lart:] == '' or (artSeparator != ' ' and
## title[lart:][1] != artSeparator): continue
## title = '%s, %s' % (title[lart:], title[:lart])
## if artSeparator == ' ': title = title[1:]
## break
return title
def normalizeTitle(title):
"""Return the title in the normal "The Title" format.
>>> normalizeTitle('Movie Title, The')
'The Movie Title'
"""
stitle = title.split(', ')
if len(stitle) > 1 and _articlesDict.has_key(stitle[-1].lower()):
sep = ' '
if stitle[-1][-1] in ("'", '-'): sep = ''
title = '%s%s%s' % (stitle[-1], sep, ', '.join(stitle[:-1]))
return title
def normalizeImdbId(imdbId):
"""Return 7 digit imdbId.
>>> normalizeImdbId('http://www.imdb.com/title/tt0159206/')
'0159206'
>>> normalizeImdbId(159206)
'0159206'
>>> normalizeImdbId('tt0159206')
'0159206'
"""
if isinstance(imdbId, basestring):
imdbId = re.sub('.*(\d{7}).*', '\\1', imdbId)
elif isinstance(imdbId, int):
imdbId = "%07d" % imdbId
return imdbId
# Common suffixes in surnames.
_sname_suffixes = ('de', 'la', 'der', 'den', 'del', 'y', 'da', 'van',
'e', 'von', 'the', 'di', 'du', 'el', 'al')
def canonicalName(name):
"""Return the given name in canonical "Surname, Name" format.
It assumes that name is in the 'Name Surname' format.
>>> canonicalName('Jean Luc Godard')
'Godard, Jean Luc'
>>> canonicalName('Ivan Ivanov-Vano')
'Ivanov-Vano, Ivan'
>>> canonicalName('Gus Van Sant')
'Van Sant, Gus'
>>> canonicalName('Brian De Palma')
'De Palma, Brian'
"""
# XXX: some statistics (over 1852406 names):
# - just a surname: 51921
# - single surname, single name: 1792759
# - composed surname, composed name: 7726
# - composed surname, single name: 55623
# (2: 49259, 3: 5502, 4: 551)
# - single surname, composed name: 186604
# (2: 178315, 3: 6573, 4: 1219, 5: 352)
# Don't convert names already in the canonical format.
if name.find(', ') != -1: return name
sname = name.split(' ')
snl = len(sname)
if snl == 2:
# Just a name and a surname: how boring...
name = '%s, %s' % (sname[1], sname[0])
elif snl > 2:
lsname = [x.lower() for x in sname]
if snl == 3: _indexes = (0, snl-2)
else: _indexes = (0, snl-2, snl-3)
# Check for common surname prefixes at the beginning and near the end.
for index in _indexes:
if lsname[index] not in _sname_suffixes: continue
try:
# Build the surname.
surn = '%s %s' % (sname[index], sname[index+1])
del sname[index]
del sname[index]
try:
# Handle the "Jr." after the name.
if lsname[index+2].startswith('jr'):
surn += ' %s' % sname[index]
del sname[index]
except (IndexError, ValueError):
pass
name = '%s, %s' % (surn, ' '.join(sname))
break
except ValueError:
continue
else:
name = '%s, %s' % (sname[-1], ' '.join(sname[:-1]))
return name
def normalizeName(name):
"""Return a name in the normal "Name Surname" format.
>>> normalizeName('Godard, Jean Luc')
'Jean Luc Godard'
>>> normalizeName('Ivanov-Vano, Ivan')
'Ivan Ivanov-Vano'
>>> normalizeName('Van Sant, Gus')
'Gus Van Sant'
>>> normalizeName('De Palma, Brian')
'Brian De Palma'
"""
sname = name.split(', ')
if len(sname) == 2:
name = '%s %s' % (sname[1], sname[0])
return name
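
Interpreter-style examples (the title and ID are illustrative):

>>> from oxlib.normalize import canonicalTitle, normalizeImdbId
>>> canonicalTitle('The Seventh Seal')
'Seventh Seal, The'
>>> normalizeImdbId('tt0050976')
'0050976'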

oxlib/text.py Normal file (+216)

@@ -0,0 +1,216 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# GPL written 2008 by j@pad.ma
import re
def findRe(string, regexp):
result = re.compile(regexp, re.DOTALL).findall(string)
if result:
return result[0].strip()
return ''
def findString(string, string0='', string1 = ''):
"""Return the string between string0 and string1.
    If string0 or string1 is left out, the beginning or end of the string is used.
>>> findString('i am not there', string1=' not there')
'i am'
>>> findString('i am not there', 'i am ', ' there')
'not'
>>> findString('i am not there', 'i am not t')
'here'
"""
if string0:
string0 = re.escape(string0)
else:
string0 = '^'
if string1:
string1 = re.escape(string1)
else:
string1 = '$'
return findRe(string, string0 + '(.*?)' + string1)
# Capitalizes the first letter of a string.
capfirst = lambda x: x and x[0].upper() + x[1:]
def removeSpecialCharacters(text):
"""
Removes special characters inserted by Word.
"""
    text = text.replace(u'\u2013', '-')
    text = text.replace(u'\u2026O', "'")
    text = text.replace(u'\u2019', "'")
    # the literal characters in the next three calls were lost in transcription;
    # restored here as plausible Word smart punctuation (an assumption)
    text = text.replace(u'\u2018', "'")
    text = text.replace(u'\u201a', "'")
    text = text.replace(u'\u2014', "-")
    return text
def wrap(text, width):
"""
A word-wrap function that preserves existing line breaks and most spaces in
the text. Expects that existing line breaks are posix newlines (\n).
See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/148061
"""
return reduce(lambda line, word, width=width: '%s%s%s' %
(line,
' \n'[(len(line[line.rfind('\n')+1:])
+ len(word.split('\n',1)[0]
) >= width)],
word),
text.split(' ')
)
def truncateString(s, num):
"""Truncates a string after a certain number of chacters, but ends with a word
>>> truncateString('Truncates a string after a certain number of chacters, but ends with a word', 23)
'Truncates a string...'
>>> truncateString('Truncates a string', 23)
'Truncates a string'
"""
length = int(num)
if len(s) <= length:
return s
words = s.split()
ts = ""
while words and len(ts) + len(words[0]) < length:
ts += " " + words.pop(0)
if words:
ts += "..."
return ts.strip()
def trimString(string, num):
"""Truncates a string after a certain number of chacters, adding ... at -10 characters
>>> trimString('Truncates a string after a certain number of chacters', 23)
'Truncates ...f chacters'
>>> trimString('Truncates a string', 23)
'Truncates a string'
"""
if len(string) > num:
string = string[:num - 13] + '...' + string[-10:]
return string
def truncateWords(s, num):
"Truncates a string after a certain number of words."
length = int(num)
words = s.split()
if len(words) > length:
words = words[:length]
if not words[-1].endswith('...'):
words.append('...')
return ' '.join(words)
def getValidFilename(s):
"""
Returns the given string converted to a string that can be used for a clean
filename. Specifically, leading and trailing spaces are removed;
all non-filename-safe characters are removed.
>>> getValidFilename("john's portrait in 2004.jpg")
'john_s_portrait_in_2004.jpg'
"""
s = s.strip()
s = s.replace(' ', '_')
s = re.sub(r'[^-A-Za-z0-9_.\[\]\ ]', '_', s)
s = s.replace('__', '_').replace('__', '_')
return s
def getTextList(list_, last_word='or'):
"""
>>> getTextList(['a', 'b', 'c', 'd'])
'a, b, c or d'
>>> getTextList(['a', 'b', 'c'], 'and')
'a, b and c'
>>> getTextList(['a', 'b'], 'and')
'a and b'
>>> getTextList(['a'])
'a'
>>> getTextList([])
''
"""
if len(list_) == 0: return ''
if len(list_) == 1: return list_[0]
return '%s %s %s' % (', '.join([str(i) for i in list_][:-1]), last_word, list_[-1])
def getListText(text, last_word='or'):
"""
>>> getListText('a, b, c or d')
['a', 'b', 'c', 'd']
>>> getListText('a, b and c', 'and')
['a', 'b', 'c']
>>> getListText('a and b', 'and')
['a', 'b']
>>> getListText('a')
['a']
>>> getListText('')
[]
"""
list_ = []
if text:
list_ = text.split(', ')
if list_:
i=len(list_)-1
last = list_[i].split(last_word)
if len(last) == 2:
list_[i] = last[0].strip()
list_.append(last[1].strip())
return list_
def normalizeNewlines(text):
return re.sub(r'\r\n|\r|\n', '\n', text)
def recapitalize(text):
"Recapitalizes text, placing caps after end-of-sentence punctuation."
#capwords = ()
text = text.lower()
capsRE = re.compile(r'(?:^|(?<=[\.\?\!] ))([a-z])')
text = capsRE.sub(lambda x: x.group(1).upper(), text)
#for capword in capwords:
# capwordRE = re.compile(r'\b%s\b' % capword, re.I)
# text = capwordRE.sub(capword, text)
return text
def phone2numeric(phone):
"Converts a phone number with letters into its numeric equivalent."
letters = re.compile(r'[A-PR-Y]', re.I)
char2number = lambda m: {'a': '2', 'c': '2', 'b': '2', 'e': '3',
'd': '3', 'g': '4', 'f': '3', 'i': '4', 'h': '4', 'k': '5',
'j': '5', 'm': '6', 'l': '5', 'o': '6', 'n': '6', 'p': '7',
's': '7', 'r': '7', 'u': '8', 't': '8', 'w': '9', 'v': '8',
'y': '9', 'x': '9'}.get(m.group(0).lower())
return letters.sub(char2number, phone)
def compressString(s):
import cStringIO, gzip
zbuf = cStringIO.StringIO()
zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuf)
zfile.write(s)
zfile.close()
return zbuf.getvalue()
smart_split_re = re.compile('("(?:[^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'(?:[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'|[^\\s]+)')
def smartSplit(text):
"""
Generator that splits a string by spaces, leaving quoted phrases together.
Supports both single and double quotes, and supports escaping quotes with
backslashes. In the output, strings will keep their initial and trailing
quote marks.
>>> list(smartSplit('This is "a person\\'s" test.'))
['This', 'is', '"a person\\'s"', 'test.']
"""
for bit in smart_split_re.finditer(text):
bit = bit.group(0)
if bit[0] == '"':
yield '"' + bit[1:-1].replace('\\"', '"').replace('\\\\', '\\') + '"'
elif bit[0] == "'":
yield "'" + bit[1:-1].replace("\\'", "'").replace("\\\\", "\\") + "'"
else:
yield bit
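
Two quick examples of the extraction helpers above:

>>> from oxlib.text import findRe, truncateString
>>> findRe('<em>match</em>', '<em>(.*?)</em>')
'match'
>>> truncateString('The quick brown fox jumps over the lazy dog', 20)
'The quick brown fox...'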

oxlib/torrent.py Normal file (+61)

@@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# Written 2007 by j@mailb.org
from threading import Event
import sha
from os import stat
from BitTornado.BT1.makemetafile import make_meta_file
from BitTornado.bencode import bencode, bdecode
def createTorrent(file, url, params = {}, flag = Event(),
progress = lambda x: None, progress_percent = 1):
"Creates a torrent for a given file, using url as tracker url"
return make_meta_file(file, url, params, flag, progress, progress_percent)
def getInfoHash(torrentFile):
"Returns Torrent Info Hash from torrent file"
metainfo_file = open(torrentFile, 'rb')
metainfo = bdecode(metainfo_file.read())
info = metainfo['info']
return sha.sha(bencode(info)).hexdigest().upper()
def getTorrentInfoFromFile(torrentFile):
f = open(torrentFile, 'rb')
data = f.read()
f.close()
tinfo = getTorrentInfo(data)
tinfo['timestamp'] = stat(torrentFile).st_ctime
return tinfo
def getTorrentInfo(data):
"Returns Torrent Info from torrent file"
tinfo = {}
metainfo = bdecode(data)
info = metainfo['info']
piece_length = info['piece length']
if info.has_key('length'):
# let's assume we just have one file
file_length = info['length']
else:
# let's assume we have a directory structure
        file_length = 0
for f in info['files']:
file_length += f['length']
for key in info:
if key != 'pieces':
tinfo[key] = info[key]
for key in metainfo:
if key != 'info':
tinfo[key] = metainfo[key]
tinfo['size'] = file_length
tinfo['hash'] = sha.sha(bencode(info)).hexdigest()
tinfo['announce'] = metainfo['announce']
return tinfo
def getTorrentSize(torrentFile):
    "Returns the size of the files in a torrent, in bytes"
    # getTorrentInfo() expects bencoded data, not a path, so go through the file helper
    return getTorrentInfoFromFile(torrentFile)['size']
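
A usage sketch (requires BitTornado; example.torrent is a hypothetical file):

from oxlib import torrent
info = torrent.getTorrentInfoFromFile('example.torrent')  # hypothetical path
print info['size'], info['hash']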