vi:si:et:sw=4:sts=4:ts=4

This commit is contained in:
j 2008-06-19 11:21:21 +02:00
parent dafe20aa04
commit 4a6e2702b4
11 changed files with 921 additions and 921 deletions

View file

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2 # vi:si:et:sw=4:sts=4:ts=4
# 2008 # 2008
from hashes import * from hashes import *
@ -11,7 +11,7 @@ import cache
#only works if BitTornado is installed #only works if BitTornado is installed
try: try:
from torrent import * from torrent import *
except: except:
pass pass

View file

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2 # vi:si:et:sw=4:sts=4:ts=4
# 2008 # 2008
import gzip import gzip
import StringIO import StringIO
@ -19,99 +19,99 @@ from net import DEFAULT_HEADERS, getEncoding
cache_timeout = 30*24*60*60 # default is 30 days cache_timeout = 30*24*60*60 # default is 30 days
def status(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
    '''
    Return the HTTP status code for url, served from the URL cache
    when a fresh enough entry exists.

    >>> status('http://google.com')
    200
    >>> status('http://google.com/mysearch')
    404
    '''
    # Bug fix: timeout was accepted but never forwarded, so callers could
    # not control cache expiry for status lookups.
    headers = getHeaders(url, data, headers, timeout)
    return int(headers['status'])
def exists(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
    '''
    Report whether url answers with a non-error status (2xx or 3xx).

    >>> exists('http://google.com')
    True
    >>> exists('http://google.com/mysearch')
    False
    '''
    code = status(url, data, headers, timeout)
    return 200 <= code < 400
def getHeaders(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
    '''
    Return the response headers for url as a dict.

    Headers are cached next to the body in a ".headers" sidecar file,
    serialized as JSON; a cache miss triggers a network lookup and
    repopulates the sidecar.
    '''
    url_cache_file = "%s.headers" % getUrlCacheFile(url, data, headers)
    cached = loadUrlCache(url_cache_file, timeout)
    if cached:
        return simplejson.loads(cached)
    url_headers = net.getHeaders(url, data, headers)
    saveUrlHeaders(url_cache_file, url_headers)
    return url_headers
def getUrl(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout): def getUrl(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
url_cache_file = getUrlCacheFile(url, data, headers) url_cache_file = getUrlCacheFile(url, data, headers)
result = loadUrlCache(url_cache_file, timeout) result = loadUrlCache(url_cache_file, timeout)
if not result: if not result:
try: try:
url_headers, result = net.getUrl(url, data, headers, returnHeaders=True) url_headers, result = net.getUrl(url, data, headers, returnHeaders=True)
except urllib2.HTTPError, e: except urllib2.HTTPError, e:
e.headers['Status'] = "%s" % e.code e.headers['Status'] = "%s" % e.code
url_headers = dict(e.headers) url_headers = dict(e.headers)
result = e.read() result = e.read()
if url_headers.get('content-encoding', None) == 'gzip': if url_headers.get('content-encoding', None) == 'gzip':
result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read() result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()
saveUrlCache(url_cache_file, result, url_headers) saveUrlCache(url_cache_file, result, url_headers)
return result return result
def getUrlUnicode(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout, _getUrl=getUrl):
    '''
    Fetch url (via _getUrl, injectable for testing) and decode the body.

    Falls back to latin-1 when no encoding can be detected, since latin-1
    maps every byte and therefore never fails to decode.
    '''
    raw = _getUrl(url, data, headers, timeout)
    return unicode(raw, getEncoding(raw) or 'latin-1')
def getCacheBase():
    '''Return the cache base directory: $oxCACHE if set, else ~/.ox/cache.'''
    return os.environ.get('oxCACHE', os.path.expanduser('~/.ox/cache'))
def getUrlCacheFile(url, data=None, headers=DEFAULT_HEADERS):
    '''
    Map a request to its cache path:
    <base>/<domain>/<hh>/<hh>/<hh>/<sha1>, where the hash covers the URL
    (plus "?data" for POST-style requests) and the nested two-character
    directories keep any single folder small.
    '''
    if data:
        key = url + '?' + data
    else:
        key = url
    url_hash = sha.sha(key).hexdigest()
    domain = ".".join(urlparse.urlparse(url)[1].split('.')[-2:])
    return os.path.join(getCacheBase(), domain,
                        url_hash[:2], url_hash[2:4], url_hash[4:6], url_hash)
def loadUrlCache(url_cache_file, timeout=cache_timeout):
    '''
    Return the cached body from url_cache_file, or None if missing or stale.

    timeout semantics: 0 disables the cache entirely, a negative value
    means "never expires", a positive value is a maximum age in seconds.
    '''
    if timeout == 0:
        return None
    if os.path.exists(url_cache_file):
        # NOTE(review): st_ctime is inode-change time on Unix, not creation
        # time; adequate as a freshness stamp for files written once.
        ctime = os.stat(url_cache_file).st_ctime
        file_age = time.mktime(time.localtime()) - ctime
        if timeout < 0 or file_age < timeout:
            # with-block closes the handle even if read() raises
            # (the original leaked it on error).
            with open(url_cache_file) as f:
                return f.read()
    return None
def saveUrlCache(url_cache_file, data, headers):
    '''
    Write the response body to url_cache_file and its headers to the
    ".headers" sidecar, creating parent directories as needed.
    '''
    folder = os.path.dirname(url_cache_file)
    if not os.path.exists(folder):
        os.makedirs(folder)
    # with-block guarantees the handle is closed even if write() raises
    # (the original leaked it on error).
    with open(url_cache_file, 'w') as f:
        f.write(data)
    saveUrlHeaders("%s.headers" % url_cache_file, headers)
def saveUrlHeaders(url_cache_file, headers):
    '''
    Serialize headers as JSON into url_cache_file, creating parent
    directories as needed.
    '''
    folder = os.path.dirname(url_cache_file)
    if not os.path.exists(folder):
        os.makedirs(folder)
    # with-block guarantees the handle is closed even on a write error
    # (the original leaked it).
    with open(url_cache_file, 'w') as f:
        f.write(simplejson.dumps(headers))

View file

@ -1,208 +1,207 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2 # vi:si:et:sw=4:sts=4:ts=4
import math import math
import re import re
def to36(q):
    """
    Convert a non-negative integer to base 36 (digits 0-9 then a-z),
    a useful scheme for human-sayable IDs.

    >>> to36(35)
    'z'
    >>> to36(119292)
    '2k1o'
    >>> int(to36(939387374), 36)
    939387374
    >>> to36(0)
    '0'
    >>> to36(-393)
    Traceback (most recent call last):
        ...
    ValueError: must supply a positive integer
    """
    if q < 0:
        raise ValueError("must supply a positive integer")
    digits = "0123456789abcdefghijklmnopqrstuvwxyz"
    chunks = []
    while q != 0:
        q, rem = divmod(q, 36)
        chunks.append(digits[rem])
    return "".join(reversed(chunks)) or '0'
def from36(q):
    '''Parse a base-36 string back into an integer (inverse of to36).'''
    return int(q, 36)
def intValue(strValue, default=''):
    '''
    Return the first run of digits in strValue as a string,
    or default when strValue contains no digits.
    '''
    try:
        return re.compile('(\d+)').findall(unicode(strValue).strip())[0]
    except:
        # Deliberate best-effort: any failure (no match, odd input type)
        # falls back to the caller-supplied default.
        return default
def test_intValue():
    '''Sanity checks for intValue.'''
    cases = (('abc23', '23'), (' abc23', '23'), (' abc', ''))
    for given, expected in cases:
        assert intValue(given) == expected
def floatValue(strValue, default=''):
    '''
    Return the first run of digits and dots in strValue as a string,
    or default when nothing numeric is found.
    '''
    try:
        return re.compile('([\d.]+)').findall(unicode(strValue).strip())[0]
    except:
        # Deliberate best-effort: any failure falls back to default.
        return default
def test_floatValue():
    '''Sanity checks for floatValue.'''
    cases = (('abc23.4', '23.4'), (' abc23.4', '23.4'), (' abc', ''))
    for given, expected in cases:
        assert floatValue(given) == expected
def formatNumber(number, longName, shortName):
    """
    Return number in a human-readable form (23 KB, 23.4 MB, 23.42 GB).
    Values below 1024 use longName (pluralized); larger values use a
    binary prefix K/M/G/T/P, shown with i decimal places at prefix
    index i.

    >>> formatNumber(123, 'Byte', 'B')
    '123 Bytes'
    >>> formatNumber(1234, 'Byte', 'B')
    '1 KB'
    >>> formatNumber(1234567, 'Byte', 'B')
    '1.2 MB'
    >>> formatNumber(1234567890, 'Byte', 'B')
    '1.15 GB'
    >>> formatNumber(1234567890123456789, 'Byte', 'B')
    '1,096.5166 PB'
    """
    if number < 1024:
        suffix = number != 1 and 's' or ''
        return '%s %s%s' % (formatThousands(number), longName, suffix)
    prefixes = ['K', 'M', 'G', 'T', 'P']
    for i, prefix in enumerate(prefixes):
        # Last prefix (P) absorbs everything too large for the table.
        if i == 4 or number < math.pow(1024, i + 2):
            scaled = number / math.pow(1024, i + 1)
            return '%s %s%s' % (formatThousands('%.*f' % (i, scaled)),
                                prefix, shortName)
def formatThousands(number, separator=','):
    """
    Return number with thousands separators inserted (1,000,000).
    Any fractional part is left untouched.

    >>> formatThousands(1)
    '1'
    >>> formatThousands(1000)
    '1,000'
    >>> formatThousands(1000000)
    '1,000,000'
    """
    parts = str(number).split('.')
    digits = parts[0]
    groups = []
    # Peel three characters at a time off the right-hand end.
    while len(digits) > 3:
        groups.insert(0, digits[-3:])
        digits = digits[:-3]
    groups.insert(0, digits)
    parts[0] = separator.join(groups)
    return '.'.join(parts)
def formatBits(number):
    '''Format number as a quantity of bits, e.g. '1.2 Mb'.'''
    return formatNumber(number, 'bit', 'b')
def formatBytes(number):
    '''Format number as a quantity of bytes, e.g. '1.2 MB'.'''
    return formatNumber(number, 'byte', 'B')
def formatPixels(number):
    '''Format number as a quantity of pixels, e.g. '1.2 Mpx'.'''
    return formatNumber(number, 'pixel', 'px')
def plural(amount, unit, plural='s'):
    '''
    Format amount with unit, pluralizing when abs(amount) != 1.
    A non-default plural argument replaces the unit entirely
    (e.g. plural(2, 'box', 'boxes') -> '2 boxes').

    >>> plural(1, 'unit')
    '1 unit'
    >>> plural(2, 'unit')
    '2 units'
    '''
    if abs(amount) != 1:
        unit = unit + plural if plural == 's' else plural
    return "%s %s" % (formatThousands(amount), unit)
def ms2runtime(ms):
    '''
    Format a duration in milliseconds as a human-readable runtime,
    dropping components that are zero.

    >>> ms2runtime(5000)
    '5 seconds'
    >>> ms2runtime(500000)
    '8 minutes 20 seconds'
    >>> ms2runtime(50000000)
    '13 hours 53 minutes 20 seconds'
    >>> ms2runtime(50000000-20000)
    '13 hours 53 minutes'
    '''
    seconds = int(ms / 1000)
    years = days = hours = minutes = 0
    if seconds >= 60:
        minutes, seconds = int(seconds / 60), seconds % 60
    if minutes >= 60:
        hours, minutes = int(minutes / 60), minutes % 60
    if hours >= 24:
        days, hours = int(hours / 24), hours % 24
    if days >= 365:
        years, days = int(days / 365), days % 365
    parts = (plural(years, 'year'), plural(days, 'day'),
             plural(hours, 'hour'), plural(minutes, 'minute'),
             plural(seconds, 'second'))
    # Components rendered as "0 ..." are dropped from the output.
    parts = filter(lambda x: not x.startswith('0'), parts)
    return " ".join(parts).strip()
def ms2playtime(ms):
    '''
    Format milliseconds as a playtime clock: MM:SS, or HH:MM:SS once
    the duration reaches an hour.

    >>> ms2playtime(5000)
    '00:05'
    >>> ms2playtime(500000)
    '08:20'
    >>> ms2playtime(50000000)
    '13:53:20'
    '''
    total = int(ms / 1000)
    secs = total % 60
    mins = ((total - secs) / 60) % 60
    hours = ((total - (mins * 60) - secs) / 3600) % 60
    if hours:
        return "%02d:%02d:%02d" % (hours, mins, secs)
    return "%02d:%02d" % (mins, secs)
def ms2time(ms):
    '''
    Format milliseconds as H:MM:SS.mmm.

    >>> ms2time(44592123)
    '12:23:12.123'
    '''
    total = int(ms / 1000)
    millis = ms - total * 1000
    secs = total % 60
    mins = ((total - secs) / 60) % 60
    hours = ((total - (mins * 60) - secs) / 3600) % 60
    return "%d:%02d:%02d.%03d" % (hours, mins, secs, millis)
def time2ms(timeString):
    '''
    Parse a colon-separated [[HH:]MM:]SS[.mmm] string into milliseconds.

    >>> time2ms('12:23:12.123')
    44592123
    '''
    total = 0.0
    for part in timeString.split(':'):
        # Each colon shifts the accumulated value up by a factor of 60.
        total = total * 60 + float(part)
    return int(total * 1000)
def shiftTime(offset, timeString):
    '''Shift timeString by offset milliseconds and re-format it.'''
    return ms2time(time2ms(timeString) + offset)

View file

@ -1,17 +1,17 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2 # vi:si:et:sw=4:sts=4:ts=4
# GPL written 2008 by j@pad.ma # GPL written 2008 by j@pad.ma
import sha import sha
import os import os
def sha1sum(filename):
    '''
    Return the hexadecimal SHA-1 digest of the file at filename.

    Reads in 4 KB chunks so large files are hashed without loading
    them into memory at once.
    '''
    digest = sha.new()
    # 'rb' (the original used text mode) so the digest is byte-exact on
    # every platform; the with-block closes the handle even on error and
    # the renamed locals no longer shadow the file/buffer builtins.
    with open(filename, 'rb') as f:
        chunk = f.read(4096)
        while chunk:
            digest.update(chunk)
            chunk = f.read(4096)
    return digest.hexdigest()

View file

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2 # vi:si:et:sw=4:sts=4:ts=4
# GPL written 2008 by j@pad.ma # GPL written 2008 by j@pad.ma
import re import re
import string import string
@ -26,147 +26,147 @@ trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\
del x # Temporary variable del x # Temporary variable
def escape(html):
    '''
    Return html with ampersands, quotes and angle brackets encoded
    as HTML entities. Non-string input is stringified first.

    >>> escape('html "test" & <brothers>')
    'html &quot;test&quot; &amp; &lt;brothers&gt;'
    '''
    if not isinstance(html, basestring):
        html = str(html)
    # '&' must be replaced first, or the other entities get double-escaped.
    for char, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'),
                         ('"', '&quot;'), ("'", '&#39;')):
        html = html.replace(char, entity)
    return html
def linebreaks(value):
    '''
    Convert newlines to HTML: blank-line-separated chunks become <p>
    paragraphs; single newlines inside a chunk become <br />.
    '''
    normalized = re.sub(r'\r\n|\r|\n', '\n', value)  # normalize newlines
    paragraphs = []
    for chunk in re.split('\n{2,}', normalized):
        paragraphs.append('<p>%s</p>' % chunk.strip().replace('\n', '<br />'))
    return '\n\n'.join(paragraphs)
def stripTags(value):
    """
    Return value with all HTML tags removed; the text between tags
    is kept.

    >>> stripTags('some <h2>title</h2> <script>asdfasdf</script>')
    'some title asdfasdf'
    """
    return re.sub(r'<[^>]*?>', '', value)
def stripSpacesBetweenTags(value):
    '''Collapse whitespace runs between adjacent tags to a single space.'''
    return re.sub(r'>\s+<', '> <', value)
def stripEntities(value):
    "Returns the given HTML with all entities (&something;) stripped"
    # \d+ (the original had a lone \d) so multi-digit numeric entities
    # such as &#123; are stripped as documented, not just single-digit ones.
    return re.sub(r'&(?:\w+|#\d+);', '', value)
def fixAmpersands(value):
    '''Encode stray, unencoded ampersands in value as &amp;.'''
    return unencoded_ampersands_re.sub('&amp;', value)
def urlize(text, trim_url_limit=None, nofollow=False):
    """
    Convert URLs in text into clickable <a> links. Handles http://,
    https:// and www. forms, bare domains ending in .org/.net/.com, and
    e-mail addresses. Leading and trailing punctuation around a candidate
    word is preserved outside the link.

    If trim_url_limit is not None, the visible link text is truncated to
    that many characters (with a '...' suffix).
    If nofollow is True, links get a rel="nofollow" attribute.
    """
    trim_url = lambda x, limit=trim_url_limit: limit is not None and (x[:limit] + (len(x) >= limit and '...' or '')) or x
    words = word_split_re.split(text)
    nofollow_attr = nofollow and ' rel="nofollow"' or ''
    for i, word in enumerate(words):
        match = punctuation_re.match(word)
        if not match:
            continue
        lead, middle, trail = match.groups()
        # Bare domain (www.* or something.org/.net/.com): assume http.
        if middle.startswith('www.') or ('@' not in middle and not middle.startswith('http://') and
                len(middle) > 0 and middle[0] in string.letters + string.digits and
                (middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com'))):
            middle = '<a href="http://%s"%s>%s</a>' % (middle, nofollow_attr, trim_url(middle))
        if middle.startswith('http://') or middle.startswith('https://'):
            middle = '<a href="%s"%s>%s</a>' % (middle, nofollow_attr, trim_url(middle))
        if '@' in middle and not middle.startswith('www.') and not ':' in middle \
                and simple_email_re.match(middle):
            middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
        if lead + middle + trail != word:
            words[i] = lead + middle + trail
    return ''.join(words)
def cleanHtml(text):
    """
    Clean the given HTML. Specifically:
      * Converts <b> and <i> to <strong> and <em>.
      * Encodes all ampersands correctly.
      * Removes all "target" attributes from <a> tags.
      * Removes extraneous HTML, such as presentational tags that open
        and immediately close and <br clear="all">.
      * Converts hard-coded bullets into HTML unordered lists.
      * Removes stuff like "<p>&nbsp;&nbsp;</p>", but only at the bottom
        of the text.
    """
    from text import normalizeNewlines
    text = normalizeNewlines(text)
    text = re.sub(r'<(/?)\s*b\s*>', '<\\1strong>', text)
    text = re.sub(r'<(/?)\s*i\s*>', '<\\1em>', text)
    text = fixAmpersands(text)
    # Remove all target="" attributes from <a> tags.
    text = link_target_attribute_re.sub('\\1', text)
    # Trim stupid HTML such as <br clear="all">.
    text = html_gunk_re.sub('', text)

    def replace_p_tags(match):
        # Turn a run of bullet paragraphs into a <ul> of <li> items.
        s = match.group().replace('</p>', '</li>')
        for d in DOTS:
            s = s.replace('<p>%s' % d, '<li>')
        return '<ul>\n%s\n</ul>' % s
    # Convert hard-coded bullets into HTML unordered lists.
    text = hard_coded_bullets_re.sub(replace_p_tags, text)
    # Remove trailing empty content like "<p>&nbsp;&nbsp;</p>".
    return trailing_empty_content_re.sub('', text)
# Matches a character entity reference: a decimal numeric reference
# (&#123;), a hexadecimal numeric reference (&#x7b;), or a named
# reference (&amp;). The trailing semicolon is optional.
charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?')
def decodeHtml(html):
    """
    Replace HTML character entities in html with the characters they
    name, and no-break spaces (u'\\xa0') with plain spaces.

    >>> decodeHtml('me &amp; you and &#36;&#38;%')
    u'me & you and $&%'
    """
    if type(html) != unicode:
        html = unicode(html)[:]
    if type(html) is unicode:
        uchr = unichr
    else:
        # Byte strings: stay in the 8-bit range where possible.
        uchr = lambda value: value > 255 and unichr(value) or chr(value)

    def entitydecode(match, uchr=uchr):
        entity = match.group(1)
        if entity.startswith('#x'):
            return uchr(int(entity[2:], 16))
        elif entity.startswith('#'):
            return uchr(int(entity[1:]))
        elif entity in name2codepoint:
            return uchr(name2codepoint[entity])
        else:
            # Unknown entity: leave the original text untouched.
            return match.group(0)
    return charrefpat.sub(entitydecode, html).replace(u'\xa0', ' ')
def highlight(text, query, hlClass="hl"):
    """
    Wrap occurrences of query in text in a <span class="hlClass"> element,
    matching case-insensitively while preserving the matched text's case.
    <br /> tags are protected from matching by temporarily swapping them
    for '|'.

    >>> highlight('me &amp; you and &#36;&#38;%', 'and')
    'me &amp; you <span class="hl">and</span> &#36;&#38;%'
    """
    if query:
        text = text.replace('<br />', '|')
        query = re.escape(query).replace('\ ', '.')
        # Single case-insensitive substitution. The original re-ran one
        # substitution per findall() hit, which re-matched inside the
        # markup added by earlier passes (e.g. the letters of "class")
        # and produced nested <span>s when query occurred more than once.
        pattern = re.compile("(%s)" % query, re.IGNORECASE)
        text = pattern.sub('<span class="%s">\\1</span>' % hlClass, text)
        text = text.replace('|', '<br />')
    return text

View file

@ -1,236 +1,236 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2 # vi:si:et:sw=4:sts=4:ts=4
_iso639_languages = [ _iso639_languages = [
("Unknown", "", "", "und"), ("Unknown", "", "", "und"),
("Afar", "", "aa", "aar"), ("Afar", "", "aa", "aar"),
("Abkhazian", "", "ab", "abk"), ("Abkhazian", "", "ab", "abk"),
("Afrikaans", "", "af", "afr"), ("Afrikaans", "", "af", "afr"),
("Akan", "", "ak", "aka"), ("Akan", "", "ak", "aka"),
("Albanian", "", "sq", "sqi"), ("Albanian", "", "sq", "sqi"),
("Amharic", "", "am", "amh"), ("Amharic", "", "am", "amh"),
("Arabic", "", "ar", "ara"), ("Arabic", "", "ar", "ara"),
("Aragonese", "", "an", "arg"), ("Aragonese", "", "an", "arg"),
("Armenian", "", "hy", "hye"), ("Armenian", "", "hy", "hye"),
("Assamese", "", "as", "asm"), ("Assamese", "", "as", "asm"),
("Avaric", "", "av", "ava"), ("Avaric", "", "av", "ava"),
("Avestan", "", "ae", "ave"), ("Avestan", "", "ae", "ave"),
("Aymara", "", "ay", "aym"), ("Aymara", "", "ay", "aym"),
("Azerbaijani", "", "az", "aze"), ("Azerbaijani", "", "az", "aze"),
("Bashkir", "", "ba", "bak"), ("Bashkir", "", "ba", "bak"),
("Bambara", "", "bm", "bam"), ("Bambara", "", "bm", "bam"),
("Basque", "", "eu", "eus"), ("Basque", "", "eu", "eus"),
("Belarusian", "", "be", "bel"), ("Belarusian", "", "be", "bel"),
("Bengali", "", "bn", "ben"), ("Bengali", "", "bn", "ben"),
("Bihari", "", "bh", "bih"), ("Bihari", "", "bh", "bih"),
("Bislama", "", "bi", "bis"), ("Bislama", "", "bi", "bis"),
("Bosnian", "", "bs", "bos"), ("Bosnian", "", "bs", "bos"),
("Breton", "", "br", "bre"), ("Breton", "", "br", "bre"),
("Bulgarian", "", "bg", "bul"), ("Bulgarian", "", "bg", "bul"),
("Burmese", "", "my", "mya"), ("Burmese", "", "my", "mya"),
("Catalan", "", "ca", "cat"), ("Catalan", "", "ca", "cat"),
("Chamorro", "", "ch", "cha"), ("Chamorro", "", "ch", "cha"),
("Chechen", "", "ce", "che"), ("Chechen", "", "ce", "che"),
("Chinese", "", "zh", "zho"), ("Chinese", "", "zh", "zho"),
("Church Slavic", "", "cu", "chu"), ("Church Slavic", "", "cu", "chu"),
("Chuvash", "", "cv", "chv"), ("Chuvash", "", "cv", "chv"),
("Cornish", "", "kw", "cor"), ("Cornish", "", "kw", "cor"),
("Corsican", "", "co", "cos"), ("Corsican", "", "co", "cos"),
("Cree", "", "cr", "cre"), ("Cree", "", "cr", "cre"),
("Czech", "", "cs", "ces"), ("Czech", "", "cs", "ces"),
("Danish", "Dansk", "da", "dan"), ("Danish", "Dansk", "da", "dan"),
("Divehi", "", "dv", "div"), ("Divehi", "", "dv", "div"),
("Dutch", "Nederlands", "nl", "nld"), ("Dutch", "Nederlands", "nl", "nld"),
("Dzongkha", "", "dz", "dzo"), ("Dzongkha", "", "dz", "dzo"),
("English", "English", "en", "eng"), ("English", "English", "en", "eng"),
("Esperanto", "", "eo", "epo"), ("Esperanto", "", "eo", "epo"),
("Estonian", "", "et", "est"), ("Estonian", "", "et", "est"),
("Ewe", "", "ee", "ewe"), ("Ewe", "", "ee", "ewe"),
("Faroese", "", "fo", "fao"), ("Faroese", "", "fo", "fao"),
("Fijian", "", "fj", "fij"), ("Fijian", "", "fj", "fij"),
("Finnish", "Suomi", "fi", "fin"), ("Finnish", "Suomi", "fi", "fin"),
("French", "Francais", "fr", "fra"), ("French", "Francais", "fr", "fra"),
("Western Frisian", "", "fy", "fry"), ("Western Frisian", "", "fy", "fry"),
("Fulah", "", "ff", "ful"), ("Fulah", "", "ff", "ful"),
("Georgian", "", "ka", "kat"), ("Georgian", "", "ka", "kat"),
("German", "Deutsch", "de", "deu"), ("German", "Deutsch", "de", "deu"),
("Gaelic (Scots)", "", "gd", "gla"), ("Gaelic (Scots)", "", "gd", "gla"),
("Irish", "", "ga", "gle"), ("Irish", "", "ga", "gle"),
("Galician", "", "gl", "glg"), ("Galician", "", "gl", "glg"),
("Manx", "", "gv", "glv"), ("Manx", "", "gv", "glv"),
("Greek, Modern", "", "el", "ell"), ("Greek, Modern", "", "el", "ell"),
("Guarani", "", "gn", "grn"), ("Guarani", "", "gn", "grn"),
("Gujarati", "", "gu", "guj"), ("Gujarati", "", "gu", "guj"),
("Haitian", "", "ht", "hat"), ("Haitian", "", "ht", "hat"),
("Hausa", "", "ha", "hau"), ("Hausa", "", "ha", "hau"),
("Hebrew", "", "he", "heb"), ("Hebrew", "", "he", "heb"),
("Herero", "", "hz", "her"), ("Herero", "", "hz", "her"),
("Hindi", "", "hi", "hin"), ("Hindi", "", "hi", "hin"),
("Hiri Motu", "", "ho", "hmo"), ("Hiri Motu", "", "ho", "hmo"),
("Hungarian", "Magyar", "hu", "hun"), ("Hungarian", "Magyar", "hu", "hun"),
("Igbo", "", "ig", "ibo"), ("Igbo", "", "ig", "ibo"),
("Icelandic", "Islenska", "is", "isl"), ("Icelandic", "Islenska", "is", "isl"),
("Ido", "", "io", "ido"), ("Ido", "", "io", "ido"),
("Sichuan Yi", "", "ii", "iii"), ("Sichuan Yi", "", "ii", "iii"),
("Inuktitut", "", "iu", "iku"), ("Inuktitut", "", "iu", "iku"),
("Interlingue", "", "ie", "ile"), ("Interlingue", "", "ie", "ile"),
("Interlingua", "", "ia", "ina"), ("Interlingua", "", "ia", "ina"),
("Indonesian", "", "id", "ind"), ("Indonesian", "", "id", "ind"),
("Inupiaq", "", "ik", "ipk"), ("Inupiaq", "", "ik", "ipk"),
("Italian", "Italiano", "it", "ita"), ("Italian", "Italiano", "it", "ita"),
("Javanese", "", "jv", "jav"), ("Javanese", "", "jv", "jav"),
("Japanese", "", "ja", "jpn"), ("Japanese", "", "ja", "jpn"),
("Kalaallisut (Greenlandic)", "", "kl", "kal"), ("Kalaallisut (Greenlandic)", "", "kl", "kal"),
("Kannada", "", "kn", "kan"), ("Kannada", "", "kn", "kan"),
("Kashmiri", "", "ks", "kas"), ("Kashmiri", "", "ks", "kas"),
("Kanuri", "", "kr", "kau"), ("Kanuri", "", "kr", "kau"),
("Kazakh", "", "kk", "kaz"), ("Kazakh", "", "kk", "kaz"),
("Central Khmer", "", "km", "khm"), ("Central Khmer", "", "km", "khm"),
("Kikuyu", "", "ki", "kik"), ("Kikuyu", "", "ki", "kik"),
("Kinyarwanda", "", "rw", "kin"), ("Kinyarwanda", "", "rw", "kin"),
("Kirghiz", "", "ky", "kir"), ("Kirghiz", "", "ky", "kir"),
("Komi", "", "kv", "kom"), ("Komi", "", "kv", "kom"),
("Kongo", "", "kg", "kon"), ("Kongo", "", "kg", "kon"),
("Korean", "", "ko", "kor"), ("Korean", "", "ko", "kor"),
("Kuanyama", "", "kj", "kua"), ("Kuanyama", "", "kj", "kua"),
("Kurdish", "", "ku", "kur"), ("Kurdish", "", "ku", "kur"),
("Lao", "", "lo", "lao"), ("Lao", "", "lo", "lao"),
("Latin", "", "la", "lat"), ("Latin", "", "la", "lat"),
("Latvian", "", "lv", "lav"), ("Latvian", "", "lv", "lav"),
("Limburgan", "", "li", "lim"), ("Limburgan", "", "li", "lim"),
("Lingala", "", "ln", "lin"), ("Lingala", "", "ln", "lin"),
("Lithuanian", "", "lt", "lit"), ("Lithuanian", "", "lt", "lit"),
("Luxembourgish", "", "lb", "ltz"), ("Luxembourgish", "", "lb", "ltz"),
("Luba-Katanga", "", "lu", "lub"), ("Luba-Katanga", "", "lu", "lub"),
("Ganda", "", "lg", "lug"), ("Ganda", "", "lg", "lug"),
("Macedonian", "", "mk", "mkd"), ("Macedonian", "", "mk", "mkd"),
("Marshallese", "", "mh", "mah"), ("Marshallese", "", "mh", "mah"),
("Malayalam", "", "ml", "mal"), ("Malayalam", "", "ml", "mal"),
("Maori", "", "mi", "mri"), ("Maori", "", "mi", "mri"),
("Marathi", "", "mr", "mar"), ("Marathi", "", "mr", "mar"),
("Malay", "", "ms", "msa"), ("Malay", "", "ms", "msa"),
("Malagasy", "", "mg", "mlg"), ("Malagasy", "", "mg", "mlg"),
("Maltese", "", "mt", "mlt"), ("Maltese", "", "mt", "mlt"),
("Moldavian", "", "mo", "mol"), ("Moldavian", "", "mo", "mol"),
("Mongolian", "", "mn", "mon"), ("Mongolian", "", "mn", "mon"),
("Nauru", "", "na", "nau"), ("Nauru", "", "na", "nau"),
("Navajo", "", "nv", "nav"), ("Navajo", "", "nv", "nav"),
("Ndebele, South", "", "nr", "nbl"), ("Ndebele, South", "", "nr", "nbl"),
("Ndebele, North", "", "nd", "nde"), ("Ndebele, North", "", "nd", "nde"),
("Ndonga", "", "ng", "ndo"), ("Ndonga", "", "ng", "ndo"),
("Nepali", "", "ne", "nep"), ("Nepali", "", "ne", "nep"),
("Norwegian Nynorsk", "", "nn", "nno"), ("Norwegian Nynorsk", "", "nn", "nno"),
("Norwegian Bokmål", "", "nb", "nob"), ("Norwegian Bokmål", "", "nb", "nob"),
("Norwegian", "Norsk", "no", "nor"), ("Norwegian", "Norsk", "no", "nor"),
("Chichewa; Nyanja", "", "ny", "nya"), ("Chichewa; Nyanja", "", "ny", "nya"),
("Occitan (post 1500); Provençal", "", "oc", "oci"), ("Occitan (post 1500); Provençal", "", "oc", "oci"),
("Ojibwa", "", "oj", "oji"), ("Ojibwa", "", "oj", "oji"),
("Oriya", "", "or", "ori"), ("Oriya", "", "or", "ori"),
("Oromo", "", "om", "orm"), ("Oromo", "", "om", "orm"),
("Ossetian; Ossetic", "", "os", "oss"), ("Ossetian; Ossetic", "", "os", "oss"),
("Panjabi", "", "pa", "pan"), ("Panjabi", "", "pa", "pan"),
("Persian", "", "fa", "fas"), ("Persian", "", "fa", "fas"),
("Pali", "", "pi", "pli"), ("Pali", "", "pi", "pli"),
("Polish", "", "pl", "pol"), ("Polish", "", "pl", "pol"),
("Portuguese", "Portugues", "pt", "por"), ("Portuguese", "Portugues", "pt", "por"),
("Pushto", "", "ps", "pus"), ("Pushto", "", "ps", "pus"),
("Quechua", "", "qu", "que"), ("Quechua", "", "qu", "que"),
("Romansh", "", "rm", "roh"), ("Romansh", "", "rm", "roh"),
("Romanian", "", "ro", "ron"), ("Romanian", "", "ro", "ron"),
("Rundi", "", "rn", "run"), ("Rundi", "", "rn", "run"),
("Russian", "", "ru", "rus"), ("Russian", "", "ru", "rus"),
("Sango", "", "sg", "sag"), ("Sango", "", "sg", "sag"),
("Sanskrit", "", "sa", "san"), ("Sanskrit", "", "sa", "san"),
("Serbian", "", "sr", "srp"), ("Serbian", "", "sr", "srp"),
("Croatian", "Hrvatski", "hr", "hrv"), ("Croatian", "Hrvatski", "hr", "hrv"),
("Sinhala", "", "si", "sin"), ("Sinhala", "", "si", "sin"),
("Slovak", "", "sk", "slk"), ("Slovak", "", "sk", "slk"),
("Slovenian", "", "sl", "slv"), ("Slovenian", "", "sl", "slv"),
("Northern Sami", "", "se", "sme"), ("Northern Sami", "", "se", "sme"),
("Samoan", "", "sm", "smo"), ("Samoan", "", "sm", "smo"),
("Shona", "", "sn", "sna"), ("Shona", "", "sn", "sna"),
("Sindhi", "", "sd", "snd"), ("Sindhi", "", "sd", "snd"),
("Somali", "", "so", "som"), ("Somali", "", "so", "som"),
("Sotho, Southern", "", "st", "sot"), ("Sotho, Southern", "", "st", "sot"),
("Spanish", "Espanol", "es", "spa"), ("Spanish", "Espanol", "es", "spa"),
("Sardinian", "", "sc", "srd"), ("Sardinian", "", "sc", "srd"),
("Swati", "", "ss", "ssw"), ("Swati", "", "ss", "ssw"),
("Sundanese", "", "su", "sun"), ("Sundanese", "", "su", "sun"),
("Swahili", "", "sw", "swa"), ("Swahili", "", "sw", "swa"),
("Swedish", "Svenska", "sv", "swe"), ("Swedish", "Svenska", "sv", "swe"),
("Tahitian", "", "ty", "tah"), ("Tahitian", "", "ty", "tah"),
("Tamil", "", "ta", "tam"), ("Tamil", "", "ta", "tam"),
("Tatar", "", "tt", "tat"), ("Tatar", "", "tt", "tat"),
("Telugu", "", "te", "tel"), ("Telugu", "", "te", "tel"),
("Tajik", "", "tg", "tgk"), ("Tajik", "", "tg", "tgk"),
("Tagalog", "", "tl", "tgl"), ("Tagalog", "", "tl", "tgl"),
("Thai", "", "th", "tha"), ("Thai", "", "th", "tha"),
("Tibetan", "", "bo", "bod"), ("Tibetan", "", "bo", "bod"),
("Tigrinya", "", "ti", "tir"), ("Tigrinya", "", "ti", "tir"),
("Tonga (Tonga Islands)", "", "to", "ton"), ("Tonga (Tonga Islands)", "", "to", "ton"),
("Tswana", "", "tn", "tsn"), ("Tswana", "", "tn", "tsn"),
("Tsonga", "", "ts", "tso"), ("Tsonga", "", "ts", "tso"),
("Turkmen", "", "tk", "tuk"), ("Turkmen", "", "tk", "tuk"),
("Turkish", "", "tr", "tur"), ("Turkish", "", "tr", "tur"),
("Twi", "", "tw", "twi"), ("Twi", "", "tw", "twi"),
("Uighur", "", "ug", "uig"), ("Uighur", "", "ug", "uig"),
("Ukrainian", "", "uk", "ukr"), ("Ukrainian", "", "uk", "ukr"),
("Urdu", "", "ur", "urd"), ("Urdu", "", "ur", "urd"),
("Uzbek", "", "uz", "uzb"), ("Uzbek", "", "uz", "uzb"),
("Venda", "", "ve", "ven"), ("Venda", "", "ve", "ven"),
("Vietnamese", "", "vi", "vie"), ("Vietnamese", "", "vi", "vie"),
("Volapük", "", "vo", "vol"), ("Volapük", "", "vo", "vol"),
("Welsh", "", "cy", "cym"), ("Welsh", "", "cy", "cym"),
("Walloon", "", "wa", "wln"), ("Walloon", "", "wa", "wln"),
("Wolof", "", "wo", "wol"), ("Wolof", "", "wo", "wol"),
("Xhosa", "", "xh", "xho"), ("Xhosa", "", "xh", "xho"),
("Yiddish", "", "yi", "yid"), ("Yiddish", "", "yi", "yid"),
("Yoruba", "", "yo", "yor"), ("Yoruba", "", "yo", "yor"),
("Zhuang", "", "za", "zha"), ("Zhuang", "", "za", "zha"),
("Zulu", "", "zu", "zul"), ("Zulu", "", "zu", "zul"),
] ]
def codeToLang(code): def codeToLang(code):
code = code.lower() code = code.lower()
if len(code) == 2: if len(code) == 2:
for l in _iso639_languages: for l in _iso639_languages:
if l[2] == code: if l[2] == code:
return l[0] return l[0]
elif len(code) == 3: elif len(code) == 3:
for l in _iso639_languages: for l in _iso639_languages:
if l[3] == code: if l[3] == code:
return l[0] return l[0]
return None return None
def langTo3Code(lang): def langTo3Code(lang):
lang = englishName(lang) lang = englishName(lang)
if lang: if lang:
lang=lang.lower() lang=lang.lower()
for l in _iso639_languages: for l in _iso639_languages:
if l[0].lower() == lang: if l[0].lower() == lang:
return l[3] return l[3]
return None return None
def langTo2Code(lang): def langTo2Code(lang):
lang = englishName(lang) lang = englishName(lang)
if lang: if lang:
lang=lang.lower() lang=lang.lower()
for l in _iso639_languages: for l in _iso639_languages:
if l[0].lower() == lang: if l[0].lower() == lang:
return l[2] return l[2]
return None return None
def langCode2To3(code): def langCode2To3(code):
langTo3Code(codeToLang(code)) langTo3Code(codeToLang(code))
def langCode3To2(code): def langCode3To2(code):
langTo2Code(codeToLang(code)) langTo2Code(codeToLang(code))
def englishName(lang): def englishName(lang):
lang = lang.lower() lang = lang.lower()
for l in _iso639_languages: for l in _iso639_languages:
if l[1].lower() == lang: if l[1].lower() == lang:
return l[0] return l[0]
return None return None

View file

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2 # vi:si:et:sw=4:sts=4:ts=4
import gzip import gzip
import StringIO import StringIO
import urllib import urllib
@ -10,64 +10,64 @@ from chardet.universaldetector import UniversalDetector
# Default headers for HTTP requests. # Default headers for HTTP requests.
DEFAULT_HEADERS = { DEFAULT_HEADERS = {
'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9) Gecko/2008061015 Firefox/3.0', 'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9) Gecko/2008061015 Firefox/3.0',
'Accept-Encoding': 'gzip' 'Accept-Encoding': 'gzip'
} }
def status(url, data=None, headers=DEFAULT_HEADERS): def status(url, data=None, headers=DEFAULT_HEADERS):
try: try:
f = openUrl(url, data, headers) f = openUrl(url, data, headers)
s = f.code s = f.code
except urllib2.HTTPError, e: except urllib2.HTTPError, e:
s = e.code s = e.code
return s return s
def exists(url, data=None, headers=DEFAULT_HEADERS): def exists(url, data=None, headers=DEFAULT_HEADERS):
s = status(url, data, headers) s = status(url, data, headers)
if s >= 200 and s < 400: if s >= 200 and s < 400:
return True return True
return False return False
def getHeaders(url, data=None, headers=DEFAULT_HEADERS): def getHeaders(url, data=None, headers=DEFAULT_HEADERS):
try: try:
f = openUrl(url, data, headers) f = openUrl(url, data, headers)
f.headers['Status'] = "%s" % f.code f.headers['Status'] = "%s" % f.code
headers = f.headers headers = f.headers
f.close() f.close()
except urllib2.HTTPError, e: except urllib2.HTTPError, e:
e.headers['Status'] = "%s" % e.code e.headers['Status'] = "%s" % e.code
headers = e.headers headers = e.headers
return dict(headers) return dict(headers)
def openUrl(url, data=None, headers=DEFAULT_HEADERS): def openUrl(url, data=None, headers=DEFAULT_HEADERS):
url = url.replace(' ', '%20') url = url.replace(' ', '%20')
req = urllib2.Request(url, data, headers) req = urllib2.Request(url, data, headers)
return urllib2.urlopen(req) return urllib2.urlopen(req)
def getUrl(url, data=None, headers=DEFAULT_HEADERS, returnHeaders=False): def getUrl(url, data=None, headers=DEFAULT_HEADERS, returnHeaders=False):
f = openUrl(url, data, headers) f = openUrl(url, data, headers)
data = f.read() data = f.read()
f.close() f.close()
if f.headers.get('content-encoding', None) == 'gzip': if f.headers.get('content-encoding', None) == 'gzip':
data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read() data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()
if returnHeaders: if returnHeaders:
f.headers['Status'] = "%s" % f.code f.headers['Status'] = "%s" % f.code
return dict(f.headers), data return dict(f.headers), data
return data return data
def getUrlUnicode(url): def getUrlUnicode(url):
data = getUrl(url) data = getUrl(url)
encoding = getEncoding(data) encoding = getEncoding(data)
if not encoding: if not encoding:
encoding = 'latin-1' encoding = 'latin-1'
return unicode(data, encoding) return unicode(data, encoding)
def getEncoding(data): def getEncoding(data):
detector = UniversalDetector() detector = UniversalDetector()
for line in data.split('\n'): for line in data.split('\n'):
detector.feed(line) detector.feed(line)
if detector.done: if detector.done:
break break
detector.close() detector.close()
return detector.result['encoding'] return detector.result['encoding']

View file

@ -1,79 +1,79 @@
# -*- Mode: Python; -*- # -*- Mode: Python; -*-
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2 # vi:si:et:sw=4:sts=4:ts=4
import re import re
_articles = ('the', 'la', 'a', 'die', 'der', 'le', 'el', _articles = ('the', 'la', 'a', 'die', 'der', 'le', 'el',
"l'", 'il', 'das', 'les', 'o', 'ein', 'i', 'un', 'los', 'de', "l'", 'il', 'das', 'les', 'o', 'ein', 'i', 'un', 'los', 'de',
'an', 'una', 'las', 'eine', 'den', 'gli', 'het', 'os', 'lo', 'an', 'una', 'las', 'eine', 'den', 'gli', 'het', 'os', 'lo',
'az', 'det', 'ha-', 'een', 'ang', 'oi', 'ta', 'al-', 'dem', 'az', 'det', 'ha-', 'een', 'ang', 'oi', 'ta', 'al-', 'dem',
'mga', 'uno', "un'", 'ett', u'\xcf', 'eines', u'\xc7', 'els', 'mga', 'uno', "un'", 'ett', u'\xcf', 'eines', u'\xc7', 'els',
u'\xd4\xef', u'\xcf\xe9') u'\xd4\xef', u'\xcf\xe9')
# Articles in a dictionary. # Articles in a dictionary.
_articlesDict = dict([(x, x) for x in _articles]) _articlesDict = dict([(x, x) for x in _articles])
_spArticles = [] _spArticles = []
for article in _articles: for article in _articles:
if article[-1] not in ("'", '-'): article += ' ' if article[-1] not in ("'", '-'): article += ' '
_spArticles.append(article) _spArticles.append(article)
def canonicalTitle(title): def canonicalTitle(title):
"""Return the title in the canonic format 'Movie Title, The'. """Return the title in the canonic format 'Movie Title, The'.
>>> canonicalTitle('The Movie Title') >>> canonicalTitle('The Movie Title')
'Movie Title, The' 'Movie Title, The'
""" """
try: try:
if _articlesDict.has_key(title.split(', ')[-1].lower()): return title if _articlesDict.has_key(title.split(', ')[-1].lower()): return title
except IndexError: pass except IndexError: pass
ltitle = title.lower() ltitle = title.lower()
for article in _spArticles: for article in _spArticles:
if ltitle.startswith(article): if ltitle.startswith(article):
lart = len(article) lart = len(article)
title = '%s, %s' % (title[lart:], title[:lart]) title = '%s, %s' % (title[lart:], title[:lart])
if article[-1] == ' ': title = title[:-1] if article[-1] == ' ': title = title[:-1]
break break
## XXX: an attempt using a dictionary lookup. ## XXX: an attempt using a dictionary lookup.
##for artSeparator in (' ', "'", '-'): ##for artSeparator in (' ', "'", '-'):
## article = _articlesDict.get(ltitle.split(artSeparator)[0]) ## article = _articlesDict.get(ltitle.split(artSeparator)[0])
## if article is not None: ## if article is not None:
## lart = len(article) ## lart = len(article)
## # check titles like "una", "I'm Mad" and "L'abbacchio". ## # check titles like "una", "I'm Mad" and "L'abbacchio".
## if title[lart:] == '' or (artSeparator != ' ' and ## if title[lart:] == '' or (artSeparator != ' ' and
## title[lart:][1] != artSeparator): continue ## title[lart:][1] != artSeparator): continue
## title = '%s, %s' % (title[lart:], title[:lart]) ## title = '%s, %s' % (title[lart:], title[:lart])
## if artSeparator == ' ': title = title[1:] ## if artSeparator == ' ': title = title[1:]
## break ## break
return title return title
def normalizeTitle(title): def normalizeTitle(title):
"""Return the title in the normal "The Title" format. """Return the title in the normal "The Title" format.
>>> normalizeTitle('Movie Title, The') >>> normalizeTitle('Movie Title, The')
'The Movie Title' 'The Movie Title'
""" """
stitle = title.split(', ') stitle = title.split(', ')
if len(stitle) > 1 and _articlesDict.has_key(stitle[-1].lower()): if len(stitle) > 1 and _articlesDict.has_key(stitle[-1].lower()):
sep = ' ' sep = ' '
if stitle[-1][-1] in ("'", '-'): sep = '' if stitle[-1][-1] in ("'", '-'): sep = ''
title = '%s%s%s' % (stitle[-1], sep, ', '.join(stitle[:-1])) title = '%s%s%s' % (stitle[-1], sep, ', '.join(stitle[:-1]))
return title return title
def normalizeImdbId(imdbId): def normalizeImdbId(imdbId):
"""Return 7 digit imdbId. """Return 7 digit imdbId.
>>> normalizeImdbId('http://www.imdb.com/title/tt0159206/') >>> normalizeImdbId('http://www.imdb.com/title/tt0159206/')
'0159206' '0159206'
>>> normalizeImdbId(159206) >>> normalizeImdbId(159206)
'0159206' '0159206'
>>> normalizeImdbId('tt0159206') >>> normalizeImdbId('tt0159206')
'0159206' '0159206'
""" """
if isinstance(imdbId, basestring): if isinstance(imdbId, basestring):
imdbId = re.sub('.*(\d{7}).*', '\\1', imdbId) imdbId = re.sub('.*(\d{7}).*', '\\1', imdbId)
elif isinstance(imdbId, int): elif isinstance(imdbId, int):
imdbId = "%07d" % imdbId imdbId = "%07d" % imdbId
return imdbId return imdbId
# Common suffixes in surnames. # Common suffixes in surnames.

View file

@ -1,216 +1,216 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2 # vi:si:et:sw=4:sts=4:ts=4
# GPL written 2008 by j@pad.ma # GPL written 2008 by j@pad.ma
import re import re
def findRe(string, regexp): def findRe(string, regexp):
result = re.compile(regexp, re.DOTALL).findall(string) result = re.compile(regexp, re.DOTALL).findall(string)
if result: if result:
return result[0].strip() return result[0].strip()
return '' return ''
def findString(string, string0='', string1 = ''): def findString(string, string0='', string1 = ''):
"""Return the string between string0 and string1. """Return the string between string0 and string1.
If string0 or string1 is left out, begining or end of string is used. If string0 or string1 is left out, begining or end of string is used.
>>> findString('i am not there', string1=' not there') >>> findString('i am not there', string1=' not there')
'i am' 'i am'
>>> findString('i am not there', 'i am ', ' there') >>> findString('i am not there', 'i am ', ' there')
'not' 'not'
>>> findString('i am not there', 'i am not t') >>> findString('i am not there', 'i am not t')
'here' 'here'
""" """
if string0: if string0:
string0 = re.escape(string0) string0 = re.escape(string0)
else: else:
string0 = '^' string0 = '^'
if string1: if string1:
string1 = re.escape(string1) string1 = re.escape(string1)
else: else:
string1 = '$' string1 = '$'
return findRegexp(string, string0 + '(.*?)' + string1) return findRegexp(string, string0 + '(.*?)' + string1)
# Capitalizes the first letter of a string. # Capitalizes the first letter of a string.
capfirst = lambda x: x and x[0].upper() + x[1:] capfirst = lambda x: x and x[0].upper() + x[1:]
def removeSpecialCharacters(text): def removeSpecialCharacters(text):
""" """
Removes special characters inserted by Word. Removes special characters inserted by Word.
""" """
text = text.replace(u'\u2013', '-') text = text.replace(u'\u2013', '-')
text = text.replace(u'\u2026O', "'") text = text.replace(u'\u2026O', "'")
text = text.replace(u'\u2019', "'") text = text.replace(u'\u2019', "'")
text = text.replace(u'', "'") text = text.replace(u'', "'")
text = text.replace(u'', "'") text = text.replace(u'', "'")
text = text.replace(u'', "-") text = text.replace(u'', "-")
return text return text
def wrap(text, width): def wrap(text, width):
""" """
A word-wrap function that preserves existing line breaks and most spaces in A word-wrap function that preserves existing line breaks and most spaces in
the text. Expects that existing line breaks are posix newlines (\n). the text. Expects that existing line breaks are posix newlines (\n).
See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/148061 See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/148061
""" """
return reduce(lambda line, word, width=width: '%s%s%s' % return reduce(lambda line, word, width=width: '%s%s%s' %
(line, (line,
' \n'[(len(line[line.rfind('\n')+1:]) ' \n'[(len(line[line.rfind('\n')+1:])
+ len(word.split('\n',1)[0] + len(word.split('\n',1)[0]
) >= width)], ) >= width)],
word), word),
text.split(' ') text.split(' ')
) )
def truncateString(s, num): def truncateString(s, num):
"""Truncates a string after a certain number of chacters, but ends with a word """Truncates a string after a certain number of chacters, but ends with a word
>>> truncateString('Truncates a string after a certain number of chacters, but ends with a word', 23) >>> truncateString('Truncates a string after a certain number of chacters, but ends with a word', 23)
'Truncates a string...' 'Truncates a string...'
>>> truncateString('Truncates a string', 23) >>> truncateString('Truncates a string', 23)
'Truncates a string' 'Truncates a string'
""" """
length = int(num) length = int(num)
if len(s) <= length: if len(s) <= length:
return s return s
words = s.split() words = s.split()
ts = "" ts = ""
while words and len(ts) + len(words[0]) < length: while words and len(ts) + len(words[0]) < length:
ts += " " + words.pop(0) ts += " " + words.pop(0)
if words: if words:
ts += "..." ts += "..."
return ts.strip() return ts.strip()
def trimString(string, num): def trimString(string, num):
"""Truncates a string after a certain number of chacters, adding ... at -10 characters """Truncates a string after a certain number of chacters, adding ... at -10 characters
>>> trimString('Truncates a string after a certain number of chacters', 23) >>> trimString('Truncates a string after a certain number of chacters', 23)
'Truncates ...f chacters' 'Truncates ...f chacters'
>>> trimString('Truncates a string', 23) >>> trimString('Truncates a string', 23)
'Truncates a string' 'Truncates a string'
""" """
if len(string) > num: if len(string) > num:
string = string[:num - 13] + '...' + string[-10:] string = string[:num - 13] + '...' + string[-10:]
return string return string
def truncateWords(s, num): def truncateWords(s, num):
"Truncates a string after a certain number of words." "Truncates a string after a certain number of words."
length = int(num) length = int(num)
words = s.split() words = s.split()
if len(words) > length: if len(words) > length:
words = words[:length] words = words[:length]
if not words[-1].endswith('...'): if not words[-1].endswith('...'):
words.append('...') words.append('...')
return ' '.join(words) return ' '.join(words)
def getValidFilename(s): def getValidFilename(s):
""" """
Returns the given string converted to a string that can be used for a clean Returns the given string converted to a string that can be used for a clean
filename. Specifically, leading and trailing spaces are removed; filename. Specifically, leading and trailing spaces are removed;
all non-filename-safe characters are removed. all non-filename-safe characters are removed.
>>> getValidFilename("john's portrait in 2004.jpg") >>> getValidFilename("john's portrait in 2004.jpg")
'john_s_portrait_in_2004.jpg' 'john_s_portrait_in_2004.jpg'
""" """
s = s.strip() s = s.strip()
s = s.replace(' ', '_') s = s.replace(' ', '_')
s = re.sub(r'[^-A-Za-z0-9_.\[\]\ ]', '_', s) s = re.sub(r'[^-A-Za-z0-9_.\[\]\ ]', '_', s)
s = s.replace('__', '_').replace('__', '_') s = s.replace('__', '_').replace('__', '_')
return s return s
def getTextList(list_, last_word='or'): def getTextList(list_, last_word='or'):
""" """
>>> getTextList(['a', 'b', 'c', 'd']) >>> getTextList(['a', 'b', 'c', 'd'])
'a, b, c or d' 'a, b, c or d'
>>> getTextList(['a', 'b', 'c'], 'and') >>> getTextList(['a', 'b', 'c'], 'and')
'a, b and c' 'a, b and c'
>>> getTextList(['a', 'b'], 'and') >>> getTextList(['a', 'b'], 'and')
'a and b' 'a and b'
>>> getTextList(['a']) >>> getTextList(['a'])
'a' 'a'
>>> getTextList([]) >>> getTextList([])
'' ''
""" """
if len(list_) == 0: return '' if len(list_) == 0: return ''
if len(list_) == 1: return list_[0] if len(list_) == 1: return list_[0]
return '%s %s %s' % (', '.join([str(i) for i in list_][:-1]), last_word, list_[-1]) return '%s %s %s' % (', '.join([str(i) for i in list_][:-1]), last_word, list_[-1])
def getListText(text, last_word='or'): def getListText(text, last_word='or'):
""" """
>>> getListText('a, b, c or d') >>> getListText('a, b, c or d')
['a', 'b', 'c', 'd'] ['a', 'b', 'c', 'd']
>>> getListText('a, b and c', 'and') >>> getListText('a, b and c', 'and')
['a', 'b', 'c'] ['a', 'b', 'c']
>>> getListText('a and b', 'and') >>> getListText('a and b', 'and')
['a', 'b'] ['a', 'b']
>>> getListText('a') >>> getListText('a')
['a'] ['a']
>>> getListText('') >>> getListText('')
[] []
""" """
list_ = [] list_ = []
if text: if text:
list_ = text.split(', ') list_ = text.split(', ')
if list_: if list_:
i=len(list_)-1 i=len(list_)-1
last = list_[i].split(last_word) last = list_[i].split(last_word)
if len(last) == 2: if len(last) == 2:
list_[i] = last[0].strip() list_[i] = last[0].strip()
list_.append(last[1].strip()) list_.append(last[1].strip())
return list_ return list_
def normalizeNewlines(text): def normalizeNewlines(text):
return re.sub(r'\r\n|\r|\n', '\n', text) return re.sub(r'\r\n|\r|\n', '\n', text)
def recapitalize(text): def recapitalize(text):
"Recapitalizes text, placing caps after end-of-sentence punctuation." "Recapitalizes text, placing caps after end-of-sentence punctuation."
# capwords = () #capwords = ()
text = text.lower() text = text.lower()
capsRE = re.compile(r'(?:^|(?<=[\.\?\!] ))([a-z])') capsRE = re.compile(r'(?:^|(?<=[\.\?\!] ))([a-z])')
text = capsRE.sub(lambda x: x.group(1).upper(), text) text = capsRE.sub(lambda x: x.group(1).upper(), text)
# for capword in capwords: #for capword in capwords:
# capwordRE = re.compile(r'\b%s\b' % capword, re.I) # capwordRE = re.compile(r'\b%s\b' % capword, re.I)
# text = capwordRE.sub(capword, text) # text = capwordRE.sub(capword, text)
return text return text
def phone2numeric(phone): def phone2numeric(phone):
"Converts a phone number with letters into its numeric equivalent." "Converts a phone number with letters into its numeric equivalent."
letters = re.compile(r'[A-PR-Y]', re.I) letters = re.compile(r'[A-PR-Y]', re.I)
char2number = lambda m: {'a': '2', 'c': '2', 'b': '2', 'e': '3', char2number = lambda m: {'a': '2', 'c': '2', 'b': '2', 'e': '3',
'd': '3', 'g': '4', 'f': '3', 'i': '4', 'h': '4', 'k': '5', 'd': '3', 'g': '4', 'f': '3', 'i': '4', 'h': '4', 'k': '5',
'j': '5', 'm': '6', 'l': '5', 'o': '6', 'n': '6', 'p': '7', 'j': '5', 'm': '6', 'l': '5', 'o': '6', 'n': '6', 'p': '7',
's': '7', 'r': '7', 'u': '8', 't': '8', 'w': '9', 'v': '8', 's': '7', 'r': '7', 'u': '8', 't': '8', 'w': '9', 'v': '8',
'y': '9', 'x': '9'}.get(m.group(0).lower()) 'y': '9', 'x': '9'}.get(m.group(0).lower())
return letters.sub(char2number, phone) return letters.sub(char2number, phone)
def compressString(s): def compressString(s):
import cStringIO, gzip import cStringIO, gzip
zbuf = cStringIO.StringIO() zbuf = cStringIO.StringIO()
zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuf) zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuf)
zfile.write(s) zfile.write(s)
zfile.close() zfile.close()
return zbuf.getvalue() return zbuf.getvalue()
# Matches either a double-quoted chunk, a single-quoted chunk (each allowing
# backslash-escaped characters inside), or a run of non-whitespace.
smart_split_re = re.compile(
    r'("(?:[^"\\]*(?:\\.[^"\\]*)*)"'
    r"|'(?:[^'\\]*(?:\\.[^'\\]*)*)'"
    r'|[^\s]+)')

def smartSplit(text):
    """
    Generator that splits a string by spaces, leaving quoted phrases together.
    Supports both single and double quotes, and supports escaping quotes with
    backslashes. In the output, strings will keep their initial and trailing
    quote marks.
    >>> list(smartSplit('This is "a person\\'s" test.'))
    ['This', 'is', '"a person\\'s"', 'test.']
    """
    for match in smart_split_re.finditer(text):
        bit = match.group(0)
        if bit[0] == '"':
            # Unescape \" and \\ inside the quotes, keep the quote marks.
            yield '"' + bit[1:-1].replace('\\"', '"').replace('\\\\', '\\') + '"'
        elif bit[0] == "'":
            yield "'" + bit[1:-1].replace("\\'", "'").replace("\\\\", "\\") + "'"
        else:
            yield bit

View file

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# Written 2007 by j@mailb.org # Written 2007 by j@mailb.org
from threading import Event from threading import Event
@ -11,50 +12,50 @@ from BitTornado.bencode import bencode, bdecode
def createTorrent(file, url, params=None, flag=None,
                  progress=lambda x: None, progress_percent=1):
    """Creates a torrent for a given file, using url as tracker url.

    params  -- optional dict of extra metainfo parameters
    flag    -- optional threading.Event used to abort the operation
    progress/progress_percent -- progress callback and reporting mode
    """
    # Bug fix: the defaults were `params={}` and `flag=Event()`, which are
    # evaluated once at definition time and shared by every call — callers
    # mutating params, or setting the shared Event, affected later calls.
    if params is None:
        params = {}
    if flag is None:
        flag = Event()
    return make_meta_file(file, url, params, flag, progress, progress_percent)
def getInfoHash(torrentFile):
    "Returns Torrent Info Hash from torrent file"
    # Fix: the file handle was never closed; close it even if bdecode fails.
    metainfo_file = open(torrentFile, 'rb')
    try:
        metainfo = bdecode(metainfo_file.read())
    finally:
        metainfo_file.close()
    info = metainfo['info']
    # The info hash is the SHA-1 of the bencoded 'info' dict, upper-cased hex.
    return sha.sha(bencode(info)).hexdigest().upper()
def getTorrentInfoFromFile(torrentFile):
    """Return torrent info parsed from *torrentFile*.

    Adds a 'timestamp' key with the file's ctime on top of what
    getTorrentInfo() extracts from the raw data.
    """
    f = open(torrentFile, 'rb')
    try:
        data = f.read()
    finally:
        # Fix: close the handle even if read() raises.
        f.close()
    tinfo = getTorrentInfo(data)
    tinfo['timestamp'] = stat(torrentFile).st_ctime
    return tinfo
def getTorrentInfo(data):
    """Return a dict of torrent metadata parsed from raw bencoded *data*.

    The result contains every key of the metainfo's 'info' dict except the
    bulky 'pieces' blob, every top-level metainfo key except 'info' itself,
    plus computed 'size' (total payload bytes) and 'hash' (hex SHA-1 of the
    bencoded info dict).
    """
    tinfo = {}
    metainfo = bdecode(data)
    info = metainfo['info']
    if 'length' in info:
        # Single-file torrent: 'length' is the payload size directly.
        file_length = info['length']
    else:
        # Multi-file torrent: total up the individual file sizes.
        file_length = sum(f['length'] for f in info['files'])
    for key in info:
        if key != 'pieces':
            tinfo[key] = info[key]
    for key in metainfo:
        if key != 'info':
            tinfo[key] = metainfo[key]
    tinfo['size'] = file_length
    tinfo['hash'] = sha.sha(bencode(info)).hexdigest()
    tinfo['announce'] = metainfo['announce']
    return tinfo
def getTorrentSize(torrentFile):
    "Returns Size of files in torrent file in bytes"
    # Bug fix: getTorrentInfo() expects raw bencoded data, but this was
    # passing it a file path; go through getTorrentInfoFromFile() instead,
    # which reads the file and delegates the parsing.
    return getTorrentInfoFromFile(torrentFile)['size']

View file

@ -1,30 +1,30 @@
#!/usr/bin/env python
# vi:si:et:sw=4:sts=4:ts=4
# encoding: utf-8
# Packaging script for the oxutils helper library.
from setuptools import setup, find_packages

setup(
    name="oxutils",
    version="0.1",
    description="collection of utils used to work with python",
    author="0x",
    author_email="code@0xdb.org",
    url="http://code.0xdb.org/oxutils",
    download_url="http://code.0xdb.org/oxutils/download",
    license="GPLv3",
    packages=find_packages(),
    zip_safe=False,
    install_requires=[
        'chardet',
    ],
    keywords=[
    ],
    classifiers=[
        'Development Status :: 3 - Alpha',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Topic :: Software Development :: Libraries :: Python Modules',
    ],
)