add tests and clean up some errors found while doing so

2008-05-05 20:12:27 +02:00 · 2008-05-05 20:12:27 +02:00 · ea43810f2a
commit ea43810f2a
parent 915d35ba4a
6 changed files with 165 additions and 34 deletions
--- a/3
+++ b/3
@@ -16,3 +16,6 @@ Usage:
 oxutils.formatBytes(1234567890)
 '1.15 GB'

+
+Tests:
+ nosetests --with-doctest oxutils
--- a/oxutils/cache.py
+++ b/oxutils/cache.py
@@ -19,10 +19,22 @@ from net import DEFAULT_HEADERS
 cache_timeout = 30*24*60*60 # default is 30 days

 def status(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
+  '''
+    >>> status('http://google.com')
+    200
+    >>> status('http://google.com/mysearch')
+    404
+  '''
  headers = getHeaders(url, data, headers)
  return int(headers['status'])

 def exists(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
+  '''
+    >>> exists('http://google.com')
+    True
+    >>> exists('http://google.com/mysearch')
+    False
+  '''
  s = status(url, data, headers, timeout)
  if s >= 200 and s < 400:
    return True
--- a/oxutils/format.py
+++ b/oxutils/format.py
@@ -19,7 +19,6 @@ def to36(q):
  Traceback (most recent call last):
      ...
  ValueError: must supply a positive integer
-
  """
  if q < 0: raise ValueError, "must supply a positive integer"
  letters = "0123456789abcdefghijklmnopqrstuvwxyz"
@@ -34,21 +33,48 @@ def from36(q):

 def intValue(strValue, default=''):
  try:
-    val = re.compile('(\d*)').findall(unicode(strValue))[0]
+    val = re.compile('(\d+)').findall(unicode(strValue).strip())[0]
  except:
    val = default
  return val

+def test_intValue():
+  assert intValue('abc23') == '23'
+  assert intValue(' abc23') == '23'
+  assert intValue(' abc') == ''
+
 def floatValue(strValue, default=''):
  try:
-    val = re.compile('([\d.]*)').findall(unicode(strValue))[0]
+    val = re.compile('([\d.]+)').findall(unicode(strValue).strip())[0]
  except:
    val = default
  return val

+def test_floatValue():
+  print "floatValue"
+  assert floatValue('abc23.4') == '23.4'
+  assert floatValue(' abc23.4') == '23.4'
+  assert floatValue(' abc') == ''
+
 def formatNumber(number, longName, shortName):
  """
  Return the number in a human-readable format (23 KB, 23.4 MB, 23.42 GB)
+  
+  >>> formatNumber(123, 'Byte', 'B')
+  '123 Bytes'
+
+  >>> formatNumber(1234, 'Byte', 'B')
+  '1 KB'
+
+  >>> formatNumber(1234567, 'Byte', 'B')
+  '1.2 MB'
+
+  >>> formatNumber(1234567890, 'Byte', 'B')
+  '1.15 GB'
+
+  >>> formatNumber(1234567890123456789, 'Byte', 'B')
+  '1,096.5166 PB'
+
  """
  if number < 1024:
    return '%s %s%s' % (formatThousands(number), longName, number != 1 and 's' or '')
@@ -61,6 +87,13 @@ def formatNumber(number, longName, shortName):
 def formatThousands(number, separator = ','):
  """
  Return the number with separators (1,000,000)
+  
+  >>> formatThousands(1)
+  '1'
+  >>> formatThousands(1000)
+  '1,000'
+  >>> formatThousands(1000000)
+  '1,000,000'
  """
  string = str(number).split('.')
  l = []
@@ -81,13 +114,29 @@ def formatPixels(number):
  return formatNumber(number, 'pixel', 'px')

 def plural(amount, unit, plural='s'):
+  '''
+  >>> plural(1, 'unit')
+  '1 unit'
+  >>> plural(2, 'unit')
+  '2 units'
+  '''
  if abs(amount) != 1:
    if plural == 's':
      unit = unit + plural
    else: unit = plural
-  return "%s %s" % (formatNumber(amount), unit)
+  return "%s %s" % (formatThousands(amount), unit)

 def ms2runtime(ms):
+  '''
+  >>> ms2runtime(5000)
+  '5 seconds'
+  >>> ms2runtime(500000)
+  '8 minutes 20 seconds'
+  >>> ms2runtime(50000000)
+  '13 hours 53 minutes 20 seconds'
+  >>> ms2runtime(50000000-20000)
+  '13 hours 53 minutes'
+  '''
  seconds = int(ms / 1000)
  years = 0
  days = 0
@@ -111,6 +160,14 @@ def ms2runtime(ms):
  return " ".join(runtimeString).strip()

 def ms2playtime(ms):
+  '''
+  >>> ms2playtime(5000)
+  '00:05'
+  >>> ms2playtime(500000)
+  '08:20'
+  >>> ms2playtime(50000000)
+  '13:53:20'
+  '''
  it = int(ms / 1000)
  ms = ms - it*1000
  ss = it % 60
@@ -123,6 +180,10 @@ def ms2playtime(ms):
  return playtime

 def ms2time(ms):
+  '''
+  >>> ms2time(44592123)
+  '12:23:12.123'
+  '''
  it = int(ms / 1000)
  ms = ms - it*1000
  ss = it % 60
@@ -131,6 +192,10 @@ def ms2time(ms):
  return "%d:%02d:%02d.%03d" % (hh, mm, ss, ms)

 def time2ms(timeString):
+  '''
+  >>> time2ms('12:23:12.123')
+  44592123
+  '''
  ms = 0.0
  p = timeString.split(':')
  for i in range(len(p)):
--- a/oxutils/html.py
+++ b/oxutils/html.py
@@ -26,20 +26,32 @@ trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\
 del x # Temporary variable

 def escape(html):
-  "Returns the given HTML with ampersands, quotes and carets encoded"
+  '''
+  Returns the given HTML with ampersands, quotes and carets encoded
+
+  >>> escape('html "test" & <brothers>')
+  'html &quot;test&quot; &amp; &lt;brothers&gt;'
+  '''
  if not isinstance(html, basestring):
      html = str(html)
  return html.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;')

 def linebreaks(value):
-  "Converts newlines into <p> and <br />s"
+  '''
+  Converts newlines into <p> and <br />
+  '''
  value = re.sub(r'\r\n|\r|\n', '\n', value) # normalize newlines
  paras = re.split('\n{2,}', value)
  paras = ['<p>%s</p>' % p.strip().replace('\n', '<br />') for p in paras]
  return '\n\n'.join(paras)

 def stripTags(value):
-  "Returns the given HTML with all tags stripped"
+  """
+  Returns the given HTML with all tags stripped
+  
+  >>> stripTags('some <h2>title</h2> <script>asdfasdf</script>')
+  'some title asdfasdf'
+  """
  return re.sub(r'<[^>]*?>', '', value)
    
 def stripSpacesBetweenTags(value):
@@ -122,6 +134,10 @@ def cleanHtml(text):
 charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?')

 def decodeHtml(html):
+  """
+  >>> decodeHtml('me &amp; you and &#36;&#38;%')
+  u'me & you and $&%'
+  """
  return htmldecode(html)

 def htmldecode(text):
@@ -145,6 +161,10 @@ def htmldecode(text):
  return charrefpat.sub(entitydecode, text).replace(u'\xa0', ' ')

 def highlight(text, query, hlClass="hl"):
+  """
+  >>> highlight('me &amp; you and &#36;&#38;%', 'and')
+  'me &amp; you <span class="hl">and</span> &#36;&#38;%'
+  """
  if query:
    text = text.replace('<br />', '|')
    query = re.escape(query).replace('\ ', '.')
--- a/oxutils/normalize.py
+++ b/oxutils/normalize.py
@@ -18,7 +18,11 @@ for article in _articles:
  _spArticles.append(article)

 def canonicalTitle(title):
-  """Return the title in the canonic format 'Movie Title, The'."""
+  """Return the title in the canonic format 'Movie Title, The'.
+  
+  >>> canonicalTitle('The Movie Title')
+  'Movie Title, The'
+  """
  try:
      if _articlesDict.has_key(title.split(', ')[-1].lower()): return title
  except IndexError: pass
@@ -43,7 +47,11 @@ def canonicalTitle(title):
  return title

 def normalizeTitle(title):
-  """Return the title in the normal "The Title" format."""
+  """Return the title in the normal "The Title" format.
+
+  >>> normalizeTitle('Movie Title, The')
+  'The Movie Title'
+  """
  stitle = title.split(', ')
  if len(stitle) > 1 and _articlesDict.has_key(stitle[-1].lower()):
      sep = ' '
@@ -52,6 +60,15 @@ def normalizeTitle(title):
  return title

 def normalizeImdbId(imdbId):
+  """Return 7 digit imdbId.
+
+  >>> normalizeImdbId('http://www.imdb.com/title/tt0159206/')
+  '0159206'
+  >>> normalizeImdbId(159206)
+  '0159206'
+  >>> normalizeImdbId('tt0159206')
+  '0159206'
+  """
  if isinstance(imdbId, basestring):
    imdbId = re.sub('.*(\d{7}).*', '\\1', imdbId)
  elif isinstance(imdbId, int):
--- a/oxutils/text.py
+++ b/oxutils/text.py
@@ -66,7 +66,14 @@ def wrap(text, width):
                )

 def truncateString(s, num):
-  "Truncates a string after a certain number of chacters, but ends with a word"
+  """Truncates a string after a certain number of chacters, but ends with a word
+
+  >>> truncateString('Truncates a string after a certain number of chacters, but ends with a word', 23)
+  'Truncates a string...'
+  >>> truncateString('Truncates a string', 23)
+  'Truncates a string'
+
+  """
  length = int(num)
  if len(s) <= length:
    return s
@@ -76,10 +83,16 @@ def truncateString(s, num):
    ts += " " + words.pop(0)
  if words:
    ts += "..."
-  return ts
+  return ts.strip()

 def trimString(string, num):
-  "Truncates a string after a certain number of chacters, adding ... at -10 characters"
+  """Truncates a string after a certain number of chacters, adding ... at -10 characters
+
+  >>> trimString('Truncates a string after a certain number of chacters', 23)
+  'Truncates ...f chacters'
+  >>> trimString('Truncates a string', 23)
+  'Truncates a string'
+  """
  if len(string) > num:
    string = string[:num - 13] + '...' + string[-10:]
  return string
@@ -99,8 +112,9 @@ def getValidFilename(s):
  Returns the given string converted to a string that can be used for a clean
  filename. Specifically, leading and trailing spaces are removed; 
  all non-filename-safe characters are removed.
-  >>> get_valid_filename("john's portrait in 2004.jpg")
-  'john_s portrait in 2004.jpg'
+
+  >>> getValidFilename("john's portrait in 2004.jpg")
+  'john_s_portrait_in_2004.jpg'
  """
  s = s.strip()
  s = s.replace(' ', '_')
@@ -110,15 +124,15 @@ def getTextList(list_, last_word='or'):

 def getTextList(list_, last_word='or'):
  """
-  >>> get_text_list(['a', 'b', 'c', 'd'])
+  >>> getTextList(['a', 'b', 'c', 'd'])
  'a, b, c or d'
-  >>> get_text_list(['a', 'b', 'c'], 'and')
+  >>> getTextList(['a', 'b', 'c'], 'and')
  'a, b and c'
-  >>> get_text_list(['a', 'b'], 'and')
+  >>> getTextList(['a', 'b'], 'and')
  'a and b'
-  >>> get_text_list(['a'])
+  >>> getTextList(['a'])
  'a'
-  >>> get_text_list([])
+  >>> getTextList([])
  ''
  """
  if len(list_) == 0: return ''
@@ -164,8 +178,8 @@ def smartSplit(text):
  Supports both single and double quotes, and supports escaping quotes with
  backslashes. In the output, strings will keep their initial and trailing
  quote marks.
-  >>> list(smart_split('This is "a person\'s" test.'))
-  ['This', 'is', '"a person\'s"', 'test.']
+  >>> list(smartSplit('This is "a person\\'s" test.'))
+  ['This', 'is', '"a person\\'s"', 'test.']
  """
  for bit in smart_split_re.finditer(text):
      bit = bit.group(0)