Add tests and clean up some errors found while doing so

This commit is contained in:
j 2008-05-05 20:12:27 +02:00
parent 915d35ba4a
commit ea43810f2a
6 changed files with 165 additions and 34 deletions

3
README
View file

@ -16,3 +16,6 @@ Usage:
oxutils.formatBytes(1234567890) oxutils.formatBytes(1234567890)
'1.15 GB' '1.15 GB'
Tests:
nosetests --with-doctest oxutils

View file

@ -19,10 +19,22 @@ from net import DEFAULT_HEADERS
cache_timeout = 30*24*60*60 # default is 30 days cache_timeout = 30*24*60*60 # default is 30 days
def status(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout): def status(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
'''
>>> status('http://google.com')
200
>>> status('http://google.com/mysearch')
404
'''
headers = getHeaders(url, data, headers) headers = getHeaders(url, data, headers)
return int(headers['status']) return int(headers['status'])
def exists(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout): def exists(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
'''
>>> exists('http://google.com')
True
>>> exists('http://google.com/mysearch')
False
'''
s = status(url, data, headers, timeout) s = status(url, data, headers, timeout)
if s >= 200 and s < 400: if s >= 200 and s < 400:
return True return True

View file

@ -7,19 +7,18 @@ def to36(q):
""" """
Converts an integer to base 36 (a useful scheme for human-sayable IDs). Converts an integer to base 36 (a useful scheme for human-sayable IDs).
>>> to36(35) >>> to36(35)
'z' 'z'
>>> to36(119292) >>> to36(119292)
'2k1o' '2k1o'
>>> int(to36(939387374), 36) >>> int(to36(939387374), 36)
939387374 939387374
>>> to36(0) >>> to36(0)
'0' '0'
>>> to36(-393) >>> to36(-393)
Traceback (most recent call last): Traceback (most recent call last):
... ...
ValueError: must supply a positive integer ValueError: must supply a positive integer
""" """
if q < 0: raise ValueError, "must supply a positive integer" if q < 0: raise ValueError, "must supply a positive integer"
letters = "0123456789abcdefghijklmnopqrstuvwxyz" letters = "0123456789abcdefghijklmnopqrstuvwxyz"
@ -34,21 +33,48 @@ def from36(q):
def intValue(strValue, default=''): def intValue(strValue, default=''):
try: try:
val = re.compile('(\d*)').findall(unicode(strValue))[0] val = re.compile('(\d+)').findall(unicode(strValue).strip())[0]
except: except:
val = default val = default
return val return val
def test_intValue():
assert intValue('abc23') == '23'
assert intValue(' abc23') == '23'
assert intValue(' abc') == ''
def floatValue(strValue, default=''): def floatValue(strValue, default=''):
try: try:
val = re.compile('([\d.]*)').findall(unicode(strValue))[0] val = re.compile('([\d.]+)').findall(unicode(strValue).strip())[0]
except: except:
val = default val = default
return val return val
def test_floatValue():
print "floatValue"
assert floatValue('abc23.4') == '23.4'
assert floatValue(' abc23.4') == '23.4'
assert floatValue(' abc') == ''
def formatNumber(number, longName, shortName): def formatNumber(number, longName, shortName):
""" """
Return the number in a human-readable format (23 KB, 23.4 MB, 23.42 GB) Return the number in a human-readable format (23 KB, 23.4 MB, 23.42 GB)
>>> formatNumber(123, 'Byte', 'B')
'123 Bytes'
>>> formatNumber(1234, 'Byte', 'B')
'1 KB'
>>> formatNumber(1234567, 'Byte', 'B')
'1.2 MB'
>>> formatNumber(1234567890, 'Byte', 'B')
'1.15 GB'
>>> formatNumber(1234567890123456789, 'Byte', 'B')
'1,096.5166 PB'
""" """
if number < 1024: if number < 1024:
return '%s %s%s' % (formatThousands(number), longName, number != 1 and 's' or '') return '%s %s%s' % (formatThousands(number), longName, number != 1 and 's' or '')
@ -61,6 +87,13 @@ def formatNumber(number, longName, shortName):
def formatThousands(number, separator = ','): def formatThousands(number, separator = ','):
""" """
Return the number with separators (1,000,000) Return the number with separators (1,000,000)
>>> formatThousands(1)
'1'
>>> formatThousands(1000)
'1,000'
>>> formatThousands(1000000)
'1,000,000'
""" """
string = str(number).split('.') string = str(number).split('.')
l = [] l = []
@ -81,13 +114,29 @@ def formatPixels(number):
return formatNumber(number, 'pixel', 'px') return formatNumber(number, 'pixel', 'px')
def plural(amount, unit, plural='s'): def plural(amount, unit, plural='s'):
'''
>>> plural(1, 'unit')
'1 unit'
>>> plural(2, 'unit')
'2 units'
'''
if abs(amount) != 1: if abs(amount) != 1:
if plural == 's': if plural == 's':
unit = unit + plural unit = unit + plural
else: unit = plural else: unit = plural
return "%s %s" % (formatNumber(amount), unit) return "%s %s" % (formatThousands(amount), unit)
def ms2runtime(ms): def ms2runtime(ms):
'''
>>> ms2runtime(5000)
'5 seconds'
>>> ms2runtime(500000)
'8 minutes 20 seconds'
>>> ms2runtime(50000000)
'13 hours 53 minutes 20 seconds'
>>> ms2runtime(50000000-20000)
'13 hours 53 minutes'
'''
seconds = int(ms / 1000) seconds = int(ms / 1000)
years = 0 years = 0
days = 0 days = 0
@ -111,6 +160,14 @@ def ms2runtime(ms):
return " ".join(runtimeString).strip() return " ".join(runtimeString).strip()
def ms2playtime(ms): def ms2playtime(ms):
'''
>>> ms2playtime(5000)
'00:05'
>>> ms2playtime(500000)
'08:20'
>>> ms2playtime(50000000)
'13:53:20'
'''
it = int(ms / 1000) it = int(ms / 1000)
ms = ms - it*1000 ms = ms - it*1000
ss = it % 60 ss = it % 60
@ -123,6 +180,10 @@ def ms2playtime(ms):
return playtime return playtime
def ms2time(ms): def ms2time(ms):
'''
>>> ms2time(44592123)
'12:23:12.123'
'''
it = int(ms / 1000) it = int(ms / 1000)
ms = ms - it*1000 ms = ms - it*1000
ss = it % 60 ss = it % 60
@ -131,6 +192,10 @@ def ms2time(ms):
return "%d:%02d:%02d.%03d" % (hh, mm, ss, ms) return "%d:%02d:%02d.%03d" % (hh, mm, ss, ms)
def time2ms(timeString): def time2ms(timeString):
'''
>>> time2ms('12:23:12.123')
44592123
'''
ms = 0.0 ms = 0.0
p = timeString.split(':') p = timeString.split(':')
for i in range(len(p)): for i in range(len(p)):

View file

@ -26,20 +26,32 @@ trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\
del x # Temporary variable del x # Temporary variable
def escape(html): def escape(html):
"Returns the given HTML with ampersands, quotes and carets encoded" '''
Returns the given HTML with ampersands, quotes and carets encoded
>>> escape('html "test" & <brothers>')
'html &quot;test&quot; &amp; &lt;brothers&gt;'
'''
if not isinstance(html, basestring): if not isinstance(html, basestring):
html = str(html) html = str(html)
return html.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;') return html.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;')
def linebreaks(value): def linebreaks(value):
"Converts newlines into <p> and <br />s" '''
Converts newlines into <p> and <br />
'''
value = re.sub(r'\r\n|\r|\n', '\n', value) # normalize newlines value = re.sub(r'\r\n|\r|\n', '\n', value) # normalize newlines
paras = re.split('\n{2,}', value) paras = re.split('\n{2,}', value)
paras = ['<p>%s</p>' % p.strip().replace('\n', '<br />') for p in paras] paras = ['<p>%s</p>' % p.strip().replace('\n', '<br />') for p in paras]
return '\n\n'.join(paras) return '\n\n'.join(paras)
def stripTags(value): def stripTags(value):
"Returns the given HTML with all tags stripped" """
Returns the given HTML with all tags stripped
>>> stripTags('some <h2>title</h2> <script>asdfasdf</script>')
'some title asdfasdf'
"""
return re.sub(r'<[^>]*?>', '', value) return re.sub(r'<[^>]*?>', '', value)
def stripSpacesBetweenTags(value): def stripSpacesBetweenTags(value):
@ -122,7 +134,11 @@ def cleanHtml(text):
charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?') charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?')
def decodeHtml(html): def decodeHtml(html):
return htmldecode(html) """
>>> decodeHtml('me &amp; you and &#36;&#38;%')
u'me & you and $&%'
"""
return htmldecode(html)
def htmldecode(text): def htmldecode(text):
"""Decode HTML entities in the given text.""" """Decode HTML entities in the given text."""
@ -145,6 +161,10 @@ def htmldecode(text):
return charrefpat.sub(entitydecode, text).replace(u'\xa0', ' ') return charrefpat.sub(entitydecode, text).replace(u'\xa0', ' ')
def highlight(text, query, hlClass="hl"): def highlight(text, query, hlClass="hl"):
"""
>>> highlight('me &amp; you and &#36;&#38;%', 'and')
'me &amp; you <span class="hl">and</span> &#36;&#38;%'
"""
if query: if query:
text = text.replace('<br />', '|') text = text.replace('<br />', '|')
query = re.escape(query).replace('\ ', '.') query = re.escape(query).replace('\ ', '.')

View file

@ -18,7 +18,11 @@ for article in _articles:
_spArticles.append(article) _spArticles.append(article)
def canonicalTitle(title): def canonicalTitle(title):
"""Return the title in the canonic format 'Movie Title, The'.""" """Return the title in the canonic format 'Movie Title, The'.
>>> canonicalTitle('The Movie Title')
'Movie Title, The'
"""
try: try:
if _articlesDict.has_key(title.split(', ')[-1].lower()): return title if _articlesDict.has_key(title.split(', ')[-1].lower()): return title
except IndexError: pass except IndexError: pass
@ -43,7 +47,11 @@ def canonicalTitle(title):
return title return title
def normalizeTitle(title): def normalizeTitle(title):
"""Return the title in the normal "The Title" format.""" """Return the title in the normal "The Title" format.
>>> normalizeTitle('Movie Title, The')
'The Movie Title'
"""
stitle = title.split(', ') stitle = title.split(', ')
if len(stitle) > 1 and _articlesDict.has_key(stitle[-1].lower()): if len(stitle) > 1 and _articlesDict.has_key(stitle[-1].lower()):
sep = ' ' sep = ' '
@ -52,6 +60,15 @@ def normalizeTitle(title):
return title return title
def normalizeImdbId(imdbId): def normalizeImdbId(imdbId):
"""Return 7 digit imdbId.
>>> normalizeImdbId('http://www.imdb.com/title/tt0159206/')
'0159206'
>>> normalizeImdbId(159206)
'0159206'
>>> normalizeImdbId('tt0159206')
'0159206'
"""
if isinstance(imdbId, basestring): if isinstance(imdbId, basestring):
imdbId = re.sub('.*(\d{7}).*', '\\1', imdbId) imdbId = re.sub('.*(\d{7}).*', '\\1', imdbId)
elif isinstance(imdbId, int): elif isinstance(imdbId, int):

View file

@ -66,7 +66,14 @@ def wrap(text, width):
) )
def truncateString(s, num): def truncateString(s, num):
"Truncates a string after a certain number of chacters, but ends with a word" """Truncates a string after a certain number of chacters, but ends with a word
>>> truncateString('Truncates a string after a certain number of chacters, but ends with a word', 23)
'Truncates a string...'
>>> truncateString('Truncates a string', 23)
'Truncates a string'
"""
length = int(num) length = int(num)
if len(s) <= length: if len(s) <= length:
return s return s
@ -76,10 +83,16 @@ def truncateString(s, num):
ts += " " + words.pop(0) ts += " " + words.pop(0)
if words: if words:
ts += "..." ts += "..."
return ts return ts.strip()
def trimString(string, num): def trimString(string, num):
"Truncates a string after a certain number of chacters, adding ... at -10 characters" """Truncates a string after a certain number of chacters, adding ... at -10 characters
>>> trimString('Truncates a string after a certain number of chacters', 23)
'Truncates ...f chacters'
>>> trimString('Truncates a string', 23)
'Truncates a string'
"""
if len(string) > num: if len(string) > num:
string = string[:num - 13] + '...' + string[-10:] string = string[:num - 13] + '...' + string[-10:]
return string return string
@ -99,8 +112,9 @@ def getValidFilename(s):
Returns the given string converted to a string that can be used for a clean Returns the given string converted to a string that can be used for a clean
filename. Specifically, leading and trailing spaces are removed; filename. Specifically, leading and trailing spaces are removed;
all non-filename-safe characters are removed. all non-filename-safe characters are removed.
>>> get_valid_filename("john's portrait in 2004.jpg")
'john_s portrait in 2004.jpg' >>> getValidFilename("john's portrait in 2004.jpg")
'john_s_portrait_in_2004.jpg'
""" """
s = s.strip() s = s.strip()
s = s.replace(' ', '_') s = s.replace(' ', '_')
@ -110,15 +124,15 @@ def getValidFilename(s):
def getTextList(list_, last_word='or'): def getTextList(list_, last_word='or'):
""" """
>>> get_text_list(['a', 'b', 'c', 'd']) >>> getTextList(['a', 'b', 'c', 'd'])
'a, b, c or d' 'a, b, c or d'
>>> get_text_list(['a', 'b', 'c'], 'and') >>> getTextList(['a', 'b', 'c'], 'and')
'a, b and c' 'a, b and c'
>>> get_text_list(['a', 'b'], 'and') >>> getTextList(['a', 'b'], 'and')
'a and b' 'a and b'
>>> get_text_list(['a']) >>> getTextList(['a'])
'a' 'a'
>>> get_text_list([]) >>> getTextList([])
'' ''
""" """
if len(list_) == 0: return '' if len(list_) == 0: return ''
@ -164,8 +178,8 @@ def smartSplit(text):
Supports both single and double quotes, and supports escaping quotes with Supports both single and double quotes, and supports escaping quotes with
backslashes. In the output, strings will keep their initial and trailing backslashes. In the output, strings will keep their initial and trailing
quote marks. quote marks.
>>> list(smart_split('This is "a person\'s" test.')) >>> list(smartSplit('This is "a person\\'s" test.'))
['This', 'is', '"a person\'s"', 'test.'] ['This', 'is', '"a person\\'s"', 'test.']
""" """
for bit in smart_split_re.finditer(text): for bit in smart_split_re.finditer(text):
bit = bit.group(0) bit = bit.group(0)