add test and cleanup some errors found while doing so

This commit is contained in:
j 2008-05-05 20:12:27 +02:00
parent 915d35ba4a
commit ea43810f2a
6 changed files with 165 additions and 34 deletions

3
README
View file

@ -16,3 +16,6 @@ Usage:
oxutils.formatBytes(1234567890)
'1.15 GB'
Tests:
nosetests --with-doctest oxutils

View file

@ -19,10 +19,22 @@ from net import DEFAULT_HEADERS
cache_timeout = 30*24*60*60 # default is 30 days
def status(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
    '''
    Return the HTTP status code reported for url, as an integer.

    The code is read from the 'status' entry of whatever
    getHeaders(url, data, headers) returns. timeout is accepted but is not
    used anywhere in this body.

    >>> status('http://google.com')
    200
    >>> status('http://google.com/mysearch')
    404
    '''
    response_headers = getHeaders(url, data, headers)
    status_code = response_headers['status']
    return int(status_code)
def exists(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
'''
>>> exists('http://google.com')
True
>>> exists('http://google.com/mysearch')
False
'''
s = status(url, data, headers, timeout)
if s >= 200 and s < 400:
return True

View file

@ -19,7 +19,6 @@ def to36(q):
Traceback (most recent call last):
...
ValueError: must supply a positive integer
"""
if q < 0: raise ValueError, "must supply a positive integer"
letters = "0123456789abcdefghijklmnopqrstuvwxyz"
@ -34,21 +33,48 @@ def from36(q):
def intValue(strValue, default=''):
    """
    Return the first run of decimal digits found in strValue, as a string.

    strValue is converted to text with unicode() and stripped before it is
    searched. default is returned when the text contains no digit at all or
    when the conversion to text fails.
    """
    try:
        match = re.search(r'\d+', unicode(strValue).strip())
    except Exception:
        # Best effort: a value that cannot be turned into text holds no number.
        return default
    if match is None:
        return default
    return match.group(0)
def test_intValue():
    # Leading text and whitespace are skipped; input without digits yields
    # the default empty string.
    expectations = (
        ('abc23', '23'),
        (' abc23', '23'),
        (' abc', ''),
    )
    for raw, expected in expectations:
        assert intValue(raw) == expected
def floatValue(strValue, default=''):
    """
    Return the first run of digits and dots found in strValue, as a string.

    strValue is converted to text with unicode() and stripped before it is
    searched. default is returned when the text contains neither a digit nor
    a dot, or when the conversion to text fails.

    The match is purely textual: a run such as '1.2.3' or '.' is returned
    unchanged, so the result is not guaranteed to parse with float().
    """
    try:
        match = re.search(r'[\d.]+', unicode(strValue).strip())
    except Exception:
        # Best effort: a value that cannot be turned into text holds no number.
        return default
    if match is None:
        return default
    return match.group(0)
def test_floatValue():
    # Leading text and whitespace are skipped; input without digits or dots
    # yields the default empty string.
    assert floatValue('abc23.4') == '23.4'
    assert floatValue(' abc23.4') == '23.4'
    assert floatValue(' abc') == ''
def formatNumber(number, longName, shortName):
"""
Return the number in a human-readable format (23 KB, 23.4 MB, 23.42 GB)
>>> formatNumber(123, 'Byte', 'B')
'123 Bytes'
>>> formatNumber(1234, 'Byte', 'B')
'1 KB'
>>> formatNumber(1234567, 'Byte', 'B')
'1.2 MB'
>>> formatNumber(1234567890, 'Byte', 'B')
'1.15 GB'
>>> formatNumber(1234567890123456789, 'Byte', 'B')
'1,096.5166 PB'
"""
if number < 1024:
return '%s %s%s' % (formatThousands(number), longName, number != 1 and 's' or '')
@ -61,6 +87,13 @@ def formatNumber(number, longName, shortName):
def formatThousands(number, separator = ','):
"""
Return the number with separators (1,000,000)
>>> formatThousands(1)
'1'
>>> formatThousands(1000)
'1,000'
>>> formatThousands(1000000)
'1,000,000'
"""
string = str(number).split('.')
l = []
@ -81,13 +114,29 @@ def formatPixels(number):
return formatNumber(number, 'pixel', 'px')
def plural(amount, unit, plural='s'):
    '''
    Return amount, formatted with thousands separators, followed by its unit.

    When abs(amount) is not 1 the unit is pluralized: with the default
    plural='s' an 's' is appended to unit; any other value of plural replaces
    unit entirely, which allows irregular plural forms.

    >>> plural(1, 'unit')
    '1 unit'
    >>> plural(2, 'unit')
    '2 units'
    '''
    if abs(amount) != 1:
        unit = unit + plural if plural == 's' else plural
    # formatNumber() requires a long and a short unit name, so the plain
    # thousands formatter is the helper to use for a bare amount.
    return "%s %s" % (formatThousands(amount), unit)
def ms2runtime(ms):
'''
>>> ms2runtime(5000)
'5 seconds'
>>> ms2runtime(500000)
'8 minutes 20 seconds'
>>> ms2runtime(50000000)
'13 hours 53 minutes 20 seconds'
>>> ms2runtime(50000000-20000)
'13 hours 53 minutes'
'''
seconds = int(ms / 1000)
years = 0
days = 0
@ -111,6 +160,14 @@ def ms2runtime(ms):
return " ".join(runtimeString).strip()
def ms2playtime(ms):
'''
>>> ms2playtime(5000)
'00:05'
>>> ms2playtime(500000)
'08:20'
>>> ms2playtime(50000000)
'13:53:20'
'''
it = int(ms / 1000)
ms = ms - it*1000
ss = it % 60
@ -123,6 +180,10 @@ def ms2playtime(ms):
return playtime
def ms2time(ms):
'''
>>> ms2time(44592123)
'12:23:12.123'
'''
it = int(ms / 1000)
ms = ms - it*1000
ss = it % 60
@ -131,6 +192,10 @@ def ms2time(ms):
return "%d:%02d:%02d.%03d" % (hh, mm, ss, ms)
def time2ms(timeString):
'''
>>> time2ms('12:23:12.123')
44592123
'''
ms = 0.0
p = timeString.split(':')
for i in range(len(p)):

View file

@ -26,20 +26,32 @@ trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\
del x # Temporary variable
def escape(html):
    '''
    Returns the given HTML with ampersands, quotes and angle brackets encoded

    Values that are not strings are converted with str() first.

    >>> escape('html "test" & <brothers>')
    'html &quot;test&quot; &amp; &lt;brothers&gt;'
    '''
    if not isinstance(html, basestring):
        html = str(html)
    # '&' has to be handled first, otherwise the ampersands of the entities
    # inserted by the later replacements would be escaped a second time.
    replacements = (
        ('&', '&amp;'),
        ('<', '&lt;'),
        ('>', '&gt;'),
        ('"', '&quot;'),
        ("'", '&#39;'),
    )
    for char, entity in replacements:
        html = html.replace(char, entity)
    return html
def linebreaks(value):
    '''
    Converts newlines into <p> and <br />

    Every kind of line ending is first normalized to a single newline. Runs
    of two or more newlines separate paragraphs, each of which is stripped
    and wrapped in <p>...</p>; a remaining single newline inside a paragraph
    becomes <br />. The paragraphs are joined with a blank line.
    '''
    normalized = re.sub(r'\r\n|\r|\n', '\n', value)
    paragraphs = []
    for chunk in re.split('\n{2,}', normalized):
        body = chunk.strip().replace('\n', '<br />')
        paragraphs.append('<p>%s</p>' % body)
    return '\n\n'.join(paragraphs)
def stripTags(value):
    """
    Returns the given HTML with all tags stripped

    Only the tags themselves (everything from '<' up to the next '>') are
    removed; the text between an opening and a closing tag is kept, including
    the content of elements such as <script>.

    >>> stripTags('some <h2>title</h2> <script>asdfasdf</script>')
    'some title asdfasdf'
    """
    return re.sub(r'<[^>]*?>', '', value)
def stripSpacesBetweenTags(value):
@ -122,6 +134,10 @@ def cleanHtml(text):
charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?')
def decodeHtml(html):
    """
    Decode the entities and character references in html.

    Alias that delegates to htmldecode() and returns its result unchanged.

    >>> decodeHtml('me &amp; you and &#36;&#38;%')
    u'me & you and $&%'
    """
    decoded = htmldecode(html)
    return decoded
def htmldecode(text):
@ -145,6 +161,10 @@ def htmldecode(text):
return charrefpat.sub(entitydecode, text).replace(u'\xa0', ' ')
def highlight(text, query, hlClass="hl"):
"""
>>> highlight('me &amp; you and &#36;&#38;%', 'and')
'me &amp; you <span class="hl">and</span> &#36;&#38;%'
"""
if query:
text = text.replace('<br />', '|')
query = re.escape(query).replace('\ ', '.')

View file

@ -18,7 +18,11 @@ for article in _articles:
_spArticles.append(article)
def canonicalTitle(title):
"""Return the title in the canonic format 'Movie Title, The'."""
"""Return the title in the canonic format 'Movie Title, The'.
>>> canonicalTitle('The Movie Title')
'Movie Title, The'
"""
try:
if _articlesDict.has_key(title.split(', ')[-1].lower()): return title
except IndexError: pass
@ -43,7 +47,11 @@ def canonicalTitle(title):
return title
def normalizeTitle(title):
"""Return the title in the normal "The Title" format."""
"""Return the title in the normal "The Title" format.
>>> normalizeTitle('Movie Title, The')
'The Movie Title'
"""
stitle = title.split(', ')
if len(stitle) > 1 and _articlesDict.has_key(stitle[-1].lower()):
sep = ' '
@ -52,6 +60,15 @@ def normalizeTitle(title):
return title
def normalizeImdbId(imdbId):
"""Return 7 digit imdbId.
>>> normalizeImdbId('http://www.imdb.com/title/tt0159206/')
'0159206'
>>> normalizeImdbId(159206)
'0159206'
>>> normalizeImdbId('tt0159206')
'0159206'
"""
if isinstance(imdbId, basestring):
imdbId = re.sub('.*(\d{7}).*', '\\1', imdbId)
elif isinstance(imdbId, int):

View file

@ -66,7 +66,14 @@ def wrap(text, width):
)
def truncateString(s, num):
"Truncates a string after a certain number of chacters, but ends with a word"
"""Truncates a string after a certain number of chacters, but ends with a word
>>> truncateString('Truncates a string after a certain number of chacters, but ends with a word', 23)
'Truncates a string...'
>>> truncateString('Truncates a string', 23)
'Truncates a string'
"""
length = int(num)
if len(s) <= length:
return s
@ -76,10 +83,16 @@ def truncateString(s, num):
ts += " " + words.pop(0)
if words:
ts += "..."
return ts
return ts.strip()
def trimString(string, num):
    """Shortens a string to num characters by cutting out its middle.

    Strings of at most num characters are returned unchanged. Longer ones
    are rebuilt from their first num - 13 characters, '...' and their last
    10 characters.

    NOTE: for num < 13 the head slice index is negative, so it is counted
    from the end of the string and the result is not limited to num
    characters.

    >>> trimString('Truncates a string after a certain number of chacters', 23)
    'Truncates ...f chacters'
    >>> trimString('Truncates a string', 23)
    'Truncates a string'
    """
    if len(string) <= num:
        return string
    head = string[:num - 13]
    tail = string[-10:]
    return head + '...' + tail
@ -99,8 +112,9 @@ def getValidFilename(s):
Returns the given string converted to a string that can be used for a clean
filename. Specifically, leading and trailing spaces are removed;
all non-filename-safe characters are removed.
>>> get_valid_filename("john's portrait in 2004.jpg")
'john_s portrait in 2004.jpg'
>>> getValidFilename("john's portrait in 2004.jpg")
'john_s_portrait_in_2004.jpg'
"""
s = s.strip()
s = s.replace(' ', '_')
@ -110,15 +124,15 @@ def getValidFilename(s):
def getTextList(list_, last_word='or'):
"""
>>> get_text_list(['a', 'b', 'c', 'd'])
>>> getTextList(['a', 'b', 'c', 'd'])
'a, b, c or d'
>>> get_text_list(['a', 'b', 'c'], 'and')
>>> getTextList(['a', 'b', 'c'], 'and')
'a, b and c'
>>> get_text_list(['a', 'b'], 'and')
>>> getTextList(['a', 'b'], 'and')
'a and b'
>>> get_text_list(['a'])
>>> getTextList(['a'])
'a'
>>> get_text_list([])
>>> getTextList([])
''
"""
if len(list_) == 0: return ''
@ -164,8 +178,8 @@ def smartSplit(text):
Supports both single and double quotes, and supports escaping quotes with
backslashes. In the output, strings will keep their initial and trailing
quote marks.
>>> list(smart_split('This is "a person\'s" test.'))
['This', 'is', '"a person\'s"', 'test.']
>>> list(smartSplit('This is "a person\\'s" test.'))
['This', 'is', '"a person\\'s"', 'test.']
"""
for bit in smart_split_re.finditer(text):
bit = bit.group(0)