Add tests and clean up some errors found while doing so
This commit is contained in:
parent
915d35ba4a
commit
ea43810f2a
6 changed files with 165 additions and 34 deletions
3
README
3
README
|
@ -16,3 +16,6 @@ Usage:
|
|||
oxutils.formatBytes(1234567890)
|
||||
'1.15 GB'
|
||||
|
||||
|
||||
Tests:
|
||||
nosetests --with-doctest oxutils
|
||||
|
|
|
@ -19,10 +19,22 @@ from net import DEFAULT_HEADERS
|
|||
cache_timeout = 30*24*60*60 # default is 30 days
|
||||
|
||||
def status(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
|
||||
'''
|
||||
>>> status('http://google.com')
|
||||
200
|
||||
>>> status('http://google.com/mysearch')
|
||||
404
|
||||
'''
|
||||
headers = getHeaders(url, data, headers)
|
||||
return int(headers['status'])
|
||||
|
||||
def exists(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
|
||||
'''
|
||||
>>> exists('http://google.com')
|
||||
True
|
||||
>>> exists('http://google.com/mysearch')
|
||||
False
|
||||
'''
|
||||
s = status(url, data, headers, timeout)
|
||||
if s >= 200 and s < 400:
|
||||
return True
|
||||
|
|
|
@ -7,19 +7,18 @@ def to36(q):
|
|||
"""
|
||||
Converts an integer to base 36 (a useful scheme for human-sayable IDs).
|
||||
|
||||
>>> to36(35)
|
||||
'z'
|
||||
>>> to36(119292)
|
||||
'2k1o'
|
||||
>>> int(to36(939387374), 36)
|
||||
939387374
|
||||
>>> to36(0)
|
||||
'0'
|
||||
>>> to36(-393)
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: must supply a positive integer
|
||||
|
||||
>>> to36(35)
|
||||
'z'
|
||||
>>> to36(119292)
|
||||
'2k1o'
|
||||
>>> int(to36(939387374), 36)
|
||||
939387374
|
||||
>>> to36(0)
|
||||
'0'
|
||||
>>> to36(-393)
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: must supply a positive integer
|
||||
"""
|
||||
if q < 0: raise ValueError, "must supply a positive integer"
|
||||
letters = "0123456789abcdefghijklmnopqrstuvwxyz"
|
||||
|
@ -34,21 +33,48 @@ def from36(q):
|
|||
|
||||
def intValue(strValue, default=''):
|
||||
try:
|
||||
val = re.compile('(\d*)').findall(unicode(strValue))[0]
|
||||
val = re.compile('(\d+)').findall(unicode(strValue).strip())[0]
|
||||
except:
|
||||
val = default
|
||||
return val
|
||||
|
||||
def test_intValue():
|
||||
assert intValue('abc23') == '23'
|
||||
assert intValue(' abc23') == '23'
|
||||
assert intValue(' abc') == ''
|
||||
|
||||
def floatValue(strValue, default=''):
|
||||
try:
|
||||
val = re.compile('([\d.]*)').findall(unicode(strValue))[0]
|
||||
val = re.compile('([\d.]+)').findall(unicode(strValue).strip())[0]
|
||||
except:
|
||||
val = default
|
||||
return val
|
||||
|
||||
def test_floatValue():
|
||||
print "floatValue"
|
||||
assert floatValue('abc23.4') == '23.4'
|
||||
assert floatValue(' abc23.4') == '23.4'
|
||||
assert floatValue(' abc') == ''
|
||||
|
||||
def formatNumber(number, longName, shortName):
|
||||
"""
|
||||
Return the number in a human-readable format (23 KB, 23.4 MB, 23.42 GB)
|
||||
|
||||
>>> formatNumber(123, 'Byte', 'B')
|
||||
'123 Bytes'
|
||||
|
||||
>>> formatNumber(1234, 'Byte', 'B')
|
||||
'1 KB'
|
||||
|
||||
>>> formatNumber(1234567, 'Byte', 'B')
|
||||
'1.2 MB'
|
||||
|
||||
>>> formatNumber(1234567890, 'Byte', 'B')
|
||||
'1.15 GB'
|
||||
|
||||
>>> formatNumber(1234567890123456789, 'Byte', 'B')
|
||||
'1,096.5166 PB'
|
||||
|
||||
"""
|
||||
if number < 1024:
|
||||
return '%s %s%s' % (formatThousands(number), longName, number != 1 and 's' or '')
|
||||
|
@ -61,6 +87,13 @@ def formatNumber(number, longName, shortName):
|
|||
def formatThousands(number, separator = ','):
|
||||
"""
|
||||
Return the number with separators (1,000,000)
|
||||
|
||||
>>> formatThousands(1)
|
||||
'1'
|
||||
>>> formatThousands(1000)
|
||||
'1,000'
|
||||
>>> formatThousands(1000000)
|
||||
'1,000,000'
|
||||
"""
|
||||
string = str(number).split('.')
|
||||
l = []
|
||||
|
@ -81,13 +114,29 @@ def formatPixels(number):
|
|||
return formatNumber(number, 'pixel', 'px')
|
||||
|
||||
def plural(amount, unit, plural='s'):
|
||||
'''
|
||||
>>> plural(1, 'unit')
|
||||
'1 unit'
|
||||
>>> plural(2, 'unit')
|
||||
'2 units'
|
||||
'''
|
||||
if abs(amount) != 1:
|
||||
if plural == 's':
|
||||
unit = unit + plural
|
||||
else: unit = plural
|
||||
return "%s %s" % (formatNumber(amount), unit)
|
||||
return "%s %s" % (formatThousands(amount), unit)
|
||||
|
||||
def ms2runtime(ms):
|
||||
'''
|
||||
>>> ms2runtime(5000)
|
||||
'5 seconds'
|
||||
>>> ms2runtime(500000)
|
||||
'8 minutes 20 seconds'
|
||||
>>> ms2runtime(50000000)
|
||||
'13 hours 53 minutes 20 seconds'
|
||||
>>> ms2runtime(50000000-20000)
|
||||
'13 hours 53 minutes'
|
||||
'''
|
||||
seconds = int(ms / 1000)
|
||||
years = 0
|
||||
days = 0
|
||||
|
@ -111,6 +160,14 @@ def ms2runtime(ms):
|
|||
return " ".join(runtimeString).strip()
|
||||
|
||||
def ms2playtime(ms):
|
||||
'''
|
||||
>>> ms2playtime(5000)
|
||||
'00:05'
|
||||
>>> ms2playtime(500000)
|
||||
'08:20'
|
||||
>>> ms2playtime(50000000)
|
||||
'13:53:20'
|
||||
'''
|
||||
it = int(ms / 1000)
|
||||
ms = ms - it*1000
|
||||
ss = it % 60
|
||||
|
@ -123,6 +180,10 @@ def ms2playtime(ms):
|
|||
return playtime
|
||||
|
||||
def ms2time(ms):
|
||||
'''
|
||||
>>> ms2time(44592123)
|
||||
'12:23:12.123'
|
||||
'''
|
||||
it = int(ms / 1000)
|
||||
ms = ms - it*1000
|
||||
ss = it % 60
|
||||
|
@ -131,6 +192,10 @@ def ms2time(ms):
|
|||
return "%d:%02d:%02d.%03d" % (hh, mm, ss, ms)
|
||||
|
||||
def time2ms(timeString):
|
||||
'''
|
||||
>>> time2ms('12:23:12.123')
|
||||
44592123
|
||||
'''
|
||||
ms = 0.0
|
||||
p = timeString.split(':')
|
||||
for i in range(len(p)):
|
||||
|
|
|
@ -26,20 +26,32 @@ trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\
|
|||
del x # Temporary variable
|
||||
|
||||
def escape(html):
|
||||
"Returns the given HTML with ampersands, quotes and carets encoded"
|
||||
'''
|
||||
Returns the given HTML with ampersands, quotes and carets encoded
|
||||
|
||||
>>> escape('html "test" & <brothers>')
|
||||
'html &quot;test&quot; &amp; &lt;brothers&gt;'
|
||||
'''
|
||||
if not isinstance(html, basestring):
|
||||
html = str(html)
|
||||
return html.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;')
|
||||
|
||||
def linebreaks(value):
|
||||
"Converts newlines into <p> and <br />s"
|
||||
'''
|
||||
Converts newlines into <p> and <br />
|
||||
'''
|
||||
value = re.sub(r'\r\n|\r|\n', '\n', value) # normalize newlines
|
||||
paras = re.split('\n{2,}', value)
|
||||
paras = ['<p>%s</p>' % p.strip().replace('\n', '<br />') for p in paras]
|
||||
return '\n\n'.join(paras)
|
||||
|
||||
def stripTags(value):
|
||||
"Returns the given HTML with all tags stripped"
|
||||
"""
|
||||
Returns the given HTML with all tags stripped
|
||||
|
||||
>>> stripTags('some <h2>title</h2> <script>asdfasdf</script>')
|
||||
'some title asdfasdf'
|
||||
"""
|
||||
return re.sub(r'<[^>]*?>', '', value)
|
||||
|
||||
def stripSpacesBetweenTags(value):
|
||||
|
@ -122,7 +134,11 @@ def cleanHtml(text):
|
|||
charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?')
|
||||
|
||||
def decodeHtml(html):
|
||||
return htmldecode(html)
|
||||
"""
|
||||
>>> decodeHtml('me &amp; you and &#36;&#38;%')
|
||||
u'me & you and $&%'
|
||||
"""
|
||||
return htmldecode(html)
|
||||
|
||||
def htmldecode(text):
|
||||
"""Decode HTML entities in the given text."""
|
||||
|
@ -145,6 +161,10 @@ def htmldecode(text):
|
|||
return charrefpat.sub(entitydecode, text).replace(u'\xa0', ' ')
|
||||
|
||||
def highlight(text, query, hlClass="hl"):
|
||||
"""
|
||||
>>> highlight('me &amp; you and &#36;&#38;%', 'and')
|
||||
'me &amp; you <span class="hl">and</span> &#36;&#38;%'
|
||||
"""
|
||||
if query:
|
||||
text = text.replace('<br />', '|')
|
||||
query = re.escape(query).replace('\ ', '.')
|
||||
|
|
|
@ -18,7 +18,11 @@ for article in _articles:
|
|||
_spArticles.append(article)
|
||||
|
||||
def canonicalTitle(title):
|
||||
"""Return the title in the canonic format 'Movie Title, The'."""
|
||||
"""Return the title in the canonic format 'Movie Title, The'.
|
||||
|
||||
>>> canonicalTitle('The Movie Title')
|
||||
'Movie Title, The'
|
||||
"""
|
||||
try:
|
||||
if _articlesDict.has_key(title.split(', ')[-1].lower()): return title
|
||||
except IndexError: pass
|
||||
|
@ -43,7 +47,11 @@ def canonicalTitle(title):
|
|||
return title
|
||||
|
||||
def normalizeTitle(title):
|
||||
"""Return the title in the normal "The Title" format."""
|
||||
"""Return the title in the normal "The Title" format.
|
||||
|
||||
>>> normalizeTitle('Movie Title, The')
|
||||
'The Movie Title'
|
||||
"""
|
||||
stitle = title.split(', ')
|
||||
if len(stitle) > 1 and _articlesDict.has_key(stitle[-1].lower()):
|
||||
sep = ' '
|
||||
|
@ -52,6 +60,15 @@ def normalizeTitle(title):
|
|||
return title
|
||||
|
||||
def normalizeImdbId(imdbId):
|
||||
"""Return 7 digit imdbId.
|
||||
|
||||
>>> normalizeImdbId('http://www.imdb.com/title/tt0159206/')
|
||||
'0159206'
|
||||
>>> normalizeImdbId(159206)
|
||||
'0159206'
|
||||
>>> normalizeImdbId('tt0159206')
|
||||
'0159206'
|
||||
"""
|
||||
if isinstance(imdbId, basestring):
|
||||
imdbId = re.sub('.*(\d{7}).*', '\\1', imdbId)
|
||||
elif isinstance(imdbId, int):
|
||||
|
|
|
@ -66,7 +66,14 @@ def wrap(text, width):
|
|||
)
|
||||
|
||||
def truncateString(s, num):
|
||||
"Truncates a string after a certain number of characters, but ends with a word"
|
||||
"""Truncates a string after a certain number of characters, but ends with a word
|
||||
|
||||
>>> truncateString('Truncates a string after a certain number of chacters, but ends with a word', 23)
|
||||
'Truncates a string...'
|
||||
>>> truncateString('Truncates a string', 23)
|
||||
'Truncates a string'
|
||||
|
||||
"""
|
||||
length = int(num)
|
||||
if len(s) <= length:
|
||||
return s
|
||||
|
@ -76,10 +83,16 @@ def truncateString(s, num):
|
|||
ts += " " + words.pop(0)
|
||||
if words:
|
||||
ts += "..."
|
||||
return ts
|
||||
return ts.strip()
|
||||
|
||||
def trimString(string, num):
|
||||
"Truncates a string after a certain number of characters, adding ... at -10 characters"
|
||||
"""Truncates a string after a certain number of characters, adding ... at -10 characters
|
||||
|
||||
>>> trimString('Truncates a string after a certain number of chacters', 23)
|
||||
'Truncates ...f chacters'
|
||||
>>> trimString('Truncates a string', 23)
|
||||
'Truncates a string'
|
||||
"""
|
||||
if len(string) > num:
|
||||
string = string[:num - 13] + '...' + string[-10:]
|
||||
return string
|
||||
|
@ -99,8 +112,9 @@ def getValidFilename(s):
|
|||
Returns the given string converted to a string that can be used for a clean
|
||||
filename. Specifically, leading and trailing spaces are removed;
|
||||
all non-filename-safe characters are removed.
|
||||
>>> get_valid_filename("john's portrait in 2004.jpg")
|
||||
'john_s portrait in 2004.jpg'
|
||||
|
||||
>>> getValidFilename("john's portrait in 2004.jpg")
|
||||
'john_s_portrait_in_2004.jpg'
|
||||
"""
|
||||
s = s.strip()
|
||||
s = s.replace(' ', '_')
|
||||
|
@ -110,15 +124,15 @@ def getValidFilename(s):
|
|||
|
||||
def getTextList(list_, last_word='or'):
|
||||
"""
|
||||
>>> get_text_list(['a', 'b', 'c', 'd'])
|
||||
>>> getTextList(['a', 'b', 'c', 'd'])
|
||||
'a, b, c or d'
|
||||
>>> get_text_list(['a', 'b', 'c'], 'and')
|
||||
>>> getTextList(['a', 'b', 'c'], 'and')
|
||||
'a, b and c'
|
||||
>>> get_text_list(['a', 'b'], 'and')
|
||||
>>> getTextList(['a', 'b'], 'and')
|
||||
'a and b'
|
||||
>>> get_text_list(['a'])
|
||||
>>> getTextList(['a'])
|
||||
'a'
|
||||
>>> get_text_list([])
|
||||
>>> getTextList([])
|
||||
''
|
||||
"""
|
||||
if len(list_) == 0: return ''
|
||||
|
@ -164,8 +178,8 @@ def smartSplit(text):
|
|||
Supports both single and double quotes, and supports escaping quotes with
|
||||
backslashes. In the output, strings will keep their initial and trailing
|
||||
quote marks.
|
||||
>>> list(smart_split('This is "a person\'s" test.'))
|
||||
['This', 'is', '"a person\'s"', 'test.']
|
||||
>>> list(smartSplit('This is "a person\\'s" test.'))
|
||||
['This', 'is', '"a person\\'s"', 'test.']
|
||||
"""
|
||||
for bit in smart_split_re.finditer(text):
|
||||
bit = bit.group(0)
|
||||
|
|
Loading…
Reference in a new issue