Add tests and clean up some errors found while doing so
This commit is contained in:
parent
915d35ba4a
commit
ea43810f2a
6 changed files with 165 additions and 34 deletions
3
README
3
README
|
@ -16,3 +16,6 @@ Usage:
|
||||||
oxutils.formatBytes(1234567890)
|
oxutils.formatBytes(1234567890)
|
||||||
'1.15 GB'
|
'1.15 GB'
|
||||||
|
|
||||||
|
|
||||||
|
Tests:
|
||||||
|
nosetests --with-doctest oxutils
|
||||||
|
|
|
@ -19,10 +19,22 @@ from net import DEFAULT_HEADERS
|
||||||
cache_timeout = 30*24*60*60 # default is 30 days
|
cache_timeout = 30*24*60*60 # default is 30 days
|
||||||
|
|
||||||
def status(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
|
def status(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
|
||||||
|
'''
|
||||||
|
>>> status('http://google.com')
|
||||||
|
200
|
||||||
|
>>> status('http://google.com/mysearch')
|
||||||
|
404
|
||||||
|
'''
|
||||||
headers = getHeaders(url, data, headers)
|
headers = getHeaders(url, data, headers)
|
||||||
return int(headers['status'])
|
return int(headers['status'])
|
||||||
|
|
||||||
def exists(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
|
def exists(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
|
||||||
|
'''
|
||||||
|
>>> exists('http://google.com')
|
||||||
|
True
|
||||||
|
>>> exists('http://google.com/mysearch')
|
||||||
|
False
|
||||||
|
'''
|
||||||
s = status(url, data, headers, timeout)
|
s = status(url, data, headers, timeout)
|
||||||
if s >= 200 and s < 400:
|
if s >= 200 and s < 400:
|
||||||
return True
|
return True
|
||||||
|
|
|
@ -19,7 +19,6 @@ def to36(q):
|
||||||
Traceback (most recent call last):
|
Traceback (most recent call last):
|
||||||
...
|
...
|
||||||
ValueError: must supply a positive integer
|
ValueError: must supply a positive integer
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if q < 0: raise ValueError, "must supply a positive integer"
|
if q < 0: raise ValueError, "must supply a positive integer"
|
||||||
letters = "0123456789abcdefghijklmnopqrstuvwxyz"
|
letters = "0123456789abcdefghijklmnopqrstuvwxyz"
|
||||||
|
@ -34,21 +33,48 @@ def from36(q):
|
||||||
|
|
||||||
def intValue(strValue, default=''):
|
def intValue(strValue, default=''):
|
||||||
try:
|
try:
|
||||||
val = re.compile('(\d*)').findall(unicode(strValue))[0]
|
val = re.compile('(\d+)').findall(unicode(strValue).strip())[0]
|
||||||
except:
|
except:
|
||||||
val = default
|
val = default
|
||||||
return val
|
return val
|
||||||
|
|
||||||
|
def test_intValue():
|
||||||
|
assert intValue('abc23') == '23'
|
||||||
|
assert intValue(' abc23') == '23'
|
||||||
|
assert intValue(' abc') == ''
|
||||||
|
|
||||||
def floatValue(strValue, default=''):
|
def floatValue(strValue, default=''):
|
||||||
try:
|
try:
|
||||||
val = re.compile('([\d.]*)').findall(unicode(strValue))[0]
|
val = re.compile('([\d.]+)').findall(unicode(strValue).strip())[0]
|
||||||
except:
|
except:
|
||||||
val = default
|
val = default
|
||||||
return val
|
return val
|
||||||
|
|
||||||
|
def test_floatValue():
|
||||||
|
print "floatValue"
|
||||||
|
assert floatValue('abc23.4') == '23.4'
|
||||||
|
assert floatValue(' abc23.4') == '23.4'
|
||||||
|
assert floatValue(' abc') == ''
|
||||||
|
|
||||||
def formatNumber(number, longName, shortName):
|
def formatNumber(number, longName, shortName):
|
||||||
"""
|
"""
|
||||||
Return the number in a human-readable format (23 KB, 23.4 MB, 23.42 GB)
|
Return the number in a human-readable format (23 KB, 23.4 MB, 23.42 GB)
|
||||||
|
|
||||||
|
>>> formatNumber(123, 'Byte', 'B')
|
||||||
|
'123 Bytes'
|
||||||
|
|
||||||
|
>>> formatNumber(1234, 'Byte', 'B')
|
||||||
|
'1 KB'
|
||||||
|
|
||||||
|
>>> formatNumber(1234567, 'Byte', 'B')
|
||||||
|
'1.2 MB'
|
||||||
|
|
||||||
|
>>> formatNumber(1234567890, 'Byte', 'B')
|
||||||
|
'1.15 GB'
|
||||||
|
|
||||||
|
>>> formatNumber(1234567890123456789, 'Byte', 'B')
|
||||||
|
'1,096.5166 PB'
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if number < 1024:
|
if number < 1024:
|
||||||
return '%s %s%s' % (formatThousands(number), longName, number != 1 and 's' or '')
|
return '%s %s%s' % (formatThousands(number), longName, number != 1 and 's' or '')
|
||||||
|
@ -61,6 +87,13 @@ def formatNumber(number, longName, shortName):
|
||||||
def formatThousands(number, separator = ','):
|
def formatThousands(number, separator = ','):
|
||||||
"""
|
"""
|
||||||
Return the number with separators (1,000,000)
|
Return the number with separators (1,000,000)
|
||||||
|
|
||||||
|
>>> formatThousands(1)
|
||||||
|
'1'
|
||||||
|
>>> formatThousands(1000)
|
||||||
|
'1,000'
|
||||||
|
>>> formatThousands(1000000)
|
||||||
|
'1,000,000'
|
||||||
"""
|
"""
|
||||||
string = str(number).split('.')
|
string = str(number).split('.')
|
||||||
l = []
|
l = []
|
||||||
|
@ -81,13 +114,29 @@ def formatPixels(number):
|
||||||
return formatNumber(number, 'pixel', 'px')
|
return formatNumber(number, 'pixel', 'px')
|
||||||
|
|
||||||
def plural(amount, unit, plural='s'):
|
def plural(amount, unit, plural='s'):
|
||||||
|
'''
|
||||||
|
>>> plural(1, 'unit')
|
||||||
|
'1 unit'
|
||||||
|
>>> plural(2, 'unit')
|
||||||
|
'2 units'
|
||||||
|
'''
|
||||||
if abs(amount) != 1:
|
if abs(amount) != 1:
|
||||||
if plural == 's':
|
if plural == 's':
|
||||||
unit = unit + plural
|
unit = unit + plural
|
||||||
else: unit = plural
|
else: unit = plural
|
||||||
return "%s %s" % (formatNumber(amount), unit)
|
return "%s %s" % (formatThousands(amount), unit)
|
||||||
|
|
||||||
def ms2runtime(ms):
|
def ms2runtime(ms):
|
||||||
|
'''
|
||||||
|
>>> ms2runtime(5000)
|
||||||
|
'5 seconds'
|
||||||
|
>>> ms2runtime(500000)
|
||||||
|
'8 minutes 20 seconds'
|
||||||
|
>>> ms2runtime(50000000)
|
||||||
|
'13 hours 53 minutes 20 seconds'
|
||||||
|
>>> ms2runtime(50000000-20000)
|
||||||
|
'13 hours 53 minutes'
|
||||||
|
'''
|
||||||
seconds = int(ms / 1000)
|
seconds = int(ms / 1000)
|
||||||
years = 0
|
years = 0
|
||||||
days = 0
|
days = 0
|
||||||
|
@ -111,6 +160,14 @@ def ms2runtime(ms):
|
||||||
return " ".join(runtimeString).strip()
|
return " ".join(runtimeString).strip()
|
||||||
|
|
||||||
def ms2playtime(ms):
|
def ms2playtime(ms):
|
||||||
|
'''
|
||||||
|
>>> ms2playtime(5000)
|
||||||
|
'00:05'
|
||||||
|
>>> ms2playtime(500000)
|
||||||
|
'08:20'
|
||||||
|
>>> ms2playtime(50000000)
|
||||||
|
'13:53:20'
|
||||||
|
'''
|
||||||
it = int(ms / 1000)
|
it = int(ms / 1000)
|
||||||
ms = ms - it*1000
|
ms = ms - it*1000
|
||||||
ss = it % 60
|
ss = it % 60
|
||||||
|
@ -123,6 +180,10 @@ def ms2playtime(ms):
|
||||||
return playtime
|
return playtime
|
||||||
|
|
||||||
def ms2time(ms):
|
def ms2time(ms):
|
||||||
|
'''
|
||||||
|
>>> ms2time(44592123)
|
||||||
|
'12:23:12.123'
|
||||||
|
'''
|
||||||
it = int(ms / 1000)
|
it = int(ms / 1000)
|
||||||
ms = ms - it*1000
|
ms = ms - it*1000
|
||||||
ss = it % 60
|
ss = it % 60
|
||||||
|
@ -131,6 +192,10 @@ def ms2time(ms):
|
||||||
return "%d:%02d:%02d.%03d" % (hh, mm, ss, ms)
|
return "%d:%02d:%02d.%03d" % (hh, mm, ss, ms)
|
||||||
|
|
||||||
def time2ms(timeString):
|
def time2ms(timeString):
|
||||||
|
'''
|
||||||
|
>>> time2ms('12:23:12.123')
|
||||||
|
44592123
|
||||||
|
'''
|
||||||
ms = 0.0
|
ms = 0.0
|
||||||
p = timeString.split(':')
|
p = timeString.split(':')
|
||||||
for i in range(len(p)):
|
for i in range(len(p)):
|
||||||
|
|
|
@ -26,20 +26,32 @@ trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\
|
||||||
del x # Temporary variable
|
del x # Temporary variable
|
||||||
|
|
||||||
def escape(html):
|
def escape(html):
|
||||||
"Returns the given HTML with ampersands, quotes and carets encoded"
|
'''
|
||||||
|
Returns the given HTML with ampersands, quotes and carets encoded
|
||||||
|
|
||||||
|
>>> escape('html "test" & <brothers>')
|
||||||
|
'html &quot;test&quot; &amp; &lt;brothers&gt;'
|
||||||
|
'''
|
||||||
if not isinstance(html, basestring):
|
if not isinstance(html, basestring):
|
||||||
html = str(html)
|
html = str(html)
|
||||||
return html.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;')
|
return html.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;')
|
||||||
|
|
||||||
def linebreaks(value):
|
def linebreaks(value):
|
||||||
"Converts newlines into <p> and <br />s"
|
'''
|
||||||
|
Converts newlines into <p> and <br />
|
||||||
|
'''
|
||||||
value = re.sub(r'\r\n|\r|\n', '\n', value) # normalize newlines
|
value = re.sub(r'\r\n|\r|\n', '\n', value) # normalize newlines
|
||||||
paras = re.split('\n{2,}', value)
|
paras = re.split('\n{2,}', value)
|
||||||
paras = ['<p>%s</p>' % p.strip().replace('\n', '<br />') for p in paras]
|
paras = ['<p>%s</p>' % p.strip().replace('\n', '<br />') for p in paras]
|
||||||
return '\n\n'.join(paras)
|
return '\n\n'.join(paras)
|
||||||
|
|
||||||
def stripTags(value):
|
def stripTags(value):
|
||||||
"Returns the given HTML with all tags stripped"
|
"""
|
||||||
|
Returns the given HTML with all tags stripped
|
||||||
|
|
||||||
|
>>> stripTags('some <h2>title</h2> <script>asdfasdf</script>')
|
||||||
|
'some title asdfasdf'
|
||||||
|
"""
|
||||||
return re.sub(r'<[^>]*?>', '', value)
|
return re.sub(r'<[^>]*?>', '', value)
|
||||||
|
|
||||||
def stripSpacesBetweenTags(value):
|
def stripSpacesBetweenTags(value):
|
||||||
|
@ -122,6 +134,10 @@ def cleanHtml(text):
|
||||||
charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?')
|
charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?')
|
||||||
|
|
||||||
def decodeHtml(html):
|
def decodeHtml(html):
|
||||||
|
"""
|
||||||
|
>>> decodeHtml('me &amp; you and &#36;&#38;%')
|
||||||
|
u'me & you and $&%'
|
||||||
|
"""
|
||||||
return htmldecode(html)
|
return htmldecode(html)
|
||||||
|
|
||||||
def htmldecode(text):
|
def htmldecode(text):
|
||||||
|
@ -145,6 +161,10 @@ def htmldecode(text):
|
||||||
return charrefpat.sub(entitydecode, text).replace(u'\xa0', ' ')
|
return charrefpat.sub(entitydecode, text).replace(u'\xa0', ' ')
|
||||||
|
|
||||||
def highlight(text, query, hlClass="hl"):
|
def highlight(text, query, hlClass="hl"):
|
||||||
|
"""
|
||||||
|
>>> highlight('me &amp; you and &#36;&#38;%', 'and')
|
||||||
|
'me &amp; you <span class="hl">and</span> &#36;&#38;%'
|
||||||
|
"""
|
||||||
if query:
|
if query:
|
||||||
text = text.replace('<br />', '|')
|
text = text.replace('<br />', '|')
|
||||||
query = re.escape(query).replace('\ ', '.')
|
query = re.escape(query).replace('\ ', '.')
|
||||||
|
|
|
@ -18,7 +18,11 @@ for article in _articles:
|
||||||
_spArticles.append(article)
|
_spArticles.append(article)
|
||||||
|
|
||||||
def canonicalTitle(title):
|
def canonicalTitle(title):
|
||||||
"""Return the title in the canonic format 'Movie Title, The'."""
|
"""Return the title in the canonic format 'Movie Title, The'.
|
||||||
|
|
||||||
|
>>> canonicalTitle('The Movie Title')
|
||||||
|
'Movie Title, The'
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
if _articlesDict.has_key(title.split(', ')[-1].lower()): return title
|
if _articlesDict.has_key(title.split(', ')[-1].lower()): return title
|
||||||
except IndexError: pass
|
except IndexError: pass
|
||||||
|
@ -43,7 +47,11 @@ def canonicalTitle(title):
|
||||||
return title
|
return title
|
||||||
|
|
||||||
def normalizeTitle(title):
|
def normalizeTitle(title):
|
||||||
"""Return the title in the normal "The Title" format."""
|
"""Return the title in the normal "The Title" format.
|
||||||
|
|
||||||
|
>>> normalizeTitle('Movie Title, The')
|
||||||
|
'The Movie Title'
|
||||||
|
"""
|
||||||
stitle = title.split(', ')
|
stitle = title.split(', ')
|
||||||
if len(stitle) > 1 and _articlesDict.has_key(stitle[-1].lower()):
|
if len(stitle) > 1 and _articlesDict.has_key(stitle[-1].lower()):
|
||||||
sep = ' '
|
sep = ' '
|
||||||
|
@ -52,6 +60,15 @@ def normalizeTitle(title):
|
||||||
return title
|
return title
|
||||||
|
|
||||||
def normalizeImdbId(imdbId):
|
def normalizeImdbId(imdbId):
|
||||||
|
"""Return 7 digit imdbId.
|
||||||
|
|
||||||
|
>>> normalizeImdbId('http://www.imdb.com/title/tt0159206/')
|
||||||
|
'0159206'
|
||||||
|
>>> normalizeImdbId(159206)
|
||||||
|
'0159206'
|
||||||
|
>>> normalizeImdbId('tt0159206')
|
||||||
|
'0159206'
|
||||||
|
"""
|
||||||
if isinstance(imdbId, basestring):
|
if isinstance(imdbId, basestring):
|
||||||
imdbId = re.sub('.*(\d{7}).*', '\\1', imdbId)
|
imdbId = re.sub('.*(\d{7}).*', '\\1', imdbId)
|
||||||
elif isinstance(imdbId, int):
|
elif isinstance(imdbId, int):
|
||||||
|
|
|
@ -66,7 +66,14 @@ def wrap(text, width):
|
||||||
)
|
)
|
||||||
|
|
||||||
def truncateString(s, num):
|
def truncateString(s, num):
|
||||||
"Truncates a string after a certain number of chacters, but ends with a word"
|
"""Truncates a string after a certain number of chacters, but ends with a word
|
||||||
|
|
||||||
|
>>> truncateString('Truncates a string after a certain number of chacters, but ends with a word', 23)
|
||||||
|
'Truncates a string...'
|
||||||
|
>>> truncateString('Truncates a string', 23)
|
||||||
|
'Truncates a string'
|
||||||
|
|
||||||
|
"""
|
||||||
length = int(num)
|
length = int(num)
|
||||||
if len(s) <= length:
|
if len(s) <= length:
|
||||||
return s
|
return s
|
||||||
|
@ -76,10 +83,16 @@ def truncateString(s, num):
|
||||||
ts += " " + words.pop(0)
|
ts += " " + words.pop(0)
|
||||||
if words:
|
if words:
|
||||||
ts += "..."
|
ts += "..."
|
||||||
return ts
|
return ts.strip()
|
||||||
|
|
||||||
def trimString(string, num):
|
def trimString(string, num):
|
||||||
"Truncates a string after a certain number of chacters, adding ... at -10 characters"
|
"""Truncates a string after a certain number of chacters, adding ... at -10 characters
|
||||||
|
|
||||||
|
>>> trimString('Truncates a string after a certain number of chacters', 23)
|
||||||
|
'Truncates ...f chacters'
|
||||||
|
>>> trimString('Truncates a string', 23)
|
||||||
|
'Truncates a string'
|
||||||
|
"""
|
||||||
if len(string) > num:
|
if len(string) > num:
|
||||||
string = string[:num - 13] + '...' + string[-10:]
|
string = string[:num - 13] + '...' + string[-10:]
|
||||||
return string
|
return string
|
||||||
|
@ -99,8 +112,9 @@ def getValidFilename(s):
|
||||||
Returns the given string converted to a string that can be used for a clean
|
Returns the given string converted to a string that can be used for a clean
|
||||||
filename. Specifically, leading and trailing spaces are removed;
|
filename. Specifically, leading and trailing spaces are removed;
|
||||||
all non-filename-safe characters are removed.
|
all non-filename-safe characters are removed.
|
||||||
>>> get_valid_filename("john's portrait in 2004.jpg")
|
|
||||||
'john_s portrait in 2004.jpg'
|
>>> getValidFilename("john's portrait in 2004.jpg")
|
||||||
|
'john_s_portrait_in_2004.jpg'
|
||||||
"""
|
"""
|
||||||
s = s.strip()
|
s = s.strip()
|
||||||
s = s.replace(' ', '_')
|
s = s.replace(' ', '_')
|
||||||
|
@ -110,15 +124,15 @@ def getValidFilename(s):
|
||||||
|
|
||||||
def getTextList(list_, last_word='or'):
|
def getTextList(list_, last_word='or'):
|
||||||
"""
|
"""
|
||||||
>>> get_text_list(['a', 'b', 'c', 'd'])
|
>>> getTextList(['a', 'b', 'c', 'd'])
|
||||||
'a, b, c or d'
|
'a, b, c or d'
|
||||||
>>> get_text_list(['a', 'b', 'c'], 'and')
|
>>> getTextList(['a', 'b', 'c'], 'and')
|
||||||
'a, b and c'
|
'a, b and c'
|
||||||
>>> get_text_list(['a', 'b'], 'and')
|
>>> getTextList(['a', 'b'], 'and')
|
||||||
'a and b'
|
'a and b'
|
||||||
>>> get_text_list(['a'])
|
>>> getTextList(['a'])
|
||||||
'a'
|
'a'
|
||||||
>>> get_text_list([])
|
>>> getTextList([])
|
||||||
''
|
''
|
||||||
"""
|
"""
|
||||||
if len(list_) == 0: return ''
|
if len(list_) == 0: return ''
|
||||||
|
@ -164,8 +178,8 @@ def smartSplit(text):
|
||||||
Supports both single and double quotes, and supports escaping quotes with
|
Supports both single and double quotes, and supports escaping quotes with
|
||||||
backslashes. In the output, strings will keep their initial and trailing
|
backslashes. In the output, strings will keep their initial and trailing
|
||||||
quote marks.
|
quote marks.
|
||||||
>>> list(smart_split('This is "a person\'s" test.'))
|
>>> list(smartSplit('This is "a person\\'s" test.'))
|
||||||
['This', 'is', '"a person\'s"', 'test.']
|
['This', 'is', '"a person\\'s"', 'test.']
|
||||||
"""
|
"""
|
||||||
for bit in smart_split_re.finditer(text):
|
for bit in smart_split_re.finditer(text):
|
||||||
bit = bit.group(0)
|
bit = bit.group(0)
|
||||||
|
|
Loading…
Reference in a new issue