diff --git a/README b/README index 728d6dc..fd7ca1a 100644 --- a/README +++ b/README @@ -16,3 +16,6 @@ Usage: oxutils.formatBytes(1234567890) '1.15 GB' + +Tests: + nosetests --with-doctest oxutils diff --git a/oxutils/cache.py b/oxutils/cache.py index 086dda2..5e5169a 100644 --- a/oxutils/cache.py +++ b/oxutils/cache.py @@ -19,10 +19,22 @@ from net import DEFAULT_HEADERS cache_timeout = 30*24*60*60 # default is 30 days def status(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout): + ''' + >>> status('http://google.com') + 200 + >>> status('http://google.com/mysearch') + 404 + ''' headers = getHeaders(url, data, headers) return int(headers['status']) def exists(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout): + ''' + >>> exists('http://google.com') + True + >>> exists('http://google.com/mysearch') + False + ''' s = status(url, data, headers, timeout) if s >= 200 and s < 400: return True diff --git a/oxutils/format.py b/oxutils/format.py index 8a0ae26..b24258d 100644 --- a/oxutils/format.py +++ b/oxutils/format.py @@ -7,19 +7,18 @@ def to36(q): """ Converts an integer to base 36 (a useful scheme for human-sayable IDs). - >>> to36(35) - 'z' - >>> to36(119292) - '2k1o' - >>> int(to36(939387374), 36) - 939387374 - >>> to36(0) - '0' - >>> to36(-393) - Traceback (most recent call last): - ... - ValueError: must supply a positive integer - + >>> to36(35) + 'z' + >>> to36(119292) + '2k1o' + >>> int(to36(939387374), 36) + 939387374 + >>> to36(0) + '0' + >>> to36(-393) + Traceback (most recent call last): + ... + ValueError: must supply a positive integer """ if q < 0: raise ValueError, "must supply a positive integer" letters = "0123456789abcdefghijklmnopqrstuvwxyz" @@ -34,21 +33,48 @@ def from36(q): def intValue(strValue, default=''): try: - val = re.compile('(\d*)').findall(unicode(strValue))[0] + val = re.compile('(\d+)').findall(unicode(strValue).strip())[0] except: val = default return val +def test_intValue(): + assert intValue('abc23') == '23' + assert intValue(' abc23') == '23' + assert intValue(' abc') == '' + def floatValue(strValue, default=''): try: - val = re.compile('([\d.]*)').findall(unicode(strValue))[0] + val = re.compile('([\d.]+)').findall(unicode(strValue).strip())[0] except: val = default return val +def test_floatValue(): + print "floatValue" + assert floatValue('abc23.4') == '23.4' + assert floatValue(' abc23.4') == '23.4' + assert floatValue(' abc') == '' + def formatNumber(number, longName, shortName): """ Return the number in a human-readable format (23 KB, 23.4 MB, 23.42 GB) + + >>> formatNumber(123, 'Byte', 'B') + '123 Bytes' + + >>> formatNumber(1234, 'Byte', 'B') + '1 KB' + + >>> formatNumber(1234567, 'Byte', 'B') + '1.2 MB' + + >>> formatNumber(1234567890, 'Byte', 'B') + '1.15 GB' + + >>> formatNumber(1234567890123456789, 'Byte', 'B') + '1,096.5166 PB' + """ if number < 1024: return '%s %s%s' % (formatThousands(number), longName, number != 1 and 's' or '') @@ -61,6 +87,13 @@ def formatNumber(number, longName, shortName): def formatThousands(number, separator = ','): """ Return the number with separators (1,000,000) + + >>> formatThousands(1) + '1' + >>> formatThousands(1000) + '1,000' + >>> formatThousands(1000000) + '1,000,000' """ string = str(number).split('.') l = [] @@ -81,13 +114,29 @@ def formatPixels(number): return formatNumber(number, 'pixel', 'px') def plural(amount, unit, plural='s'): + ''' + >>> plural(1, 'unit') + '1 unit' + >>> plural(2, 'unit') + '2 units' + ''' if abs(amount) != 1: if plural == 's': unit = unit + plural else: unit = plural - return "%s %s" % (formatNumber(amount), unit) + return "%s %s" % (formatThousands(amount), unit) def ms2runtime(ms): + ''' + >>> ms2runtime(5000) + '5 seconds' + >>> ms2runtime(500000) + '8 minutes 20 seconds' + >>> ms2runtime(50000000) + '13 hours 53 minutes 20 seconds' + >>> ms2runtime(50000000-20000) + '13 hours 53 minutes' + ''' seconds = int(ms / 1000) years = 0 days = 0 @@ -111,6 +160,14 @@ def ms2runtime(ms): return " ".join(runtimeString).strip() def ms2playtime(ms): + ''' + >>> ms2playtime(5000) + '00:05' + >>> ms2playtime(500000) + '08:20' + >>> ms2playtime(50000000) + '13:53:20' + ''' it = int(ms / 1000) ms = ms - it*1000 ss = it % 60 @@ -123,6 +180,10 @@ def ms2playtime(ms): return playtime def ms2time(ms): + ''' + >>> ms2time(44592123) + '12:23:12.123' + ''' it = int(ms / 1000) ms = ms - it*1000 ss = it % 60 @@ -131,6 +192,10 @@ def ms2time(ms): return "%d:%02d:%02d.%03d" % (hh, mm, ss, ms) def time2ms(timeString): + ''' + >>> time2ms('12:23:12.123') + 44592123 + ''' ms = 0.0 p = timeString.split(':') for i in range(len(p)): diff --git a/oxutils/html.py b/oxutils/html.py index 6ed63f1..818b498 100644 --- a/oxutils/html.py +++ b/oxutils/html.py @@ -26,20 +26,32 @@ trailing_empty_content_re = re.compile(r'(?:

(?: |\s|
)*?

\s*)+\ del x # Temporary variable def escape(html): - "Returns the given HTML with ampersands, quotes and carets encoded" + ''' + Returns the given HTML with ampersands, quotes and carets encoded + + >>> escape('html "test" & ') + 'html "test" & <brothers>' + ''' if not isinstance(html, basestring): html = str(html) return html.replace('&', '&').replace('<', '<').replace('>', '>').replace('"', '"').replace("'", ''') def linebreaks(value): - "Converts newlines into

and
s" + ''' + Converts newlines into

and
+ ''' value = re.sub(r'\r\n|\r|\n', '\n', value) # normalize newlines paras = re.split('\n{2,}', value) paras = ['

%s

' % p.strip().replace('\n', '
') for p in paras] return '\n\n'.join(paras) def stripTags(value): - "Returns the given HTML with all tags stripped" + """ + Returns the given HTML with all tags stripped + + >>> stripTags('some

title

') + 'some title asdfasdf' + """ return re.sub(r'<[^>]*?>', '', value) def stripSpacesBetweenTags(value): @@ -122,7 +134,11 @@ def cleanHtml(text): charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?') def decodeHtml(html): - return htmldecode(html) + """ + >>> decodeHtml('me & you and $&%') + u'me & you and $&%' + """ + return htmldecode(html) def htmldecode(text): """Decode HTML entities in the given text.""" @@ -145,6 +161,10 @@ def htmldecode(text): return charrefpat.sub(entitydecode, text).replace(u'\xa0', ' ') def highlight(text, query, hlClass="hl"): + """ + >>> highlight('me & you and $&%', 'and') + 'me & you and $&%' + """ if query: text = text.replace('
', '|') query = re.escape(query).replace('\ ', '.') diff --git a/oxutils/normalize.py b/oxutils/normalize.py index 8ad69e0..4a5d05c 100644 --- a/oxutils/normalize.py +++ b/oxutils/normalize.py @@ -18,7 +18,11 @@ for article in _articles: _spArticles.append(article) def canonicalTitle(title): - """Return the title in the canonic format 'Movie Title, The'.""" + """Return the title in the canonic format 'Movie Title, The'. + + >>> canonicalTitle('The Movie Title') + 'Movie Title, The' + """ try: if _articlesDict.has_key(title.split(', ')[-1].lower()): return title except IndexError: pass @@ -43,7 +47,11 @@ def canonicalTitle(title): return title def normalizeTitle(title): - """Return the title in the normal "The Title" format.""" + """Return the title in the normal "The Title" format. + + >>> normalizeTitle('Movie Title, The') + 'The Movie Title' + """ stitle = title.split(', ') if len(stitle) > 1 and _articlesDict.has_key(stitle[-1].lower()): sep = ' ' @@ -52,6 +60,15 @@ def normalizeTitle(title): return title def normalizeImdbId(imdbId): + """Return 7 digit imdbId. + + >>> normalizeImdbId('http://www.imdb.com/title/tt0159206/') + '0159206' + >>> normalizeImdbId(159206) + '0159206' + >>> normalizeImdbId('tt0159206') + '0159206' + """ if isinstance(imdbId, basestring): imdbId = re.sub('.*(\d{7}).*', '\\1', imdbId) elif isinstance(imdbId, int): diff --git a/oxutils/text.py b/oxutils/text.py index 157e6c5..ed83290 100644 --- a/oxutils/text.py +++ b/oxutils/text.py @@ -66,7 +66,14 @@ def wrap(text, width): ) def truncateString(s, num): - "Truncates a string after a certain number of chacters, but ends with a word" + """Truncates a string after a certain number of chacters, but ends with a word + + >>> truncateString('Truncates a string after a certain number of chacters, but ends with a word', 23) + 'Truncates a string...' + >>> truncateString('Truncates a string', 23) + 'Truncates a string' + + """ length = int(num) if len(s) <= length: return s @@ -76,10 +83,16 @@ def truncateString(s, num): ts += " " + words.pop(0) if words: ts += "..." - return ts + return ts.strip() def trimString(string, num): - "Truncates a string after a certain number of chacters, adding ... at -10 characters" + """Truncates a string after a certain number of chacters, adding ... at -10 characters + + >>> trimString('Truncates a string after a certain number of chacters', 23) + 'Truncates ...f chacters' + >>> trimString('Truncates a string', 23) + 'Truncates a string' + """ if len(string) > num: string = string[:num - 13] + '...' + string[-10:] return string @@ -99,8 +112,9 @@ def getValidFilename(s): Returns the given string converted to a string that can be used for a clean filename. Specifically, leading and trailing spaces are removed; all non-filename-safe characters are removed. - >>> get_valid_filename("john's portrait in 2004.jpg") - 'john_s portrait in 2004.jpg' + + >>> getValidFilename("john's portrait in 2004.jpg") + 'john_s_portrait_in_2004.jpg' """ s = s.strip() s = s.replace(' ', '_') @@ -110,15 +124,15 @@ def getValidFilename(s): def getTextList(list_, last_word='or'): """ - >>> get_text_list(['a', 'b', 'c', 'd']) + >>> getTextList(['a', 'b', 'c', 'd']) 'a, b, c or d' - >>> get_text_list(['a', 'b', 'c'], 'and') + >>> getTextList(['a', 'b', 'c'], 'and') 'a, b and c' - >>> get_text_list(['a', 'b'], 'and') + >>> getTextList(['a', 'b'], 'and') 'a and b' - >>> get_text_list(['a']) + >>> getTextList(['a']) 'a' - >>> get_text_list([]) + >>> getTextList([]) '' """ if len(list_) == 0: return '' @@ -164,8 +178,8 @@ def smartSplit(text): Supports both single and double quotes, and supports escaping quotes with backslashes. In the output, strings will keep their initial and trailing quote marks. - >>> list(smart_split('This is "a person\'s" test.')) - ['This', 'is', '"a person\'s"', 'test.'] + >>> list(smartSplit('This is "a person\\'s" test.')) + ['This', 'is', '"a person\\'s"', 'test.'] """ for bit in smart_split_re.finditer(text): bit = bit.group(0)