diff --git a/README b/README
index 728d6dc..fd7ca1a 100644
--- a/README
+++ b/README
@@ -16,3 +16,6 @@ Usage:
oxutils.formatBytes(1234567890)
'1.15 GB'
+
+Tests:
+ nosetests --with-doctest oxutils
diff --git a/oxutils/cache.py b/oxutils/cache.py
index 086dda2..5e5169a 100644
--- a/oxutils/cache.py
+++ b/oxutils/cache.py
@@ -19,10 +19,22 @@ from net import DEFAULT_HEADERS
cache_timeout = 30*24*60*60 # default is 30 days
def status(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
+ '''
+ >>> status('http://google.com')
+ 200
+ >>> status('http://google.com/mysearch')
+ 404
+ '''
headers = getHeaders(url, data, headers)
return int(headers['status'])
def exists(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
+ '''
+ >>> exists('http://google.com')
+ True
+ >>> exists('http://google.com/mysearch')
+ False
+ '''
s = status(url, data, headers, timeout)
if s >= 200 and s < 400:
return True
diff --git a/oxutils/format.py b/oxutils/format.py
index 8a0ae26..b24258d 100644
--- a/oxutils/format.py
+++ b/oxutils/format.py
@@ -7,19 +7,18 @@ def to36(q):
"""
Converts an integer to base 36 (a useful scheme for human-sayable IDs).
- >>> to36(35)
- 'z'
- >>> to36(119292)
- '2k1o'
- >>> int(to36(939387374), 36)
- 939387374
- >>> to36(0)
- '0'
- >>> to36(-393)
- Traceback (most recent call last):
- ...
- ValueError: must supply a positive integer
-
+ >>> to36(35)
+ 'z'
+ >>> to36(119292)
+ '2k1o'
+ >>> int(to36(939387374), 36)
+ 939387374
+ >>> to36(0)
+ '0'
+ >>> to36(-393)
+ Traceback (most recent call last):
+ ...
+ ValueError: must supply a positive integer
"""
if q < 0: raise ValueError, "must supply a positive integer"
letters = "0123456789abcdefghijklmnopqrstuvwxyz"
@@ -34,21 +33,48 @@ def from36(q):
def intValue(strValue, default=''):
try:
- val = re.compile('(\d*)').findall(unicode(strValue))[0]
+ val = re.compile('(\d+)').findall(unicode(strValue).strip())[0]
except:
val = default
return val
+def test_intValue():
+ assert intValue('abc23') == '23'
+ assert intValue(' abc23') == '23'
+ assert intValue(' abc') == ''
+
def floatValue(strValue, default=''):
try:
- val = re.compile('([\d.]*)').findall(unicode(strValue))[0]
+ val = re.compile('([\d.]+)').findall(unicode(strValue).strip())[0]
except:
val = default
return val
+def test_floatValue():
+ print "floatValue"
+ assert floatValue('abc23.4') == '23.4'
+ assert floatValue(' abc23.4') == '23.4'
+ assert floatValue(' abc') == ''
+
def formatNumber(number, longName, shortName):
"""
Return the number in a human-readable format (23 KB, 23.4 MB, 23.42 GB)
+
+ >>> formatNumber(123, 'Byte', 'B')
+ '123 Bytes'
+
+ >>> formatNumber(1234, 'Byte', 'B')
+ '1 KB'
+
+ >>> formatNumber(1234567, 'Byte', 'B')
+ '1.2 MB'
+
+ >>> formatNumber(1234567890, 'Byte', 'B')
+ '1.15 GB'
+
+ >>> formatNumber(1234567890123456789, 'Byte', 'B')
+ '1,096.5166 PB'
+
"""
if number < 1024:
return '%s %s%s' % (formatThousands(number), longName, number != 1 and 's' or '')
@@ -61,6 +87,13 @@ def formatNumber(number, longName, shortName):
def formatThousands(number, separator = ','):
"""
Return the number with separators (1,000,000)
+
+ >>> formatThousands(1)
+ '1'
+ >>> formatThousands(1000)
+ '1,000'
+ >>> formatThousands(1000000)
+ '1,000,000'
"""
string = str(number).split('.')
l = []
@@ -81,13 +114,29 @@ def formatPixels(number):
return formatNumber(number, 'pixel', 'px')
def plural(amount, unit, plural='s'):
+ '''
+ >>> plural(1, 'unit')
+ '1 unit'
+ >>> plural(2, 'unit')
+ '2 units'
+ '''
if abs(amount) != 1:
if plural == 's':
unit = unit + plural
else: unit = plural
- return "%s %s" % (formatNumber(amount), unit)
+ return "%s %s" % (formatThousands(amount), unit)
def ms2runtime(ms):
+ '''
+ >>> ms2runtime(5000)
+ '5 seconds'
+ >>> ms2runtime(500000)
+ '8 minutes 20 seconds'
+ >>> ms2runtime(50000000)
+ '13 hours 53 minutes 20 seconds'
+ >>> ms2runtime(50000000-20000)
+ '13 hours 53 minutes'
+ '''
seconds = int(ms / 1000)
years = 0
days = 0
@@ -111,6 +160,14 @@ def ms2runtime(ms):
return " ".join(runtimeString).strip()
def ms2playtime(ms):
+ '''
+ >>> ms2playtime(5000)
+ '00:05'
+ >>> ms2playtime(500000)
+ '08:20'
+ >>> ms2playtime(50000000)
+ '13:53:20'
+ '''
it = int(ms / 1000)
ms = ms - it*1000
ss = it % 60
@@ -123,6 +180,10 @@ def ms2playtime(ms):
return playtime
def ms2time(ms):
+ '''
+ >>> ms2time(44592123)
+ '12:23:12.123'
+ '''
it = int(ms / 1000)
ms = ms - it*1000
ss = it % 60
@@ -131,6 +192,10 @@ def ms2time(ms):
return "%d:%02d:%02d.%03d" % (hh, mm, ss, ms)
def time2ms(timeString):
+ '''
+ >>> time2ms('12:23:12.123')
+ 44592123
+ '''
ms = 0.0
p = timeString.split(':')
for i in range(len(p)):
diff --git a/oxutils/html.py b/oxutils/html.py
index 6ed63f1..818b498 100644
--- a/oxutils/html.py
+++ b/oxutils/html.py
@@ -26,20 +26,32 @@ trailing_empty_content_re = re.compile(r'(?:
(?: |\s|
)*?
\s*)+\
del x # Temporary variable
def escape(html):
- "Returns the given HTML with ampersands, quotes and carets encoded"
+ '''
+ Returns the given HTML with ampersands, quotes and angle brackets encoded
+
+ >>> escape('html "test" & <brothers>')
+ 'html &quot;test&quot; &amp; &lt;brothers&gt;'
+ '''
if not isinstance(html, basestring):
html = str(html)
return html.replace('&', '&').replace('<', '<').replace('>', '>').replace('"', '"').replace("'", ''')
def linebreaks(value):
- "Converts newlines into <p> and <br />s"
+ '''
+ Converts newlines into <p> and <br />
+ '''
value = re.sub(r'\r\n|\r|\n', '\n', value) # normalize newlines
paras = re.split('\n{2,}', value)
paras = ['
%s
' % p.strip().replace('\n', '
') for p in paras]
return '\n\n'.join(paras)
def stripTags(value):
- "Returns the given HTML with all tags stripped"
+ """
+ Returns the given HTML with all tags stripped
+
+ >>> stripTags('some <h2>title</h2> <script>asdfasdf</script>')
+ 'some title asdfasdf'
+ """
return re.sub(r'<[^>]*?>', '', value)
def stripSpacesBetweenTags(value):
@@ -122,7 +134,11 @@ def cleanHtml(text):
charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?')
def decodeHtml(html):
- return htmldecode(html)
+ """
+ >>> decodeHtml('me & you and $&%')
+ u'me & you and $&%'
+ """
+ return htmldecode(html)
def htmldecode(text):
"""Decode HTML entities in the given text."""
@@ -145,6 +161,10 @@ def htmldecode(text):
return charrefpat.sub(entitydecode, text).replace(u'\xa0', ' ')
def highlight(text, query, hlClass="hl"):
+ """
+ >>> highlight('me & you and $&%', 'and')
+ 'me & you and $&%'
+ """
if query:
text = text.replace('
', '|')
query = re.escape(query).replace('\ ', '.')
diff --git a/oxutils/normalize.py b/oxutils/normalize.py
index 8ad69e0..4a5d05c 100644
--- a/oxutils/normalize.py
+++ b/oxutils/normalize.py
@@ -18,7 +18,11 @@ for article in _articles:
_spArticles.append(article)
def canonicalTitle(title):
- """Return the title in the canonic format 'Movie Title, The'."""
+ """Return the title in the canonical format 'Movie Title, The'.
+
+ >>> canonicalTitle('The Movie Title')
+ 'Movie Title, The'
+ """
try:
if _articlesDict.has_key(title.split(', ')[-1].lower()): return title
except IndexError: pass
@@ -43,7 +47,11 @@ def canonicalTitle(title):
return title
def normalizeTitle(title):
- """Return the title in the normal "The Title" format."""
+ """Return the title in the normal "The Title" format.
+
+ >>> normalizeTitle('Movie Title, The')
+ 'The Movie Title'
+ """
stitle = title.split(', ')
if len(stitle) > 1 and _articlesDict.has_key(stitle[-1].lower()):
sep = ' '
@@ -52,6 +60,15 @@ def normalizeTitle(title):
return title
def normalizeImdbId(imdbId):
+ """Return 7 digit imdbId.
+
+ >>> normalizeImdbId('http://www.imdb.com/title/tt0159206/')
+ '0159206'
+ >>> normalizeImdbId(159206)
+ '0159206'
+ >>> normalizeImdbId('tt0159206')
+ '0159206'
+ """
if isinstance(imdbId, basestring):
imdbId = re.sub('.*(\d{7}).*', '\\1', imdbId)
elif isinstance(imdbId, int):
diff --git a/oxutils/text.py b/oxutils/text.py
index 157e6c5..ed83290 100644
--- a/oxutils/text.py
+++ b/oxutils/text.py
@@ -66,7 +66,14 @@ def wrap(text, width):
)
def truncateString(s, num):
- "Truncates a string after a certain number of chacters, but ends with a word"
+ """Truncates a string after a certain number of characters, but ends with a word
+
+ >>> truncateString('Truncates a string after a certain number of chacters, but ends with a word', 23)
+ 'Truncates a string...'
+ >>> truncateString('Truncates a string', 23)
+ 'Truncates a string'
+
+ """
length = int(num)
if len(s) <= length:
return s
@@ -76,10 +83,16 @@ def truncateString(s, num):
ts += " " + words.pop(0)
if words:
ts += "..."
- return ts
+ return ts.strip()
def trimString(string, num):
- "Truncates a string after a certain number of chacters, adding ... at -10 characters"
+ """Truncates a string after a certain number of characters, adding ... at -10 characters
+
+ >>> trimString('Truncates a string after a certain number of chacters', 23)
+ 'Truncates ...f chacters'
+ >>> trimString('Truncates a string', 23)
+ 'Truncates a string'
+ """
if len(string) > num:
string = string[:num - 13] + '...' + string[-10:]
return string
@@ -99,8 +112,9 @@ def getValidFilename(s):
Returns the given string converted to a string that can be used for a clean
filename. Specifically, leading and trailing spaces are removed;
all non-filename-safe characters are removed.
- >>> get_valid_filename("john's portrait in 2004.jpg")
- 'john_s portrait in 2004.jpg'
+
+ >>> getValidFilename("john's portrait in 2004.jpg")
+ 'john_s_portrait_in_2004.jpg'
"""
s = s.strip()
s = s.replace(' ', '_')
@@ -110,15 +124,15 @@ def getValidFilename(s):
def getTextList(list_, last_word='or'):
"""
- >>> get_text_list(['a', 'b', 'c', 'd'])
+ >>> getTextList(['a', 'b', 'c', 'd'])
'a, b, c or d'
- >>> get_text_list(['a', 'b', 'c'], 'and')
+ >>> getTextList(['a', 'b', 'c'], 'and')
'a, b and c'
- >>> get_text_list(['a', 'b'], 'and')
+ >>> getTextList(['a', 'b'], 'and')
'a and b'
- >>> get_text_list(['a'])
+ >>> getTextList(['a'])
'a'
- >>> get_text_list([])
+ >>> getTextList([])
''
"""
if len(list_) == 0: return ''
@@ -164,8 +178,8 @@ def smartSplit(text):
Supports both single and double quotes, and supports escaping quotes with
backslashes. In the output, strings will keep their initial and trailing
quote marks.
- >>> list(smart_split('This is "a person\'s" test.'))
- ['This', 'is', '"a person\'s"', 'test.']
+ >>> list(smartSplit('This is "a person\\'s" test.'))
+ ['This', 'is', '"a person\\'s"', 'test.']
"""
for bit in smart_split_re.finditer(text):
bit = bit.group(0)