diff --git a/oxutils/__init__.py b/oxutils/__init__.py index e12fc38..240a5c1 100644 --- a/oxutils/__init__.py +++ b/oxutils/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# vi:si:et:sw=2:sts=2:ts=2 +# vi:si:et:sw=4:sts=4:ts=4 # 2008 from hashes import * @@ -11,7 +11,7 @@ import cache #only works if BitTornado is installed try: - from torrent import * + from torrent import * except: - pass + pass diff --git a/oxutils/cache.py b/oxutils/cache.py index c6bff37..e7146e1 100644 --- a/oxutils/cache.py +++ b/oxutils/cache.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# vi:si:et:sw=2:sts=2:ts=2 +# vi:si:et:sw=4:sts=4:ts=4 # 2008 import gzip import StringIO @@ -19,99 +19,99 @@ from net import DEFAULT_HEADERS, getEncoding cache_timeout = 30*24*60*60 # default is 30 days def status(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout): - ''' - >>> status('http://google.com') - 200 - >>> status('http://google.com/mysearch') - 404 - ''' - headers = getHeaders(url, data, headers) - return int(headers['status']) + ''' + >>> status('http://google.com') + 200 + >>> status('http://google.com/mysearch') + 404 + ''' + headers = getHeaders(url, data, headers) + return int(headers['status']) def exists(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout): - ''' - >>> exists('http://google.com') - True - >>> exists('http://google.com/mysearch') - False - ''' - s = status(url, data, headers, timeout) - if s >= 200 and s < 400: - return True - return False + ''' + >>> exists('http://google.com') + True + >>> exists('http://google.com/mysearch') + False + ''' + s = status(url, data, headers, timeout) + if s >= 200 and s < 400: + return True + return False def getHeaders(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout): - url_cache_file = "%s.headers" % getUrlCacheFile(url, data, headers) - url_headers = loadUrlCache(url_cache_file, timeout) - if url_headers: - url_headers = simplejson.loads(url_headers) - else: - url_headers = net.getHeaders(url, data, headers) - saveUrlHeaders(url_cache_file, url_headers) - return url_headers + url_cache_file = "%s.headers" % getUrlCacheFile(url, data, headers) + url_headers = loadUrlCache(url_cache_file, timeout) + if url_headers: + url_headers = simplejson.loads(url_headers) + else: + url_headers = net.getHeaders(url, data, headers) + saveUrlHeaders(url_cache_file, url_headers) + return url_headers def getUrl(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout): - url_cache_file = getUrlCacheFile(url, data, headers) - result = loadUrlCache(url_cache_file, timeout) - if not result: - try: - url_headers, result = net.getUrl(url, data, headers, returnHeaders=True) - except urllib2.HTTPError, e: - e.headers['Status'] = "%s" % e.code - url_headers = dict(e.headers) - result = e.read() - if url_headers.get('content-encoding', None) == 'gzip': - result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read() - saveUrlCache(url_cache_file, result, url_headers) - return result + url_cache_file = getUrlCacheFile(url, data, headers) + result = loadUrlCache(url_cache_file, timeout) + if not result: + try: + url_headers, result = net.getUrl(url, data, headers, returnHeaders=True) + except urllib2.HTTPError, e: + e.headers['Status'] = "%s" % e.code + url_headers = dict(e.headers) + result = e.read() + if url_headers.get('content-encoding', None) == 'gzip': + result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read() + saveUrlCache(url_cache_file, result, url_headers) + return result def getUrlUnicode(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout, _getUrl=getUrl): - data = _getUrl(url, data, headers, timeout) - encoding = getEncoding(data) - if not encoding: - encoding = 'latin-1' - return unicode(data, encoding) + data = _getUrl(url, data, headers, timeout) + encoding = getEncoding(data) + if not encoding: + encoding = 'latin-1' + return unicode(data, encoding) def getCacheBase(): - 'cache base is eather ~/.ox/cache or can set via env variable oxCACHE' - return os.environ.get('oxCACHE', os.path.expanduser('~/.ox/cache')) + 'cache base is eather ~/.ox/cache or can set via env variable oxCACHE' + return os.environ.get('oxCACHE', os.path.expanduser('~/.ox/cache')) def getUrlCacheFile(url, data=None, headers=DEFAULT_HEADERS): - if data: - url_hash = sha.sha(url + '?' + data).hexdigest() - else: - url_hash = sha.sha(url).hexdigest() - domain = ".".join(urlparse.urlparse(url)[1].split('.')[-2:]) - return os.path.join(getCacheBase(), domain, url_hash[:2], url_hash[2:4], url_hash[4:6], url_hash) + if data: + url_hash = sha.sha(url + '?' + data).hexdigest() + else: + url_hash = sha.sha(url).hexdigest() + domain = ".".join(urlparse.urlparse(url)[1].split('.')[-2:]) + return os.path.join(getCacheBase(), domain, url_hash[:2], url_hash[2:4], url_hash[4:6], url_hash) def loadUrlCache(url_cache_file, timeout=cache_timeout): - if timeout == 0: + if timeout == 0: + return None + if os.path.exists(url_cache_file): + ctime = os.stat(url_cache_file).st_ctime + now = time.mktime(time.localtime()) + file_age = now-ctime + if timeout < 0 or file_age < timeout: + f = open(url_cache_file) + data = f.read() + f.close() + return data return None - if os.path.exists(url_cache_file): - ctime = os.stat(url_cache_file).st_ctime - now = time.mktime(time.localtime()) - file_age = now-ctime - if timeout < 0 or file_age < timeout: - f = open(url_cache_file) - data = f.read() - f.close() - return data - return None def saveUrlCache(url_cache_file, data, headers): - folder = os.path.dirname(url_cache_file) - if not os.path.exists(folder): - os.makedirs(folder) - f = open(url_cache_file, 'w') - f.write(data) - f.close() - saveUrlHeaders("%s.headers" % url_cache_file, headers) + folder = os.path.dirname(url_cache_file) + if not os.path.exists(folder): + os.makedirs(folder) + f = open(url_cache_file, 'w') + f.write(data) + f.close() + saveUrlHeaders("%s.headers" % url_cache_file, headers) def saveUrlHeaders(url_cache_file, headers): - folder = os.path.dirname(url_cache_file) - if not os.path.exists(folder): - os.makedirs(folder) - f = open(url_cache_file, 'w') - f.write(simplejson.dumps(headers)) - f.close() + folder = os.path.dirname(url_cache_file) + if not os.path.exists(folder): + os.makedirs(folder) + f = open(url_cache_file, 'w') + f.write(simplejson.dumps(headers)) + f.close() diff --git a/oxutils/format.py b/oxutils/format.py index b24258d..da5eb76 100644 --- a/oxutils/format.py +++ b/oxutils/format.py @@ -1,208 +1,207 @@ # -*- coding: utf-8 -*- -# vi:si:et:sw=2:sts=2:ts=2 +# vi:si:et:sw=4:sts=4:ts=4 import math import re def to36(q): - """ - Converts an integer to base 36 (a useful scheme for human-sayable IDs). + """ + Converts an integer to base 36 (a useful scheme for human-sayable IDs). - >>> to36(35) - 'z' - >>> to36(119292) - '2k1o' - >>> int(to36(939387374), 36) - 939387374 - >>> to36(0) - '0' - >>> to36(-393) - Traceback (most recent call last): - ... - ValueError: must supply a positive integer - """ - if q < 0: raise ValueError, "must supply a positive integer" - letters = "0123456789abcdefghijklmnopqrstuvwxyz" - converted = [] - while q != 0: - q, r = divmod(q, 36) - converted.insert(0, letters[r]) - return "".join(converted) or '0' + >>> to36(35) + 'z' + >>> to36(119292) + '2k1o' + >>> int(to36(939387374), 36) + 939387374 + >>> to36(0) + '0' + >>> to36(-393) + Traceback (most recent call last): + ... + ValueError: must supply a positive integer + """ + if q < 0: raise ValueError, "must supply a positive integer" + letters = "0123456789abcdefghijklmnopqrstuvwxyz" + converted = [] + while q != 0: + q, r = divmod(q, 36) + converted.insert(0, letters[r]) + return "".join(converted) or '0' def from36(q): - return int(q, 36) + return int(q, 36) def intValue(strValue, default=''): - try: - val = re.compile('(\d+)').findall(unicode(strValue).strip())[0] - except: - val = default - return val + try: + val = re.compile('(\d+)').findall(unicode(strValue).strip())[0] + except: + val = default + return val def test_intValue(): - assert intValue('abc23') == '23' - assert intValue(' abc23') == '23' - assert intValue(' abc') == '' + assert intValue('abc23') == '23' + assert intValue(' abc23') == '23' + assert intValue(' abc') == '' def floatValue(strValue, default=''): - try: - val = re.compile('([\d.]+)').findall(unicode(strValue).strip())[0] - except: - val = default - return val + try: + val = re.compile('([\d.]+)').findall(unicode(strValue).strip())[0] + except: + val = default + return val def test_floatValue(): - print "floatValue" - assert floatValue('abc23.4') == '23.4' - assert floatValue(' abc23.4') == '23.4' - assert floatValue(' abc') == '' + assert floatValue('abc23.4') == '23.4' + assert floatValue(' abc23.4') == '23.4' + assert floatValue(' abc') == '' def formatNumber(number, longName, shortName): - """ - Return the number in a human-readable format (23 KB, 23.4 MB, 23.42 GB) - - >>> formatNumber(123, 'Byte', 'B') - '123 Bytes' + """ + Return the number in a human-readable format (23 KB, 23.4 MB, 23.42 GB) + + >>> formatNumber(123, 'Byte', 'B') + '123 Bytes' - >>> formatNumber(1234, 'Byte', 'B') - '1 KB' + >>> formatNumber(1234, 'Byte', 'B') + '1 KB' - >>> formatNumber(1234567, 'Byte', 'B') - '1.2 MB' + >>> formatNumber(1234567, 'Byte', 'B') + '1.2 MB' - >>> formatNumber(1234567890, 'Byte', 'B') - '1.15 GB' + >>> formatNumber(1234567890, 'Byte', 'B') + '1.15 GB' - >>> formatNumber(1234567890123456789, 'Byte', 'B') - '1,096.5166 PB' + >>> formatNumber(1234567890123456789, 'Byte', 'B') + '1,096.5166 PB' - """ - if number < 1024: - return '%s %s%s' % (formatThousands(number), longName, number != 1 and 's' or '') - prefix = ['K', 'M', 'G', 'T', 'P'] - for i in range(5): - if number < math.pow(1024, i + 2) or i == 4: - n = number / math.pow(1024, i + 1) - return '%s %s%s' % (formatThousands('%.*f' % (i, n)), prefix[i], shortName) + """ + if number < 1024: + return '%s %s%s' % (formatThousands(number), longName, number != 1 and 's' or '') + prefix = ['K', 'M', 'G', 'T', 'P'] + for i in range(5): + if number < math.pow(1024, i + 2) or i == 4: + n = number / math.pow(1024, i + 1) + return '%s %s%s' % (formatThousands('%.*f' % (i, n)), prefix[i], shortName) def formatThousands(number, separator = ','): - """ - Return the number with separators (1,000,000) - - >>> formatThousands(1) - '1' - >>> formatThousands(1000) - '1,000' - >>> formatThousands(1000000) - '1,000,000' - """ - string = str(number).split('.') - l = [] - for i, character in enumerate(reversed(string[0])): - if i and (not (i % 3)): - l.insert(0, separator) - l.insert(0, character) - string[0] = ''.join(l) - return '.'.join(string) + """ + Return the number with separators (1,000,000) + + >>> formatThousands(1) + '1' + >>> formatThousands(1000) + '1,000' + >>> formatThousands(1000000) + '1,000,000' + """ + string = str(number).split('.') + l = [] + for i, character in enumerate(reversed(string[0])): + if i and (not (i % 3)): + l.insert(0, separator) + l.insert(0, character) + string[0] = ''.join(l) + return '.'.join(string) def formatBits(number): - return formatNumber(number, 'bit', 'b') + return formatNumber(number, 'bit', 'b') def formatBytes(number): - return formatNumber(number, 'byte', 'B') + return formatNumber(number, 'byte', 'B') def formatPixels(number): - return formatNumber(number, 'pixel', 'px') + return formatNumber(number, 'pixel', 'px') def plural(amount, unit, plural='s'): - ''' - >>> plural(1, 'unit') - '1 unit' - >>> plural(2, 'unit') - '2 units' - ''' - if abs(amount) != 1: - if plural == 's': - unit = unit + plural - else: unit = plural - return "%s %s" % (formatThousands(amount), unit) + ''' + >>> plural(1, 'unit') + '1 unit' + >>> plural(2, 'unit') + '2 units' + ''' + if abs(amount) != 1: + if plural == 's': + unit = unit + plural + else: unit = plural + return "%s %s" % (formatThousands(amount), unit) def ms2runtime(ms): - ''' - >>> ms2runtime(5000) - '5 seconds' - >>> ms2runtime(500000) - '8 minutes 20 seconds' - >>> ms2runtime(50000000) - '13 hours 53 minutes 20 seconds' - >>> ms2runtime(50000000-20000) - '13 hours 53 minutes' - ''' - seconds = int(ms / 1000) - years = 0 - days = 0 - hours = 0 - minutes = 0 - if seconds >= 60: - minutes = int(seconds / 60) - seconds = seconds % 60 - if minutes >= 60: - hours = int(minutes / 60) - minutes = minutes % 60 - if hours >= 24: - days = int(hours / 24) - hours = hours % 24 - if days >= 365: - years = int(days / 365) - days = days % 365 - runtimeString = (plural(years, 'year'), plural(days, 'day'), - plural(hours,'hour'), plural(minutes, 'minute'), plural(seconds, 'second')) - runtimeString = filter(lambda x: not x.startswith('0'), runtimeString) - return " ".join(runtimeString).strip() + ''' + >>> ms2runtime(5000) + '5 seconds' + >>> ms2runtime(500000) + '8 minutes 20 seconds' + >>> ms2runtime(50000000) + '13 hours 53 minutes 20 seconds' + >>> ms2runtime(50000000-20000) + '13 hours 53 minutes' + ''' + seconds = int(ms / 1000) + years = 0 + days = 0 + hours = 0 + minutes = 0 + if seconds >= 60: + minutes = int(seconds / 60) + seconds = seconds % 60 + if minutes >= 60: + hours = int(minutes / 60) + minutes = minutes % 60 + if hours >= 24: + days = int(hours / 24) + hours = hours % 24 + if days >= 365: + years = int(days / 365) + days = days % 365 + runtimeString = (plural(years, 'year'), plural(days, 'day'), + plural(hours,'hour'), plural(minutes, 'minute'), plural(seconds, 'second')) + runtimeString = filter(lambda x: not x.startswith('0'), runtimeString) + return " ".join(runtimeString).strip() def ms2playtime(ms): - ''' - >>> ms2playtime(5000) - '00:05' - >>> ms2playtime(500000) - '08:20' - >>> ms2playtime(50000000) - '13:53:20' - ''' - it = int(ms / 1000) - ms = ms - it*1000 - ss = it % 60 - mm = ((it-ss)/60) % 60 - hh = ((it-(mm*60)-ss)/3600) % 60 - if hh: - playtime= "%02d:%02d:%02d" % (hh, mm, ss) - else: - playtime= "%02d:%02d" % (mm, ss) - return playtime + ''' + >>> ms2playtime(5000) + '00:05' + >>> ms2playtime(500000) + '08:20' + >>> ms2playtime(50000000) + '13:53:20' + ''' + it = int(ms / 1000) + ms = ms - it*1000 + ss = it % 60 + mm = ((it-ss)/60) % 60 + hh = ((it-(mm*60)-ss)/3600) % 60 + if hh: + playtime= "%02d:%02d:%02d" % (hh, mm, ss) + else: + playtime= "%02d:%02d" % (mm, ss) + return playtime def ms2time(ms): - ''' - >>> ms2time(44592123) - '12:23:12.123' - ''' - it = int(ms / 1000) - ms = ms - it*1000 - ss = it % 60 - mm = ((it-ss)/60) % 60 - hh = ((it-(mm*60)-ss)/3600) % 60 - return "%d:%02d:%02d.%03d" % (hh, mm, ss, ms) + ''' + >>> ms2time(44592123) + '12:23:12.123' + ''' + it = int(ms / 1000) + ms = ms - it*1000 + ss = it % 60 + mm = ((it-ss)/60) % 60 + hh = ((it-(mm*60)-ss)/3600) % 60 + return "%d:%02d:%02d.%03d" % (hh, mm, ss, ms) def time2ms(timeString): - ''' - >>> time2ms('12:23:12.123') - 44592123 - ''' - ms = 0.0 - p = timeString.split(':') - for i in range(len(p)): - ms = ms * 60 + float(p[i]) - return int(ms * 1000) + ''' + >>> time2ms('12:23:12.123') + 44592123 + ''' + ms = 0.0 + p = timeString.split(':') + for i in range(len(p)): + ms = ms * 60 + float(p[i]) + return int(ms * 1000) def shiftTime(offset, timeString): - newTime = time2ms(timeString) + offset - return ms2time(newTime) + newTime = time2ms(timeString) + offset + return ms2time(newTime) diff --git a/oxutils/hashes.py b/oxutils/hashes.py index 800c104..4d03684 100644 --- a/oxutils/hashes.py +++ b/oxutils/hashes.py @@ -1,17 +1,17 @@ # -*- coding: utf-8 -*- -# vi:si:et:sw=2:sts=2:ts=2 +# vi:si:et:sw=4:sts=4:ts=4 # GPL written 2008 by j@pad.ma import sha import os def sha1sum(filename): - sha1 = sha.new() - file=open(filename) - buffer=file.read(4096) - while buffer: - sha1.update(buffer) + sha1 = sha.new() + file=open(filename) buffer=file.read(4096) - file.close() - return sha1.hexdigest() + while buffer: + sha1.update(buffer) + buffer=file.read(4096) + file.close() + return sha1.hexdigest() diff --git a/oxutils/html.py b/oxutils/html.py index 52be5ff..afceafb 100644 --- a/oxutils/html.py +++ b/oxutils/html.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# vi:si:et:sw=2:sts=2:ts=2 +# vi:si:et:sw=4:sts=4:ts=4 # GPL written 2008 by j@pad.ma import re import string @@ -26,147 +26,147 @@ trailing_empty_content_re = re.compile(r'(?:

(?: |\s|
)*?

\s*)+\ del x # Temporary variable def escape(html): - ''' - Returns the given HTML with ampersands, quotes and carets encoded + ''' + Returns the given HTML with ampersands, quotes and carets encoded - >>> escape('html "test" & ') - 'html "test" & <brothers>' - ''' - if not isinstance(html, basestring): - html = str(html) - return html.replace('&', '&').replace('<', '<').replace('>', '>').replace('"', '"').replace("'", ''') + >>> escape('html "test" & ') + 'html "test" & <brothers>' + ''' + if not isinstance(html, basestring): + html = str(html) + return html.replace('&', '&').replace('<', '<').replace('>', '>').replace('"', '"').replace("'", ''') def linebreaks(value): - ''' - Converts newlines into

and
- ''' - value = re.sub(r'\r\n|\r|\n', '\n', value) # normalize newlines - paras = re.split('\n{2,}', value) - paras = ['

%s

' % p.strip().replace('\n', '
') for p in paras] - return '\n\n'.join(paras) + ''' + Converts newlines into

and
+ ''' + value = re.sub(r'\r\n|\r|\n', '\n', value) # normalize newlines + paras = re.split('\n{2,}', value) + paras = ['

%s

' % p.strip().replace('\n', '
') for p in paras] + return '\n\n'.join(paras) def stripTags(value): - """ - Returns the given HTML with all tags stripped - - >>> stripTags('some

title

') - 'some title asdfasdf' - """ - return re.sub(r'<[^>]*?>', '', value) + """ + Returns the given HTML with all tags stripped + + >>> stripTags('some

title

') + 'some title asdfasdf' + """ + return re.sub(r'<[^>]*?>', '', value) def stripSpacesBetweenTags(value): - "Returns the given HTML with spaces between tags normalized to a single space" - return re.sub(r'>\s+<', '> <', value) + "Returns the given HTML with spaces between tags normalized to a single space" + return re.sub(r'>\s+<', '> <', value) def stripEntities(value): - "Returns the given HTML with all entities (&something;) stripped" - return re.sub(r'&(?:\w+|#\d);', '', value) + "Returns the given HTML with all entities (&something;) stripped" + return re.sub(r'&(?:\w+|#\d);', '', value) def fixAmpersands(value): - "Returns the given HTML with all unencoded ampersands encoded correctly" - return unencoded_ampersands_re.sub('&', value) + "Returns the given HTML with all unencoded ampersands encoded correctly" + return unencoded_ampersands_re.sub('&', value) def urlize(text, trim_url_limit=None, nofollow=False): - """ - Converts any URLs in text into clickable links. Works on http://, https:// and - www. links. Links can have trailing punctuation (periods, commas, close-parens) - and leading punctuation (opening parens) and it'll still do the right thing. + """ + Converts any URLs in text into clickable links. Works on http://, https:// and + www. links. Links can have trailing punctuation (periods, commas, close-parens) + and leading punctuation (opening parens) and it'll still do the right thing. - If trim_url_limit is not None, the URLs in link text will be limited to - trim_url_limit characters. + If trim_url_limit is not None, the URLs in link text will be limited to + trim_url_limit characters. - If nofollow is True, the URLs in link text will get a rel="nofollow" attribute. - """ - trim_url = lambda x, limit=trim_url_limit: limit is not None and (x[:limit] + (len(x) >=limit and '...' or '')) or x - words = word_split_re.split(text) - nofollow_attr = nofollow and ' rel="nofollow"' or '' - for i, word in enumerate(words): - match = punctuation_re.match(word) - if match: - lead, middle, trail = match.groups() - if middle.startswith('www.') or ('@' not in middle and not middle.startswith('http://') and \ - len(middle) > 0 and middle[0] in string.letters + string.digits and \ - (middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com'))): - middle = '%s' % (middle, nofollow_attr, trim_url(middle)) - if middle.startswith('http://') or middle.startswith('https://'): - middle = '%s' % (middle, nofollow_attr, trim_url(middle)) - if '@' in middle and not middle.startswith('www.') and not ':' in middle \ - and simple_email_re.match(middle): - middle = '%s' % (middle, middle) - if lead + middle + trail != word: - words[i] = lead + middle + trail - return ''.join(words) + If nofollow is True, the URLs in link text will get a rel="nofollow" attribute. + """ + trim_url = lambda x, limit=trim_url_limit: limit is not None and (x[:limit] + (len(x) >=limit and '...' or '')) or x + words = word_split_re.split(text) + nofollow_attr = nofollow and ' rel="nofollow"' or '' + for i, word in enumerate(words): + match = punctuation_re.match(word) + if match: + lead, middle, trail = match.groups() + if middle.startswith('www.') or ('@' not in middle and not middle.startswith('http://') and \ + len(middle) > 0 and middle[0] in string.letters + string.digits and \ + (middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com'))): + middle = '%s' % (middle, nofollow_attr, trim_url(middle)) + if middle.startswith('http://') or middle.startswith('https://'): + middle = '%s' % (middle, nofollow_attr, trim_url(middle)) + if '@' in middle and not middle.startswith('www.') and not ':' in middle \ + and simple_email_re.match(middle): + middle = '%s' % (middle, middle) + if lead + middle + trail != word: + words[i] = lead + middle + trail + return ''.join(words) def cleanHtml(text): - """ - Cleans the given HTML. Specifically, it does the following: - * Converts and to and . - * Encodes all ampersands correctly. - * Removes all "target" attributes from tags. - * Removes extraneous HTML, such as presentational tags that open and - immediately close and
. - * Converts hard-coded bullets into HTML unordered lists. - * Removes stuff like "

  

", but only if it's at the - bottom of the text. - """ - from text import normalizeNewlines - text = normalizeNewlines(text) - text = re.sub(r'<(/?)\s*b\s*>', '<\\1strong>', text) - text = re.sub(r'<(/?)\s*i\s*>', '<\\1em>', text) - text = fixAmpersands(text) - # Remove all target="" attributes from
tags. - text = link_target_attribute_re.sub('\\1', text) - # Trim stupid HTML such as
. - text = html_gunk_re.sub('', text) - # Convert hard-coded bullets into HTML unordered lists. - def replace_p_tags(match): - s = match.group().replace('

', '') - for d in DOTS: - s = s.replace('

%s' % d, '

  • ') - return '
      \n%s\n
    ' % s - text = hard_coded_bullets_re.sub(replace_p_tags, text) - # Remove stuff like "

      

    ", but only if it's at the bottom of the text. - text = trailing_empty_content_re.sub('', text) - return text + """ + Cleans the given HTML. Specifically, it does the following: + * Converts and to and . + * Encodes all ampersands correctly. + * Removes all "target" attributes from
    tags. + * Removes extraneous HTML, such as presentational tags that open and + immediately close and
    . + * Converts hard-coded bullets into HTML unordered lists. + * Removes stuff like "

      

    ", but only if it's at the + bottom of the text. + """ + from text import normalizeNewlines + text = normalizeNewlines(text) + text = re.sub(r'<(/?)\s*b\s*>', '<\\1strong>', text) + text = re.sub(r'<(/?)\s*i\s*>', '<\\1em>', text) + text = fixAmpersands(text) + # Remove all target="" attributes from
    tags. + text = link_target_attribute_re.sub('\\1', text) + # Trim stupid HTML such as
    . + text = html_gunk_re.sub('', text) + # Convert hard-coded bullets into HTML unordered lists. + def replace_p_tags(match): + s = match.group().replace('

    ', '
  • ') + for d in DOTS: + s = s.replace('

    %s' % d, '

  • ') + return '
      \n%s\n
    ' % s + text = hard_coded_bullets_re.sub(replace_p_tags, text) + # Remove stuff like "

      

    ", but only if it's at the bottom of the text. + text = trailing_empty_content_re.sub('', text) + return text # This pattern matches a character entity reference (a decimal numeric # references, a hexadecimal numeric reference, or a named reference). charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?') def decodeHtml(html): - """ - >>> decodeHtml('me & you and $&%') - u'me & you and $&%' - """ - if type(html) != unicode: - html = unicode(html)[:] - if type(html) is unicode: - uchr = unichr - else: - uchr = lambda value: value > 255 and unichr(value) or chr(value) - def entitydecode(match, uchr=uchr): - entity = match.group(1) - if entity.startswith('#x'): - return uchr(int(entity[2:], 16)) - elif entity.startswith('#'): - return uchr(int(entity[1:])) - elif entity in name2codepoint: - return uchr(name2codepoint[entity]) + """ + >>> decodeHtml('me & you and $&%') + u'me & you and $&%' + """ + if type(html) != unicode: + html = unicode(html)[:] + if type(html) is unicode: + uchr = unichr else: - return match.group(0) - return charrefpat.sub(entitydecode, html).replace(u'\xa0', ' ') + uchr = lambda value: value > 255 and unichr(value) or chr(value) + def entitydecode(match, uchr=uchr): + entity = match.group(1) + if entity.startswith('#x'): + return uchr(int(entity[2:], 16)) + elif entity.startswith('#'): + return uchr(int(entity[1:])) + elif entity in name2codepoint: + return uchr(name2codepoint[entity]) + else: + return match.group(0) + return charrefpat.sub(entitydecode, html).replace(u'\xa0', ' ') def highlight(text, query, hlClass="hl"): - """ - >>> highlight('me & you and $&%', 'and') - 'me & you and $&%' - """ - if query: - text = text.replace('
    ', '|') - query = re.escape(query).replace('\ ', '.') - m = re.compile("(%s)" % query, re.IGNORECASE).findall(text) - for i in m: - text = re.sub("(%s)" % re.escape(i).replace('\ ', '.'), '\\1' % hlClass, text) - text = text.replace('|', '
    ') - return text + """ + >>> highlight('me & you and $&%', 'and') + 'me & you and $&%' + """ + if query: + text = text.replace('
    ', '|') + query = re.escape(query).replace('\ ', '.') + m = re.compile("(%s)" % query, re.IGNORECASE).findall(text) + for i in m: + text = re.sub("(%s)" % re.escape(i).replace('\ ', '.'), '\\1' % hlClass, text) + text = text.replace('|', '
    ') + return text diff --git a/oxutils/lang.py b/oxutils/lang.py index 10d0637..964ac9f 100644 --- a/oxutils/lang.py +++ b/oxutils/lang.py @@ -1,236 +1,236 @@ # -*- coding: utf-8 -*- -# vi:si:et:sw=2:sts=2:ts=2 +# vi:si:et:sw=4:sts=4:ts=4 _iso639_languages = [ - ("Unknown", "", "", "und"), - ("Afar", "", "aa", "aar"), - ("Abkhazian", "", "ab", "abk"), - ("Afrikaans", "", "af", "afr"), - ("Akan", "", "ak", "aka"), - ("Albanian", "", "sq", "sqi"), - ("Amharic", "", "am", "amh"), - ("Arabic", "", "ar", "ara"), - ("Aragonese", "", "an", "arg"), - ("Armenian", "", "hy", "hye"), - ("Assamese", "", "as", "asm"), - ("Avaric", "", "av", "ava"), - ("Avestan", "", "ae", "ave"), - ("Aymara", "", "ay", "aym"), - ("Azerbaijani", "", "az", "aze"), - ("Bashkir", "", "ba", "bak"), - ("Bambara", "", "bm", "bam"), - ("Basque", "", "eu", "eus"), - ("Belarusian", "", "be", "bel"), - ("Bengali", "", "bn", "ben"), - ("Bihari", "", "bh", "bih"), - ("Bislama", "", "bi", "bis"), - ("Bosnian", "", "bs", "bos"), - ("Breton", "", "br", "bre"), - ("Bulgarian", "", "bg", "bul"), - ("Burmese", "", "my", "mya"), - ("Catalan", "", "ca", "cat"), - ("Chamorro", "", "ch", "cha"), - ("Chechen", "", "ce", "che"), - ("Chinese", "", "zh", "zho"), - ("Church Slavic", "", "cu", "chu"), - ("Chuvash", "", "cv", "chv"), - ("Cornish", "", "kw", "cor"), - ("Corsican", "", "co", "cos"), - ("Cree", "", "cr", "cre"), - ("Czech", "", "cs", "ces"), - ("Danish", "Dansk", "da", "dan"), - ("Divehi", "", "dv", "div"), - ("Dutch", "Nederlands", "nl", "nld"), - ("Dzongkha", "", "dz", "dzo"), - ("English", "English", "en", "eng"), - ("Esperanto", "", "eo", "epo"), - ("Estonian", "", "et", "est"), - ("Ewe", "", "ee", "ewe"), - ("Faroese", "", "fo", "fao"), - ("Fijian", "", "fj", "fij"), - ("Finnish", "Suomi", "fi", "fin"), - ("French", "Francais", "fr", "fra"), - ("Western Frisian", "", "fy", "fry"), - ("Fulah", "", "ff", "ful"), - ("Georgian", "", "ka", "kat"), - ("German", "Deutsch", "de", "deu"), - ("Gaelic (Scots)", "", "gd", "gla"), - ("Irish", "", "ga", "gle"), - ("Galician", "", "gl", "glg"), - ("Manx", "", "gv", "glv"), - ("Greek, Modern", "", "el", "ell"), - ("Guarani", "", "gn", "grn"), - ("Gujarati", "", "gu", "guj"), - ("Haitian", "", "ht", "hat"), - ("Hausa", "", "ha", "hau"), - ("Hebrew", "", "he", "heb"), - ("Herero", "", "hz", "her"), - ("Hindi", "", "hi", "hin"), - ("Hiri Motu", "", "ho", "hmo"), - ("Hungarian", "Magyar", "hu", "hun"), - ("Igbo", "", "ig", "ibo"), - ("Icelandic", "Islenska", "is", "isl"), - ("Ido", "", "io", "ido"), - ("Sichuan Yi", "", "ii", "iii"), - ("Inuktitut", "", "iu", "iku"), - ("Interlingue", "", "ie", "ile"), - ("Interlingua", "", "ia", "ina"), - ("Indonesian", "", "id", "ind"), - ("Inupiaq", "", "ik", "ipk"), - ("Italian", "Italiano", "it", "ita"), - ("Javanese", "", "jv", "jav"), - ("Japanese", "", "ja", "jpn"), - ("Kalaallisut (Greenlandic)", "", "kl", "kal"), - ("Kannada", "", "kn", "kan"), - ("Kashmiri", "", "ks", "kas"), - ("Kanuri", "", "kr", "kau"), - ("Kazakh", "", "kk", "kaz"), - ("Central Khmer", "", "km", "khm"), - ("Kikuyu", "", "ki", "kik"), - ("Kinyarwanda", "", "rw", "kin"), - ("Kirghiz", "", "ky", "kir"), - ("Komi", "", "kv", "kom"), - ("Kongo", "", "kg", "kon"), - ("Korean", "", "ko", "kor"), - ("Kuanyama", "", "kj", "kua"), - ("Kurdish", "", "ku", "kur"), - ("Lao", "", "lo", "lao"), - ("Latin", "", "la", "lat"), - ("Latvian", "", "lv", "lav"), - ("Limburgan", "", "li", "lim"), - ("Lingala", "", "ln", "lin"), - ("Lithuanian", "", "lt", "lit"), - ("Luxembourgish", "", "lb", "ltz"), - ("Luba-Katanga", "", "lu", "lub"), - ("Ganda", "", "lg", "lug"), - ("Macedonian", "", "mk", "mkd"), - ("Marshallese", "", "mh", "mah"), - ("Malayalam", "", "ml", "mal"), - ("Maori", "", "mi", "mri"), - ("Marathi", "", "mr", "mar"), - ("Malay", "", "ms", "msa"), - ("Malagasy", "", "mg", "mlg"), - ("Maltese", "", "mt", "mlt"), - ("Moldavian", "", "mo", "mol"), - ("Mongolian", "", "mn", "mon"), - ("Nauru", "", "na", "nau"), - ("Navajo", "", "nv", "nav"), - ("Ndebele, South", "", "nr", "nbl"), - ("Ndebele, North", "", "nd", "nde"), - ("Ndonga", "", "ng", "ndo"), - ("Nepali", "", "ne", "nep"), - ("Norwegian Nynorsk", "", "nn", "nno"), - ("Norwegian Bokmål", "", "nb", "nob"), - ("Norwegian", "Norsk", "no", "nor"), - ("Chichewa; Nyanja", "", "ny", "nya"), - ("Occitan (post 1500); Provençal", "", "oc", "oci"), - ("Ojibwa", "", "oj", "oji"), - ("Oriya", "", "or", "ori"), - ("Oromo", "", "om", "orm"), - ("Ossetian; Ossetic", "", "os", "oss"), - ("Panjabi", "", "pa", "pan"), - ("Persian", "", "fa", "fas"), - ("Pali", "", "pi", "pli"), - ("Polish", "", "pl", "pol"), - ("Portuguese", "Portugues", "pt", "por"), - ("Pushto", "", "ps", "pus"), - ("Quechua", "", "qu", "que"), - ("Romansh", "", "rm", "roh"), - ("Romanian", "", "ro", "ron"), - ("Rundi", "", "rn", "run"), - ("Russian", "", "ru", "rus"), - ("Sango", "", "sg", "sag"), - ("Sanskrit", "", "sa", "san"), - ("Serbian", "", "sr", "srp"), - ("Croatian", "Hrvatski", "hr", "hrv"), - ("Sinhala", "", "si", "sin"), - ("Slovak", "", "sk", "slk"), - ("Slovenian", "", "sl", "slv"), - ("Northern Sami", "", "se", "sme"), - ("Samoan", "", "sm", "smo"), - ("Shona", "", "sn", "sna"), - ("Sindhi", "", "sd", "snd"), - ("Somali", "", "so", "som"), - ("Sotho, Southern", "", "st", "sot"), - ("Spanish", "Espanol", "es", "spa"), - ("Sardinian", "", "sc", "srd"), - ("Swati", "", "ss", "ssw"), - ("Sundanese", "", "su", "sun"), - ("Swahili", "", "sw", "swa"), - ("Swedish", "Svenska", "sv", "swe"), - ("Tahitian", "", "ty", "tah"), - ("Tamil", "", "ta", "tam"), - ("Tatar", "", "tt", "tat"), - ("Telugu", "", "te", "tel"), - ("Tajik", "", "tg", "tgk"), - ("Tagalog", "", "tl", "tgl"), - ("Thai", "", "th", "tha"), - ("Tibetan", "", "bo", "bod"), - ("Tigrinya", "", "ti", "tir"), - ("Tonga (Tonga Islands)", "", "to", "ton"), - ("Tswana", "", "tn", "tsn"), - ("Tsonga", "", "ts", "tso"), - ("Turkmen", "", "tk", "tuk"), - ("Turkish", "", "tr", "tur"), - ("Twi", "", "tw", "twi"), - ("Uighur", "", "ug", "uig"), - ("Ukrainian", "", "uk", "ukr"), - ("Urdu", "", "ur", "urd"), - ("Uzbek", "", "uz", "uzb"), - ("Venda", "", "ve", "ven"), - ("Vietnamese", "", "vi", "vie"), - ("Volapük", "", "vo", "vol"), - ("Welsh", "", "cy", "cym"), - ("Walloon", "", "wa", "wln"), - ("Wolof", "", "wo", "wol"), - ("Xhosa", "", "xh", "xho"), - ("Yiddish", "", "yi", "yid"), - ("Yoruba", "", "yo", "yor"), - ("Zhuang", "", "za", "zha"), - ("Zulu", "", "zu", "zul"), + ("Unknown", "", "", "und"), + ("Afar", "", "aa", "aar"), + ("Abkhazian", "", "ab", "abk"), + ("Afrikaans", "", "af", "afr"), + ("Akan", "", "ak", "aka"), + ("Albanian", "", "sq", "sqi"), + ("Amharic", "", "am", "amh"), + ("Arabic", "", "ar", "ara"), + ("Aragonese", "", "an", "arg"), + ("Armenian", "", "hy", "hye"), + ("Assamese", "", "as", "asm"), + ("Avaric", "", "av", "ava"), + ("Avestan", "", "ae", "ave"), + ("Aymara", "", "ay", "aym"), + ("Azerbaijani", "", "az", "aze"), + ("Bashkir", "", "ba", "bak"), + ("Bambara", "", "bm", "bam"), + ("Basque", "", "eu", "eus"), + ("Belarusian", "", "be", "bel"), + ("Bengali", "", "bn", "ben"), + ("Bihari", "", "bh", "bih"), + ("Bislama", "", "bi", "bis"), + ("Bosnian", "", "bs", "bos"), + ("Breton", "", "br", "bre"), + ("Bulgarian", "", "bg", "bul"), + ("Burmese", "", "my", "mya"), + ("Catalan", "", "ca", "cat"), + ("Chamorro", "", "ch", "cha"), + ("Chechen", "", "ce", "che"), + ("Chinese", "", "zh", "zho"), + ("Church Slavic", "", "cu", "chu"), + ("Chuvash", "", "cv", "chv"), + ("Cornish", "", "kw", "cor"), + ("Corsican", "", "co", "cos"), + ("Cree", "", "cr", "cre"), + ("Czech", "", "cs", "ces"), + ("Danish", "Dansk", "da", "dan"), + ("Divehi", "", "dv", "div"), + ("Dutch", "Nederlands", "nl", "nld"), + ("Dzongkha", "", "dz", "dzo"), + ("English", "English", "en", "eng"), + ("Esperanto", "", "eo", "epo"), + ("Estonian", "", "et", "est"), + ("Ewe", "", "ee", "ewe"), + ("Faroese", "", "fo", "fao"), + ("Fijian", "", "fj", "fij"), + ("Finnish", "Suomi", "fi", "fin"), + ("French", "Francais", "fr", "fra"), + ("Western Frisian", "", "fy", "fry"), + ("Fulah", "", "ff", "ful"), + ("Georgian", "", "ka", "kat"), + ("German", "Deutsch", "de", "deu"), + ("Gaelic (Scots)", "", "gd", "gla"), + ("Irish", "", "ga", "gle"), + ("Galician", "", "gl", "glg"), + ("Manx", "", "gv", "glv"), + ("Greek, Modern", "", "el", "ell"), + ("Guarani", "", "gn", "grn"), + ("Gujarati", "", "gu", "guj"), + ("Haitian", "", "ht", "hat"), + ("Hausa", "", "ha", "hau"), + ("Hebrew", "", "he", "heb"), + ("Herero", "", "hz", "her"), + ("Hindi", "", "hi", "hin"), + ("Hiri Motu", "", "ho", "hmo"), + ("Hungarian", "Magyar", "hu", "hun"), + ("Igbo", "", "ig", "ibo"), + ("Icelandic", "Islenska", "is", "isl"), + ("Ido", "", "io", "ido"), + ("Sichuan Yi", "", "ii", "iii"), + ("Inuktitut", "", "iu", "iku"), + ("Interlingue", "", "ie", "ile"), + ("Interlingua", "", "ia", "ina"), + ("Indonesian", "", "id", "ind"), + ("Inupiaq", "", "ik", "ipk"), + ("Italian", "Italiano", "it", "ita"), + ("Javanese", "", "jv", "jav"), + ("Japanese", "", "ja", "jpn"), + ("Kalaallisut (Greenlandic)", "", "kl", "kal"), + ("Kannada", "", "kn", "kan"), + ("Kashmiri", "", "ks", "kas"), + ("Kanuri", "", "kr", "kau"), + ("Kazakh", "", "kk", "kaz"), + ("Central Khmer", "", "km", "khm"), + ("Kikuyu", "", "ki", "kik"), + ("Kinyarwanda", "", "rw", "kin"), + ("Kirghiz", "", "ky", "kir"), + ("Komi", "", "kv", "kom"), + ("Kongo", "", "kg", "kon"), + ("Korean", "", "ko", "kor"), + ("Kuanyama", "", "kj", "kua"), + ("Kurdish", "", "ku", "kur"), + ("Lao", "", "lo", "lao"), + ("Latin", "", "la", "lat"), + ("Latvian", "", "lv", "lav"), + ("Limburgan", "", "li", "lim"), + ("Lingala", "", "ln", "lin"), + ("Lithuanian", "", "lt", "lit"), + ("Luxembourgish", "", "lb", "ltz"), + ("Luba-Katanga", "", "lu", "lub"), + ("Ganda", "", "lg", "lug"), + ("Macedonian", "", "mk", "mkd"), + ("Marshallese", "", "mh", "mah"), + ("Malayalam", "", "ml", "mal"), + ("Maori", "", "mi", "mri"), + ("Marathi", "", "mr", "mar"), + ("Malay", "", "ms", "msa"), + ("Malagasy", "", "mg", "mlg"), + ("Maltese", "", "mt", "mlt"), + ("Moldavian", "", "mo", "mol"), + ("Mongolian", "", "mn", "mon"), + ("Nauru", "", "na", "nau"), + ("Navajo", "", "nv", "nav"), + ("Ndebele, South", "", "nr", "nbl"), + ("Ndebele, North", "", "nd", "nde"), + ("Ndonga", "", "ng", "ndo"), + ("Nepali", "", "ne", "nep"), + ("Norwegian Nynorsk", "", "nn", "nno"), + ("Norwegian Bokmål", "", "nb", "nob"), + ("Norwegian", "Norsk", "no", "nor"), + ("Chichewa; Nyanja", "", "ny", "nya"), + ("Occitan (post 1500); Provençal", "", "oc", "oci"), + ("Ojibwa", "", "oj", "oji"), + ("Oriya", "", "or", "ori"), + ("Oromo", "", "om", "orm"), + ("Ossetian; Ossetic", "", "os", "oss"), + ("Panjabi", "", "pa", "pan"), + ("Persian", "", "fa", "fas"), + ("Pali", "", "pi", "pli"), + ("Polish", "", "pl", "pol"), + ("Portuguese", "Portugues", "pt", "por"), + ("Pushto", "", "ps", "pus"), + ("Quechua", "", "qu", "que"), + ("Romansh", "", "rm", "roh"), + ("Romanian", "", "ro", "ron"), + ("Rundi", "", "rn", "run"), + ("Russian", "", "ru", "rus"), + ("Sango", "", "sg", "sag"), + ("Sanskrit", "", "sa", "san"), + ("Serbian", "", "sr", "srp"), + ("Croatian", "Hrvatski", "hr", "hrv"), + ("Sinhala", "", "si", "sin"), + ("Slovak", "", "sk", "slk"), + ("Slovenian", "", "sl", "slv"), + ("Northern Sami", "", "se", "sme"), + ("Samoan", "", "sm", "smo"), + ("Shona", "", "sn", "sna"), + ("Sindhi", "", "sd", "snd"), + ("Somali", "", "so", "som"), + ("Sotho, Southern", "", "st", "sot"), + ("Spanish", "Espanol", "es", "spa"), + ("Sardinian", "", "sc", "srd"), + ("Swati", "", "ss", "ssw"), + ("Sundanese", "", "su", "sun"), + ("Swahili", "", "sw", "swa"), + ("Swedish", "Svenska", "sv", "swe"), + ("Tahitian", "", "ty", "tah"), + ("Tamil", "", "ta", "tam"), + ("Tatar", "", "tt", "tat"), + ("Telugu", "", "te", "tel"), + ("Tajik", "", "tg", "tgk"), + ("Tagalog", "", "tl", "tgl"), + ("Thai", "", "th", "tha"), + ("Tibetan", "", "bo", "bod"), + ("Tigrinya", "", "ti", "tir"), + ("Tonga (Tonga Islands)", "", "to", "ton"), + ("Tswana", "", "tn", "tsn"), + ("Tsonga", "", "ts", "tso"), + ("Turkmen", "", "tk", "tuk"), + ("Turkish", "", "tr", "tur"), + ("Twi", "", "tw", "twi"), + ("Uighur", "", "ug", "uig"), + ("Ukrainian", "", "uk", "ukr"), + ("Urdu", "", "ur", "urd"), + ("Uzbek", "", "uz", "uzb"), + ("Venda", "", "ve", "ven"), + ("Vietnamese", "", "vi", "vie"), + ("Volapük", "", "vo", "vol"), + ("Welsh", "", "cy", "cym"), + ("Walloon", "", "wa", "wln"), + ("Wolof", "", "wo", "wol"), + ("Xhosa", "", "xh", "xho"), + ("Yiddish", "", "yi", "yid"), + ("Yoruba", "", "yo", "yor"), + ("Zhuang", "", "za", "zha"), + ("Zulu", "", "zu", "zul"), ] def codeToLang(code): - code = code.lower() - if len(code) == 2: - for l in _iso639_languages: - if l[2] == code: - return l[0] - elif len(code) == 3: - for l in _iso639_languages: - if l[3] == code: - return l[0] - return None + code = code.lower() + if len(code) == 2: + for l in _iso639_languages: + if l[2] == code: + return l[0] + elif len(code) == 3: + for l in _iso639_languages: + if l[3] == code: + return l[0] + return None def langTo3Code(lang): - lang = englishName(lang) - if lang: - lang=lang.lower() - for l in _iso639_languages: - if l[0].lower() == lang: - return l[3] - return None + lang = englishName(lang) + if lang: + lang=lang.lower() + for l in _iso639_languages: + if l[0].lower() == lang: + return l[3] + return None def langTo2Code(lang): - lang = englishName(lang) - if lang: - lang=lang.lower() - for l in _iso639_languages: - if l[0].lower() == lang: - return l[2] - return None + lang = englishName(lang) + if lang: + lang=lang.lower() + for l in _iso639_languages: + if l[0].lower() == lang: + return l[2] + return None def langCode2To3(code): - langTo3Code(codeToLang(code)) + langTo3Code(codeToLang(code)) def langCode3To2(code): - langTo2Code(codeToLang(code)) + langTo2Code(codeToLang(code)) def englishName(lang): - lang = lang.lower() - for l in _iso639_languages: - if l[1].lower() == lang: - return l[0] - return None + lang = lang.lower() + for l in _iso639_languages: + if l[1].lower() == lang: + return l[0] + return None diff --git a/oxutils/net.py b/oxutils/net.py index 8c57151..f07b8d0 100644 --- a/oxutils/net.py +++ b/oxutils/net.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# vi:si:et:sw=2:sts=2:ts=2 +# vi:si:et:sw=4:sts=4:ts=4 import gzip import StringIO import urllib @@ -10,64 +10,64 @@ from chardet.universaldetector import UniversalDetector # Default headers for HTTP requests. DEFAULT_HEADERS = { - 'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9) Gecko/2008061015 Firefox/3.0', - 'Accept-Encoding': 'gzip' + 'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9) Gecko/2008061015 Firefox/3.0', + 'Accept-Encoding': 'gzip' } def status(url, data=None, headers=DEFAULT_HEADERS): - try: - f = openUrl(url, data, headers) - s = f.code - except urllib2.HTTPError, e: - s = e.code - return s + try: + f = openUrl(url, data, headers) + s = f.code + except urllib2.HTTPError, e: + s = e.code + return s def exists(url, data=None, headers=DEFAULT_HEADERS): - s = status(url, data, headers) - if s >= 200 and s < 400: - return True - return False + s = status(url, data, headers) + if s >= 200 and s < 400: + return True + return False def getHeaders(url, data=None, headers=DEFAULT_HEADERS): - try: - f = openUrl(url, data, headers) - f.headers['Status'] = "%s" % f.code - headers = f.headers - f.close() - except urllib2.HTTPError, e: - e.headers['Status'] = "%s" % e.code - headers = e.headers - return dict(headers) + try: + f = openUrl(url, data, headers) + f.headers['Status'] = "%s" % f.code + headers = f.headers + f.close() + except urllib2.HTTPError, e: + e.headers['Status'] = "%s" % e.code + headers = e.headers + return dict(headers) def openUrl(url, data=None, headers=DEFAULT_HEADERS): - url = url.replace(' ', '%20') - req = urllib2.Request(url, data, headers) - return urllib2.urlopen(req) + url = url.replace(' ', '%20') + req = urllib2.Request(url, data, headers) + return urllib2.urlopen(req) def getUrl(url, data=None, headers=DEFAULT_HEADERS, returnHeaders=False): - f = openUrl(url, data, headers) - data = f.read() - f.close() - if f.headers.get('content-encoding', None) == 'gzip': - data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read() - if returnHeaders: - f.headers['Status'] = "%s" % f.code - return dict(f.headers), data - return data + f = openUrl(url, data, headers) + data = f.read() + f.close() + if f.headers.get('content-encoding', None) == 'gzip': + data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read() + if returnHeaders: + f.headers['Status'] = "%s" % f.code + return dict(f.headers), data + return data def getUrlUnicode(url): - data = getUrl(url) - encoding = getEncoding(data) - if not encoding: - encoding = 'latin-1' - return unicode(data, encoding) + data = getUrl(url) + encoding = getEncoding(data) + if not encoding: + encoding = 'latin-1' + return unicode(data, encoding) def getEncoding(data): - detector = UniversalDetector() - for line in data.split('\n'): - detector.feed(line) - if detector.done: - break - detector.close() - return detector.result['encoding'] + detector = UniversalDetector() + for line in data.split('\n'): + detector.feed(line) + if detector.done: + break + detector.close() + return detector.result['encoding'] diff --git a/oxutils/normalize.py b/oxutils/normalize.py index 9d63139..4df4405 100644 --- a/oxutils/normalize.py +++ b/oxutils/normalize.py @@ -1,79 +1,79 @@ # -*- Mode: Python; -*- # -*- coding: utf-8 -*- -# vi:si:et:sw=2:sts=2:ts=2 +# vi:si:et:sw=4:sts=4:ts=4 import re _articles = ('the', 'la', 'a', 'die', 'der', 'le', 'el', - "l'", 'il', 'das', 'les', 'o', 'ein', 'i', 'un', 'los', 'de', - 'an', 'una', 'las', 'eine', 'den', 'gli', 'het', 'os', 'lo', - 'az', 'det', 'ha-', 'een', 'ang', 'oi', 'ta', 'al-', 'dem', - 'mga', 'uno', "un'", 'ett', u'\xcf', 'eines', u'\xc7', 'els', - u'\xd4\xef', u'\xcf\xe9') + "l'", 'il', 'das', 'les', 'o', 'ein', 'i', 'un', 'los', 'de', + 'an', 'una', 'las', 'eine', 'den', 'gli', 'het', 'os', 'lo', + 'az', 'det', 'ha-', 'een', 'ang', 'oi', 'ta', 'al-', 'dem', + 'mga', 'uno', "un'", 'ett', u'\xcf', 'eines', u'\xc7', 'els', + u'\xd4\xef', u'\xcf\xe9') # Articles in a dictionary. _articlesDict = dict([(x, x) for x in _articles]) _spArticles = [] for article in _articles: - if article[-1] not in ("'", '-'): article += ' ' - _spArticles.append(article) + if article[-1] not in ("'", '-'): article += ' ' + _spArticles.append(article) def canonicalTitle(title): - """Return the title in the canonic format 'Movie Title, The'. - - >>> canonicalTitle('The Movie Title') - 'Movie Title, The' - """ - try: - if _articlesDict.has_key(title.split(', ')[-1].lower()): return title - except IndexError: pass - ltitle = title.lower() - for article in _spArticles: - if ltitle.startswith(article): - lart = len(article) - title = '%s, %s' % (title[lart:], title[:lart]) - if article[-1] == ' ': title = title[:-1] - break - ## XXX: an attempt using a dictionary lookup. - ##for artSeparator in (' ', "'", '-'): - ## article = _articlesDict.get(ltitle.split(artSeparator)[0]) - ## if article is not None: - ## lart = len(article) - ## # check titles like "una", "I'm Mad" and "L'abbacchio". - ## if title[lart:] == '' or (artSeparator != ' ' and - ## title[lart:][1] != artSeparator): continue - ## title = '%s, %s' % (title[lart:], title[:lart]) - ## if artSeparator == ' ': title = title[1:] - ## break - return title + """Return the title in the canonic format 'Movie Title, The'. + + >>> canonicalTitle('The Movie Title') + 'Movie Title, The' + """ + try: + if _articlesDict.has_key(title.split(', ')[-1].lower()): return title + except IndexError: pass + ltitle = title.lower() + for article in _spArticles: + if ltitle.startswith(article): + lart = len(article) + title = '%s, %s' % (title[lart:], title[:lart]) + if article[-1] == ' ': title = title[:-1] + break + ## XXX: an attempt using a dictionary lookup. + ##for artSeparator in (' ', "'", '-'): + ## article = _articlesDict.get(ltitle.split(artSeparator)[0]) + ## if article is not None: + ## lart = len(article) + ## # check titles like "una", "I'm Mad" and "L'abbacchio". + ## if title[lart:] == '' or (artSeparator != ' ' and + ## title[lart:][1] != artSeparator): continue + ## title = '%s, %s' % (title[lart:], title[:lart]) + ## if artSeparator == ' ': title = title[1:] + ## break + return title def normalizeTitle(title): - """Return the title in the normal "The Title" format. + """Return the title in the normal "The Title" format. - >>> normalizeTitle('Movie Title, The') - 'The Movie Title' - """ - stitle = title.split(', ') - if len(stitle) > 1 and _articlesDict.has_key(stitle[-1].lower()): - sep = ' ' - if stitle[-1][-1] in ("'", '-'): sep = '' - title = '%s%s%s' % (stitle[-1], sep, ', '.join(stitle[:-1])) - return title + >>> normalizeTitle('Movie Title, The') + 'The Movie Title' + """ + stitle = title.split(', ') + if len(stitle) > 1 and _articlesDict.has_key(stitle[-1].lower()): + sep = ' ' + if stitle[-1][-1] in ("'", '-'): sep = '' + title = '%s%s%s' % (stitle[-1], sep, ', '.join(stitle[:-1])) + return title def normalizeImdbId(imdbId): - """Return 7 digit imdbId. + """Return 7 digit imdbId. - >>> normalizeImdbId('http://www.imdb.com/title/tt0159206/') - '0159206' - >>> normalizeImdbId(159206) - '0159206' - >>> normalizeImdbId('tt0159206') - '0159206' - """ - if isinstance(imdbId, basestring): - imdbId = re.sub('.*(\d{7}).*', '\\1', imdbId) - elif isinstance(imdbId, int): - imdbId = "%07d" % imdbId - return imdbId + >>> normalizeImdbId('http://www.imdb.com/title/tt0159206/') + '0159206' + >>> normalizeImdbId(159206) + '0159206' + >>> normalizeImdbId('tt0159206') + '0159206' + """ + if isinstance(imdbId, basestring): + imdbId = re.sub('.*(\d{7}).*', '\\1', imdbId) + elif isinstance(imdbId, int): + imdbId = "%07d" % imdbId + return imdbId # Common suffixes in surnames. diff --git a/oxutils/text.py b/oxutils/text.py index a7fa7b5..768db8b 100644 --- a/oxutils/text.py +++ b/oxutils/text.py @@ -1,216 +1,216 @@ # -*- coding: utf-8 -*- -# vi:si:et:sw=2:sts=2:ts=2 +# vi:si:et:sw=4:sts=4:ts=4 # GPL written 2008 by j@pad.ma import re def findRe(string, regexp): - result = re.compile(regexp, re.DOTALL).findall(string) - if result: - return result[0].strip() - return '' + result = re.compile(regexp, re.DOTALL).findall(string) + if result: + return result[0].strip() + return '' def findString(string, string0='', string1 = ''): - """Return the string between string0 and string1. + """Return the string between string0 and string1. - If string0 or string1 is left out, begining or end of string is used. + If string0 or string1 is left out, begining or end of string is used. - >>> findString('i am not there', string1=' not there') - 'i am' + >>> findString('i am not there', string1=' not there') + 'i am' - >>> findString('i am not there', 'i am ', ' there') - 'not' + >>> findString('i am not there', 'i am ', ' there') + 'not' - >>> findString('i am not there', 'i am not t') - 'here' + >>> findString('i am not there', 'i am not t') + 'here' - """ - if string0: - string0 = re.escape(string0) - else: - string0 = '^' - if string1: - string1 = re.escape(string1) - else: - string1 = '$' - return findRegexp(string, string0 + '(.*?)' + string1) + """ + if string0: + string0 = re.escape(string0) + else: + string0 = '^' + if string1: + string1 = re.escape(string1) + else: + string1 = '$' + return findRegexp(string, string0 + '(.*?)' + string1) # Capitalizes the first letter of a string. capfirst = lambda x: x and x[0].upper() + x[1:] def removeSpecialCharacters(text): - """ - Removes special characters inserted by Word. - """ - text = text.replace(u'\u2013', '-') - text = text.replace(u'\u2026O', "'") - text = text.replace(u'\u2019', "'") - text = text.replace(u'', "'") - text = text.replace(u'', "'") - text = text.replace(u'', "-") - return text + """ + Removes special characters inserted by Word. + """ + text = text.replace(u'\u2013', '-') + text = text.replace(u'\u2026O', "'") + text = text.replace(u'\u2019', "'") + text = text.replace(u'', "'") + text = text.replace(u'', "'") + text = text.replace(u'', "-") + return text def wrap(text, width): - """ - A word-wrap function that preserves existing line breaks and most spaces in - the text. Expects that existing line breaks are posix newlines (\n). - See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/148061 - """ - return reduce(lambda line, word, width=width: '%s%s%s' % - (line, - ' \n'[(len(line[line.rfind('\n')+1:]) - + len(word.split('\n',1)[0] - ) >= width)], - word), - text.split(' ') - ) + """ + A word-wrap function that preserves existing line breaks and most spaces in + the text. Expects that existing line breaks are posix newlines (\n). + See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/148061 + """ + return reduce(lambda line, word, width=width: '%s%s%s' % + (line, + ' \n'[(len(line[line.rfind('\n')+1:]) + + len(word.split('\n',1)[0] + ) >= width)], + word), + text.split(' ') + ) def truncateString(s, num): - """Truncates a string after a certain number of chacters, but ends with a word + """Truncates a string after a certain number of chacters, but ends with a word - >>> truncateString('Truncates a string after a certain number of chacters, but ends with a word', 23) - 'Truncates a string...' - >>> truncateString('Truncates a string', 23) - 'Truncates a string' + >>> truncateString('Truncates a string after a certain number of chacters, but ends with a word', 23) + 'Truncates a string...' + >>> truncateString('Truncates a string', 23) + 'Truncates a string' - """ - length = int(num) - if len(s) <= length: - return s - words = s.split() - ts = "" - while words and len(ts) + len(words[0]) < length: - ts += " " + words.pop(0) - if words: - ts += "..." - return ts.strip() + """ + length = int(num) + if len(s) <= length: + return s + words = s.split() + ts = "" + while words and len(ts) + len(words[0]) < length: + ts += " " + words.pop(0) + if words: + ts += "..." + return ts.strip() def trimString(string, num): - """Truncates a string after a certain number of chacters, adding ... at -10 characters + """Truncates a string after a certain number of chacters, adding ... at -10 characters - >>> trimString('Truncates a string after a certain number of chacters', 23) - 'Truncates ...f chacters' - >>> trimString('Truncates a string', 23) - 'Truncates a string' - """ - if len(string) > num: - string = string[:num - 13] + '...' + string[-10:] - return string + >>> trimString('Truncates a string after a certain number of chacters', 23) + 'Truncates ...f chacters' + >>> trimString('Truncates a string', 23) + 'Truncates a string' + """ + if len(string) > num: + string = string[:num - 13] + '...' + string[-10:] + return string def truncateWords(s, num): - "Truncates a string after a certain number of words." - length = int(num) - words = s.split() - if len(words) > length: - words = words[:length] - if not words[-1].endswith('...'): - words.append('...') - return ' '.join(words) + "Truncates a string after a certain number of words." + length = int(num) + words = s.split() + if len(words) > length: + words = words[:length] + if not words[-1].endswith('...'): + words.append('...') + return ' '.join(words) def getValidFilename(s): - """ - Returns the given string converted to a string that can be used for a clean - filename. Specifically, leading and trailing spaces are removed; - all non-filename-safe characters are removed. + """ + Returns the given string converted to a string that can be used for a clean + filename. Specifically, leading and trailing spaces are removed; + all non-filename-safe characters are removed. - >>> getValidFilename("john's portrait in 2004.jpg") - 'john_s_portrait_in_2004.jpg' - """ - s = s.strip() - s = s.replace(' ', '_') - s = re.sub(r'[^-A-Za-z0-9_.\[\]\ ]', '_', s) - s = s.replace('__', '_').replace('__', '_') - return s + >>> getValidFilename("john's portrait in 2004.jpg") + 'john_s_portrait_in_2004.jpg' + """ + s = s.strip() + s = s.replace(' ', '_') + s = re.sub(r'[^-A-Za-z0-9_.\[\]\ ]', '_', s) + s = s.replace('__', '_').replace('__', '_') + return s def getTextList(list_, last_word='or'): - """ - >>> getTextList(['a', 'b', 'c', 'd']) - 'a, b, c or d' - >>> getTextList(['a', 'b', 'c'], 'and') - 'a, b and c' - >>> getTextList(['a', 'b'], 'and') - 'a and b' - >>> getTextList(['a']) - 'a' - >>> getTextList([]) - '' - """ - if len(list_) == 0: return '' - if len(list_) == 1: return list_[0] - return '%s %s %s' % (', '.join([str(i) for i in list_][:-1]), last_word, list_[-1]) + """ + >>> getTextList(['a', 'b', 'c', 'd']) + 'a, b, c or d' + >>> getTextList(['a', 'b', 'c'], 'and') + 'a, b and c' + >>> getTextList(['a', 'b'], 'and') + 'a and b' + >>> getTextList(['a']) + 'a' + >>> getTextList([]) + '' + """ + if len(list_) == 0: return '' + if len(list_) == 1: return list_[0] + return '%s %s %s' % (', '.join([str(i) for i in list_][:-1]), last_word, list_[-1]) def getListText(text, last_word='or'): - """ - >>> getListText('a, b, c or d') - ['a', 'b', 'c', 'd'] - >>> getListText('a, b and c', 'and') - ['a', 'b', 'c'] - >>> getListText('a and b', 'and') - ['a', 'b'] - >>> getListText('a') - ['a'] - >>> getListText('') - [] - """ - list_ = [] - if text: - list_ = text.split(', ') - if list_: - i=len(list_)-1 - last = list_[i].split(last_word) - if len(last) == 2: - list_[i] = last[0].strip() - list_.append(last[1].strip()) - return list_ + """ + >>> getListText('a, b, c or d') + ['a', 'b', 'c', 'd'] + >>> getListText('a, b and c', 'and') + ['a', 'b', 'c'] + >>> getListText('a and b', 'and') + ['a', 'b'] + >>> getListText('a') + ['a'] + >>> getListText('') + [] + """ + list_ = [] + if text: + list_ = text.split(', ') + if list_: + i=len(list_)-1 + last = list_[i].split(last_word) + if len(last) == 2: + list_[i] = last[0].strip() + list_.append(last[1].strip()) + return list_ def normalizeNewlines(text): - return re.sub(r'\r\n|\r|\n', '\n', text) + return re.sub(r'\r\n|\r|\n', '\n', text) def recapitalize(text): - "Recapitalizes text, placing caps after end-of-sentence punctuation." -# capwords = () - text = text.lower() - capsRE = re.compile(r'(?:^|(?<=[\.\?\!] ))([a-z])') - text = capsRE.sub(lambda x: x.group(1).upper(), text) -# for capword in capwords: -# capwordRE = re.compile(r'\b%s\b' % capword, re.I) -# text = capwordRE.sub(capword, text) - return text + "Recapitalizes text, placing caps after end-of-sentence punctuation." + #capwords = () + text = text.lower() + capsRE = re.compile(r'(?:^|(?<=[\.\?\!] ))([a-z])') + text = capsRE.sub(lambda x: x.group(1).upper(), text) + #for capword in capwords: + # capwordRE = re.compile(r'\b%s\b' % capword, re.I) + # text = capwordRE.sub(capword, text) + return text def phone2numeric(phone): - "Converts a phone number with letters into its numeric equivalent." - letters = re.compile(r'[A-PR-Y]', re.I) - char2number = lambda m: {'a': '2', 'c': '2', 'b': '2', 'e': '3', - 'd': '3', 'g': '4', 'f': '3', 'i': '4', 'h': '4', 'k': '5', - 'j': '5', 'm': '6', 'l': '5', 'o': '6', 'n': '6', 'p': '7', - 's': '7', 'r': '7', 'u': '8', 't': '8', 'w': '9', 'v': '8', - 'y': '9', 'x': '9'}.get(m.group(0).lower()) - return letters.sub(char2number, phone) + "Converts a phone number with letters into its numeric equivalent." + letters = re.compile(r'[A-PR-Y]', re.I) + char2number = lambda m: {'a': '2', 'c': '2', 'b': '2', 'e': '3', + 'd': '3', 'g': '4', 'f': '3', 'i': '4', 'h': '4', 'k': '5', + 'j': '5', 'm': '6', 'l': '5', 'o': '6', 'n': '6', 'p': '7', + 's': '7', 'r': '7', 'u': '8', 't': '8', 'w': '9', 'v': '8', + 'y': '9', 'x': '9'}.get(m.group(0).lower()) + return letters.sub(char2number, phone) def compressString(s): - import cStringIO, gzip - zbuf = cStringIO.StringIO() - zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuf) - zfile.write(s) - zfile.close() - return zbuf.getvalue() + import cStringIO, gzip + zbuf = cStringIO.StringIO() + zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuf) + zfile.write(s) + zfile.close() + return zbuf.getvalue() smart_split_re = re.compile('("(?:[^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'(?:[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'|[^\\s]+)') def smartSplit(text): - """ - Generator that splits a string by spaces, leaving quoted phrases together. - Supports both single and double quotes, and supports escaping quotes with - backslashes. In the output, strings will keep their initial and trailing - quote marks. - >>> list(smartSplit('This is "a person\\'s" test.')) - ['This', 'is', '"a person\\'s"', 'test.'] - """ - for bit in smart_split_re.finditer(text): - bit = bit.group(0) - if bit[0] == '"': - yield '"' + bit[1:-1].replace('\\"', '"').replace('\\\\', '\\') + '"' - elif bit[0] == "'": - yield "'" + bit[1:-1].replace("\\'", "'").replace("\\\\", "\\") + "'" - else: - yield bit + """ + Generator that splits a string by spaces, leaving quoted phrases together. + Supports both single and double quotes, and supports escaping quotes with + backslashes. In the output, strings will keep their initial and trailing + quote marks. + >>> list(smartSplit('This is "a person\\'s" test.')) + ['This', 'is', '"a person\\'s"', 'test.'] + """ + for bit in smart_split_re.finditer(text): + bit = bit.group(0) + if bit[0] == '"': + yield '"' + bit[1:-1].replace('\\"', '"').replace('\\\\', '\\') + '"' + elif bit[0] == "'": + yield "'" + bit[1:-1].replace("\\'", "'").replace("\\\\", "\\") + "'" + else: + yield bit diff --git a/oxutils/torrent.py b/oxutils/torrent.py index fe452fa..6f53d5b 100644 --- a/oxutils/torrent.py +++ b/oxutils/torrent.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +# vi:si:et:sw=4:sts=4:ts=4 # Written 2007 by j@mailb.org from threading import Event @@ -11,50 +12,50 @@ from BitTornado.bencode import bencode, bdecode def createTorrent(file, url, params = {}, flag = Event(), progress = lambda x: None, progress_percent = 1): - "Creates a torrent for a given file, using url as tracker url" - return make_meta_file(file, url, params, flag, progress, progress_percent) + "Creates a torrent for a given file, using url as tracker url" + return make_meta_file(file, url, params, flag, progress, progress_percent) def getInfoHash(torrentFile): - "Returns Torrent Info Hash from torrent file" - metainfo_file = open(torrentFile, 'rb') - metainfo = bdecode(metainfo_file.read()) - info = metainfo['info'] - return sha.sha(bencode(info)).hexdigest().upper() + "Returns Torrent Info Hash from torrent file" + metainfo_file = open(torrentFile, 'rb') + metainfo = bdecode(metainfo_file.read()) + info = metainfo['info'] + return sha.sha(bencode(info)).hexdigest().upper() def getTorrentInfoFromFile(torrentFile): - f = open(torrentFile, 'rb') - data = f.read() - f.close() - tinfo = getTorrentInfo(data) - tinfo['timestamp'] = stat(torrentFile).st_ctime - return tinfo + f = open(torrentFile, 'rb') + data = f.read() + f.close() + tinfo = getTorrentInfo(data) + tinfo['timestamp'] = stat(torrentFile).st_ctime + return tinfo def getTorrentInfo(data): - "Returns Torrent Info from torrent file" - tinfo = {} - metainfo = bdecode(data) - info = metainfo['info'] - piece_length = info['piece length'] - if info.has_key('length'): - # let's assume we just have one file - file_length = info['length'] - else: - # let's assume we have a directory structure - file_length = 0; - for f in info['files']: - file_length += f['length'] - for key in info: - if key != 'pieces': - tinfo[key] = info[key] - for key in metainfo: - if key != 'info': - tinfo[key] = metainfo[key] - tinfo['size'] = file_length - tinfo['hash'] = sha.sha(bencode(info)).hexdigest() - tinfo['announce'] = metainfo['announce'] - return tinfo + "Returns Torrent Info from torrent file" + tinfo = {} + metainfo = bdecode(data) + info = metainfo['info'] + piece_length = info['piece length'] + if info.has_key('length'): + # let's assume we just have one file + file_length = info['length'] + else: + # let's assume we have a directory structure + file_length = 0; + for f in info['files']: + file_length += f['length'] + for key in info: + if key != 'pieces': + tinfo[key] = info[key] + for key in metainfo: + if key != 'info': + tinfo[key] = metainfo[key] + tinfo['size'] = file_length + tinfo['hash'] = sha.sha(bencode(info)).hexdigest() + tinfo['announce'] = metainfo['announce'] + return tinfo def getTorrentSize(torrentFile): - "Returns Size of files in torrent file in bytes" - return getTorrentInfo(torrentFile)['size'] + "Returns Size of files in torrent file in bytes" + return getTorrentInfo(torrentFile)['size'] diff --git a/setup.py b/setup.py index f9324af..f1728b2 100644 --- a/setup.py +++ b/setup.py @@ -1,30 +1,30 @@ #!/usr/bin/env python -# vi:si:et:sw=2:sts=2:ts=2 +# vi:si:et:sw=4:sts=4:ts=4 # encoding: utf-8 from setuptools import setup, find_packages setup( - name="oxutils", - version="0.1", + name="oxutils", + version="0.1", - description="collection of utils used to work with python", - author="0x", - author_email="code@0xdb.org", - url="http://code.0xdb.org/oxutils", - download_url="http://code.0xdb.org/oxutils/download", - license="GPLv3", - packages=find_packages(), - zip_safe=False, - install_requires=[ - 'chardet', - ], - keywords = [ - ], - classifiers = [ - 'Development Status :: 3 - Alpha', - 'Operating System :: OS Independent', - 'Programming Language :: Python', - 'Topic :: Software Development :: Libraries :: Python Modules', - ], - ) + description="collection of utils used to work with python", + author="0x", + author_email="code@0xdb.org", + url="http://code.0xdb.org/oxutils", + download_url="http://code.0xdb.org/oxutils/download", + license="GPLv3", + packages=find_packages(), + zip_safe=False, + install_requires=[ + 'chardet', + ], + keywords = [ + ], + classifiers = [ + 'Development Status :: 3 - Alpha', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + 'Topic :: Software Development :: Libraries :: Python Modules', + ], +)