add some functions
This commit is contained in:
commit
ef59090610
11 changed files with 649 additions and 0 deletions
18
README
Normal file
18
README
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
python-oxutils some tools to build tools
|
||||||
|
|
||||||
|
Depends:
|
||||||
|
python2.5
|
||||||
|
python-chardet (http://chardet.feedparser.org/)
|
||||||
|
BitTornado(optional)
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
import oxutils
|
||||||
|
|
||||||
|
data = oxutils.cache.getUrl('http://...')
|
||||||
|
text = oxutils.stripTags(data)
|
||||||
|
oxutils.normalizeNewlines(text)
|
||||||
|
oxutils.formatBytes(len(data))
|
||||||
|
|
||||||
|
oxutils.formatBytes(1234567890)
|
||||||
|
'1.15 GB'
|
||||||
|
|
18
oxutils/__init__.py
Normal file
18
oxutils/__init__.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# vi:si:et:sw=2:sts=2:ts=2
|
||||||
|
# Written 2008 by j@mailb.org
|
||||||
|
|
||||||
|
from hashes import *
|
||||||
|
from html import *
|
||||||
|
from numbers import *
|
||||||
|
from text import *
|
||||||
|
from timeformat import *
|
||||||
|
import net
|
||||||
|
import cache
|
||||||
|
|
||||||
|
# only works if BitTornado is installed
try:
  from bt import *
except ImportError:
  # BitTornado is an optional dependency (see README); without it the
  # torrent helpers in oxutils.bt are simply unavailable.
  # Narrowed from a bare `except:` so real errors inside bt.py (syntax
  # errors, bugs) are no longer silently swallowed.
  pass
|
||||||
|
|
53
oxutils/bt.py
Normal file
53
oxutils/bt.py
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Written 2007 by j@mailb.org
|
||||||
|
|
||||||
|
from threading import Event
|
||||||
|
import sha
|
||||||
|
from os import stat
|
||||||
|
|
||||||
|
from BitTornado.BT1.makemetafile import make_meta_file
|
||||||
|
from BitTornado.bencode import bencode, bdecode
|
||||||
|
|
||||||
|
|
||||||
|
def createTorrent(file, url, params=None, flag=None,
    progress=lambda x: None, progress_percent=1):
  """Creates a torrent for a given file, using url as tracker url.

  Bug fix: params={} and flag=Event() were mutable defaults evaluated once
  at import time and shared between every call; they are now created fresh
  per call. Passing explicit values behaves exactly as before.
  """
  if params is None:
    params = {}
  if flag is None:
    flag = Event()
  return make_meta_file(file, url, params, flag, progress, progress_percent)
|
||||||
|
|
||||||
|
def getInfoHash(torrentFile):
  """Returns Torrent Info Hash from torrent file.

  The hash is the sha1 hex digest (uppercase) of the bencoded 'info' dict.
  Bug fix: the original never closed the file handle.
  """
  metainfo_file = open(torrentFile, 'rb')
  try:
    metainfo = bdecode(metainfo_file.read())
  finally:
    metainfo_file.close()
  return sha.sha(bencode(metainfo['info'])).hexdigest().upper()
|
||||||
|
|
||||||
|
def getTorrentInfo(torrentFile):
  """Return a dict with 'size', 'hash' and 'timestamp' for a torrent file.

  size: total payload size in bytes (single file, or sum over all files)
  hash: sha1 hex digest of the bencoded info dict (lowercase; note
        getInfoHash returns the same digest uppercased)
  timestamp: st_ctime of the torrent file itself
  """
  tinfo = {}
  metainfo_file = open(torrentFile, 'rb')
  try:
    # close reliably even if bdecode raises on a corrupt file
    metainfo = bdecode(metainfo_file.read())
  finally:
    metainfo_file.close()
  info = metainfo['info']
  if info.has_key('length'):
    # single-file torrent
    file_length = info['length']
  else:
    # multi-file torrent: sum the individual file lengths.
    # (the original also rebuilt each file's path string here but never
    # used it, and read info['piece length'] into an unused local - both
    # removed)
    file_length = 0
    for entry in info['files']:
      file_length += entry['length']
  tinfo['size'] = file_length
  tinfo['hash'] = sha.sha(bencode(info)).hexdigest()
  tinfo['timestamp'] = stat(torrentFile).st_ctime
  return tinfo
|
||||||
|
|
||||||
|
def getTorrentSize(torrentFile):
  "Returns Size of files in torrent file in bytes"
  info = getTorrentInfo(torrentFile)
  return info['size']
|
||||||
|
|
62
oxutils/cache.py
Normal file
62
oxutils/cache.py
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# vi:si:et:sw=2:sts=2:ts=2
|
||||||
|
import os
|
||||||
|
import sha
|
||||||
|
import time
|
||||||
|
import urlparse
|
||||||
|
|
||||||
|
import net
|
||||||
|
from net import DEFAULT_HEADERS
|
||||||
|
|
||||||
|
|
||||||
|
cache_timeout = 30*24*60*60 # default is 30 days
|
||||||
|
|
||||||
|
def getUrlUnicode(url):
  """Fetch url (through the disk cache) and decode the body to unicode.

  The encoding is guessed with chardet, falling back to latin-1 when
  detection fails. Bug fix: chardet was referenced but never imported in
  this module, so this function always raised NameError.
  """
  import chardet  # already a package dependency (used by oxutils.net)
  data = getUrl(url)
  encoding = chardet.detect(data)['encoding']
  if not encoding:
    encoding = 'latin-1'
  return unicode(data, encoding)
|
||||||
|
|
||||||
|
def getUrl(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
  """Fetch url via net.getUrl, caching the raw response on disk.

  Bug fix: the original called loadUrlCache(url_cache_file, timeout),
  which bound timeout to loadUrlCache's (unused) second parameter `data`,
  so a caller-supplied timeout was silently ignored and the default was
  always used. The timeout is now passed in the correct position.
  """
  url_cache_file = getUrlCacheFile(url, data, headers)
  result = loadUrlCache(url_cache_file, data, timeout)
  if not result:
    # cache miss or stale entry: hit the network and store the response
    result = net.getUrl(url, data, headers)
    saveUrlCache(url_cache_file, result)
  return result
|
||||||
|
|
||||||
|
def getCacheBase():
  'cache base is either ~/.ox/cache or can be set via the env variable oxCACHE'
  default_base = os.path.expanduser('~/.ox/cache')
  return os.environ.get('oxCACHE', default_base)
|
||||||
|
|
||||||
|
def getUrlCacheFile(url, data=None, headers=DEFAULT_HEADERS):
  """Map a request to its on-disk cache file path.

  The path is <cache base>/<domain>/<h 0:2>/<h 2:4>/<h 4:6>/<h>, where h
  is the sha1 hex digest of the url (with '?' + data appended for
  requests that carry a body). headers is accepted for signature
  symmetry with getUrl but does not influence the path.
  """
  if data:
    digest = sha.sha(url + '?' + data).hexdigest()
  else:
    digest = sha.sha(url).hexdigest()
  domain = urlparse.urlparse(url)[1]
  return os.path.join(getCacheBase(), domain,
                      digest[:2], digest[2:4], digest[4:6], digest)
|
||||||
|
|
||||||
|
def loadUrlCache(url_cache_file, data, timeout=cache_timeout):
  """Return the cached body from url_cache_file if it is younger than
  timeout seconds, else None.

  NOTE(review): the `data` argument is never read (the original shadowed
  it immediately with the file contents); it is kept only so existing
  positional call sites remain valid.
  """
  if timeout <= 0:
    return None
  if not os.path.exists(url_cache_file):
    return None
  created = os.stat(url_cache_file).st_ctime
  age = time.mktime(time.localtime()) - created
  if age >= timeout:
    # stale entry
    return None
  f = open(url_cache_file)
  try:
    return f.read()
  finally:
    f.close()
|
||||||
|
|
||||||
|
def saveUrlCache(url_cache_file, data):
  "Write data to url_cache_file, creating parent directories as needed."
  folder = os.path.dirname(url_cache_file)
  if not os.path.exists(folder):
    os.makedirs(folder)
  f = open(url_cache_file, 'w')
  try:
    f.write(data)
  finally:
    f.close()
|
||||||
|
|
17
oxutils/hashes.py
Normal file
17
oxutils/hashes.py
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# vi:si:et:sw=2:sts=2:ts=2
|
||||||
|
# GPL written 2008 by j@pad.ma
|
||||||
|
import sha
|
||||||
|
import os
|
||||||
|
|
||||||
|
def sha1sum(filename):
  """Return the hex SHA-1 digest of the file at filename, read in 4 KB chunks.

  Fixes: the file is now opened in binary mode (text mode can alter bytes
  and corrupt the digest), closed reliably via try/finally, and hashed
  with hashlib (available since python 2.5, this package's stated
  minimum) instead of the deprecated sha module.
  """
  import hashlib
  digest = hashlib.sha1()
  f = open(filename, 'rb')
  try:
    chunk = f.read(4096)
    while chunk:
      digest.update(chunk)
      chunk = f.read(4096)
  finally:
    f.close()
  return digest.hexdigest()
|
||||||
|
|
||||||
|
|
128
oxutils/html.py
Normal file
128
oxutils/html.py
Normal file
|
@ -0,0 +1,128 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# vi:si:et:sw=2:sts=2:ts=2
|
||||||
|
# GPL written 2008 by j@pad.ma
|
||||||
|
import re
|
||||||
|
import string
|
||||||
|
|
||||||
|
|
||||||
|
# Configuration for urlize() function
# Bug fix: the entity strings below ('&lt;', '&gt;', '&middot;', '&bull;',
# '&#149;', '&#8226;', '&nbsp;') had been HTML-unescaped somewhere along
# the way (leaving e.g. a duplicate '<' in LEADING_PUNCTUATION and three
# identical bullets in DOTS); the literal entity spellings are restored,
# matching the Django utilities this module derives from.
LEADING_PUNCTUATION = ['(', '<', '&lt;']
TRAILING_PUNCTUATION = ['.', ',', ')', '>', '\n', '&gt;', "'", '"']

# list of possible strings used for bullets in bulleted lists
DOTS = ['&middot;', '*', '\xe2\x80\xa2', '&#149;', '&bull;', '&#8226;']

unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)')
word_split_re = re.compile(r'(\s+)')
punctuation_re = re.compile('^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % \
  ('|'.join([re.escape(x) for x in LEADING_PUNCTUATION]),
  '|'.join([re.escape(x) for x in TRAILING_PUNCTUATION])))
simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+')
html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE)
hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL)
trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\Z')
del x # Temporary variable (relies on python 2 list-comprehension scope leak)
|
||||||
|
|
||||||
|
def escape(html):
  """Returns the given HTML with ampersands, quotes and carets encoded.

  Bug fix: the replacement strings had been HTML-unescaped in transit
  (e.g. replace('&', '&') - a no-op), making the whole function do
  nothing; the proper entity spellings are restored.
  """
  if not isinstance(html, basestring):
    html = str(html)
  return html.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;')
|
||||||
|
|
||||||
|
def linebreaks(value):
  "Converts newlines into <p> and <br />s"
  # normalize \r\n and \r to plain \n first
  normalized = re.sub(r'\r\n|\r|\n', '\n', value)
  # blank lines separate paragraphs; single newlines become <br />
  paragraphs = re.split('\n{2,}', normalized)
  wrapped = ['<p>%s</p>' % p.strip().replace('\n', '<br />') for p in paragraphs]
  return '\n\n'.join(wrapped)
|
||||||
|
|
||||||
|
def stripTags(value):
  "Returns the given HTML with all tags stripped"
  tag_re = re.compile(r'<[^>]*?>')
  return tag_re.sub('', value)
|
||||||
|
|
||||||
|
def stripSpacesBetweenTags(value):
  "Returns the given HTML with spaces between tags normalized to a single space"
  between_tags = re.compile(r'>\s+<')
  return between_tags.sub('> <', value)
|
||||||
|
|
||||||
|
def stripEntities(value):
  """Returns the given HTML with all entities (&something;) stripped.

  Bug fix: numeric entities were matched with #\d (one digit only), so
  multi-digit references like &#8212; survived; #\d+ now matches them.
  """
  return re.sub(r'&(?:\w+|#\d+);', '', value)
|
||||||
|
|
||||||
|
def fixAmpersands(value):
  """Returns the given HTML with all unencoded ampersands encoded correctly.

  Bug fix: the replacement string had collapsed to a literal '&' (a
  no-op); encoding an ampersand means replacing it with '&amp;'.
  """
  return unencoded_ampersands_re.sub('&amp;', value)
|
||||||
|
|
||||||
|
def urlize(text, trim_url_limit=None, nofollow=False):
  """
  Converts any URLs in text into clickable links. Works on http://, https:// and
  www. links. Links can have trailing punctuation (periods, commas, close-parens)
  and leading punctuation (opening parens) and it'll still do the right thing.

  If trim_url_limit is not None, the URLs in link text will be limited to
  trim_url_limit characters.

  If nofollow is True, the URLs in link text will get a rel="nofollow" attribute.
  """
  # shorten the *visible* link text to limit chars, appending '...'
  trim_url = lambda x, limit=trim_url_limit: limit is not None and (x[:limit] + (len(x) >=limit and '...' or '')) or x
  # split on runs of whitespace; the separators are kept so ''.join
  # below reassembles the text exactly
  words = word_split_re.split(text)
  nofollow_attr = nofollow and ' rel="nofollow"' or ''
  for i, word in enumerate(words):
    # peel leading/trailing punctuation off the candidate token
    match = punctuation_re.match(word)
    if match:
      lead, middle, trail = match.groups()
      # case 1: bare domain (www.* or a *.org/.net/.com token without a scheme)
      if middle.startswith('www.') or ('@' not in middle and not middle.startswith('http://') and \
          len(middle) > 0 and middle[0] in string.letters + string.digits and \
          (middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com'))):
        middle = '<a href="http://%s"%s>%s</a>' % (middle, nofollow_attr, trim_url(middle))
      # case 2: explicit http(s) URL
      if middle.startswith('http://') or middle.startswith('https://'):
        middle = '<a href="%s"%s>%s</a>' % (middle, nofollow_attr, trim_url(middle))
      # case 3: plain email address -> mailto link (no trimming)
      if '@' in middle and not middle.startswith('www.') and not ':' in middle \
          and simple_email_re.match(middle):
        middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
      # only write back if one of the cases rewrote the token
      if lead + middle + trail != word:
        words[i] = lead + middle + trail
  return ''.join(words)
|
||||||
|
|
||||||
|
def cleanHtml(text):
  """
  Cleans the given HTML. Specifically, it does the following:
    * Converts <b> and <i> to <strong> and <em>.
    * Encodes all ampersands correctly.
    * Removes all "target" attributes from <a> tags.
    * Removes extraneous HTML, such as presentational tags that open and
      immediately close and <br clear="all">.
    * Converts hard-coded bullets into HTML unordered lists.
    * Removes trailing empty paragraphs, but only at the bottom of the text.
  """
  from text import normalizeNewlines
  text = normalizeNewlines(text)
  text = re.sub(r'<(/?)\s*b\s*>', '<\\1strong>', text)
  text = re.sub(r'<(/?)\s*i\s*>', '<\\1em>', text)
  # bug fix: the original called fix_ampersands (Django's name for it),
  # which does not exist here - the helper in this module is fixAmpersands
  text = fixAmpersands(text)
  # Remove all target="" attributes from <a> tags.
  text = link_target_attribute_re.sub('\\1', text)
  # Trim stupid HTML such as <br clear="all">.
  text = html_gunk_re.sub('', text)
  # Convert hard-coded bullets into HTML unordered lists.
  def replace_p_tags(match):
    # turn each bulleted <p>...</p> into <li>...</li> inside one <ul>
    s = match.group().replace('</p>', '</li>')
    for d in DOTS:
      s = s.replace('<p>%s' % d, '<li>')
    return '<ul>\n%s\n</ul>' % s
  text = hard_coded_bullets_re.sub(replace_p_tags, text)
  # Remove empty paragraphs at the bottom of the text.
  text = trailing_empty_content_re.sub('', text)
  return text
|
||||||
|
|
||||||
|
def highlight(text, query, hlClass="hl"):
  """Wrap every case-insensitive occurrence of query in a highlight <span>.

  <br /> tags are temporarily mapped to '|' so matches can span line
  breaks, then restored afterwards. A space in the query matches any
  character.
  """
  if not query:
    return text
  text = text.replace('<br />', '|')
  pattern = re.escape(query).replace('\ ', '.')
  for hit in re.compile("(%s)" % pattern, re.IGNORECASE).findall(text):
    hit_pattern = "(%s)" % re.escape(hit).replace('\ ', '.')
    text = re.sub(hit_pattern, '<span class="%s">\\1</span>' % hlClass, text)
  return text.replace('|', '<br />')
|
||||||
|
|
29
oxutils/net.py
Normal file
29
oxutils/net.py
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# vi:si:et:sw=2:sts=2:ts=2
|
||||||
|
import urllib
|
||||||
|
import urllib2
|
||||||
|
|
||||||
|
import chardet
|
||||||
|
|
||||||
|
|
||||||
|
# Default headers for HTTP requests.
|
||||||
|
DEFAULT_HEADERS = {'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9b5) Gecko/2008041514 Firefox/3.0b5'}
|
||||||
|
|
||||||
|
def openUrl(url, data=None, headers=DEFAULT_HEADERS):
  "Open url (spaces percent-encoded) and return the response file object."
  request = urllib2.Request(url.replace(' ', '%20'), data, headers)
  return urllib2.urlopen(request)
|
||||||
|
|
||||||
|
def getUrl(url, data=None, headers=DEFAULT_HEADERS):
  "Fetch url and return the raw response body."
  response = openUrl(url, data, headers)
  try:
    return response.read()
  finally:
    response.close()
|
||||||
|
|
||||||
|
def getUrlUnicode(url):
  """Fetch url and decode the body to unicode.

  The encoding is guessed with chardet; an empty/None guess falls back
  to latin-1 (which can decode any byte string).
  """
  data = getUrl(url)
  encoding = chardet.detect(data)['encoding'] or 'latin-1'
  return unicode(data, encoding)
|
||||||
|
|
99
oxutils/numbers.py
Normal file
99
oxutils/numbers.py
Normal file
|
@ -0,0 +1,99 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# vi:si:et:sw=2:sts=2:ts=2
|
||||||
|
# Written 2007 by j@mailb.org
|
||||||
|
import re
|
||||||
|
|
||||||
|
def to36(q):
  """
  Converts an integer to base 36 (a useful scheme for human-sayable IDs).

  >>> to36(35)
  'z'
  >>> to36(119292)
  '2k1o'
  >>> int(to36(939387374), 36)
  939387374
  >>> to36(0)
  '0'
  >>> to36(-393)
  Traceback (most recent call last):
  ...
  ValueError: must supply a positive integer
  """
  # fix: use the raise ValueError(...) call form instead of the
  # python-2-only `raise ValueError, "..."` statement - same behavior,
  # but the module stays importable on python 3 as well
  if q < 0:
    raise ValueError("must supply a positive integer")
  letters = "0123456789abcdefghijklmnopqrstuvwxyz"
  converted = []
  while q != 0:
    q, r = divmod(q, 36)
    converted.insert(0, letters[r])
  return "".join(converted) or '0'
|
||||||
|
|
||||||
|
def from36(q):
  "Inverse of to36: parse a base-36 string back into an integer."
  return int(q, 36)
|
||||||
|
|
||||||
|
def intValue(strValue, default=''):
  """Return the first run of digits in strValue (as a string), or default.

  Bug fix: the original pattern (\d*) also matches the empty string at
  position 0, so any input not *starting* with a digit returned ''
  instead of the first number in the string; (\d+) finds it.
  """
  try:
    val = re.compile('(\d+)').findall(unicode(strValue))[0]
  except:
    # broad by design: any failure (no digits, unconvertible input)
    # yields the caller-supplied default
    val = default
  return val
|
||||||
|
|
||||||
|
def floatValue(strValue, default=''):
  """Return the first run of digits/dots in strValue (as a string), or default.

  Bug fix: like intValue, the original pattern ([\d.]*) matched the empty
  string for inputs not starting with a digit or dot; ([\d.]+) finds the
  first numeric-looking run anywhere in the string.
  """
  try:
    val = re.compile('([\d.]+)').findall(unicode(strValue))[0]
  except:
    # broad by design: any failure yields the caller-supplied default
    val = default
  return val
|
||||||
|
|
||||||
|
"""
|
||||||
|
Format the value like a 'human-readable' file size (i.e. 13 KB, 4.1 MB, 102
|
||||||
|
bytes, etc).
|
||||||
|
number - number to format.
|
||||||
|
long_name - long name. i.e. byte
|
||||||
|
short - short name, i.e. B
|
||||||
|
"""
|
||||||
|
def formatNumber(number, long_name, short):
|
||||||
|
if not number:
|
||||||
|
return "0 %ss" % long_name
|
||||||
|
number = float(number)
|
||||||
|
if number < 1024:
|
||||||
|
return "%d %s%s" % (number, long_name, number != 1 and 's' or '')
|
||||||
|
if number < 1024 * 1024:
|
||||||
|
return "%d K%s" % ((number / 1024), short)
|
||||||
|
if number < 1024 * 1024 * 1024:
|
||||||
|
return "%.1f M%s" % (number / (1024 * 1024), short)
|
||||||
|
if number < 1024 * 1024 * 1024 * 1024:
|
||||||
|
return "%.2f G%s" % (number / (1024 * 1024 * 1024), short)
|
||||||
|
return "%.3f T%s" % (number / (1024 * 1024 * 1024 * 1024), short)
|
||||||
|
|
||||||
|
def formatBytes(number):
  "Human-readable byte count, e.g. formatBytes(1234567890) -> '1.15 GB'."
  return formatNumber(number, 'byte', 'B')
|
||||||
|
|
||||||
|
def formatBit(number):
  "Human-readable bit count, e.g. formatBit(2048) -> '2 Kb'."
  return formatNumber(number, 'bit', 'b')
|
||||||
|
|
||||||
|
'''
|
||||||
|
Separate a number with thousands separators (commas by default).
|
||||||
|
'''
|
||||||
|
def numberThousands(n, sep=','):
  "Return n as a string with sep inserted between groups of three digits."
  if n < 1000:
    return "%s" % n
  # walk the digits from the right, emitting three at a time
  digits = list(str(n))
  digits.reverse()
  grouped = []
  while len(digits) > 3:
    grouped.extend(digits[:3])
    grouped.append(sep)
    digits = digits[3:]
  grouped.extend(digits)
  grouped.reverse()
  return "".join(grouped)
|
||||||
|
|
||||||
|
def plural(amount, unit, plural='s'):
  """Return amount with a correctly pluralized unit, e.g. (2, 'day') -> '2 days'.

  If plural is 's' it is appended to unit; otherwise plural *replaces*
  the unit entirely (e.g. plural(3, 'city', 'cities')).

  Bug fix: the original formatted the amount with formatNumber(amount),
  but formatNumber requires three arguments, so every call raised
  TypeError; numberThousands (defined in this module) formats the count.
  """
  if abs(amount) != 1:
    if plural == 's':
      unit = unit + plural
    else:
      unit = plural
  return "%s %s" % (numberThousands(amount), unit)
|
||||||
|
|
140
oxutils/text.py
Normal file
140
oxutils/text.py
Normal file
|
@ -0,0 +1,140 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# vi:si:et:sw=2:sts=2:ts=2
|
||||||
|
# GPL written 2008 by j@pad.ma
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
# Capitalizes the first letter of a string.
|
||||||
|
capfirst = lambda x: x and x[0].upper() + x[1:]
|
||||||
|
|
||||||
|
def removeSpecialCharacters(text):
  """
  Removes special characters inserted by Word.
  """
  text = text.replace(u'\u2013', '-')  # en dash -> hyphen
  # NOTE(review): u'\u2026O' (ellipsis followed by a literal 'O') looks
  # like a mangled literal - confirm against the original source.
  text = text.replace(u'\u2026O', "'")
  text = text.replace(u'\u2019', "'")  # right single quotation mark -> apostrophe
  # NOTE(review): the next three replace an EMPTY string; str.replace('')
  # inserts the replacement between every character, which cannot be the
  # intent. The original non-ascii literals were almost certainly lost in
  # an encoding round-trip - verify before relying on this function.
  text = text.replace(u'', "'")
  text = text.replace(u'', "'")
  text = text.replace(u'', "-")
  return text
|
||||||
|
|
||||||
|
def wrap(text, width):
  """
  A word-wrap function that preserves existing line breaks and most spaces in
  the text. Expects that existing line breaks are posix newlines (\n).
  See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/148061
  """
  # reduce() folds over the space-separated words: for each word it picks
  # either ' ' or '\n' as the joiner, depending on whether the current
  # line (the text after the last '\n' in the accumulator) plus the next
  # word's first line would reach the width.
  return reduce(lambda line, word, width=width: '%s%s%s' %
    (line,
     ' \n'[(len(line[line.rfind('\n')+1:])
       + len(word.split('\n',1)[0]
       ) >= width)],
     word),
    text.split(' ')
    )
|
||||||
|
|
||||||
|
def truncateString(s, num):
  """Truncate s after roughly num characters, ending at a word boundary,
  appending '...' when anything was cut.

  Bug fix: the original accumulated words as ts += " " + word starting
  from an empty string, so every truncated result began with a stray
  leading space (' hello...'); words are now joined properly.
  """
  length = int(num)
  if len(s) <= length:
    return s
  words = s.split()
  kept = []
  size = 0
  # same stopping rule as before: stop once the joined text plus the
  # next word would reach the limit (size counts one joiner per word)
  while words and size + len(words[0]) < length:
    size += len(words[0]) + 1
    kept.append(words.pop(0))
  result = ' '.join(kept)
  if words:
    result += "..."
  return result
|
||||||
|
|
||||||
|
def truncateWords(s, num):
  "Truncates a string after a certain number of words."
  limit = int(num)
  words = s.split()
  if len(words) <= limit:
    # nothing to cut; note the whitespace still gets normalized
    return ' '.join(words)
  words = words[:limit]
  if not words[-1].endswith('...'):
    words.append('...')
  return ' '.join(words)
|
||||||
|
|
||||||
|
def getValidFilename(s):
  """
  Returns the given string converted to a string that can be used for a clean
  filename. Specifically, leading and trailing spaces are removed, remaining
  spaces are converted to underscores, and all non-filename-safe characters
  are replaced with underscores.

  (Doc fix: the old example claimed spaces were preserved, but the code
  replaces them with underscores; the example now matches the code.)

  >>> getValidFilename("john's portrait in 2004.jpg")
  'john_s_portrait_in_2004.jpg'
  """
  s = s.strip()
  s = s.replace(' ', '_')
  # replace anything that is not alnum, dash, underscore, dot or brackets
  s = re.sub(r'[^-A-Za-z0-9_.\[\]\ ]', '_', s)
  # collapse runs of underscores (two passes, as before)
  s = s.replace('__', '_').replace('__', '_')
  return s
|
||||||
|
|
||||||
|
def getTextList(list_, last_word='or'):
  """
  Join a list into natural-language form, e.g.:

  >>> getTextList(['a', 'b', 'c', 'd'])
  'a, b, c or d'
  >>> getTextList(['a', 'b', 'c'], 'and')
  'a, b and c'
  >>> getTextList(['a', 'b'], 'and')
  'a and b'
  >>> getTextList(['a'])
  'a'
  >>> getTextList([])
  ''
  """
  if not list_:
    return ''
  if len(list_) == 1:
    return list_[0]
  head = ', '.join([str(item) for item in list_][:-1])
  return '%s %s %s' % (head, last_word, list_[-1])
|
||||||
|
|
||||||
|
def normalizeNewlines(text):
  "Convert \\r\\n and bare \\r line endings to posix \\n."
  return text.replace('\r\n', '\n').replace('\r', '\n')
|
||||||
|
|
||||||
|
def recapitalize(text):
  "Recapitalizes text, placing caps after end-of-sentence punctuation."
  # match the first letter of the text and any letter following '. ',
  # '? ' or '! '
  sentence_start = re.compile(r'(?:^|(?<=[\.\?\!] ))([a-z])')
  return sentence_start.sub(lambda m: m.group(1).upper(), text.lower())
|
||||||
|
|
||||||
|
def phone2numeric(phone):
  "Converts a phone number with letters into its numeric equivalent."
  keypad = {
    'a': '2', 'b': '2', 'c': '2',
    'd': '3', 'e': '3', 'f': '3',
    'g': '4', 'h': '4', 'i': '4',
    'j': '5', 'k': '5', 'l': '5',
    'm': '6', 'n': '6', 'o': '6',
    'p': '7', 'r': '7', 's': '7',
    't': '8', 'u': '8', 'v': '8',
    'w': '9', 'x': '9', 'y': '9',
  }
  # A-P and R-Y only: q and z have no key on the classic keypad
  return re.compile(r'[A-PR-Y]', re.I).sub(
    lambda m: keypad[m.group(0).lower()], phone)
|
||||||
|
|
||||||
|
def compressString(s):
  "Return s gzip-compressed (level 6) as a byte string."
  import cStringIO, gzip
  buf = cStringIO.StringIO()
  zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=buf)
  try:
    zfile.write(s)
  finally:
    zfile.close()
  return buf.getvalue()
|
||||||
|
|
||||||
|
smart_split_re = re.compile('("(?:[^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'(?:[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'|[^\\s]+)')
def smartSplit(text):
  """
  Generator that splits a string by spaces, leaving quoted phrases together.
  Supports both single and double quotes, and supports escaping quotes with
  backslashes. In the output, strings will keep their initial and trailing
  quote marks.

  >>> list(smartSplit('This is "a person\'s" test.'))
  ['This', 'is', '"a person\'s"', 'test.']
  """
  for match in smart_split_re.finditer(text):
    token = match.group(0)
    quote = token[0]
    if quote in ('"', "'"):
      # unescape the inner text, then restore the surrounding quotes
      inner = token[1:-1].replace('\\' + quote, quote).replace('\\\\', '\\')
      yield quote + inner + quote
    else:
      yield token
|
58
oxutils/timeformat.py
Normal file
58
oxutils/timeformat.py
Normal file
|
@ -0,0 +1,58 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# vi:si:et:sw=2:sts=2:ts=2
|
||||||
|
from numbers import plural
|
||||||
|
|
||||||
|
def ms2runtime(ms):
  "Format a duration in milliseconds as e.g. '2 days 1 hour 5 seconds'."
  seconds = int(ms / 1000)
  minutes, seconds = divmod(seconds, 60)
  hours, minutes = divmod(minutes, 60)
  days, hours = divmod(hours, 24)
  years, days = divmod(days, 365)
  parts = (plural(years, 'year'), plural(days, 'day'),
    plural(hours, 'hour'), plural(minutes, 'minute'), plural(seconds, 'second'))
  # drop zero-valued components ('0 years', '0 days', ...)
  parts = filter(lambda x: not x.startswith('0'), parts)
  return " ".join(parts).strip()
|
||||||
|
|
||||||
|
def ms2playtime(ms):
  "Format milliseconds as MM:SS, or HH:MM:SS once there is a full hour."
  total = int(ms / 1000)
  ms = ms - total * 1000
  secs = total % 60
  mins = ((total - secs) / 60) % 60
  hrs = ((total - (mins * 60) - secs) / 3600) % 60
  if hrs:
    return "%02d:%02d:%02d" % (hrs, mins, secs)
  return "%02d:%02d" % (mins, secs)
|
||||||
|
|
||||||
|
def ms2time(ms):
  "Format milliseconds as H:MM:SS.mmm (hours not zero-padded)."
  total = int(ms / 1000)
  millis = ms - total * 1000
  secs = total % 60
  mins = ((total - secs) / 60) % 60
  hrs = ((total - (mins * 60) - secs) / 3600) % 60
  return "%d:%02d:%02d.%03d" % (hrs, mins, secs, millis)
|
||||||
|
|
||||||
|
def time2ms(timeString):
  "Parse '[[HH:]MM:]SS[.fff]' into milliseconds."
  ms = 0.0
  # each ':' shifts the accumulated value up by one base-60 place
  for part in timeString.split(':'):
    ms = ms * 60 + float(part)
  return int(ms * 1000)
|
||||||
|
|
||||||
|
def shiftTime(offset, timeString):
  "Shift timeString by offset milliseconds and return the new time string."
  return ms2time(time2ms(timeString) + offset)
|
||||||
|
|
27
setup.py
Normal file
27
setup.py
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
#!/usr/bin/env python
# vi:si:et:sw=2:sts=2:ts=2
# encoding: utf-8
from setuptools import setup, find_packages

# Packaging metadata for the oxutils distribution; all subpackages are
# picked up automatically via find_packages().
setup(
  name="oxutils",
  version="0.1",

  description="collection of utils used to work with python",
  author="ox",
  author_email="utils@0xdb.org",
  url="http://code.0xdb.org/python-oxutils",
  download_url="http://code.0xdb.org/python-oxutils/download",
  license="GPL",
  packages=find_packages(),
  zip_safe=False,
  keywords = [
  ],
  classifiers = [
    'Development Status :: 3 - Alpha',
    'Operating System :: OS Independent',
    'Programming Language :: Python',
    'Topic :: Software Development :: Libraries :: Python Modules',
  ],
)
|
||||||
|
|
Loading…
Reference in a new issue