install as ox
This commit is contained in:
parent
1d7b6a31f0
commit
d14f13faaf
12 changed files with 2 additions and 1 deletions
|
|
@ -1,18 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
# GPL 2008
|
||||
|
||||
from file import *
|
||||
from format import *
|
||||
from html import *
|
||||
from iso import *
|
||||
from text import *
|
||||
import cache
|
||||
import net
|
||||
|
||||
#only works if BitTornado is installed
|
||||
try:
|
||||
from torrent import *
|
||||
except:
|
||||
pass
|
||||
|
||||
320
oxlib/bencode.py
320
oxlib/bencode.py
|
|
@ -1,320 +0,0 @@
|
|||
# Written by Petru Paler, Uoti Urpala, Ross Cohen and John Hoffman
|
||||
# see LICENSE.txt for license information
|
||||
|
||||
from types import IntType, LongType, StringType, ListType, TupleType, DictType
|
||||
try:
|
||||
from types import BooleanType
|
||||
except ImportError:
|
||||
BooleanType = None
|
||||
try:
|
||||
from types import UnicodeType
|
||||
except ImportError:
|
||||
UnicodeType = None
|
||||
from cStringIO import StringIO
|
||||
|
||||
def decode_int(x, f):
|
||||
f += 1
|
||||
newf = x.index('e', f)
|
||||
try:
|
||||
n = int(x[f:newf])
|
||||
except:
|
||||
n = long(x[f:newf])
|
||||
if x[f] == '-':
|
||||
if x[f + 1] == '0':
|
||||
raise ValueError
|
||||
elif x[f] == '0' and newf != f+1:
|
||||
raise ValueError
|
||||
return (n, newf+1)
|
||||
|
||||
def decode_string(x, f):
|
||||
colon = x.index(':', f)
|
||||
try:
|
||||
n = int(x[f:colon])
|
||||
except (OverflowError, ValueError):
|
||||
n = long(x[f:colon])
|
||||
if x[f] == '0' and colon != f+1:
|
||||
raise ValueError
|
||||
colon += 1
|
||||
return (x[colon:colon+n], colon+n)
|
||||
|
||||
def decode_unicode(x, f):
|
||||
s, f = decode_string(x, f+1)
|
||||
return (s.decode('UTF-8'),f)
|
||||
|
||||
def decode_list(x, f):
|
||||
r, f = [], f+1
|
||||
while x[f] != 'e':
|
||||
v, f = decode_func[x[f]](x, f)
|
||||
r.append(v)
|
||||
return (r, f + 1)
|
||||
|
||||
def decode_dict(x, f):
|
||||
r, f = {}, f+1
|
||||
lastkey = None
|
||||
while x[f] != 'e':
|
||||
k, f = decode_string(x, f)
|
||||
#why is this needed
|
||||
#if lastkey >= k:
|
||||
# raise ValueError
|
||||
lastkey = k
|
||||
r[k], f = decode_func[x[f]](x, f)
|
||||
return (r, f + 1)
|
||||
|
||||
decode_func = {}
|
||||
decode_func['l'] = decode_list
|
||||
decode_func['d'] = decode_dict
|
||||
decode_func['i'] = decode_int
|
||||
decode_func['0'] = decode_string
|
||||
decode_func['1'] = decode_string
|
||||
decode_func['2'] = decode_string
|
||||
decode_func['3'] = decode_string
|
||||
decode_func['4'] = decode_string
|
||||
decode_func['5'] = decode_string
|
||||
decode_func['6'] = decode_string
|
||||
decode_func['7'] = decode_string
|
||||
decode_func['8'] = decode_string
|
||||
decode_func['9'] = decode_string
|
||||
#decode_func['u'] = decode_unicode
|
||||
|
||||
def bdecode(x, sloppy = 1):
|
||||
try:
|
||||
r, l = decode_func[x[0]](x, 0)
|
||||
# except (IndexError, KeyError):
|
||||
except (IndexError, KeyError, ValueError):
|
||||
raise ValueError, "bad bencoded data"
|
||||
if not sloppy and l != len(x):
|
||||
raise ValueError, "bad bencoded data"
|
||||
return r
|
||||
|
||||
def test_bdecode():
|
||||
try:
|
||||
bdecode('0:0:')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
bdecode('ie')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
bdecode('i341foo382e')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
assert bdecode('i4e') == 4L
|
||||
assert bdecode('i0e') == 0L
|
||||
assert bdecode('i123456789e') == 123456789L
|
||||
assert bdecode('i-10e') == -10L
|
||||
try:
|
||||
bdecode('i-0e')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
bdecode('i123')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
bdecode('')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
bdecode('i6easd')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
bdecode('35208734823ljdahflajhdf')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
bdecode('2:abfdjslhfld')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
assert bdecode('0:') == ''
|
||||
assert bdecode('3:abc') == 'abc'
|
||||
assert bdecode('10:1234567890') == '1234567890'
|
||||
try:
|
||||
bdecode('02:xy')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
bdecode('l')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
assert bdecode('le') == []
|
||||
try:
|
||||
bdecode('leanfdldjfh')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
assert bdecode('l0:0:0:e') == ['', '', '']
|
||||
try:
|
||||
bdecode('relwjhrlewjh')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
assert bdecode('li1ei2ei3ee') == [1, 2, 3]
|
||||
assert bdecode('l3:asd2:xye') == ['asd', 'xy']
|
||||
assert bdecode('ll5:Alice3:Bobeli2ei3eee') == [['Alice', 'Bob'], [2, 3]]
|
||||
try:
|
||||
bdecode('d')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
bdecode('defoobar')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
assert bdecode('de') == {}
|
||||
assert bdecode('d3:agei25e4:eyes4:bluee') == {'age': 25, 'eyes': 'blue'}
|
||||
assert bdecode('d8:spam.mp3d6:author5:Alice6:lengthi100000eee') == {'spam.mp3': {'author': 'Alice', 'length': 100000}}
|
||||
try:
|
||||
bdecode('d3:fooe')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
bdecode('di1e0:e')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
bdecode('d1:b0:1:a0:e')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
bdecode('d1:a0:1:a0:e')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
bdecode('i03e')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
bdecode('l01:ae')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
bdecode('9999:x')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
bdecode('l0:')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
bdecode('d0:0:')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
bdecode('d0:')
|
||||
assert 0
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
bencached_marker = []
|
||||
|
||||
class Bencached:
|
||||
def __init__(self, s):
|
||||
self.marker = bencached_marker
|
||||
self.bencoded = s
|
||||
|
||||
BencachedType = type(Bencached('')) # insufficient, but good as a filter
|
||||
|
||||
def encode_bencached(x,r):
|
||||
assert x.marker == bencached_marker
|
||||
r.append(x.bencoded)
|
||||
|
||||
def encode_int(x,r):
|
||||
r.extend(('i',str(x),'e'))
|
||||
|
||||
def encode_bool(x,r):
|
||||
encode_int(int(x),r)
|
||||
|
||||
def encode_string(x,r):
|
||||
r.extend((str(len(x)),':',x))
|
||||
|
||||
def encode_unicode(x,r):
|
||||
#r.append('u')
|
||||
encode_string(x.encode('UTF-8'),r)
|
||||
|
||||
def encode_list(x,r):
|
||||
r.append('l')
|
||||
for e in x:
|
||||
encode_func[type(e)](e, r)
|
||||
r.append('e')
|
||||
|
||||
def encode_dict(x,r):
|
||||
r.append('d')
|
||||
ilist = x.items()
|
||||
ilist.sort()
|
||||
for k,v in ilist:
|
||||
r.extend((str(len(k)),':',k))
|
||||
encode_func[type(v)](v, r)
|
||||
r.append('e')
|
||||
|
||||
encode_func = {}
|
||||
encode_func[BencachedType] = encode_bencached
|
||||
encode_func[IntType] = encode_int
|
||||
encode_func[LongType] = encode_int
|
||||
encode_func[StringType] = encode_string
|
||||
encode_func[ListType] = encode_list
|
||||
encode_func[TupleType] = encode_list
|
||||
encode_func[DictType] = encode_dict
|
||||
if BooleanType:
|
||||
encode_func[BooleanType] = encode_bool
|
||||
if UnicodeType:
|
||||
encode_func[UnicodeType] = encode_unicode
|
||||
|
||||
def bencode(x):
|
||||
r = []
|
||||
try:
|
||||
encode_func[type(x)](x, r)
|
||||
except:
|
||||
print "*** error *** could not encode type %s (value: %s)" % (type(x), x)
|
||||
assert 0
|
||||
return ''.join(r)
|
||||
|
||||
def test_bencode():
|
||||
assert bencode(4) == 'i4e'
|
||||
assert bencode(0) == 'i0e'
|
||||
assert bencode(-10) == 'i-10e'
|
||||
assert bencode(12345678901234567890L) == 'i12345678901234567890e'
|
||||
assert bencode('') == '0:'
|
||||
assert bencode('abc') == '3:abc'
|
||||
assert bencode('1234567890') == '10:1234567890'
|
||||
assert bencode([]) == 'le'
|
||||
assert bencode([1, 2, 3]) == 'li1ei2ei3ee'
|
||||
assert bencode([['Alice', 'Bob'], [2, 3]]) == 'll5:Alice3:Bobeli2ei3eee'
|
||||
assert bencode({}) == 'de'
|
||||
assert bencode({'age': 25, 'eyes': 'blue'}) == 'd3:agei25e4:eyes4:bluee'
|
||||
assert bencode({'spam.mp3': {'author': 'Alice', 'length': 100000}}) == 'd8:spam.mp3d6:author5:Alice6:lengthi100000eee'
|
||||
try:
|
||||
bencode({1: 'foo'})
|
||||
assert 0
|
||||
except AssertionError:
|
||||
pass
|
||||
|
||||
|
||||
try:
|
||||
import psyco
|
||||
psyco.bind(bdecode)
|
||||
psyco.bind(bencode)
|
||||
except ImportError:
|
||||
pass
|
||||
211
oxlib/cache.py
211
oxlib/cache.py
|
|
@ -1,211 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
# GPL 2008
|
||||
import gzip
|
||||
import hashlib
|
||||
import os
|
||||
import StringIO
|
||||
import time
|
||||
import urlparse
|
||||
import urllib2
|
||||
import sqlite3
|
||||
|
||||
import chardet
|
||||
import simplejson
|
||||
|
||||
import net
|
||||
from net import DEFAULT_HEADERS, getEncoding
|
||||
|
||||
|
||||
cache_timeout = 30*24*60*60 # default is 30 days
|
||||
|
||||
|
||||
def status(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
|
||||
'''
|
||||
>>> status('http://google.com')
|
||||
200
|
||||
>>> status('http://google.com/mysearch')
|
||||
404
|
||||
'''
|
||||
headers = getHeaders(url, data, headers)
|
||||
return int(headers['status'])
|
||||
|
||||
def exists(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
|
||||
'''
|
||||
>>> exists('http://google.com')
|
||||
True
|
||||
>>> exists('http://google.com/mysearch')
|
||||
False
|
||||
'''
|
||||
s = status(url, data, headers, timeout)
|
||||
if s >= 200 and s < 400:
|
||||
return True
|
||||
return False
|
||||
|
||||
def getHeaders(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
|
||||
url_headers = _getUrlCache(url, data, headers, timeout, "headers")
|
||||
if url_headers:
|
||||
url_headers = simplejson.loads(url_headers)
|
||||
else:
|
||||
url_headers = net.getHeaders(url, data, headers)
|
||||
_saveUrlCache(url, data, -1, url_headers)
|
||||
return url_headers
|
||||
|
||||
class InvalidResult(Exception):
|
||||
"""Base class for exceptions in this module."""
|
||||
def __init__(self, result, headers):
|
||||
self.result = result
|
||||
self.headers = headers
|
||||
|
||||
def getUrl(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout, valid=None):
|
||||
'''
|
||||
url - url to load
|
||||
data - possible post data
|
||||
headers - headers to send with request
|
||||
timeout - get from cache if cache not older than given seconds, -1 to get from cache
|
||||
valid - function to check if result is ok, its passed result and headers
|
||||
if this function fails, InvalidResult will be raised deal with it in your code
|
||||
'''
|
||||
#FIXME: send last-modified / etag from cache and only update if needed
|
||||
if isinstance(url, unicode):
|
||||
url = url.encode('utf-8')
|
||||
result = _getUrlCache(url, data, headers, timeout)
|
||||
if not result:
|
||||
#print "get data", url
|
||||
try:
|
||||
url_headers, result = net.getUrl(url, data, headers, returnHeaders=True)
|
||||
except urllib2.HTTPError, e:
|
||||
e.headers['Status'] = "%s" % e.code
|
||||
url_headers = dict(e.headers)
|
||||
result = e.read()
|
||||
if url_headers.get('content-encoding', None) == 'gzip':
|
||||
result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()
|
||||
if not valid or valid(result, url_headers):
|
||||
_saveUrlCache(url, data, result, url_headers)
|
||||
else:
|
||||
raise InvalidResult(result, url_headers)
|
||||
return result
|
||||
|
||||
def getUrlUnicode(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout, _getUrl=getUrl, valid=None):
|
||||
data = _getUrl(url, data, headers, timeout, valid)
|
||||
encoding = getEncoding(data)
|
||||
if not encoding:
|
||||
encoding = 'latin-1'
|
||||
return unicode(data, encoding)
|
||||
|
||||
def _getCacheBase():
|
||||
'cache base is eather ~/.ox/cache or can set via env variable oxCACHE'
|
||||
return os.environ.get('oxCACHE', os.path.expanduser('~/.ox/cache'))
|
||||
|
||||
def _getCacheDB():
|
||||
return os.path.join(_getCacheBase(), "cache.sqlite")
|
||||
|
||||
def _connectDb():
|
||||
conn = sqlite3.connect(_getCacheDB(), timeout=10)
|
||||
conn.text_factory = str
|
||||
return conn
|
||||
|
||||
def _createDb(c):
|
||||
# Create table and indexes
|
||||
c.execute('''CREATE TABLE IF NOT EXISTS cache (url_hash varchar(42) unique, domain text, url text,
|
||||
post_data text, headers text, created int, data blob, only_headers int)''')
|
||||
c.execute('''CREATE INDEX IF NOT EXISTS cache_domain ON cache (domain)''')
|
||||
c.execute('''CREATE INDEX IF NOT EXISTS cache_url ON cache (url)''')
|
||||
c.execute('''CREATE INDEX IF NOT EXISTS cache_url_hash ON cache (url_hash)''')
|
||||
|
||||
|
||||
def _getUrlCache(url, data, headers=DEFAULT_HEADERS, timeout=-1, value="data"):
|
||||
r = None
|
||||
if timeout == 0:
|
||||
return r
|
||||
|
||||
if data:
|
||||
url_hash = hashlib.sha1(url + '?' + data).hexdigest()
|
||||
else:
|
||||
url_hash = hashlib.sha1(url).hexdigest()
|
||||
|
||||
conn = _connectDb()
|
||||
c = conn.cursor()
|
||||
_createDb(c)
|
||||
|
||||
sql = 'SELECT %s FROM cache WHERE url_hash=?' % value
|
||||
if timeout > 0:
|
||||
now = time.mktime(time.localtime())
|
||||
t = (url_hash, now-timeout)
|
||||
sql += ' AND created > ?'
|
||||
else:
|
||||
t = (url_hash, )
|
||||
if value != "headers":
|
||||
sql += ' AND only_headers != 1 '
|
||||
c.execute(sql, t)
|
||||
for row in c:
|
||||
r = row[0]
|
||||
if value == 'data':
|
||||
r = str(r)
|
||||
break
|
||||
|
||||
c.close()
|
||||
conn.close()
|
||||
return r
|
||||
|
||||
def _saveUrlCache(url, post_data, data, headers):
|
||||
if post_data:
|
||||
url_hash = hashlib.sha1(url + '?' + post_data).hexdigest()
|
||||
else:
|
||||
url_hash = hashlib.sha1(url).hexdigest()
|
||||
|
||||
domain = ".".join(urlparse.urlparse(url)[1].split('.')[-2:])
|
||||
|
||||
conn = _connectDb()
|
||||
c = conn.cursor()
|
||||
|
||||
# Create table if not exists
|
||||
_createDb(c)
|
||||
|
||||
# Insert a row of data
|
||||
if not post_data: post_data=""
|
||||
only_headers = 0
|
||||
if data == -1:
|
||||
only_headers = 1
|
||||
data = ""
|
||||
created = time.mktime(time.localtime())
|
||||
t = (url_hash, domain, url, post_data, simplejson.dumps(headers), created, sqlite3.Binary(data), only_headers)
|
||||
c.execute(u"""INSERT OR REPLACE INTO cache values (?, ?, ?, ?, ?, ?, ?, ?)""", t)
|
||||
|
||||
# Save (commit) the changes and clean up
|
||||
conn.commit()
|
||||
c.close()
|
||||
conn.close()
|
||||
|
||||
def migrate_to_db():
|
||||
import re
|
||||
import os
|
||||
import sqlite3
|
||||
import glob
|
||||
|
||||
conn = _connectDb()
|
||||
c = conn.cursor()
|
||||
_createDb(c)
|
||||
|
||||
files = glob.glob(_getCacheBase() + "/*/*/*/*/*")
|
||||
_files = filter(lambda x: not x.endswith(".headers"), files)
|
||||
|
||||
for f in _files:
|
||||
info = re.compile("%s/(.*?)/../../../(.*)" % _getCacheBase()).findall(f)
|
||||
domain = url = info[0][0]
|
||||
url_hash = info[0][1]
|
||||
post_data = ""
|
||||
created = os.stat(f).st_ctime
|
||||
fd = open(f, "r")
|
||||
data = fd.read()
|
||||
fd.close()
|
||||
fd = open(f + ".headers", "r")
|
||||
headers = fd.read()
|
||||
fd.close()
|
||||
t = (url_hash, domain, url, post_data, headers, created, sqlite3.Binary(data), 0)
|
||||
c.execute(u"""INSERT OR REPLACE INTO cache values (?, ?, ?, ?, ?, ?, ?, ?)""", t)
|
||||
|
||||
conn.commit()
|
||||
c.close()
|
||||
conn.close()
|
||||
|
||||
|
|
@ -1,65 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
# GPL 2008
|
||||
from __future__ import division
|
||||
import os
|
||||
import hashlib
|
||||
import sys
|
||||
import struct
|
||||
import subprocess
|
||||
|
||||
import simplejson
|
||||
|
||||
|
||||
def sha1sum(filename):
|
||||
sha1 = hashlib.sha1()
|
||||
file=open(filename)
|
||||
buffer=file.read(4096)
|
||||
while buffer:
|
||||
sha1.update(buffer)
|
||||
buffer=file.read(4096)
|
||||
file.close()
|
||||
return sha1.hexdigest()
|
||||
|
||||
'''
|
||||
os hash - http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
|
||||
plus modification for files < 64k, buffer is filled with file data and padded with 0
|
||||
'''
|
||||
def oshash(filename):
|
||||
try:
|
||||
longlongformat = 'q' # long long
|
||||
bytesize = struct.calcsize(longlongformat)
|
||||
|
||||
f = open(filename, "rb")
|
||||
|
||||
filesize = os.path.getsize(filename)
|
||||
hash = filesize
|
||||
if filesize < 65536:
|
||||
for x in range(int(filesize/bytesize)):
|
||||
buffer = f.read(bytesize)
|
||||
(l_value,)= struct.unpack(longlongformat, buffer)
|
||||
hash += l_value
|
||||
hash = hash & 0xFFFFFFFFFFFFFFFF #to remain as 64bit number
|
||||
else:
|
||||
for x in range(int(65536/bytesize)):
|
||||
buffer = f.read(bytesize)
|
||||
(l_value,)= struct.unpack(longlongformat, buffer)
|
||||
hash += l_value
|
||||
hash = hash & 0xFFFFFFFFFFFFFFFF #to remain as 64bit number
|
||||
f.seek(max(0,filesize-65536),0)
|
||||
for x in range(int(65536/bytesize)):
|
||||
buffer = f.read(bytesize)
|
||||
(l_value,)= struct.unpack(longlongformat, buffer)
|
||||
hash += l_value
|
||||
hash = hash & 0xFFFFFFFFFFFFFFFF
|
||||
f.close()
|
||||
returnedhash = "%016x" % hash
|
||||
return returnedhash
|
||||
except(IOError):
|
||||
return "IOError"
|
||||
|
||||
def avinfo(filename):
|
||||
p = subprocess.Popen(['ffmpeg2theora', '--info', filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
info, error = p.communicate()
|
||||
return simplejson.loads(info)
|
||||
|
||||
270
oxlib/format.py
270
oxlib/format.py
|
|
@ -1,270 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
import math
|
||||
import re
|
||||
|
||||
def to36(q):
|
||||
"""
|
||||
Converts an integer to base 36 (a useful scheme for human-sayable IDs
|
||||
like 'fuck' (739172), 'shit' (1329077) or 'hitler' (1059538851)).
|
||||
|
||||
>>> to36(35)
|
||||
'z'
|
||||
>>> to36(119292)
|
||||
'2k1o'
|
||||
>>> int(to36(939387374), 36)
|
||||
939387374
|
||||
>>> to36(0)
|
||||
'0'
|
||||
>>> to36(-393)
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: must supply a positive integer
|
||||
"""
|
||||
if q < 0: raise ValueError, "must supply a positive integer"
|
||||
letters = "0123456789abcdefghijklmnopqrstuvwxyz"
|
||||
converted = []
|
||||
while q != 0:
|
||||
q, r = divmod(q, 36)
|
||||
converted.insert(0, letters[r])
|
||||
return "".join(converted) or '0'
|
||||
|
||||
def from36(q):
|
||||
return int(q, 36)
|
||||
|
||||
def intValue(strValue, default=u''):
|
||||
"""
|
||||
>>> intValue('abc23')
|
||||
u'23'
|
||||
|
||||
>>> intValue(' abc23')
|
||||
u'23'
|
||||
|
||||
>>> intValue('ab')
|
||||
u''
|
||||
"""
|
||||
try:
|
||||
val = re.compile('(\d+)').findall(unicode(strValue).strip())[0]
|
||||
except:
|
||||
val = default
|
||||
return val
|
||||
|
||||
def floatValue(strValue, default=u''):
|
||||
"""
|
||||
>>> floatValue('abc23.4')
|
||||
u'23.4'
|
||||
|
||||
>>> floatValue(' abc23.4')
|
||||
u'23.4'
|
||||
|
||||
>>> floatValue('ab')
|
||||
u''
|
||||
"""
|
||||
try:
|
||||
val = re.compile('([\d.]+)').findall(unicode(strValue).strip())[0]
|
||||
except:
|
||||
val = default
|
||||
return val
|
||||
|
||||
def formatNumber(number, longName, shortName):
|
||||
"""
|
||||
Return the number in a human-readable format (23 KB, 23.4 MB, 23.42 GB)
|
||||
|
||||
>>> formatNumber(123, 'Byte', 'B')
|
||||
'123 Bytes'
|
||||
|
||||
>>> formatNumber(1234, 'Byte', 'B')
|
||||
'1 KB'
|
||||
|
||||
>>> formatNumber(1234567, 'Byte', 'B')
|
||||
'1.2 MB'
|
||||
|
||||
>>> formatNumber(1234567890, 'Byte', 'B')
|
||||
'1.15 GB'
|
||||
|
||||
>>> formatNumber(1234567890123456789, 'Byte', 'B')
|
||||
'1,096.5166 PB'
|
||||
|
||||
>>> formatNumber(-1234567890123456789, 'Byte', 'B')
|
||||
'-1,096.5166 PB'
|
||||
|
||||
"""
|
||||
if abs(number) < 1024:
|
||||
return '%s %s%s' % (formatThousands(number), longName, number != 1 and 's' or '')
|
||||
prefix = ['K', 'M', 'G', 'T', 'P']
|
||||
for i in range(5):
|
||||
if abs(number) < math.pow(1024, i + 2) or i == 4:
|
||||
n = number / math.pow(1024, i + 1)
|
||||
return '%s %s%s' % (formatThousands('%.*f' % (i, n)), prefix[i], shortName)
|
||||
|
||||
def formatThousands(number, separator = ','):
|
||||
"""
|
||||
Return the number with separators (1,000,000)
|
||||
|
||||
>>> formatThousands(1)
|
||||
'1'
|
||||
>>> formatThousands(1000)
|
||||
'1,000'
|
||||
>>> formatThousands(1000000)
|
||||
'1,000,000'
|
||||
"""
|
||||
string = str(number).split('.')
|
||||
l = []
|
||||
for i, character in enumerate(reversed(string[0])):
|
||||
if i and (not (i % 3)):
|
||||
l.insert(0, separator)
|
||||
l.insert(0, character)
|
||||
string[0] = ''.join(l)
|
||||
return '.'.join(string)
|
||||
|
||||
def formatBits(number):
|
||||
return formatNumber(number, 'bit', 'b')
|
||||
|
||||
def formatBytes(number):
|
||||
return formatNumber(number, 'byte', 'B')
|
||||
|
||||
def formatPixels(number):
|
||||
return formatNumber(number, 'pixel', 'px')
|
||||
|
||||
def formatCurrency(amount, currency="$"):
|
||||
if amount:
|
||||
temp = "%.2f" % amount
|
||||
profile=re.compile(r"(\d)(\d\d\d[.,])")
|
||||
while 1:
|
||||
temp, count = re.subn(profile,r"\1,\2",temp)
|
||||
if not count:
|
||||
break
|
||||
if temp.startswith('-'):
|
||||
return "-"+ currency + temp[1:-3]
|
||||
return currency + temp[:-3]
|
||||
else:
|
||||
return ""
|
||||
|
||||
def plural(amount, unit, plural='s'):
|
||||
'''
|
||||
>>> plural(1, 'unit')
|
||||
'1 unit'
|
||||
>>> plural(2, 'unit')
|
||||
'2 units'
|
||||
'''
|
||||
if abs(amount) != 1:
|
||||
if plural == 's':
|
||||
unit = unit + plural
|
||||
else: unit = plural
|
||||
return "%s %s" % (formatThousands(amount), unit)
|
||||
|
||||
def formatDuration(ms, verbosity=0, years=True, hours=True, milliseconds=True):
|
||||
'''
|
||||
verbosity
|
||||
0: D:HH:MM:SS
|
||||
1: Dd Hh Mm Ss
|
||||
2: D days H hours M minutes S seconds
|
||||
years
|
||||
True: 366 days are 1 year 1 day
|
||||
False: 366 days are 366 days
|
||||
hours
|
||||
True: 30 seconds are 00:00:30
|
||||
False: 30 seconds are 00:30
|
||||
milliseconds
|
||||
True: always display milliseconds
|
||||
False: never display milliseconds
|
||||
>>> formatDuration(1000 * 60 * 60 * 24 * 366)
|
||||
'1:001:00:00:00.000'
|
||||
>>> formatDuration(1000 * 60 * 60 * 24 * 366, years=False)
|
||||
'366:00:00:00.000'
|
||||
>>> formatDuration(1000 * 60 * 60 * 24 * 365 + 2003, verbosity=2)
|
||||
'1 year 2 seconds 3 milliseconds'
|
||||
>>> formatDuration(1000 * 30, hours=False, milliseconds=False)
|
||||
'00:30'
|
||||
'''
|
||||
if not ms and ms != 0:
|
||||
return ''
|
||||
if years:
|
||||
y = int(ms / 31536000000)
|
||||
d = int(ms % 31536000000 / 86400000)
|
||||
else:
|
||||
d = int(ms / 86400000)
|
||||
h = int(ms % 86400000 / 3600000)
|
||||
m = int(ms % 3600000 / 60000)
|
||||
s = int(ms % 60000 / 1000)
|
||||
ms = ms % 1000
|
||||
if verbosity == 0:
|
||||
if years and y:
|
||||
duration = "%d:%03d:%02d:%02d:%02d" % (y, d, h, m, s)
|
||||
elif d:
|
||||
duration = "%d:%02d:%02d:%02d" % (d, h, m, s)
|
||||
elif hours or h:
|
||||
duration = "%02d:%02d:%02d" % (h, m, s)
|
||||
else:
|
||||
duration = "%02d:%02d" % (m, s)
|
||||
if milliseconds:
|
||||
duration += ".%03d" % ms
|
||||
else:
|
||||
if verbosity == 1:
|
||||
durations = ["%sd" % d, "%sh" % h, "%sm" % m, "%ss" % s]
|
||||
if years:
|
||||
durations.insert(0, "%sy" % y)
|
||||
if milliseconds:
|
||||
durations.append("%sms" % ms)
|
||||
else:
|
||||
durations = [plural(d, 'day'), plural(h,'hour'),
|
||||
plural(m, 'minute'), plural(s, 'second')]
|
||||
if years:
|
||||
durations.insert(0, plural(y, 'year'))
|
||||
if milliseconds:
|
||||
durations.append(plural(ms, 'millisecond'))
|
||||
durations = filter(lambda x: not x.startswith('0'), durations)
|
||||
duration = ' '.join(durations)
|
||||
return duration
|
||||
|
||||
def ms2runtime(ms, shortenLong=False):
|
||||
# deprecated - use formatDuration
|
||||
'''
|
||||
>>> ms2runtime(5000)
|
||||
'5 seconds'
|
||||
>>> ms2runtime(500000)
|
||||
'8 minutes 20 seconds'
|
||||
>>> ms2runtime(50000000)
|
||||
'13 hours 53 minutes 20 seconds'
|
||||
>>> ms2runtime(50000000-20000)
|
||||
'13 hours 53 minutes'
|
||||
'''
|
||||
if shortenLong and ms > 1000 * 60 * 60 * 24 * 464:
|
||||
return formatDuration(ms, verbosity=1, milliseconds=False)
|
||||
return formatDuration(ms, verbosity=2, milliseconds=False)
|
||||
|
||||
def ms2playtime(ms, hours=False):
|
||||
# deprecated - use formatDuration
|
||||
'''
|
||||
>>> ms2playtime(5000)
|
||||
'00:05'
|
||||
>>> ms2playtime(500000)
|
||||
'08:20'
|
||||
>>> ms2playtime(50000000)
|
||||
'13:53:20'
|
||||
'''
|
||||
return formatDuration(ms, hours=False, years=False, milliseconds=False)
|
||||
|
||||
def ms2time(ms):
|
||||
# deprecated - use formatDuration
|
||||
'''
|
||||
>>> ms2time(44592123)
|
||||
'12:23:12.123'
|
||||
'''
|
||||
return formatDuration(ms, years=False)
|
||||
|
||||
def time2ms(timeString):
|
||||
'''
|
||||
>>> time2ms('12:23:12.123')
|
||||
44592123
|
||||
'''
|
||||
ms = 0.0
|
||||
p = timeString.split(':')
|
||||
for i in range(len(p)):
|
||||
ms = ms * 60 + float(p[i])
|
||||
return int(ms * 1000)
|
||||
|
||||
def shiftTime(offset, timeString):
|
||||
newTime = time2ms(timeString) + offset
|
||||
return ms2time(newTime)
|
||||
|
||||
172
oxlib/html.py
172
oxlib/html.py
|
|
@ -1,172 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
# GPL 2008
|
||||
import re
|
||||
import string
|
||||
from htmlentitydefs import name2codepoint
|
||||
|
||||
|
||||
# Configuration for urlize() function
|
||||
LEADING_PUNCTUATION = ['(', '<', '<']
|
||||
TRAILING_PUNCTUATION = ['.', ',', ')', '>', '\n', '>', "'", '"']
|
||||
|
||||
# list of possible strings used for bullets in bulleted lists
|
||||
DOTS = ['·', '*', '\xe2\x80\xa2', '•', '•', '•']
|
||||
|
||||
unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)')
|
||||
word_split_re = re.compile(r'(\s+)')
|
||||
punctuation_re = re.compile('^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % \
|
||||
('|'.join([re.escape(x) for x in LEADING_PUNCTUATION]),
|
||||
'|'.join([re.escape(x) for x in TRAILING_PUNCTUATION])))
|
||||
simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
|
||||
link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+')
|
||||
html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE)
|
||||
hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL)
|
||||
trailing_empty_content_re = re.compile(r'(?:<p>(?: |\s|<br \/>)*?</p>\s*)+\Z')
|
||||
del x # Temporary variable
|
||||
|
||||
def escape(html):
|
||||
'''
|
||||
Returns the given HTML with ampersands, quotes and carets encoded
|
||||
|
||||
>>> escape('html "test" & <brothers>')
|
||||
'html "test" & <brothers>'
|
||||
'''
|
||||
if not isinstance(html, basestring):
|
||||
html = str(html)
|
||||
return html.replace('&', '&').replace('<', '<').replace('>', '>').replace('"', '"').replace("'", ''')
|
||||
|
||||
def linebreaks(value):
|
||||
'''
|
||||
Converts newlines into <p> and <br />
|
||||
'''
|
||||
value = re.sub(r'\r\n|\r|\n', '\n', value) # normalize newlines
|
||||
paras = re.split('\n{2,}', value)
|
||||
paras = ['<p>%s</p>' % p.strip().replace('\n', '<br />') for p in paras]
|
||||
return '\n\n'.join(paras)
|
||||
|
||||
def stripTags(value):
|
||||
"""
|
||||
Returns the given HTML with all tags stripped
|
||||
|
||||
>>> stripTags('some <h2>title</h2> <script>asdfasdf</script>')
|
||||
'some title asdfasdf'
|
||||
"""
|
||||
return re.sub(r'<[^>]*?>', '', value)
|
||||
|
||||
def stripSpacesBetweenTags(value):
|
||||
"Returns the given HTML with spaces between tags normalized to a single space"
|
||||
return re.sub(r'>\s+<', '> <', value)
|
||||
|
||||
def stripEntities(value):
|
||||
"Returns the given HTML with all entities (&something;) stripped"
|
||||
return re.sub(r'&(?:\w+|#\d);', '', value)
|
||||
|
||||
def fixAmpersands(value):
|
||||
"Returns the given HTML with all unencoded ampersands encoded correctly"
|
||||
return unencoded_ampersands_re.sub('&', value)
|
||||
|
||||
def urlize(text, trim_url_limit=None, nofollow=False):
|
||||
"""
|
||||
Converts any URLs in text into clickable links. Works on http://, https:// and
|
||||
www. links. Links can have trailing punctuation (periods, commas, close-parens)
|
||||
and leading punctuation (opening parens) and it'll still do the right thing.
|
||||
|
||||
If trim_url_limit is not None, the URLs in link text will be limited to
|
||||
trim_url_limit characters.
|
||||
|
||||
If nofollow is True, the URLs in link text will get a rel="nofollow" attribute.
|
||||
"""
|
||||
trim_url = lambda x, limit=trim_url_limit: limit is not None and (x[:limit] + (len(x) >=limit and '...' or '')) or x
|
||||
words = word_split_re.split(text)
|
||||
nofollow_attr = nofollow and ' rel="nofollow"' or ''
|
||||
for i, word in enumerate(words):
|
||||
match = punctuation_re.match(word)
|
||||
if match:
|
||||
lead, middle, trail = match.groups()
|
||||
if middle.startswith('www.') or ('@' not in middle and not middle.startswith('http://') and \
|
||||
len(middle) > 0 and middle[0] in string.letters + string.digits and \
|
||||
(middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com'))):
|
||||
middle = '<a href="http://%s"%s>%s</a>' % (middle, nofollow_attr, trim_url(middle))
|
||||
if middle.startswith('http://') or middle.startswith('https://'):
|
||||
middle = '<a href="%s"%s>%s</a>' % (middle, nofollow_attr, trim_url(middle))
|
||||
if '@' in middle and not middle.startswith('www.') and not ':' in middle \
|
||||
and simple_email_re.match(middle):
|
||||
middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
|
||||
if lead + middle + trail != word:
|
||||
words[i] = lead + middle + trail
|
||||
return ''.join(words)
|
||||
|
||||
def cleanHtml(text):
    """
    Clean up the given HTML:

    * convert <b>/<i> tags to <strong>/<em>,
    * encode ampersands correctly,
    * strip "target" attributes from <a> tags,
    * drop presentational gunk such as <br clear="all">,
    * turn hard-coded bullet paragraphs into unordered lists,
    * trim empty trailing content at the bottom of the text.
    """
    from text import normalizeNewlines
    cleaned = normalizeNewlines(text)
    # Presentational tags -> semantic equivalents.
    cleaned = re.sub(r'<(/?)\s*b\s*>', '<\\1strong>', cleaned)
    cleaned = re.sub(r'<(/?)\s*i\s*>', '<\\1em>', cleaned)
    cleaned = fixAmpersands(cleaned)
    # Remove all target="" attributes from <a> tags.
    cleaned = link_target_attribute_re.sub('\\1', cleaned)
    # Trim stupid HTML such as <br clear="all">.
    cleaned = html_gunk_re.sub('', cleaned)

    def bullets_to_list(match):
        # Rewrite a run of bullet paragraphs as list items inside a <ul>.
        chunk = match.group().replace('</p>', '</li>')
        for bullet in DOTS:
            chunk = chunk.replace('<p>%s' % bullet, '<li>')
        return '<ul>\n%s\n</ul>' % chunk

    cleaned = hard_coded_bullets_re.sub(bullets_to_list, cleaned)
    # Remove empty paragraphs, but only at the bottom of the text.
    cleaned = trailing_empty_content_re.sub('', cleaned)
    return cleaned
|
||||
|
||||
# This pattern matches a character entity reference (a decimal numeric
# reference, a hexadecimal numeric reference, or a named reference).
# The trailing ';' is optional, so sloppy HTML like "&amp" still matches.
charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?')
|
||||
|
||||
def decodeHtml(html):
    """
    Decode HTML character references (named, decimal and hexadecimal
    entities) in `html` and return a unicode string.  Non-breaking
    spaces (u'\\xa0') are replaced by plain spaces.

    >>> decodeHtml('me &amp; you and &#36;&#38;%')
    u'me & you and $&%'
    """
    # Coerce to unicode up front; the old `unicode(html)[:]` copy was
    # pointless (unicode is immutable) and is dropped.
    if type(html) != unicode:
        html = unicode(html)
    # After the coercion above `html` is always exactly unicode, so the
    # original str fallback (chr() for codepoints < 256) was dead code.
    uchr = unichr

    def entitydecode(match, uchr=uchr):
        entity = match.group(1)
        if entity.startswith('#x'):
            # Hexadecimal numeric reference, e.g. &#x26;
            return uchr(int(entity[2:], 16))
        elif entity.startswith('#'):
            # Decimal numeric reference, e.g. &#38;
            return uchr(int(entity[1:]))
        elif entity in name2codepoint:
            # Named reference, e.g. &amp;
            return uchr(name2codepoint[entity])
        else:
            # Unknown entity: leave the original text untouched.
            return match.group(0)

    return charrefpat.sub(entitydecode, html).replace(u'\xa0', ' ')
|
||||
|
||||
def highlight(text, query, hlClass="hl"):
    """
    Wrap every case-insensitive occurrence of `query` in `text` in a
    <span class="hlClass"> element.

    >>> highlight('me & you and $&%', 'and')
    'me & you <span class="hl">and</span> $&%'
    """
    if query:
        # Fold <br /> tags to a single character so they cannot match.
        text = text.replace('<br />', '|')
        pattern = re.escape(query).replace('\ ', '.')
        matches = re.compile("(%s)" % pattern, re.IGNORECASE).findall(text)
        for found in matches:
            text = re.sub("(%s)" % re.escape(found).replace('\ ', '.'),
                          '<span class="%s">\\1</span>' % hlClass, text)
        text = text.replace('|', '<br />')
    return text
|
||||
|
||||
243
oxlib/iso.py
243
oxlib/iso.py
|
|
@@ -1,243 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
# GPL 2008
|
||||
|
||||
_iso639_languages = [
|
||||
("Unknown", "", "", "und"),
|
||||
("Afar", "", "aa", "aar"),
|
||||
("Abkhazian", "", "ab", "abk"),
|
||||
("Afrikaans", "", "af", "afr"),
|
||||
("Akan", "", "ak", "aka"),
|
||||
("Albanian", "", "sq", "sqi"),
|
||||
("Amharic", "", "am", "amh"),
|
||||
("Arabic", "", "ar", "ara"),
|
||||
("Aragonese", "", "an", "arg"),
|
||||
("Armenian", "", "hy", "hye"),
|
||||
("Assamese", "", "as", "asm"),
|
||||
("Avaric", "", "av", "ava"),
|
||||
("Avestan", "", "ae", "ave"),
|
||||
("Aymara", "", "ay", "aym"),
|
||||
("Azerbaijani", "", "az", "aze"),
|
||||
("Bashkir", "", "ba", "bak"),
|
||||
("Bambara", "", "bm", "bam"),
|
||||
("Basque", "", "eu", "eus"),
|
||||
("Belarusian", "", "be", "bel"),
|
||||
("Bengali", "", "bn", "ben"),
|
||||
("Bihari", "", "bh", "bih"),
|
||||
("Bislama", "", "bi", "bis"),
|
||||
("Bosnian", "", "bs", "bos"),
|
||||
("Breton", "", "br", "bre"),
|
||||
("Bulgarian", "", "bg", "bul"),
|
||||
("Burmese", "", "my", "mya"),
|
||||
("Catalan", "", "ca", "cat"),
|
||||
("Chamorro", "", "ch", "cha"),
|
||||
("Chechen", "", "ce", "che"),
|
||||
("Chinese", "", "zh", "zho"),
|
||||
("Church Slavic", "", "cu", "chu"),
|
||||
("Chuvash", "", "cv", "chv"),
|
||||
("Cornish", "", "kw", "cor"),
|
||||
("Corsican", "", "co", "cos"),
|
||||
("Cree", "", "cr", "cre"),
|
||||
("Czech", "", "cs", "ces"),
|
||||
("Danish", "Dansk", "da", "dan"),
|
||||
("Divehi", "", "dv", "div"),
|
||||
("Dutch", "Nederlands", "nl", "nld"),
|
||||
("Dzongkha", "", "dz", "dzo"),
|
||||
("English", "English", "en", "eng"),
|
||||
("Esperanto", "", "eo", "epo"),
|
||||
("Estonian", "", "et", "est"),
|
||||
("Ewe", "", "ee", "ewe"),
|
||||
("Faroese", "", "fo", "fao"),
|
||||
("Fijian", "", "fj", "fij"),
|
||||
("Finnish", "Suomi", "fi", "fin"),
|
||||
("French", "Francais", "fr", "fra"),
|
||||
("Western Frisian", "", "fy", "fry"),
|
||||
("Fulah", "", "ff", "ful"),
|
||||
("Georgian", "", "ka", "kat"),
|
||||
("German", "Deutsch", "de", "deu"),
|
||||
("Gaelic (Scots)", "", "gd", "gla"),
|
||||
("Irish", "", "ga", "gle"),
|
||||
("Galician", "", "gl", "glg"),
|
||||
("Manx", "", "gv", "glv"),
|
||||
("Greek, Modern", "", "el", "ell"),
|
||||
("Guarani", "", "gn", "grn"),
|
||||
("Gujarati", "", "gu", "guj"),
|
||||
("Haitian", "", "ht", "hat"),
|
||||
("Hausa", "", "ha", "hau"),
|
||||
("Hebrew", "", "he", "heb"),
|
||||
("Herero", "", "hz", "her"),
|
||||
("Hindi", "", "hi", "hin"),
|
||||
("Hiri Motu", "", "ho", "hmo"),
|
||||
("Hungarian", "Magyar", "hu", "hun"),
|
||||
("Igbo", "", "ig", "ibo"),
|
||||
("Icelandic", "Islenska", "is", "isl"),
|
||||
("Ido", "", "io", "ido"),
|
||||
("Sichuan Yi", "", "ii", "iii"),
|
||||
("Inuktitut", "", "iu", "iku"),
|
||||
("Interlingue", "", "ie", "ile"),
|
||||
("Interlingua", "", "ia", "ina"),
|
||||
("Indonesian", "", "id", "ind"),
|
||||
("Inupiaq", "", "ik", "ipk"),
|
||||
("Italian", "Italiano", "it", "ita"),
|
||||
("Javanese", "", "jv", "jav"),
|
||||
("Japanese", "", "ja", "jpn"),
|
||||
("Kalaallisut (Greenlandic)", "", "kl", "kal"),
|
||||
("Kannada", "", "kn", "kan"),
|
||||
("Kashmiri", "", "ks", "kas"),
|
||||
("Kanuri", "", "kr", "kau"),
|
||||
("Kazakh", "", "kk", "kaz"),
|
||||
("Central Khmer", "", "km", "khm"),
|
||||
("Kikuyu", "", "ki", "kik"),
|
||||
("Kinyarwanda", "", "rw", "kin"),
|
||||
("Kirghiz", "", "ky", "kir"),
|
||||
("Komi", "", "kv", "kom"),
|
||||
("Kongo", "", "kg", "kon"),
|
||||
("Korean", "", "ko", "kor"),
|
||||
("Kuanyama", "", "kj", "kua"),
|
||||
("Kurdish", "", "ku", "kur"),
|
||||
("Lao", "", "lo", "lao"),
|
||||
("Latin", "", "la", "lat"),
|
||||
("Latvian", "", "lv", "lav"),
|
||||
("Limburgan", "", "li", "lim"),
|
||||
("Lingala", "", "ln", "lin"),
|
||||
("Lithuanian", "", "lt", "lit"),
|
||||
("Luxembourgish", "", "lb", "ltz"),
|
||||
("Luba-Katanga", "", "lu", "lub"),
|
||||
("Ganda", "", "lg", "lug"),
|
||||
("Macedonian", "", "mk", "mkd"),
|
||||
("Marshallese", "", "mh", "mah"),
|
||||
("Malayalam", "", "ml", "mal"),
|
||||
("Maori", "", "mi", "mri"),
|
||||
("Marathi", "", "mr", "mar"),
|
||||
("Malay", "", "ms", "msa"),
|
||||
("Malagasy", "", "mg", "mlg"),
|
||||
("Maltese", "", "mt", "mlt"),
|
||||
("Moldavian", "", "mo", "mol"),
|
||||
("Mongolian", "", "mn", "mon"),
|
||||
("Nauru", "", "na", "nau"),
|
||||
("Navajo", "", "nv", "nav"),
|
||||
("Ndebele, South", "", "nr", "nbl"),
|
||||
("Ndebele, North", "", "nd", "nde"),
|
||||
("Ndonga", "", "ng", "ndo"),
|
||||
("Nepali", "", "ne", "nep"),
|
||||
("Norwegian Nynorsk", "", "nn", "nno"),
|
||||
("Norwegian Bokmål", "", "nb", "nob"),
|
||||
("Norwegian", "Norsk", "no", "nor"),
|
||||
("Chichewa; Nyanja", "", "ny", "nya"),
|
||||
("Occitan (post 1500); Provençal", "", "oc", "oci"),
|
||||
("Ojibwa", "", "oj", "oji"),
|
||||
("Oriya", "", "or", "ori"),
|
||||
("Oromo", "", "om", "orm"),
|
||||
("Ossetian; Ossetic", "", "os", "oss"),
|
||||
("Panjabi", "", "pa", "pan"),
|
||||
("Persian", "", "fa", "fas"),
|
||||
("Pali", "", "pi", "pli"),
|
||||
("Polish", "", "pl", "pol"),
|
||||
("Portuguese", "Portugues", "pt", "por"),
|
||||
("Pushto", "", "ps", "pus"),
|
||||
("Quechua", "", "qu", "que"),
|
||||
("Romansh", "", "rm", "roh"),
|
||||
("Romanian", "", "ro", "ron"),
|
||||
("Rundi", "", "rn", "run"),
|
||||
("Russian", "", "ru", "rus"),
|
||||
("Sango", "", "sg", "sag"),
|
||||
("Sanskrit", "", "sa", "san"),
|
||||
("Serbian", "", "sr", "srp"),
|
||||
("Croatian", "Hrvatski", "hr", "hrv"),
|
||||
("Sinhala", "", "si", "sin"),
|
||||
("Slovak", "", "sk", "slk"),
|
||||
("Slovenian", "", "sl", "slv"),
|
||||
("Northern Sami", "", "se", "sme"),
|
||||
("Samoan", "", "sm", "smo"),
|
||||
("Shona", "", "sn", "sna"),
|
||||
("Sindhi", "", "sd", "snd"),
|
||||
("Somali", "", "so", "som"),
|
||||
("Sotho, Southern", "", "st", "sot"),
|
||||
("Spanish", "Espanol", "es", "spa"),
|
||||
("Sardinian", "", "sc", "srd"),
|
||||
("Swati", "", "ss", "ssw"),
|
||||
("Sundanese", "", "su", "sun"),
|
||||
("Swahili", "", "sw", "swa"),
|
||||
("Swedish", "Svenska", "sv", "swe"),
|
||||
("Tahitian", "", "ty", "tah"),
|
||||
("Tamil", "", "ta", "tam"),
|
||||
("Tatar", "", "tt", "tat"),
|
||||
("Telugu", "", "te", "tel"),
|
||||
("Tajik", "", "tg", "tgk"),
|
||||
("Tagalog", "", "tl", "tgl"),
|
||||
("Thai", "", "th", "tha"),
|
||||
("Tibetan", "", "bo", "bod"),
|
||||
("Tigrinya", "", "ti", "tir"),
|
||||
("Tonga (Tonga Islands)", "", "to", "ton"),
|
||||
("Tswana", "", "tn", "tsn"),
|
||||
("Tsonga", "", "ts", "tso"),
|
||||
("Turkmen", "", "tk", "tuk"),
|
||||
("Turkish", "", "tr", "tur"),
|
||||
("Twi", "", "tw", "twi"),
|
||||
("Uighur", "", "ug", "uig"),
|
||||
("Ukrainian", "", "uk", "ukr"),
|
||||
("Urdu", "", "ur", "urd"),
|
||||
("Uzbek", "", "uz", "uzb"),
|
||||
("Venda", "", "ve", "ven"),
|
||||
("Vietnamese", "", "vi", "vie"),
|
||||
("Volapük", "", "vo", "vol"),
|
||||
("Welsh", "", "cy", "cym"),
|
||||
("Walloon", "", "wa", "wln"),
|
||||
("Wolof", "", "wo", "wol"),
|
||||
("Xhosa", "", "xh", "xho"),
|
||||
("Yiddish", "", "yi", "yid"),
|
||||
("Yoruba", "", "yo", "yor"),
|
||||
("Zhuang", "", "za", "zha"),
|
||||
("Zulu", "", "zu", "zul"),
|
||||
]
|
||||
|
||||
def codeToLang(code):
    """Return the English language name for a 2- or 3-letter ISO 639
    code, or None if the code is unknown."""
    code = code.lower()
    # Column 2 of each table row holds the 2-letter code, column 3 the
    # 3-letter code; any other code length cannot match.
    column = {2: 2, 3: 3}.get(len(code))
    if column is not None:
        for entry in _iso639_languages:
            if entry[column] == code:
                return entry[0]
    return None
|
||||
|
||||
def langTo3Code(lang):
    """Return the 3-letter ISO 639 code for `lang`, or None.

    NOTE(review): langEnglishName() matches on the *native* name column
    of the table, so this only succeeds for names appearing there —
    confirm against callers before passing English names.
    """
    english = langEnglishName(lang)
    if not english:
        return None
    english = english.lower()
    for entry in _iso639_languages:
        if entry[0].lower() == english:
            return entry[3]
    return None
|
||||
|
||||
def langTo2Code(lang):
    """Return the 2-letter ISO 639 code for `lang`, or None.

    NOTE(review): langEnglishName() matches on the *native* name column
    of the table, so this only succeeds for names appearing there —
    confirm against callers before passing English names.
    """
    english = langEnglishName(lang)
    if not english:
        return None
    english = english.lower()
    for entry in _iso639_languages:
        if entry[0].lower() == english:
            return entry[2]
    return None
|
||||
|
||||
def langCode2To3(code):
    """Convert a 2-letter ISO 639 code to the 3-letter code, or None.

    Fix: the original computed the conversion but lacked a `return`, so
    it always returned None.
    """
    return langTo3Code(codeToLang(code))
|
||||
|
||||
def langCode3To2(code):
    """Convert a 3-letter ISO 639 code to the 2-letter code, or None.

    Fix: the original computed the conversion but lacked a `return`, so
    it always returned None.
    """
    return langTo2Code(codeToLang(code))
|
||||
|
||||
def langEnglishName(lang):
    # Map a language's *native* name (column 1 of _iso639_languages,
    # e.g. 'Deutsch', 'Suomi') to its English name (column 0).
    # NOTE(review): most table rows have an empty native name, so this
    # returns None for them, and lookups by English name fail too —
    # confirm this asymmetry is intended before relying on it via
    # langTo2Code()/langTo3Code().
    lang = lang.lower()
    for l in _iso639_languages:
        if l[1].lower() == lang:
            return l[0]
    return None
|
||||
|
||||
def languages2Letter():
    """Return the list of all non-empty 2-letter ISO 639-1 codes."""
    return [entry[2] for entry in _iso639_languages if entry[2]]
|
||||
|
||||
89
oxlib/net.py
89
oxlib/net.py
|
|
@@ -1,89 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
# GPL 2008
|
||||
import os
|
||||
import gzip
|
||||
import StringIO
|
||||
import urllib
|
||||
import urllib2
|
||||
|
||||
from chardet.universaldetector import UniversalDetector
|
||||
|
||||
|
||||
# Default headers for HTTP requests.
# The User-Agent mimics a desktop Firefox so servers send their normal
# markup; gzip is requested and transparently decompressed in getUrl().
DEFAULT_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (X11; U; Linux i386; en-US; rv:1.9.1.1) Gecko/20090716 Firefox/3.5',
    'Accept-Encoding': 'gzip'
}
|
||||
|
||||
def status(url, data=None, headers=DEFAULT_HEADERS):
    """Return the HTTP status code for `url`.

    POSTs `data` if given.  On an HTTP error response the error's
    status code is returned instead of the exception propagating.
    """
    try:
        f = openUrl(url, data, headers)
        s = f.code
    except urllib2.HTTPError, e:
        # HTTPError objects carry the response's status code.
        s = e.code
    return s
|
||||
|
||||
def exists(url, data=None, headers=DEFAULT_HEADERS):
    """Return True if `url` answers with a 2xx or 3xx status code."""
    code = status(url, data, headers)
    return 200 <= code < 400
|
||||
|
||||
def getHeaders(url, data=None, headers=DEFAULT_HEADERS):
    """Return the response headers for `url` as a plain dict.

    The numeric status code is added under the 'Status' key (as a
    string).  HTTP error responses are handled the same way instead of
    raising.
    """
    try:
        f = openUrl(url, data, headers)
        f.headers['Status'] = "%s" % f.code
        headers = f.headers
        f.close()
    except urllib2.HTTPError, e:
        # The error object exposes the same headers interface.
        e.headers['Status'] = "%s" % e.code
        headers = e.headers
    return dict(headers)
|
||||
|
||||
def openUrl(url, data=None, headers=DEFAULT_HEADERS):
    """Open `url` with the given headers and return the response object.

    Spaces in the URL are percent-encoded; everything else is passed
    through untouched.
    """
    request = urllib2.Request(url.replace(' ', '%20'), data, headers)
    return urllib2.urlopen(request)
|
||||
|
||||
def getUrl(url, data=None, headers=DEFAULT_HEADERS, returnHeaders=False):
    """Fetch `url` and return the response body.

    gzip-encoded responses are decompressed transparently.  With
    returnHeaders=True a (headers_dict, body) tuple is returned instead,
    the headers including the status code under 'Status'.
    """
    f = openUrl(url, data, headers)
    body = f.read()
    f.close()
    if f.headers.get('content-encoding', None) == 'gzip':
        body = gzip.GzipFile(fileobj=StringIO.StringIO(body)).read()
    if not returnHeaders:
        return body
    f.headers['Status'] = "%s" % f.code
    return dict(f.headers), body
|
||||
|
||||
def getUrlUnicode(url):
    """Fetch `url` and decode the body to unicode.

    Falls back to latin-1 when no encoding is detected, since latin-1
    can decode any byte sequence.
    """
    data = getUrl(url)
    encoding = getEncoding(data) or 'latin-1'
    return unicode(data, encoding)
|
||||
|
||||
def getEncoding(data):
    """Guess the character encoding of an HTML/text document.

    A charset declared in a meta tag wins; otherwise chardet's
    UniversalDetector is fed the document line by line.
    """
    if 'content="text/html; charset=utf-8"' in data:
        return 'utf-8'
    if 'content="text/html; charset=iso-8859-1"' in data:
        return 'iso-8859-1'
    detector = UniversalDetector()
    for line in data.split('\n'):
        detector.feed(line)
        if detector.done:
            # The detector is confident enough; stop feeding it.
            break
    detector.close()
    return detector.result['encoding']
|
||||
|
||||
def saveUrl(url, filename, overwrite=False):
    """Download `url` to `filename` unless the file already exists.

    Missing parent directories are created.  Pass overwrite=True to
    replace an existing file.
    """
    if not os.path.exists(filename) or overwrite:
        dirname = os.path.dirname(filename)
        # Fix: dirname is '' for a bare filename, and os.makedirs('')
        # raises OSError — only create a directory when there is one.
        if dirname and not os.path.exists(dirname):
            os.makedirs(dirname)
        data = getUrl(url)
        # Fix: write in binary mode; the payload may be an image or
        # archive, and text mode would corrupt it on Windows.
        f = open(filename, 'wb')
        f.write(data)
        f.close()
|
||||
|
||||
|
|
@@ -1,161 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
# GPL 2008
|
||||
import re
|
||||
|
||||
# Leading articles in several languages ('the', 'la', 'der', ...), used
# to build and recognize the canonical "Title, The" form.  The last few
# entries are escaped non-ASCII articles.
_articles = ('the', 'la', 'a', 'die', 'der', 'le', 'el',
    "l'", 'il', 'das', 'les', 'o', 'ein', 'i', 'un', 'los', 'de',
    'an', 'una', 'las', 'eine', 'den', 'gli', 'het', 'os', 'lo',
    'az', 'det', 'ha-', 'een', 'ang', 'oi', 'ta', 'al-', 'dem',
    'mga', 'uno', "un'", 'ett', u'\xcf', 'eines', u'\xc7', 'els',
    u'\xd4\xef', u'\xcf\xe9')

# Articles in a dictionary, for O(1) membership tests.
_articlesDict = dict([(x, x) for x in _articles])
# Articles with a trailing separator appended (a space, unless the
# article already ends in an apostrophe or hyphen), for prefix matching.
_spArticles = []
for article in _articles:
    if article[-1] not in ("'", '-'): article += ' '
    _spArticles.append(article)
|
||||
|
||||
def canonicalTitle(title):
    """Return the title in the canonic format 'Movie Title, The'.

    >>> canonicalTitle('The Movie Title')
    'Movie Title, The'
    """
    # Already canonical?  (The part after the last ', ' is an article.)
    try:
        if _articlesDict.has_key(title.split(', ')[-1].lower()):
            return title
    except IndexError:
        pass
    lowered = title.lower()
    for article in _spArticles:
        if not lowered.startswith(article):
            continue
        cut = len(article)
        title = '%s, %s' % (title[cut:], title[:cut])
        if article[-1] == ' ':
            # Drop the trailing space that was part of the matched article.
            title = title[:-1]
        break
    return title
|
||||
|
||||
def normalizeTitle(title):
    """Return the title in the normal "The Title" format.

    >>> normalizeTitle('Movie Title, The')
    'The Movie Title'
    """
    parts = title.split(', ')
    if len(parts) > 1 and _articlesDict.has_key(parts[-1].lower()):
        article = parts[-1]
        # No space after articles ending in apostrophe/hyphen ("l'", "al-").
        sep = '' if article[-1] in ("'", '-') else ' '
        title = '%s%s%s' % (article, sep, ', '.join(parts[:-1]))
    return title
|
||||
|
||||
def normalizeImdbId(imdbId):
    """Return 7 digit imdbId.

    >>> normalizeImdbId('http://www.imdb.com/title/tt0159206/')
    '0159206'
    >>> normalizeImdbId(159206)
    '0159206'
    >>> normalizeImdbId('tt0159206')
    '0159206'
    """
    if isinstance(imdbId, basestring):
        # Keep only the 7-digit run embedded in the string.
        return re.sub('.*(\d{7}).*', '\\1', imdbId)
    if isinstance(imdbId, int):
        return "%07d" % imdbId
    return imdbId
|
||||
|
||||
|
||||
# Common surname particles ('de', 'van', 'von', ...), used by
# canonicalName() to keep them attached to the surname.
_sname_suffixes = ('de', 'la', 'der', 'den', 'del', 'y', 'da', 'van',
    'e', 'von', 'vom', 'the', 'di', 'du', 'el', 'al')
|
||||
|
||||
def canonicalName(name):
    """Return the given name in canonical "Surname, Name" format.
    It assumes that name is in the 'Name Surname' format.

    >>> canonicalName('Jean Luc Godard')
    'Godard, Jean Luc'

    >>> canonicalName('Ivan Ivanov-Vano')
    'Ivanov-Vano, Ivan'

    >>> canonicalName('Gus Van Sant')
    'Van Sant, Gus'

    >>> canonicalName('Brian De Palma')
    'De Palma, Brian'
    """

    # XXX: some statistics (over 1852406 names):
    # - just a surname: 51921
    # - single surname, single name: 1792759
    # - composed surname, composed name: 7726
    # - composed surname, single name: 55623
    # (2: 49259, 3: 5502, 4: 551)
    # - single surname, composed name: 186604
    # (2: 178315, 3: 6573, 4: 1219, 5: 352)
    # Don't convert names already in the canonical format.
    if name.find(', ') != -1: return name
    sname = name.split(' ')
    snl = len(sname)
    if snl == 2:
        # Just a name and a surname: how boring...
        name = '%s, %s' % (sname[1], sname[0])
    elif snl > 2:
        lsname = [x.lower() for x in sname]
        # Candidate positions for a surname particle: the start of the
        # name and the one or two words before the final word.
        if snl == 3: _indexes = (0, snl-2)
        else: _indexes = (0, snl-2, snl-3)
        # Check for common surname prefixes at the beginning and near the end.
        for index in _indexes:
            if lsname[index] not in _sname_suffixes: continue
            try:
                # Build the surname: particle plus the following word.
                surn = '%s %s' % (sname[index], sname[index+1])
                # Remove both words; the second shifts into `index`.
                del sname[index]
                del sname[index]
                try:
                    # Handle the "Jr." after the name.
                    if lsname[index+2].startswith('jr'):
                        surn += ' %s' % sname[index]
                        del sname[index]
                except (IndexError, ValueError):
                    pass
                name = '%s, %s' % (surn, ' '.join(sname))
                break
            except ValueError:
                continue
        else:
            # No particle matched: last word is the surname.
            name = '%s, %s' % (sname[-1], ' '.join(sname[:-1]))
    return name
|
||||
|
||||
def normalizeName(name):
    """Return a name in the normal "Name Surname" format.

    >>> normalizeName('Godard, Jean Luc')
    'Jean Luc Godard'

    >>> normalizeName('Ivanov-Vano, Ivan')
    'Ivan Ivanov-Vano'

    >>> normalizeName('Van Sant, Gus')
    'Gus Van Sant'

    >>> normalizeName('De Palma, Brian')
    'Brian De Palma'
    """
    parts = name.split(', ')
    if len(parts) == 2:
        surname, first = parts
        name = '%s %s' % (first, surname)
    return name
|
||||
|
||||
270
oxlib/text.py
270
oxlib/text.py
|
|
@@ -1,270 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
# GPL 2008
|
||||
import math
|
||||
import re
|
||||
|
||||
|
||||
def findRe(string, regexp):
    """Return the first (stripped) match of `regexp` in `string`, or ''.

    The pattern is compiled with re.DOTALL, so '.' also matches newlines.
    """
    matches = re.compile(regexp, re.DOTALL).findall(string)
    return matches[0].strip() if matches else ''
|
||||
|
||||
def findString(string, string0='', string1=''):
    """Return the string between string0 and string1.

    If string0 or string1 is left out, begining or end of string is used.

    >>> findString('i am not there', string1=' not there')
    'i am'

    >>> findString('i am not there', 'i am ', ' there')
    'not'

    >>> findString('i am not there', 'i am not t')
    'here'
    """
    # Empty markers anchor at the start/end of the string instead.
    start = re.escape(string0) if string0 else '^'
    end = re.escape(string1) if string1 else '$'
    return findRe(string, start + '(.*?)' + end)
|
||||
|
||||
def removeSpecialCharacters(text):
    """
    Removes special characters inserted by Word.
    """
    # En dash -> plain hyphen.
    text = text.replace(u'\u2013', '-')
    # NOTE(review): u'\u2026' is the ellipsis character; replacing the
    # sequence ellipsis+'O' with an apostrophe looks like a typo for a
    # smart-quote codepoint — confirm the intended source character.
    text = text.replace(u'\u2026O', "'")
    # Right single quotation mark -> apostrophe.
    text = text.replace(u'\u2019', "'")
    # NOTE(review): the next three source literals are EMPTY strings,
    # almost certainly mojibake from a lost cp1252 encoding.  As written,
    # str.replace(u'', ...) inserts the replacement between every
    # character and would mangle the text — restore the original special
    # characters before relying on this function.
    text = text.replace(u'', "'")
    text = text.replace(u'', "'")
    text = text.replace(u'', "-")
    return text
|
||||
|
||||
def wrap(text, width):
    """
    A word-wrap function that preserves existing line breaks and most spaces
    in the text. Expects that existing line breaks are posix newlines (\n).
    See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/148061
    """
    words = text.split(' ')
    wrapped = words[0]
    for word in words[1:]:
        # Width of the current (last) output line...
        line_len = len(wrapped[wrapped.rfind('\n') + 1:])
        # ...plus the width of the word up to its first embedded newline.
        word_len = len(word.split('\n', 1)[0])
        separator = '\n' if line_len + word_len >= width else ' '
        wrapped += separator + word
    return wrapped
|
||||
|
||||
def wrapString(string, length=80, separator='\n', balance=False):
    '''
    >>> wrapString(u"Anticonstitutionellement, Paris s'eveille", 16)
    u"Anticonstitution\\nellement, Paris \\ns'eveille"
    >>> wrapString(u'All you can eat', 12, '\\n', True)
    u'All you \\ncan eat'
    '''
    words = string.split(' ')
    if balance:
        # Balance lines: look for the smallest line length that still
        # yields the same number of lines as the requested one.
        line_count = len(wrapString(string, length, separator, False).split(separator))
        if line_count > 1:
            longest = max(len(w) for w in words)
            while length > longest:
                length -= 1
                if len(wrapString(string, length, separator, False).split(separator)) > line_count:
                    length += 1
                    break
    lines = ['']
    for word in words:
        if len(lines[-1] + word + u' ') <= length + 1:
            # Word fits on the current line.
            lines[-1] += word + u' '
        elif len(word) <= length:
            # Word fits on a fresh line.
            lines.append(word + u' ')
        else:
            # Word is longer than a line: hard-split it.
            split_at = length - len(lines[-1])
            lines[-1] += word[:split_at]
            for pos in range(split_at, len(word), length):
                lines.append(word[pos:pos + length])
            lines[-1] += u' '
    return separator.join(lines).strip()
|
||||
|
||||
def truncateString(string, length, padding='...', position='right'):
    """Truncate `string` to at most `length` characters, marking the cut
    with `padding` at the left, center or right.

    >>> truncateString('anticonstitutionellement', 16, '...', 'left')
    '...utionellement'
    >>> truncateString('anticonstitutionellement', 16, '...', 'center')
    'anticon...lement'
    >>> truncateString('anticonstitutionellement', 16, '...', 'right')
    'anticonstitut...'
    """
    total = len(string)
    pad = len(padding)
    if total > length:
        if position == 'left':
            string = '%s%s' % (padding, string[total + pad - length:])
        elif position == 'center':
            keep = length - pad
            # Keep one extra character at the front when `keep` is odd.
            head = int(math.ceil(keep / 2.0))
            tail = int(total - math.floor(keep / 2.0))
            string = '%s%s%s' % (string[:head], padding, string[tail:])
        elif position == 'right':
            string = '%s%s' % (string[:length - pad], padding)
    return string
|
||||
|
||||
def truncateWords(s, num):
    """Truncate `s` after about `num` characters, cutting at a word
    boundary and appending '...' when anything was dropped.

    NOTE(review): this definition is shadowed by a second, word-count
    based truncateWords() further down in this file — rename one of them.

    Fix: the original doctests called truncateString() by mistake.

    >>> truncateWords('Truncates a string after a certain number of chacters, but ends with a word', 23)
    'Truncates a string...'
    >>> truncateWords('Truncates a string', 23)
    'Truncates a string'
    """
    length = int(num)
    if len(s) <= length:
        return s
    words = s.split()
    ts = ""
    # Take whole words while they still fit within the limit.
    while words and len(ts) + len(words[0]) < length:
        ts += " " + words.pop(0)
    if words:
        ts += "..."
    return ts.strip()
|
||||
|
||||
def trimString(string, num):
    """Truncate `string` to at most `num` characters, keeping the last
    10 and marking the cut with '...'.

    >>> trimString('Truncates a string after a certain number of chacters', 23)
    'Truncates ...f chacters'
    >>> trimString('Truncates a string', 23)
    'Truncates a string'
    """
    if len(string) <= num:
        return string
    # num - 13 head characters + 3-char ellipsis + 10 tail characters.
    return string[:num - 13] + '...' + string[-10:]
|
||||
|
||||
def truncateWords(s, num):
    """Truncate `s` after `num` words, appending '...' when words were
    dropped.

    NOTE(review): this definition silently shadows the character-based
    truncateWords() defined earlier in this file.
    """
    length = int(num)
    words = s.split()
    if len(words) > length:
        words = words[:length]
        # Fix: guard against length == 0, where words[:0] is empty and
        # the original words[-1] lookup raised IndexError.
        if not words or not words[-1].endswith('...'):
            words.append('...')
    return ' '.join(words)
|
||||
|
||||
def getValidFilename(s):
    """
    Returns the given string converted to a string that can be used for a
    clean filename. Specifically, leading and trailing spaces are removed;
    all non-filename-safe characters are replaced and runs of underscores
    collapsed.

    >>> getValidFilename("john's portrait in 2004.jpg")
    'john_s_portrait_in_2004.jpg'
    """
    s = s.strip().replace(' ', '_')
    s = re.sub(r'[^-A-Za-z0-9_.\[\]\ ]', '_', s)
    # Fix: collapse ANY run of underscores to one.  The original chained
    # two replace('__', '_') calls, which left runs of five or more
    # underscores only partially collapsed.
    s = re.sub(r'_+', '_', s)
    return s
|
||||
|
||||
def getTextList(list_, last_word='or'):
    """
    >>> getTextList([u'a', u'b', u'c', u'd'])
    u'a, b, c or d'
    >>> getTextList([u'a', u'b', u'c'], 'and')
    u'a, b and c'
    >>> getTextList([u'a', u'b'], 'and')
    u'a and b'
    >>> getTextList([u'a'])
    u'a'
    >>> getTextList([])
    ''
    """
    if not list_:
        return ''
    if len(list_) == 1:
        return list_[0]
    # All but the last item are comma-joined; the last is attached with
    # the connective word.
    head = u', '.join([unicode(item) for item in list_[:-1]])
    return u'%s %s %s' % (head, last_word, list_[-1])
|
||||
|
||||
def getListText(text, last_word='or'):
    """Inverse of getTextList(): split a human-readable enumeration back
    into a list.

    >>> getListText(u'a, b, c or d')
    [u'a', u'b', u'c', u'd']
    >>> getListText(u'a, b and c', u'and')
    [u'a', u'b', u'c']
    >>> getListText(u'a and b', u'and')
    [u'a', u'b']
    >>> getListText(u'a')
    [u'a']
    >>> getListText(u'')
    []
    """
    if not text:
        return []
    list_ = text.split(u', ')
    # Fix: split only on the connective surrounded by spaces; the
    # original split on the bare word, corrupting items that merely
    # contain it (e.g. u'corn' was split by 'or').
    last = list_[-1].split(u' %s ' % last_word)
    if len(last) == 2:
        list_[-1] = last[0].strip()
        list_.append(last[1].strip())
    return list_
|
||||
|
||||
def normalizeNewlines(text):
    """Convert Windows (\\r\\n) and Mac (\\r) line endings to Unix (\\n)."""
    return text.replace('\r\n', '\n').replace('\r', '\n')
|
||||
|
||||
def recapitalize(text):
    """Lowercase `text`, then capitalize the first letter of the text
    and of each sentence (after '.', '?' or '!' followed by a space)."""
    capsRE = re.compile(r'(?:^|(?<=[\.\?\!] ))([a-z])')
    return capsRE.sub(lambda m: m.group(1).upper(), text.lower())
|
||||
|
||||
def phone2numeric(phone):
    """Convert letters in a phone number to their keypad digits.
    ('Q' and 'Z' have no key and are left untouched.)"""
    keypad = {'a': '2', 'b': '2', 'c': '2', 'd': '3', 'e': '3', 'f': '3',
              'g': '4', 'h': '4', 'i': '4', 'j': '5', 'k': '5', 'l': '5',
              'm': '6', 'n': '6', 'o': '6', 'p': '7', 'r': '7', 's': '7',
              't': '8', 'u': '8', 'v': '8', 'w': '9', 'x': '9', 'y': '9'}
    letters = re.compile(r'[A-PR-Y]', re.I)
    return letters.sub(lambda m: keypad[m.group(0).lower()], phone)
|
||||
|
||||
def compressString(s):
    """Return `s` gzip-compressed (compression level 6) as a byte string."""
    import cStringIO, gzip
    buf = cStringIO.StringIO()
    archive = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=buf)
    archive.write(s)
    archive.close()
    return buf.getvalue()
|
||||
|
||||
# Matches either a double-quoted chunk, a single-quoted chunk (both with
# backslash escapes) or a run of non-whitespace characters.
smart_split_re = re.compile('("(?:[^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'(?:[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'|[^\\s]+)')

def smartSplit(text):
    """
    Generator that splits a string by spaces, leaving quoted phrases together.
    Supports both single and double quotes, and supports escaping quotes with
    backslashes. In the output, strings will keep their initial and trailing
    quote marks.

    >>> list(smartSplit('This is "a person\\'s" test.'))
    ['This', 'is', '"a person\\'s"', 'test.']
    """
    for match in smart_split_re.finditer(text):
        token = match.group(0)
        quote = token[0]
        if quote == '"':
            inner = token[1:-1].replace('\\"', '"').replace('\\\\', '\\')
            yield '"%s"' % inner
        elif quote == "'":
            inner = token[1:-1].replace("\\'", "'").replace("\\\\", "\\")
            yield "'%s'" % inner
        else:
            yield token
|
||||
|
||||
|
|
@@ -1,74 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
# GPL 2007
|
||||
|
||||
from threading import Event
|
||||
import hashlib
|
||||
from os import stat
|
||||
import os
|
||||
|
||||
from BitTornado.BT1.makemetafile import make_meta_file
|
||||
from bencode import bencode, bdecode
|
||||
|
||||
|
||||
def createTorrent(file, url, params=None, flag=None,
                  progress=lambda x: None, progress_percent=1):
    """Creates a torrent for a given file, using url as tracker url.

    Thin wrapper around BitTornado's make_meta_file.
    """
    # Fix: the original used params={} and flag=Event() as defaults,
    # which are evaluated once and shared across all calls; create fresh
    # ones per call instead.
    if params is None:
        params = {}
    if flag is None:
        flag = Event()
    return make_meta_file(file, url, params, flag, progress, progress_percent)
|
||||
|
||||
def getInfoHash(torrentFile):
    """Return the torrent's info hash: the uppercase hex SHA-1 of the
    bencoded 'info' dict read from `torrentFile`."""
    metainfo_file = open(torrentFile, 'rb')
    try:
        metainfo = bdecode(metainfo_file.read())
    finally:
        # Fix: the original never closed the file handle.
        metainfo_file.close()
    info = metainfo['info']
    return hashlib.sha1(bencode(info)).hexdigest().upper()
|
||||
|
||||
def getTorrentInfoFromFile(torrentFile):
    """Return getTorrentInfo() for the contents of `torrentFile`, with
    the file's creation time added under 'timestamp'."""
    handle = open(torrentFile, 'rb')
    data = handle.read()
    handle.close()
    tinfo = getTorrentInfo(data)
    tinfo['timestamp'] = stat(torrentFile).st_ctime
    return tinfo
|
||||
|
||||
def getTorrentInfo(data):
    """Return a dict describing a torrent, given raw bencoded metainfo.

    Includes everything from the 'info' dict except the (large) 'pieces'
    blob, everything from the top level except 'info' itself, plus the
    total payload 'size', the 'hash' (hex SHA-1 of the bencoded info
    dict) and the 'announce' URL.
    """
    metainfo = bdecode(data)
    info = metainfo['info']
    # Kept (though unused below): also validates 'piece length' exists.
    piece_length = info['piece length']
    if info.has_key('length'):
        # Single-file torrent.
        total = info['length']
    else:
        # Directory torrent: sum the individual file lengths.
        total = 0
        for entry in info['files']:
            total += entry['length']
    tinfo = {}
    for key in info:
        if key != 'pieces':
            tinfo[key] = info[key]
    for key in metainfo:
        if key != 'info':
            tinfo[key] = metainfo[key]
    tinfo['size'] = total
    tinfo['hash'] = hashlib.sha1(bencode(info)).hexdigest()
    tinfo['announce'] = metainfo['announce']
    return tinfo
|
||||
|
||||
def getFiles(data):
    """Return the list of file paths contained in bencoded torrent `data`."""
    info = getTorrentInfo(data)
    files = []
    if 'files' in info:
        # Multi-file torrent: paths are relative to the torrent's name dir.
        for entry in info['files']:
            parts = [info['name'], ]
            parts.extend(entry['path'])
            files.append(os.path.join(*parts))
    else:
        files.append(info['name'])
    return files
|
||||
|
||||
def getTorrentSize(torrentFile):
    """Returns Size of files in torrent file in bytes.

    Fix: the original passed the *filename* straight to getTorrentInfo(),
    which expects raw bencoded data and would fail in bdecode(); read the
    file via getTorrentInfoFromFile() instead.
    """
    return getTorrentInfoFromFile(torrentFile)['size']
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue