2010-09-03 21:19:19 +00:00
|
|
|
python-ox - the web in a dict
|
2008-04-27 16:54:37 +00:00
|
|
|
|
|
|
|
Depends:
|
2012-09-09 16:48:40 +00:00
|
|
|
python >= 2.6
|
2008-04-27 16:54:37 +00:00
|
|
|
python-chardet (http://chardet.feedparser.org/)
|
2010-07-07 23:25:57 +00:00
|
|
|
python-feedparser (http://www.feedparser.org/)
|
2010-11-23 09:24:38 +00:00
|
|
|
python-lxml (http://codespeak.net/lxml/) [optional]
|
|
|
|
django (otherwise dates < 1900 are not supported) [optional]
|
2008-04-27 16:54:37 +00:00
|
|
|
|
|
|
|
Usage:
|
2010-07-07 23:25:57 +00:00
|
|
|
import ox
|
2008-04-27 16:54:37 +00:00
|
|
|
|
2012-08-14 14:12:43 +00:00
|
|
|
data = ox.cache.read_url('http://...')
|
|
|
|
text = ox.strip_tags(data)
|
|
|
|
ox.normalize_newlines(text)
|
|
|
|
ox.format_bytes(len(data))
|
2008-04-27 16:54:37 +00:00
|
|
|
|
2012-08-14 14:12:43 +00:00
|
|
|
ox.format_bytes(1234567890)
|
2008-04-27 16:54:37 +00:00
|
|
|
'1.15 GB'
|
|
|
|
|
2010-07-07 23:25:57 +00:00
|
|
|
import ox.web.imdb
|
|
|
|
imdbId = ox.web.imdb.guess('The Matrix')
|
|
|
|
info = ox.web.imdb.Imdb(imdbId)
|
|
|
|
info['year']
|
|
|
|
1999
|
|
|
|
|
2012-01-03 20:00:33 +00:00
|
|
|
For information on ox.django see https://wiki.0x2620.org/wiki/ox.django
|
|
|
|
|
2009-08-21 15:18:03 +00:00
|
|
|
Install:
|
|
|
|
python setup.py install
|
2008-05-05 18:12:27 +00:00
|
|
|
|
2010-07-07 23:25:57 +00:00
|
|
|
Cookies:
|
|
|
|
Some ox.web modules require user account information or cookies to work,
|
|
|
|
those are saved in ~/.ox/auth.json; the most basic form looks like this:
|
|
|
|
{
|
|
|
|
"key": "value"
|
|
|
|
}
|
|
|
|
|
2008-05-05 18:12:27 +00:00
|
|
|
Tests:
|
2010-07-07 23:25:57 +00:00
|
|
|
nosetests --with-doctest ox
|