ox gets some tests too

j 2008-05-05 20:33:23 +02:00
parent b3d3f44d20
commit 1b93ae048d
5 changed files with 50 additions and 19 deletions

README
View file

@@ -6,3 +6,6 @@ Depends:
 python-beautifulsoup (http://www.crummy.com/software/BeautifulSoup/)
 python-feedparser (http://www.feedparser.org/)
+
+Test:
+nosetests --with-doctest ox
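
The Test instructions above rely on nose collecting the >>> examples that the rest of this commit adds to docstrings and running them with the doctest module. A minimal sketch of the pattern, with an invented function (slugify is not part of ox); only the doctest workflow matches what the README sets up:

# Invented example; only the doctest mechanics match this commit.
def slugify(title):
    '''
    >>> slugify('Priere pour Refuznik')
    'priere-pour-refuznik'
    '''
    return title.lower().replace(' ', '-')

if __name__ == '__main__':
    import doctest
    doctest.testmod()  # nosetests --with-doctest does this for every module in ox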

View file

@@ -3,6 +3,13 @@ from urllib import unquote
 from oxutils.cache import getUrl
 
 def getVideoUrl(url):
+    '''
+    >>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms')
+    'http://www.dailymotion.com/get/16/320x240/flv/6191379.flv?key=0a710ad6ffbfe980b1252569d16f957313399d0'
+    >>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms')
+    'http://www.dailymotion.com/get/15/320x240/flv/6197800.flv?key=08a18365ca6962c5ff7526f69872c36813399d4'
+    '''
     data = getUrl(url)
     video = re.compile('''video", "(.*?)"''').findall(data)
    for v in video:
@@ -10,6 +17,3 @@ def getVideoUrl(url):
         return "http://www.dailymotion.com" + v
     return ''
 
-if __name__ == '__main__':
-    print getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms')
-    print getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms')
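
The two example URLs move from the ad-hoc __main__ block into getVideoUrl's docstring, so the same calls now serve as documentation and as tests. They fetch live pages through oxutils.cache.getUrl, so the expected .flv URLs (including the key= parameter) only match while the cached responses stay valid. A hedged way to exercise just this module, assuming it is importable as ox.dailymotion:

# Assumes the module path ox.dailymotion; adjust to wherever getVideoUrl lives.
import doctest
import ox.dailymotion
doctest.testmod(ox.dailymotion, verbose=True)  # runs the two >>> examples above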

View file

@@ -6,9 +6,10 @@ from datetime import datetime
 import re
 import socket
 from urllib import quote
+import sha
 
 from oxutils.cache import getUrl, getUrlUnicode
-from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, intValue
+from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
 from oxutils.normalize import normalizeImdbId
 
 from torrent import Torrent
@@ -45,6 +46,7 @@ def findMovieByImdb(imdbId):
     return _parseResultsPage(data)
 
 def getId(mininovaId):
+    mininovaId = unicode(mininovaId)
     d = findRegexp(mininovaId, "/(\d+)")
     if d:
         return d
@@ -80,15 +82,22 @@ def getData(mininovaId):
     torrent[u'imdbId'] = findRegexp(data, 'title/tt(\d{7})')
     torrent[u'description'] = findRegexp(data, '<div id="description">(.*?)</div>')
     if torrent['description']:
-        torrent['description'] = decodeHtml(stripTags(torrent['description'])).strip()
+        torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
     t = getUrl(torrent[u'torrent_link'])
     torrent[u'torrent_info'] = getTorrentInfo(t)
     return torrent
 
 class Mininova(Torrent):
+    '''
+    >>> Mininova('123')
+    {}
+    >>> sha.sha(unicode(Mininova('1072195'))).hexdigest()
+    'ec98268a0aeaef8292f7bcf3585d0bc3910b3fac'
+    '''
     def __init__(self, mininovaId):
         self.data = getData(mininovaId)
+        if not self.data:
+            return
         Torrent.__init__(self)
         ratio = self.data['share ratio'].split(',')
         self['seeder'] = int(intValue(ratio[0].replace(',','').strip()))
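
The Mininova doctest pins the whole scraped dictionary with a single checksum: the torrent dict is rendered to unicode and hashed with sha, so any change in the parsed fields shows up as one failing line instead of a page-long expected dict. A rough sketch of the same pattern, using the old Python 2 sha module as the code above does; the sample dict is invented:

# Python 2 sketch of the checksum-style doctest; hashlib later replaced sha.
import sha

def fingerprint(data):
    # any object with a stable unicode() representation can be pinned this way
    return sha.sha(unicode(data)).hexdigest()

expected = fingerprint({'title': u'some movie', 'seeder': 12})
assert fingerprint({'title': u'some movie', 'seeder': 12}) == expected

This keeps the doctests short at the cost of opaque failures: the hash changes whenever any field, the dict ordering, or the formatting changes, so a mismatch only says that something in the scraped data differs.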

View file

@@ -7,9 +7,10 @@ import re
 import socket
 from urllib import quote, urlencode
 from urllib2 import URLError
+import sha
 
 from oxutils.cache import getUrl, getUrlUnicode
-from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo
+from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
 from oxutils.normalize import normalizeImdbId
 
 from torrent import Torrent
@@ -19,13 +20,13 @@ socket.setdefaulttimeout(10.0)
 season_episode = re.compile("S..E..", re.IGNORECASE)
 
-def getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout):
+def _getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout):
     headers = cache.DEFAULT_HEADERS
     headers['Cookie'] = 'language=en_EN'
     return cache.getUrl(url, data, headers, timeout)
 
-def getUrlUnicode(url):
-    return cache.getUrlUnicode(url, _getUrl=getUrl)
+def _getUrlUnicode(url):
+    return cache.getUrlUnicode(url, _getUrl=_getUrl)
 
 def findMovies(query, max_results=10):
     results = []
@@ -38,7 +39,7 @@ def findMovies(query, max_results=10):
             if not url.startswith('/'):
                 url = "/" + url
             url = "http://thepiratebay.org" + url
-        data = getUrlUnicode(url)
+        data = _getUrlUnicode(url)
         regexp = '''<tr.*?<td class="vertTh"><a href="/browse/(.*?)".*?<td><a href="(/tor/.*?)" class="detLink".*?>(.*?)</a>.*?</tr>'''
         for row in re.compile(regexp, re.DOTALL).findall(data):
             torrentType = row[0]
@@ -79,7 +80,7 @@ def getData(piratebayId):
     torrent[u'domain'] = 'thepiratebay.org'
     torrent[u'comment_link'] = 'http://thepiratebay.org/tor/%s' % piratebayId
-    data = getUrlUnicode(torrent['comment_link'])
+    data = _getUrlUnicode(torrent['comment_link'])
     torrent[u'title'] = findRegexp(data, '<title>(.*?) \(download torrent\) - TPB</title>')
     if not torrent[u'title']:
         return None
@@ -91,15 +92,25 @@ def getData(piratebayId):
         key = _key_map.get(key, key)
         value = decodeHtml(stripTags(d[1].strip()))
         torrent[key] = value
-    torrent[u'description'] = decodeHtml(stripTags(findRegexp(data, '<div class="nfo">(.*?)</div>'))).strip()
-    t = getUrl(torrent[u'torrent_link'])
+    torrent[u'description'] = findRegexp(data, '<div class="nfo">(.*?)</div>')
+    if torrent[u'description']:
+        torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
+    t = _getUrl(torrent[u'torrent_link'])
     torrent[u'torrent_info'] = getTorrentInfo(t)
     return torrent
 
 class Thepiratebay(Torrent):
+    '''
+    >>> Thepiratebay('123')
+    {}
+    >>> sha.sha(unicode(Thepiratebay('3951349'))).hexdigest()
+    'ef64e438e3eef6e6a05cac4eea56b9f0289d3f22'
+    '''
     def __init__(self, piratebayId):
         self.data = getData(piratebayId)
+        if not self.data:
+            return
         Torrent.__init__(self)
         published = self.data['uploaded']
         published = published.replace(' GMT', '').split(' +')[0]
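
The cookie-setting helpers are renamed from getUrl/getUrlUnicode to _getUrl/_getUrlUnicode, presumably because the module also does "from oxutils.cache import getUrl, getUrlUnicode": a wrapper defined under the same name shadows the import, and call sites silently get whichever definition came last. A toy illustration of that hazard, not the exact failure in this module; the bodies are invented:

# Invented stand-ins; only the name clash matters.
def getUrl(url):             # stands in for the getUrl imported from oxutils.cache
    return '<html>%s</html>' % url

def _getUrl(url):            # distinct name: the function above stays reachable
    return getUrl(url)       # the real wrapper calls cache.getUrl and adds a cookie

# Had the wrapper been named getUrl as before, its def would have replaced the
# import, and an unqualified getUrl(...) inside it would recurse into itself.

print _getUrl('http://thepiratebay.org/tor/3951349')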

View file

@@ -6,12 +6,16 @@ from oxutils import intValue
 
 class Torrent(dict):
+    '''
+    >>> Torrent()
+    {'files': 1, 'domain': u'', 'subtitle language': u'', 'seeder': -1, 'description': u'', 'language': u'', 'title': u'', 'imdbId': u'', 'downloaded': -1, 'leecher': -1, 'torrent_link': u'', 'torrent_info': {}, 'published': u'', 'announce': '', 'infohash': '', 'id': u'', 'comment_link': u'', 'size': -1}
+    '''
     _string_keys = ('id', 'title', 'description', 'infohash', 'torrent_link', 'comment_link',
         'imdbId', 'announce', 'domain', 'published', 'language', 'subtitle language')
     _int_keys = ('size', 'seeder', 'leecher', 'downloaded', 'files')
     _dict_keys = ('torrent_info', )
     _list_keys = ()
-    data = {}
+    data = {'torrent_info': {}}
 
     def __init__(self):
         for key in self._string_keys:
@@ -25,9 +29,9 @@ class Torrent(dict):
             if not isinstance(value, int):
                 value = int(intValue(value))
             self[key] = value
-        self['infohash'] = self.data['torrent_info']['hash']
-        self['size'] = self.data['torrent_info']['size']
-        self['announce'] = self.data['torrent_info']['announce']
+        self['infohash'] = self.data['torrent_info'].get('hash', '')
+        self['size'] = self.data['torrent_info'].get('size', -1)
+        self['announce'] = self.data['torrent_info'].get('announce', '')
         if 'files' in self.data['torrent_info']:
             self['files'] = len(self.data['torrent_info']['files'])
         else:
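
Torrent.__init__ previously indexed torrent_info directly for hash, size and announce; with the class-level default now {'torrent_info': {}}, the .get() fallbacks are what let the bare Torrent() doctest above build its dictionary of defaults without a real torrent file (the Mininova('123') and Thepiratebay('123') cases instead bail out earlier via the new "if not self.data: return" guard). A hedged sketch of the pattern; TorrentLike is invented and mirrors only the .get() defaults:

# Invented minimal class; mirrors only the .get() defaults introduced above.
class TorrentLike(dict):
    data = {'torrent_info': {}}
    def __init__(self):
        info = self.data['torrent_info']
        self['infohash'] = info.get('hash', '')   # info['hash'] would raise KeyError
        self['size'] = info.get('size', -1)
        self['announce'] = info.get('announce', '')

print TorrentLike()  # prints the defaults instead of raising KeyError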