From 1b93ae048d93b7f4a60ed79e5efafd8787f8272f Mon Sep 17 00:00:00 2001 From: j Date: Mon, 5 May 2008 20:33:23 +0200 Subject: [PATCH] ox gets some tests too --- README | 3 +++ ox/dailymotion.py | 10 +++++++--- ox/mininova.py | 15 ++++++++++++--- ox/thepiratebay.py | 29 ++++++++++++++++++++--------- ox/torrent.py | 12 ++++++++---- 5 files changed, 50 insertions(+), 19 deletions(-) diff --git a/README b/README index c5a74e1..74076a9 100644 --- a/README +++ b/README @@ -6,3 +6,6 @@ Depends: python-beautifulsoup (http://www.crummy.com/software/BeautifulSoup/) python-feedparser (http://www.feedparser.org/) + +Test: + nosetests --with-doctest ox diff --git a/ox/dailymotion.py b/ox/dailymotion.py index cb5b576..1dafa75 100644 --- a/ox/dailymotion.py +++ b/ox/dailymotion.py @@ -3,6 +3,13 @@ from urllib import unquote from oxutils.cache import getUrl def getVideoUrl(url): + ''' + >>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms') + 'http://www.dailymotion.com/get/16/320x240/flv/6191379.flv?key=0a710ad6ffbfe980b1252569d16f957313399d0' + + >>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms') + 'http://www.dailymotion.com/get/15/320x240/flv/6197800.flv?key=08a18365ca6962c5ff7526f69872c36813399d4' + ''' data = getUrl(url) video = re.compile('''video", "(.*?)"''').findall(data) for v in video: @@ -10,6 +17,3 @@ def getVideoUrl(url): return "http://www.dailymotion.com" + v return '' -if __name__ == '__main__': - print getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms') - print getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms') diff --git a/ox/mininova.py b/ox/mininova.py index fa64b6f..03ea512 100644 --- a/ox/mininova.py +++ b/ox/mininova.py @@ -6,9 +6,10 @@ from datetime import datetime import re import socket from urllib import quote +import sha from oxutils.cache import getUrl, getUrlUnicode -from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, intValue +from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines from oxutils.normalize import normalizeImdbId from torrent import Torrent @@ -45,6 +46,7 @@ def findMovieByImdb(imdbId): return _parseResultsPage(data) def getId(mininovaId): + mininovaId = unicode(mininovaId) d = findRegexp(mininovaId, "/(\d+)") if d: return d @@ -80,15 +82,22 @@ def getData(mininovaId): torrent[u'imdbId'] = findRegexp(data, 'title/tt(\d{7})') torrent[u'description'] = findRegexp(data, '
(.*?)
') if torrent['description']: - torrent['description'] = decodeHtml(stripTags(torrent['description'])).strip() + torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip() t = getUrl(torrent[u'torrent_link']) torrent[u'torrent_info'] = getTorrentInfo(t) return torrent - class Mininova(Torrent): + ''' + >>> Mininova('123') + {} + >>> sha.sha(unicode(Mininova('1072195'))).hexdigest() + 'ec98268a0aeaef8292f7bcf3585d0bc3910b3fac' + ''' def __init__(self, mininovaId): self.data = getData(mininovaId) + if not self.data: + return Torrent.__init__(self) ratio = self.data['share ratio'].split(',') self['seeder'] = int(intValue(ratio[0].replace(',','').strip())) diff --git a/ox/thepiratebay.py b/ox/thepiratebay.py index 1d85f73..897c4f8 100644 --- a/ox/thepiratebay.py +++ b/ox/thepiratebay.py @@ -7,9 +7,10 @@ import re import socket from urllib import quote, urlencode from urllib2 import URLError +import sha from oxutils.cache import getUrl, getUrlUnicode -from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo +from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines from oxutils.normalize import normalizeImdbId from torrent import Torrent @@ -19,13 +20,13 @@ socket.setdefaulttimeout(10.0) season_episode = re.compile("S..E..", re.IGNORECASE) -def getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout): +def _getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout): headers = cache.DEFAULT_HEADERS headers['Cookie'] = 'language=en_EN' return cache.getUrl(url, data, headers, timeout) -def getUrlUnicode(url): - return cache.getUrlUnicode(url, _getUrl=getUrl) +def _getUrlUnicode(url): + return cache.getUrlUnicode(url, _getUrl=_getUrl) def findMovies(query, max_results=10): results = [] @@ -38,7 +39,7 @@ def findMovies(query, max_results=10): if not url.startswith('/'): url = "/" + url url = "http://thepiratebay.org" + url - data = getUrlUnicode(url) + data = _getUrlUnicode(url) regexp = '''(.*?).*?''' for row in re.compile(regexp, re.DOTALL).findall(data): torrentType = row[0] @@ -79,7 +80,7 @@ def getData(piratebayId): torrent[u'domain'] = 'thepiratebay.org' torrent[u'comment_link'] = 'http://thepiratebay.org/tor/%s' % piratebayId - data = getUrlUnicode(torrent['comment_link']) + data = _getUrlUnicode(torrent['comment_link']) torrent[u'title'] = findRegexp(data, '(.*?) \(download torrent\) - TPB') if not torrent[u'title']: return None @@ -91,15 +92,25 @@ def getData(piratebayId): key = _key_map.get(key, key) value = decodeHtml(stripTags(d[1].strip())) torrent[key] = value - torrent[u'description'] = decodeHtml(stripTags(findRegexp(data, '
(.*?)
'))).strip() - t = getUrl(torrent[u'torrent_link']) + torrent[u'description'] = findRegexp(data, '
(.*?)
') + if torrent[u'description']: + torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip() + t = _getUrl(torrent[u'torrent_link']) torrent[u'torrent_info'] = getTorrentInfo(t) return torrent - class Thepiratebay(Torrent): + ''' + >>> Thepiratebay('123') + {} + + >>> sha.sha(unicode(Thepiratebay('3951349'))).hexdigest() + 'ef64e438e3eef6e6a05cac4eea56b9f0289d3f22' + ''' def __init__(self, piratebayId): self.data = getData(piratebayId) + if not self.data: + return Torrent.__init__(self) published = self.data['uploaded'] published = published.replace(' GMT', '').split(' +')[0] diff --git a/ox/torrent.py b/ox/torrent.py index 21e3df1..785f604 100644 --- a/ox/torrent.py +++ b/ox/torrent.py @@ -6,12 +6,16 @@ from oxutils import intValue class Torrent(dict): + ''' + >>> Torrent() + {'files': 1, 'domain': u'', 'subtitle language': u'', 'seeder': -1, 'description': u'', 'language': u'', 'title': u'', 'imdbId': u'', 'downloaded': -1, 'leecher': -1, 'torrent_link': u'', 'torrent_info': {}, 'published': u'', 'announce': '', 'infohash': '', 'id': u'', 'comment_link': u'', 'size': -1} + ''' _string_keys = ('id', 'title', 'description', 'infohash', 'torrent_link', 'comment_link', 'imdbId', 'announce', 'domain', 'published', 'language', 'subtitle language') _int_keys = ('size', 'seeder', 'leecher', 'downloaded', 'files') _dict_keys = ('torrent_info', ) _list_keys = () - data = {} + data = {'torrent_info': {}} def __init__(self): for key in self._string_keys: @@ -25,9 +29,9 @@ class Torrent(dict): if not isinstance(value, int): value = int(intValue(value)) self[key] = value - self['infohash'] = self.data['torrent_info']['hash'] - self['size'] = self.data['torrent_info']['size'] - self['announce'] = self.data['torrent_info']['announce'] + self['infohash'] = self.data['torrent_info'].get('hash', '') + self['size'] = self.data['torrent_info'].get('size', -1) + self['announce'] = self.data['torrent_info'].get('announce', '') if 'files' in self.data['torrent_info']: self['files'] = len(self.data['torrent_info']['files']) else: