diff --git a/README b/README
index c5a74e1..74076a9 100644
--- a/README
+++ b/README
@@ -6,3 +6,6 @@ Depends:
python-beautifulsoup (http://www.crummy.com/software/BeautifulSoup/)
python-feedparser (http://www.feedparser.org/)
+
+Test:
+ nosetests --with-doctest ox
diff --git a/ox/dailymotion.py b/ox/dailymotion.py
index cb5b576..1dafa75 100644
--- a/ox/dailymotion.py
+++ b/ox/dailymotion.py
@@ -3,6 +3,13 @@ from urllib import unquote
from oxutils.cache import getUrl
def getVideoUrl(url):
+ '''
+ >>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms')
+ 'http://www.dailymotion.com/get/16/320x240/flv/6191379.flv?key=0a710ad6ffbfe980b1252569d16f957313399d0'
+
+ >>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms')
+ 'http://www.dailymotion.com/get/15/320x240/flv/6197800.flv?key=08a18365ca6962c5ff7526f69872c36813399d4'
+ '''
data = getUrl(url)
video = re.compile('''video", "(.*?)"''').findall(data)
for v in video:
@@ -10,6 +17,3 @@ def getVideoUrl(url):
return "http://www.dailymotion.com" + v
return ''
-if __name__ == '__main__':
- print getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms')
- print getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms')
diff --git a/ox/mininova.py b/ox/mininova.py
index fa64b6f..03ea512 100644
--- a/ox/mininova.py
+++ b/ox/mininova.py
@@ -6,9 +6,10 @@ from datetime import datetime
import re
import socket
from urllib import quote
+import sha
from oxutils.cache import getUrl, getUrlUnicode
-from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, intValue
+from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
from oxutils.normalize import normalizeImdbId
from torrent import Torrent
@@ -45,6 +46,7 @@ def findMovieByImdb(imdbId):
return _parseResultsPage(data)
def getId(mininovaId):
+ mininovaId = unicode(mininovaId)
d = findRegexp(mininovaId, "/(\d+)")
if d:
return d
@@ -80,15 +82,22 @@ def getData(mininovaId):
torrent[u'imdbId'] = findRegexp(data, 'title/tt(\d{7})')
torrent[u'description'] = findRegexp(data, '
(.*?)
')
if torrent['description']:
- torrent['description'] = decodeHtml(stripTags(torrent['description'])).strip()
+ torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
t = getUrl(torrent[u'torrent_link'])
torrent[u'torrent_info'] = getTorrentInfo(t)
return torrent
-
class Mininova(Torrent):
+ '''
+ >>> Mininova('123')
+ {}
+ >>> sha.sha(unicode(Mininova('1072195'))).hexdigest()
+ 'ec98268a0aeaef8292f7bcf3585d0bc3910b3fac'
+ '''
def __init__(self, mininovaId):
self.data = getData(mininovaId)
+ if not self.data:
+ return
Torrent.__init__(self)
ratio = self.data['share ratio'].split(',')
self['seeder'] = int(intValue(ratio[0].replace(',','').strip()))
diff --git a/ox/thepiratebay.py b/ox/thepiratebay.py
index 1d85f73..897c4f8 100644
--- a/ox/thepiratebay.py
+++ b/ox/thepiratebay.py
@@ -7,9 +7,10 @@ import re
import socket
from urllib import quote, urlencode
from urllib2 import URLError
+import sha
from oxutils.cache import getUrl, getUrlUnicode
-from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo
+from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
from oxutils.normalize import normalizeImdbId
from torrent import Torrent
@@ -19,13 +20,13 @@ socket.setdefaulttimeout(10.0)
season_episode = re.compile("S..E..", re.IGNORECASE)
-def getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout):
+def _getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout):
- headers = cache.DEFAULT_HEADERS
+ headers = dict(headers or cache.DEFAULT_HEADERS)  # copy: keep the Cookie out of the shared cache.DEFAULT_HEADERS
headers['Cookie'] = 'language=en_EN'
return cache.getUrl(url, data, headers, timeout)
-def getUrlUnicode(url):
- return cache.getUrlUnicode(url, _getUrl=getUrl)
+def _getUrlUnicode(url):
+ return cache.getUrlUnicode(url, _getUrl=_getUrl)
def findMovies(query, max_results=10):
results = []
@@ -38,7 +39,7 @@ def findMovies(query, max_results=10):
if not url.startswith('/'):
url = "/" + url
url = "http://thepiratebay.org" + url
- data = getUrlUnicode(url)
+ data = _getUrlUnicode(url)
regexp = '''(.*?).*?'''
for row in re.compile(regexp, re.DOTALL).findall(data):
torrentType = row[0]
@@ -79,7 +80,7 @@ def getData(piratebayId):
torrent[u'domain'] = 'thepiratebay.org'
torrent[u'comment_link'] = 'http://thepiratebay.org/tor/%s' % piratebayId
- data = getUrlUnicode(torrent['comment_link'])
+ data = _getUrlUnicode(torrent['comment_link'])
torrent[u'title'] = findRegexp(data, '(.*?) \(download torrent\) - TPB')
if not torrent[u'title']:
return None
@@ -91,15 +92,25 @@ def getData(piratebayId):
key = _key_map.get(key, key)
value = decodeHtml(stripTags(d[1].strip()))
torrent[key] = value
- torrent[u'description'] = decodeHtml(stripTags(findRegexp(data, '(.*?)
'))).strip()
- t = getUrl(torrent[u'torrent_link'])
+ torrent[u'description'] = findRegexp(data, '(.*?)
')
+ if torrent[u'description']:
+ torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
+ t = _getUrl(torrent[u'torrent_link'])
torrent[u'torrent_info'] = getTorrentInfo(t)
return torrent
-
class Thepiratebay(Torrent):
+ '''
+ >>> Thepiratebay('123')
+ {}
+
+ >>> sha.sha(unicode(Thepiratebay('3951349'))).hexdigest()
+ 'ef64e438e3eef6e6a05cac4eea56b9f0289d3f22'
+ '''
def __init__(self, piratebayId):
self.data = getData(piratebayId)
+ if not self.data:
+ return
Torrent.__init__(self)
published = self.data['uploaded']
published = published.replace(' GMT', '').split(' +')[0]
diff --git a/ox/torrent.py b/ox/torrent.py
index 21e3df1..785f604 100644
--- a/ox/torrent.py
+++ b/ox/torrent.py
@@ -6,12 +6,16 @@ from oxutils import intValue
class Torrent(dict):
+ '''
+ >>> Torrent()
+ {'files': 1, 'domain': u'', 'subtitle language': u'', 'seeder': -1, 'description': u'', 'language': u'', 'title': u'', 'imdbId': u'', 'downloaded': -1, 'leecher': -1, 'torrent_link': u'', 'torrent_info': {}, 'published': u'', 'announce': '', 'infohash': '', 'id': u'', 'comment_link': u'', 'size': -1}
+ '''
_string_keys = ('id', 'title', 'description', 'infohash', 'torrent_link', 'comment_link',
'imdbId', 'announce', 'domain', 'published', 'language', 'subtitle language')
_int_keys = ('size', 'seeder', 'leecher', 'downloaded', 'files')
_dict_keys = ('torrent_info', )
_list_keys = ()
- data = {}
+ data = {'torrent_info': {}}
def __init__(self):
for key in self._string_keys:
@@ -25,9 +29,9 @@ class Torrent(dict):
if not isinstance(value, int):
value = int(intValue(value))
self[key] = value
- self['infohash'] = self.data['torrent_info']['hash']
- self['size'] = self.data['torrent_info']['size']
- self['announce'] = self.data['torrent_info']['announce']
+ self['infohash'] = self.data['torrent_info'].get('hash', '')
+ self['size'] = self.data['torrent_info'].get('size', -1)
+ self['announce'] = self.data['torrent_info'].get('announce', '')
if 'files' in self.data['torrent_info']:
self['files'] = len(self.data['torrent_info']['files'])
else: