ox gets some tests too
This commit is contained in:
parent
b3d3f44d20
commit
1b93ae048d
5 changed files with 50 additions and 19 deletions
3
README
3
README
|
@ -6,3 +6,6 @@ Depends:
|
|||
python-beautifulsoup (http://www.crummy.com/software/BeautifulSoup/)
|
||||
python-feedparser (http://www.feedparser.org/)
|
||||
|
||||
|
||||
Test:
|
||||
nosetests --with-doctest ox
|
||||
|
|
|
@ -3,6 +3,13 @@ from urllib import unquote
|
|||
from oxutils.cache import getUrl
|
||||
|
||||
def getVideoUrl(url):
|
||||
'''
|
||||
>>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms')
|
||||
'http://www.dailymotion.com/get/16/320x240/flv/6191379.flv?key=0a710ad6ffbfe980b1252569d16f957313399d0'
|
||||
|
||||
>>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms')
|
||||
'http://www.dailymotion.com/get/15/320x240/flv/6197800.flv?key=08a18365ca6962c5ff7526f69872c36813399d4'
|
||||
'''
|
||||
data = getUrl(url)
|
||||
video = re.compile('''video", "(.*?)"''').findall(data)
|
||||
for v in video:
|
||||
|
@ -10,6 +17,3 @@ def getVideoUrl(url):
|
|||
return "http://www.dailymotion.com" + v
|
||||
return ''
|
||||
|
||||
if __name__ == '__main__':
|
||||
print getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms')
|
||||
print getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms')
|
||||
|
|
|
@ -6,9 +6,10 @@ from datetime import datetime
|
|||
import re
|
||||
import socket
|
||||
from urllib import quote
|
||||
import sha
|
||||
|
||||
from oxutils.cache import getUrl, getUrlUnicode
|
||||
from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, intValue
|
||||
from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
|
||||
from oxutils.normalize import normalizeImdbId
|
||||
|
||||
from torrent import Torrent
|
||||
|
@ -45,6 +46,7 @@ def findMovieByImdb(imdbId):
|
|||
return _parseResultsPage(data)
|
||||
|
||||
def getId(mininovaId):
|
||||
mininovaId = unicode(mininovaId)
|
||||
d = findRegexp(mininovaId, "/(\d+)")
|
||||
if d:
|
||||
return d
|
||||
|
@ -80,15 +82,22 @@ def getData(mininovaId):
|
|||
torrent[u'imdbId'] = findRegexp(data, 'title/tt(\d{7})')
|
||||
torrent[u'description'] = findRegexp(data, '<div id="description">(.*?)</div>')
|
||||
if torrent['description']:
|
||||
torrent['description'] = decodeHtml(stripTags(torrent['description'])).strip()
|
||||
torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
|
||||
t = getUrl(torrent[u'torrent_link'])
|
||||
torrent[u'torrent_info'] = getTorrentInfo(t)
|
||||
return torrent
|
||||
|
||||
|
||||
class Mininova(Torrent):
|
||||
'''
|
||||
>>> Mininova('123')
|
||||
{}
|
||||
>>> sha.sha(unicode(Mininova('1072195'))).hexdigest()
|
||||
'ec98268a0aeaef8292f7bcf3585d0bc3910b3fac'
|
||||
'''
|
||||
def __init__(self, mininovaId):
|
||||
self.data = getData(mininovaId)
|
||||
if not self.data:
|
||||
return
|
||||
Torrent.__init__(self)
|
||||
ratio = self.data['share ratio'].split(',')
|
||||
self['seeder'] = int(intValue(ratio[0].replace(',','').strip()))
|
||||
|
|
|
@ -7,9 +7,10 @@ import re
|
|||
import socket
|
||||
from urllib import quote, urlencode
|
||||
from urllib2 import URLError
|
||||
import sha
|
||||
|
||||
from oxutils.cache import getUrl, getUrlUnicode
|
||||
from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo
|
||||
from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
|
||||
from oxutils.normalize import normalizeImdbId
|
||||
|
||||
from torrent import Torrent
|
||||
|
@ -19,13 +20,13 @@ socket.setdefaulttimeout(10.0)
|
|||
season_episode = re.compile("S..E..", re.IGNORECASE)
|
||||
|
||||
|
||||
def getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout):
|
||||
def _getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout):
|
||||
headers = cache.DEFAULT_HEADERS
|
||||
headers['Cookie'] = 'language=en_EN'
|
||||
return cache.getUrl(url, data, headers, timeout)
|
||||
|
||||
def getUrlUnicode(url):
|
||||
return cache.getUrlUnicode(url, _getUrl=getUrl)
|
||||
def _getUrlUnicode(url):
|
||||
return cache.getUrlUnicode(url, _getUrl=_getUrl)
|
||||
|
||||
def findMovies(query, max_results=10):
|
||||
results = []
|
||||
|
@ -38,7 +39,7 @@ def findMovies(query, max_results=10):
|
|||
if not url.startswith('/'):
|
||||
url = "/" + url
|
||||
url = "http://thepiratebay.org" + url
|
||||
data = getUrlUnicode(url)
|
||||
data = _getUrlUnicode(url)
|
||||
regexp = '''<tr.*?<td class="vertTh"><a href="/browse/(.*?)".*?<td><a href="(/tor/.*?)" class="detLink".*?>(.*?)</a>.*?</tr>'''
|
||||
for row in re.compile(regexp, re.DOTALL).findall(data):
|
||||
torrentType = row[0]
|
||||
|
@ -79,7 +80,7 @@ def getData(piratebayId):
|
|||
torrent[u'domain'] = 'thepiratebay.org'
|
||||
torrent[u'comment_link'] = 'http://thepiratebay.org/tor/%s' % piratebayId
|
||||
|
||||
data = getUrlUnicode(torrent['comment_link'])
|
||||
data = _getUrlUnicode(torrent['comment_link'])
|
||||
torrent[u'title'] = findRegexp(data, '<title>(.*?) \(download torrent\) - TPB</title>')
|
||||
if not torrent[u'title']:
|
||||
return None
|
||||
|
@ -91,15 +92,25 @@ def getData(piratebayId):
|
|||
key = _key_map.get(key, key)
|
||||
value = decodeHtml(stripTags(d[1].strip()))
|
||||
torrent[key] = value
|
||||
torrent[u'description'] = decodeHtml(stripTags(findRegexp(data, '<div class="nfo">(.*?)</div>'))).strip()
|
||||
t = getUrl(torrent[u'torrent_link'])
|
||||
torrent[u'description'] = findRegexp(data, '<div class="nfo">(.*?)</div>')
|
||||
if torrent[u'description']:
|
||||
torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
|
||||
t = _getUrl(torrent[u'torrent_link'])
|
||||
torrent[u'torrent_info'] = getTorrentInfo(t)
|
||||
return torrent
|
||||
|
||||
|
||||
class Thepiratebay(Torrent):
|
||||
'''
|
||||
>>> Thepiratebay('123')
|
||||
{}
|
||||
|
||||
>>> sha.sha(unicode(Thepiratebay('3951349'))).hexdigest()
|
||||
'ef64e438e3eef6e6a05cac4eea56b9f0289d3f22'
|
||||
'''
|
||||
def __init__(self, piratebayId):
|
||||
self.data = getData(piratebayId)
|
||||
if not self.data:
|
||||
return
|
||||
Torrent.__init__(self)
|
||||
published = self.data['uploaded']
|
||||
published = published.replace(' GMT', '').split(' +')[0]
|
||||
|
|
|
@ -6,12 +6,16 @@ from oxutils import intValue
|
|||
|
||||
|
||||
class Torrent(dict):
|
||||
'''
|
||||
>>> Torrent()
|
||||
{'files': 1, 'domain': u'', 'subtitle language': u'', 'seeder': -1, 'description': u'', 'language': u'', 'title': u'', 'imdbId': u'', 'downloaded': -1, 'leecher': -1, 'torrent_link': u'', 'torrent_info': {}, 'published': u'', 'announce': '', 'infohash': '', 'id': u'', 'comment_link': u'', 'size': -1}
|
||||
'''
|
||||
_string_keys = ('id', 'title', 'description', 'infohash', 'torrent_link', 'comment_link',
|
||||
'imdbId', 'announce', 'domain', 'published', 'language', 'subtitle language')
|
||||
_int_keys = ('size', 'seeder', 'leecher', 'downloaded', 'files')
|
||||
_dict_keys = ('torrent_info', )
|
||||
_list_keys = ()
|
||||
data = {}
|
||||
data = {'torrent_info': {}}
|
||||
|
||||
def __init__(self):
|
||||
for key in self._string_keys:
|
||||
|
@ -25,9 +29,9 @@ class Torrent(dict):
|
|||
if not isinstance(value, int):
|
||||
value = int(intValue(value))
|
||||
self[key] = value
|
||||
self['infohash'] = self.data['torrent_info']['hash']
|
||||
self['size'] = self.data['torrent_info']['size']
|
||||
self['announce'] = self.data['torrent_info']['announce']
|
||||
self['infohash'] = self.data['torrent_info'].get('hash', '')
|
||||
self['size'] = self.data['torrent_info'].get('size', -1)
|
||||
self['announce'] = self.data['torrent_info'].get('announce', '')
|
||||
if 'files' in self.data['torrent_info']:
|
||||
self['files'] = len(self.data['torrent_info']['files'])
|
||||
else:
|
||||
|
|
Loading…
Reference in a new issue