ox gets some tests too

parent b3d3f44d20
commit 1b93ae048d

5 changed files with 50 additions and 19 deletions
README (3 additions)

@@ -6,3 +6,6 @@ Depends:
 python-beautifulsoup (http://www.crummy.com/software/BeautifulSoup/)
 python-feedparser (http://www.feedparser.org/)
 
+Test:
+
+nosetests --with-doctest ox
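The new Test stanza relies on nose's doctest plugin: `nosetests --with-doctest ox` imports each module in the ox package and runs every interactive example found in a docstring as a test case. A minimal sketch of the mechanism, using an illustrative module that is not part of this commit:

    # doctest_demo.py -- hypothetical module, for illustration only.
    # Run with: nosetests --with-doctest doctest_demo.py
    def add(a, b):
        '''
        >>> add(1, 2)
        3
        >>> add('a', 'b')
        'ab'
        '''
        return a + b

    if __name__ == '__main__':
        # The same examples can also be checked without nose:
        import doctest
        doctest.testmod()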
ox/dailymotion.py

@@ -3,6 +3,13 @@ from urllib import unquote
 from oxutils.cache import getUrl
 
 def getVideoUrl(url):
+    '''
+    >>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms')
+    'http://www.dailymotion.com/get/16/320x240/flv/6191379.flv?key=0a710ad6ffbfe980b1252569d16f957313399d0'
+
+    >>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms')
+    'http://www.dailymotion.com/get/15/320x240/flv/6197800.flv?key=08a18365ca6962c5ff7526f69872c36813399d4'
+    '''
     data = getUrl(url)
     video = re.compile('''video", "(.*?)"''').findall(data)
     for v in video:
@@ -10,6 +17,3 @@ def getVideoUrl(url):
         return "http://www.dailymotion.com" + v
     return ''
 
-if __name__ == '__main__':
-    print getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms')
-    print getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms')
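Dropping the `__main__` block is the point of this hunk: the two search URLs it printed by hand now live in getVideoUrl's docstring, where the expected .flv URLs double as assertions under the README's test command. Roughly how the runner treats them, assuming the module above imports as dailymotion:

    import doctest
    import dailymotion  # assumption: the module shown above

    # Each '>>>' line is executed and the repr() of its result is compared,
    # character for character, with the line below it in the docstring;
    # any drift in the returned .flv URL fails the test.
    failed, attempted = doctest.testmod(dailymotion)
    print '%d of %d examples failed' % (failed, attempted)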
ox/mininova.py

@@ -6,9 +6,10 @@ from datetime import datetime
 import re
 import socket
 from urllib import quote
+import sha
 
 from oxutils.cache import getUrl, getUrlUnicode
-from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, intValue
+from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
 from oxutils.normalize import normalizeImdbId
 
 from torrent import Torrent
@@ -45,6 +46,7 @@ def findMovieByImdb(imdbId):
     return _parseResultsPage(data)
 
 def getId(mininovaId):
+    mininovaId = unicode(mininovaId)
     d = findRegexp(mininovaId, "/(\d+)")
     if d:
         return d
@@ -80,15 +82,22 @@ def getData(mininovaId):
     torrent[u'imdbId'] = findRegexp(data, 'title/tt(\d{7})')
     torrent[u'description'] = findRegexp(data, '<div id="description">(.*?)</div>')
     if torrent['description']:
-        torrent['description'] = decodeHtml(stripTags(torrent['description'])).strip()
+        torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
     t = getUrl(torrent[u'torrent_link'])
     torrent[u'torrent_info'] = getTorrentInfo(t)
     return torrent
 
 
 class Mininova(Torrent):
+    '''
+    >>> Mininova('123')
+    {}
+
+    >>> sha.sha(unicode(Mininova('1072195'))).hexdigest()
+    'ec98268a0aeaef8292f7bcf3585d0bc3910b3fac'
+    '''
     def __init__(self, mininovaId):
         self.data = getData(mininovaId)
+        if not self.data:
+            return
         Torrent.__init__(self)
         ratio = self.data['share ratio'].split(',')
         self['seeder'] = int(intValue(ratio[0].replace(',','').strip()))
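Both scrapers now pass descriptions through normalizeNewlines before stripping them, so the doctest hashes above stay stable across \r\n and bare \r variants in the scraped HTML. oxutils' actual implementation is not part of this diff; a plausible stand-in:

    def normalizeNewlines(s):
        # Assumed behaviour, not oxutils' code: fold Windows and old-Mac
        # line endings into plain \n so hashed output is reproducible.
        return s.replace('\r\n', '\n').replace('\r', '\n')

    print repr(normalizeNewlines('seen on\r\nMininova\rand elsewhere'))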
ox/thepiratebay.py

@@ -7,9 +7,10 @@ import re
 import socket
 from urllib import quote, urlencode
 from urllib2 import URLError
+import sha
 
 from oxutils.cache import getUrl, getUrlUnicode
-from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo
+from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
 from oxutils.normalize import normalizeImdbId
 
 from torrent import Torrent
@@ -19,13 +20,13 @@ socket.setdefaulttimeout(10.0)
 season_episode = re.compile("S..E..", re.IGNORECASE)
 
 
-def getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout):
+def _getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout):
     headers = cache.DEFAULT_HEADERS
     headers['Cookie'] = 'language=en_EN'
     return cache.getUrl(url, data, headers, timeout)
 
-def getUrlUnicode(url):
-    return cache.getUrlUnicode(url, _getUrl=getUrl)
+def _getUrlUnicode(url):
+    return cache.getUrlUnicode(url, _getUrl=_getUrl)
 
 def findMovies(query, max_results=10):
     results = []
@@ -38,7 +39,7 @@ def findMovies(query, max_results=10):
         if not url.startswith('/'):
             url = "/" + url
         url = "http://thepiratebay.org" + url
-        data = getUrlUnicode(url)
+        data = _getUrlUnicode(url)
         regexp = '''<tr.*?<td class="vertTh"><a href="/browse/(.*?)".*?<td><a href="(/tor/.*?)" class="detLink".*?>(.*?)</a>.*?</tr>'''
         for row in re.compile(regexp, re.DOTALL).findall(data):
             torrentType = row[0]
@@ -79,7 +80,7 @@ def getData(piratebayId):
     torrent[u'domain'] = 'thepiratebay.org'
     torrent[u'comment_link'] = 'http://thepiratebay.org/tor/%s' % piratebayId
 
-    data = getUrlUnicode(torrent['comment_link'])
+    data = _getUrlUnicode(torrent['comment_link'])
     torrent[u'title'] = findRegexp(data, '<title>(.*?) \(download torrent\) - TPB</title>')
     if not torrent[u'title']:
         return None
@@ -91,15 +92,25 @@ def getData(piratebayId):
         key = _key_map.get(key, key)
         value = decodeHtml(stripTags(d[1].strip()))
         torrent[key] = value
-    torrent[u'description'] = decodeHtml(stripTags(findRegexp(data, '<div class="nfo">(.*?)</div>'))).strip()
-    t = getUrl(torrent[u'torrent_link'])
+    torrent[u'description'] = findRegexp(data, '<div class="nfo">(.*?)</div>')
+    if torrent[u'description']:
+        torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
+    t = _getUrl(torrent[u'torrent_link'])
     torrent[u'torrent_info'] = getTorrentInfo(t)
     return torrent
 
 
 class Thepiratebay(Torrent):
+    '''
+    >>> Thepiratebay('123')
+    {}
+
+    >>> sha.sha(unicode(Thepiratebay('3951349'))).hexdigest()
+    'ef64e438e3eef6e6a05cac4eea56b9f0289d3f22'
+    '''
     def __init__(self, piratebayId):
         self.data = getData(piratebayId)
+        if not self.data:
+            return
         Torrent.__init__(self)
         published = self.data['uploaded']
         published = published.replace(' GMT', '').split(' +')[0]
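The leading underscore on _getUrl and _getUrlUnicode marks them as module-private, so they no longer shadow the getUrl/getUrlUnicode imported from oxutils.cache, and `_getUrl=_getUrl` now injects the cookie-setting fetcher rather than accidentally referring to the function being defined. The injection pattern in isolation, with a fake fetcher standing in for the network (names and the `fetch` parameter are illustrative, not the oxutils API):

    def _getUrl(url):
        # Stand-in for the cookie-setting fetcher above; no network access.
        return 'page body for %s' % url

    def _getUrlUnicode(url, fetch=_getUrl):
        # Generic layer: decodes whatever the injected fetcher returns.
        return unicode(fetch(url), 'utf-8', 'replace')

    print _getUrlUnicode('http://thepiratebay.org/tor/3951349')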
ox/torrent.py

@@ -6,12 +6,16 @@ from oxutils import intValue
 
 
 class Torrent(dict):
+    '''
+    >>> Torrent()
+    {'files': 1, 'domain': u'', 'subtitle language': u'', 'seeder': -1, 'description': u'', 'language': u'', 'title': u'', 'imdbId': u'', 'downloaded': -1, 'leecher': -1, 'torrent_link': u'', 'torrent_info': {}, 'published': u'', 'announce': '', 'infohash': '', 'id': u'', 'comment_link': u'', 'size': -1}
+    '''
     _string_keys = ('id', 'title', 'description', 'infohash', 'torrent_link', 'comment_link',
         'imdbId', 'announce', 'domain', 'published', 'language', 'subtitle language')
     _int_keys = ('size', 'seeder', 'leecher', 'downloaded', 'files')
     _dict_keys = ('torrent_info', )
     _list_keys = ()
-    data = {}
+    data = {'torrent_info': {}}
 
     def __init__(self):
         for key in self._string_keys:
@@ -25,9 +29,9 @@ class Torrent(dict):
             if not isinstance(value, int):
                 value = int(intValue(value))
             self[key] = value
-        self['infohash'] = self.data['torrent_info']['hash']
-        self['size'] = self.data['torrent_info']['size']
-        self['announce'] = self.data['torrent_info']['announce']
+        self['infohash'] = self.data['torrent_info'].get('hash', '')
+        self['size'] = self.data['torrent_info'].get('size', -1)
+        self['announce'] = self.data['torrent_info'].get('announce', '')
         if 'files' in self.data['torrent_info']:
             self['files'] = len(self.data['torrent_info']['files'])
         else:
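The switch from direct indexing to dict.get is what lets the bare Torrent() doctest above pass: with data = {'torrent_info': {}}, every lookup falls back to its per-key default ('' for strings, -1 for sizes) instead of raising KeyError. The idiom on its own:

    info = {}  # e.g. no torrent metadata was fetched for an unknown id

    # dict.get(key, default) replaces KeyError with a sentinel value,
    # matching the defaults visible in the Torrent() doctest.
    print repr(info.get('hash', ''))  # ''
    print info.get('size', -1)        # -1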