welcome back TPB and Mininova, both with getData(id), findMovieByImdb(imdbId) and findMovie(query) [you need BitTornado installed]

This commit is contained in:
j 2008-05-04 17:05:41 +02:00
parent 28d84dd3eb
commit f9fabfd311
2 changed files with 177 additions and 0 deletions
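
The two new modules share the interface named in the commit message; note that the Pirate Bay module spells its search function findMovies(). A minimal usage sketch, assuming the package is importable as ox, that network access is available, and that BitTornado is installed for the .torrent parsing done by getTorrentInfo; the query, IMDb id and torrent id below are placeholders:

from ox import mininova, thepiratebay

hits = mininova.findMovie('some title', max_results=5)        # [(title, link, ''), ...]
hits += thepiratebay.findMovies('some title', max_results=5)  # same tuple format
by_imdb = mininova.findMovieByImdb('0000000')                 # placeholder IMDb id
data = mininova.getData('123456')                             # placeholder id -> dict or None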

84 ox/mininova.py Normal file

@@ -0,0 +1,84 @@
# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
import re
import socket
from urllib import quote
from oxutils.cache import getUrl, getUrlUnicode
from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo
from oxutils.normalize import normalizeImdbId
socket.setdefaulttimeout(10.0)

def _parseResultsPage(data, max_results=10):
  results = []
  regexp = '''<tr><td>(.*?)</td><td>(.*?)<a href="/tor/(.*?)">(.*?)</a>.*?</td>.*?</tr>'''
  for row in re.compile(regexp, re.DOTALL).findall(data):
    torrentDate = row[0]
    torrentExtra = row[1]
    torrentId = row[2]
    torrentTitle = decodeHtml(row[3]).strip()
    torrentLink = "http://www.mininova.org/tor/" + torrentId
    privateTracker = 'priv.gif' in torrentExtra
    if not privateTracker:
      results.append((torrentTitle, torrentLink, ''))
      # stop once max_results have been collected
      if len(results) >= max_results:
        break
  return results

def findMovie(query, max_results=10):
  '''search for torrents on mininova
  '''
  url = "http://www.mininova.org/search/%s/seeds" % quote(query)
  data = getUrlUnicode(url)
  return _parseResultsPage(data, max_results)

def findMovieByImdb(imdbId):
  '''find torrents on mininova for a given imdb id
  '''
  imdbId = normalizeImdbId(imdbId)
  data = getUrlUnicode("http://www.mininova.org/imdb/?imdb=%s" % imdbId)
  return _parseResultsPage(data)

def getId(mininovaId):
  d = findRegexp(mininovaId, r"/(\d+)")
  if d:
    return d
  mininovaId = mininovaId.split('/')
  if len(mininovaId) == 1:
    return mininovaId[0]
  else:
    return mininovaId[-1]

def getData(mininovaId):
  _key_map = {
    'by': u'uploader',
  }
  mininovaId = getId(mininovaId)
  torrent = dict()
  torrent[u'id'] = mininovaId
  torrent[u'domain'] = 'mininova.org'
  torrent[u'comment_link'] = "http://www.mininova.org/tor/%s" % mininovaId
  torrent[u'torrent_link'] = "http://www.mininova.org/get/%s" % mininovaId
  torrent[u'details_link'] = "http://www.mininova.org/det/%s" % mininovaId

  # scrape the comment page and the details page in one pass
  data = getUrlUnicode(torrent['comment_link']) + getUrlUnicode(torrent['details_link'])
  if '<h1>Torrent not found...</h1>' in data:
    return None
  for d in re.compile('<p>.<strong>(.*?):</strong>(.*?)</p>', re.DOTALL).findall(data):
    key = d[0].lower().strip()
    key = _key_map.get(key, key)
    value = decodeHtml(stripTags(d[1].strip()))
    torrent[key] = value
  torrent[u'title'] = findRegexp(data, '<title>(.*?):.*?</title>')
  torrent[u'imdb'] = findRegexp(data, r'title/tt(\d{7})')
  torrent[u'description'] = findRegexp(data, '<div id="description">(.*?)</div>')
  if torrent['description']:
    torrent['description'] = decodeHtml(stripTags(torrent['description'])).strip()
  t = getUrl(torrent[u'torrent_link'])
  torrent[u'torrent_info'] = getTorrentInfo(t)
  return torrent
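
getData() accepts either a bare mininova id or a URL containing one, because getId() normalizes its argument first. A quick illustrative sketch, assuming the same ox import path as in the sketch above; the ids are placeholders:

from ox import mininova

mininova.getId('123456')                              # already an id -> '123456'
mininova.getId('http://www.mininova.org/tor/123456')  # detail page URL -> '123456'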

93 ox/thepiratebay.py Normal file

@@ -0,0 +1,93 @@
# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
import re
import socket
from urllib import quote, urlencode
from urllib2 import URLError
from oxutils.cache import getUrl, getUrlUnicode
from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo
from oxutils.normalize import normalizeImdbId
socket.setdefaulttimeout(10.0)
season_episode = re.compile("S..E..", re.IGNORECASE)

def getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout):
  # copy the headers so the shared DEFAULT_HEADERS dict is not modified
  headers = headers.copy()
  headers['Cookie'] = 'language=en_EN'
  return cache.getUrl(url, data, headers, timeout)

def getUrlUnicode(url):
  return cache.getUrlUnicode(url, _getUrl=getUrl)

def findMovies(query, max_results=10):
  results = []
  next = ["http://thepiratebay.org/search/%s/0/3/200" % quote(query), ]
  page_count = 1
  while next and page_count < 4:
    page_count += 1
    url = next[0]
    if not url.startswith('http'):
      if not url.startswith('/'):
        url = "/" + url
      url = "http://thepiratebay.org" + url
    data = getUrlUnicode(url)
    regexp = '''<tr.*?<td class="vertTh"><a href="/browse/(.*?)".*?<td><a href="(/tor/.*?)" class="detLink".*?>(.*?)</a>.*?</tr>'''
    for row in re.compile(regexp, re.DOTALL).findall(data):
      torrentType = row[0]
      torrentLink = "http://thepiratebay.org" + row[1]
      torrentTitle = decodeHtml(row[2])
      # 201 = Movies, 202 = Movie DVDR, 205 = TV Shows
      if torrentType in ['201']:
        results.append((torrentTitle, torrentLink, ''))
        if len(results) >= max_results:
          return results
    next = re.compile('<a.*?href="(.*?)".*?>.*?next.gif.*?</a>').findall(data)
  return results

def findMovieByImdb(imdb):
  return findMovies("tt" + normalizeImdbId(imdb))

def getId(piratebayId):
  if piratebayId.startswith('http://torrents.thepiratebay.org/'):
    piratebayId = piratebayId.split('org/')[1]
  if 'tor/' in piratebayId:
    piratebayId = piratebayId.split('tor/')[1]
  d = findRegexp(piratebayId, r"/(\d+)")
  if d:
    piratebayId = d
  return piratebayId

def getData(piratebayId):
  _key_map = {
    'spoken language(s)': u'language',
    'texted language(s)': u'subtitle language',
    'by': u'uploader',
  }
  piratebayId = getId(piratebayId)
  torrent = dict()
  torrent[u'id'] = piratebayId
  torrent[u'domain'] = 'thepiratebay.org'
  torrent[u'comment_link'] = 'http://thepiratebay.org/tor/%s' % piratebayId

  data = getUrlUnicode(torrent['comment_link'])
  torrent[u'title'] = findRegexp(data, r'<title>(.*?) \(download torrent\) - TPB</title>')
  if not torrent[u'title']:
    return None
  torrent[u'imdb'] = findRegexp(data, r'title/tt(\d{7})')
  torrent[u'torrent_link'] = "http://torrents.thepiratebay.org/%s/%s.torrent" % (piratebayId, quote(torrent['title']))
  for d in re.compile('dt>(.*?):</dt>.*?<dd.*?>(.*?)</dd>', re.DOTALL).findall(data):
    key = d[0].lower().strip()
    key = _key_map.get(key, key)
    value = decodeHtml(stripTags(d[1].strip()))
    torrent[key] = value
  # guard against detail pages without a description block
  description = findRegexp(data, '<div class="nfo">(.*?)</div>')
  if description:
    torrent[u'description'] = decodeHtml(stripTags(description)).strip()
  t = getUrl(torrent[u'torrent_link'])
  torrent[u'torrent_info'] = getTorrentInfo(t)
  return torrent
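
As with the mininova module, getData() here returns a plain dict, or None when no title can be parsed. A minimal sketch of reading the normalized fields, assuming the same ox import path as above and a placeholder id; keys such as 'uploader' or 'language' only exist when the detail page actually listed them:

from ox import thepiratebay

torrent = thepiratebay.getData('123456')  # placeholder id; a /tor/ URL works too
if torrent:                               # None means the title could not be parsed
  print torrent[u'title'], torrent[u'imdb']
  print torrent.get(u'uploader')          # scraped 'By' field, renamed via _key_map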