Welcome back TPB and Mininova: both now provide getData(id), findMovieByImdb(imdbId) and findMovie(query). [You need BitTornado installed.]
This commit is contained in:
parent
28d84dd3eb
commit
f9fabfd311
2 changed files with 177 additions and 0 deletions
84
ox/mininova.py
Normal file
84
ox/mininova.py
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
# -*- Mode: Python; -*-
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# vi:si:et:sw=2:sts=2:ts=2
|
||||||
|
import re
|
||||||
|
import socket
|
||||||
|
from urllib import quote
|
||||||
|
|
||||||
|
from oxutils.cache import getUrl, getUrlUnicode
|
||||||
|
from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo
|
||||||
|
from oxutils.normalize import normalizeImdbId
|
||||||
|
|
||||||
|
|
||||||
|
socket.setdefaulttimeout(10.0)
|
||||||
|
|
||||||
|
def _parseResultsPage(data, max_results=10):
  '''Extract torrent results from a mininova search/listing page.

  Parses each result row out of the HTML and returns a list of
  (title, link, '') tuples. Rows that point at a private tracker
  (marked with priv.gif in the row markup) are skipped.

  The original accepted max_results but never applied it; it is now
  enforced, so at most max_results tuples are returned.
  '''
  results = []
  regexp = '''<tr><td>(.*?)</td><td>(.*?)<a href="/tor/(.*?)">(.*?)</a>.*?</td>.*?</tr>'''
  for row in re.compile(regexp, re.DOTALL).findall(data):
    # row[0] is the date column; it is not used in the result tuples.
    torrentExtra = row[1]
    torrentId = row[2]
    torrentTitle = decodeHtml(row[3]).strip()
    torrentLink = "http://www.mininova.org/tor/" + torrentId
    privateTracker = 'priv.gif' in torrentExtra
    if not privateTracker:
      results.append((torrentTitle, torrentLink, ''))
      if len(results) >= max_results:
        break
  return results
|
||||||
|
|
||||||
|
def findMovie(query, max_results=10):
  '''Search mininova for torrents matching query, ordered by seeds.'''
  url = "http://www.mininova.org/search/%s/seeds" % quote(query)
  page = getUrlUnicode(url)
  return _parseResultsPage(page, max_results)
|
||||||
|
|
||||||
|
def findMovieByImdb(imdbId):
  '''Find torrents on mininova for a given IMDb id.

  The id is normalized via normalizeImdbId before querying.
  Returns the parsed result tuples from _parseResultsPage.
  (Removed a dead `results = []` local that was never used.)
  '''
  imdbId = normalizeImdbId(imdbId)
  data = getUrlUnicode("http://www.mininova.org/imdb/?imdb=%s" % imdbId)
  return _parseResultsPage(data)
|
||||||
|
|
||||||
|
def getId(mininovaId):
  '''Normalize a mininova torrent reference (URL or bare id) to its id.'''
  found = findRegexp(mininovaId, r"/(\d+)")
  if found:
    return found
  # No "/<digits>" component: fall back to the last path segment,
  # which is the whole string when it contains no slash.
  return mininovaId.split('/')[-1]
|
||||||
|
|
||||||
|
def getData(mininovaId):
  '''Scrape metadata for a mininova torrent.

  Accepts a bare id or a mininova URL (normalized through getId).
  Returns a dict with id, domain, the comment/torrent/details links,
  title, imdb id, description, any labelled detail fields, and the
  parsed torrent file info — or None if the torrent does not exist.
  '''
  # Detail-page labels remapped to the normalized keys used elsewhere.
  _key_map = {
    'by': u'uploader',
  }
  mininovaId = getId(mininovaId)
  torrent = dict()
  torrent[u'id'] = mininovaId
  torrent[u'domain'] = 'mininova.org'
  torrent[u'comment_link'] = "http://www.mininova.org/tor/%s" % mininovaId
  torrent[u'torrent_link'] = "http://www.mininova.org/get/%s" % mininovaId
  torrent[u'details_link'] = "http://www.mininova.org/det/%s" % mininovaId

  # Both pages are concatenated so one regex pass covers all fields.
  data = getUrlUnicode(torrent['comment_link']) + getUrlUnicode(torrent['details_link'])
  if '<h1>Torrent not found...</h1>' in data:
    return None

  # Each "<strong>Label:</strong> value" paragraph becomes a dict entry.
  for d in re.compile('<p>.<strong>(.*?):</strong>(.*?)</p>', re.DOTALL).findall(data):
    key = d[0].lower().strip()
    key = _key_map.get(key, key)
    value = decodeHtml(stripTags(d[1].strip()))
    torrent[key] = value

  torrent[u'title'] = findRegexp(data, '<title>(.*?):.*?</title>')
  torrent[u'imdb'] = findRegexp(data, 'title/tt(\d{7})')
  torrent[u'description'] = findRegexp(data, '<div id="description">(.*?)</div>')
  # Guarded: findRegexp may yield no match, so only clean a real value.
  if torrent['description']:
    torrent['description'] = decodeHtml(stripTags(torrent['description'])).strip()
  # Fetch the .torrent itself and attach its parsed metadata.
  t = getUrl(torrent[u'torrent_link'])
  torrent[u'torrent_info'] = getTorrentInfo(t)
  return torrent
|
||||||
|
|
93
ox/thepiratebay.py
Normal file
93
ox/thepiratebay.py
Normal file
|
@ -0,0 +1,93 @@
|
||||||
|
# -*- Mode: Python; -*-
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# vi:si:et:sw=2:sts=2:ts=2
|
||||||
|
|
||||||
|
import re
|
||||||
|
import socket
|
||||||
|
from urllib import quote, urlencode
|
||||||
|
from urllib2 import URLError
|
||||||
|
|
||||||
|
from oxutils.cache import getUrl, getUrlUnicode
|
||||||
|
from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo
|
||||||
|
from oxutils.normalize import normalizeImdbId
|
||||||
|
|
||||||
|
|
||||||
|
socket.setdefaulttimeout(10.0)
|
||||||
|
|
||||||
|
season_episode = re.compile("S..E..", re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
|
def getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout):
  '''Fetch url via the shared cache, forcing the site language to English.

  The caller's headers are copied before adding the language cookie:
  the original discarded the headers argument entirely and set
  'Cookie' directly on the shared cache.DEFAULT_HEADERS dict,
  mutating it for every other user of that default.
  '''
  headers = dict(headers)
  headers['Cookie'] = 'language=en_EN'
  return cache.getUrl(url, data, headers, timeout)
|
||||||
|
|
||||||
|
def getUrlUnicode(url):
  '''Unicode variant of the fetch, routed through the cookie-setting getUrl.'''
  return cache.getUrlUnicode(url, _getUrl=getUrl)
|
||||||
|
|
||||||
|
def findMovies(query, max_results=10):
  '''Search thepiratebay for movie torrents matching query.

  Follows "next page" links for up to three result pages and returns a
  list of (title, link, '') tuples, keeping only rows in category 201
  (Movies) and at most max_results entries.

  (Renamed the original local `next`, which shadowed the builtin.)
  '''
  results = []
  # /0/3/200: presumably page 0, sort option 3, category 200 (video)
  # — TODO confirm against TPB's search URL scheme.
  next_urls = ["http://thepiratebay.org/search/%s/0/3/200" % quote(query), ]
  page_count = 1
  while next_urls and page_count < 4:
    page_count += 1
    url = next_urls[0]
    # "next" links may be relative; make them absolute.
    if not url.startswith('http'):
      if not url.startswith('/'):
        url = "/" + url
      url = "http://thepiratebay.org" + url
    data = getUrlUnicode(url)
    regexp = '''<tr.*?<td class="vertTh"><a href="/browse/(.*?)".*?<td><a href="(/tor/.*?)" class="detLink".*?>(.*?)</a>.*?</tr>'''
    for row in re.compile(regexp, re.DOTALL).findall(data):
      torrentType = row[0]
      torrentLink = "http://thepiratebay.org" + row[1]
      torrentTitle = decodeHtml(row[2])
      # 201 = Movies, 202 = Movie DVDR, 205 = TV Shows
      if torrentType in ['201']:
        results.append((torrentTitle, torrentLink, ''))
        if len(results) >= max_results:
          return results
    next_urls = re.compile('<a.*?href="(.*?)".*?>.*?next.gif.*?</a>').findall(data)
  return results
|
||||||
|
|
||||||
|
def findMovieByImdb(imdb):
  '''Search thepiratebay by IMDb id (queries for "tt<normalized id>").'''
  normalized = normalizeImdbId(imdb)
  return findMovies("tt" + normalized)
|
||||||
|
|
||||||
|
def getId(piratebayId):
  '''Reduce a thepiratebay URL or id string to the bare torrent id.'''
  ref = piratebayId
  # Strip the torrents.thepiratebay.org host prefix, if present.
  if ref.startswith('http://torrents.thepiratebay.org/'):
    ref = ref.split('org/')[1]
  # Drop everything up to and including a "tor/" path component.
  if 'tor/' in ref:
    ref = ref.split('tor/')[1]
  # Prefer an explicit "/<digits>" segment when one remains.
  digits = findRegexp(ref, r"/(\d+)")
  if digits:
    return digits
  return ref
|
||||||
|
|
||||||
|
def getData(piratebayId):
  '''Scrape metadata for a thepiratebay torrent.

  Accepts a bare id or any tpb URL form (normalized through getId).
  Returns a dict with id, domain, links, title, imdb id, description,
  labelled detail fields, and the parsed torrent file info — or None
  when the page has no recognizable title (torrent not found).
  '''
  # Detail-page labels remapped to the normalized keys used elsewhere.
  _key_map = {
    'spoken language(s)': u'language',
    'texted language(s)': u'subtitle language',
    'by': u'uploader',
  }
  piratebayId = getId(piratebayId)
  torrent = dict()
  torrent[u'id'] = piratebayId
  torrent[u'domain'] = 'thepiratebay.org'
  torrent[u'comment_link'] = 'http://thepiratebay.org/tor/%s' % piratebayId

  data = getUrlUnicode(torrent['comment_link'])
  torrent[u'title'] = findRegexp(data, '<title>(.*?) \(download torrent\) - TPB</title>')
  if not torrent[u'title']:
    return None
  torrent[u'imdb'] = findRegexp(data, 'title/tt(\d{7})')
  torrent[u'torrent_link'] = "http://torrents.thepiratebay.org/%s/%s.torrent" % (piratebayId, quote(torrent['title']))
  # Each <dt>Label:</dt><dd>value</dd> pair becomes a dict entry.
  for d in re.compile('dt>(.*?):</dt>.*?<dd.*?>(.*?)</dd>', re.DOTALL).findall(data):
    key = d[0].lower().strip()
    key = _key_map.get(key, key)
    value = decodeHtml(stripTags(d[1].strip()))
    torrent[key] = value
  # Guard the description like the mininova getData does: the original
  # passed the raw findRegexp result straight into stripTags, which
  # breaks when no description block matches.
  torrent[u'description'] = findRegexp(data, '<div class="nfo">(.*?)</div>')
  if torrent['description']:
    torrent['description'] = decodeHtml(stripTags(torrent['description'])).strip()
  # Fetch the .torrent itself and attach its parsed metadata.
  t = getUrl(torrent[u'torrent_link'])
  torrent[u'torrent_info'] = getTorrentInfo(t)
  return torrent
|
||||||
|
|
Loading…
Reference in a new issue