python-oxweb/ox/mininova.py

# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
import re
import socket
from urllib import quote

from oxutils.cache import getUrl, getUrlUnicode
from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo
from oxutils.normalize import normalizeImdbId


# Import-time, process-wide side effect: sockets created from now on get a
# default timeout of 10 seconds (presumably so the page fetches done by this
# module cannot block forever -- the helpers' own timeout handling is not
# visible from here).
socket.setdefaulttimeout(10.0)

def _parseResultsPage(data, max_results=10):
  '''extract torrents from the html of a mininova result listing

  data        -- html source of a search / imdb listing page
  max_results -- maximum number of torrents to return; None disables
                 the limit

  returns a list of (title, link, '') tuples in page order. rows whose
  second cell references 'priv.gif' (private tracker marker) are skipped
  and do not count towards max_results.
  '''
  results = []
  regexp = '''<tr><td>(.*?)</td><td>(.*?)<a href="/tor/(.*?)">(.*?)</a>.*?</td>.*?</tr>'''
  # row groups: (date cell, extra markup before the link, torrent id, title)
  for _date, torrentExtra, torrentId, rawTitle in re.compile(regexp, re.DOTALL).findall(data):
    # honour the caller's limit; it used to be accepted but ignored
    if max_results is not None and len(results) >= max_results:
      break
    if 'priv.gif' in torrentExtra:
      continue
    torrentTitle = decodeHtml(rawTitle).strip()
    torrentLink = "http://www.mininova.org/tor/" + torrentId
    results.append((torrentTitle, torrentLink, ''))
  return results

def findMovie(query, max_results=10):
  '''search mininova for torrents matching query

  the query is url-quoted and requested through the ".../seeds" search
  url (presumably a listing sorted by seeders); the fetched page is then
  handed to _parseResultsPage together with max_results.
  '''
  searchUrl = "http://www.mininova.org/search/%s/seeds" % quote(query)
  return _parseResultsPage(getUrlUnicode(searchUrl), max_results)

def findMovieByImdb(imdbId, max_results=10):
  '''find torrents on mininova for a given imdb id

  imdbId      -- imdb id in any form accepted by normalizeImdbId
  max_results -- forwarded to _parseResultsPage, same meaning and default
                 as in findMovie

  returns whatever _parseResultsPage extracts from the imdb listing page.
  '''
  imdbId = normalizeImdbId(imdbId)
  data = getUrlUnicode("http://www.mininova.org/imdb/?imdb=%s" % imdbId)
  return _parseResultsPage(data, max_results)

def getId(mininovaId):
  '''reduce a mininova url or id string to the bare torrent id

  if findRegexp finds a "/<digits>" part in the input, its result is
  returned (presumably the captured digits -- findRegexp is defined in
  oxutils). otherwise the text after the last '/' is returned, which is
  the whole input when it contains no slash.
  '''
  numericId = findRegexp(mininovaId, "/(\d+)")
  if not numericId:
    # no numeric path component: fall back to the last path segment
    return mininovaId.split('/')[-1]
  return numericId

def getData(mininovaId):
  '''collect metadata for one mininova torrent

  mininovaId may be anything getId accepts. the comment page and the
  details page are fetched and scraped together; returns None when the
  combined html contains the "Torrent not found" heading, otherwise a
  dict with id, domain, the three mininova links, every
  "<strong>label:</strong> value" pair found on the pages (label
  lowercased, 'by' stored as 'uploader'), title, imdb, description and
  torrent_info (result of getTorrentInfo on the downloaded torrent).
  '''
  _key_map = {
    'by': u'uploader',
  }
  torrentId = getId(mininovaId)
  torrent = {
    u'id': torrentId,
    u'domain': 'mininova.org',
    u'comment_link': "http://www.mininova.org/tor/%s" % torrentId,
    u'torrent_link': "http://www.mininova.org/get/%s" % torrentId,
    u'details_link': "http://www.mininova.org/det/%s" % torrentId,
  }

  # comment page first, then details page; both are scraped as one string
  commentPage = getUrlUnicode(torrent['comment_link'])
  detailsPage = getUrlUnicode(torrent['details_link'])
  data = commentPage + detailsPage
  if '<h1>Torrent not found...</h1>' in data:
    return None

  # scraped labels are written after the fixed keys above, so a label with
  # the same name replaces the preset value
  fieldPattern = re.compile('<p>.<strong>(.*?):</strong>(.*?)</p>', re.DOTALL)
  for label, rawValue in fieldPattern.findall(data):
    label = label.lower().strip()
    torrent[_key_map.get(label, label)] = decodeHtml(stripTags(rawValue.strip()))

  torrent[u'title'] = findRegexp(data, '<title>(.*?):.*?</title>')
  torrent[u'imdb'] = findRegexp(data, 'title/tt(\d{7})')
  description = findRegexp(data, '<div id="description">(.*?)</div>')
  if description:
    # only clean up the markup when a description was actually found
    description = decodeHtml(stripTags(description)).strip()
  torrent[u'description'] = description
  torrent[u'torrent_info'] = getTorrentInfo(getUrl(torrent[u'torrent_link']))
  return torrent
welcome back TPB and Mininova, both with getData(id), findMovieByImdb(imdbId) and findMovie(query) [you need BitTornado installed] 2008-05-04 15:05:41 +00:00			`# -*- Mode: Python; -*-`
			`# -*- coding: utf-8 -*-`
			`# vi:si:et:sw=2:sts=2:ts=2`
			`import re`
			`import socket`
			`from urllib import quote`

			`from oxutils.cache import getUrl, getUrlUnicode`
			`from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo`
			`from oxutils.normalize import normalizeImdbId`


			`socket.setdefaulttimeout(10.0)`

			`def _parseResultsPage(data, max_results=10):`
			`results=[]`
			`regexp = '''<tr><td>(.*?)</td><td>(.*?)<a href="/tor/(.*?)">(.*?)</a>.*?</td>.*?</tr>'''`
			`for row in re.compile(regexp, re.DOTALL).findall(data):`
			`torrentDate = row[0]`
			`torrentExtra = row[1]`
			`torrentId = row[2]`
			`torrentTitle = decodeHtml(row[3]).strip()`
			`torrentLink = "http://www.mininova.org/tor/" + torrentId`
			`privateTracker = 'priv.gif' in torrentExtra`
			`if not privateTracker:`
			`results.append((torrentTitle, torrentLink, ''))`
			`return results`

			`def findMovie(query, max_results=10):`
			`'''search for torrents on mininova`
			`'''`
			`url = "http://www.mininova.org/search/%s/seeds" % quote(query)`
			`data = getUrlUnicode(url)`
			`return _parseResultsPage(data, max_results)`

			`def findMovieByImdb(imdbId):`
			`'''find torrents on mininova for a given imdb id`
			`'''`
			`results = []`
			`imdbId = normalizeImdbId(imdbId)`
			`data = getUrlUnicode("http://www.mininova.org/imdb/?imdb=%s" % imdbId)`
			`return _parseResultsPage(data)`

			`def getId(mininovaId):`
			`d = findRegexp(mininovaId, "/(\d+)")`
			`if d:`
			`return d`
			`mininovaId = mininovaId.split('/')`
			`if len(mininovaId) == 1:`
			`return mininovaId[0]`
			`else:`
			`return mininovaId[-1]`

			`def getData(mininovaId):`
			`_key_map = {`
			`'by': u'uploader',`
			`}`
			`mininovaId = getId(mininovaId)`
			`torrent = dict()`
			`torrent[u'id'] = mininovaId`
			`torrent[u'domain'] = 'mininova.org'`
			`torrent[u'comment_link'] = "http://www.mininova.org/tor/%s" % mininovaId`
			`torrent[u'torrent_link'] = "http://www.mininova.org/get/%s" % mininovaId`
			`torrent[u'details_link'] = "http://www.mininova.org/det/%s" % mininovaId`

			`data = getUrlUnicode(torrent['comment_link']) + getUrlUnicode(torrent['details_link'])`
			`if '<h1>Torrent not found...</h1>' in data:`
			`return None`

			`for d in re.compile('<p>.<strong>(.*?):</strong>(.*?)</p>', re.DOTALL).findall(data):`
			`key = d[0].lower().strip()`
			`key = _key_map.get(key, key)`
			`value = decodeHtml(stripTags(d[1].strip()))`
			`torrent[key] = value`

			`torrent[u'title'] = findRegexp(data, '<title>(.*?):.*?</title>')`
			`torrent[u'imdb'] = findRegexp(data, 'title/tt(\d{7})')`
			`torrent[u'description'] = findRegexp(data, '<div id="description">(.*?)</div>')`
			`if torrent['description']:`
			`torrent['description'] = decodeHtml(stripTags(torrent['description'])).strip()`
			`t = getUrl(torrent[u'torrent_link'])`
			`torrent[u'torrent_info'] = getTorrentInfo(t)`
			`return torrent`