python-oxweb/ox/mininova.py

# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2

from datetime import datetime
import re
import socket
from urllib import quote

from oxutils.cache import getUrl, getUrlUnicode
from oxutils import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
from oxutils.normalize import normalizeImdbId

from torrent import Torrent

socket.setdefaulttimeout(10.0)

def _parseResultsPage(data, max_results=10):
  results=[]
  regexp = '''<tr><td>(.*?)</td><td>(.*?)<a href="/tor/(.*?)">(.*?)</a>.*?</td>.*?</tr>'''
  for row in  re.compile(regexp, re.DOTALL).findall(data):
    torrentDate = row[0]
    torrentExtra = row[1]
    torrentId = row[2]
    torrentTitle = decodeHtml(row[3]).strip()
    torrentLink = "http://www.mininova.org/tor/" + torrentId
    privateTracker = 'priv.gif' in torrentExtra
    if not privateTracker:
      results.append((torrentTitle, torrentLink, ''))
  return results

def findMovie(query, max_results=10):
  '''Search for torrents on mininova.

  query: search terms, as a byte string or unicode.
  max_results: passed through to _parseResultsPage.

  Returns whatever _parseResultsPage extracts from the search page
  (the "/seeds" url suffix presumably orders the listing by seeders).
  '''
  if isinstance(query, unicode):
    # urllib.quote raises KeyError on non-ASCII unicode input,
    # so hand it UTF-8 encoded bytes instead
    query = query.encode('utf-8')
  url = "http://www.mininova.org/search/%s/seeds" % quote(query)
  data = getUrlUnicode(url)
  return _parseResultsPage(data, max_results)

def findMovieByImdb(imdbId):
  '''Find torrents on mininova for a given imdb id.

  imdbId: imdb id in any form accepted by normalizeImdbId.

  Returns the result of _parseResultsPage (called with its default
  arguments) for mininova's imdb lookup page.
  '''
  imdbId = normalizeImdbId(imdbId)
  data = getUrlUnicode("http://www.mininova.org/imdb/?imdb=%s" % imdbId)
  return _parseResultsPage(data)

def getId(mininovaId):
  '''Reduce a mininova id, path or url to the bare torrent id.

  The first run of digits that directly follows a slash wins; if there
  is none, the text after the last slash (or the whole input when it
  contains no slash) is returned.
  '''
  text = unicode(mininovaId)
  numeric = findRe(text, "/(\d+)")
  if numeric:
    return numeric
  # split() always yields at least one piece, so [-1] covers both the
  # "no slash" and the "several segments" case
  return text.split('/')[-1]

def getData(mininovaId):
  '''Collect the metadata mininova publishes for one torrent.

  mininovaId: anything getId can reduce to a torrent id.

  Downloads the comment ("/tor/") and details ("/det/") pages, and the
  torrent file itself ("/get/"). Returns a dict holding the links, every
  "<strong>label:</strong> value" paragraph found on the pages (keyed by
  the lower-cased label), title, imdbId, description and torrent_info.
  Returns None when the pages contain the "Torrent not found..." heading.
  '''
  # page labels that are stored under a different key
  aliases = {
    'by': u'uploader',
  }
  mininovaId = getId(mininovaId)
  torrent = {
    u'id': mininovaId,
    u'domain': 'mininova.org',
    u'comment_link': "http://www.mininova.org/tor/%s" % mininovaId,
    u'torrent_link': "http://www.mininova.org/get/%s" % mininovaId,
    u'details_link': "http://www.mininova.org/det/%s" % mininovaId,
  }

  # both pages are searched as one text: comment page first, then details
  html = getUrlUnicode(torrent['comment_link'])
  html += getUrlUnicode(torrent['details_link'])
  if '<h1>Torrent not found...</h1>' in html:
    return None

  field_re = re.compile('<p>.<strong>(.*?):</strong>(.*?)</p>', re.DOTALL)
  for label, raw_value in field_re.findall(html):
    label = label.lower().strip()
    label = aliases.get(label, label)
    torrent[label] = decodeHtml(stripTags(raw_value.strip()))

  torrent[u'title'] = findRe(html, '<title>(.*?):.*?</title>')
  torrent[u'imdbId'] = findRe(html, 'title/tt(\d{7})')
  description = findRe(html, '<div id="description">(.*?)</div>')
  if description:
    # reduce the html fragment to plain text
    description = normalizeNewlines(decodeHtml(stripTags(description))).strip()
  torrent[u'description'] = description
  torrent_file = getUrl(torrent[u'torrent_link'])
  torrent[u'torrent_info'] = getTorrentInfo(torrent_file)
  return torrent

class Mininova(Torrent):
  '''
  Torrent filled from the data getData returns for a mininova id.

  >>> Mininova('123')
  {}
  >>> Mininova('1072195')['infohash']
  '72dfa59d2338e4a48c78cec9de25964cddb64104'
  '''
  def __init__(self, mininovaId):
    '''Load the torrent; the instance is left untouched when getData
    finds nothing.
    '''
    self.data = getData(mininovaId)
    if self.data:
      Torrent.__init__(self)
      # strip commas and whitespace, then let intValue pick out the number
      toInt = lambda text: int(intValue(text.replace(',','').strip()))
      # NOTE(review): splitting on ',' would mis-split counts that contain
      # thousands separators - confirm the format of 'share ratio'
      shares = self.data['share ratio'].split(',')
      self['seeder'] = toInt(shares[0])
      self['leecher'] = toInt(shares[1])
      self['downloaded'] = toInt(self.data['downloads'])
      # keep only the part before the " +" offset suffix
      added = self.data['added on'].split(' +')[0]
      self['published'] = datetime.strptime(added, "%a, %d %b %Y %H:%M:%S")
welcome back TPB and Mininova, both with getData(id), findMovieByImdb(imdbId) and findMovie(query) [you need BitTornado installed] 2008-05-04 15:05:41 +00:00			`# -- Mode: Python; --`
			`# -- coding: utf-8 --`
			`# vi:si:et:sw=2:sts=2:ts=2`
introducing Torrent dict, torrent info abstraction dict class 2008-05-05 11:09:29 +00:00
			`from datetime import datetime`
welcome back TPB and Mininova, both with getData(id), findMovieByImdb(imdbId) and findMovie(query) [you need BitTornado installed] 2008-05-04 15:05:41 +00:00			`import re`
			`import socket`
			`from urllib import quote`

			`from oxutils.cache import getUrl, getUrlUnicode`
findRegexp -> findRe, update setup.py 2008-05-07 09:45:00 +00:00			`from oxutils import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines`
welcome back TPB and Mininova, both with getData(id), findMovieByImdb(imdbId) and findMovie(query) [you need BitTornado installed] 2008-05-04 15:05:41 +00:00			`from oxutils.normalize import normalizeImdbId`

introducing Torrent dict, torrent info abstraction dict class 2008-05-05 11:09:29 +00:00			`from torrent import Torrent`
welcome back TPB and Mininova, both with getData(id), findMovieByImdb(imdbId) and findMovie(query) [you need BitTornado installed] 2008-05-04 15:05:41 +00:00
			`socket.setdefaulttimeout(10.0)`

			`def _parseResultsPage(data, max_results=10):`
			`results=[]`
			`regexp = '''<tr><td>(.*?)</td><td>(.*?)<a href="/tor/(.*?)">(.*?)</a>.*?</td>.*?</tr>'''`
			`for row in re.compile(regexp, re.DOTALL).findall(data):`
			`torrentDate = row[0]`
			`torrentExtra = row[1]`
			`torrentId = row[2]`
			`torrentTitle = decodeHtml(row[3]).strip()`
			`torrentLink = "http://www.mininova.org/tor/" + torrentId`
			`privateTracker = 'priv.gif' in torrentExtra`
			`if not privateTracker:`
			`results.append((torrentTitle, torrentLink, ''))`
			`return results`

			`def findMovie(query, max_results=10):`
			`'''search for torrents on mininova`
			`'''`
			`url = "http://www.mininova.org/search/%s/seeds" % quote(query)`
			`data = getUrlUnicode(url)`
			`return _parseResultsPage(data, max_results)`

			`def findMovieByImdb(imdbId):`
			`'''find torrents on mininova for a given imdb id`
			`'''`
			`results = []`
			`imdbId = normalizeImdbId(imdbId)`
			`data = getUrlUnicode("http://www.mininova.org/imdb/?imdb=%s" % imdbId)`
			`return _parseResultsPage(data)`

			`def getId(mininovaId):`
ox gets some tests too 2008-05-05 18:33:23 +00:00			`mininovaId = unicode(mininovaId)`
findRegexp -> findRe, update setup.py 2008-05-07 09:45:00 +00:00			`d = findRe(mininovaId, "/(\d+)")`
welcome back TPB and Mininova, both with getData(id), findMovieByImdb(imdbId) and findMovie(query) [you need BitTornado installed] 2008-05-04 15:05:41 +00:00			`if d:`
			`return d`
			`mininovaId = mininovaId.split('/')`
			`if len(mininovaId) == 1:`
			`return mininovaId[0]`
			`else:`
			`return mininovaId[-1]`

			`def getData(mininovaId):`
			`_key_map = {`
			`'by': u'uploader',`
			`}`
			`mininovaId = getId(mininovaId)`
			`torrent = dict()`
			`torrent[u'id'] = mininovaId`
			`torrent[u'domain'] = 'mininova.org'`
			`torrent[u'comment_link'] = "http://www.mininova.org/tor/%s" % mininovaId`
			`torrent[u'torrent_link'] = "http://www.mininova.org/get/%s" % mininovaId`
			`torrent[u'details_link'] = "http://www.mininova.org/det/%s" % mininovaId`

			`data = getUrlUnicode(torrent['comment_link']) + getUrlUnicode(torrent['details_link'])`
			`if '<h1>Torrent not found...</h1>' in data:`
			`return None`

			`for d in re.compile('<p>.<strong>(.*?):</strong>(.*?)</p>', re.DOTALL).findall(data):`
			`key = d[0].lower().strip()`
			`key = _key_map.get(key, key)`
			`value = decodeHtml(stripTags(d[1].strip()))`
			`torrent[key] = value`

findRegexp -> findRe, update setup.py 2008-05-07 09:45:00 +00:00			`torrent[u'title'] = findRe(data, '<title>(.*?):.*?</title>')`
			`torrent[u'imdbId'] = findRe(data, 'title/tt(\d{7})')`
			`torrent[u'description'] = findRe(data, '<div id="description">(.*?)</div>')`
welcome back TPB and Mininova, both with getData(id), findMovieByImdb(imdbId) and findMovie(query) [you need BitTornado installed] 2008-05-04 15:05:41 +00:00			`if torrent['description']:`
ox gets some tests too 2008-05-05 18:33:23 +00:00			`torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()`
welcome back TPB and Mininova, both with getData(id), findMovieByImdb(imdbId) and findMovie(query) [you need BitTornado installed] 2008-05-04 15:05:41 +00:00			`t = getUrl(torrent[u'torrent_link'])`
			`torrent[u'torrent_info'] = getTorrentInfo(t)`
			`return torrent`

introducing Torrent dict, torrent info abstraction dict class 2008-05-05 11:09:29 +00:00			`class Mininova(Torrent):`
ox gets some tests too 2008-05-05 18:33:23 +00:00			`'''`
			`>>> Mininova('123')`
			`{}`
better torrent tests, add getMovieId test 2008-05-08 10:43:35 +00:00			`>>> Mininova('1072195')['infohash']`
			`'72dfa59d2338e4a48c78cec9de25964cddb64104'`
ox gets some tests too 2008-05-05 18:33:23 +00:00			`'''`
introducing Torrent dict, torrent info abstraction dict class 2008-05-05 11:09:29 +00:00			`def __init__(self, mininovaId):`
			`self.data = getData(mininovaId)`
ox gets some tests too 2008-05-05 18:33:23 +00:00			`if not self.data:`
			`return`
introducing Torrent dict, torrent info abstraction dict class 2008-05-05 11:09:29 +00:00			`Torrent.__init__(self)`
			`ratio = self.data['share ratio'].split(',')`
			`self['seeder'] = int(intValue(ratio[0].replace(',','').strip()))`
			`self['leecher'] = int(intValue(ratio[1].replace(',','').strip()))`
			`self['downloaded'] = int(intValue(self.data['downloads'].replace(',','').strip()))`
			`published = self.data['added on']`
			`published = published.split(' +')[0]`
			`self['published'] = datetime.strptime(published, "%a, %d %b %Y %H:%M:%S")`