python-oxweb/ox/mininova.py

# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from datetime import datetime
import re
import socket
from urllib import quote

from oxutils.cache import getUrl, getUrlUnicode
from oxutils import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
from oxutils.normalize import normalizeImdbId
import oxutils

from torrent import Torrent


def _parseResultsPage(data, max_results=10):
    results=[]
    regexp = '''<tr><td>(.*?)</td><td>(.*?)<a href="/tor/(.*?)">(.*?)</a>.*?</td>.*?</tr>'''
    for row in  re.compile(regexp, re.DOTALL).findall(data):
        torrentDate = row[0]
        torrentExtra = row[1]
        torrentId = row[2]
        torrentTitle = decodeHtml(row[3]).strip()
        torrentLink = "http://www.mininova.org/tor/" + torrentId
        privateTracker = 'priv.gif' in torrentExtra
        if not privateTracker:
            results.append((torrentTitle, torrentLink, ''))
    return results

def findMovie(query, max_results=10):
    '''search for torrents on mininova
    '''
    url = "http://www.mininova.org/search/%s/seeds" % quote(query)
    data = getUrlUnicode(url)
    return _parseResultsPage(data, max_results)

def findMovieByImdb(imdbId):
    '''find torrents on mininova for a given imdb id
    '''
    results = []
    imdbId = normalizeImdbId(imdbId)
    data = getUrlUnicode("http://www.mininova.org/imdb/?imdb=%s" % imdbId)
    return _parseResultsPage(data)

def getId(mininovaId):
    mininovaId = unicode(mininovaId)
    d = findRe(mininovaId, "/(\d+)")
    if d:
        return d
    mininovaId = mininovaId.split('/')
    if len(mininovaId) == 1:
        return mininovaId[0]
    else:
        return mininovaId[-1]

def exists(mininovaId):
    mininovaId = getId(mininovaId)
    data = oxutils.net.getUrl("http://www.mininova.org/tor/%s" % mininovaId)
    if not data or 'Torrent not found...' in data:
        return False
    if 'tracker</a> of this torrent requires registration.' in data:
        return False
    return True

def getData(mininovaId):
    _key_map = {
        'by': u'uploader',
    }
    mininovaId = getId(mininovaId)
    torrent = dict()
    torrent[u'id'] = mininovaId
    torrent[u'domain'] = 'mininova.org'
    torrent[u'comment_link'] = "http://www.mininova.org/tor/%s" % mininovaId
    torrent[u'torrent_link'] = "http://www.mininova.org/get/%s" % mininovaId
    torrent[u'details_link'] = "http://www.mininova.org/det/%s" % mininovaId

    data = getUrlUnicode(torrent['comment_link']) + getUrlUnicode(torrent['details_link'])
    if '<h1>Torrent not found...</h1>' in data:
        return None

    for d in re.compile('<p>.<strong>(.*?):</strong>(.*?)</p>', re.DOTALL).findall(data):
        key = d[0].lower().strip()
        key = _key_map.get(key, key)
        value = decodeHtml(stripTags(d[1].strip()))
        torrent[key] = value

    torrent[u'title'] = findRe(data, '<title>(.*?):.*?</title>')
    torrent[u'imdbId'] = findRe(data, 'title/tt(\d{7})')
    torrent[u'description'] = findRe(data, '<div id="description">(.*?)</div>')
    if torrent['description']:
        torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
    t = getUrl(torrent[u'torrent_link'])
    torrent[u'torrent_info'] = getTorrentInfo(t)
    return torrent

class Mininova(Torrent):
    '''
    >>> Mininova('123')
    {}
    >>> Mininova('1072195')['infohash']
    '72dfa59d2338e4a48c78cec9de25964cddb64104'
    '''
    def __init__(self, mininovaId):
        self.data = getData(mininovaId)
        if not self.data:
            return
        Torrent.__init__(self)
        ratio = self.data['share ratio'].split(',')
        self['seeder'] = -1
        self['leecher'] = -1
        if len(ratio) == 2:
            val = intValue(ratio[0].replace(',','').strip())
            if val:
                self['seeder'] = int(val)
            val = intValue(ratio[1].replace(',','').strip())
            if val:
                self['leecher'] = int(val)
        val = intValue(self.data['downloads'].replace(',','').strip())
        if val:
            self['downloaded'] = int(val)
        else:
            self['downloaded'] = -1
        published =  self.data['added on']
        published = published.split(' +')[0]
        self['published'] =  datetime.strptime(published, "%a, %d %b %Y %H:%M:%S")
welcome back TPB and Mininova, both with getData(id), findMovieByImdb(imdbId) and findMovie(query) [you need BitTornado installed] 2008-05-04 15:05:41 +00:00			`# -- coding: utf-8 --`
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 09:47:02 +00:00			`# vi:si:et:sw=4:sts=4:ts=4`
introducing Torrent dict, torrent info abstraction dict class 2008-05-05 11:09:29 +00:00			`from datetime import datetime`
welcome back TPB and Mininova, both with getData(id), findMovieByImdb(imdbId) and findMovie(query) [you need BitTornado installed] 2008-05-04 15:05:41 +00:00			`import re`
			`import socket`
			`from urllib import quote`

			`from oxutils.cache import getUrl, getUrlUnicode`
findRegexp -> findRe, update setup.py 2008-05-07 09:45:00 +00:00			`from oxutils import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines`
welcome back TPB and Mininova, both with getData(id), findMovieByImdb(imdbId) and findMovie(query) [you need BitTornado installed] 2008-05-04 15:05:41 +00:00			`from oxutils.normalize import normalizeImdbId`
check if torrent site still exists 2008-05-25 10:04:13 +00:00			`import oxutils`
welcome back TPB and Mininova, both with getData(id), findMovieByImdb(imdbId) and findMovie(query) [you need BitTornado installed] 2008-05-04 15:05:41 +00:00
introducing Torrent dict, torrent info abstraction dict class 2008-05-05 11:09:29 +00:00			`from torrent import Torrent`
welcome back TPB and Mininova, both with getData(id), findMovieByImdb(imdbId) and findMovie(query) [you need BitTornado installed] 2008-05-04 15:05:41 +00:00

			`def _parseResultsPage(data, max_results=10):`
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 09:47:02 +00:00			`results=[]`
			`regexp = '''<tr><td>(.?)</td><td>(.?)<a href="/tor/(.?)">(.?)</a>.?</td>.?</tr>'''`
			`for row in re.compile(regexp, re.DOTALL).findall(data):`
			`torrentDate = row[0]`
			`torrentExtra = row[1]`
			`torrentId = row[2]`
			`torrentTitle = decodeHtml(row[3]).strip()`
			`torrentLink = "http://www.mininova.org/tor/" + torrentId`
			`privateTracker = 'priv.gif' in torrentExtra`
			`if not privateTracker:`
			`results.append((torrentTitle, torrentLink, ''))`
			`return results`
welcome back TPB and Mininova, both with getData(id), findMovieByImdb(imdbId) and findMovie(query) [you need BitTornado installed] 2008-05-04 15:05:41 +00:00
			`def findMovie(query, max_results=10):`
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 09:47:02 +00:00			`'''search for torrents on mininova`
			`'''`
			`url = "http://www.mininova.org/search/%s/seeds" % quote(query)`
			`data = getUrlUnicode(url)`
			`return _parseResultsPage(data, max_results)`
welcome back TPB and Mininova, both with getData(id), findMovieByImdb(imdbId) and findMovie(query) [you need BitTornado installed] 2008-05-04 15:05:41 +00:00
			`def findMovieByImdb(imdbId):`
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 09:47:02 +00:00			`'''find torrents on mininova for a given imdb id`
			`'''`
			`results = []`
			`imdbId = normalizeImdbId(imdbId)`
			`data = getUrlUnicode("http://www.mininova.org/imdb/?imdb=%s" % imdbId)`
			`return _parseResultsPage(data)`
welcome back TPB and Mininova, both with getData(id), findMovieByImdb(imdbId) and findMovie(query) [you need BitTornado installed] 2008-05-04 15:05:41 +00:00
			`def getId(mininovaId):`
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 09:47:02 +00:00			`mininovaId = unicode(mininovaId)`
			`d = findRe(mininovaId, "/(\d+)")`
			`if d:`
			`return d`
			`mininovaId = mininovaId.split('/')`
			`if len(mininovaId) == 1:`
			`return mininovaId[0]`
			`else:`
			`return mininovaId[-1]`
welcome back TPB and Mininova, both with getData(id), findMovieByImdb(imdbId) and findMovie(query) [you need BitTornado installed] 2008-05-04 15:05:41 +00:00
check if torrent site still exists 2008-05-25 10:04:13 +00:00			`def exists(mininovaId):`
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 09:47:02 +00:00			`mininovaId = getId(mininovaId)`
			`data = oxutils.net.getUrl("http://www.mininova.org/tor/%s" % mininovaId)`
			`if not data or 'Torrent not found...' in data:`
			`return False`
			`if 'tracker</a> of this torrent requires registration.' in data:`
			`return False`
			`return True`
check if torrent site still exists 2008-05-25 10:04:13 +00:00
welcome back TPB and Mininova, both with getData(id), findMovieByImdb(imdbId) and findMovie(query) [you need BitTornado installed] 2008-05-04 15:05:41 +00:00			`def getData(mininovaId):`
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 09:47:02 +00:00			`_key_map = {`
			`'by': u'uploader',`
			`}`
			`mininovaId = getId(mininovaId)`
			`torrent = dict()`
			`torrent[u'id'] = mininovaId`
			`torrent[u'domain'] = 'mininova.org'`
			`torrent[u'comment_link'] = "http://www.mininova.org/tor/%s" % mininovaId`
			`torrent[u'torrent_link'] = "http://www.mininova.org/get/%s" % mininovaId`
			`torrent[u'details_link'] = "http://www.mininova.org/det/%s" % mininovaId`
welcome back TPB and Mininova, both with getData(id), findMovieByImdb(imdbId) and findMovie(query) [you need BitTornado installed] 2008-05-04 15:05:41 +00:00
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 09:47:02 +00:00			`data = getUrlUnicode(torrent['comment_link']) + getUrlUnicode(torrent['details_link'])`
			`if '<h1>Torrent not found...</h1>' in data:`
			`return None`
welcome back TPB and Mininova, both with getData(id), findMovieByImdb(imdbId) and findMovie(query) [you need BitTornado installed] 2008-05-04 15:05:41 +00:00
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 09:47:02 +00:00			`for d in re.compile('<p>.<strong>(.?):</strong>(.?)</p>', re.DOTALL).findall(data):`
			`key = d[0].lower().strip()`
			`key = _key_map.get(key, key)`
			`value = decodeHtml(stripTags(d[1].strip()))`
			`torrent[key] = value`
welcome back TPB and Mininova, both with getData(id), findMovieByImdb(imdbId) and findMovie(query) [you need BitTornado installed] 2008-05-04 15:05:41 +00:00
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 09:47:02 +00:00			`torrent[u'title'] = findRe(data, '<title>(.?):.?</title>')`
			`torrent[u'imdbId'] = findRe(data, 'title/tt(\d{7})')`
			`torrent[u'description'] = findRe(data, '<div id="description">(.*?)</div>')`
			`if torrent['description']:`
			`torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()`
			`t = getUrl(torrent[u'torrent_link'])`
			`torrent[u'torrent_info'] = getTorrentInfo(t)`
			`return torrent`
welcome back TPB and Mininova, both with getData(id), findMovieByImdb(imdbId) and findMovie(query) [you need BitTornado installed] 2008-05-04 15:05:41 +00:00
introducing Torrent dict, torrent info abstraction dict class 2008-05-05 11:09:29 +00:00			`class Mininova(Torrent):`
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 09:47:02 +00:00			`'''`
			`>>> Mininova('123')`
			`{}`
			`>>> Mininova('1072195')['infohash']`
			`'72dfa59d2338e4a48c78cec9de25964cddb64104'`
			`'''`
			`def __init__(self, mininovaId):`
			`self.data = getData(mininovaId)`
			`if not self.data:`
			`return`
			`Torrent.__init__(self)`
			`ratio = self.data['share ratio'].split(',')`
			`self['seeder'] = -1`
			`self['leecher'] = -1`
			`if len(ratio) == 2:`
			`val = intValue(ratio[0].replace(',','').strip())`
			`if val:`
			`self['seeder'] = int(val)`
			`val = intValue(ratio[1].replace(',','').strip())`
			`if val:`
			`self['leecher'] = int(val)`
			`val = intValue(self.data['downloads'].replace(',','').strip())`
			`if val:`
			`self['downloaded'] = int(val)`
			`else:`
			`self['downloaded'] = -1`
			`published = self.data['added on']`
			`published = published.split(' +')[0]`
			`self['published'] = datetime.strptime(published, "%a, %d %b %Y %H:%M:%S")`
introducing Torrent dict, torrent info abstraction dict class 2008-05-05 11:09:29 +00:00