diff --git a/ox/mininova.py b/ox/mininova.py
new file mode 100644
index 0000000..8e26170
--- /dev/null
+++ b/ox/mininova.py
@@ -0,0 +1,84 @@
+# -*- Mode: Python; -*-
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=2:sts=2:ts=2
+import re
+import socket
+from urllib import quote
+
+from oxutils.cache import getUrl, getUrlUnicode
+from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo
+from oxutils.normalize import normalizeImdbId
+
+
+socket.setdefaulttimeout(10.0)
+
+def _parseResultsPage(data, max_results=10):
+  results=[]
+  regexp = '''<tr><td>(.*?)</td><td>(.*?)<a href="/tor/(\d+)">(.*?)</a>.*?</td>.*?</tr>'''
+  for row in re.compile(regexp, re.DOTALL).findall(data):
+    torrentDate = row[0]
+    torrentExtra = row[1]
+    torrentId = row[2]
+    torrentTitle = decodeHtml(row[3]).strip()
+    torrentLink = "http://www.mininova.org/tor/" + torrentId
+    privateTracker = 'priv.gif' in torrentExtra
+    if not privateTracker:
+      results.append((torrentTitle, torrentLink, ''))
+  return results
+
+def findMovie(query, max_results=10):
+  '''search for torrents on mininova
+  '''
+  url = "http://www.mininova.org/search/%s/seeds" % quote(query)
+  data = getUrlUnicode(url)
+  return _parseResultsPage(data, max_results)
+
+def findMovieByImdb(imdbId):
+  '''find torrents on mininova for a given imdb id
+  '''
+  results = []
+  imdbId = normalizeImdbId(imdbId)
+  data = getUrlUnicode("http://www.mininova.org/imdb/?imdb=%s" % imdbId)
+  return _parseResultsPage(data)
+
+def getId(mininovaId):
+  d = findRegexp(mininovaId, "/(\d+)")
+  if d:
+    return d
+  mininovaId = mininovaId.split('/')
+  if len(mininovaId) == 1:
+    return mininovaId[0]
+  else:
+    return mininovaId[-1]
+
+def getData(mininovaId):
+  _key_map = {
+    'by': u'uploader',
+  }
+  mininovaId = getId(mininovaId)
+  torrent = dict()
+  torrent[u'id'] = mininovaId
+  torrent[u'domain'] = 'mininova.org'
+  torrent[u'comment_link'] = "http://www.mininova.org/tor/%s" % mininovaId
+  torrent[u'torrent_link'] = "http://www.mininova.org/get/%s" % mininovaId
+  torrent[u'details_link'] = "http://www.mininova.org/det/%s" % mininovaId
+
+  data = getUrlUnicode(torrent['comment_link']) + getUrlUnicode(torrent['details_link'])
+  if '<p>Torrent not found...</p>' in data:
+    return None
+
+  for d in re.compile('<p>.(.*?):(.*?)</p>', re.DOTALL).findall(data):
+    key = d[0].lower().strip()
+    key = _key_map.get(key, key)
+    value = decodeHtml(stripTags(d[1].strip()))
+    torrent[key] = value
+
+  torrent[u'title'] = findRegexp(data, '<title>(.*?):.*?</title>')
+  torrent[u'imdb'] = findRegexp(data, 'title/tt(\d{7})')
+  torrent[u'description'] = findRegexp(data, '<div id="description">(.*?)</div>')
+  if torrent['description']:
+    torrent['description'] = decodeHtml(stripTags(torrent['description'])).strip()
+  t = getUrl(torrent[u'torrent_link'])
+  torrent[u'torrent_info'] = getTorrentInfo(t)
+  return torrent
+
diff --git a/ox/thepiratebay.py b/ox/thepiratebay.py
new file mode 100644
index 0000000..468aaf7
--- /dev/null
+++ b/ox/thepiratebay.py
@@ -0,0 +1,93 @@
+# -*- Mode: Python; -*-
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=2:sts=2:ts=2
+
+import re
+import socket
+from urllib import quote, urlencode
+from urllib2 import URLError
+
+from oxutils.cache import getUrl, getUrlUnicode
+from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo
+from oxutils.normalize import normalizeImdbId
+
+
+socket.setdefaulttimeout(10.0)
+
+season_episode = re.compile("S..E..", re.IGNORECASE)
+
+
+def getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout):
+  headers = cache.DEFAULT_HEADERS.copy()  # copy so the shared default headers are not modified
+  headers['Cookie'] = 'language=en_EN'
+  return cache.getUrl(url, data, headers, timeout)
+
+def getUrlUnicode(url):
+  return cache.getUrlUnicode(url, _getUrl=getUrl)
+
+def findMovies(query, max_results=10):
+  results = []
+  next = ["http://thepiratebay.org/search/%s/0/3/200" % quote(query), ]
+  page_count = 1
+  while next and page_count < 4:
+    page_count += 1
+    url = next[0]
+    if not url.startswith('http'):
+      if not url.startswith('/'):
+        url = "/" + url
+      url = "http://thepiratebay.org" + url
+    data = getUrlUnicode(url)
+    regexp = '''<td class="vertTh"><a href="/browse/(.*?)".*?<a href="(/tor/.*?)">(.*?)</a>.*?</td>'''
+    for row in re.compile(regexp, re.DOTALL).findall(data):
+      torrentType = row[0]
+      torrentLink = "http://thepiratebay.org" + row[1]
+      torrentTitle = decodeHtml(row[2])
+      # 201 = Movies, 202 = Movie DVDR, 205 = TV Shows
+      if torrentType in ['201']:
+        results.append((torrentTitle, torrentLink, ''))
+      if len(results) >= max_results:
+        return results
+    next = re.compile('<a.*?href="(.*?)".*?>.*?next.gif.*?</a>').findall(data)
+  return results
+
+def findMovieByImdb(imdb):
+  return findMovies("tt" + normalizeImdbId(imdb))
+
+def getId(piratebayId):
+  if piratebayId.startswith('http://torrents.thepiratebay.org/'):
+    piratebayId = piratebayId.split('org/')[1]
+  if 'tor/' in piratebayId:
+    piratebayId = piratebayId.split('tor/')[1]
+  d = findRegexp(piratebayId, "/(\d+)")
+  if d:
+    piratebayId = d
+  return piratebayId
+
+def getData(piratebayId):
+  _key_map = {
+    'spoken language(s)': u'language',
+    'texted language(s)': u'subtitle language',
+    'by': u'uploader',
+  }
+  piratebayId = getId(piratebayId)
+  torrent = dict()
+  torrent[u'id'] = piratebayId
+  torrent[u'domain'] = 'thepiratebay.org'
+  torrent[u'comment_link'] = 'http://thepiratebay.org/tor/%s' % piratebayId
+
+  data = getUrlUnicode(torrent['comment_link'])
+  torrent[u'title'] = findRegexp(data, '<title>(.*?) \(download torrent\) - TPB</title>')
+  if not torrent[u'title']:
+    return None
+  torrent[u'imdb'] = findRegexp(data, 'title/tt(\d{7})')
+  torrent[u'torrent_link']="http://torrents.thepiratebay.org/%s/%s.torrent" % (piratebayId, quote(torrent['title']))
+  for d in re.compile('dt>(.*?):</dt>.*?<dd.*?>(.*?)</dd>', re.DOTALL).findall(data):
+    key = d[0].lower().strip()
+    key = _key_map.get(key, key)
+    value = decodeHtml(stripTags(d[1].strip()))
+    torrent[key] = value
+  torrent[u'description'] = decodeHtml(stripTags(findRegexp(data, '<div class="nfo">(.*?)</div>'))).strip()
+  t = getUrl(torrent[u'torrent_link'])
+  torrent[u'torrent_info'] = getTorrentInfo(t)
+  return torrent
+
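
Usage sketch (not part of the patch): how the two new modules might be called. This assumes oxutils is installed, that ox/ is importable as a package, and that the regexps above still match the live mininova.org / thepiratebay.org markup; the IMDb and torrent ids below are only illustrative examples.

  from ox import mininova, thepiratebay

  # search both trackers by IMDb id; both return a list of (title, link, '') tuples
  print mininova.findMovieByImdb('0133093')
  print thepiratebay.findMovieByImdb('0133093')

  # fetch details and the parsed .torrent for one mininova entry (hypothetical id)
  torrent = mininova.getData('http://www.mininova.org/tor/123456')
  if torrent:
    print torrent['title'], torrent.get('imdb'), torrent['torrent_link']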