From b3d3f44d206e61b68a5130f3e68d402d06e18575 Mon Sep 17 00:00:00 2001 From: j Date: Mon, 5 May 2008 13:09:29 +0200 Subject: [PATCH] introducing Torrent dict, torrent info abstraction dict class --- ox/mininova.py | 20 ++++++++++++++++++-- ox/thepiratebay.py | 16 +++++++++++++++- ox/torrent.py | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 3 deletions(-) create mode 100644 ox/torrent.py diff --git a/ox/mininova.py b/ox/mininova.py index 8e26170..fa64b6f 100644 --- a/ox/mininova.py +++ b/ox/mininova.py @@ -1,14 +1,17 @@ # -*- Mode: Python; -*- # -*- coding: utf-8 -*- # vi:si:et:sw=2:sts=2:ts=2 + +from datetime import datetime import re import socket from urllib import quote from oxutils.cache import getUrl, getUrlUnicode -from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo +from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, intValue from oxutils.normalize import normalizeImdbId +from torrent import Torrent socket.setdefaulttimeout(10.0) @@ -74,7 +77,7 @@ def getData(mininovaId): torrent[key] = value torrent[u'title'] = findRegexp(data, '(.*?):.*?') - torrent[u'imdb'] = findRegexp(data, 'title/tt(\d{7})') + torrent[u'imdbId'] = findRegexp(data, 'title/tt(\d{7})') torrent[u'description'] = findRegexp(data, '
(.*?)
') if torrent['description']: torrent['description'] = decodeHtml(stripTags(torrent['description'])).strip() @@ -82,3 +85,28 @@ def getData(mininovaId): torrent[u'torrent_info'] = getTorrentInfo(t) return torrent
+
+class Mininova(Torrent):
+    """Torrent filled from the data getData() returns for a mininova id."""
+
+    def __init__(self, mininovaId):
+        """Fetch the data for mininovaId and fill in the Torrent keys.
+
+        Raises KeyError when the scraped data lacks 'share ratio',
+        'downloads' or 'added on', and ValueError when 'added on' does not
+        match the expected date format.
+        """
+        self.data = getData(mininovaId)
+        Torrent.__init__(self)
+        # 'share ratio' is split on ',' into two counts, seeders first.
+        # NOTE(review): a thousands separator inside a count would break
+        # this split - confirm against the scraped page.
+        ratio = self.data['share ratio'].split(',')
+        self['seeder'] = int(intValue(ratio[0].strip()))
+        self['leecher'] = int(intValue(ratio[1].strip()))
+        self['downloaded'] = int(intValue(self.data['downloads'].replace(',', '').strip()))
+        # Cut off a UTC offset of either sign; the format string below has
+        # no directive for it, so strptime() would reject the leftover text.
+        published = re.split(r' [+-]', self.data['added on'], 1)[0]
+        self['published'] = datetime.strptime(published, "%a, %d %b %Y %H:%M:%S")
+
diff --git a/ox/thepiratebay.py b/ox/thepiratebay.py index 468aaf7..1d85f73 100644 --- a/ox/thepiratebay.py +++ b/ox/thepiratebay.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- # vi:si:et:sw=2:sts=2:ts=2 +from datetime import datetime import re import socket from urllib import quote, urlencode @@ -11,6 +12,7 @@ from oxutils.cache import getUrl, getUrlUnicode from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo from oxutils.normalize import normalizeImdbId +from torrent import Torrent socket.setdefaulttimeout(10.0) @@ -68,6 +70,8 @@ def getData(piratebayId): 'spoken language(s)': u'language', 'texted language(s)': u'subtitle language', 'by': u'uploader', + 'leechers': 'leecher', + 'seeders': 'seeder', } piratebayId = getId(piratebayId) torrent = dict() @@ -79,7 +83,8 @@ def getData(piratebayId): torrent[u'title'] = findRegexp(data, '(.*?) 
\(download torrent\) - TPB') if not torrent[u'title']: return None - torrent[u'imdb'] = findRegexp(data, 'title/tt(\d{7})') + torrent[u'title'] = decodeHtml(torrent[u'title']).strip() + torrent[u'imdbId'] = findRegexp(data, 'title/tt(\d{7})') torrent[u'torrent_link']="http://torrents.thepiratebay.org/%s/%s.torrent" % (piratebayId, quote(torrent['title'])) for d in re.compile('dt>(.*?):.*?(.*?)', re.DOTALL).findall(data): key = d[0].lower().strip() @@ -91,3 +96,27 @@ def getData(piratebayId): torrent[u'torrent_info'] = getTorrentInfo(t) return torrent
+
+class Thepiratebay(Torrent):
+    """Torrent filled from the data getData() returns for a piratebay id."""
+
+    def __init__(self, piratebayId):
+        """Fetch the data for piratebayId and fill in the Torrent keys.
+
+        Raises ValueError when getData() finds no torrent for piratebayId or
+        when 'uploaded' does not match the expected date format, and
+        KeyError when the scraped data lacks 'uploaded'.
+        """
+        data = getData(piratebayId)
+        if data is None:
+            # getData() returns None when the page yields no title; fail
+            # here with a clear message instead of on the first data access.
+            raise ValueError('no torrent found for %r' % (piratebayId,))
+        self.data = data
+        Torrent.__init__(self)
+        published = self.data['uploaded'].replace(' GMT', '')
+        # Cut off a UTC offset of either sign; the format string below has
+        # no directive for it, so strptime() would reject the leftover text.
+        published = re.split(r' [+-]', published, 1)[0]
+        self['published'] = datetime.strptime(published, "%Y-%m-%d %H:%M:%S")
+
diff --git a/ox/torrent.py b/ox/torrent.py
new file mode 100644
index 0000000..21e3df1
--- /dev/null
+++ b/ox/torrent.py
@@ -0,0 +1,52 @@
+# -*- Mode: Python; -*-
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=2:sts=2:ts=2
+
+from oxutils import intValue
+
+
+class Torrent(dict):
+    """Dict of normalised torrent metadata.
+
+    A subclass assigns the raw scraped values to self.data and then calls
+    Torrent.__init__, which copies every known key into the dict and
+    substitutes a typed default (u'', {}, [] or -1) for missing ones.
+    """
+    _string_keys = ('id', 'title', 'description', 'infohash', 'torrent_link', 'comment_link',
+                    'imdbId', 'announce', 'domain', 'published', 'language', 'subtitle language')
+    _int_keys = ('size', 'seeder', 'leecher', 'downloaded', 'files')
+    _dict_keys = ('torrent_info', )
+    _list_keys = ()
+    # Fallback that is only read in this class, so a bare Torrent() has
+    # something to read from; subclasses assign their own self.data.
+    data = {}
+
+    def __init__(self):
+        """Populate the dict from self.data.
+
+        Values found in self.data['torrent_info'] override the scraped
+        'infohash', 'size', 'announce' and 'files' entries.
+        """
+        data = self.data
+        for key in self._string_keys:
+            self[key] = data.get(key, u'')
+        for key in self._dict_keys:
+            self[key] = data.get(key, {})
+        for key in self._list_keys:
+            self[key] = data.get(key, [])
+        for key in self._int_keys:
+            value = data.get(key, -1)
+            if not isinstance(value, int):
+                value = int(intValue(value))
+            self[key] = value
+        info = self['torrent_info']
+        if not info:
+            # Without torrent_info keep the defaults set above instead of
+            # raising KeyError, so a bare Torrent() can be constructed.
+            return
+        for key, source in (('infohash', 'hash'), ('size', 'size'), ('announce', 'announce')):
+            if source in info:
+                self[key] = info[source]
+        # A torrent_info without a 'files' entry counts as one file.
+        self['files'] = len(info['files']) if 'files' in info else 1
+