introducing Torrent dict, torrent info abstraction dict class

This commit is contained in:
j 2008-05-05 13:09:29 +02:00
parent a065f0650e
commit b3d3f44d20
3 changed files with 68 additions and 3 deletions

View file

@ -1,14 +1,17 @@
# -*- Mode: Python; -*- # -*- Mode: Python; -*-
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2 # vi:si:et:sw=2:sts=2:ts=2
from datetime import datetime
import re import re
import socket import socket
from urllib import quote from urllib import quote
from oxutils.cache import getUrl, getUrlUnicode from oxutils.cache import getUrl, getUrlUnicode
from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, intValue
from oxutils.normalize import normalizeImdbId from oxutils.normalize import normalizeImdbId
from torrent import Torrent
socket.setdefaulttimeout(10.0) socket.setdefaulttimeout(10.0)
@ -74,7 +77,7 @@ def getData(mininovaId):
torrent[key] = value torrent[key] = value
torrent[u'title'] = findRegexp(data, '<title>(.*?):.*?</title>') torrent[u'title'] = findRegexp(data, '<title>(.*?):.*?</title>')
torrent[u'imdb'] = findRegexp(data, 'title/tt(\d{7})') torrent[u'imdbId'] = findRegexp(data, 'title/tt(\d{7})')
torrent[u'description'] = findRegexp(data, '<div id="description">(.*?)</div>') torrent[u'description'] = findRegexp(data, '<div id="description">(.*?)</div>')
if torrent['description']: if torrent['description']:
torrent['description'] = decodeHtml(stripTags(torrent['description'])).strip() torrent['description'] = decodeHtml(stripTags(torrent['description'])).strip()
@ -82,3 +85,16 @@ def getData(mininovaId):
torrent[u'torrent_info'] = getTorrentInfo(t) torrent[u'torrent_info'] = getTorrentInfo(t)
return torrent return torrent
class Mininova(Torrent):
  '''
    Torrent dict filled from the data getData() returns for a mininova id.

    On top of the keys set by Torrent.__init__ this sets 'seeder',
    'leecher', 'downloaded' (ints) and 'published' (datetime).
  '''
  def __init__(self, mininovaId):
    '''
      mininovaId is passed on to getData() unchanged.

      Raises KeyError if the data has no 'share ratio', 'downloads' or
      'added on' entry, IndexError if 'share ratio' holds fewer than
      two numbers and ValueError if 'added on' does not match the
      expected date format.
    '''
    self.data = getData(mininovaId)
    Torrent.__init__(self)

    # Extract the counts with a regexp instead of splitting on ',':
    # a plain comma split cuts a count written with thousands separators
    # ('1,234 seeds, 5 leechers') into '1' and '234 seeds'.
    # NOTE(review): assumes seeders are listed before leechers, same
    # order the previous comma split relied on.
    counts = re.findall(r'\d+(?:,\d{3})*', self.data['share ratio'])
    self['seeder'] = int(counts[0].replace(',', ''))
    self['leecher'] = int(counts[1].replace(',', ''))
    self['downloaded'] = int(intValue(self.data['downloads'].replace(',', '').strip()))

    # the format string has no field for a utc offset, drop a trailing ' +hhmm'
    published = self.data['added on'].split(' +')[0]
    self['published'] = datetime.strptime(published, "%a, %d %b %Y %H:%M:%S")

View file

@ -2,6 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2 # vi:si:et:sw=2:sts=2:ts=2
from datetime import datetime
import re import re
import socket import socket
from urllib import quote, urlencode from urllib import quote, urlencode
@ -11,6 +12,7 @@ from oxutils.cache import getUrl, getUrlUnicode
from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo
from oxutils.normalize import normalizeImdbId from oxutils.normalize import normalizeImdbId
from torrent import Torrent
socket.setdefaulttimeout(10.0) socket.setdefaulttimeout(10.0)
@ -68,6 +70,8 @@ def getData(piratebayId):
'spoken language(s)': u'language', 'spoken language(s)': u'language',
'texted language(s)': u'subtitle language', 'texted language(s)': u'subtitle language',
'by': u'uploader', 'by': u'uploader',
'leechers': 'leecher',
'seeders': 'seeder',
} }
piratebayId = getId(piratebayId) piratebayId = getId(piratebayId)
torrent = dict() torrent = dict()
@ -79,7 +83,8 @@ def getData(piratebayId):
torrent[u'title'] = findRegexp(data, '<title>(.*?) \(download torrent\) - TPB</title>') torrent[u'title'] = findRegexp(data, '<title>(.*?) \(download torrent\) - TPB</title>')
if not torrent[u'title']: if not torrent[u'title']:
return None return None
torrent[u'imdb'] = findRegexp(data, 'title/tt(\d{7})') torrent[u'title'] = decodeHtml(torrent[u'title']).strip()
torrent[u'imdbId'] = findRegexp(data, 'title/tt(\d{7})')
torrent[u'torrent_link']="http://torrents.thepiratebay.org/%s/%s.torrent" % (piratebayId, quote(torrent['title'])) torrent[u'torrent_link']="http://torrents.thepiratebay.org/%s/%s.torrent" % (piratebayId, quote(torrent['title']))
for d in re.compile('dt>(.*?):</dt>.*?<dd.*?>(.*?)</dd>', re.DOTALL).findall(data): for d in re.compile('dt>(.*?):</dt>.*?<dd.*?>(.*?)</dd>', re.DOTALL).findall(data):
key = d[0].lower().strip() key = d[0].lower().strip()
@ -91,3 +96,12 @@ def getData(piratebayId):
torrent[u'torrent_info'] = getTorrentInfo(t) torrent[u'torrent_info'] = getTorrentInfo(t)
return torrent return torrent
class Thepiratebay(Torrent):
  '''
    Torrent dict filled from the data getData() returns for a piratebay id.

    On top of the keys set by Torrent.__init__ this sets 'published'
    (datetime) from the 'uploaded' entry.
  '''
  def __init__(self, piratebayId):
    '''
      piratebayId is passed on to getData() unchanged.

      Raises ValueError if getData() has no data for the id or if
      'uploaded' does not match the expected date format, KeyError if
      the data has no 'uploaded' entry.
    '''
    data = getData(piratebayId)
    # getData() returns None if it can not find a title on the page,
    # fail with a clear message instead of an AttributeError on None
    # further down in Torrent.__init__
    if data is None:
      raise ValueError('no torrent data found for piratebay id %r' % (piratebayId, ))
    self.data = data
    Torrent.__init__(self)

    # the format string has no field for a timezone, drop ' GMT' / ' +hhmm'
    published = self.data['uploaded']
    published = published.replace(' GMT', '').split(' +')[0]
    self['published'] = datetime.strptime(published, "%Y-%m-%d %H:%M:%S")

35
ox/torrent.py Normal file
View file

@ -0,0 +1,35 @@
# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
from oxutils import intValue
class Torrent(dict):
  '''
    dict with a fixed set of keys describing a torrent.

    Subclasses assign the raw values to self.data and then call
    Torrent.__init__(self), which copies the known keys into the dict
    and fills in a default for every key that is missing:
    u'' for string keys, -1 for int keys, {} for dict keys and
    [] for list keys.
  '''
  _string_keys = ('id', 'title', 'description', 'infohash', 'torrent_link', 'comment_link',
                  'imdbId', 'announce', 'domain', 'published', 'language', 'subtitle language')
  _int_keys = ('size', 'seeder', 'leecher', 'downloaded', 'files')
  _dict_keys = ('torrent_info', )
  _list_keys = ()
  # used if no instance attribute was assigned, only read, never modified here
  data = {}

  def __init__(self):
    '''
      Fill the dict from self.data.

      Values of int keys that are not already an int are converted
      with int(intValue(value)).
      'infohash', 'size', 'announce' and 'files' are taken from
      data['torrent_info'] if available; without torrent info they
      keep the values set above.
    '''
    for key in self._string_keys:
      self[key] = self.data.get(key, u'')
    for key in self._dict_keys:
      self[key] = self.data.get(key, {})
    for key in self._list_keys:
      self[key] = self.data.get(key, [])
    for key in self._int_keys:
      value = self.data.get(key, -1)
      if not isinstance(value, int):
        value = int(intValue(value))
      self[key] = value

    # torrent_info can be missing or incomplete, only override what it
    # provides instead of failing with a KeyError
    info = self['torrent_info']
    if info:
      for key, infoKey in (('infohash', 'hash'), ('size', 'size'), ('announce', 'announce')):
        if infoKey in info:
          self[key] = info[infoKey]
      # torrent info without a 'files' entry is counted as one file
      if 'files' in info:
        self['files'] = len(info['files'])
      else:
        self['files'] = 1