From 7c90d07438876b685159f66d5cb1dd97c61901bd Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Tue, 29 Jul 2008 19:04:23 +0200 Subject: [PATCH] add some sites, fix some bugs --- oxweb/__init__.py | 3 ++- oxweb/epguides.py | 20 ++++++++++++------- oxweb/imdb.py | 4 +++- oxweb/metacritic.py | 45 +++++++++++++++++++++++++++++++++++++++++++ oxweb/oxdb.py | 10 ++++++++++ oxweb/piratecinema.py | 12 ++++++++++++ oxweb/tv.py | 2 +- 7 files changed, 86 insertions(+), 10 deletions(-) create mode 100644 oxweb/metacritic.py create mode 100644 oxweb/oxdb.py create mode 100644 oxweb/piratecinema.py diff --git a/oxweb/__init__.py b/oxweb/__init__.py index aaacdc1..ac2b910 100644 --- a/oxweb/__init__.py +++ b/oxweb/__init__.py @@ -5,4 +5,5 @@ __version__ = '0.1.0' import imdb import wikipedia import google - +import piratecinema +import oxdb diff --git a/oxweb/epguides.py b/oxweb/epguides.py index 51b8b4d..4e24b95 100644 --- a/oxweb/epguides.py +++ b/oxweb/epguides.py @@ -30,14 +30,20 @@ def getShowData(url): for episode in re.compile('(\d+?)\..*?(\d+?-.\d.*?) .*?(\d+?) .*?(.*?) (.*?)').findall(data): air_date = episode[3].strip() #'22 Sep 04' -> 2004-09-22 - air_date = time.strftime('%Y-%m-%d', time.strptime(air_date, '%d %b %y')) + try: + air_date = time.strftime('%Y-%m-%d', time.strptime(air_date, '%d %b %y')) + except: + pass s = episode[1].split('-')[0].strip() e = episode[1].split('-')[-1].strip() - r['episodes']['S%02dE%02d' % (int(s), int(e))] = { - 'prod code': episode[2], - 'air date': air_date, - 'url': episode[4], - 'title':episode[5], - } + try: + r['episodes']['S%02dE%02d' % (int(s), int(e))] = { + 'prod code': episode[2], + 'air date': air_date, + 'url': episode[4], + 'title':episode[5], + } + except: + print "oxweb.epguides failed,", url return r diff --git a/oxweb/imdb.py b/oxweb/imdb.py index 535c788..0d4f1dd 100644 --- a/oxweb/imdb.py +++ b/oxweb/imdb.py @@ -81,6 +81,8 @@ def getMovieInfo(imdbId): txt = [cleanUp(k) for k in txt.split(', ')] elif title in ('country', 'language'): txt = [cleanUp(txt), ] + if title == 'tv series': + info['series_imdb'] = findRe(i[1], 'tt(\d{7})') if title == 'original air date': info['series_episode_info'] = txt.split('\n')[-1].strip() txt = txt.split('\n')[0].strip() @@ -646,7 +648,7 @@ def guess(title, director=''): search = 'site:imdb.com "%s"' % title for (name, url, desc) in google.find(search, 2): if url.startswith('http://www.imdb.com/title/tt'): - return url[28:35] + return normalizeImdbId(int(oxlib.intValue(url))) try: req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS) diff --git a/oxweb/metacritic.py b/oxweb/metacritic.py new file mode 100644 index 0000000..220870c --- /dev/null +++ b/oxweb/metacritic.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +# vi:si:et:sw=4:sts=4:ts=4 +import re +from urllib import quote + +from oxlib.cache import getUrl, getUrlUnicode +from oxlib import findRe, decodeHtml, stripTags + + +def getMetacriticShowUrl(title): + title = quote(title) + url = "http://www.metacritic.com/search/process?ty=6&ts=%s&tfs=tvshow_title&x=0&y=0&sb=0&release_date_s=&release_date_e=&metascore_s=&metascore_e=" % title + data = getUrl(url) + return findRe(data, '(http://www.metacritic.com/tv/shows/.*?)\?') + +def getData(title, url=None): + if not url: + url = getMetacriticShowUrl(title) + if not url: + return None + data = getUrlUnicode(url) + score = findRe(data, 'ALT="Metascore: (.*?)"') + if score: + score = int(score) + else: + score = -1 + + reviews = re.compile( + '
(.*?)
' + '.*?(.*?)' + '.*?(.*?)
' + '.*?
(.*?)
' + '.*?.*?
(.*?).*?
(.*?)(.*?)') r['title'] = findRe(data, '.*?: (.*?) - TV.com ') #episode score