add some sites, fix some bugs

This commit is contained in:
j 2008-07-29 19:04:23 +02:00
parent 1101c6bec6
commit 7c90d07438
7 changed files with 86 additions and 10 deletions

View file

@ -5,4 +5,5 @@ __version__ = '0.1.0'
import imdb import imdb
import wikipedia import wikipedia
import google import google
import piratecinema
import oxdb

View file

@ -30,14 +30,20 @@ def getShowData(url):
for episode in re.compile('(\d+?)\..*?(\d+?-.\d.*?) .*?(\d+?) .*?(.*?) <a target="_blank" href="(.*?)">(.*?)</a>').findall(data): for episode in re.compile('(\d+?)\..*?(\d+?-.\d.*?) .*?(\d+?) .*?(.*?) <a target="_blank" href="(.*?)">(.*?)</a>').findall(data):
air_date = episode[3].strip() air_date = episode[3].strip()
#'22 Sep 04' -> 2004-09-22 #'22 Sep 04' -> 2004-09-22
air_date = time.strftime('%Y-%m-%d', time.strptime(air_date, '%d %b %y')) try:
air_date = time.strftime('%Y-%m-%d', time.strptime(air_date, '%d %b %y'))
except:
pass
s = episode[1].split('-')[0].strip() s = episode[1].split('-')[0].strip()
e = episode[1].split('-')[-1].strip() e = episode[1].split('-')[-1].strip()
r['episodes']['S%02dE%02d' % (int(s), int(e))] = { try:
'prod code': episode[2], r['episodes']['S%02dE%02d' % (int(s), int(e))] = {
'air date': air_date, 'prod code': episode[2],
'url': episode[4], 'air date': air_date,
'title':episode[5], 'url': episode[4],
} 'title':episode[5],
}
except:
print "oxweb.epguides failed,", url
return r return r

View file

@ -81,6 +81,8 @@ def getMovieInfo(imdbId):
txt = [cleanUp(k) for k in txt.split(', ')] txt = [cleanUp(k) for k in txt.split(', ')]
elif title in ('country', 'language'): elif title in ('country', 'language'):
txt = [cleanUp(txt), ] txt = [cleanUp(txt), ]
if title == 'tv series':
info['series_imdb'] = findRe(i[1], 'tt(\d{7})')
if title == 'original air date': if title == 'original air date':
info['series_episode_info'] = txt.split('\n')[-1].strip() info['series_episode_info'] = txt.split('\n')[-1].strip()
txt = txt.split('\n')[0].strip() txt = txt.split('\n')[0].strip()
@ -646,7 +648,7 @@ def guess(title, director=''):
search = 'site:imdb.com "%s"' % title search = 'site:imdb.com "%s"' % title
for (name, url, desc) in google.find(search, 2): for (name, url, desc) in google.find(search, 2):
if url.startswith('http://www.imdb.com/title/tt'): if url.startswith('http://www.imdb.com/title/tt'):
return url[28:35] return normalizeImdbId(int(oxlib.intValue(url)))
try: try:
req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS) req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)

45
oxweb/metacritic.py Normal file
View file

@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from urllib import quote
from oxlib.cache import getUrl, getUrlUnicode
from oxlib import findRe, decodeHtml, stripTags
def getMetacriticShowUrl(title):
    '''
    Search metacritic.com for a TV show and return the URL of its show page.

    title is URL-quoted and substituted into the search query. The result
    page is fetched with getUrl and scanned with findRe for a link below
    http://www.metacritic.com/tv/shows/ (everything from the query string
    on is left out of the match). Whatever findRe yields is returned
    unchanged - presumably an empty value when nothing matched; confirm
    against oxlib.findRe.
    '''
    search_url = "http://www.metacritic.com/search/process?ty=6&ts=%s&tfs=tvshow_title&x=0&y=0&sb=0&release_date_s=&release_date_e=&metascore_s=&metascore_e=" % quote(title)
    return findRe(getUrl(search_url), '(http://www.metacritic.com/tv/shows/.*?)\?')
def getData(title, url=None):
    '''
    Collect the metascore and the critic reviews of a TV show.

    title is only used to look the show page up (via getMetacriticShowUrl)
    when no url is passed in. Returns None if no show page URL is
    available, otherwise a dict with the keys
        score   - overall metascore as int, -1 if the page shows none
        critics - list of dicts (score, publication, critic, quote, link),
                  one per review found on the page
        url     - the show page URL that was parsed
    '''
    # fall back to a search only when the caller did not supply a page
    url = url or getMetacriticShowUrl(title)
    if not url:
        return None

    page = getUrlUnicode(url)

    # -1 marks "no metascore found on the page"
    metascore = -1
    raw_score = findRe(page, 'ALT="Metascore: (.*?)"')
    if raw_score:
        metascore = int(raw_score)

    # one match per review block; DOTALL lets '.*?' span line breaks
    review_re = re.compile(
        '<div class="scoreandreview"><div class="criticscore">(.*?)</div>'
        '.*?<span class="publication">(.*?)</span>'
        '.*?<span class="criticname">(.*?)</span></div>'
        '.*?<div class="quote">(.*?)<br>'
        '.*?<a href="(.*?)" ', re.DOTALL)
    critics = [
        {
            'score': int(points),
            'publication': publication,
            'critic': decodeHtml(critic),
            'quote': stripTags(quote_html).strip(),
            'link': link,
        }
        for (points, publication, critic, quote_html, link)
        in review_re.findall(page)
    ]
    return {'score': metascore, 'critics': critics, 'url': url}

10
oxweb/oxdb.py Normal file
View file

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import oxlib.cache
def getPosterUrl(id):
    '''
    Return the 0xdb.org poster URL for id, or '' if
    oxlib.cache.exists reports that URL as not existing.
    '''
    poster_url = "http://0xdb.org/%s/poster.0xdb.jpg" % id
    if not oxlib.cache.exists(poster_url):
        return ''
    return poster_url

12
oxweb/piratecinema.py Normal file
View file

@ -0,0 +1,12 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import oxlib.cache
from oxlib.cache import exists
def getPosterUrl(id):
    '''
    Return the piratecinema.org poster URL for id, or '' if
    oxlib.cache.exists reports that URL as not existing.

    Posters are addressed as posters/<first four characters of id>/<id>.jpg
    '''
    prefix = id[:4]
    poster_url = "http://piratecinema.org/posters/%s/%s.jpg" % (prefix, id)
    if not oxlib.cache.exists(poster_url):
        return ''
    return poster_url

View file

@ -16,7 +16,7 @@ def getEpisodeData(url):
''' '''
data = getUrlUnicode(url) data = getUrlUnicode(url)
r = {} r = {}
r['description'] = findRe(data, 'div id="main-col">.*?<div>(.*?)</div').split('\r')[0] r['description'] = stripTags(findRe(data, 'div id="main-col">.*?<div>(.*?)</div').split('\r')[0])
r['show'] = findRe(data, '<h1>(.*?)</h1>') r['show'] = findRe(data, '<h1>(.*?)</h1>')
r['title'] = findRe(data, '<title>.*?: (.*?) - TV.com </title>') r['title'] = findRe(data, '<title>.*?: (.*?) - TV.com </title>')
#episode score #episode score