python-ox/ox/web/metacritic.py
2010-07-08 01:25:57 +02:00

45 lines
1.4 KiB
Python

# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from urllib import quote
from ox.cache import readUrl, readUrlUnicode
from ox import findRe, decodeHtml, stripTags
def getMetacriticShowUrl(title):
title = quote(title)
url = "http://www.metacritic.com/search/process?ty=6&ts=%s&tfs=tvshow_title&x=0&y=0&sb=0&release_date_s=&release_date_e=&metascore_s=&metascore_e=" % title
data = readUrl(url)
return findRe(data, '(http://www.metacritic.com/tv/shows/.*?)\?')
def getData(title, url=None):
if not url:
url = getMetacriticShowUrl(title)
if not url:
return None
data = readUrlUnicode(url)
score = findRe(data, 'ALT="Metascore: (.*?)"')
if score:
score = int(score)
else:
score = -1
reviews = re.compile(
'<div class="scoreandreview"><div class="criticscore">(.*?)</div>'
'.*?<span class="publication">(.*?)</span>'
'.*?<span class="criticname">(.*?)</span></div>'
'.*?<div class="quote">(.*?)<br>'
'.*?<a href="(.*?)" ', re.DOTALL).findall(data)
metacritics = []
for review in reviews:
metacritics.append({
'score': int(review[0]),
'publication':review[1],
'critic':decodeHtml(review[2]),
'quote': stripTags(review[3]).strip(),
'link': review[4],
})
return dict(score = score, critics = metacritics, url = url)