60 lines
1.8 KiB
Python
60 lines
1.8 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
from utils import read_url, read_url_utf8
|
|
import re
|
|
from urllib import quote
|
|
|
|
def getMetacriticShowUrl(title):
    """Find the Metacritic page URL of a TV show by searching for its title.

    The title is URL-quoted, substituted into Metacritic's TV-show title
    search URL, and that URL is handed to ``read_url``. The text returned
    (presumably the body of the search result page) is scanned for links
    pointing below ``/tv/shows/``.

    Returns the first such link, cut off before its ``?`` query string, or
    ``''`` when the search result contains no show link.
    """
    query_url = "http://www.metacritic.com/search/process?ty=6&ts=%s&tfs=tvshow_title&x=0&y=0&sb=0&release_date_s=&release_date_e=&metascore_s=&metascore_e=" % quote(title)
    result_page = read_url(query_url)
    # The capture group stops right before the '?' so only the bare show URL
    # is kept, without the search-specific query parameters.
    show_links = re.findall('(http://www.metacritic.com/tv/shows/.*?)\?', result_page)
    return show_links[0] if show_links else ''
|
|
|
|
def scrapeMetacritic(title, url=None):
    """Scrape the Metascore and the critic reviews of a TV show page.

    Args:
        title: Show title. Only used to look the page up via
            ``getMetacriticShowUrl`` when ``url`` is empty.
        url: Optional URL of the show's Metacritic page.

    Returns:
        ``None`` when no page URL is given and none could be found.
        Otherwise a dict with the keys

        * ``score``: the Metascore as an int, or -1 when the page has no
          numeric Metascore,
        * ``critics``: a list with one dict per review, holding ``score``
          (int), ``publication``, ``critic``, ``quote`` and ``link``
          (``critic`` and ``link`` are ``''`` when absent); the list is
          empty when the page contains no review markup,
        * ``url``: the URL that was scraped.

    Raises:
        ValueError: if an individual critic score is not an integer.
    """
    if not url:
        url = getMetacriticShowUrl(title)
    if not url:
        return None
    data = read_url(url)

    # -1 is the "no score" sentinel; a non-numeric placeholder in the ALT
    # text is treated the same way instead of raising ValueError.
    score = -1
    metascores = re.compile('''ALT="Metascore: (.*?)"''').findall(data)
    if metascores:
        try:
            score = int(metascores[0])
        except ValueError:
            score = -1

    # Newlines are dropped so that '.' in the patterns below can run across
    # what used to be separate lines of markup.
    flat = data.replace('\n', '')
    sections = re.compile('(<DIV CLASS="scoreandreview">.*)').findall(flat)

    # Groups: critic score, publication (+ optional critic name), quote
    # (+ optional review link).
    review_re = re.compile(
        '<DIV CLASS="scoreandreview"><DIV CLASS="criticscore">(.*?)</DIV>.*?'
        '<SPAN CLASS="publication">(.*?)'
        '<DIV CLASS="quote">(.*?)'
        '</DIV>.*?</DIV>.*?</DIV>'
    )
    link_re = re.compile(
        '<A HREF="(.*?)" TARGET="_blank"><IMG SRC="/_images/readreview.gif"'
    )

    # A page without any review block yields no critics instead of an
    # IndexError on the missing first match.
    reviews = review_re.findall(sections[0]) if sections else []

    metacritics = []
    for raw_score, raw_publication, raw_quote in reviews:
        # The publication group may be followed by '</SPAN>' and a
        # criticname span carrying the reviewer's name.
        publication_parts = raw_publication.split('</SPAN>')
        criticname = ''
        if len(publication_parts) > 1:
            criticname = publication_parts[1].replace(
                '<SPAN CLASS="criticname">', '').strip()

        # The quote text comes first; anything after '<BR>' may hold the
        # "read review" link.
        quote_parts = raw_quote.split('<BR>')
        link = ''
        if len(quote_parts) > 1:
            links = link_re.findall(quote_parts[1])
            if links:
                link = links[0]

        metacritics.append({
            'score': int(raw_score.strip()),
            'publication': publication_parts[0],
            'critic': criticname,
            'quote': quote_parts[0].strip(),
            'link': link,
        })

    return dict(score=score, critics=metacritics, url=url)
|
|
|
|
|