34 lines
No EOL
1,018 B
Python
34 lines
No EOL
1,018 B
Python
# -*- Mode: Python; -*-
|
|
# -*- coding: utf-8 -*-
|
|
# vi:si:et:sw=2:sts=2:ts=2
|
|
|
|
import re
|
|
from urllib import quote
|
|
from BeautifulSoup import BeautifulSoup
|
|
|
|
from utils import read_url, read_url_utf8, stripTags
|
|
|
|
def getGoogleMovieId(title):
  """Look up the Google Movies cid for a title.

  Fetches the Google Movies search page for ``title`` and returns the first
  ``cid`` value that appears in a ``reviews?cid=...&`` link on that page.

  Returns '' when the page contains no such link.
  """
  url = 'http://google.com/movies?q=%s&btnG=Search+Movies' % quote(title)
  data = read_url(url)
  # Raw string so the escaped '?' does not depend on Python passing unknown
  # string escapes through unchanged. Only the first hit is used, so search()
  # is enough and stops scanning at the first match.
  match = re.search(r'reviews\?cid=(.*?)&', data)
  if match:
    return match.group(1)
  return ''
|
|
|
|
def getGoogleMovieData(title, year = None, cid = None):
  """Collect rating data for a movie from its Google Movies reviews page.

  title -- movie title; also used as the search query when no cid is given.
  year  -- optional year; appended to the search query as "title (year)".
  cid   -- optional Google Movies cid; when empty it is looked up with
           getGoogleMovieId().

  Returns a dict with the keys 'title', 'year', 'cid', 'rating' and
  'reviews'. 'rating' and 'reviews' stay '' and 'year' keeps the passed-in
  value whenever no cid could be resolved or the corresponding fragment is
  not present in the reviews page.
  """
  gdata = {
    'title': title,
    'year': year,
    'cid': cid,
    'rating': '',
    # Always present so the result has the same keys with or without a cid.
    'reviews': '',
  }
  if not cid:
    # Leave the year out of the query when it is unset; formatting None
    # would send the literal text "(None)" to the search.
    query = title
    if year:
      query = "%s (%s)" % (title, year)
    cid = getGoogleMovieId(query)
  if cid:
    gdata['cid'] = cid
    data = read_url('http://www.google.com/movies/reviews?cid=%s' % cid)
    patterns = (
      ('rating', r'font size=.3><b><nobr>(.*?) / 5'),
      ('reviews', r'Based on (.*?) reviews'),
      ('year', r"<title>.*?\((.*?)\).*?</title"),
    )
    for key, pattern in patterns:
      # A fragment missing from the page keeps its default instead of
      # raising IndexError on an empty match list.
      match = re.search(pattern, data)
      if match:
        gdata[key] = match.group(1)
  return gdata