# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
import re
from urllib import quote

from BeautifulSoup import BeautifulSoup

from utils import read_url, read_url_utf8, stripTags


def _firstMatch(pattern, text):
  """Return the first capture of `pattern` in `text`, or None if it never matches."""
  found = re.compile(pattern).findall(text)
  if found:
    return found[0]
  return None


def getGoogleMovieId(title):
  """Search Google Movies for `title` and return the first review cid.

  The search result page is scanned for links of the form
  ``reviews?cid=<cid>&``; the first `<cid>` found is returned.
  Returns an empty string when the page contains no such link.
  """
  url = 'http://google.com/movies?q=%s&btnG=Search+Movies' % quote(title)
  data = read_url(url)
  cid = _firstMatch(r'reviews\?cid=(.*?)&', data)
  if cid:
    return cid
  return ''


def getGoogleMovieData(title, year = None, cid = None):
  """Collect rating information for a movie from its Google Movies review page.

  Parameters:
    title -- movie title; used for the lookup when `cid` is not given.
    year  -- optional release year; when given it is appended to the search
             query as "title (year)".
    cid   -- optional Google Movies review id; when missing it is looked up
             with getGoogleMovieId().

  Returns a dict with the keys 'title', 'year', 'cid' and 'rating'
  ('rating' defaults to ''). When a review page is fetched, 'rating',
  'reviews' and 'year' are filled in with the strings scraped from the page;
  a field whose pattern is not found on the page is left at its previous
  value ('reviews' is then simply absent) instead of raising IndexError.
  """
  gdata = {
    'title': title,
    'year': year,
    'cid': cid,
    'rating': '',
  }
  if not cid:
    # Only add the "(year)" suffix when a year is known; formatting None
    # into the query would search for the literal text "title (None)".
    if year:
      query = "%s (%s)" % (title, year)
    else:
      query = title
    cid = getGoogleMovieId(query)
  if cid:
    gdata['cid'] = cid
    data = read_url('http://www.google.com/movies/reviews?cid=%s' % cid)
    # Each field is scraped independently, so one missing snippet does not
    # discard the others.
    scraped = (
      ('rating', r'font size=.3>(.*?) / 5'),
      ('reviews', r'Based on (.*?) reviews'),
      ('year', r".*?\((.*?)\).*?</title"),
    )
    for key, pattern in scraped:
      value = _firstMatch(pattern, data)
      if value is not None:
        gdata[key] = value
  return gdata