scrapeit/scrapeit/googlemovie.py

34 lines
1018 B
Python

# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
import re
from urllib import quote
from BeautifulSoup import BeautifulSoup
from utils import read_url, read_url_utf8, stripTags
def getGoogleMovieId(title):
  """Search Google Movies for `title` and return the first review cid.

  The search result page is fetched with read_url and scanned for the
  first ``reviews?cid=...&`` link.

  Parameters:
    title -- search text, e.g. "Brazil (1985)"; byte or unicode string.

  Returns the cid as a string, or '' when the result page contains no
  review link.
  """
  # Python 2's urllib.quote raises KeyError on unicode text containing
  # non-ASCII characters, so hand it UTF-8 bytes instead. (On Python 3 every
  # text string is a str and this branch is simply skipped.)
  if not isinstance(title, str):
    title = title.encode('utf-8')
  url = 'http://google.com/movies?q=%s&btnG=Search+Movies' % quote(title)
  data = read_url(url)
  # Only the first review link is of interest.
  match = re.search(r'reviews\?cid=(.*?)&', data)
  if match:
    return match.group(1)
  return ''
def _firstMatch(pattern, text, default):
  """Return group 1 of the first match of `pattern` in `text`, else `default`."""
  match = re.search(pattern, text)
  if match:
    return match.group(1)
  return default

def getGoogleMovieData(title, year = None, cid = None):
  """Collect rating information for a movie from Google Movies.

  Parameters:
    title -- movie title; used for the search when no cid is given.
    year  -- optional release year; appended to the search query as
             "title (year)" when given.
    cid   -- optional Google Movies review id; when given, no search is done.

  Returns a dict with the keys 'title', 'year', 'cid' and 'rating'.
  When a cid is known (passed in or found by the search), the review page
  is fetched and 'rating', 'reviews' and 'year' are filled from it. A value
  that cannot be found on the page falls back to '' for 'rating' and
  'reviews', and to the `year` argument for 'year'. The 'reviews' key is
  only present when a review page was fetched.
  """
  gdata = {
    'title': title,
    'year': year,
    'cid': cid,
    'rating': '',
  }
  if not cid:
    # Only add the year when there is one; otherwise the query would
    # contain the literal text "(None)".
    if year:
      query = "%s (%s)" % (title, year)
    else:
      query = title
    cid = getGoogleMovieId(query)
  if not cid:
    # Nothing found: return the defaults built above.
    return gdata

  gdata['cid'] = cid
  data = read_url('http://www.google.com/movies/reviews?cid=%s' % cid)
  # The page layout is outside our control, so a missing pattern must not
  # raise; each field falls back to a default instead.
  gdata['rating'] = _firstMatch(r'font size=.3><b><nobr>(.*?)&nbsp;/&nbsp;5', data, '')
  gdata['reviews'] = _firstMatch(r'Based on (.*?) reviews', data, '')
  # The year is taken from the first parenthesised part of the page title.
  gdata['year'] = _firstMatch(r"<title>.*?\((.*?)\).*?</title", data, year)
  return gdata