scrapeit/scrapeit/rottentomatoes.py

38 lines
1004 B
Python

# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
from urllib import quote
import re
from BeautifulSoup import BeautifulSoup
from utils import read_url
def getRottenTomatoes(rating = 70):
  '''
  Collect movie titles and their ratings from the RottenTomatoes browser.

  Pages through browser.php ten results at a time, passing ``rating`` as
  the ``avgrating`` query parameter (default 70), until a page contains
  no ``<a class="movie-link">`` entries.

  Returns a list of dicts with the keys 'title', 'rating' (the text
  captured in front of " %" in the page's bold spans) and 'torrent'
  (always an empty string here).
  '''
  # Compiled once; the pattern is the same for every page.
  rating_pattern = re.compile('<span class="bold">(.*?) %</span>')
  url_template = "http://www.rottentomatoes.com/browser.php?movietype=1&genre=&tomatometer=&avgrating=%s&numreviews=10&mpaa=&x=56&y=10&start_index=%d"

  movies = []
  offset = 0
  while True:
    page = read_url(url_template % (rating, offset))
    soup = BeautifulSoup(page)
    titles = [link.contents[0] for link in soup.findAll('a', {'class': 'movie-link'})]
    if not titles:
      # An empty result page marks the end of the listing.
      break

    ratings = rating_pattern.findall(str(soup))
    # The page may contain more bold percentage spans than movie links;
    # only the trailing len(titles) matches are used for the movies.
    surplus = len(ratings) - len(titles)
    if surplus > 0:
      ratings = ratings[surplus:]

    # Pair by position rather than titles.index(title): index() returns the
    # first occurrence, so duplicate titles on a page would all receive the
    # first one's rating. Titles without a matching rating are skipped.
    for title, score in zip(titles, ratings):
      movies.append({'title': title, 'rating': score, 'torrent': ''})

    offset += 10
  return movies