# scrapeit/scrapeit/rottentomatoes.py

# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2

from urllib import quote
import re

from BeautifulSoup import BeautifulSoup

from utils import read_url


def getRottenTomatoes(rating = 70):
  '''
    Get movie titles and ratings from the RottenTomatoes movie browser.

    Requests browser.php repeatedly, advancing start_index by 10 per
    request, with `rating` (default 70) sent as the avgrating query
    parameter. Paging stops at the first page that contains no
    'movie-link' anchors.

    Returns a list of dicts, one per movie link found, with the keys:
      'title'   - first child of the movie link (link.contents[0])
      'rating'  - text captured in front of ' %' in a bold span (a string)
      'torrent' - always the empty string

    Raises IndexError when a page yields fewer ratings than titles, since
    the two lists can then no longer be paired up reliably.
  '''
  # Loop-invariant, so compile once instead of once per page.
  rating_re = re.compile('<span class="bold">(.*?) %</span>')

  movies = []
  offset = 0
  while True:
    url = "http://www.rottentomatoes.com/browser.php?movietype=1&genre=&tomatometer=&avgrating=%s&numreviews=10&mpaa=&x=56&y=10&start_index=%d" % (rating, offset)
    soup = BeautifulSoup(read_url(url))

    titles = [link.contents[0] for link in soup.findAll('a', {'class': 'movie-link'})]
    if not titles:
      # An empty page marks the end of the result list.
      break

    ratings = rating_re.findall(str(soup))
    if len(ratings) < len(titles):
      # A negative slice start below would silently pick the wrong
      # elements; fail loudly with the same exception type as before.
      raise IndexError(
        "found %d ratings for %d titles at start_index=%d"
        % (len(ratings), len(titles), offset))

    # Only the trailing len(titles) matches are paired with the movie
    # links; any earlier bold-percent spans on the page are skipped.
    ratings = ratings[len(ratings) - len(titles):]

    # Pair by position, not by titles.index(title): index() returns the
    # first match, so a title occurring twice on one page would receive
    # the first occurrence's rating both times.
    for title, score in zip(titles, ratings):
      movies.append({'title': title, 'rating': score, 'torrent': ''})

    offset += 10
  return movies