parse external reviews

This commit is contained in:
j 2007-09-03 16:56:10 +00:00
parent afe518fc7d
commit 7bb7cf8beb
1 changed files with 21 additions and 2 deletions

View File

@ -107,6 +107,8 @@ class IMDb:
self.triviaUrl = "%strivia" % self.pageUrl
self.locationSource = None
self.locationUrl = "%slocations" % self.pageUrl
self.externalreviewsSource = None
self.externalreviewsUrl = "%sexternalreviews" % self.pageUrl
def getPage(self, forcereload = False):
if forcereload or not self.pageSource:
@ -269,6 +271,7 @@ class IMDb:
IMDbDict['locations'] = self.parseLocations()
IMDbDict['release_date'] = self.parseReleaseinfo()
IMDbDict['business'] = self.parseBusiness()
IMDbDict['reviews'] = self.parseExternalreviews()
self.IMDbDict = IMDbDict
if IMDbDict['episode_of']:
@ -458,6 +461,22 @@ class IMDb:
business['profit'] = business['gross'] - business['budget']
return business
def getExternalreviews(self, forcereload = False):
    """Return the external-reviews page source, fetching it when needed.

    The page is read from self.externalreviewsUrl with read_url_utf8 when
    forcereload is true or when no truthy source has been cached yet. The
    result is stored on self.externalreviewsSource and reused afterwards.
    """
    cached = self.externalreviewsSource
    if cached and not forcereload:
        # Already downloaded and no reload requested: reuse the cached copy.
        return cached
    self.externalreviewsSource = read_url_utf8(self.externalreviewsUrl)
    return self.externalreviewsSource
def parseExternalreviews(self):
    """Return a dict mapping external review URLs to their link content.

    Parses the page returned by self.getExternalreviews() and looks only at
    the first <ol> element. For every <li> in it, the first <a> supplies the
    entry: its href is the key and its first child node is the value.

    List items that have no link, a link without an href, or a link with no
    content are skipped. Returns an empty dict when the page has no <ol>.
    """
    soup = BeautifulSoup(self.getExternalreviews())
    ordered_lists = soup('ol')
    if not ordered_lists:
        return {}
    reviews = {}
    for item in ordered_lists[0]('li'):
        anchors = item('a')
        if not anchors:
            # Plain-text list item: indexing [0] here would raise IndexError
            # and abort the whole parse, so skip it instead.
            continue
        link = anchors[0]
        href = link.get('href')
        if not href or not link.contents:
            # No target URL or no link content: nothing usable to record.
            continue
        reviews[href] = link.contents[0]
    return reviews
def guess(title, director=''):
#FIXME: proper file -> title
title = title.split('-')[0]