parse external reviews

This commit is contained in:
j 2007-09-03 16:56:10 +00:00
parent afe518fc7d
commit 7bb7cf8beb

View file

@ -107,6 +107,8 @@ class IMDb:
self.triviaUrl = "%strivia" % self.pageUrl self.triviaUrl = "%strivia" % self.pageUrl
self.locationSource = None self.locationSource = None
self.locationUrl = "%slocations" % self.pageUrl self.locationUrl = "%slocations" % self.pageUrl
self.externalreviewsSource = None
self.externalreviewsUrl = "%sexternalreviews" % self.pageUrl
def getPage(self, forcereload = False): def getPage(self, forcereload = False):
if forcereload or not self.pageSource: if forcereload or not self.pageSource:
@ -269,6 +271,7 @@ class IMDb:
IMDbDict['locations'] = self.parseLocations() IMDbDict['locations'] = self.parseLocations()
IMDbDict['release_date'] = self.parseReleaseinfo() IMDbDict['release_date'] = self.parseReleaseinfo()
IMDbDict['business'] = self.parseBusiness() IMDbDict['business'] = self.parseBusiness()
IMDbDict['reviews'] = self.parseExternalreviews()
self.IMDbDict = IMDbDict self.IMDbDict = IMDbDict
if IMDbDict['episode_of']: if IMDbDict['episode_of']:
@ -458,6 +461,22 @@ class IMDb:
business['profit'] = business['gross'] - business['budget'] business['profit'] = business['gross'] - business['budget']
return business return business
def getExternalreviews(self, forcereload = False):
    """Return the source of the external reviews page.

    The source is kept in self.externalreviewsSource. It is fetched from
    self.externalreviewsUrl via read_url_utf8() when nothing truthy is
    stored yet, or when forcereload is true.
    """
    cached = self.externalreviewsSource
    # Serve the stored copy unless the caller explicitly asks for a reload.
    if cached and not forcereload:
        return cached
    self.externalreviewsSource = read_url_utf8(self.externalreviewsUrl)
    return self.externalreviewsSource
def parseExternalreviews(self):
    """Parse the external reviews page into a dict of link target -> link text.

    Only the first <ol> element of the page returned by
    getExternalreviews() is examined. For each <li> inside it, the first
    <a> element supplies the entry: its href is the key and its first
    child node is the value.

    List items without an anchor, anchors without an href, and anchors
    without any content are skipped instead of raising IndexError or
    producing a None key.

    Returns an empty dict when the page contains no <ol>.
    """
    soup = BeautifulSoup(self.getExternalreviews())
    lists = soup('ol')
    if not lists:
        return {}
    reviews = {}
    for item in lists[0]('li'):
        anchors = item('a')
        if not anchors:
            # Plain-text list item: nothing to link to.
            continue
        link = anchors[0]
        href = link.get('href')
        if not href or not link.contents:
            # An entry needs both a target and a label to be useful.
            continue
        reviews[href] = link.contents[0]
    return reviews
def guess(title, director=''): def guess(title, director=''):
#FIXME: proper file -> title #FIXME: proper file -> title
title = title.split('-')[0] title = title.split('-')[0]