parse external reviews
This commit is contained in:
parent
afe518fc7d
commit
7bb7cf8beb
1 changed files with 21 additions and 2 deletions
|
@ -107,6 +107,8 @@ class IMDb:
|
|||
self.triviaUrl = "%strivia" % self.pageUrl
|
||||
self.locationSource = None
|
||||
self.locationUrl = "%slocations" % self.pageUrl
|
||||
self.externalreviewsSource = None
|
||||
self.externalreviewsUrl = "%sexternalreviews" % self.pageUrl
|
||||
|
||||
def getPage(self, forcereload = False):
|
||||
if forcereload or not self.pageSource:
|
||||
|
@ -269,6 +271,7 @@ class IMDb:
|
|||
IMDbDict['locations'] = self.parseLocations()
|
||||
IMDbDict['release_date'] = self.parseReleaseinfo()
|
||||
IMDbDict['business'] = self.parseBusiness()
|
||||
IMDbDict['reviews'] = self.parseExternalreviews()
|
||||
self.IMDbDict = IMDbDict
|
||||
|
||||
if IMDbDict['episode_of']:
|
||||
|
@ -458,6 +461,22 @@ class IMDb:
|
|||
business['profit'] = business['gross'] - business['budget']
|
||||
return business
|
||||
|
||||
def getExternalreviews(self, forcereload = False):
    """Return the source of the external-reviews page, downloading it if needed.

    The page is fetched with read_url_utf8(self.externalreviewsUrl) and
    stored on self.externalreviewsSource when ``forcereload`` is true or
    when no truthy source has been stored yet; otherwise the stored value
    is returned as-is.
    """
    cached = self.externalreviewsSource
    # Reuse whatever was stored unless the caller explicitly asks for a refresh.
    if cached and not forcereload:
        return cached
    self.externalreviewsSource = read_url_utf8(self.externalreviewsUrl)
    return self.externalreviewsSource
|
||||
|
||||
def parseExternalreviews(self):
    """Return a dict mapping external review URLs to their link content.

    The page returned by self.getExternalreviews() is parsed with
    BeautifulSoup and only the first <ol> element is considered. For every
    <li> inside it, the first <a> supplies the entry: its ``href`` is the
    key and its first child node is the value.

    List items without an anchor, anchors without an ``href`` and anchors
    with no content are skipped, so a single malformed entry no longer
    aborts the whole parse with an IndexError.

    Returns an empty dict when the page contains no <ol>.
    """
    soup = BeautifulSoup(self.getExternalreviews())
    review_list = soup.find('ol')
    # Compare against None rather than relying on the truthiness of a tag object.
    if review_list is None:
        return {}

    reviews = {}
    for item in review_list('li'):
        # Look the anchor up once; find() yields None instead of raising.
        link = item.find('a')
        if link is None:
            continue
        href = link.get('href')
        # An entry needs both a target URL and something to show for it.
        if not href or not link.contents:
            continue
        reviews[href] = link.contents[0]
    return reviews
|
||||
|
||||
def guess(title, director=''):
|
||||
#FIXME: proper file -> title
|
||||
title = title.split('-')[0]
|
||||
|
|
Loading…
Reference in a new issue