parse external reviews
This commit is contained in:
parent
afe518fc7d
commit
7bb7cf8beb
1 changed files with 21 additions and 2 deletions
|
@ -107,7 +107,9 @@ class IMDb:
|
|||
self.triviaUrl = "%strivia" % self.pageUrl
|
||||
self.locationSource = None
|
||||
self.locationUrl = "%slocations" % self.pageUrl
|
||||
|
||||
self.externalreviewsSource = None
|
||||
self.externalreviewsUrl = "%sexternalreviews" % self.pageUrl
|
||||
|
||||
def getPage(self, forcereload = False):
|
||||
if forcereload or not self.pageSource:
|
||||
self.pageSource = read_url_utf8(self.pageUrl)
|
||||
|
@ -269,6 +271,7 @@ class IMDb:
|
|||
IMDbDict['locations'] = self.parseLocations()
|
||||
IMDbDict['release_date'] = self.parseReleaseinfo()
|
||||
IMDbDict['business'] = self.parseBusiness()
|
||||
IMDbDict['reviews'] = self.parseExternalreviews()
|
||||
self.IMDbDict = IMDbDict
|
||||
|
||||
if IMDbDict['episode_of']:
|
||||
|
@ -437,7 +440,7 @@ class IMDb:
|
|||
if forcereload or not self.businessSource:
|
||||
self.businessSource = read_url_utf8(self.businessUrl)
|
||||
return self.businessSource
|
||||
|
||||
|
||||
def parseBusiness(self):
|
||||
soup = BeautifulSoup(self.getBusiness())
|
||||
business = {'budget': 0, 'gross': 0, 'profit': 0}
|
||||
|
@ -457,7 +460,23 @@ class IMDb:
|
|||
if business['budget'] and business['gross']:
|
||||
business['profit'] = business['gross'] - business['budget']
|
||||
return business
|
||||
|
||||
def getExternalreviews(self, forcereload=False):
    """Return the source of the external reviews page, fetching it on demand.

    The page is read from self.externalreviewsUrl and kept in
    self.externalreviewsSource, so later calls reuse the stored copy.

    forcereload -- when true, fetch the page again even if a copy is stored.
    """
    stored = self.externalreviewsSource
    if stored and not forcereload:
        return stored
    # Nothing usable stored (or a reload was requested): fetch and remember.
    self.externalreviewsSource = read_url_utf8(self.externalreviewsUrl)
    return self.externalreviewsSource
|
||||
|
||||
def parseExternalreviews(self):
    """Collect the links listed on the external reviews page.

    Parses the page returned by getExternalreviews() and reads the first
    link of every <li> inside the first <ol> element.

    Returns a dict mapping each link's href to the first child node of the
    anchor (its text for a plain link). Returns an empty dict when the page
    contains no <ol>.
    """
    soup = BeautifulSoup(self.getExternalreviews())
    lists = soup('ol')
    if not lists:
        return {}

    reviews = {}
    for item in lists[0]('li'):
        anchors = item('a')
        # A list item without a link, or a link without any content, used
        # to raise IndexError and abort the whole parse; skip it instead.
        if not anchors or not anchors[0].contents:
            continue
        link = anchors[0]
        href = link.get('href')
        # An anchor without an href carries no URL to key the entry on.
        if href is None:
            continue
        reviews[href] = link.contents[0]
    return reviews
|
||||
|
||||
def guess(title, director=''):
|
||||
#FIXME: proper file -> title
|
||||
title = title.split('-')[0]
|
||||
|
|
Loading…
Reference in a new issue