parse external reviews
This commit is contained in:
parent
afe518fc7d
commit
7bb7cf8beb
1 changed file with 21 additions and 2 deletions
|
@ -107,6 +107,8 @@ class IMDb:
|
||||||
self.triviaUrl = "%strivia" % self.pageUrl
|
self.triviaUrl = "%strivia" % self.pageUrl
|
||||||
self.locationSource = None
|
self.locationSource = None
|
||||||
self.locationUrl = "%slocations" % self.pageUrl
|
self.locationUrl = "%slocations" % self.pageUrl
|
||||||
|
self.externalreviewsSource = None
|
||||||
|
self.externalreviewsUrl = "%sexternalreviews" % self.pageUrl
|
||||||
|
|
||||||
def getPage(self, forcereload = False):
|
def getPage(self, forcereload = False):
|
||||||
if forcereload or not self.pageSource:
|
if forcereload or not self.pageSource:
|
||||||
|
@ -269,6 +271,7 @@ class IMDb:
|
||||||
IMDbDict['locations'] = self.parseLocations()
|
IMDbDict['locations'] = self.parseLocations()
|
||||||
IMDbDict['release_date'] = self.parseReleaseinfo()
|
IMDbDict['release_date'] = self.parseReleaseinfo()
|
||||||
IMDbDict['business'] = self.parseBusiness()
|
IMDbDict['business'] = self.parseBusiness()
|
||||||
|
IMDbDict['reviews'] = self.parseExternalreviews()
|
||||||
self.IMDbDict = IMDbDict
|
self.IMDbDict = IMDbDict
|
||||||
|
|
||||||
if IMDbDict['episode_of']:
|
if IMDbDict['episode_of']:
|
||||||
|
@ -458,6 +461,22 @@ class IMDb:
|
||||||
business['profit'] = business['gross'] - business['budget']
|
business['profit'] = business['gross'] - business['budget']
|
||||||
return business
|
return business
|
||||||
|
|
||||||
|
def getExternalreviews(self, forcereload = False):
    """Return the source of the external-reviews page.

    A stored, non-empty self.externalreviewsSource is returned as-is unless
    forcereload is true. Otherwise the page at self.externalreviewsUrl is
    read with read_url_utf8, stored on the instance and returned.
    """
    # Fast path: reuse what is already stored unless a reload was requested.
    if not forcereload and self.externalreviewsSource:
        return self.externalreviewsSource
    self.externalreviewsSource = read_url_utf8(self.externalreviewsUrl)
    return self.externalreviewsSource
|
||||||
|
|
||||||
|
def parseExternalreviews(self):
    """Parse the external-reviews page into a dict of links.

    Only the first <ol> element of the page returned by
    self.getExternalreviews() is examined. For each <li> in it, the first
    <a> contributes one entry: its href is the key and its first child
    node (the link label) is the value.

    List items without a link, or whose link has no href, are skipped
    instead of raising IndexError or storing a None key. A link with no
    child nodes maps to an empty string.

    Returns an empty dict when the page contains no <ol>.
    """
    soup = BeautifulSoup(self.getExternalreviews())
    lists = soup('ol')
    if not lists:
        return {}

    reviews = {}
    for item in lists[0]('li'):
        anchors = item('a')
        if not anchors:
            # An <li> without any link carries no review entry.
            continue
        link = anchors[0]
        href = link.get('href')
        if not href:
            continue
        # An empty <a></a> has no contents; avoid indexing into an empty list.
        if link.contents:
            reviews[href] = link.contents[0]
        else:
            reviews[href] = ''
    return reviews
|
||||||
|
|
||||||
def guess(title, director=''):
|
def guess(title, director=''):
|
||||||
#FIXME: proper file -> title
|
#FIXME: proper file -> title
|
||||||
title = title.split('-')[0]
|
title = title.split('-')[0]
|
||||||
|
|
Loading…
Reference in a new issue