parse external reviews
This commit is contained in:
parent
afe518fc7d
commit
7bb7cf8beb
1 changed file with 21 additions and 2 deletions
|
@@ -107,7 +107,9 @@ class IMDb:
|
||||||
self.triviaUrl = "%strivia" % self.pageUrl
|
self.triviaUrl = "%strivia" % self.pageUrl
|
||||||
self.locationSource = None
|
self.locationSource = None
|
||||||
self.locationUrl = "%slocations" % self.pageUrl
|
self.locationUrl = "%slocations" % self.pageUrl
|
||||||
|
self.externalreviewsSource = None
|
||||||
|
self.externalreviewsUrl = "%sexternalreviews" % self.pageUrl
|
||||||
|
|
||||||
def getPage(self, forcereload = False):
|
def getPage(self, forcereload = False):
|
||||||
if forcereload or not self.pageSource:
|
if forcereload or not self.pageSource:
|
||||||
self.pageSource = read_url_utf8(self.pageUrl)
|
self.pageSource = read_url_utf8(self.pageUrl)
|
||||||
|
@@ -269,6 +271,7 @@ class IMDb:
|
||||||
IMDbDict['locations'] = self.parseLocations()
|
IMDbDict['locations'] = self.parseLocations()
|
||||||
IMDbDict['release_date'] = self.parseReleaseinfo()
|
IMDbDict['release_date'] = self.parseReleaseinfo()
|
||||||
IMDbDict['business'] = self.parseBusiness()
|
IMDbDict['business'] = self.parseBusiness()
|
||||||
|
IMDbDict['reviews'] = self.parseExternalreviews()
|
||||||
self.IMDbDict = IMDbDict
|
self.IMDbDict = IMDbDict
|
||||||
|
|
||||||
if IMDbDict['episode_of']:
|
if IMDbDict['episode_of']:
|
||||||
|
@@ -437,7 +440,7 @@ class IMDb:
|
||||||
if forcereload or not self.businessSource:
|
if forcereload or not self.businessSource:
|
||||||
self.businessSource = read_url_utf8(self.businessUrl)
|
self.businessSource = read_url_utf8(self.businessUrl)
|
||||||
return self.businessSource
|
return self.businessSource
|
||||||
|
|
||||||
def parseBusiness(self):
|
def parseBusiness(self):
|
||||||
soup = BeautifulSoup(self.getBusiness())
|
soup = BeautifulSoup(self.getBusiness())
|
||||||
business = {'budget': 0, 'gross': 0, 'profit': 0}
|
business = {'budget': 0, 'gross': 0, 'profit': 0}
|
||||||
|
@@ -457,7 +460,23 @@ class IMDb:
|
||||||
if business['budget'] and business['gross']:
|
if business['budget'] and business['gross']:
|
||||||
business['profit'] = business['gross'] - business['budget']
|
business['profit'] = business['gross'] - business['budget']
|
||||||
return business
|
return business
|
||||||
|
|
||||||
|
def getExternalreviews(self, forcereload = False):
    """Return the source of the external-reviews page, fetching it if needed.

    The page is read from ``self.externalreviewsUrl`` and cached in
    ``self.externalreviewsSource``. A cached, non-empty copy is returned
    as-is unless ``forcereload`` is true, in which case the page is
    fetched again and the cache is overwritten.
    """
    cached = self.externalreviewsSource
    if cached and not forcereload:
        return cached
    self.externalreviewsSource = read_url_utf8(self.externalreviewsUrl)
    return self.externalreviewsSource
|
||||||
|
|
||||||
|
def parseExternalreviews(self):
    """Parse the external-reviews page into a ``{href: link content}`` dict.

    Only the first ``<ol>`` element of the page is examined. For every
    ``<li>`` inside it, the first ``<a>`` supplies the entry: its ``href``
    attribute is the key and its first child node is the value.

    List items without a usable link (no ``<a>``, no ``href``, or an
    anchor with no content) are skipped instead of raising ``IndexError``
    or producing a ``None`` key.

    Returns an empty dict when the page contains no ``<ol>``.
    """
    soup = BeautifulSoup(self.getExternalreviews())
    ordered_lists = soup('ol')
    if not ordered_lists:
        return {}

    reviews = {}
    for item in ordered_lists[0]('li'):
        anchors = item('a')
        if not anchors:
            # Plain-text entry with no link: nothing to record.
            continue
        anchor = anchors[0]
        href = anchor.get('href')
        if not href or not anchor.contents:
            # A missing URL or an empty anchor cannot form a key/value pair.
            continue
        reviews[href] = anchor.contents[0]
    return reviews
|
||||||
|
|
||||||
def guess(title, director=''):
|
def guess(title, director=''):
|
||||||
#FIXME: proper file -> title
|
#FIXME: proper file -> title
|
||||||
title = title.split('-')[0]
|
title = title.split('-')[0]
|
||||||
|
|
Loading…
Reference in a new issue