diff --git a/ox/imdb.py b/ox/imdb.py index 38972b1..70983f9 100644 --- a/ox/imdb.py +++ b/ox/imdb.py @@ -56,11 +56,11 @@ def getRawMovieData(imdbId): data['media'] = {} data['media']['images'] = getMovieImages(imdbId) data['media']['trailers'] = getMovieTrailers(imdbId) + data['plotsummary'] = getMoviePlot(imdbId) return data def getMovieInfo(imdbId): data = getUrlUnicode(getUrlBase(imdbId)) - soup = BeautifulSoup(data) info = dict() info['poster'] = findRe(data, 'name="poster".*?(.*?)') + print html_title if not html_title: - html_title = soup('title') + html_title = findRe(data, '(.*?)') if html_title: - html_title = unicode(html_title[0]) html_title = html_title.replace('
', ' ').replace(' ', ' ') title = decodeHtml(html_title) title = stripTags(title) @@ -192,6 +192,12 @@ def getMovieQuotes(imdbId): quotes = [(q[0].strip(),q[1].strip()) for q in quotes] return quotes +def getMoviePlot(imdbId): + url = "%s/plotsummary" % getUrlBase(imdbId) + data = getUrlUnicode(url) + plot = findRe(data, '

(.*?)') + return plot + def getMovieTechnical(imdbId): url = "%s/technical" % getUrlBase(imdbId) data = getUrlUnicode(url) @@ -316,7 +322,6 @@ class IMDb: self.businessUrl = "%sbusiness" % self.pageUrl self.creditsUrl = "%sfullcredits" % self.pageUrl self.episodesUrl = "%sepisodes" % self.pageUrl - self.plotUrl = "%splotsummary" % self.pageUrl self.releaseinfoUrl = "%sreleaseinfo" % self.pageUrl def getPage(self): @@ -461,7 +466,7 @@ class IMDb: else: IMDbDict['tvshow'] = False IMDbDict['credits'] = self.getCredits() - IMDbDict['plot'] = self.parsePlot() + IMDbDict['plot'] = getMoviePlot(self.imdb) IMDbDict['keywords'] = getMovieKeywords(self.imdb) IMDbDict['trivia'] = getMovieTrivia(self.imdb) @@ -496,18 +501,6 @@ class IMDb: self.credits = credits return self.credits - def parsePlot(self): - data = getUrlUnicode(self.plotUrl) - soup = BeautifulSoup(data) - plot = soup('p', {'class':'plotpar'}) - if plot: - plot = unicode(plot[0]).split('')[0] - else: - plot = u'' - plot = stripTags(plot).strip() - self.plot = plot - return plot - def parseEpisodes(self): episodes = {} data = getUrlUnicode(self.episodesUrl)