From 54492f33a6945ddc1c6ef5f592c494dcb73155e4 Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Thu, 6 Aug 2009 12:10:57 +0200 Subject: [PATCH] less BeautifulSoup --- oxweb/imdb.py | 64 +++++++++++++++++++++------------------------------ 1 file changed, 26 insertions(+), 38 deletions(-) diff --git a/oxweb/imdb.py b/oxweb/imdb.py index d02f896..5539af7 100644 --- a/oxweb/imdb.py +++ b/oxweb/imdb.py @@ -187,18 +187,21 @@ def getMovieInfo(imdbId): def getMovieRuntimeSeconds(imdbId): info = getMovieInfo(imdbId) - value = info['runtime'][0] - parsed_value = findRe(value, '(.*?) min') - parsed_value = findRe(parsed_value, '([0-9]+)') - if not parsed_value: - parsed_value = findRe(value, '(.*?) sec') + if 'runtime' in info: + value = info['runtime'][0] + parsed_value = findRe(value, '(.*?) min') parsed_value = findRe(parsed_value, '([0-9]+)') if not parsed_value: - parsed_value = 0 + parsed_value = findRe(value, '(.*?) sec') + parsed_value = findRe(parsed_value, '([0-9]+)') + if not parsed_value: + parsed_value = 0 + else: + parsed_value = int(parsed_value) else: - parsed_value = int(parsed_value) + parsed_value = int(parsed_value) * 60 else: - parsed_value = int(parsed_value) * 60 + parsed_value = -1 return parsed_value def getMoviePoster(imdbId): @@ -325,10 +328,7 @@ def getMovieCompanyCredits(imdbId): def getMovieLocations(imdbId): url = "%slocations" % getUrlBase(imdbId) data = getUrlUnicode(url) - soup = BeautifulSoup(data) - locations = [] - for key in soup('a', {'href': re.compile('^/List')}): - locations.append(decodeHtml(key.string)) + locations = re.compile('
', '').strip() - if t.startswith('<li>') and t.endswith('</li>'): - t = t[4:-5].strip() - t=decodeHtml(t) - trivia.append(t) - return trivia + trivia = re.compile('
<li>(.*?)</li>').findall(data) + def clean(t): + t = decodeHtml(t) + t = t.replace(u'”', '"') + if t.endswith('<br><br>'): + t = t[:-8] + return t.strip() + trivia = [clean(t) for t in trivia] + return trivia def getMovieConnections(imdbId): url = "%smovieconnections" % getUrlBase(imdbId) @@ -393,21 +391,11 @@ def getMovieKeywords(imdbId): def getMovieExternalReviews(imdbId): url = "%sexternalreviews" % getUrlBase(imdbId) data = getUrlUnicode(url) - soup = BeautifulSoup(data) - ol = soup('ol') - if ol: - ol = ol[0] - ret = {} - for li in ol('li'): - try: - a = li('a')[0] - href = a.get('href') - txt = a.contents[0] - ret[href] = txt - except: - pass - return ret - return {} + _reviews = re.compile('
  • (.*?)
  • ').findall(data) + reviews = {} + for r in _reviews: + reviews[r[0]] = r[1] + return reviews def getMovieReleaseDate(imdbId): releasedates = getMovieReleaseDates(imdbId)