From 6a1ecbe21c7914e091ec778182578001c229f341 Mon Sep 17 00:00:00 2001 From: j Date: Sun, 11 May 2008 17:07:09 +0200 Subject: [PATCH] imdb needs some decodeHtml and credits are all optional --- ox/imdb.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/ox/imdb.py b/ox/imdb.py index 8bc5b6f..a603acb 100644 --- a/ox/imdb.py +++ b/ox/imdb.py @@ -150,7 +150,7 @@ def creditList(data, section=None): credits_ = re.compile('''.*?(.*?)(.*?)''').findall(data) credits = [] for c_ in credits_: - c = [c_[0].strip(), c_[1].strip()] + c = [decodeHtml(c_[0]).strip(), decodeHtml(c_[1]).strip()] if section=='writers': c[1] = c[1].replace('
', '').strip().replace(')', '').replace('(','') if c[1].endswith(' and'): c[1] = c[1][:-4] @@ -256,6 +256,7 @@ def getMovieTrivia(imdbId): t = str(t).replace('
', '').strip() if t.startswith('
  • ') and t.endswith('
  • '): t = t[4:-5].strip() + t=decodeHtml(t) trivia.append(t) return trivia @@ -339,9 +340,13 @@ class IMDb: parsed_value = int(parsed_value) * 60 elif key in ('country', 'language'): parsed_value = value.split(' / ') + if len(parsed_value) == 1: + parsed_value = parsed_value[0].split(' | ') parsed_value = [v.strip() for v in parsed_value] elif key == 'genre': parsed_value = value.replace('more', '').strip().split(' / ') + if len(parsed_value) == 1: + parsed_value = parsed_value[0].split(' | ') parsed_value = [v.strip() for v in parsed_value] elif key == 'tagline': parsed_value = value.replace('more', '').strip() @@ -484,10 +489,10 @@ class IMDb: def getNames(creditList): return [stripTags(c[0]) for c in creditList] - credits['director'] = getNames(raw_credits['directors']) - credits['writer'] = getNames(raw_credits['writers']) - credits['producer'] = getNames(raw_credits['producers']) - credits['cast'] = [(stripTags(c[0]),stripTags(c[1])) for c in raw_credits['cast']] + credits['director'] = getNames(raw_credits.get('directors', '')) + credits['writer'] = getNames(raw_credits.get('writers', '')) + credits['producer'] = getNames(raw_credits.get('producers', '')) + credits['cast'] = [(stripTags(c[0]),stripTags(c[1])) for c in raw_credits.get('cast', [])] self.credits = credits return self.credits