imdb: decode HTML entities in credits and trivia; make all credit sections optional

2008-05-11 17:07:09 +02:00 · 2008-05-11 17:07:09 +02:00 · 6a1ecbe21c
commit 6a1ecbe21c
parent ceb4d21506
1 changed files with 10 additions and 5 deletions
--- a/ox/imdb.py
+++ b/ox/imdb.py
@@ -150,7 +150,7 @@ def creditList(data, section=None):
    credits_ = re.compile('''<tr>.*?<td valign="top">(.*?)</td><td.*?</td><td valign="top">(.*?)</td></tr>''').findall(data)
  credits = []
  for c_ in credits_:
-    c = [c_[0].strip(), c_[1].strip()]
+    c = [decodeHtml(c_[0]).strip(), decodeHtml(c_[1]).strip()]
    if section=='writers':
      c[1] = c[1].replace('<br>', '').strip().replace(')', '').replace('(','')
      if c[1].endswith(' and'): c[1] = c[1][:-4]
@@ -256,6 +256,7 @@ def getMovieTrivia(imdbId):
      t = str(t).replace('<br />', '').strip()
      if t.startswith('<li>') and t.endswith('</li>'):
        t = t[4:-5].strip()
+      t=decodeHtml(t)
      trivia.append(t)
  return trivia

@@ -339,9 +340,13 @@ class IMDb:
        parsed_value = int(parsed_value) * 60
    elif key in ('country', 'language'):
      parsed_value = value.split(' / ')
+      if len(parsed_value) == 1:
+        parsed_value = parsed_value[0].split(' | ')
      parsed_value = [v.strip() for v in parsed_value]
    elif key == 'genre':
      parsed_value = value.replace('more', '').strip().split(' / ')
+      if len(parsed_value) == 1:
+        parsed_value = parsed_value[0].split(' | ')
      parsed_value = [v.strip() for v in parsed_value]
    elif key == 'tagline':
      parsed_value = value.replace('more', '').strip()
@@ -484,10 +489,10 @@ class IMDb:
    def getNames(creditList):
      return [stripTags(c[0]) for c in creditList]

-    credits['director'] = getNames(raw_credits['directors'])
-    credits['writer'] = getNames(raw_credits['writers'])
-    credits['producer'] = getNames(raw_credits['producers'])
-    credits['cast'] = [(stripTags(c[0]),stripTags(c[1])) for c in raw_credits['cast']]
+    credits['director'] = getNames(raw_credits.get('directors', ''))
+    credits['writer'] = getNames(raw_credits.get('writers', ''))
+    credits['producer'] = getNames(raw_credits.get('producers', ''))
+    credits['cast'] = [(stripTags(c[0]),stripTags(c[1])) for c in raw_credits.get('cast', [])]

    self.credits = credits
    return self.credits