imdb: decode HTML entities in credits and trivia, and treat all credit sections as optional

This commit is contained in:
j 2008-05-11 17:07:09 +02:00
parent ceb4d21506
commit 6a1ecbe21c

View file

@ -150,7 +150,7 @@ def creditList(data, section=None):
credits_ = re.compile('''<tr>.*?<td valign="top">(.*?)</td><td.*?</td><td valign="top">(.*?)</td></tr>''').findall(data) credits_ = re.compile('''<tr>.*?<td valign="top">(.*?)</td><td.*?</td><td valign="top">(.*?)</td></tr>''').findall(data)
credits = [] credits = []
for c_ in credits_: for c_ in credits_:
c = [c_[0].strip(), c_[1].strip()] c = [decodeHtml(c_[0]).strip(), decodeHtml(c_[1]).strip()]
if section=='writers': if section=='writers':
c[1] = c[1].replace('<br>', '').strip().replace(')', '').replace('(','') c[1] = c[1].replace('<br>', '').strip().replace(')', '').replace('(','')
if c[1].endswith(' and'): c[1] = c[1][:-4] if c[1].endswith(' and'): c[1] = c[1][:-4]
@ -256,6 +256,7 @@ def getMovieTrivia(imdbId):
t = str(t).replace('<br />', '').strip() t = str(t).replace('<br />', '').strip()
if t.startswith('<li>') and t.endswith('</li>'): if t.startswith('<li>') and t.endswith('</li>'):
t = t[4:-5].strip() t = t[4:-5].strip()
t=decodeHtml(t)
trivia.append(t) trivia.append(t)
return trivia return trivia
@ -339,9 +340,13 @@ class IMDb:
parsed_value = int(parsed_value) * 60 parsed_value = int(parsed_value) * 60
elif key in ('country', 'language'): elif key in ('country', 'language'):
parsed_value = value.split(' / ') parsed_value = value.split(' / ')
if len(parsed_value) == 1:
parsed_value = parsed_value[0].split(' | ')
parsed_value = [v.strip() for v in parsed_value] parsed_value = [v.strip() for v in parsed_value]
elif key == 'genre': elif key == 'genre':
parsed_value = value.replace('more', '').strip().split(' / ') parsed_value = value.replace('more', '').strip().split(' / ')
if len(parsed_value) == 1:
parsed_value = parsed_value[0].split(' | ')
parsed_value = [v.strip() for v in parsed_value] parsed_value = [v.strip() for v in parsed_value]
elif key == 'tagline': elif key == 'tagline':
parsed_value = value.replace('more', '').strip() parsed_value = value.replace('more', '').strip()
@ -484,10 +489,10 @@ class IMDb:
def getNames(creditList): def getNames(creditList):
return [stripTags(c[0]) for c in creditList] return [stripTags(c[0]) for c in creditList]
credits['director'] = getNames(raw_credits['directors']) credits['director'] = getNames(raw_credits.get('directors', ''))
credits['writer'] = getNames(raw_credits['writers']) credits['writer'] = getNames(raw_credits.get('writers', ''))
credits['producer'] = getNames(raw_credits['producers']) credits['producer'] = getNames(raw_credits.get('producers', ''))
credits['cast'] = [(stripTags(c[0]),stripTags(c[1])) for c in raw_credits['cast']] credits['cast'] = [(stripTags(c[0]),stripTags(c[1])) for c in raw_credits.get('cast', [])]
self.credits = credits self.credits = credits
return self.credits return self.credits