imdb: decode HTML entities in credits and trivia, and treat all credit sections as optional
This commit is contained in:
parent
ceb4d21506
commit
6a1ecbe21c
1 changed file with 10 additions and 5 deletions
15
ox/imdb.py
15
ox/imdb.py
|
@ -150,7 +150,7 @@ def creditList(data, section=None):
|
|||
credits_ = re.compile('''<tr>.*?<td valign="top">(.*?)</td><td.*?</td><td valign="top">(.*?)</td></tr>''').findall(data)
|
||||
credits = []
|
||||
for c_ in credits_:
|
||||
c = [c_[0].strip(), c_[1].strip()]
|
||||
c = [decodeHtml(c_[0]).strip(), decodeHtml(c_[1]).strip()]
|
||||
if section=='writers':
|
||||
c[1] = c[1].replace('<br>', '').strip().replace(')', '').replace('(','')
|
||||
if c[1].endswith(' and'): c[1] = c[1][:-4]
|
||||
|
@ -256,6 +256,7 @@ def getMovieTrivia(imdbId):
|
|||
t = str(t).replace('<br />', '').strip()
|
||||
if t.startswith('<li>') and t.endswith('</li>'):
|
||||
t = t[4:-5].strip()
|
||||
t=decodeHtml(t)
|
||||
trivia.append(t)
|
||||
return trivia
|
||||
|
||||
|
@ -339,9 +340,13 @@ class IMDb:
|
|||
parsed_value = int(parsed_value) * 60
|
||||
elif key in ('country', 'language'):
|
||||
parsed_value = value.split(' / ')
|
||||
if len(parsed_value) == 1:
|
||||
parsed_value = parsed_value[0].split(' | ')
|
||||
parsed_value = [v.strip() for v in parsed_value]
|
||||
elif key == 'genre':
|
||||
parsed_value = value.replace('more', '').strip().split(' / ')
|
||||
if len(parsed_value) == 1:
|
||||
parsed_value = parsed_value[0].split(' | ')
|
||||
parsed_value = [v.strip() for v in parsed_value]
|
||||
elif key == 'tagline':
|
||||
parsed_value = value.replace('more', '').strip()
|
||||
|
@ -484,10 +489,10 @@ class IMDb:
|
|||
def getNames(creditList):
|
||||
return [stripTags(c[0]) for c in creditList]
|
||||
|
||||
credits['director'] = getNames(raw_credits['directors'])
|
||||
credits['writer'] = getNames(raw_credits['writers'])
|
||||
credits['producer'] = getNames(raw_credits['producers'])
|
||||
credits['cast'] = [(stripTags(c[0]),stripTags(c[1])) for c in raw_credits['cast']]
|
||||
credits['director'] = getNames(raw_credits.get('directors', ''))
|
||||
credits['writer'] = getNames(raw_credits.get('writers', ''))
|
||||
credits['producer'] = getNames(raw_credits.get('producers', ''))
|
||||
credits['cast'] = [(stripTags(c[0]),stripTags(c[1])) for c in raw_credits.get('cast', [])]
|
||||
|
||||
self.credits = credits
|
||||
return self.credits
|
||||
|
|
Loading…
Reference in a new issue