imdb: decode HTML entities in credits and trivia (decodeHtml), and treat all credit sections as optional
This commit is contained in:
parent
ceb4d21506
commit
6a1ecbe21c
1 changed file with 10 additions and 5 deletions
15
ox/imdb.py
15
ox/imdb.py
|
@ -150,7 +150,7 @@ def creditList(data, section=None):
|
||||||
credits_ = re.compile('''<tr>.*?<td valign="top">(.*?)</td><td.*?</td><td valign="top">(.*?)</td></tr>''').findall(data)
|
credits_ = re.compile('''<tr>.*?<td valign="top">(.*?)</td><td.*?</td><td valign="top">(.*?)</td></tr>''').findall(data)
|
||||||
credits = []
|
credits = []
|
||||||
for c_ in credits_:
|
for c_ in credits_:
|
||||||
c = [c_[0].strip(), c_[1].strip()]
|
c = [decodeHtml(c_[0]).strip(), decodeHtml(c_[1]).strip()]
|
||||||
if section=='writers':
|
if section=='writers':
|
||||||
c[1] = c[1].replace('<br>', '').strip().replace(')', '').replace('(','')
|
c[1] = c[1].replace('<br>', '').strip().replace(')', '').replace('(','')
|
||||||
if c[1].endswith(' and'): c[1] = c[1][:-4]
|
if c[1].endswith(' and'): c[1] = c[1][:-4]
|
||||||
|
@ -256,6 +256,7 @@ def getMovieTrivia(imdbId):
|
||||||
t = str(t).replace('<br />', '').strip()
|
t = str(t).replace('<br />', '').strip()
|
||||||
if t.startswith('<li>') and t.endswith('</li>'):
|
if t.startswith('<li>') and t.endswith('</li>'):
|
||||||
t = t[4:-5].strip()
|
t = t[4:-5].strip()
|
||||||
|
t=decodeHtml(t)
|
||||||
trivia.append(t)
|
trivia.append(t)
|
||||||
return trivia
|
return trivia
|
||||||
|
|
||||||
|
@ -339,9 +340,13 @@ class IMDb:
|
||||||
parsed_value = int(parsed_value) * 60
|
parsed_value = int(parsed_value) * 60
|
||||||
elif key in ('country', 'language'):
|
elif key in ('country', 'language'):
|
||||||
parsed_value = value.split(' / ')
|
parsed_value = value.split(' / ')
|
||||||
|
if len(parsed_value) == 1:
|
||||||
|
parsed_value = parsed_value[0].split(' | ')
|
||||||
parsed_value = [v.strip() for v in parsed_value]
|
parsed_value = [v.strip() for v in parsed_value]
|
||||||
elif key == 'genre':
|
elif key == 'genre':
|
||||||
parsed_value = value.replace('more', '').strip().split(' / ')
|
parsed_value = value.replace('more', '').strip().split(' / ')
|
||||||
|
if len(parsed_value) == 1:
|
||||||
|
parsed_value = parsed_value[0].split(' | ')
|
||||||
parsed_value = [v.strip() for v in parsed_value]
|
parsed_value = [v.strip() for v in parsed_value]
|
||||||
elif key == 'tagline':
|
elif key == 'tagline':
|
||||||
parsed_value = value.replace('more', '').strip()
|
parsed_value = value.replace('more', '').strip()
|
||||||
|
@ -484,10 +489,10 @@ class IMDb:
|
||||||
def getNames(creditList):
|
def getNames(creditList):
|
||||||
return [stripTags(c[0]) for c in creditList]
|
return [stripTags(c[0]) for c in creditList]
|
||||||
|
|
||||||
credits['director'] = getNames(raw_credits['directors'])
|
credits['director'] = getNames(raw_credits.get('directors', ''))
|
||||||
credits['writer'] = getNames(raw_credits['writers'])
|
credits['writer'] = getNames(raw_credits.get('writers', ''))
|
||||||
credits['producer'] = getNames(raw_credits['producers'])
|
credits['producer'] = getNames(raw_credits.get('producers', ''))
|
||||||
credits['cast'] = [(stripTags(c[0]),stripTags(c[1])) for c in raw_credits['cast']]
|
credits['cast'] = [(stripTags(c[0]),stripTags(c[1])) for c in raw_credits.get('cast', [])]
|
||||||
|
|
||||||
self.credits = credits
|
self.credits = credits
|
||||||
return self.credits
|
return self.credits
|
||||||
|
|
Loading…
Reference in a new issue