parse keywords
This commit is contained in:
parent
e6782b3c17
commit
d630f4b19c
1 changed file with 16 additions and 5 deletions
|
@ -178,6 +178,17 @@ def get_release_date(metadata):
|
|||
return min(dates)
|
||||
|
||||
|
||||
def get_keywords(metadata):
    """Return the keyword titles listed in a page's metadata.

    Keywords are read from the 'rowTitle' of every entry in
    metadata['props']['pageProps']['contentData']['section']['items'].

    Returns a list of those row titles, or an empty list when that path
    is missing or does not have the expected shape.
    """
    try:
        items = metadata['props']['pageProps']['contentData']['section']['items']
        return [row['rowTitle'] for row in items]
    except (KeyError, IndexError, TypeError):
        # Best effort: a missing or differently shaped page yields no
        # keywords instead of an error. Only lookup/shape failures are
        # caught, so unrelated exceptions such as KeyboardInterrupt are
        # no longer silently swallowed.
        return []
|
||||
|
||||
|
||||
def get_entity_metadata(metadata):
|
||||
data = {}
|
||||
entity = metadata['props']['pageProps']['contentData']['entityMetadata']
|
||||
|
@ -276,11 +287,6 @@ class Imdb(SiteParser):
|
|||
'gross': zebra_table('Cumulative Worldwide Gross', more=[
|
||||
lambda data: find_re(decode_html(data).replace(',', ''), '\d+')
|
||||
], type='int'),
|
||||
'keyword': {
|
||||
'page': 'keywords',
|
||||
're': 'data-item-keyword="(.*?)"',
|
||||
'type': 'list'
|
||||
},
|
||||
'language': zebra_list('Language', more=['<a.*?>(.*?)</a>']),
|
||||
'originalTitle': {
|
||||
'page': 'releaseinfo',
|
||||
|
@ -543,6 +549,11 @@ class Imdb(SiteParser):
|
|||
if releasedate:
|
||||
self['releasedate'] = releasedate
|
||||
|
||||
metadata = self.get_page_data('keywords')
|
||||
keywords = get_keywords(metadata)
|
||||
if keywords:
|
||||
self['keywords'] = keywords
|
||||
|
||||
if 'summary' not in self and 'storyline' in self:
|
||||
self['summary'] = self.pop('storyline')
|
||||
if 'summary' in self:
|
||||
|
|
Loading…
Reference in a new issue