parse keywords

This commit is contained in:
j 2023-07-06 18:32:45 +05:30
parent e6782b3c17
commit d630f4b19c

View file

@ -178,6 +178,17 @@ def get_release_date(metadata):
return min(dates) return min(dates)
def get_keywords(metadata):
    """Return the ``rowTitle`` of every item in the page's content section.

    ``metadata`` is expected to be a nested mapping in which
    ``metadata['props']['pageProps']['contentData']['section']['items']``
    is an iterable of mappings that each carry a ``'rowTitle'`` key
    (presumably the parsed page data of the keywords page -- confirm
    against the caller).

    Extraction is best-effort and all-or-nothing: if any level of the
    structure is missing or has an unexpected type (including a single
    item without ``'rowTitle'``), an empty list is returned.
    """
    try:
        items = metadata['props']['pageProps']['contentData']['section']['items']
        return [row['rowTitle'] for row in items]
    except (KeyError, TypeError):
        # KeyError: a level of the expected structure is absent.
        # TypeError: a level is None or otherwise not subscriptable/iterable.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not
        # swallowed, unlike a bare ``except:``.
        return []
def get_entity_metadata(metadata): def get_entity_metadata(metadata):
data = {} data = {}
entity = metadata['props']['pageProps']['contentData']['entityMetadata'] entity = metadata['props']['pageProps']['contentData']['entityMetadata']
@ -276,11 +287,6 @@ class Imdb(SiteParser):
'gross': zebra_table('Cumulative Worldwide Gross', more=[ 'gross': zebra_table('Cumulative Worldwide Gross', more=[
lambda data: find_re(decode_html(data).replace(',', ''), '\d+') lambda data: find_re(decode_html(data).replace(',', ''), '\d+')
], type='int'), ], type='int'),
'keyword': {
'page': 'keywords',
're': 'data-item-keyword="(.*?)"',
'type': 'list'
},
'language': zebra_list('Language', more=['<a.*?>(.*?)</a>']), 'language': zebra_list('Language', more=['<a.*?>(.*?)</a>']),
'originalTitle': { 'originalTitle': {
'page': 'releaseinfo', 'page': 'releaseinfo',
@ -543,6 +549,11 @@ class Imdb(SiteParser):
if releasedate: if releasedate:
self['releasedate'] = releasedate self['releasedate'] = releasedate
metadata = self.get_page_data('keywords')
keywords = get_keywords(metadata)
if keywords:
self['keywords'] = keywords
if 'summary' not in self and 'storyline' in self: if 'summary' not in self and 'storyline' in self:
self['summary'] = self.pop('storyline') self['summary'] = self.pop('storyline')
if 'summary' in self: if 'summary' in self: