parse keywords
This commit is contained in:
parent
e6782b3c17
commit
d630f4b19c
1 changed file with 16 additions and 5 deletions
|
@ -178,6 +178,17 @@ def get_release_date(metadata):
|
||||||
return min(dates)
|
return min(dates)
|
||||||
|
|
||||||
|
|
||||||
|
def get_keywords(metadata):
    """Return the keyword titles listed in a parsed keywords page.

    Reads ``metadata['props']['pageProps']['contentData']['section']['items']``
    and collects the ``'rowTitle'`` value of every item, in order.

    Args:
        metadata: Nested mapping holding the page data. May be ``None`` or
            lack any of the expected keys.

    Returns:
        A list with each item's ``'rowTitle'`` value, or an empty list when
        the expected structure is missing or malformed.
    """
    try:
        items = metadata['props']['pageProps']['contentData']['section']['items']
        return [row['rowTitle'] for row in items]
    except (KeyError, IndexError, TypeError):
        # Best-effort extraction: a missing key, a None/non-subscriptable
        # level, or a non-iterable 'items' value all mean "no keywords".
        # Anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        return []
|
||||||
|
|
||||||
|
|
||||||
def get_entity_metadata(metadata):
|
def get_entity_metadata(metadata):
|
||||||
data = {}
|
data = {}
|
||||||
entity = metadata['props']['pageProps']['contentData']['entityMetadata']
|
entity = metadata['props']['pageProps']['contentData']['entityMetadata']
|
||||||
|
@ -276,11 +287,6 @@ class Imdb(SiteParser):
|
||||||
'gross': zebra_table('Cumulative Worldwide Gross', more=[
|
'gross': zebra_table('Cumulative Worldwide Gross', more=[
|
||||||
lambda data: find_re(decode_html(data).replace(',', ''), '\d+')
|
lambda data: find_re(decode_html(data).replace(',', ''), '\d+')
|
||||||
], type='int'),
|
], type='int'),
|
||||||
'keyword': {
|
|
||||||
'page': 'keywords',
|
|
||||||
're': 'data-item-keyword="(.*?)"',
|
|
||||||
'type': 'list'
|
|
||||||
},
|
|
||||||
'language': zebra_list('Language', more=['<a.*?>(.*?)</a>']),
|
'language': zebra_list('Language', more=['<a.*?>(.*?)</a>']),
|
||||||
'originalTitle': {
|
'originalTitle': {
|
||||||
'page': 'releaseinfo',
|
'page': 'releaseinfo',
|
||||||
|
@ -543,6 +549,11 @@ class Imdb(SiteParser):
|
||||||
if releasedate:
|
if releasedate:
|
||||||
self['releasedate'] = releasedate
|
self['releasedate'] = releasedate
|
||||||
|
|
||||||
|
metadata = self.get_page_data('keywords')
|
||||||
|
keywords = get_keywords(metadata)
|
||||||
|
if keywords:
|
||||||
|
self['keywords'] = keywords
|
||||||
|
|
||||||
if 'summary' not in self and 'storyline' in self:
|
if 'summary' not in self and 'storyline' in self:
|
||||||
self['summary'] = self.pop('storyline')
|
self['summary'] = self.pop('storyline')
|
||||||
if 'summary' in self:
|
if 'summary' in self:
|
||||||
|
|
Loading…
Reference in a new issue