def get_keywords(metadata):
    """Return the keyword titles contained in a keywords page-data mapping.

    ``metadata`` is expected to be a nested mapping in which
    ``metadata['props']['pageProps']['contentData']['section']['items']``
    is an iterable of rows, each providing a ``'rowTitle'`` entry.

    Returns a list holding the ``'rowTitle'`` value of every row, in the
    order the rows appear.

    Extraction is best-effort and all-or-nothing: if any step of the path
    is missing or has an unexpected type (including ``metadata`` being
    ``None``, or a single row lacking ``'rowTitle'``), an empty list is
    returned instead of raising.
    """
    try:
        items = metadata['props']['pageProps']['contentData']['section']['items']
        return [row['rowTitle'] for row in items]
    except (LookupError, TypeError):
        # Only swallow "the data is not shaped as expected" failures. A bare
        # ``except`` would also hide KeyboardInterrupt/SystemExit and genuine
        # programming errors.
        return []