From 3debebf9232059f0c3f944dbbe90c9b3833cc40c Mon Sep 17 00:00:00 2001
From: j
Date: Fri, 10 Mar 2023 17:39:31 +0100
Subject: [PATCH] imdb fixes

---
Notes (review commentary; text in this section is not part of the commit):

  * Imports: replaces `from __future__ import print_function` with
    `from collections import defaultdict`, and additionally imports
    `get_country_name` from `..geo`.
  * Adds `get_entity_metadata(metadata)`, which builds a dict with the keys
    `title`, `originalTitle`, `year`, `plot`, `country` and `poster` from
    `metadata['props']['pageProps']['contentData']` (`entityMetadata` and
    `posterData`). Every key is read with plain indexing, so a missing key
    raises KeyError.
  * `alternative_titles(metadata)` now groups the 'akas' rows by title and
    returns a list of `(title, [country, ...])` pairs instead of a list of
    `{'title': ..., 'country': ...}` dicts. Rows whose text equals the
    entity's `titleText` or `originalTitleText` are skipped, and `subText`
    is no longer included (that code is left commented out).
  * NOTE(review): the return shape of `alternative_titles` changes; callers
    are not visible in this patch -- confirm they expect the new pairs.

 ox/web/imdb.py | 38 ++++++++++++++++++++++++++++----------
 1 file changed, 28 insertions(+), 10 deletions(-)

diff --git a/ox/web/imdb.py b/ox/web/imdb.py
index d683973..b541b38 100644
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-from __future__ import print_function
+from collections import defaultdict
 
 import json
 import re
@@ -17,7 +17,7 @@ from .. import cache
 from . siteparser import SiteParser
 from . import duckduckgo
 from ..utils import datetime
-from ..geo import normalize_country_name
+from ..geo import normalize_country_name, get_country_name
 
 
 def prepare_url(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None, unicode=False):
@@ -176,18 +176,36 @@ def get_release_date(metadata):
     return min(dates)
 
 
+def get_entity_metadata(metadata):
+    data = {}
+    entity = metadata['props']['pageProps']['contentData']['entityMetadata']
+    data['title'] = entity['titleText']['text']
+    data['originalTitle'] = entity['originalTitleText']['text']
+    data['year'] = entity['releaseYear']['year']
+    data['plot'] = entity['plot']['plotText']['plainText']
+    data['country'] = [get_country_name(c['id']) for c in entity['countriesOfOrigin']['countries']]
+    data['poster'] = metadata['props']['pageProps']['contentData']['posterData']['image']['url']
+    return data
+
+
 def alternative_titles(metadata):
-    titles = []
+    titles = defaultdict(list)
     akas = get_category_by_id(metadata, 'akas')
+
+    skip = [
+        metadata['props']['pageProps']['contentData']['entityMetadata']['titleText']['text'],
+        metadata['props']['pageProps']['contentData']['entityMetadata']['originalTitleText']['text']
+    ]
     for row in akas['section']['items']:
         content = row['listContent'][0]
-        titles.append({
-            'title': content['text'],
-            'country': row['rowTitle'],
-        })
-        if content.get('subText'):
-            titles[-1]['subText'] = content['subText']
-    return titles
+        title = content['text']
+        country = row['rowTitle']
+        if title in skip:
+            continue
+        titles[title].append(country)
+        #if content.get('subText'):
+        #    titles[-1]['subText'] = content['subText']
+    return [kv for kv in titles.items()]
 
 
 '''