diff --git a/ox/web/imdb.py b/ox/web/imdb.py index d683973..06e3e9d 100644 --- a/ox/web/imdb.py +++ b/ox/web/imdb.py @@ -2,13 +2,12 @@ # vi:si:et:sw=4:sts=4:ts=4 from __future__ import print_function -import json import re import time import unicodedata from six.moves.urllib.parse import urlencode -from six import string_types +from six import text_type, string_types from .. import find_re, strip_tags, decode_html from .. import cache @@ -107,89 +106,6 @@ def technical(label): } -def tech_spec(metadata): - tech = {} - for row in metadata['props']['pageProps']['contentData']['section']['items']: - title = { - 'aspect ratio': 'aspectratio', - 'sound mix': 'sound', - }.get(row['rowTitle'].lower(), row['rowTitle'].lower()) - tech[title] = [] - for content in row['listContent']: - value = content['text'] - tech[title].append(value) - return tech - - -def movie_connections(metadata): - connections = {} - for row in metadata['props']['pageProps']['contentData']['categories']: - title = { - }.get(row['name'], row['name']) - if title not in connections: - connections[title] = [] - - for item in row['section']['items']: - item_ = { - 'id': item['id'][2:], - } - - item_['title'] = re.compile('(.*?)').findall(item['listContent'][0]['html'])[0] - if len(item['listContent']) >=2: - item_['description'] = strip_tags(item['listContent'][1]['html']) - connections[title].append(item_) - return connections - - -def get_category_by_id(metadata, id): - for category in metadata['props']['pageProps']['contentData']['categories']: - if category['id'] == id: - return category - - -def get_release_date(metadata): - releases = get_category_by_id(metadata, 'releases') - def parse_date(d): - parsed = None - for fmt in ( - '%B %d, %Y', - '%d %B %Y', - '%B %Y', - ): - try: - parsed = datetime.strptime(d, fmt) - break - except: - pass - if not parsed: - return None - return '%d-%02d-%02d' % (parsed.year, parsed.month, parsed.day) - - dates = [] - for item in releases['section']['items']: - content = item['listContent'][0] - date = parse_date(content['text']) - if date: - dates.append(date) - - if dates: - return min(dates) - - -def alternative_titles(metadata): - titles = [] - akas = get_category_by_id(metadata, 'akas') - for row in akas['section']['items']: - content = row['listContent'][0] - titles.append({ - 'title': content['text'], - 'country': row['rowTitle'], - }) - if content.get('subText'): - titles[-1]['subText'] = content['subText'] - return titles - - ''' 'posterIds': { 'page': 'posters', @@ -200,17 +116,18 @@ def alternative_titles(metadata): class Imdb(SiteParser): ''' - >>> Imdb('0068646')['title'] == 'The Godfather' + >>> Imdb('0068646')['title'] == text_type(u'The Godfather') True - >>> Imdb('0133093')['title'] == 'The Matrix' + >>> Imdb('0133093')['title'] == text_type(u'The Matrix') True ''' regex = { 'alternativeTitles': { 'page': 'releaseinfo', 're': [ - '