#!/usr/bin/python3
"""Enrich a JSON list of film records with metadata from the indiancine.ma API.

Usage: <script> country films.json films_with_metadata.json

Progress is kept in two files so an interrupted run can be resumed:
the output JSON itself, and ``<output>.ignored`` holding one rejected
``imdbId`` per line.
"""
from optparse import OptionParser
import json
import codecs
import sys
import os
from datetime import datetime

import ox

API_URL = 'https://indiancine.ma/api/'

# Records whose title contains one of these markers are skipped without
# querying the API (and without being written to the ignore list).
NON_FILM_MARKERS = (
    'Mini-Series',
    'TV Series',
    'TV Movie',
    'TV Special',
    'Video Game',
)

# Metadata fields requested from the API for every record.
METADATA_KEYS = [
    'language',
    'productionCompany',
    'director',
    'runtime',
    'alternativeTitles',
    'color',
    'sound',
    'summary',
    'country',
    'isSeries',
    'title',
    'originalTitle',
    'year',
]


def _load_ignored(path):
    """Return the non-empty ids stored one per line in *path* ([] if absent)."""
    if not os.path.exists(path):
        return []
    with open(path) as fd:
        # Filtering blank lines matters: save() writes an empty file when
        # nothing has been ignored yet, which must not become the id ''.
        return [line.strip() for line in fd.read().splitlines() if line.strip()]


def _fetch_metadata(api, imdb_id):
    """Return the 'data' payload of a getMetadata call for *imdb_id*."""
    return api.getMetadata(id=imdb_id, keys=METADATA_KEYS)['data']


def add_metadata(films, country, output):
    """Add API metadata to *films* and store the accepted records in *output*.

    Records already present in *output* or listed in ``output + '.ignored'``
    are not processed again. A record is put on the ignore list when its
    title contains '(????)', when the API reports no year or a year that is
    not before the current one, when the API data has an 'isSeries' key, or
    when the API data has a 'country' value that does not contain *country*.

    Args:
        films: iterable of dicts, each with at least 'imdbId' and 'title'.
            Accepted dicts are modified in place (title replaced, missing
            keys filled in from the API data).
        country: country name that must appear in the API 'country' value.
        output: path of the JSON file holding the accumulated result.

    Returns:
        The list of accepted records, including those loaded from an
        existing *output* file.
    """
    api = ox.API(API_URL)
    current_year = datetime.now().year

    meta = []
    if os.path.exists(output):
        # save() writes UTF-8 with ensure_ascii=False, so read it back as
        # UTF-8 instead of relying on the locale's default encoding.
        with open(output, encoding='utf-8') as fd:
            meta = json.load(fd)

    ignore = output + '.ignored'
    ignored = _load_ignored(ignore)
    known_ids = {f['imdbId'] for f in meta} | set(ignored)

    def save():
        with codecs.open(output, 'w', encoding='utf-8') as fd:
            json.dump(meta, fd, indent=1, ensure_ascii=False)
        with open(ignore, 'w') as fd:
            fd.write('\n'.join(ignored))

    def reject(imdb_id):
        # Track the id in known_ids too, so a duplicate later in *films*
        # is neither fetched nor listed a second time.
        ignored.append(imdb_id)
        known_ids.add(imdb_id)

    for info in films:
        imdb_id = info['imdbId']
        if imdb_id in known_ids:
            continue
        if any(marker in info['title'] for marker in NON_FILM_MARKERS):
            continue

        extra = _fetch_metadata(api, imdb_id)
        print(info)
        print(extra)
        if not extra:
            # Persist progress before retrying, in case the retry fails hard.
            save()
            print('lets try again')
            extra = _fetch_metadata(api, imdb_id)
            print(extra)
        # A still-missing payload is handled like an empty one: no year,
        # hence the record is rejected below instead of raising.
        extra = extra or {}

        y = extra.get('year')
        if y:
            y = int(y)
        if '(????)' in info.get('title', '') or not y or y >= current_year:
            reject(imdb_id)
            print('skip unknown or current year', info['imdbId'], info.get('title'), info.get('year'))
            continue
        if 'isSeries' in extra or ('country' in extra and country not in extra['country']):
            reject(imdb_id)
            print('ignoring', info['imdbId'], info.get('title'))
            continue

        if 'originalTitle' in extra:
            # Keep the incoming title as an alternative and promote the
            # original title to be the main one.
            info['alternativeTitles'] = [[info['title'], '']]
            info['title'] = extra.pop('originalTitle')
        else:
            info['title'] = extra['title']
        # Values already present on the record win over API values.
        for key in extra:
            if key not in info:
                info[key] = extra[key]

        print(info['imdbId'], info['title'])
        meta.append(info)
        known_ids.add(imdb_id)
        # Checkpoint every 100 accepted records.
        if len(meta) % 100 == 0:
            save()

    save()
    return meta


def main():
    """Parse the command line and run add_metadata on the given files."""
    usage = "usage: %prog [options] country films.json films_with_metadata.json"
    parser = OptionParser(usage=usage)
    (_opts, args) = parser.parse_args()
    if len(args) != 3:
        parser.print_help()
        sys.exit(1)
    country, filename, output = args
    # JSON interchange files are UTF-8; do not depend on the locale default.
    with open(filename, encoding='utf-8') as fd:
        films = json.load(fd)
    add_metadata(films, country, output)


if __name__ == '__main__':
    main()