99 lines
3 KiB
Python
Executable file
99 lines
3 KiB
Python
Executable file
#!/usr/bin/python3
|
|
from optparse import OptionParser
|
|
import json
|
|
import codecs
|
|
import sys
|
|
import os
|
|
from datetime import datetime
|
|
|
|
import ox
|
|
|
|
def add_metadata(films, country, output):
    """Enrich *films* with metadata from the API and persist the result.

    Previously collected entries are loaded from *output* (a JSON list) and
    previously rejected IDs from ``output + '.ignored'`` (one ID per line),
    so a rerun only looks up films that are in neither file.

    Args:
        films: iterable of dicts, each with at least ``'imdbId'`` and
            ``'title'``; accepted dicts are updated in place.
        country: a film is rejected when the API reports a ``'country'``
            value that does not contain this string.
        output: path of the JSON file holding the accepted films.

    Returns:
        The list of accepted film dicts (old entries plus new ones), which
        is also what gets written to *output*.
    """
    api_url = 'https://indiancine.ma/api/'
    tv_markers = ('Mini-Series', 'TV Series', 'TV Movie', 'TV Special', 'Video Game')
    # Loop-invariant list of fields requested for every film.
    keys = [
        'language', 'productionCompany', 'director',
        'runtime', 'alternativeTitles',
        'color', 'sound',
        'summary', 'country',
        'isSeries',
        'title',
        'originalTitle', 'year',
    ]
    ignore = output + '.ignored'
    current_year = datetime.now().year

    # The file is written as UTF-8 with ensure_ascii=False, so it has to be
    # read back as UTF-8 instead of the locale's default encoding.
    meta = []
    if os.path.exists(output):
        with open(output, encoding='utf-8') as fd:
            meta = json.load(fd)

    # Drop blank lines so an empty file does not yield a bogus '' ID.
    ignored = []
    if os.path.exists(ignore):
        with open(ignore, encoding='utf-8') as fd:
            ignored = [line.strip() for line in fd if line.strip()]

    known_ids = {f['imdbId'] for f in meta}
    known_ids.update(ignored)

    api = None

    def fetch(imdb_id):
        """Return the API's ``'data'`` payload for *imdb_id*."""
        nonlocal api
        # Create the client on first use so a rerun with nothing left to
        # look up does not need the API at all.
        if api is None:
            api = ox.API(api_url)
        return api.getMetadata(id=imdb_id, keys=keys)['data']

    def save():
        """Write the accepted films and the ignore list to disk."""
        with open(output, 'w', encoding='utf-8') as fd:
            json.dump(meta, fd, indent=1, ensure_ascii=False)
        with open(ignore, 'w', encoding='utf-8') as fd:
            fd.write('\n'.join(ignored))

    def reject(imdb_id):
        """Remember *imdb_id* as ignored for this run and future runs."""
        ignored.append(imdb_id)
        known_ids.add(imdb_id)

    for info in films:
        imdb_id = info['imdbId']
        if imdb_id in known_ids:
            continue
        title = info['title']
        if any(marker in title for marker in tv_markers):
            continue

        extra = fetch(imdb_id)
        print(info)
        print(extra)
        if not extra:
            # Persist progress before retrying the lookup once.
            save()
            print('lets try again')
            extra = fetch(imdb_id)
            print(extra)
        # Normalise a missing payload so the checks below cannot crash.
        extra = extra or {}

        # A missing or non-numeric year is treated as unknown (0).
        try:
            year = int(extra.get('year') or 0)
        except (TypeError, ValueError):
            year = 0

        if '(????)' in title or not year or year >= current_year:
            reject(imdb_id)
            print('skip unknown or current year', info['imdbId'], info.get('title'), info.get('year'))
            continue
        if 'isSeries' in extra or ('country' in extra and country not in extra['country']):
            reject(imdb_id)
            print('ignoring', info['imdbId'], info.get('title'))
            continue

        if 'originalTitle' in extra:
            # Keep the listed title as an alternative and promote the
            # original title to the main one.
            info['alternativeTitles'] = [[title, '']]
            info['title'] = extra.pop('originalTitle')
        else:
            # Fall back to the listed title if the API returned none.
            info['title'] = extra.get('title', title)

        # Values already present in the film entry win over API values.
        for key in extra:
            if key not in info:
                info[key] = extra[key]

        print(info['imdbId'], info['title'])
        meta.append(info)
        # Guard against the same ID appearing twice in *films*.
        known_ids.add(imdb_id)
        if len(meta) % 100 == 0:
            save()

    save()
    return meta
|
|
|
|
|
|
if __name__ == '__main__':
    # Command-line entry point: expects exactly three positional arguments
    # (country, input films JSON, output JSON) and prints the usage text
    # and exits with status 1 otherwise.
    usage = "usage: %prog [options] country films.json films_with_metadata.json"
    parser = OptionParser(usage=usage)
    (opts, args) = parser.parse_args()
    if len(args) != 3:
        parser.print_help()
        sys.exit(1)
    country, filename, output = args
    # Decode the input explicitly as UTF-8 rather than relying on the
    # locale's default encoding.
    with open(filename, encoding='utf-8') as fd:
        films = json.load(fd)
    add_metadata(films, country, output)
|
|
|