cinematools/add_metadata.py
2018-04-02 20:28:42 +05:30

80 lines
2.4 KiB
Python
Executable file

#!/usr/bin/python
from optparse import OptionParser
import json
import codecs
import sys
import os
import ox
def add_metadata(films, country, output, api=None):
    """Enrich film records with metadata fetched from the API and store them.

    Records already present in ``output`` (matched on ``imdbId``) are not
    fetched again, so an interrupted run can be resumed by calling this
    function again with the same ``output`` path.

    For each remaining film:

    * films whose title contains a TV / video-game marker are ignored;
    * films the API reports as a series, or whose ``country`` metadata does
      not contain ``country``, get ``info['delete'] = True`` and are not
      stored;
    * films for which the API returns no metadata even after one retry are
      skipped without being stored, so a later run will try them again;
    * otherwise the title is replaced by the API's ``originalTitle`` (keeping
      the previous title under ``alternativeTitles``) or ``title``, every
      metadata key the record does not have yet is copied over, and the
      record is appended to the result.

    Args:
        films: iterable of dicts, each with at least ``imdbId`` and ``title``.
            The dicts are modified in place.
        country: value that must be contained in a film's ``country``
            metadata for the film to be kept.
        output: path of the JSON file holding the results. It is loaded first
            if it exists, rewritten after every 100th stored record, before a
            retry, and once more at the end.
        api: optional client object providing
            ``getMetadata(id=..., keys=...)`` that returns a mapping with a
            ``'data'`` entry. Defaults to
            ``ox.API('https://indiancine.ma/api/')``.

    Returns:
        The list of stored film dicts: those loaded from ``output`` followed
        by the ones added during this call.
    """
    if api is None:
        api = ox.API('https://indiancine.ma/api/')

    meta = []
    if os.path.exists(output):
        # save() writes UTF-8 with ensure_ascii=False, so read the file back
        # with the same explicit encoding instead of the platform default.
        with codecs.open(output, 'r', encoding='utf-8') as fd:
            meta = json.load(fd)
    known_ids = set(f['imdbId'] for f in meta)

    # Title fragments marking entries that are not films.
    excluded_kinds = (
        'Mini-Series', 'TV Series', 'TV Movie', 'TV Special', 'Video Game',
    )
    # Metadata fields requested from the API (same for every film).
    keys = [
        'language', 'productionCompany', 'director',
        'runtime', 'alternativeTitles',
        'color', 'sound',
        'summary', 'country',
        'isSeries',
        'title',
        'originalTitle', 'year'
    ]

    def save():
        with codecs.open(output, 'w', encoding='utf-8') as fd:
            json.dump(meta, fd, indent=1, ensure_ascii=False)

    def fetch(imdb_id):
        return api.getMetadata(id=imdb_id, keys=keys)['data']

    # NOTE: every print below takes a single argument so that the output is
    # the same under the Python 2 print statement and the Python 3 function.
    for info in films:
        imdb_id = info['imdbId']
        if imdb_id in known_ids:
            continue
        if any(kind in info['title'] for kind in excluded_kinds):
            continue

        extra = fetch(imdb_id)
        print(info)
        print(extra)
        if not extra:
            # Persist progress before retrying, in case the retry fails hard.
            save()
            print('lets try again')
            extra = fetch(imdb_id)
            print(extra)
        if not extra:
            # Still nothing: previously this crashed with KeyError on
            # extra['title'] and blocked every later run on the same film.
            # Skip it without storing, so the next run retries it.
            print('no metadata for %s, skipping' % imdb_id)
            continue

        if 'isSeries' in extra or ('country' in extra and country not in extra['country']):
            info['delete'] = True
            print('deleting %s %s' % (imdb_id, info.get('title')))
            continue

        if 'originalTitle' in extra:
            # Keep the listed title as an alternative and promote the original
            # one; pop() keeps 'originalTitle' out of the copy loop below.
            info['alternativeTitles'] = [[info['title'], '']]
            info['title'] = extra.pop('originalTitle')
        elif 'title' in extra:
            info['title'] = extra['title']

        # Only fill in fields the record does not already have.
        for key, value in extra.items():
            info.setdefault(key, value)

        print('%s %s' % (imdb_id, info['title']))
        meta.append(info)
        # Guard against the same film appearing twice in ``films``.
        known_ids.add(imdb_id)
        if len(meta) % 100 == 0:
            save()

    save()
    return meta
if __name__ == '__main__':
    # Command-line entry point: takes exactly three positional arguments,
    # the country, the input films JSON and the output JSON path.
    cli = OptionParser(
        usage="usage: %prog [options] country films.json films_with_metadata.json"
    )
    _, positional = cli.parse_args()
    if len(positional) != 3:
        # Wrong number of arguments: show the usage text and exit non-zero.
        cli.print_help()
        sys.exit(1)
    country, filename, output = positional
    with open(filename) as source:
        films = json.load(source)
    add_metadata(films, country, output)