Write metadata to a separate output file and support resuming an interrupted run

This commit is contained in:
j 2013-08-16 13:49:32 +02:00
parent c7537c800c
commit ba93143980

View file

@ -3,12 +3,34 @@ from optparse import OptionParser
import json import json
import codecs import codecs
import sys import sys
import os
import ox import ox
def add_metadata(films, country): def add_metadata(films, country, output):
meta = []
api = ox.API('https://indiancine.ma/api/') api = ox.API('https://indiancine.ma/api/')
if os.path.exists(output):
with open(output) as fd:
meta = json.load(fd)
known_ids = set([f['imdbId'] for f in meta])
def save():
with codecs.open(output, 'w', encoding='utf-8') as fd:
json.dump(meta, fd, indent=1, ensure_ascii=False)
for info in films: for info in films:
if info['imdbId'] in known_ids:
continue
skip = False
for key in ('Mini-Series', 'TV Series', 'TV Movie', 'TV Special'):
if key in info['title']:
skip = True
if skip:
continue
extra = api.getMetadata(id=info['imdbId'], keys=[ extra = api.getMetadata(id=info['imdbId'], keys=[
'language', 'productionCompany', 'director', 'language', 'productionCompany', 'director',
'runtime', 'alternativeTitles', 'runtime', 'alternativeTitles',
@ -18,6 +40,8 @@ def add_metadata(films, country):
'title', 'title',
'originalTitle', 'year' 'originalTitle', 'year'
])['data'] ])['data']
print info
print extra
if 'isSeries' in extra or ('country' in extra and not country in extra['country']): if 'isSeries' in extra or ('country' in extra and not country in extra['country']):
info['delete'] = True info['delete'] = True
print 'deleting', info['imdbId'], info.get('title') print 'deleting', info['imdbId'], info.get('title')
@ -31,19 +55,21 @@ def add_metadata(films, country):
if key not in info: if key not in info:
info[key] = extra[key] info[key] = extra[key]
print info['imdbId'], info['title'] print info['imdbId'], info['title']
return filter(lambda f: not f.get('delete', False), films) meta.append(info)
if len(meta) % 100 == 0:
save()
save()
return meta
if __name__ == '__main__': if __name__ == '__main__':
usage = "usage: %prog [options] country films.json" usage = "usage: %prog [options] country films.json films_with_metadata.json"
parser = OptionParser(usage=usage) parser = OptionParser(usage=usage)
(opts, args) = parser.parse_args() (opts, args) = parser.parse_args()
if len(args) != 2: if len(args) != 3:
parser.print_help() parser.print_help()
sys.exit(1) sys.exit(1)
country, filename = args country, filename, output = args
with open(filename) as fd: with open(filename) as fd:
films = json.load(fd) films = json.load(fd)
films = add_metadata(films, country) add_metadata(films, country, output)
with codecs.open(filename, 'w', encoding='utf-8') as fd:
json.dump(films, fd, indent=1, ensure_ascii=False)