write metadata to other file, support resume
This commit is contained in:
parent
c7537c800c
commit
ba93143980
1 changed file with 34 additions and 8 deletions
|
@@ -3,12 +3,34 @@ from optparse import OptionParser
|
|||
import json
|
||||
import codecs
|
||||
import sys
|
||||
import os
|
||||
|
||||
import ox
|
||||
|
||||
def add_metadata(films, country):
|
||||
def add_metadata(films, country, output):
|
||||
meta = []
|
||||
api = ox.API('https://indiancine.ma/api/')
|
||||
|
||||
if os.path.exists(output):
|
||||
with open(output) as fd:
|
||||
meta = json.load(fd)
|
||||
|
||||
known_ids = set([f['imdbId'] for f in meta])
|
||||
|
||||
def save():
|
||||
with codecs.open(output, 'w', encoding='utf-8') as fd:
|
||||
json.dump(meta, fd, indent=1, ensure_ascii=False)
|
||||
|
||||
for info in films:
|
||||
if info['imdbId'] in known_ids:
|
||||
continue
|
||||
skip = False
|
||||
for key in ('Mini-Series', 'TV Series', 'TV Movie', 'TV Special'):
|
||||
if key in info['title']:
|
||||
skip = True
|
||||
if skip:
|
||||
continue
|
||||
|
||||
extra = api.getMetadata(id=info['imdbId'], keys=[
|
||||
'language', 'productionCompany', 'director',
|
||||
'runtime', 'alternativeTitles',
|
||||
|
@@ -18,6 +40,8 @@ def add_metadata(films, country):
|
|||
'title',
|
||||
'originalTitle', 'year'
|
||||
])['data']
|
||||
print info
|
||||
print extra
|
||||
if 'isSeries' in extra or ('country' in extra and not country in extra['country']):
|
||||
info['delete'] = True
|
||||
print 'deleting', info['imdbId'], info.get('title')
|
||||
|
@@ -31,19 +55,21 @@ def add_metadata(films, country):
|
|||
if key not in info:
|
||||
info[key] = extra[key]
|
||||
print info['imdbId'], info['title']
|
||||
return filter(lambda f: not f.get('delete', False), films)
|
||||
meta.append(info)
|
||||
if len(meta) % 100 == 0:
|
||||
save()
|
||||
save()
|
||||
return meta
|
||||
|
||||
if __name__ == '__main__':
|
||||
usage = "usage: %prog [options] country films.json"
|
||||
usage = "usage: %prog [options] country films.json films_with_metadata.json"
|
||||
parser = OptionParser(usage=usage)
|
||||
(opts, args) = parser.parse_args()
|
||||
if len(args) != 2:
|
||||
if len(args) != 3:
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
country, filename = args
|
||||
country, filename, output = args
|
||||
with open(filename) as fd:
|
||||
films = json.load(fd)
|
||||
films = add_metadata(films, country)
|
||||
add_metadata(films, country, output)
|
||||
|
||||
with codecs.open(filename, 'w', encoding='utf-8') as fd:
|
||||
json.dump(films, fd, indent=1, ensure_ascii=False)
|
||||
|
|
Loading…
Reference in a new issue