Write metadata to a separate output file and support resuming an interrupted run
This commit is contained in:
parent
c7537c800c
commit
ba93143980
1 changed file with 34 additions and 8 deletions
|
@ -3,12 +3,34 @@ from optparse import OptionParser
|
||||||
import json
|
import json
|
||||||
import codecs
|
import codecs
|
||||||
import sys
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
import ox
|
import ox
|
||||||
|
|
||||||
def add_metadata(films, country):
|
def add_metadata(films, country, output):
|
||||||
|
meta = []
|
||||||
api = ox.API('https://indiancine.ma/api/')
|
api = ox.API('https://indiancine.ma/api/')
|
||||||
|
|
||||||
|
if os.path.exists(output):
|
||||||
|
with open(output) as fd:
|
||||||
|
meta = json.load(fd)
|
||||||
|
|
||||||
|
known_ids = set([f['imdbId'] for f in meta])
|
||||||
|
|
||||||
|
def save():
|
||||||
|
with codecs.open(output, 'w', encoding='utf-8') as fd:
|
||||||
|
json.dump(meta, fd, indent=1, ensure_ascii=False)
|
||||||
|
|
||||||
for info in films:
|
for info in films:
|
||||||
|
if info['imdbId'] in known_ids:
|
||||||
|
continue
|
||||||
|
skip = False
|
||||||
|
for key in ('Mini-Series', 'TV Series', 'TV Movie', 'TV Special'):
|
||||||
|
if key in info['title']:
|
||||||
|
skip = True
|
||||||
|
if skip:
|
||||||
|
continue
|
||||||
|
|
||||||
extra = api.getMetadata(id=info['imdbId'], keys=[
|
extra = api.getMetadata(id=info['imdbId'], keys=[
|
||||||
'language', 'productionCompany', 'director',
|
'language', 'productionCompany', 'director',
|
||||||
'runtime', 'alternativeTitles',
|
'runtime', 'alternativeTitles',
|
||||||
|
@ -18,6 +40,8 @@ def add_metadata(films, country):
|
||||||
'title',
|
'title',
|
||||||
'originalTitle', 'year'
|
'originalTitle', 'year'
|
||||||
])['data']
|
])['data']
|
||||||
|
print info
|
||||||
|
print extra
|
||||||
if 'isSeries' in extra or ('country' in extra and not country in extra['country']):
|
if 'isSeries' in extra or ('country' in extra and not country in extra['country']):
|
||||||
info['delete'] = True
|
info['delete'] = True
|
||||||
print 'deleting', info['imdbId'], info.get('title')
|
print 'deleting', info['imdbId'], info.get('title')
|
||||||
|
@ -31,19 +55,21 @@ def add_metadata(films, country):
|
||||||
if key not in info:
|
if key not in info:
|
||||||
info[key] = extra[key]
|
info[key] = extra[key]
|
||||||
print info['imdbId'], info['title']
|
print info['imdbId'], info['title']
|
||||||
return filter(lambda f: not f.get('delete', False), films)
|
meta.append(info)
|
||||||
|
if len(meta) % 100 == 0:
|
||||||
|
save()
|
||||||
|
save()
|
||||||
|
return meta
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
usage = "usage: %prog [options] country films.json"
|
usage = "usage: %prog [options] country films.json films_with_metadata.json"
|
||||||
parser = OptionParser(usage=usage)
|
parser = OptionParser(usage=usage)
|
||||||
(opts, args) = parser.parse_args()
|
(opts, args) = parser.parse_args()
|
||||||
if len(args) != 2:
|
if len(args) != 3:
|
||||||
parser.print_help()
|
parser.print_help()
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
country, filename = args
|
country, filename, output = args
|
||||||
with open(filename) as fd:
|
with open(filename) as fd:
|
||||||
films = json.load(fd)
|
films = json.load(fd)
|
||||||
films = add_metadata(films, country)
|
add_metadata(films, country, output)
|
||||||
|
|
||||||
with codecs.open(filename, 'w', encoding='utf-8') as fd:
|
|
||||||
json.dump(films, fd, indent=1, ensure_ascii=False)
|
|
||||||
|
|
Loading…
Reference in a new issue