#!/usr/bin/python3
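"""Render subtitle tracks (.json/.srt) for the item JSON files under output/.

The subtitle layer of every item tagged 'Vocal' is fetched once via the ox
API at base_url and cached in subtitles.json. For each item JSON the clip
subtitles are shifted onto the item's timeline and written out as a bilingual
Korean/English '1080p' track, a Norwegian/English 'no-en' track and
single-language 'en', 'ko' and 'no' tracks.
"""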
from argparse import ArgumentParser
from collections import defaultdict
from copy import deepcopy
from glob import glob
import json
import os
import re
import string
import subprocess
import sys

import ox
import ox.web.auth


base_url = 'http://127.0.0.1:2620'

FRAME_DURATION = 1/60
MAX_DURATION = 40

HIDDEN_TAGS = [
    "women with white males",
    "gene z hanrahan"
]

# items to not use at all
BLACKLIST = [
    'XN'
]

api = None


def get_api():
    # sign in once and reuse the global client; credentials come from
    # ox.web.auth.get('cdosea')
    global api
    if not api:
        api = ox.API(base_url + '/api/')
        api.signin(**ox.web.auth.get('cdosea'))


def update_subtitles():
    # collect all items tagged 'Vocal' and attach their subtitles layer
    get_api()
    items = api.find({
        'query': {
            'conditions': [{'key': 'tags', 'value': 'Vocal', 'operator': '=='}]
        },
        'keys': ['id', 'title'],
        'range': [0, 1000]})['data']['items']
    for item in items:
        '''
        info = api.findMedia({
            'query': {
                'conditions': [
                    {'key': 'id', 'operator': '==', 'value': item['id']}
                ]
            },
            'keys': ['id', 'extension'],
            'range': [0, 1]
        })['data']['items'][0]
        '''
        item['subtitles'] = api.get({'id': item['id'], 'keys': ['layers']})['data']['layers']['subtitles']

    return items


def get_subtitles(items, id):
    # item titles start with the clip id (e.g. 'A4 ...'); return a copy so the
    # caller can shift timestamps without touching the cached items
    for item in items:
        if item['title'].startswith(id):
            return deepcopy(item['subtitles'])
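

# Each item JSON read by render_subtitles() is assumed to contain a 'vocals'
# list of clips with 'path', 'duration' and an optional 'blank' flag, along
# the lines of (illustrative values only):
#   {"vocals": [{"path": "vocals/A/A4_chaton.wav", "duration": 12.5},
#               {"blank": true, "duration": 2.0}]}
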
def render_subtitles(item_json, output_json, output_srt, lang):
    # lang is either a single language code ('en') or a list like ['ko', 'en'];
    # for a list, each cue combines both languages, first entry on top
    with open(item_json) as fd:
        item = json.load(fd)

    subtitles = []
    position = 0
    subs = {}
    for clip in item['vocals']:
        if not clip.get('blank'):
            # vocals/A/A4_chaton.wav -> clip id 'A4'
            id = clip['path'].split('/')[-1][:2]
            clip_subtitles = get_subtitles(items, id)  # items: module-level list loaded in __main__
            clip_subtitles.sort(key=lambda c: (c['in'], c['out'], c['id']))

            for sub in clip_subtitles:
                # shift the subtitle onto the item's timeline and key it by its
                # rounded in/out points
                sub_in = float('%0.3f' % (sub['in'] + position))
                sub_out = float('%0.3f' % (sub['out'] + position))
                sub_id = '%0.3f-%0.3f' % (sub_in, sub_out)
                if sub_id not in subs:
                    subs[sub_id] = {
                        'in': sub_in,
                        'out': sub_out,
                        'value': [],
                    }
                    if isinstance(lang, list):
                        subs[sub_id]['ids'] = []
                        subs[sub_id]['languages'] = []

                # non-English values are wrapped in '<span lang="xx">...</span>',
                # untagged text is treated as English
                slang = re.compile('span lang="(..)"').findall(sub['value'])
                value = sub['value'].replace('<br>', '').strip()
                if slang:
                    slang = slang[0]
                    value = value.replace('<span lang="' + slang + '">', '').replace('</span>', '').strip()
                else:
                    slang = 'en'
                # just use strip_tags?
                # value = ox.strip_tags(ox.decode_html(sub['value']))
                if isinstance(lang, list) and slang in lang:
                    # keep the first requested language on top of the cue
                    if lang.index(slang) == 0:
                        subs[sub_id]['value'].insert(0, value)
                    else:
                        subs[sub_id]['value'].append(value)
                    subs[sub_id]['ids'].append(sub['id'])
                    subs[sub_id]['languages'].append(slang)
                elif slang == lang:
                    subs[sub_id]['value'].append(value)
        position += clip['duration']

    if isinstance(lang, list):
        # if some cues are missing one of the requested languages, split
        # overlapping cues at their intersection points and merge the pieces so
        # that each piece carries both languages where possible
        #fixme = [sub for sub in subs.values() if [s for s in list(subs.values())]]
        fixme = [sub for sub in subs.values() if set(sub['languages']) != set(lang) and sub['value']]
        if fixme:
            remove = []
            for key, sub in list(subs.items()):
                intersections = []
                for s in list(subs.values()):
                    intersections += overlaps(sub, s)
                if intersections:
                    points = list(sorted(set([sub['in'], sub['out']] + intersections)))
                    #print(points, sub['value'])
                    sub_in = points[0]
                    for sub_out in points[1:]:
                        sub_id = '%0.3f-%0.3f' % (sub_in, sub_out)
                        if sub_id not in subs:
                            subs[sub_id] = {
                                'in': sub_in,
                                'out': sub_out,
                                'value': [],
                                'ids': [],
                                'languages': []
                            }
                        if not sub['value']:
                            continue
                        if set(subs[sub_id]['languages']) != set(lang):
                            if not subs[sub_id]['languages']:
                                subs[sub_id]['value'] += sub['value']
                                subs[sub_id]['languages'] += sub['languages']
                                subs[sub_id]['ids'] += sub['ids']
                            elif subs[sub_id]['languages'] == [lang[0]] \
                                    and sub['languages'][0] not in subs[sub_id]['languages']:
                                subs[sub_id]['value'].append(sub['value'][0])
                                subs[sub_id]['languages'].append(sub['languages'][0])
                                subs[sub_id]['ids'] += sub['ids']
                            elif subs[sub_id]['languages'] == [lang[1]] \
                                    and sub['languages'][0] not in subs[sub_id]['languages']:
                                subs[sub_id]['value'].insert(0, sub['value'][0])
                                subs[sub_id]['languages'].insert(0, sub['languages'][0])
                                subs[sub_id]['ids'] += sub['ids']
                            #else:
                            #    print('WTF', sub['languages'], subs[sub_id]['languages'])
                        sub_in = sub_out
                    remove.append(key)
            #for key, sub in list(subs.items()):
            #    if len(sub['languages']) == 1:
            #        del subs[key]
            for key in remove:
                # drop split originals that still carry only one language
                if len(subs[key]['languages']) == 1:
                    del subs[key]
            for key, sub in list(subs.items()):
                # drop leftover slivers of 40 ms or less
                if abs(sub['out'] - sub['in']) <= 0.040001:
                    del subs[key]

    subs = sorted(subs.values(), key=lambda c: (c['in'], c['out']))
    for sub in subs:
        sub['value'] = '\n'.join(sub['value'])
        if sub['value'].strip():
            subtitles.append(sub)

    # resolve remaining overlaps: identical text extends the earlier cue,
    # otherwise the earlier cue is cut off where the next one starts
    merged = []
    p = None
    for sub in subtitles:
        if not p:
            merged.append(sub)
            p = sub
        else:
            if p['out'] > sub['in']:
                if p['value'] == sub['value']:
                    p['out'] = max(p['out'], sub['out'])
                else:
                    p['out'] = sub['in']
                    merged.append(sub)
                    p = sub
            else:
                merged.append(sub)
                p = sub
    subtitles = merged

    if output_srt:
        with open(output_srt, 'wb') as fd:
            fd.write(ox.srt.encode(subtitles))
    with open(output_json, 'w') as fd:
        json.dump(subtitles, fd, indent=4, ensure_ascii=False, sort_keys=True)


def overlaps(src, other):
    # return the boundary points of `other` that fall inside `src`
    # (timestamps are compared after rounding to 10 ms)
    src_in = float('%0.2f' % src['in'])
    src_out = float('%0.2f' % src['out'])
    other_in = float('%0.2f' % other['in'])
    other_out = float('%0.2f' % other['out'])
    points = []
    if src_in != other_in or src_out != other_out:
        # other starts inside src
        if other_in >= src_in and other_in < src_out:
            points += [other['in']]
        # other ends inside src
        if other_out > src_in and other_out <= src_out:
            points += [other['out']]
    return points
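
# For example (timestamps are rounded to 10 ms before comparison):
#   overlaps({'in': 0.0, 'out': 5.0}, {'in': 2.0, 'out': 8.0}) -> [2.0]
#   overlaps({'in': 0.0, 'out': 5.0}, {'in': 0.0, 'out': 5.0}) -> []
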
if __name__ == '__main__':
    usage = "usage: %(prog)s [options] json"
    parser = ArgumentParser(usage=usage)
    parser.add_argument('-p', '--prefix', dest='prefix',
                        help='version prefix', default='.')
    parser.add_argument('files', metavar='path', type=str, nargs='*', help='json files')
    opts = parser.parse_args()

    # subtitles are fetched from the API once and cached in subtitles.json
    if os.path.exists('subtitles.json'):
        with open('subtitles.json') as fd:
            items = json.load(fd)
    else:
        items = update_subtitles()
        with open('subtitles.json', 'w') as fd:
            json.dump(items, fd, indent=4, ensure_ascii=False, sort_keys=True)

    files = opts.files
    if not files:
        files = glob(os.path.join(opts.prefix, 'output/*/*.json'))
    for item_json in files:
        # output/<dir>/<Name>.json -> <prefix>/public/<n><dir>.* where <n> is
        # the lowercased first letter of the item file name
        prefix = 'public/' + item_json.split('/')[-1][0].lower() + item_json.split('/')[-2] + '.'
        prefix = os.path.join(opts.prefix, prefix)
        # bilingual tracks: Korean/English for the '1080p' version,
        # Norwegian/English as 'no-en'
        output_json = prefix + '1080p.json'
        output_srt = prefix + '1080p.srt'
        render_subtitles(item_json, output_json, output_srt, ['ko', 'en'])
        output_json = prefix + 'no-en.json'
        output_srt = prefix + 'no-en.srt'
        render_subtitles(item_json, output_json, output_srt, ['no', 'en'])
        # single-language tracks, JSON only
        for lang in ('en', 'ko', 'no'):
            output_json = prefix + lang + '.json'
            render_subtitles(item_json, output_json, None, lang)