#!/usr/bin/python3
"""Build per-item subtitle files (JSON and SRT) from subtitle annotations.

Subtitle annotations are fetched once through ``ox.API`` from ``base_url``
and cached in ``subtitles.json`` in the current directory.  For every item
JSON file (command-line arguments, or ``output/*/*.json`` by default) the
script writes a combined subtitle JSON + SRT file and one JSON file per
language (``en``, ``ko``) below ``public/``.
"""
import os
import sys
import json
import re
import subprocess
from collections import defaultdict
import string
from glob import glob
from copy import deepcopy

import ox
import ox.web.auth


base_url = 'http://127.0.0.1:2620'

FRAME_DURATION = 1/60
MAX_DURATION = 40

HIDDEN_TAGS = [
    "women with white males",
    "gene z hanrahan"
]

# items to not use at all
BLACKLIST = [
    'XN'
]

# Lazily created API client shared by all calls, see get_api().
api = None

# NOTE(review): the tag literals passed to str.replace() were lost in the
# copy of this file that was reviewed (they showed up as a raw newline and
# as empty strings).  They are reconstructed here as "<br>" and
# "<span ...>" / "</span>" based on the 'lang="ko"' check in
# render_subtitles() -- confirm against the repository version.
_BR_TAG = re.compile(r'<br\s*/?>')
_SPAN_TAG = re.compile(r'</?span[^>]*>')


def get_api():
    """Create the module-level API client on first use and sign in.

    The client is stored in the global ``api``; later calls reuse it.

    Returns:
        The shared ``ox.API`` instance.
    """
    global api
    if not api:
        api = ox.API(base_url + '/api/')
        api.signin(**ox.web.auth.get('cdosea'))
    return api


def update_subtitles():
    """Fetch all items tagged ``Vocal`` together with their subtitles.

    Returns:
        A list of item dicts with the keys ``id``, ``title`` and
        ``subtitles`` (the item's ``subtitles`` layer as returned by the API).
    """
    get_api()
    items = api.find({
        'query': {
            'conditions': [{'key': 'tags', 'value': 'Vocal', 'operator': '=='}]
        },
        'keys': ['id', 'title'],
        'range': [0, 1000]
    })['data']['items']
    for item in items:
        layers = api.get({'id': item['id'], 'keys': ['layers']})['data']['layers']
        item['subtitles'] = layers['subtitles']
    return items


def get_subtitles(items, id):
    """Return a deep copy of the subtitles of the first matching item.

    Args:
        items: item dicts as returned by update_subtitles().
        id: title prefix to look for (parameter name kept for callers,
            even though it shadows the builtin).

    Returns:
        A new list of subtitle dicts, or ``None`` if no item title starts
        with ``id``.
    """
    for item in items:
        if item['title'].startswith(id):
            # Copy so callers can sort/modify without touching the cache.
            return deepcopy(item['subtitles'])
    return None


def _strip_markup(value):
    """Remove line-break and span tags from a subtitle value and trim it."""
    value = _BR_TAG.sub('', value)
    return _SPAN_TAG.sub('', value).strip()


def render_subtitles(item_json, output_json, output_srt, lang=None):
    """Write the subtitles for one rendered item.

    The item JSON is expected to contain a ``vocals`` list; each clip has a
    ``duration``, and non-blank clips have a ``path`` whose file name starts
    with the two-character id of the vocal.  Subtitles of every clip are
    shifted by the summed duration of the preceding clips.  Subtitles with
    the same (shifted) in/out times are merged into one entry.

    Relies on the module-level ``items`` list, which is only set when the
    file is run as a script.

    Args:
        item_json: path of the item JSON file to read.
        output_json: path of the subtitle JSON file to write.
        output_srt: path of the SRT file to write, or a falsy value to skip.
        lang: ``None`` merges all subtitles (Korean lines first) and records
            the source annotation ids; ``'ko'`` keeps only subtitles marked
            ``lang="ko"``; ``'en'`` keeps only the others.

    Raises:
        ValueError: if no cached item matches a clip's id.
    """
    with open(item_json, encoding='utf-8') as fd:
        item = json.load(fd)

    position = 0
    subs = {}
    for clip in item['vocals']:
        if not clip.get('blank'):
            # vocals/A/A4_chaton.wav -> "A4"
            clip_id = clip['path'].split('/')[-1][:2]
            clip_subtitles = get_subtitles(items, clip_id)
            if clip_subtitles is None:
                raise ValueError(
                    'no subtitles found for clip %r (id %r)'
                    % (clip['path'], clip_id)
                )
            clip_subtitles.sort(key=lambda c: (c['in'], c['out'], c['id']))

            for sub in clip_subtitles:
                # Round to milliseconds so equal cues share one key.
                sub_in = float('%0.3f' % (sub['in'] + position))
                sub_out = float('%0.3f' % (sub['out'] + position))
                sub_id = '%0.3f-%0.3f' % (sub_in, sub_out)
                if sub_id not in subs:
                    subs[sub_id] = {
                        'in': sub_in,
                        'out': sub_out,
                        'value': [],
                        'ids': []
                    }
                entry = subs[sub_id]
                # Must be tested on the raw value, before tags are removed.
                is_korean = 'lang="ko"' in sub['value']
                value = _strip_markup(sub['value'])
                # just use strip_tags?
                # value = ox.strip_tags(ox.decode_html(sub['value']))
                if lang is None:
                    if is_korean:
                        entry['value'].insert(0, value)
                    else:
                        entry['value'].append(value)
                    entry['ids'].append(sub['id'])
                elif lang == 'ko' and is_korean:
                    entry['value'].append(value)
                elif lang == 'en' and not is_korean:
                    entry['value'].append(value)
        # Blank clips carry no subtitles but still take up time.
        position += clip['duration']

    subtitles = []
    for sub in sorted(subs.values(), key=lambda c: (c['in'], c['out'])):
        sub['value'] = '\n'.join(sub['value'])
        subtitles.append(sub)

    if output_srt:
        with open(output_srt, 'wb') as fd:
            fd.write(ox.srt.encode(subtitles))
    # ensure_ascii=False emits raw non-ASCII text, so fix the encoding
    # instead of depending on the locale default.
    with open(output_json, 'w', encoding='utf-8') as fd:
        json.dump(subtitles, fd, indent=4, ensure_ascii=False)


if __name__ == '__main__':
    # `items` stays a module-level name on purpose: render_subtitles()
    # reads it as a global.
    if os.path.exists('subtitles.json'):
        with open('subtitles.json', encoding='utf-8') as fd:
            items = json.load(fd)
    else:
        items = update_subtitles()
        with open('subtitles.json', 'w', encoding='utf-8') as fd:
            json.dump(items, fd, indent=4, ensure_ascii=False)

    if len(sys.argv) > 1:
        files = sys.argv[1:]
    else:
        files = glob('output/*/*.json')

    for item_json in files:
        parts = item_json.split('/')
        # first letter of the file name (lower-cased) + parent folder name
        prefix = 'public/' + parts[-1][0].lower() + parts[-2] + '.'
        output_json = prefix + '1080p.json'
        # NOTE(review): '10800.srt' looks like a typo for '1080p.srt'; left
        # unchanged because other tooling may depend on this file name.
        output_srt = prefix + '10800.srt'
        render_subtitles(item_json, output_json, output_srt)
        for lang in ('en', 'ko'):
            output_json = prefix + lang + '.json'
            render_subtitles(item_json, output_json, None, lang)