pandora_t_for_time/management/commands/import_subtitles.py

107 lines
4.0 KiB
Python

import json
import os
import subprocess
import ox
from django.core.management.base import BaseCommand
from django.conf import settings
from item.models import Item
from annotation.models import Annotation
class Command(BaseCommand):
    """Import translated subtitles for items from a text file or URL.

    The input consists of one section per item.  Each section starts with a
    line beginning with ``## `` whose last space-separated word is the item's
    public id.  The section body holds one translated subtitle per paragraph
    (paragraphs are separated by a blank line).  Paragraphs are matched by
    position to the item's non-empty ``subtitles`` annotations that have no
    language set, ordered by start time.
    """

    help = 'import translated subtitles'

    def add_arguments(self, parser):
        """Register the ``--lang`` and ``--test`` options and the input source."""
        parser.add_argument('--lang', action='store', dest='lang', default=None, help='subtitle language')
        parser.add_argument('--test', action='store_true', dest='test', default=False, help='test run')
        parser.add_argument('args', metavar='args', type=str, nargs='*', help='file or url')

    def handle(self, filename=None, **options):
        """Read the translation source and import every section it contains.

        Args:
            filename: Local path, or URL (anything starting with ``http``),
                of the translation text.  Defaults to ``None`` so that a
                missing positional argument is reported instead of raising
                ``TypeError``.
            **options: Parsed command options; ``lang`` (required) and
                ``test`` (validate only, write nothing) are read here.
        """
        lang = options["lang"]
        if not lang:
            print("--lang is required")
            return
        if not filename:
            print("file or url is required")
            return
        test = options["test"]
        for title, block in self._parse_sections(self._read_source(filename)):
            self._import_section(title, block, lang, test)

    @staticmethod
    def _read_source(filename):
        """Return the text found at ``filename`` (local path or http(s) URL)."""
        if filename.startswith("http"):
            return ox.net.read_url(filename).decode()
        # Translations are rarely ASCII; read as UTF-8 (the same codec the URL
        # branch decodes with) instead of the platform default encoding.
        with open(filename, encoding='utf-8') as fd:
            return fd.read()

    @staticmethod
    def _parse_sections(data):
        """Split ``data`` into a list of ``(title, body)`` tuples.

        Anything before the first ``## `` header line is ignored.
        """
        # Downloaded text may use CRLF line endings, which would defeat the
        # '\n\n' paragraph split applied to each section body.
        data = data.replace('\r\n', '\n').strip()
        # Prefix a newline so that a header on the very first line is also
        # recognised as a section start rather than dropped as preamble.
        sections = []
        for section in ('\n' + data).split('\n## ')[1:]:
            # partition() tolerates a header that has no body at all.
            title, _, block = section.partition('\n')
            sections.append((title.strip(), block.strip()))
        return sections

    def _import_section(self, title, block, lang, test):
        """Validate one section and, unless ``test`` is set, store it.

        Existing ``subtitles`` annotations of the item in language ``lang``
        are deleted and replaced by new annotations that copy item, user,
        layer, start and end from the corresponding source subtitle.
        """
        item_id = title.split(' ')[-1]
        try:
            item = Item.objects.get(public_id=item_id)
        except Item.DoesNotExist:
            # Report and skip, like a count mismatch, instead of aborting the
            # whole run on a single bad header.
            print('%s: item %s not found' % (title, item_id))
            return
        # Evaluate the queryset once so that the count check and the import
        # loop below operate on exactly the same rows.
        subtitles_en = list(
            item.annotations.filter(layer="subtitles", languages=None)
            .exclude(value='')
            .order_by('start')
        )
        lines = block.split('\n\n')
        if len(lines) != len(subtitles_en):
            print('%s: number of subtitles does not match, en: %s vs %s: %s' % (title, len(subtitles_en), lang, len(lines)))
            return
        if test:
            print('%s: valid %s subtitles' % (title, len(lines)))
            return
        item.annotations.filter(layer="subtitles", languages=lang).delete()
        for sub_en, line in zip(subtitles_en, lines):
            sub = Annotation()
            sub.item = sub_en.item
            sub.user = sub_en.user
            sub.layer = sub_en.layer
            sub.start = sub_en.start
            sub.end = sub_en.end
            # NOTE(review): ``languages`` is not assigned here; presumably
            # Annotation.save() derives it from the lang attribute of the
            # wrapping span - confirm against the Annotation model.
            sub.value = '<span lang="%s">%s</span>' % (lang, line)
            sub.save()