pandora_t_for_time/render.py

#!/usr/bin/python3
from collections import defaultdict
from glob import glob
import json
import os
import re
import shutil
import subprocess
import sys
import time
from pathlib import Path

import ox
import lxml.etree

from .pi import random
from .render_kdenlive import KDEnliveProject, _CACHE, melt_xml, get_melt


def random_int(seq, length):
    n = n_ = length - 1
    #print('len', n)
    if n == 0:
        return n
    r = seq() / 9 * 10
    base = 10
    while n > 10:
        n /= 10
        r += seq() / 9 * 10
        base += 10
    r = int(round(n_ * r / base))
    return r

def random_choice(seq, items, pop=False):
    n = random_int(seq, len(items))
    if pop:
        return items.pop(n)
    return items[n]

def chance(seq, chance):
    return (seq() / 10) < chance

def get_clip_by_seqid(clips, seqid):
    selected = None
    for i, clip in enumerate(clips):
        if clip['seqid'] == seqid:
            selected = i
            break
    if selected is not None:
        return clips.pop(i)
    return None


def write_if_new(path, data, mode=''):
    read_mode = 'r' + mode
    write_mode = 'w' + mode
    if os.path.exists(path):
        with open(path, read_mode) as fd:
            old = fd.read()
    else:
        old = ""
    is_new = data != old
    if path.endswith(".kdenlive"):
        is_new = re.sub(r'\{.{36}\}', '', data) != re.sub(r'\{.{36}\}', '', old)
    if is_new:
        with open(path, write_mode) as fd:
            fd.write(data)

def format_duration(duration, fps):
    return float('%0.5f' % (round(duration * fps) / fps))

def compose(clips, target=150, base=1024, voice_over=None, options=None):
    if options is None:
        options = {}
    fps = 24
    length = 0
    scene = {
        'front': {
            'V1': [],
            'V2': [],
        },
        'back': {
            'V1': [],
            'V2': [],
        },
        'audio-back': {
            'A1': [],
        },
        'audio-center': {
            'A1': [],
        },
        'audio-front': {
            'A1': [],
            'A2': [],
            'A3': [],
            'A4': [],
        },
        'audio-rear': {
            'A1': [],
            'A2': [],
            'A3': [],
            'A4': [],
        },
    }
    all_clips = clips.copy()
    seq = random(10000 + base * 1000)
    used = []

    voice_overs = []
    sub_offset = 0
    if voice_over:
        vo_keys = list(sorted(voice_over))
        if chance(seq, 0.5):
            vo_key = vo_keys[random_int(seq, len(vo_keys))]
            voice_overs.append(voice_over[vo_key])
        elif len(vo_keys) >= 2:
            vo1 = vo_keys.pop(random_int(seq, len(vo_keys)))
            vo2 = vo_keys.pop(random_int(seq, len(vo_keys)))
            voice_overs.append(voice_over[vo1])
            if voice_over[vo1]["duration"] + voice_over[vo2]["duration"] < target:
                print("adding second vo")
                voice_overs.append(voice_over[vo2])
        print("vo:", [x['src'] for x in voice_overs], list(sorted(voice_over)))
        vo_min = sum([vo['duration'] for vo in voice_overs])
        sub_offset = 0
        if vo_min > target:
            target = vo_min
        elif vo_min < target:
            offset = format_duration((target - vo_min) / 2, fps)
            scene['audio-center']['A1'].append({
                'blank': True,
                'duration': offset
            })
            scene['audio-rear']['A1'].append({
                'blank': True,
                'duration': offset
            })
            vo_min += offset
            sub_offset = offset
        subs = []
        for vo in voice_overs:
            voc = vo.copy()
            a, b = '-11', '-3'
            if 'Whispered' in voc['src']:
                a, b = '-8', '0'
            elif 'Read' in voc['src']:
                a, b = '-7.75', '0.25'
            elif 'Free' in voc['src']:
                a, b = '-8.8', '-0.8'
            elif 'Ashley' in voc['src']:
                a, b = '-9.5', '-1.50'
            elif 'Melody' in voc['src']:
                a, b = '-5.25', '-0.25'
            if options.get('stereo_downmix'):
                a, b = '-9', '-1'
                if 'Whispered' in voc['src']:
                    a, b = '-6', '2'
                elif 'Read' in voc['src']:
                    a, b = '-5.75', '2.25'
                elif 'Free' in voc['src']:
                    a, b = '-6.8', '3.2'
                elif 'Ashley' in voc['src']:
                    a, b = '-7.5', '0.50'
                elif 'Melody' in voc['src']:
                    a, b = '-3.25', '1.75'
            voc['filter'] = {'volume': a}
            scene['audio-center']['A1'].append(voc)
            vo_low = vo.copy()
            vo_low['filter'] = {'volume': b}
            scene['audio-rear']['A1'].append(vo_low)
            for sub in voc.get("subs", []):
                sub = sub.copy()
                sub["in"] += sub_offset
                sub["out"] += sub_offset
                subs.append(sub)
            sub_offset += voc["duration"]
        if subs:
            scene["subtitles"] = subs

    clip = None
    while target - length > 0 and clips:
        # coin flip which site is visible (50% chance)
        if length:
            remaining = target - length
            remaining = remaining * 1.05  # allow for max of 10% over time
            clips_ = [c for c in clips if c['duration'] <= remaining]
            if clips_:
                clips = clips_
        if clip:
            if chance(seq, 0.5):
                next_seqid = clip['seqid'] + 1
                clip = get_clip_by_seqid(clips, next_seqid)
            else:
                clip = None
        if not clip:
            clip = random_choice(seq, clips, True)
        if not clips:
            print("not enough clips, need to reset")
            clips = [c for c in all_clips if c != clip and c not in used]
            if not clips:
                print("not enough clips, also consider used")
                clips = [c for c in all_clips if c != clip]
            if not clips:
                print("not enough clips, also consider last clip")
                clips = all_clips.copy()
        if length + clip['duration'] > target and length >= vo_min:
            break
        print('%06.3f %06.3f' % (length, clip['duration']), os.path.basename(clip['original']))
        length += int(clip['duration'] * fps) / fps

        if "foreground" not in clip and "animation" in clip:
            fg = clip['animation']
            transparancy = 1
        else:
            fg = clip['foreground']
            if 'animation' in clip and chance(seq, 0.15):
                fg = clip['animation']
                transparancy = 1
            else:
                if 'foreground2' in clip:
                    if 'foreground3' in clip:
                        n = seq()
                        if n <= 3:  # 0,1,2,3
                            clip['foreground']
                        elif n <= 6:  # 4,5,6
                            clip['foreground2']
                        else:  # 7,8,9
                            clip['foreground3']
                    elif chance(seq, 0.5):
                        fg = clip['foreground2']
                transparancy = seq() / 9
                transparancy = 1
        if 'foley' in clip:
            foley = clip['foley']
        else:
            foley = fg
        scene['front']['V2'].append({
            'duration': clip['duration'],
            'src': fg,
            "filter": {
                'transparency': transparancy,
            }
        })

        transparency = seq() / 9
        # 50% of time no transparancy of foregroudnd layer
        # 50% some transparancy, 25%, 50%, 75% levels of transparancy
        transparancy = 1
        # coin flip which site is visible (50% chance)
        #if chance(seq, 0.5):
        if chance(seq, 0.8):
            transparency_front = transparency
            transparency_back = 0
        else:
            transparency_back = random_choice(seq, [0.25, 0.5, 0.75, 1])
            transparency_front = 0
        transparency_original = seq() / 9
        transparency_original = 1
        if "background" in clip:
            scene['front']['V1'].append({
                'duration': clip['duration'],
                'src': clip['background'],
                "filter": {
                    'transparency': transparency_front
                }
            })
            scene['back']['V2'].append({
                'duration': clip['duration'],
                'src': clip['background'],
                "filter": {
                    'transparency': transparency_back
                }
            })
        else:
            scene['front']['V1'].append({
                'duration': clip['duration'],
                'src': clip['animation'],
                "filter": {
                    'transparency': 0,
                }
            })
            scene['back']['V2'].append({
                'duration': clip['duration'],
                'src': clip['original'],
                "filter": {
                    'transparency': 0,
                }
            })

        scene['back']['V1'].append({
            'duration': clip['duration'],
            'src': clip['original'],
            "filter": {
                'transparency': transparency_original,
            }
        })
        # 50 % chance to blur original from 0 to 30
        if chance(seq, 0.5):
            blur = seq() * 3
            if blur:
                scene['back']['V1'][-1]['filter']['blur'] = blur
        volume_back = '-8.2'
        if options.get('stereo_downmix'):
            volume_back = '-7.2'
        scene['audio-back']['A1'].append({
            'duration': clip['duration'],
            'src': clip['original'],
            'filter': {'volume': volume_back},
        })
        # TBD: Foley
        cf_volume = '-2.5'
        scene['audio-front']['A2'].append({
            'duration': clip['duration'],
            'src': foley,
            'filter': {'volume': cf_volume},
        })
        scene['audio-rear']['A2'].append({
            'duration': clip['duration'],
            'src': foley,
            'filter': {'volume': cf_volume},
        })
        used.append(clip)
    print("scene duration %0.3f (target: %0.3f, vo_min: %0.3f)" % (length, target, vo_min))
    scene_duration = int(get_scene_duration(scene) * fps)
    sub_offset = int(sub_offset * fps)
    if sub_offset < scene_duration:
        delta = format_duration((scene_duration - sub_offset) / fps, fps)
        print(">> add %0.3f of silence.. %0.3f (scene_duration)" % (delta, scene_duration / fps))
        scene['audio-center']['A1'].append({
            'blank': True,
            'duration': delta
        })
        scene['audio-rear']['A1'].append({
            'blank': True,
            'duration': delta
        })
    elif sub_offset > scene_duration:
        delta = format_duration((scene_duration - sub_offset) / fps, fps)
        scene['audio-center']['A1'][-1]["duration"] += delta
        scene['audio-rear']['A1'][-1]["duration"] += delta
        print("WTF, needed to cut %s new duration: %s" % (delta, scene['audio-center']['A1'][-1]["duration"]))
        print(scene['audio-center']['A1'][-1])
    return scene, used

def get_track_duration(scene, k, n):
    duration = 0
    for key, value in scene.items():
        if key == k:
            for name, clips in value.items():
                if name == n:
                    for clip in clips:
                        duration += int(clip['duration'] * 24)
                    return duration / 24

def get_scene_duration(scene):
    if isinstance(scene, str):
        with open(scene) as fd:
            scene = json.load(fd)
    duration = 0
    for key, value in scene.items():
        for name, clips in value.items():
            for clip in clips:
                duration += int(clip['duration'] * 24)
            return duration / 24

def get_offset_duration(prefix):
    duration = 0
    for root, folders, files in os.walk(prefix):
        for f in files:
            if f == 'scene.json':
                duration += get_scene_duration(scene)
    return duration

def write_subtitles(data, folder, options):
    data = fix_overlaps(data)
    path = folder / "front.srt"
    if options.get("subtitle_format") == "srt":
        srt = ox.srt.encode(data)
        write_if_new(str(path), srt, 'b')
    else:
        if os.path.exists(path):
            os.unlink(path)
        path = folder / "front.ass"
        ass = ass_encode(data, options)
        write_if_new(str(path), ass, '')


def render(root, scene, prefix='', options=None):
    if options is None: options = {}
    fps = 24
    files = []
    scene_duration = int(get_scene_duration(scene) * fps)
    for timeline, data in scene.items():
        if timeline == "subtitles":
            folder = Path(root) / prefix
            write_subtitles(data, folder, options)
            continue
        #print(timeline)
        project = KDEnliveProject(root)

        tracks = []
        track_durations = {}
        for track, clips in data.items():
            #print(track)
            for clip in clips:
                project.append_clip(track, clip)
            track_durations[track] = sum([int(c['duration'] * fps) for c in clips])
        if timeline.startswith('audio-'):
            track_duration = project.get_duration()
            delta = scene_duration - track_duration
            if delta > 0:
                for track in track_durations:
                    if track_durations[track] == track_duration:
                        project.append_clip(track, {'blank': True, "duration": delta/fps})

        path = os.path.join(root, prefix + "%s.kdenlive" % timeline)
        project_xml = project.to_xml()
        write_if_new(path, project_xml)

        if options["debug"]:
            # check duration
            out_duration = get_project_duration(path)
            p_duration = project.get_duration()
            print(path, 'out: %s, project: %s, scene: %s' %(out_duration, p_duration, scene_duration))
            if p_duration != scene_duration:
                print(path, 'FAIL project: %s, scene: %s' %(p_duration, scene_duration))
                _cache = os.path.join(root, "cache.json")
                with open(_cache, "w") as fd:
                    json.dump(_CACHE, fd)
                sys.exit(1)
            if out_duration != p_duration:
                print(path, 'fail got: %s expected: %s' %(out_duration, p_duration))
                sys.exit(1)

        files.append(path)
    return files

def get_project_duration(file):
    out = melt_xml(file)
    chain = lxml.etree.fromstring(out).xpath('producer')[0]
    duration = int(chain.attrib['out']) + 1
    return duration

def get_fragments(clips, voice_over, prefix):
    import itemlist.models
    import item.models

    fragments = []

    for l in itemlist.models.List.objects.filter(status='featured').order_by('name'):
        if l.name.split(' ')[0].isdigit():
            fragment = {
                'name': l.name,
                'tags': [],
                'anti-tags': [],
                'description': l.description
            }
            for con in l.query['conditions']:
                if "conditions" in con:
                    for sub in con["conditions"]:
                        if sub['key'] == "tags" and sub['operator'] == '==':
                            fragment['tags'].append(sub['value'])
                        elif sub['key'] == "tags" and sub['operator'] == '!=':
                            fragment['tags'].append(sub['value'])
                        else:
                            print(l.name, 'unknown sub condition', sub)
                elif con.get('key') == "tags" and con['operator'] == '==':
                    fragment['tags'].append(con['value'])
                elif con.get('key') == "tags" and con['operator'] == '!=':
                    fragment['anti-tags'].append(con['value'])

            fragment["id"] = int(fragment['name'].split(' ')[0])
            originals = []
            for i in l.get_items(l.user):
                orig = i.files.filter(selected=True).first()
                if orig:
                    ext = os.path.splitext(orig.data.path)[1]
                    type_ = i.data['type'][0].lower()
                    target = os.path.join(prefix, type_, i.data['title'] + ext)
                    originals.append(target)
            fragment['clips'] = []
            for clip in clips:
                #if set(clip['tags']) & set(fragment['tags']) and not set(clip['tags']) & set(fragment['anti-tags']):
                key = 'original'
                original = clip['original']
                if 'original_censored' in clip:
                    original = clip['original_censored']
                if original in originals:
                    fragment['clips'].append(clip)
            fragment["voice_over"] = voice_over.get(str(fragment["id"]), {})
            fragments.append(fragment)
    fragments.sort(key=lambda f: ox.sort_string(f['name']))
    return fragments

def parse_lang(lang):
    if lang and "," in lang:
        lang = lang.split(',')
    if isinstance(lang, list):
        tlang = lang[1:]
        lang = lang[0]
    else:
        tlang = None
    if lang == "en":
        lang = None
    return lang, tlang


def render_all(options):
    options = load_defaults(options)
    prefix = options['prefix']
    duration = int(options['duration'])
    base = int(options['offset'])

    _cache = os.path.join(prefix, "cache.json")
    if os.path.exists(_cache):
        with open(_cache) as fd:
            _CACHE.update(json.load(fd))

    with open(os.path.join(prefix, "clips.json")) as fd:
        clips = json.load(fd)
    with open(os.path.join(prefix, "voice_over.json")) as fd:
        voice_over = json.load(fd)
    fragments = get_fragments(clips, voice_over, prefix)
    with open(os.path.join(prefix, "fragments.json"), "w") as fd:
        json.dump(fragments, fd, indent=2, ensure_ascii=False)
    position = target_position = 0
    target = fragment_target = duration / len(fragments)
    base_prefix = os.path.join(prefix, 'render', str(base))
    clips_used = []

    stats = defaultdict(lambda: 0)
    fragment_base = base
    for fragment in fragments:
        fragment_base += 1
        fragment_id = int(fragment['name'].split(' ')[0])
        name = fragment['name'].replace(' ', '_')
        if fragment_id < 10:
            name = '0' + name
        if not fragment['clips']:
            print("skipping empty fragment", name)
            continue
        fragment_prefix = os.path.join(base_prefix, name)
        os.makedirs(fragment_prefix, exist_ok=True)
        fragment_clips = fragment['clips']
        unused_fragment_clips = [c for c in fragment_clips if c not in clips_used]
        print('fragment clips', len(fragment_clips), 'unused', len(unused_fragment_clips))
        scene, used = compose(
            unused_fragment_clips,
            target=target,
            base=fragment_base,
            voice_over=fragment['voice_over'],
            options=options
        )
        clips_used += used
        scene_duration = get_scene_duration(scene)
        print("%s %6.3f -> %6.3f (%6.3f)" % (name, target, scene_duration, fragment_target))
        src = [a for a in scene['audio-rear']['A1'] if 'src' in a][0]['src']
        stats[src.split('/')[-2]] += 1

        position += scene_duration
        target_position += fragment_target
        if position > target_position:
            target = fragment_target - (position-target_position)
            print("adjusting target duration for next fragment: %6.3f -> %6.3f" % (fragment_target, target))
        elif position < target_position:
            target = target + 0.1 * fragment_target

        timelines = render(prefix, scene, fragment_prefix[len(prefix) + 1:] + '/', options)

        scene_json = json.dumps(scene, indent=2, ensure_ascii=False)
        write_if_new(os.path.join(fragment_prefix, 'scene.json'), scene_json)

        if not options['no_video'] and not options["single_file"]:
            for timeline in timelines:
                print(timeline)
                ext = '.mp4'
                if '/audio' in timeline:
                    ext = '.wav'
                cmd = get_melt() + [
                    timeline,
                    '-quiet',
                    '-consumer', 'avformat:%s' % timeline.replace('.kdenlive', ext),
                ]
                if ext == '.wav':
                    cmd += ['vn=1']
                else:
                    #if not timeline.endswith("back.kdenlive"):
                    cmd += ['an=1']
                    cmd += ['vcodec=libx264', 'x264opts=keyint=1', 'crf=15']
                subprocess.call(cmd)
                if ext == '.wav' and timeline.endswith('audio.kdenlive'):
                    cmd = [
                        'ffmpeg', '-y',
                        '-nostats', '-loglevel', 'error',
                        '-i',
                        timeline.replace('.kdenlive', ext),
                        timeline.replace('.kdenlive', '.mp4')
                    ]
                    subprocess.call(cmd)
                    os.unlink(timeline.replace('.kdenlive', ext))

            cmds = []
            fragment_prefix = Path(fragment_prefix)
            for src, out1, out2 in (
                ("audio-front.wav", "fl.wav", "fr.wav"),
                ("audio-center.wav", "fc.wav", "lfe.wav"),
                ("audio-rear.wav", "bl.wav", "br.wav"),
            ):
                cmds.append([
                    "ffmpeg", "-y",
                    "-nostats", "-loglevel", "error",
                    "-i", fragment_prefix / src,
                    "-filter_complex",
                    "[0:0]pan=1|c0=c0[left]; [0:0]pan=1|c0=c1[right]",
                    "-map", "[left]", fragment_prefix / out1,
                    "-map", "[right]", fragment_prefix / out2,
                ])
            cmds.append([
                "ffmpeg", "-y",
                "-nostats", "-loglevel", "error",
                "-i", fragment_prefix / "fl.wav",
                "-i", fragment_prefix / "fr.wav",
                "-i", fragment_prefix / "fc.wav",
                "-i", fragment_prefix / "lfe.wav",
                "-i", fragment_prefix / "bl.wav",
                "-i", fragment_prefix / "br.wav",
                "-filter_complex", "[0:a][1:a][2:a][3:a][4:a][5:a]amerge=inputs=6[a]",
                "-map", "[a]", "-c:a", "aac", fragment_prefix / "audio-5.1.mp4"
            ])
            audio_front = "audio-5.1.mp4"
            audio_back = "audio-back.wav"
            copy = '-c'
            if options["stereo_downmix"]:
                cmds.append([
                    "ffmpeg", "-y",
                    "-nostats", "-loglevel", "error",
                    "-i", fragment_prefix / "audio-front.wav",
                    "-i", fragment_prefix / "audio-center.wav",
                    "-i", fragment_prefix / "audio-rear.wav",
                    "-i", fragment_prefix / audio_back,
                    "-filter_complex",
                    "amix=inputs=4:duration=longest:dropout_transition=0",
                    '-ac', '2', fragment_prefix / "audio-stereo.wav"
                ])
                audio_front = "audio-stereo.wav"
                audio_back = "audio-stereo.wav"
                copy = '-c:v'

            cmds.append([
                "ffmpeg", "-y",
                "-nostats", "-loglevel", "error",
                "-i", fragment_prefix / "front.mp4",
                "-i", fragment_prefix / audio_front,
                copy, "copy",
                "-movflags", "+faststart",
                fragment_prefix / "front-mixed.mp4",
            ])
            cmds.append([
                "ffmpeg", "-y",
                "-nostats", "-loglevel", "error",
                "-i", fragment_prefix / "back.mp4",
                "-i", fragment_prefix / audio_back,
                "-c:v", "copy",
                "-movflags", "+faststart",
                fragment_prefix / "back-audio.mp4",
            ])
            for cmd in cmds:
                if options["debug"]:
                    print(" ".join([str(x) for x in cmd]))
                subprocess.call(cmd)

            for a, b in (
                ("back-audio.mp4", "back.mp4"),
                ("front-mixed.mp4", "front.mp4"),
            ):
                duration_a = ox.avinfo(str(fragment_prefix / a))['duration']
                duration_b = ox.avinfo(str(fragment_prefix / b))['duration']
                if duration_a != duration_b:
                    print('!!', duration_a, fragment_prefix / a)
                    print('!!', duration_b, fragment_prefix / b)
                    sys.exit(-1)
            shutil.move(fragment_prefix / "back-audio.mp4", fragment_prefix / "back.mp4")
            shutil.move(fragment_prefix / "front-mixed.mp4", fragment_prefix / "front.mp4")
            if options["keep_audio"]:
                shutil.move(fragment_prefix / "audio-center.wav", fragment_prefix / "vocals.wav")
                shutil.move(fragment_prefix / "audio-front.wav", fragment_prefix / "foley.wav")
                shutil.move(fragment_prefix / "audio-back.wav", fragment_prefix / "original.wav")
            for fn in (
                "audio-5.1.mp4",
                "audio-center.wav", "audio-rear.wav",
                "audio-front.wav", "audio-back.wav", "back-audio.mp4",
                "fl.wav", "fr.wav", "fc.wav", "lfe.wav", "bl.wav", "br.wav",
                "audio-stereo.wav",
            ):
                fn = fragment_prefix / fn
                if os.path.exists(fn):
                    os.unlink(fn)

    if options["single_file"]:
        cmds = []
        base_prefix = Path(base_prefix)
        for timeline in (
            "front",
            "back",
            "audio-back",
            "audio-center",
            "audio-front",
            "audio-rear",
        ):
            timelines = list(sorted(glob('%s/*/%s.kdenlive' % (base_prefix, timeline))))
            ext = '.mp4'
            if '/audio' in timelines[0]:
                ext = '.wav'
            out = base_prefix / (timeline + ext)
            cmd = get_melt() + timelines + [
                '-quiet',
                '-consumer', 'avformat:%s' % out,
            ]
            if ext == '.wav':
                cmd += ['vn=1']
            else:
                cmd += ['an=1']
                cmd += ['vcodec=libx264', 'x264opts=keyint=1', 'crf=15']
            cmds.append(cmd)
        for src, out1, out2 in (
            ("audio-front.wav", "fl.wav", "fr.wav"),
            ("audio-center.wav", "fc.wav", "lfe.wav"),
            ("audio-rear.wav", "bl.wav", "br.wav"),
        ):
            cmds.append([
                "ffmpeg", "-y",
                "-nostats", "-loglevel", "error",
                "-i", base_prefix / src,
                "-filter_complex",
                "[0:0]pan=1|c0=c0[left]; [0:0]pan=1|c0=c1[right]",
                "-map", "[left]", base_prefix / out1,
                "-map", "[right]", base_prefix / out2,
            ])
        cmds.append([
            "ffmpeg", "-y",
            "-nostats", "-loglevel", "error",
            "-i", base_prefix / "fl.wav",
            "-i", base_prefix / "fr.wav",
            "-i", base_prefix / "fc.wav",
            "-i", base_prefix / "lfe.wav",
            "-i", base_prefix / "bl.wav",
            "-i", base_prefix / "br.wav",
            "-filter_complex", "[0:a][1:a][2:a][3:a][4:a][5:a]amerge=inputs=6[a]",
            "-map", "[a]", "-c:a", "aac", base_prefix / "audio-5.1.mp4"
        ])
        cmds.append([
            "ffmpeg", "-y",
            "-nostats", "-loglevel", "error",
            "-i", base_prefix / "front.mp4",
            "-i", base_prefix / "audio-5.1.mp4",
            "-c", "copy",
            "-movflags", "+faststart",
            base_prefix / "front-mixed.mp4",
        ])
        cmds.append([
            "ffmpeg", "-y",
            "-nostats", "-loglevel", "error",
            "-i", base_prefix / "back.mp4",
            "-i", base_prefix / "audio-back.wav",
            "-c:v", "copy",
            "-movflags", "+faststart",
            base_prefix / "back-audio.mp4",
        ])
        for cmd in cmds:
            if options["debug"]:
                print(" ".join([str(x) for x in cmd]))
            subprocess.call(cmd)

        for a, b in (
            ("back-audio.mp4", "back.mp4"),
            ("front-mixed.mp4", "back.mp4"),
        ):
            duration_a = ox.avinfo(str(base_prefix / a))['duration']
            duration_b = ox.avinfo(str(base_prefix / b))['duration']
            if duration_a != duration_b:
                print('!!', duration_a, base_prefix / a)
                print('!!', duration_b, base_prefix / b)
                sys.exit(-1)
        shutil.move(base_prefix / "back-audio.mp4", base_prefix / "back.mp4")
        shutil.move(base_prefix / "front-mixed.mp4", base_prefix / "front.mp4")
        if options["keep_audio"]:
            shutil.move(base_prefix / "audio-center.wav", base_prefix / "vocals.wav")
            shutil.move(base_prefix / "audio-front.wav", base_prefix / "foley.wav")
            shutil.move(base_prefix / "audio-back.wav", base_prefix / "original.wav")
        for fn in (
            "audio-5.1.mp4",
            "audio-center.wav", "audio-rear.wav",
            "audio-front.wav", "audio-back.wav", "back-audio.mp4",
            "fl.wav", "fr.wav", "fc.wav", "lfe.wav", "bl.wav", "br.wav",
        ):
            fn = base_prefix / fn
            if os.path.exists(fn):
                os.unlink(fn)
        join_subtitles(base_prefix, options)

    print("Duration - Target: %s Actual: %s" % (target_position, position))
    print(json.dumps(dict(stats), sort_keys=True, indent=2))
    with open(_cache, "w") as fd:
        json.dump(_CACHE, fd)


def add_translations(sub, lang):
    value = sub.value.replace('<br/>', '<br>').replace('<br>\n', '\n').replace('<br>', '\n').strip()
    if sub.languages:
        value = ox.strip_tags(value)
    if lang:
        for slang in lang:
            if slang == "en":
                slang = None
            for tsub in sub.item.annotations.filter(layer="subtitles", start=sub.start, end=sub.end, languages=slang):
                tvalue = tsub.value.replace('<br/>', '<br>').replace('<br>\n', '\n').replace('<br>', '\n').strip()
                if tsub.languages:
                    tvalue = ox.strip_tags(tvalue)
                value += '\n' + tvalue
    return value

def add_translations_dict(sub, langs):
    values = {}
    value = sub.value.replace('<br/>', '<br>').replace('<br>\n', '\n').replace('<br>', '\n').strip()
    if sub.languages:
        value = ox.strip_tags(value)
        values[sub.languages] = value
    else:
        values["en"] = value
    for slang in langs:
        slang_value = None if slang == "en" else slang
        if sub.languages == slang_value:
            continue

        for tsub in sub.item.annotations.filter(
            layer="subtitles", start=sub.start, end=sub.end,
            languages=slang_value
        ):
            tvalue = tsub.value.replace('<br/>', '<br>').replace('<br>\n', '\n').replace('<br>', '\n').strip()
            if tsub.languages:
                tvalue = ox.strip_tags(tvalue)
            values[slang] = tvalue
    return values


def get_srt(sub, offset, lang, tlang):
    sdata = sub.json(keys=['in', 'out', 'value'])
    sdata['value'] = sdata['value'].replace('<br/>', '<br>').replace('<br>\n', '\n').replace('<br>', '\n').strip()
    if tlang:
        sdata['value'] = add_translations(sub, tlang)
    langs = [lang]
    if tlang:
        langs += tlang
    sdata['values'] = add_translations_dict(sub, langs)
    if offset:
        sdata["in"] += offset
        sdata["out"] += offset
    return sdata

def fix_overlaps(data):
    previous = None
    for sub in data:
        if previous is None:
            previous = sub
        else:
            if sub['in'] < previous['out']:
                previous['out'] = sub['in'] - 0.001
            previous = sub
    return data

def shift_clips(data, offset):
    for clip in data:
        clip['in'] += offset
        clip['out'] += offset

def scene_subtitles(scene, options):
    import item.models
    offset = 0
    subs = []
    lang, tlang = parse_lang(options["lang"])
    for clip in scene['audio-center']['A1']:
        if not clip.get("blank"):
            batch, fragment_id = clip['src'].replace('.wav', '').split('/')[-2:]
            vo = item.models.Item.objects.filter(
                data__batch__icontains=batch, data__title__startswith=fragment_id + '_'
            ).first()
            if vo:
                #print("%s => %s %s" % (clip['src'], vo, vo.get('batch')))
                for sub in vo.annotations.filter(
                    layer="subtitles"
                ).filter(
                    languages=None if lang == "en" else lang
                ).exclude(value="").order_by("start"):
                    sdata = get_srt(sub, offset, lang, tlang)
                    subs.append(sdata)
            else:
                print("could not find vo for %s" % clip['src'])
        offset += clip['duration']
    return subs


def load_defaults(options):
    path = os.path.join(options["prefix"], "options.json")
    if os.path.exists(path):
        with open(path) as fd:
            defaults = json.loads(fd)
        for key in defaults:
            if key not in options:
                options[key] = defaults[key]
    return options


def update_subtitles(options):
    import item.models

    options = load_defaults(options)
    prefix = Path(options['prefix'])
    base = int(options['offset'])
    lang, tlang = parse_lang(options["lang"])

    _cache = os.path.join(prefix, "cache.json")
    if os.path.exists(_cache):
        with open(_cache) as fd:
            _CACHE.update(json.load(fd))

    base_prefix = prefix / 'render' / str(base)
    for folder in os.listdir(base_prefix):
        folder = base_prefix / folder
        scene_json = folder / "scene.json"
        if not os.path.exists(scene_json):
            continue
        with open(scene_json) as fd:
            scene = json.load(fd)
        subs = scene_subtitles(scene, options)
        write_subtitles(subs, folder, options)

def ass_encode(subs, options):
    if "lang" in options:
        langs = options["lang"].split(',')
    else:
        langs = list(subs[0]["values"])
    print('ass_encode', langs, options)
    print(subs)

    header = '''[Script Info]
ScriptType: v4.00+
PlayResX: 1920
PlayResY: 1080
ScaledBorderAndShadow: yes
YCbCr Matrix: None

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
'''
    ass = header
    offset = 10
    height = 42
    styles = []
    for lang in reversed(langs):
        font = 'SimHei' if lang in ('zh', 'jp') else 'Menlo'
        size = 46 if font == 'SimHei' else 42
        styles.append(
            f'Style: {lang},{font},{size},&Hffffff,&Hffffff,&H0,&H0,0,0,0,0,100,100,0,0,1,1,0,2,10,10,{offset},1'
        )
        offset += size + 20
    ass += '\n'.join(reversed(styles)) + '\n'
    events = [
        'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text'
    ]
    for sub in subs:
        start = ox.format_timecode(sub['in']).rstrip('0')
        stop = ox.format_timecode(sub['out']).rstrip('0')
        for lang in reversed(langs):
            value = sub['values'][lang]
            event = f'Dialogue: 0,{start},{stop},{lang},,0,0,0,,{value}'
            events.append(event)
    ass += '\n\n[Events]\n' + '\n'.join(events) + '\n'
    return ass

def update_m3u(render_prefix, exclude=[]):
    files = ox.sorted_strings(glob(render_prefix + "*/*/back.mp4"))
    for ex in exclude:
        files = [f for f in files if not f.startswith(ex + "/")]
    back_m3u = "\n".join(files)
    back_m3u = back_m3u.replace(render_prefix, "")
    front_m3u = back_m3u.replace("back.mp4", "front.mp4")

    back_m3u_f = render_prefix + "back.m3u"
    front_m3u_f = render_prefix + "front.m3u"

    with open(back_m3u_f + "_", "w") as fd:
        fd.write(back_m3u)
    with open(front_m3u_f + "_", "w") as fd:
        fd.write(front_m3u)
    shutil.move(front_m3u_f + "_", front_m3u_f)
    cmd = ["scp", front_m3u_f, "front:" + front_m3u_f]
    subprocess.check_call(cmd)
    shutil.move(back_m3u_f + "_", back_m3u_f)


def render_infinity(options):
    prefix = options['prefix']
    duration = int(options['duration'])

    defaults = {
        "offset": 100,
        "max-items": 30,
        "no_video": False,
    }
    state_f = os.path.join(prefix, "infinity.json")
    if os.path.exists(state_f):
        with open(state_f) as fd:
            state = json.load(fd)
    else:
        state = {}
    for key in ("prefix", "duration", "debug", "single_file", "keep_audio", "stereo_downmix"):
        state[key] = options[key]

    for key in defaults:
        if key not in state:
            state[key] = defaults[key]

    while True:
        render_prefix = state["prefix"] + "/render/"
        current = [
            f for f in os.listdir(render_prefix)
            if f.isdigit() and os.path.isdir(render_prefix + f) and state["offset"] > int(f) >= 100
        ]
        if len(current) > state["max-items"]:
            current = ox.sorted_strings(current)
            remove = current[:-state["max-items"]]
            update_m3u(render_prefix, exclude=remove)
            for folder in remove:
                folder = render_prefix + folder
                print("remove", folder)
                shutil.rmtree(folder)
                cmd = ["ssh", "front", "rm", "-rf", folder]
                #print(cmd)
                subprocess.check_call(cmd)
        render_all(state)
        path = "%s%s/" % (render_prefix, state["offset"])
        cmd = ['rsync', '-a', path, "front:" + path]
        subprocess.check_call(cmd)
        update_m3u(render_prefix)
        state["offset"] += 1
        with open(state_f + "~", "w") as fd:
            json.dump(state, fd, indent=2)
        shutil.move(state_f + "~", state_f)


def join_subtitles(base_prefix, options):
    '''
    subtitles = list(sorted(glob('%s/*/front.srt' % base_prefix)))
    data = []
    position = 0
    for srt in subtitles:
        scene = srt.replace('front.srt', 'scene.json')
        data += ox.srt.load(srt, offset=position)
        position += get_scene_duration(scene)
    with open(base_prefix / 'front.srt', 'wb') as fd:
        fd.write(ox.srt.encode(data))
    '''
    scenes = list(sorted(glob('%s/*/scene.json' % base_prefix)))
    data = []
    position = 0
    for scene in scenes:
        subs = scene_subtitles(scene, options)
        data += shift_clips(subs, position)
        position += get_scene_duration(scene)
    write_subtitles(data, base_prefix, options)

def resolve_roman(s):
    extra = re.compile(r'^\d+(.*?)$').findall(s)
    if extra:
        extra = extra[0].lower()
        new = {
            'i': '1', 'ii': '2', 'iii': '3', 'iv': '4', 'v': '5',
            'vi': '6', 'vii': 7, 'viii': '8', 'ix': '9', 'x': '10'
        }.get(extra, extra)
        return s.replace(extra, new)
    return s

def generate_clips(options):
    import item.models
    import itemlist.models

    prefix = options['prefix']
    lang, tlang = parse_lang(options["lang"])
    if options['censored']:
        censored_list = itemlist.models.List.get(options["censored"])
        censored = list(censored_list.get_items(
            censored_list.user
        ).all().values_list('public_id', flat=True))
    clips = []
    for i in item.models.Item.objects.filter(sort__type='original'):
        original_target = ""
        qs = item.models.Item.objects.filter(data__title=i.data['title']).exclude(id=i.id)
        if qs.count() >= 1:
            clip = {}
            durations = []
            for e in item.models.Item.objects.filter(data__title=i.data['title']):
                if 'type' not in e.data:
                    print("ignoring invalid video %s (no type)" % e)
                    continue
                if not e.files.filter(selected=True).exists():
                    continue
                source = e.files.filter(selected=True)[0].data.path
                ext = os.path.splitext(source)[1]
                type_ = e.data['type'][0].lower()
                target = os.path.join(prefix, type_, i.data['title'] + ext)
                os.makedirs(os.path.dirname(target), exist_ok=True)
                if os.path.islink(target):
                    os.unlink(target)
                os.symlink(source, target)
                if type_ == "original":
                    original_target = target
                if options['censored'] and e.public_id in censored:
                    clip[type_ + "_censored"] = target
                    target = '/srv/t_for_time/censored.mp4'
                clip[type_] = target
                durations.append(e.files.filter(selected=True)[0].duration)
            clip["duration"] = min(durations)
            if not clip["duration"]:
                print('!!', durations, clip)
                continue
            cd = format_duration(clip["duration"], 24)
            #if cd != clip["duration"]:
            #    print(clip["duration"], '->', cd, durations, clip)
            clip["duration"] = cd
            clip['tags'] = i.data.get('tags', [])
            clip['editingtags'] = i.data.get('editingtags', [])
            name = os.path.basename(original_target)
            seqid = re.sub(r"Hotel Aporia_(\d+)", "S\\1_", name)
            seqid = re.sub(r"Night March_(\d+)", "S\\1_", seqid)
            seqid = re.sub(r"_(\d+)H_(\d+)", "_S\\1\\2_", seqid)
            seqid = seqid.split('_')[:2]
            seqid = [b[1:] if b[0] in ('B', 'S') else '0' for b in seqid]
            seqid[1] = resolve_roman(seqid[1])
            seqid[1] = ''.join([b for b in seqid[1] if b.isdigit()])
            if not seqid[1]:
                seqid[1] = '0'
            try:
                clip['seqid'] = int(''.join(['%06d' % int(b) for b in seqid]))
            except:
                print(name, seqid, 'failed')
                raise
            if "original" in clip and "foreground" in clip and "background" in clip:
                clips.append(clip)
            elif "original" in clip and "animation" in clip:
                clips.append(clip)
            else:
                print("ignoring incomplete video", i)

    with open(os.path.join(prefix, 'clips.json'), 'w') as fd:
        json.dump(clips, fd, indent=2, ensure_ascii=False)

    print("using", len(clips), "clips")

    voice_over = defaultdict(dict)
    for vo in item.models.Item.objects.filter(
        data__type__contains="Voice Over",
    ):
        fragment_id = int(vo.get('title').split('_')[0])
        source = vo.files.filter(selected=True)[0]
        batch = vo.get('batch')[0].replace('Text-', '')
        src = source.data.path
        target = os.path.join(prefix, 'voice_over', batch, '%s.wav' % fragment_id)
        os.makedirs(os.path.dirname(target), exist_ok=True)
        if os.path.islink(target):
            os.unlink(target)
        os.symlink(src, target)
        subs = []
        for sub in vo.annotations.filter(
            layer="subtitles", languages=lang
        ).exclude(value="").order_by("start"):
            sdata = get_srt(sub, 0, lang, tlang)
            subs.append(sdata)
        voice_over[fragment_id][batch] = {
            "src": target,
            "duration": format_duration(source.duration, 24),
            "subs": subs
        }
    with open(os.path.join(prefix, 'voice_over.json'), 'w') as fd:
        json.dump(voice_over, fd, indent=2, ensure_ascii=False)

    if options['censored']:
        censored_mp4 = '/srv/t_for_time/censored.mp4'
        if not os.path.exists(censored_mp4):
            cmd = [
                "ffmpeg",
                "-nostats", "-loglevel", "error",
                "-f", "lavfi",
                "-i", "color=color=white:size=1920x1080:rate=24",
                "-t", "3600",
                "-c:v", "libx264",
                "-pix_fmt", "yuv420p",
                censored_mp4
            ]
            subprocess.call(cmd)