pandora_p_for_power/render.py

#!/usr/bin/python3
from collections import defaultdict
from glob import glob
import json
import os
import re
import shutil
import subprocess
import sys
import time
from pathlib import Path

import ox

from .pi import random
from .render_kdenlive import KDEnliveProject, _CACHE, get_melt
from .utils import resolve_roman, write_if_new, format_duration
from .render_utils import *

# Default data directory. Presumably the fallback for options["prefix"]; it is
# not referenced in the visible part of this file -- confirm against callers.
default_prefix = "/srv/p_for_power"


def get_loudnorm(file):
    """Return the loudness measurement string for ``file``, measuring if needed.

    A value already stored in ``file.info["loudnorm"]`` is returned as is.
    Otherwise ffmpeg's ``loudnorm`` filter analyses ``file.data.path``, the
    result is formatted as ``"L: <integrated>\\tR: <range>\\tP <true peak>"``,
    stored in ``file.info["loudnorm"]`` and persisted with ``file.save()``.

    Raises:
        RuntimeError: if no JSON object is found in ffmpeg's stderr output.
    """
    if "loudnorm" in file.info:
        return file.info["loudnorm"]
    source = file.data.path
    cmd = [
        "ffmpeg",
        "-i", source,
        "-vn",
        "-af", "loudnorm=print_format=json",
        "-f", "null",
        "-"
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    # loudnorm prints its JSON report to stderr; grab the outermost {...} span.
    json_match = re.search(r"\{[\s\S]*\}", result.stderr)
    if not json_match:
        raise RuntimeError("Could not find loudnorm JSON output in ffmpeg output")
    loudnorm_data = json.loads(json_match.group(0))

    input_i = float(loudnorm_data.get("input_i", 0))      # Integrated loudness
    input_lra = float(loudnorm_data.get("input_lra", 0))  # Loudness range
    input_tp = float(loudnorm_data.get("input_tp", 0))    # True peak
    loudnorm = f"L: {input_i:.6f}\tR: {input_lra:.6f}\tP {input_tp:.6f}"
    file.info["loudnorm"] = loudnorm
    file.save()
    return loudnorm


def compose(clips, fragment, target=150, base=1024, voice_over=None, options=None):
    """Assemble the video, foley and voice-over tracks of one fragment scene.

    Args:
        clips: list of clip dicts to draw from. Each clip is read for ``id``,
            ``tags``, ``duration``, ``source`` and ``loudnorm``; ``ai`` and
            ``volume`` are optional. Selected clips get a ``use_ai`` key set
            in place.
        fragment: fragment dict; only ``fragment["tags"]`` is read here.
        target: desired scene length in seconds.
        base: offset used to seed the ``random`` sequence for this scene.
        voice_over: optional mapping of key -> list of variants; a variant is
            either one voice-over dict or a pair ``[vo, vo_b]``. Voice-over
            dicts are read for ``duration``, ``src`` and optional ``subs``.
        options: optional dict; only ``stereo_downmix`` is read.

    Returns:
        ``(scene, used)``: the scene dict (timelines ``front``,
        ``audio-center``, ``audio-front``, ``audio-rear`` and, when subtitles
        exist, ``subtitles``) and the list of clips placed on the timeline.

    Exits the process if video and audio track lengths diverge.
    """
    if options is None:
        options = {}
    fps = 24
    length = 0
    scene = {
        'front': {
            'V1': [],
            'V2': [],
        },
        'audio-center': {
            'A1': [],
            'A2': [],
        },
        'audio-front': {
            'A1': [],
            'A2': [],
            'A3': [],
            'A4': [],
        },
        'audio-rear': {
            'A1': [],
            'A2': [],
            'A3': [],
            'A4': [],
        },
    }
    all_clips = clips.copy()
    seq = random(10000 + base * 1000)
    used = []

    selected_clips_length = 0
    ai_length = 0
    selected_clips = []

    # Phase 1: pre-select roughly 110% of the target, one random tag at a
    # time, preferring AI clips until they cover 60% of the target.
    tags = []
    while selected_clips_length < target * 1.1:
        if not any(t in c["tags"] for c in clips for t in fragment["tags"]):
            # No remaining clip carries any fragment tag, so no later
            # iteration could add a clip; without this check the loop would
            # never terminate. The reset logic in phase 2 deals with the
            # shortage.
            print("not enough tagged clips: selected %0.3f of %0.3f" % (
                selected_clips_length, target * 1.1
            ))
            break
        if not tags:
            tags = fragment["tags"].copy()
        tag = random_choice(seq, tags, pop=True)

        non_ai_clips = []
        ai_clips = []
        for clip in clips:
            if tag in clip["tags"]:
                if 'ai' in clip:
                    ai_clips.append(clip)
                else:
                    non_ai_clips.append(clip)
        if ai_length < target * 0.6 and ai_clips:
            clip = random_choice(seq, ai_clips, pop=True)
            clip["use_ai"] = True
            selected_clips.append(clip)
            selected_clips_length += clip['duration']
            ai_length += clip['duration']
            clips = [c for c in clips if c['id'] != clip['id']]
            continue

        available_clips = non_ai_clips + ai_clips
        if available_clips:
            clip = random_choice(seq, available_clips, pop=True)
            clip["use_ai"] = False
            selected_clips.append(clip)
            selected_clips_length += clip['duration']
            clips = [c for c in clips if c['id'] != clip['id']]

    # Phase 2: place the selected clips on the timeline until the target is
    # reached (overshooting by at most 10%).
    clips = selected_clips
    while target - length > 0 and clips:
        # FIXME: while not all clips have AI versions make sure we have one 50% of the time
        clip = random_choice(seq, clips, True)
        next_length = length + clip['duration']
        if target - next_length < -target*0.1:
            break
        clip_duration = format_duration(clip['duration'], fps)
        if clip['duration'] != clip_duration:
            print("WTF", clip, clip['duration'], clip_duration)
        length += clip_duration

        # Video comes from the source unless the AI version was selected;
        # audio always comes from the source.
        src = clip['source']
        audio = clip['source']
        if 'ai' in clip and clip.get("use_ai"):
            src = random_choice(seq, list(clip['ai'].values()), False)

        print('%07.3f-%07.3f %07.3f %s (%s)' % (
            length-clip_duration,
            length,
            clip_duration,
            os.path.basename(clip['source']),
            src.split('/')[-2]
        ))

        scene['front']['V2'].append({
            'duration': clip_duration,
            'src': src,
            "filter": {
            }
        })

        volume_front = '-17'
        if clip.get('volume') is not None:
            volume_front = '%0.2f' % (float(volume_front) + clip['volume'])

        audio_filter = {
            'mono': [
                ["channels", "2"],
            ],
            'loudness': [
                ["program", "-17"],
                ["results", clip["loudnorm"]],
            ],
            'volume': volume_front,
            'fadein': '00:00:00.125'
        }
        scene['audio-front']['A2'].append({
            'duration': clip_duration,
            'src': audio,
            'filter': audio_filter.copy()
        })

        # Keep the running length in sync with the actual track durations and
        # bail out if video and audio ever disagree.
        length = format_duration(length, fps)
        ad = get_scene_duration(scene, track='audio-front:A2')
        vd = get_scene_duration(scene, track='front:V2')
        if ad == vd and abs(ad-length) > 1/48:
            print('v: ', vd, 'ad', ad, 'length:', length, 'fixup')
            length = ad
        if abs(length -vd) > 1/48 or abs(length - ad) > 1/48 or ad != vd:
            print('vd: ', vd, 'ad', ad, 'length:', length)
            print(clip)
            sys.exit(-1)
        used.append(clip)
        if not clips and target - length > 0:
            # Ran out of selected clips: widen the pool step by step.
            print("not enough clips, need to reset")
            used_ids = {c['id'] for c in used}
            clips = [c for c in all_clips if c != clip and c['id'] not in used_ids]
            if not clips:
                print("not enough clips, also consider used")
                clips = [c for c in all_clips if c != clip]
            if not clips:
                print("not enough clips, also consider last clip")
                clips = all_clips.copy()
            for candidate in clips:
                if "ai" in candidate:
                    candidate["use_ai"] = True

    # Voice-over: alternate random gaps and voice-over clips until the scene
    # is filled or the voice-overs run out.
    scene_duration = int(round(get_scene_duration(scene) * fps))
    voice_overs = []
    sub_offset = 0
    subs = []
    print("--")
    print("Voice Over:")
    if voice_over:
        vo_keys = list(sorted(voice_over))
        while int(sub_offset * fps) < scene_duration and vo_keys:
            if sub_offset:
                gap = (5 * fps + random_int(seq, 10 * fps)) / fps
            else:
                gap = (2 * fps + random_int(seq, 5 * fps)) / fps
            gap = format_duration(gap, fps)
            if int((sub_offset + gap) * fps) > scene_duration:
                gap = format_duration((scene_duration - int(sub_offset * fps)) / fps, fps)
            for tl, track in (
                ('audio-center', 'A1'),
                ('audio-center', 'A2'),
                ('audio-rear', 'A1'),
                ('audio-rear', 'A2'),
            ):
                scene[tl][track].append({
                    'blank': True,
                    'duration': gap
                })
            print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+gap, gap), 'silence')
            sub_offset += gap

            vo_key = random_choice(seq, vo_keys, pop=True)
            variant = random_int(seq, len(voice_over[vo_key]))
            vo = voice_over[vo_key][variant]
            if isinstance(vo, list):
                vo, vo_b = vo

            else:
                vo_b = None
            # Keep drawing until a voice-over fits into the remaining time.
            while int((vo['duration'] + sub_offset) * fps) > scene_duration:
                if not vo_keys:
                    vo = None
                    break
                vo_key = random_choice(seq, vo_keys, pop=True)
                variant = random_int(seq, len(voice_over[vo_key]))
                vo = voice_over[vo_key][variant]
                if isinstance(vo, list):
                    vo, vo_b = vo
                else:
                    vo_b = None
            if vo is None:
                break
            print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+vo["duration"], vo["duration"]), vo["src"].split('/')[-1])
            voice_overs.append(vo)
            voc = vo.copy()
            # Center/rear volumes; both are raised for the stereo downmix.
            a, b = '-11', '-3'
            if options.get('stereo_downmix'):
                a, b = '-9', '-1'
            voc['filter'] = {'volume': a}
            scene['audio-center']['A1'].append(voc)
            vo_low = vo.copy()
            vo_low['filter'] = {'volume': b}
            scene['audio-rear']['A1'].append(vo_low)
            for sub in voc.get("subs", []):
                sub = sub.copy()
                sub["in"] += sub_offset
                sub["out"] += sub_offset
                subs.append(sub)
            if vo_b:
                vo_b = vo_b.copy()
                vo_b['filter'] = {'volume': a}
                scene['audio-center']['A2'].append(vo_b)
                vo_b = vo_b.copy()
                vo_b['filter'] = {'volume': b}
                scene['audio-rear']['A2'].append(vo_b)
            else:
                for tl, track in (
                    ('audio-center', 'A2'),
                    ('audio-rear', 'A2'),
                ):
                    scene[tl][track].append({
                        'blank': True,
                        'duration': voc["duration"]
                    })
            sub_offset += voc["duration"]
        if subs:
            scene["subtitles"] = subs
        sub_offset = format_duration(sub_offset, fps)

    # Pad the primary voice-over tracks with silence up to the scene end.
    if sub_offset < scene_duration/fps:
        gap = scene_duration/fps - sub_offset
        print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+gap, gap), 'silence')
        scene['audio-center']['A1'].append({
            'blank': True,
            'duration': gap
        })
        scene['audio-rear']['A1'].append({
            'blank': True,
            'duration': gap
        })
        sub_offset += gap
    print("scene duration: %0.3f (target: %0.3f)" % (
        get_scene_duration(scene),
        target
    ))
    return scene, used

def write_subtitles(data, folder, options):
    """Write ``data`` as ``front.srt`` or ``front.ass`` inside ``folder``.

    Overlaps are fixed first. The format is SRT when
    ``options["subtitle_format"]`` equals ``"srt"`` and ASS otherwise; the
    file of the other format is deleted if present.
    """
    data = fix_overlaps(data)
    srt_path = folder / "front.srt"
    ass_path = folder / "front.ass"

    if options.get("subtitle_format") != "srt":
        # ASS output: drop a stale SRT before writing.
        if os.path.exists(srt_path):
            os.unlink(srt_path)
        write_if_new(str(ass_path), ass_encode(data, options), '')
        return

    # SRT output: write first, then drop a stale ASS.
    write_if_new(str(srt_path), ox.srt.encode(data), 'b')
    if os.path.exists(ass_path):
        os.unlink(ass_path)


def render(root, scene, prefix='', options=None):
    """Write one ``.kdenlive`` project per timeline of ``scene``.

    Args:
        root: base directory; passed to ``KDEnliveProject`` and used as the
            parent of every written file.
        scene: mapping of timeline name -> {track name -> list of clips}. A
            ``"subtitles"`` entry is written through ``write_subtitles``
            instead of becoming a project.
        prefix: path fragment prepended to each file name below ``root``.
        options: optional dict; ``debug`` enables the duration checks.

    Returns:
        List of the written ``.kdenlive`` paths.

    With ``debug`` set, the process exits with status 1 if the project
    duration differs from the scene duration or from the written file.
    """
    if options is None:
        options = {}
    fps = 24
    files = []
    scene_duration = int(get_scene_duration(scene) * fps)
    for timeline, data in scene.items():
        if timeline == "subtitles":
            write_subtitles(data, Path(root) / prefix, options)
            continue
        project = KDEnliveProject(root)

        track_durations = {}
        for track, clips in data.items():
            for clip in clips:
                project.append_clip(track, clip)
            track_durations[track] = sum(int(c['duration'] * fps) for c in clips)
        if timeline.startswith('audio-'):
            # Pad audio timelines that end before the scene does; the blank is
            # appended to every track that is as long as the project itself.
            track_duration = project.get_duration()
            delta = scene_duration - track_duration
            if delta > 0:
                for track, frames in track_durations.items():
                    if frames == track_duration:
                        project.append_clip(track, {'blank': True, "duration": delta/fps})

        path = os.path.join(root, prefix + "%s.kdenlive" % timeline)
        project_xml = project.to_xml()
        write_if_new(path, project_xml)

        # .get(): options defaults to {} and must not raise KeyError here.
        if options.get("debug"):
            # check duration
            out_duration = get_project_duration(path)
            p_duration = project.get_duration()
            print(path, 'out: %s, project: %s, scene: %s' %(out_duration, p_duration, scene_duration))
            if p_duration != scene_duration:
                print(path, 'FAIL project: %s, scene: %s' %(p_duration, scene_duration))
                # Persist the cache before bailing out.
                _cache = os.path.join(root, "cache.json")
                with open(_cache, "w") as fd:
                    json.dump(_CACHE, fd)
                sys.exit(1)
            if out_duration != p_duration:
                print(path, 'fail got: %s expected: %s' %(out_duration, p_duration))
                sys.exit(1)

        files.append(path)
    return files

def get_fragments(clips, voice_over, prefix):
    """Build the fragment descriptions from the featured, numbered lists.

    Every featured list whose name starts with a number becomes one fragment.
    Tags and anti-tags are collected from the list query, and the fragment's
    clips are those entries of ``clips`` whose ``source`` matches the path
    derived from one of the list's items.

    Args:
        clips: list of clip dicts; each is read for ``source``.
        voice_over: mapping keyed by the fragment number as a string.
        prefix: base directory used to derive the clip source paths.

    Returns:
        List of fragment dicts sorted by name.
    """
    import itemlist.models
    import item.models

    fragments = []

    for lst in itemlist.models.List.objects.filter(status='featured').order_by('name'):
        fragment_id = lst.name.split(' ')[0]
        if not fragment_id.isdigit():
            continue
        fragment = {
            'id': int(fragment_id),
            'name': lst.name,
            'tags': [],
            'anti-tags': [],
            'description': lst.description
        }
        for con in lst.query['conditions']:
            if "conditions" in con:
                for sub in con["conditions"]:
                    key, operator = sub.get('key'), sub.get('operator')
                    if key == "tags" and operator == '==':
                        fragment['tags'].append(sub['value'])
                    elif key == "tags" and operator == '!=':
                        # Excluded tags belong with the anti-tags, exactly as
                        # for the top-level conditions below.
                        fragment['anti-tags'].append(sub['value'])
                    elif key == 'type' and sub.get('value') in ('source', ''):
                        pass
                    else:
                        print(lst.name, 'unknown sub condition', sub)
            elif con.get('key') == "tags" and con['operator'] == '==':
                fragment['tags'].append(con['value'])
            elif con.get('key') == "tags" and con['operator'] == '!=':
                fragment['anti-tags'].append(con['value'])

        # Paths the list's items map to below ``prefix``.
        sources = set()
        for i in lst.get_items(lst.user):
            orig = i.files.filter(selected=True).first()
            if orig:
                ext = os.path.splitext(orig.data.path)[1]
                if 'type' not in i.data:
                    print("FIXME", i)
                    continue
                type_ = i.data['type'][0].lower()
                sources.add(os.path.join(prefix, 'video', type_, i.data['title'] + ext))
        fragment['clips'] = [clip for clip in clips if clip['source'] in sources]
        # voice_over is keyed by the number as a string, not by the int id.
        fragment["voice_over"] = voice_over.get(fragment_id, {})
        fragments.append(fragment)
    fragments.sort(key=lambda f: ox.sort_string(f['name']))
    return fragments

def _surround_mix_commands(folder):
    """Return the ffmpeg commands that build ``audio-5.1.mp4`` in ``folder``.

    Channels 0 and 1 of ``audio-front.wav``, ``audio-center.wav`` and
    ``audio-rear.wav`` are split into six mono files, which are then merged
    into one six-channel AAC track.
    """
    folder = Path(folder)
    ffmpeg = ["ffmpeg", "-y", "-nostats", "-loglevel", "error"]
    cmds = []
    for src, out1, out2 in (
        ("audio-front.wav", "fl.wav", "fr.wav"),
        ("audio-center.wav", "fc.wav", "lfe.wav"),
        ("audio-rear.wav", "bl.wav", "br.wav"),
    ):
        cmds.append(ffmpeg + [
            "-i", folder / src,
            "-filter_complex",
            "[0:0]pan=1|c0=c0[left]; [0:0]pan=1|c0=c1[right]",
            "-map", "[left]", folder / out1,
            "-map", "[right]", folder / out2,
        ])
    merge = list(ffmpeg)
    for name in ("fl.wav", "fr.wav", "fc.wav", "lfe.wav", "bl.wav", "br.wav"):
        merge += ["-i", folder / name]
    merge += [
        "-filter_complex", "[0:a][1:a][2:a][3:a][4:a][5:a]amerge=inputs=6[a]",
        "-map", "[a]", "-c:a", "aac", folder / "audio-5.1.mp4"
    ]
    cmds.append(merge)
    return cmds


def render_all(options):
    """Compose and render every fragment of one batch.

    Reads ``clips.json`` and ``voice_over.json`` below ``options["prefix"]``,
    writes ``fragments.json``, composes one scene per fragment and writes its
    kdenlive projects and ``scene.json`` to
    ``<prefix>/render/<offset>/<fragment name>/``. Unless ``no_video`` or
    ``single_file`` is set, each fragment is rendered with melt and mixed with
    ffmpeg into ``front.mp4``; with ``single_file`` all fragments are rendered
    into one ``front.mp4`` for the batch. The shared cache is written back to
    ``cache.json`` at the end.

    Options read by index (must be present): ``prefix``, ``duration``,
    ``offset``, ``no_video``, ``single_file``, ``debug``, ``keep_audio``,
    ``stereo_downmix``. ``fragment`` is optional and limits the run to one
    fragment number.
    """
    options = load_defaults(options)
    prefix = options['prefix']
    duration = int(options['duration'])
    base = int(options['offset'])

    _cache = os.path.join(prefix, "cache.json")
    if os.path.exists(_cache):
        with open(_cache) as fd:
            _CACHE.update(json.load(fd))

    with open(os.path.join(prefix, "clips.json")) as fd:
        clips = json.load(fd)
    with open(os.path.join(prefix, "voice_over.json")) as fd:
        voice_over = json.load(fd)
    fragments = get_fragments(clips, voice_over, prefix)
    with open(os.path.join(prefix, "fragments.json"), "w") as fd:
        json.dump(fragments, fd, indent=2, ensure_ascii=False)
    position = target_position = 0
    target = fragment_target = duration / len(fragments)
    base_prefix = os.path.join(prefix, 'render', str(base))
    clips_used = []

    stats = defaultdict(int)
    fragment_base = base
    for fragment in fragments:
        fragment_base += 1
        fragment_id = int(fragment['name'].split(' ')[0])
        if options.get("fragment") and int(options["fragment"]) != fragment_id:
            continue
        name = fragment['name'].replace(' ', '_')
        if not fragment['clips']:
            print("skipping empty fragment", name)
            continue
        fragment_prefix = os.path.join(base_prefix, name)
        os.makedirs(fragment_prefix, exist_ok=True)
        fragment_clips = fragment['clips']
        # Compare ids with ids: clips_used holds clip dicts, so testing an id
        # against it directly never matched and nothing was ever filtered.
        used_ids = {c['id'] for c in clips_used}
        unused_fragment_clips = [c for c in fragment_clips if c['id'] not in used_ids]
        print('fragment clips', len(fragment_clips), 'unused', len(unused_fragment_clips))
        # compose() pre-selects about 110% of the target from clips carrying a
        # fragment tag; when the unused clips cannot cover that, allow reuse.
        usable = sum(
            c['duration'] for c in unused_fragment_clips
            if any(tag in c['tags'] for tag in fragment['tags'])
        )
        if usable < target * 1.1:
            print('not enough unused clips, reusing clips of earlier fragments')
            unused_fragment_clips = fragment_clips
        print('--')
        print('Video:')
        scene, used = compose(
            unused_fragment_clips,
            fragment=fragment,
            target=target,
            base=fragment_base,
            voice_over=fragment['voice_over'],
            options=options
        )
        clips_used += used
        scene_duration = get_scene_duration(scene)
        print("%s %6.3f -> %6.3f (%6.3f)" % (name, target, scene_duration, fragment_target))
        src = [a for a in scene['audio-rear']['A1'] if 'src' in a]
        if src:
            src = src[0]['src']
            stats[src.split('/')[-2]] += 1
        else:
            print("!! fixme, fragment without VO")

        # Compensate drift against the ideal position in the next fragment.
        position += scene_duration
        target_position += fragment_target
        if position > target_position:
            target = fragment_target - (position-target_position)
            print("adjusting target duration for next fragment: %6.3f -> %6.3f" % (fragment_target, target))
        elif position < target_position:
            target = target + 0.1 * fragment_target

        timelines = render(prefix, scene, fragment_prefix[len(prefix) + 1:] + '/', options)

        scene_json = json.dumps(scene, indent=2, ensure_ascii=False)
        write_if_new(os.path.join(fragment_prefix, 'scene.json'), scene_json)

        if not options['no_video'] and not options["single_file"]:
            for timeline in timelines:
                print(timeline)
                # Decide by file name so a directory called "audio..." cannot
                # turn the video timeline into a wav render.
                is_audio = os.path.basename(timeline).startswith('audio')
                ext = '.wav' if is_audio else '.mp4'
                cmd = get_melt() + [
                    timeline,
                    '-quiet',
                    '-consumer', 'avformat:%s' % timeline.replace('.kdenlive', ext),
                ]
                cmd += ['vn=1'] if is_audio else ['an=1']
                subprocess.call(cmd)
                if ext == '.wav' and timeline.endswith('audio.kdenlive'):
                    cmd = [
                        'ffmpeg', '-y',
                        '-nostats', '-loglevel', 'error',
                        '-i',
                        timeline.replace('.kdenlive', ext),
                        timeline.replace('.kdenlive', '.mp4')
                    ]
                    subprocess.call(cmd)
                    os.unlink(timeline.replace('.kdenlive', ext))

            fragment_dir = Path(fragment_prefix)
            cmds = _surround_mix_commands(fragment_dir)
            audio_front = "audio-5.1.mp4"
            copy = '-c'
            if options["stereo_downmix"]:
                stems = ("audio-front.wav", "audio-center.wav", "audio-rear.wav")
                downmix = ["ffmpeg", "-y", "-nostats", "-loglevel", "error"]
                for stem in stems:
                    downmix += ["-i", fragment_dir / stem]
                downmix += [
                    "-filter_complex",
                    # amix must be told exactly how many inputs it gets; the
                    # previous hard-coded 4 did not match the 3 files given.
                    "amix=inputs=%d:duration=longest:dropout_transition=0" % len(stems),
                    '-ac', '2', fragment_dir / "audio-stereo.wav"
                ]
                cmds.append(downmix)
                audio_front = "audio-stereo.wav"
                # The wav has to be encoded for mp4, so only copy the video.
                copy = '-c:v'

            cmds.append([
                "ffmpeg", "-y",
                "-nostats", "-loglevel", "error",
                "-i", fragment_dir / "front.mp4",
                "-i", fragment_dir / audio_front,
                copy, "copy",
                "-movflags", "+faststart",
                fragment_dir / "front-mixed.mp4",
            ])
            for cmd in cmds:
                if options["debug"]:
                    print(" ".join([str(x) for x in cmd]))
                subprocess.call(cmd)

            # Report (but tolerate) outputs whose duration is off by more than
            # half a frame.
            for a, b in (
                ("front-mixed.mp4", "front.mp4"),
                ("audio-center.wav", "front.mp4"),
                ("audio-rear.wav", "front.mp4"),
                ("audio-front.wav", "front.mp4"),
                ("audio-5.1.mp4", "front.mp4"),
            ):
                duration_a = ox.avinfo(str(fragment_dir / a))['duration']
                duration_b = ox.avinfo(str(fragment_dir / b))['duration']
                if abs(duration_a - duration_b) > 1/48:
                    print('!!', duration_a, fragment_dir / a)
                    print('!!', duration_b, fragment_dir / b)
            shutil.move(fragment_dir / "front-mixed.mp4", fragment_dir / "front.mp4")
            cleanup = [
                "audio-5.1.mp4",
                "audio-center.wav", "audio-rear.wav",
                "audio-front.wav",
                "fl.wav", "fr.wav", "fc.wav", "lfe.wav", "bl.wav", "br.wav",
                "audio-stereo.wav",
            ]
            if options["keep_audio"]:
                shutil.move(fragment_dir / "audio-center.wav", fragment_dir / "vocals.wav")
                shutil.move(fragment_dir / "audio-front.wav", fragment_dir / "foley.wav")
            else:
                cleanup += [
                    "vocals.wav",
                    "foley.wav"
                ]
            for fn in cleanup:
                fn = fragment_dir / fn
                if os.path.exists(fn):
                    os.unlink(fn)

    if options["single_file"]:
        cmds = []
        base_prefix = Path(base_prefix)
        for timeline in (
            "front",
            "audio-center",
            "audio-front",
            "audio-rear",
        ):
            timelines = list(sorted(glob('%s/*/%s.kdenlive' % (base_prefix, timeline))))
            # Decide by timeline name; indexing the glob result would raise on
            # an empty batch and could be fooled by directory names.
            is_audio = timeline.startswith('audio')
            ext = '.wav' if is_audio else '.mp4'
            out = base_prefix / (timeline + ext)
            cmd = get_melt() + timelines + [
                '-quiet',
                '-consumer', 'avformat:%s' % out,
            ]
            if is_audio:
                cmd += ['vn=1']
            else:
                cmd += ['an=1']
                cmd += ['vcodec=libx264', 'x264opts=keyint=1', 'crf=15']
            cmds.append(cmd)
        cmds += _surround_mix_commands(base_prefix)
        cmds.append([
            "ffmpeg", "-y",
            "-nostats", "-loglevel", "error",
            "-i", base_prefix / "front.mp4",
            "-i", base_prefix / "audio-5.1.mp4",
            "-c", "copy",
            "-movflags", "+faststart",
            base_prefix / "front-mixed.mp4",
        ])
        for cmd in cmds:
            if options["debug"]:
                print(" ".join([str(x) for x in cmd]))
            subprocess.call(cmd)

        shutil.move(base_prefix / "front-mixed.mp4", base_prefix / "front.mp4")
        if options["keep_audio"]:
            shutil.move(base_prefix / "audio-center.wav", base_prefix / "vocals.wav")
            shutil.move(base_prefix / "audio-front.wav", base_prefix / "foley.wav")
        for fn in (
            "audio-5.1.mp4",
            "audio-center.wav", "audio-rear.wav",
            "audio-front.wav",
            "fl.wav", "fr.wav", "fc.wav", "lfe.wav", "bl.wav", "br.wav",
        ):
            fn = base_prefix / fn
            if os.path.exists(fn):
                os.unlink(fn)
        join_subtitles(base_prefix, options)

    print("Duration - Target: %s Actual: %s" % (target_position, position))
    print(json.dumps(dict(stats), sort_keys=True, indent=2))
    with open(_cache, "w") as fd:
        json.dump(_CACHE, fd)


def add_translations(sub, lang):
    """Return the text of ``sub`` followed by its translations, one per line.

    ``<br>`` variants become newlines. For every language code in ``lang``
    (``"en"`` is looked up as ``None``) the subtitle annotations of the same
    item with identical start and end are appended. Annotations that have
    ``languages`` set are stripped of tags.
    """
    def _plain(annotation):
        text = annotation.value.replace('<br/>', '<br>')
        text = text.replace('<br>\n', '\n').replace('<br>', '\n').strip()
        return ox.strip_tags(text) if annotation.languages else text

    lines = [_plain(sub)]
    for code in lang or ():
        wanted = None if code == "en" else code
        matches = sub.item.annotations.filter(
            layer="subtitles", start=sub.start, end=sub.end, languages=wanted
        )
        for translated in matches:
            lines.append(_plain(translated))
    return '\n'.join(lines)

def add_translations_dict(sub, langs):
    """Return a mapping of language code -> text for ``sub`` and translations.

    The text of ``sub`` is stored under ``sub.languages`` (``"en"`` when that
    is empty). For each other code in ``langs`` the subtitle annotations of
    the same item with identical start and end are looked up (``"en"`` as
    ``None``); when several match, the last one wins.
    """
    def _plain(annotation):
        text = annotation.value.replace('<br/>', '<br>')
        text = text.replace('<br>\n', '\n').replace('<br>', '\n').strip()
        return ox.strip_tags(text) if annotation.languages else text

    values = {(sub.languages or "en"): _plain(sub)}
    for code in langs:
        wanted = None if code == "en" else code
        if wanted == sub.languages:
            # This is the language of sub itself, already stored above.
            continue
        matches = sub.item.annotations.filter(
            layer="subtitles", start=sub.start, end=sub.end,
            languages=wanted
        )
        for translated in matches:
            values[code] = _plain(translated)
    return values

def get_srt(sub, offset, lang, tlang):
    """Return the subtitle dict for ``sub``, shifted by ``offset``.

    ``value`` holds the text with ``<br>`` variants turned into newlines, or
    the combined text from ``add_translations`` when ``tlang`` is given.
    ``values`` holds the per-language texts for ``lang`` plus ``tlang``.
    """
    entry = sub.json(keys=['in', 'out', 'value'])
    text = entry['value'].replace('<br/>', '<br>')
    text = text.replace('<br>\n', '\n').replace('<br>', '\n').strip()
    entry['value'] = add_translations(sub, tlang) if tlang else text

    wanted = [lang, *tlang] if tlang else [lang]
    entry['values'] = add_translations_dict(sub, wanted)

    if offset:
        for edge in ("in", "out"):
            entry[edge] += offset
    return entry

def scene_subtitles(scene, options):
    """Collect the subtitles for the voice-over clips of ``scene``.

    Walks ``scene['audio-center']['A1']``, looks up the voice-over item of
    every non-blank clip from the last two components of its ``src`` path and
    returns its subtitle annotations shifted to the clip's position.
    """
    import item.models

    lang, tlang = parse_lang(options["lang"])
    # English annotations are stored without a language.
    wanted_language = None if lang == "en" else lang

    collected = []
    position = 0
    for clip in scene['audio-center']['A1']:
        if clip.get("blank"):
            position += clip['duration']
            continue
        batch, fragment_id = clip['src'].replace('.wav', '').split('/')[-2:]
        vo = item.models.Item.objects.filter(
            data__batch__icontains=batch, data__title__startswith=fragment_id + '_'
        ).first()
        if not vo:
            print("could not find vo for %s" % clip['src'])
        else:
            annotations = vo.annotations.filter(
                layer="subtitles"
            ).filter(
                languages=wanted_language
            ).exclude(value="").order_by("start")
            for sub in annotations:
                collected.append(get_srt(sub, position, lang, tlang))
        position += clip['duration']
    return collected


def load_defaults(options):
    """Fill ``options`` with missing keys from ``<prefix>/options.json``.

    Keys already present are left untouched. ``options`` is updated in place
    and returned; without the file it is returned unchanged.
    """
    defaults_file = os.path.join(options["prefix"], "options.json")
    if not os.path.exists(defaults_file):
        return options
    with open(defaults_file) as fd:
        stored = json.load(fd)
    for name, value in stored.items():
        options.setdefault(name, value)
    return options


def update_subtitles(options):
    """Rewrite the subtitle file of every rendered scene of one batch.

    For each folder below ``<prefix>/render/<offset>`` that contains a
    ``scene.json``, the subtitles are collected again and written next to it.
    """
    import item.models

    options = load_defaults(options)
    prefix = Path(options['prefix'])
    batch = int(options['offset'])
    # Result is not needed here; the call is kept from the original code.
    parse_lang(options["lang"])

    cache_file = os.path.join(prefix, "cache.json")
    if os.path.exists(cache_file):
        with open(cache_file) as fd:
            _CACHE.update(json.load(fd))

    render_root = prefix / 'render' / str(batch)
    for name in os.listdir(render_root):
        scene_dir = render_root / name
        scene_file = scene_dir / "scene.json"
        if os.path.exists(scene_file):
            with open(scene_file) as fd:
                scene = json.load(fd)
            write_subtitles(scene_subtitles(scene, options), scene_dir, options)

def update_m3u(render_prefix, exclude=()):
    """Rewrite ``<render_prefix>front.m3u`` with all rendered ``front.mp4``.

    Args:
        render_prefix: directory prefix (with trailing separator) that is
            globbed as ``<render_prefix>*/*/front.mp4``; it is stripped from
            the playlist entries.
        exclude: folders to leave out, either relative to ``render_prefix``
            (as passed by ``render_infinity``) or as full paths.

    The playlist is written to a temporary file and moved into place.
    """
    files = ox.sorted_strings(glob(render_prefix + "*/*/front.mp4"))
    # The glob results carry render_prefix while callers pass bare folder
    # names, so the relative path has to be compared as well; matching only
    # the full path never excluded anything.
    excluded = tuple(ex + "/" for ex in exclude)
    if excluded:
        files = [
            f for f in files
            if not f.startswith(excluded)
            and not f[len(render_prefix):].startswith(excluded)
        ]
    front_m3u = "\n".join(files)
    front_m3u = front_m3u.replace(render_prefix, "")

    front_m3u_f = render_prefix + "front.m3u"

    with open(front_m3u_f + "_", "w") as fd:
        fd.write(front_m3u)
    shutil.move(front_m3u_f + "_", front_m3u_f)

def render_infinity(options):
    """Render one offset after another, forever.

    The loop state lives in ``<prefix>/infinity.json``. On start the stored
    state (if any) is loaded, the keys ``prefix``, ``duration``, ``debug``,
    ``single_file``, ``keep_audio`` and ``stereo_downmix`` are overwritten
    from ``options`` and missing ``offset`` / ``max-items`` / ``no_video``
    values get their defaults.

    Each iteration removes the oldest numeric render folders (>= 100 and
    below the current offset) once there are more than ``max-items`` of
    them, calls ``render_all(state)``, refreshes the playlist, increments
    ``offset`` and saves the state. This function never returns.
    """
    options = load_defaults(options)
    prefix = options['prefix']
    duration = int(options['duration'])

    state_f = os.path.join(prefix, "infinity.json")
    state = {}
    if os.path.exists(state_f):
        with open(state_f) as fd:
            state = json.load(fd)

    # values given for this run always replace the stored ones
    for key in ("prefix", "duration", "debug", "single_file", "keep_audio", "stereo_downmix"):
        state[key] = options[key]

    fallback = {
        "offset": 100,
        "max-items": 30,
        "no_video": False,
    }
    for key, value in fallback.items():
        state.setdefault(key, value)

    while True:
        render_prefix = state["prefix"] + "/render/"
        rendered = []
        for name in os.listdir(render_prefix):
            if not name.isdigit():
                continue
            if not os.path.isdir(render_prefix + name):
                continue
            if state["offset"] > int(name) >= 100:
                rendered.append(name)

        keep = state["max-items"]
        if len(rendered) > keep:
            rendered = ox.sorted_strings(rendered)
            stale = rendered[:-keep]
            # drop the folders from the playlist before deleting them
            update_m3u(render_prefix, exclude=stale)
            for name in stale:
                path = render_prefix + name
                print("remove", path)
                shutil.rmtree(path)

        render_all(state)
        update_m3u(render_prefix)
        state["offset"] += 1

        # write to a temporary file, then move it over the state file
        tmp_state_f = state_f + "~"
        with open(tmp_state_f, "w") as fd:
            json.dump(state, fd, indent=2)
        shutil.move(tmp_state_f, state_f)


def join_subtitles(base_prefix, options):
    """Write one subtitle set covering all scenes below ``base_prefix``.

    Every ``<base_prefix>/*/scene.json`` is loaded in sorted path order.
    The subtitles of each scene are shifted by the summed duration of the
    scenes before it, and the combined list is passed to
    ``write_subtitles`` with ``base_prefix`` as target.
    """
    merged = []
    elapsed = 0
    for scene_file in sorted(glob('%s/*/scene.json' % base_prefix)):
        with open(scene_file) as fd:
            scene = json.load(fd)
        scene_subs = scene_subtitles(scene, options)
        merged.extend(shift_clips(scene_subs, elapsed))
        elapsed += get_scene_duration(scene)
    write_subtitles(merged, base_prefix, options)

def generate_clips(options):
    """Export clip and voice-over metadata plus symlinks for rendering.

    Clips: for every item whose ``sort.type`` is ``source``, all items that
    share its title (except ones tagged ``skip``, ones without a ``type``
    and ones without a selected file) are symlinked to
    ``<prefix>/video/<type>/<title><ext>``. One dict per source item is
    written to ``<prefix>/clips.json``.

    Voice over: every item whose type contains "voice over" is symlinked to
    ``<prefix>/voice_over/<fragment>/<type>-<variant>.<ext>`` and, together
    with its subtitles, written to ``<prefix>/voice_over.json``.

    ``options`` needs ``prefix``; other keys (``lang``) may come from the
    defaults loaded by ``load_defaults``.
    """
    import item.models
    import itemlist.models

    fps = 24
    options = load_defaults(options)
    prefix = options['prefix']
    lang, tlang = parse_lang(options["lang"])
    clips = []
    for i in item.models.Item.objects.filter(sort__type='source'):
        source_target = ""
        clip = {}
        durations = []
        for e in item.models.Item.objects.filter(data__title=i.data['title']):
            if 'skip' in e.data.get('tags', []):
                continue
            if 'type' not in e.data:
                print("ignoring invalid video %s (no type)" % e)
                continue
            if not e.files.filter(selected=True).exists():
                continue
            selected = e.files.filter(selected=True)[0]
            source = selected.data.path
            ext = os.path.splitext(source)[1]
            type_ = e.data['type'][0].lower()
            if type_.startswith('ai:'):
                ai_clips = clip.setdefault('ai', {})
                # several ai versions of the same kind get -1, -2, ...
                # appended so they do not overwrite each other
                ai_type = type_[3:]
                n = 1
                while ai_type in ai_clips:
                    ai_type = '%s-%s' % (type_[3:], n)
                    n += 1
                type_ = 'ai:' + ai_type
            target = os.path.join(prefix, 'video', type_, i.data['title'] + ext)
            if type_ == "source":
                source_target = target
                clip['loudnorm'] = get_loudnorm(selected)
            # target is only stored once it has been computed for this
            # version; storing it earlier would use a stale or unset path
            if type_.startswith('ai:'):
                clip['ai'][ai_type] = target
            else:
                clip[type_] = target
            os.makedirs(os.path.dirname(target), exist_ok=True)
            if os.path.islink(target):
                os.unlink(target)
            os.symlink(source, target)
            durations.append(selected.duration)
        if not durations:
            print(i.public_id, 'no duration!', clip)
            continue
        # one frame shorter than the shortest version, trimmed to a
        # multiple of the output fps
        clip["duration"] = format_duration(min(durations) - 1 / fps, fps)
        if not clip["duration"]:
            print('!!', durations, clip)
            continue
        # NOTE(review): second pass kept from the earlier code; presumably a
        # no-op if format_duration is idempotent - confirm before removing
        clip["duration"] = format_duration(clip["duration"], fps)
        clip['tags'] = i.data.get('tags', [])
        adjust_volume = i.data.get('adjustvolume', '')
        if adjust_volume:
            clip['volume'] = float(adjust_volume)
        clip['id'] = i.public_id

        # numeric sort key built from the first two "_" separated parts of
        # the source file name, each rendered as six digits
        name = os.path.basename(source_target)
        seqid = re.sub(r"Hotel Aporia_(\d+)", "S\\1_", name)
        seqid = re.sub(r"Night March_(\d+)", "S\\1_", seqid)
        seqid = re.sub(r"_(\d+)H_(\d+)", "_S\\1\\2_", seqid)
        seqid = seqid.split('_')[:2]
        # parts starting with B or S lose that letter, anything else is 0
        seqid = [b[1:] if b[:1] in ('B', 'S') else '0' for b in seqid]
        seqid[1] = resolve_roman(seqid[1])
        seqid[1] = ''.join([b for b in seqid[1] if b.isdigit()])
        if not seqid[1]:
            seqid[1] = '0'
        try:
            clip['seqid'] = int(''.join(['%06d' % int(b) for b in seqid]))
        except Exception:
            # say which file could not be parsed, then fail as before
            print(name, seqid, 'failed')
            raise

        clips.append(clip)

    with open(os.path.join(prefix, 'clips.json'), 'w') as fd:
        json.dump(clips, fd, indent=2, ensure_ascii=False)

    print("using", len(clips), "clips")

    voice_over = {}
    for vo in item.models.Item.objects.filter(
        data__type__icontains="voice over",
    ):
        # titles are parsed as ch<N>-<type>-<variant>[-ElevenLabs...]
        title = vo.get('title')
        parts = title.split('-')

        fragment = '%02d' % int(parts[0].replace('ch', ''))
        # not called "type": that would shadow the builtin
        vo_type = parts[1]
        variant = '-'.join(parts[2:]).split('-ElevenLabs')[0]
        source = vo.files.filter(selected=True)[0]
        src = source.data.path
        ext = src.split('.')[-1]
        target = os.path.join(prefix, 'voice_over', fragment, '%s-%s.%s' % (vo_type, variant, ext))
        os.makedirs(os.path.dirname(target), exist_ok=True)
        if os.path.islink(target):
            os.unlink(target)
        os.symlink(src, target)
        subs = [
            get_srt(sub, 0, lang, tlang)
            for sub in vo.annotations.filter(
                layer="subtitles", languages=lang
            ).exclude(value="").order_by("start")
        ]
        variants = voice_over.setdefault(fragment, {}).setdefault(vo_type, [])
        vo_variant = {
            "variant": variant,
            "src": target,
            "duration": source.duration,
            "subs": subs
        }
        # quotes come as -a-t / -b-t halves that are stored together in one
        # list, a half first and b half last
        done = False
        if vo_type == 'quote':
            if '-a-t' in variant:
                b_variant = variant.replace('-a-t', '-b-t').split('-t')[0]
                for old in variants:
                    if isinstance(old, list) and old[0]['variant'].startswith(b_variant):
                        old.insert(0, vo_variant)
                        done = True
            elif '-b-t' in variant:
                a_variant = variant.replace('-b-t', '-a-t').split('-t')[0]
                for old in variants:
                    if isinstance(old, list) and old[0]['variant'].startswith(a_variant):
                        old.append(vo_variant)
                        done = True
            # first half seen: start a new list the other half can join
            if not done and ('-a-t' in variant or '-b-t' in variant):
                vo_variant = [vo_variant]
        if not done:
            variants.append(vo_variant)
    with open(os.path.join(prefix, 'voice_over.json'), 'w') as fd:
        json.dump(voice_over, fd, indent=2, ensure_ascii=False)