make sure all tracks are exactly the same length

single video render
pad audio tracks to scene duration
2024-12-04 09:16:24 +00:00 · 2024-12-03 20:12:15 +00:00 · 2024-12-03 19:35:37 +00:00
4 changed files with 215 additions and 21 deletions
--- a/management/commands/generate_clips.py
+++ b/management/commands/generate_clips.py
@ -23,6 +23,9 @@ def resolve_roman(s):
        return s.replace(extra, new)
    return s
 def format_duration(duration, fps):
    """Snap ``duration`` (seconds) to the nearest whole frame at ``fps``.

    The frame-aligned value is formatted with five decimal places and
    converted back to ``float``.
    """
    frames = round(duration * fps)
    seconds = frames / fps
    return float('%0.5f' % seconds)
 class Command(BaseCommand):
    help = 'generate symlinks to clips and clips.json'
@ -68,6 +71,10 @@ class Command(BaseCommand):
                if not clip["duration"]:
                    print('!!', durations, clip)
                    continue
                cd = format_duration(clip["duration"], 24)
                #if cd != clip["duration"]:
                #    print(clip["duration"], '->', cd, durations, clip)
                clip["duration"] = cd
                clip['tags'] = i.data.get('tags', [])
                clip['editingtags'] = i.data.get('editingtags', [])
                name = os.path.basename(clip['original'])
@ -117,7 +124,7 @@ class Command(BaseCommand):
                subs.append(sdata)
            voice_over[fragment_id][batch] = {
                "src": target,
-                "duration": source.duration,
+                "duration": format_duration(source.duration, 24),
                "subs": subs
            }
        with open(os.path.join(prefix, 'voice_over.json'), 'w') as fd:
--- a/management/commands/render.py
+++ b/management/commands/render.py
@ -16,6 +16,9 @@ class Command(BaseCommand):
        parser.add_argument('--duration', action='store', dest='duration', default="3600", help='target duration of all fragments in seconds')
        parser.add_argument('--offset', action='store', dest='offset', default="1024", help='inital offset in pi')
        parser.add_argument('--no-video', action='store_true', dest='no_video', default=False, help='don\'t render video')
        parser.add_argument('--single-file', action='store_true', dest='single_file', default=False, help='render to single video')
        parser.add_argument('--keep-audio', action='store_true', dest='keep_audio', default=False, help='keep independent audio tracks')
        parser.add_argument('--debug', action='store_true', dest='debug', default=False, help='output more info')
    def handle(self, **options):
        """Entry point of the command: hand the received options to ``render_all``.

        All keyword options are collected into one dict and passed on
        unchanged as the single argument.
        """
        render_all(options)
--- a/render.py
+++ b/render.py
@ -11,8 +11,10 @@ import time
 from pathlib import Path
 import ox
 import lxml.etree
 from .pi import random
-from .render_kdenlive import KDEnliveProject, _CACHE
+from .render_kdenlive import KDEnliveProject, _CACHE, melt_xml, get_melt
 def random_int(seq, length):
@ -64,8 +66,11 @@ def write_if_new(path, data, mode=''):
        with open(path, write_mode) as fd:
            fd.write(data)
 def format_duration(duration, fps):
    """Return ``duration`` in seconds aligned to the frame grid of ``fps``.

    The duration is rounded to a whole number of frames, turned back into
    seconds and limited to five decimal places.
    """
    whole_frames = round(duration * fps)
    aligned = whole_frames / fps
    return float('%0.5f' % aligned)
 def compose(clips, target=150, base=1024, voice_over=None):
    fps = 24
    length = 0
    scene = {
        'front': {
@ -100,6 +105,7 @@ def compose(clips, target=150, base=1024, voice_over=None):
    used = []
    voice_overs = []
    sub_offset = 0
    if voice_over:
        vo_keys = list(sorted(voice_over))
        if chance(seq, 0.5):
@ -118,7 +124,7 @@ def compose(clips, target=150, base=1024, voice_over=None):
        if vo_min > target:
            target = vo_min
        elif vo_min < target:
-            offset = (target - vo_min) / 2
+            offset = format_duration((target - vo_min) / 2, fps)
            scene['audio-center']['A1'].append({
                'blank': True,
                'duration': offset
@ -186,7 +192,7 @@ def compose(clips, target=150, base=1024, voice_over=None):
        if length + clip['duration'] > target and length >= vo_min:
            break
        print('%06.3f %06.3f' % (length, clip['duration']), os.path.basename(clip['original']))
-        length += clip['duration']
+        length += int(clip['duration'] * fps) / fps
        if "foreground" not in clip and "animation" in clip:
            fg = clip['animation']
@ -298,30 +304,61 @@ def compose(clips, target=150, base=1024, voice_over=None):
        })
        used.append(clip)
    print("scene duration %0.3f (target: %0.3f, vo_min: %0.3f)" % (length, target, vo_min))
    scene_duration = int(get_scene_duration(scene) * fps)
    sub_offset = int(sub_offset * fps)
    if sub_offset < scene_duration:
        delta = format_duration((scene_duration - sub_offset) / fps, fps)
        print(">> add %0.3f of silence.. %0.3f (scene_duration)" % (delta, scene_duration / fps))
        scene['audio-center']['A1'].append({
            'blank': True,
            'duration': delta
        })
        scene['audio-rear']['A1'].append({
            'blank': True,
            'duration': delta
        })
    elif sub_offset > scene_duration:
        delta = format_duration((scene_duration - sub_offset) / fps, fps)
        scene['audio-center']['A1'][-1]["duration"] += delta
        scene['audio-rear']['A1'][-1]["duration"] += delta
        print("WTF, needed to cut %s new duration: %s" % (delta, scene['audio-center']['A1'][-1]["duration"]))
        print(scene['audio-center']['A1'][-1])
    return scene, used
 def get_track_duration(scene, k, n):
    """Return the length in seconds of track ``n`` on timeline ``k``.

    Every clip duration is truncated to whole frames at 24 fps before the
    frames are summed.  ``None`` is returned when the timeline or the
    track does not exist in ``scene``.
    """
    if k not in scene:
        return None
    tracks = scene[k]
    if n not in tracks:
        return None
    # int() truncates, so a clip never contributes a partial frame.
    frames = sum(int(clip['duration'] * 24) for clip in tracks[n])
    return frames / 24
 def get_scene_duration(scene):
    """Return the duration of a scene.

    ``scene`` is either an already loaded scene mapping or, when it is a
    ``str``, the path of a JSON file that is loaded first.
    """
    if isinstance(scene, str):
        with open(scene) as fd:
            scene = json.load(fd)
    duration = 0
    for key, value in scene.items():
        for name, clips in value.items():
            # The return below sits inside the track loop, so only the
            # first track that is reached gets summed.
            # The added lines count whole frames at 24 fps per clip and
            # convert the total back to seconds on return.
            for clip in clips:
-                duration += clip['duration']
+                duration += int(clip['duration'] * 24)
-            return duration
+            return duration / 24
 def get_offset_duration(prefix):
    """Sum the durations of every ``scene.json`` found below ``prefix``.

    The directory tree is walked recursively; each ``scene.json`` is
    loaded and measured with ``get_scene_duration``.  Returns ``0`` when
    no scene file exists.
    """
    duration = 0
    for root, folders, files in os.walk(prefix):
        if 'scene.json' not in files:
            continue
        path = os.path.join(root, 'scene.json')
        # Close the handle right away; a bare open() inside json.load()
        # leaves it open until the garbage collector gets to it.
        with open(path) as fd:
            scene = json.load(fd)
        duration += get_scene_duration(scene)
    return duration
-def render(root, scene, prefix=''):
+def render(root, scene, prefix='', options=None):
    if options is None: options = {}
    fps = 24
    files = []
-    scene_duration = int(get_scene_duration(scene) * 24)
+    scene_duration = int(get_scene_duration(scene) * fps)
    for timeline, data in scene.items():
        if timeline == "subtitles":
            path = os.path.join(root, prefix + "front.srt")
@ -338,21 +375,42 @@ def render(root, scene, prefix=''):
            #print(track)
            for clip in clips:
                project.append_clip(track, clip)
-            track_durations[track] = int(sum([c['duration'] for c in clips]) * 24)
+            track_durations[track] = sum([int(c['duration'] * fps) for c in clips])
        if timeline.startswith('audio-'):
            track_duration = project.get_duration()
            delta = scene_duration - track_duration
            if delta > 0:
                for track in track_durations:
                    if track_durations[track] == track_duration:
-                        project.append_clip(track, {'blank': True, "duration": delta/24})
+                        project.append_clip(track, {'blank': True, "duration": delta/fps})
-                        break
+
        path = os.path.join(root, prefix + "%s.kdenlive" % timeline)
        project_xml = project.to_xml()
        write_if_new(path, project_xml)
        if options["debug"]:
            # check duration
            out_duration = get_project_duration(path)
            p_duration = project.get_duration()
            print(path, 'out: %s, project: %s, scene: %s' %(out_duration, p_duration, scene_duration))
            if p_duration != scene_duration:
                print(path, 'FAIL project: %s, scene: %s' %(p_duration, scene_duration))
                _cache = os.path.join(root, "cache.json")
                with open(_cache, "w") as fd:
                    json.dump(_CACHE, fd)
                sys.exit(1)
            if out_duration != p_duration:
                print(path, 'fail got: %s expected: %s' %(out_duration, p_duration))
                sys.exit(1)
        files.append(path)
    return files
 def get_project_duration(file):
    """Return the duration of a project file as reported by melt.

    The XML obtained from ``melt_xml`` is parsed and the ``out`` attribute
    of the first ``producer`` child of the root element is read.
    """
    root = lxml.etree.fromstring(melt_xml(file))
    first_producer = root.xpath('producer')[0]
    # presumably 'out' is the zero-based index of the last frame, hence
    # the +1 to get a frame count - verify against the MLT XML docs
    return int(first_producer.attrib['out']) + 1
 def get_fragments(clips, voice_over, prefix):
    import itemlist.models
@ -455,20 +513,19 @@ def render_all(options):
        elif position < target_position:
            target = target + 0.1 * fragment_target
-        timelines = render(prefix, scene, fragment_prefix[len(prefix) + 1:] + '/')
+        timelines = render(prefix, scene, fragment_prefix[len(prefix) + 1:] + '/', options)
        scene_json = json.dumps(scene, indent=2, ensure_ascii=False)
        write_if_new(os.path.join(fragment_prefix, 'scene.json'), scene_json)
-        if not options['no_video']:
+        if not options['no_video'] and not options["single_file"]:
            for timeline in timelines:
                print(timeline)
                ext = '.mp4'
                if '/audio' in timeline:
                    ext = '.wav'
-                cmd = [
+                cmd = get_melt() + [
-                    'xvfb-run', '-a',
+                    timeline,
                    'melt', timeline,
                    '-quiet',
                    '-consumer', 'avformat:%s' % timeline.replace('.kdenlive', ext),
                ]
@ -490,8 +547,8 @@ def render_all(options):
                    subprocess.call(cmd)
                    os.unlink(timeline.replace('.kdenlive', ext))
            fragment_prefix = Path(fragment_prefix)
            cmds = []
            fragment_prefix = Path(fragment_prefix)
            for src, out1, out2 in (
                ("audio-front.wav", "fl.wav", "fr.wav"),
                ("audio-center.wav", "fc.wav", "lfe.wav"),
@ -535,7 +592,8 @@ def render_all(options):
                fragment_prefix / "back-audio.mp4",
            ])
            for cmd in cmds:
-                #print(" ".join([str(x) for x in cmd]))
+                if options["debug"]:
                    print(" ".join([str(x) for x in cmd]))
                subprocess.call(cmd)
            for a, b in (
@ -550,6 +608,10 @@ def render_all(options):
                    sys.exit(-1)
            shutil.move(fragment_prefix / "back-audio.mp4", fragment_prefix / "back.mp4")
            shutil.move(fragment_prefix / "front-5.1.mp4", fragment_prefix / "front.mp4")
            if options["keep_audio"]:
                shutil.move(fragment_prefix / "audio-center.wav", fragment_prefix / "vocals.wav")
                shutil.move(fragment_prefix / "audio-front.wav", fragment_prefix / "foley.wav")
                shutil.move(fragment_prefix / "audio-back.wav", fragment_prefix / "original.wav")
            for fn in (
                "audio-5.1.mp4",
                "audio-center.wav", "audio-rear.wav",
@ -560,6 +622,106 @@ def render_all(options):
                if os.path.exists(fn):
                    os.unlink(fn)
    if options["single_file"]:
        cmds = []
        base_prefix = Path(base_prefix)
        for timeline in (
            "front",
            "back",
            "audio-back",
            "audio-center",
            "audio-front",
            "audio-rear",
        ):
            timelines = list(sorted(glob('%s/*/%s.kdenlive' % (base_prefix, timeline))))
            ext = '.mp4'
            if '/audio' in timelines[0]:
                ext = '.wav'
            out = base_prefix / (timeline + ext)
            cmd = get_melt() + timelines + [
                '-quiet',
                '-consumer', 'avformat:%s' % out,
            ]
            if ext == '.wav':
                cmd += ['vn=1']
            else:
                cmd += ['an=1']
                cmd += ['vcodec=libx264', 'x264opts=keyint=1', 'crf=15']
            cmds.append(cmd)
        for src, out1, out2 in (
            ("audio-front.wav", "fl.wav", "fr.wav"),
            ("audio-center.wav", "fc.wav", "lfe.wav"),
            ("audio-rear.wav", "bl.wav", "br.wav"),
        ):
            cmds.append([
                "ffmpeg", "-y",
                "-nostats", "-loglevel", "error",
                "-i", base_prefix / src,
                "-filter_complex",
                "[0:0]pan=1|c0=c0[left]; [0:0]pan=1|c0=c1[right]",
                "-map", "[left]", base_prefix / out1,
                "-map", "[right]", base_prefix / out2,
            ])
        cmds.append([
            "ffmpeg", "-y",
            "-nostats", "-loglevel", "error",
            "-i", base_prefix / "fl.wav",
            "-i", base_prefix / "fr.wav",
            "-i", base_prefix / "fc.wav",
            "-i", base_prefix / "lfe.wav",
            "-i", base_prefix / "bl.wav",
            "-i", base_prefix / "br.wav",
            "-filter_complex", "[0:a][1:a][2:a][3:a][4:a][5:a]amerge=inputs=6[a]",
            "-map", "[a]", "-c:a", "aac", base_prefix / "audio-5.1.mp4"
        ])
        cmds.append([
            "ffmpeg", "-y",
            "-nostats", "-loglevel", "error",
            "-i", base_prefix / "front.mp4",
            "-i", base_prefix / "audio-5.1.mp4",
            "-c", "copy",
            base_prefix / "front-5.1.mp4",
        ])
        cmds.append([
            "ffmpeg", "-y",
            "-nostats", "-loglevel", "error",
            "-i", base_prefix / "back.mp4",
            "-i", base_prefix / "audio-back.wav",
            "-c:v", "copy",
            base_prefix / "back-audio.mp4",
        ])
        for cmd in cmds:
            if options["debug"]:
                print(" ".join([str(x) for x in cmd]))
            subprocess.call(cmd)
        for a, b in (
            ("back-audio.mp4", "back.mp4"),
            ("front-5.1.mp4", "back.mp4"),
        ):
            duration_a = ox.avinfo(str(base_prefix / a))['duration']
            duration_b = ox.avinfo(str(base_prefix / b))['duration']
            if duration_a != duration_b:
                print('!!', duration_a, base_prefix / a)
                print('!!', duration_b, base_prefix / b)
                sys.exit(-1)
        shutil.move(base_prefix / "back-audio.mp4", base_prefix / "back.mp4")
        shutil.move(base_prefix / "front-5.1.mp4", base_prefix / "front.mp4")
        if options["keep_audio"]:
            shutil.move(base_prefix / "audio-center.wav", base_prefix / "vocals.wav")
            shutil.move(base_prefix / "audio-front.wav", base_prefix / "foley.wav")
            shutil.move(base_prefix / "audio-back.wav", base_prefix / "original.wav")
        for fn in (
            "audio-5.1.mp4",
            "audio-center.wav", "audio-rear.wav",
            "audio-front.wav", "audio-back.wav", "back-audio.mp4",
            "fl.wav", "fr.wav", "fc.wav", "lfe.wav", "bl.wav", "br.wav",
        ):
            fn = base_prefix / fn
            if os.path.exists(fn):
                os.unlink(fn)
        join_subtitles(base_prefix)
    print("Duration - Target: %s Actual: %s" % (target_position, position))
    print(json.dumps(dict(stats), sort_keys=True, indent=2))
    with open(_cache, "w") as fd:
@ -715,3 +877,15 @@ def render_infinity(options):
        with open(state_f + "~", "w") as fd:
            json.dump(state, fd, indent=2)
        shutil.move(state_f + "~", state_f)
 def join_subtitles(base_prefix):
    """Merge the per-fragment ``front.srt`` files into one subtitle file.

    Fragments are taken in sorted path order from the direct
    sub-directories of ``base_prefix``.  Each file is loaded with an
    offset equal to the summed duration of the scenes before it, and the
    combined result is written to ``base_prefix / 'front.srt'``.
    """
    merged = []
    offset = 0
    for srt_path in sorted(glob('%s/*/front.srt' % base_prefix)):
        merged += ox.srt.load(srt_path, offset=offset)
        # The matching scene description is found by swapping the file
        # name in the subtitle path.
        scene_path = srt_path.replace('front.srt', 'scene.json')
        offset += get_scene_duration(scene_path)
    with open(base_prefix / 'front.srt', 'wb') as fd:
        fd.write(ox.srt.encode(merged))
--- a/render_kdenlive.py
+++ b/render_kdenlive.py
@ -4,6 +4,7 @@ import subprocess
 import lxml.etree
 import uuid
 import os
 import sys
 _CACHE = {}
 _IDS = defaultdict(int)
@ -12,6 +13,14 @@ def get_propery(element, name):
    return element.xpath('property[@name="%s"]' % name)[0].text
 def get_melt():
    """Build the argv prefix used to launch ``melt``.

    Sets ``XDG_RUNTIME_DIR`` in the process environment when it is not
    already defined.  When no ``DISPLAY`` is set, the command is wrapped
    in ``xvfb-run -a``.
    """
    os.environ.setdefault('XDG_RUNTIME_DIR', '/tmp/runtime-pandora')
    if 'DISPLAY' in os.environ:
        return ['melt']
    return ['xvfb-run', '-a', 'melt']
 def melt_xml(file):
    out = None
    real_path = os.path.realpath(file)
@ -20,7 +29,8 @@ def melt_xml(file):
        if os.stat(real_path).st_mtime != ts:
            out = None
    if not out:
-        out = subprocess.check_output(['melt', file, '-consumer', 'xml']).decode()
+        cmd = get_melt() + [file, '-consumer', 'xml']
        out = subprocess.check_output(cmd).decode()
        _CACHE[file] = [os.stat(real_path).st_mtime, out]
    return out
Author	SHA1	Message	Date
j	b2552d6059	make sure all tracks are exactly the same length	2024-12-04 09:16:24 +00:00
j	95a41fc2e2	single video render	2024-12-03 20:12:15 +00:00
j	2a2516bff9	pad audio tracks to scene duration	2024-12-03 19:35:37 +00:00