From 2a2516bff91e3be63f9ef0f59751fc0dda20f6c6 Mon Sep 17 00:00:00 2001
From: j <j@mailb.org>
Date: Tue, 3 Dec 2024 19:35:37 +0000
Subject: [PATCH 1/3] pad audio tracks to scene duration

---
 render.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/render.py b/render.py
index 4cb2c4a..97d412a 100644
--- a/render.py
+++ b/render.py
@@ -66,6 +66,7 @@ def write_if_new(path, data, mode=''):
 
 
 def compose(clips, target=150, base=1024, voice_over=None):
+    fps = 24
     length = 0
     scene = {
         'front': {
@@ -100,6 +101,7 @@ def compose(clips, target=150, base=1024, voice_over=None):
     used = []
 
     voice_overs = []
+    sub_offset = 0
     if voice_over:
         vo_keys = list(sorted(voice_over))
         if chance(seq, 0.5):
@@ -118,7 +120,7 @@ def compose(clips, target=150, base=1024, voice_over=None):
         if vo_min > target:
             target = vo_min
         elif vo_min < target:
-            offset = (target - vo_min) / 2
+            offset = int(((target - vo_min) / 2) * fps) / fps
             scene['audio-center']['A1'].append({
                 'blank': True,
                 'duration': offset
@@ -298,6 +300,16 @@ def compose(clips, target=150, base=1024, voice_over=None):
         })
         used.append(clip)
     print("scene duration %0.3f (target: %0.3f, vo_min: %0.3f)" % (length, target, vo_min))
+    if sub_offset < length:
+        delta = length - sub_offset
+        scene['audio-center']['A1'].append({
+            'blank': True,
+            'duration': delta
+        })
+        scene['audio-rear']['A1'].append({
+            'blank': True,
+            'duration': delta
+        })
     return scene, used
 
 def get_scene_duration(scene):
@@ -321,7 +333,7 @@ def get_offset_duration(prefix):
 def render(root, scene, prefix=''):
     fps = 24
     files = []
-    scene_duration = int(get_scene_duration(scene) * 24)
+    scene_duration = int(get_scene_duration(scene) * fps)
     for timeline, data in scene.items():
         if timeline == "subtitles":
             path = os.path.join(root, prefix + "front.srt")
@@ -338,14 +350,14 @@ def render(root, scene, prefix=''):
             #print(track)
             for clip in clips:
                 project.append_clip(track, clip)
-            track_durations[track] = int(sum([c['duration'] for c in clips]) * 24)
+            track_durations[track] = int(sum([c['duration'] for c in clips]) * fps)
         if timeline.startswith('audio-'):
             track_duration = project.get_duration()
             delta = scene_duration - track_duration
             if delta > 0:
                 for track in track_durations:
                     if track_durations[track] == track_duration:
-                        project.append_clip(track, {'blank': True, "duration": delta/24})
+                        project.append_clip(track, {'blank': True, "duration": delta/fps})
                         break
         path = os.path.join(root, prefix + "%s.kdenlive" % timeline)
         project_xml = project.to_xml()

From 95a41fc2e2a1ab7c1ec1ba4659a97124d4af5307 Mon Sep 17 00:00:00 2001
From: j <j@mailb.org>
Date: Tue, 3 Dec 2024 20:12:15 +0000
Subject: [PATCH 2/3] single video render

---
 management/commands/render.py |   3 +
 render.py                     | 133 ++++++++++++++++++++++++++++++++--
 2 files changed, 131 insertions(+), 5 deletions(-)

diff --git a/management/commands/render.py b/management/commands/render.py
index ace0d61..54e66c2 100644
--- a/management/commands/render.py
+++ b/management/commands/render.py
@@ -16,6 +16,9 @@ class Command(BaseCommand):
         parser.add_argument('--duration', action='store', dest='duration', default="3600", help='target duration of all fragments in seconds')
         parser.add_argument('--offset', action='store', dest='offset', default="1024", help='inital offset in pi')
         parser.add_argument('--no-video', action='store_true', dest='no_video', default=False, help='don\'t render video')
+        parser.add_argument('--single-file', action='store_true', dest='single_file', default=False, help='render to single video')
+        parser.add_argument('--keep-audio', action='store_true', dest='keep_audio', default=False, help='keep independent audio tracks')
+        parser.add_argument('--debug', action='store_true', dest='debug', default=False, help='output more info')
 
     def handle(self, **options):
         render_all(options)
diff --git a/render.py b/render.py
index 97d412a..32d43a7 100644
--- a/render.py
+++ b/render.py
@@ -313,6 +313,9 @@ def compose(clips, target=150, base=1024, voice_over=None):
     return scene, used
 
 def get_scene_duration(scene):
+    if isinstance(scene, str):
+        with open(scene) as fd:
+            scene = json.load(fd)
     duration = 0
     for key, value in scene.items():
         for name, clips in value.items():
@@ -325,8 +328,7 @@ def get_offset_duration(prefix):
     for root, folders, files in os.walk(prefix):
         for f in files:
             if f == 'scene.json':
-                path = os.path.join(root, f)
-                scene = json.load(open(path))
+                scene = os.path.join(root, f)  # a path: get_scene_duration() loads the JSON itself
                 duration += get_scene_duration(scene)
     return duration
 
@@ -414,6 +415,8 @@ def get_fragments(clips, voice_over, prefix):
     return fragments
 
 
+def render_timeline(options):
+
 def render_all(options):
     prefix = options['prefix']
     duration = int(options['duration'])
@@ -472,7 +475,7 @@ def render_all(options):
         scene_json = json.dumps(scene, indent=2, ensure_ascii=False)
         write_if_new(os.path.join(fragment_prefix, 'scene.json'), scene_json)
 
-        if not options['no_video']:
+        if not options['no_video'] and not options["single_file"]:
             for timeline in timelines:
                 print(timeline)
                 ext = '.mp4'
@@ -502,8 +505,8 @@ def render_all(options):
                     subprocess.call(cmd)
                     os.unlink(timeline.replace('.kdenlive', ext))
 
-            fragment_prefix = Path(fragment_prefix)
             cmds = []
+            fragment_prefix = Path(fragment_prefix)
             for src, out1, out2 in (
                 ("audio-front.wav", "fl.wav", "fr.wav"),
                 ("audio-center.wav", "fc.wav", "lfe.wav"),
@@ -547,7 +550,8 @@ def render_all(options):
                 fragment_prefix / "back-audio.mp4",
             ])
             for cmd in cmds:
-                #print(" ".join([str(x) for x in cmd]))
+                if options["debug"]:
+                    print(" ".join([str(x) for x in cmd]))
                 subprocess.call(cmd)
 
             for a, b in (
@@ -562,6 +566,10 @@ def render_all(options):
                     sys.exit(-1)
             shutil.move(fragment_prefix / "back-audio.mp4", fragment_prefix / "back.mp4")
             shutil.move(fragment_prefix / "front-5.1.mp4", fragment_prefix / "front.mp4")
+            if options["keep_audio"]:
+                shutil.move(fragment_prefix / "audio-center.wav", fragment_prefix / "vocals.wav")
+                shutil.move(fragment_prefix / "audio-front.wav", fragment_prefix / "foley.wav")
+                shutil.move(fragment_prefix / "audio-back.wav", fragment_prefix / "original.wav")
             for fn in (
                 "audio-5.1.mp4",
                 "audio-center.wav", "audio-rear.wav",
@@ -572,6 +580,109 @@ def render_all(options):
                 if os.path.exists(fn):
                     os.unlink(fn)
 
+    if options["single_file"]:
+        cmds = []
+        base_prefix = Path(base_prefix)
+        for timeline in (
+            "front",
+            "back",
+            "audio-back",
+            "audio-center",
+            "audio-front",
+            "audio-rear",
+        ):
+            timelines = list(sorted(glob('%s/*/%s.kdenlive' % (base_prefix, timeline))))
+            ext = '.mp4'
+            if '/audio' in timelines[0]:
+                ext = '.wav'
+            out = base_prefix / (timeline + ext)
+            cmd = [
+                'xvfb-run', '-a',
+                'melt'
+            ] + timelines + [
+                '-quiet',
+                '-consumer', 'avformat:%s' % out,
+            ]
+            if ext == '.wav':
+                cmd += ['vn=1']
+            else:
+                cmd += ['an=1']
+                cmd += ['vcodec=libx264', 'x264opts=keyint=1', 'crf=15']
+            cmds.append(cmd)
+        for src, out1, out2 in (
+            ("audio-front.wav", "fl.wav", "fr.wav"),
+            ("audio-center.wav", "fc.wav", "lfe.wav"),
+            ("audio-rear.wav", "bl.wav", "br.wav"),
+        ):
+            cmds.append([
+                "ffmpeg", "-y",
+                "-nostats", "-loglevel", "error",
+                "-i", base_prefix / src,
+                "-filter_complex",
+                "[0:0]pan=1|c0=c0[left]; [0:0]pan=1|c0=c1[right]",
+                "-map", "[left]", base_prefix / out1,
+                "-map", "[right]", base_prefix / out2,
+            ])
+        cmds.append([
+            "ffmpeg", "-y",
+            "-nostats", "-loglevel", "error",
+            "-i", base_prefix / "fl.wav",
+            "-i", base_prefix / "fr.wav",
+            "-i", base_prefix / "fc.wav",
+            "-i", base_prefix / "lfe.wav",
+            "-i", base_prefix / "bl.wav",
+            "-i", base_prefix / "br.wav",
+            "-filter_complex", "[0:a][1:a][2:a][3:a][4:a][5:a]amerge=inputs=6[a]",
+            "-map", "[a]", "-c:a", "aac", base_prefix / "audio-5.1.mp4"
+        ])
+        cmds.append([
+            "ffmpeg", "-y",
+            "-nostats", "-loglevel", "error",
+            "-i", base_prefix / "front.mp4",
+            "-i", base_prefix / "audio-5.1.mp4",
+            "-c", "copy",
+            base_prefix / "front-5.1.mp4",
+        ])
+        cmds.append([
+            "ffmpeg", "-y",
+            "-nostats", "-loglevel", "error",
+            "-i", base_prefix / "back.mp4",
+            "-i", base_prefix / "audio-back.wav",
+            "-c:v", "copy",
+            base_prefix / "back-audio.mp4",
+        ])
+        for cmd in cmds:
+            if options["debug"]:
+                print(" ".join([str(x) for x in cmd]))
+            subprocess.call(cmd)
+
+        for a, b in (
+            ("back-audio.mp4", "back.mp4"),
+            ("front-5.1.mp4", "back.mp4"),
+        ):
+            duration_a = ox.avinfo(str(base_prefix / a))['duration']
+            duration_b = ox.avinfo(str(base_prefix / b))['duration']
+            if duration_a != duration_b:
+                print('!!', duration_a, base_prefix / a)
+                print('!!', duration_b, base_prefix / b)
+                sys.exit(-1)
+        shutil.move(base_prefix / "back-audio.mp4", base_prefix / "back.mp4")
+        shutil.move(base_prefix / "front-5.1.mp4", base_prefix / "front.mp4")
+        if options["keep_audio"]:
+            shutil.move(base_prefix / "audio-center.wav", base_prefix / "vocals.wav")
+            shutil.move(base_prefix / "audio-front.wav", base_prefix / "foley.wav")
+            shutil.move(base_prefix / "audio-back.wav", base_prefix / "original.wav")
+        for fn in (
+            "audio-5.1.mp4",
+            "audio-center.wav", "audio-rear.wav",
+            "audio-front.wav", "audio-back.wav", "back-audio.mp4",
+            "fl.wav", "fr.wav", "fc.wav", "lfe.wav", "bl.wav", "br.wav",
+        ):
+            fn = base_prefix / fn
+            if os.path.exists(fn):
+                os.unlink(fn)
+        join_subtitles(base_prefix)
+
     print("Duration - Target: %s Actual: %s" % (target_position, position))
     print(json.dumps(dict(stats), sort_keys=True, indent=2))
     with open(_cache, "w") as fd:
@@ -727,3 +838,15 @@ def render_infinity(options):
         with open(state_f + "~", "w") as fd:
             json.dump(state, fd, indent=2)
         shutil.move(state_f + "~", state_f)
+
+
+def join_subtitles(base_prefix):
+    """Merge every fragment's front.srt into a single <base_prefix>/front.srt."""
+    merged = []
+    offset = 0  # summed duration of the scenes that precede the current one
+    for srt_path in sorted(glob('%s/*/front.srt' % base_prefix)):
+        merged += ox.srt.load(srt_path, offset=offset)
+        # the fragment's scene.json is looked up next to its front.srt
+        offset += get_scene_duration(srt_path.replace('front.srt', 'scene.json'))
+    with open(base_prefix / 'front.srt', 'wb') as fd:
+        fd.write(ox.srt.encode(merged))

From b2552d6059ed1fd51953e3cf3407371d9b5f47d5 Mon Sep 17 00:00:00 2001
From: j <j@mailb.org>
Date: Wed, 4 Dec 2024 09:16:24 +0000
Subject: [PATCH 3/3] make sure all tracks are exactly the same length

---
 management/commands/generate_clips.py |  9 ++-
 render.py                             | 79 ++++++++++++++++++++-------
 render_kdenlive.py                    | 12 +++-
 3 files changed, 78 insertions(+), 22 deletions(-)

diff --git a/management/commands/generate_clips.py b/management/commands/generate_clips.py
index 23bc61c..e2ecca9 100644
--- a/management/commands/generate_clips.py
+++ b/management/commands/generate_clips.py
@@ -23,6 +23,9 @@ def resolve_roman(s):
         return s.replace(extra, new)
     return s
 
+def format_duration(duration, fps):  # snap to the nearest frame, keep 5 decimals
+    return float('{:.5f}'.format(round(duration * fps) / fps))
+
 
 class Command(BaseCommand):
     help = 'generate symlinks to clips and clips.json'
@@ -68,6 +71,10 @@ class Command(BaseCommand):
                 if not clip["duration"]:
                     print('!!', durations, clip)
                     continue
+                cd = format_duration(clip["duration"], 24)
+                #if cd != clip["duration"]:
+                #    print(clip["duration"], '->', cd, durations, clip)
+                clip["duration"] = cd
                 clip['tags'] = i.data.get('tags', [])
                 clip['editingtags'] = i.data.get('editingtags', [])
                 name = os.path.basename(clip['original'])
@@ -117,7 +124,7 @@ class Command(BaseCommand):
                 subs.append(sdata)
             voice_over[fragment_id][batch] = {
                 "src": target,
-                "duration": source.duration,
+                "duration": format_duration(source.duration, 24),
                 "subs": subs
             }
         with open(os.path.join(prefix, 'voice_over.json'), 'w') as fd:
diff --git a/render.py b/render.py
index 32d43a7..36518bf 100644
--- a/render.py
+++ b/render.py
@@ -11,8 +11,10 @@ import time
 from pathlib import Path
 
 import ox
+import lxml.etree
+
 from .pi import random
-from .render_kdenlive import KDEnliveProject, _CACHE
+from .render_kdenlive import KDEnliveProject, _CACHE, melt_xml, get_melt
 
 
 def random_int(seq, length):
@@ -64,6 +66,8 @@ def write_if_new(path, data, mode=''):
         with open(path, write_mode) as fd:
             fd.write(data)
 
+def format_duration(duration, fps):  # snap to the nearest frame, keep 5 decimals
+    return float('{:.5f}'.format(round(duration * fps) / fps))
 
 def compose(clips, target=150, base=1024, voice_over=None):
     fps = 24
@@ -120,7 +124,7 @@ def compose(clips, target=150, base=1024, voice_over=None):
         if vo_min > target:
             target = vo_min
         elif vo_min < target:
-            offset = int(((target - vo_min) / 2) * fps) / fps
+            offset = format_duration((target - vo_min) / 2, fps)
             scene['audio-center']['A1'].append({
                 'blank': True,
                 'duration': offset
@@ -188,7 +192,7 @@ def compose(clips, target=150, base=1024, voice_over=None):
         if length + clip['duration'] > target and length >= vo_min:
             break
         print('%06.3f %06.3f' % (length, clip['duration']), os.path.basename(clip['original']))
-        length += clip['duration']
+        length += int(clip['duration'] * fps) / fps
 
         if "foreground" not in clip and "animation" in clip:
             fg = clip['animation']
@@ -300,8 +304,11 @@ def compose(clips, target=150, base=1024, voice_over=None):
         })
         used.append(clip)
     print("scene duration %0.3f (target: %0.3f, vo_min: %0.3f)" % (length, target, vo_min))
-    if sub_offset < length:
-        delta = length - sub_offset
+    scene_duration = int(get_scene_duration(scene) * fps)
+    sub_offset = int(sub_offset * fps)
+    if sub_offset < scene_duration:
+        delta = format_duration((scene_duration - sub_offset) / fps, fps)
+        print(">> add %0.3f of silence.. %0.3f (scene_duration)" % (delta, scene_duration / fps))
         scene['audio-center']['A1'].append({
             'blank': True,
             'duration': delta
@@ -310,8 +317,24 @@ def compose(clips, target=150, base=1024, voice_over=None):
             'blank': True,
             'duration': delta
         })
+    elif sub_offset > scene_duration:
+        delta = format_duration((scene_duration - sub_offset) / fps, fps)
+        scene['audio-center']['A1'][-1]["duration"] += delta
+        scene['audio-rear']['A1'][-1]["duration"] += delta
+        print("WTF, needed to cut %s new duration: %s" % (delta, scene['audio-center']['A1'][-1]["duration"]))
+        print(scene['audio-center']['A1'][-1])
     return scene, used
 
+def get_track_duration(scene, k, n):
+    """Return the duration of track `n` in timeline `k`, or None if absent."""
+    if k not in scene or n not in scene[k]:
+        return None
+    clips = scene[k][n]
+    # Each clip is truncated to a whole number of frames (24 per duration
+    # unit) before summing, so the total stays on the frame grid.
+    frames = sum(int(clip['duration'] * 24) for clip in clips)
+    return frames / 24
+
 def get_scene_duration(scene):
     if isinstance(scene, str):
         with open(scene) as fd:
@@ -320,8 +343,8 @@ def get_scene_duration(scene):
     for key, value in scene.items():
         for name, clips in value.items():
             for clip in clips:
-                duration += clip['duration']
-            return duration
+                duration += int(clip['duration'] * 24)
+            return duration / 24
 
 def get_offset_duration(prefix):
     duration = 0
@@ -331,7 +354,8 @@ def get_offset_duration(prefix):
                 duration += get_scene_duration(scene)
     return duration
 
-def render(root, scene, prefix=''):
+def render(root, scene, prefix='', options=None):
+    if options is None: options = {}
     fps = 24
     files = []
     scene_duration = int(get_scene_duration(scene) * fps)
@@ -351,7 +375,7 @@ def render(root, scene, prefix=''):
             #print(track)
             for clip in clips:
                 project.append_clip(track, clip)
-            track_durations[track] = int(sum([c['duration'] for c in clips]) * fps)
+            track_durations[track] = sum([int(c['duration'] * fps) for c in clips])
         if timeline.startswith('audio-'):
             track_duration = project.get_duration()
             delta = scene_duration - track_duration
@@ -359,13 +383,34 @@ def render(root, scene, prefix=''):
                 for track in track_durations:
                     if track_durations[track] == track_duration:
                         project.append_clip(track, {'blank': True, "duration": delta/fps})
-                        break
+
         path = os.path.join(root, prefix + "%s.kdenlive" % timeline)
         project_xml = project.to_xml()
         write_if_new(path, project_xml)
+
+        if options.get("debug"):
+            # cross-check frame counts: melt output vs. project object vs. scene
+            out_duration = get_project_duration(path)
+            p_duration = project.get_duration()
+            print(path, 'out: %s, project: %s, scene: %s' %(out_duration, p_duration, scene_duration))
+            if p_duration != scene_duration:
+                print(path, 'FAIL project: %s, scene: %s' %(p_duration, scene_duration))
+                _cache = os.path.join(root, "cache.json")
+                with open(_cache, "w") as fd:
+                    json.dump(_CACHE, fd)
+                sys.exit(1)
+            if out_duration != p_duration:
+                print(path, 'fail got: %s expected: %s' %(out_duration, p_duration))
+                sys.exit(1)
+
         files.append(path)
     return files
 
+def get_project_duration(file):
+    """Return the length, in frames, that melt reports for a project file."""
+    root = lxml.etree.fromstring(melt_xml(file))
+    # 'out' is taken to be the last frame index, hence the +1 -- TODO confirm
+    return int(root.xpath('producer')[0].attrib['out']) + 1
 
 def get_fragments(clips, voice_over, prefix):
     import itemlist.models
@@ -415,8 +460,6 @@ def get_fragments(clips, voice_over, prefix):
     return fragments
 
 
-def render_timeline(options):
-
 def render_all(options):
     prefix = options['prefix']
     duration = int(options['duration'])
@@ -470,7 +513,7 @@ def render_all(options):
         elif position < target_position:
             target = target + 0.1 * fragment_target
 
-        timelines = render(prefix, scene, fragment_prefix[len(prefix) + 1:] + '/')
+        timelines = render(prefix, scene, fragment_prefix[len(prefix) + 1:] + '/', options)
 
         scene_json = json.dumps(scene, indent=2, ensure_ascii=False)
         write_if_new(os.path.join(fragment_prefix, 'scene.json'), scene_json)
@@ -481,9 +524,8 @@ def render_all(options):
                 ext = '.mp4'
                 if '/audio' in timeline:
                     ext = '.wav'
-                cmd = [
-                    'xvfb-run', '-a',
-                    'melt', timeline,
+                cmd = get_melt() + [
+                    timeline,
                     '-quiet',
                     '-consumer', 'avformat:%s' % timeline.replace('.kdenlive', ext),
                 ]
@@ -596,10 +638,7 @@ def render_all(options):
             if '/audio' in timelines[0]:
                 ext = '.wav'
             out = base_prefix / (timeline + ext)
-            cmd = [
-                'xvfb-run', '-a',
-                'melt'
-            ] + timelines + [
+            cmd = get_melt() + timelines + [
                 '-quiet',
                 '-consumer', 'avformat:%s' % out,
             ]
diff --git a/render_kdenlive.py b/render_kdenlive.py
index 2431500..cdf755b 100644
--- a/render_kdenlive.py
+++ b/render_kdenlive.py
@@ -4,6 +4,7 @@ import subprocess
 import lxml.etree
 import uuid
 import os
+import sys
 
 _CACHE = {}
 _IDS = defaultdict(int)
@@ -12,6 +13,14 @@ def get_propery(element, name):
     return element.xpath('property[@name="%s"]' % name)[0].text
 
 
+def get_melt():
+    """Return the melt argv prefix, wrapped in xvfb-run when DISPLAY is unset."""
+    # side effect: sets a fallback XDG_RUNTIME_DIR in this process's environment
+    os.environ.setdefault('XDG_RUNTIME_DIR', '/tmp/runtime-pandora')
+    if 'DISPLAY' in os.environ:
+        return ['melt']
+    return ['xvfb-run', '-a', 'melt']
+
 def melt_xml(file):
     out = None
     real_path = os.path.realpath(file)
@@ -20,7 +29,8 @@ def melt_xml(file):
         if os.stat(real_path).st_mtime != ts:
             out = None
     if not out:
-        out = subprocess.check_output(['melt', file, '-consumer', 'xml']).decode()
+        cmd = get_melt() + [file, '-consumer', 'xml']
+        out = subprocess.check_output(cmd).decode()
         _CACHE[file] = [os.stat(real_path).st_mtime, out]
     return out