From 2a2516bff91e3be63f9ef0f59751fc0dda20f6c6 Mon Sep 17 00:00:00 2001 From: j Date: Tue, 3 Dec 2024 19:35:37 +0000 Subject: [PATCH 1/3] pad audio tracks to scene duration --- render.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/render.py b/render.py index 4cb2c4a..97d412a 100644 --- a/render.py +++ b/render.py @@ -66,6 +66,7 @@ def write_if_new(path, data, mode=''): def compose(clips, target=150, base=1024, voice_over=None): + fps = 24 length = 0 scene = { 'front': { @@ -100,6 +101,7 @@ def compose(clips, target=150, base=1024, voice_over=None): used = [] voice_overs = [] + sub_offset = 0 if voice_over: vo_keys = list(sorted(voice_over)) if chance(seq, 0.5): @@ -118,7 +120,7 @@ def compose(clips, target=150, base=1024, voice_over=None): if vo_min > target: target = vo_min elif vo_min < target: - offset = (target - vo_min) / 2 + offset = int(((target - vo_min) / 2) * fps) / fps scene['audio-center']['A1'].append({ 'blank': True, 'duration': offset @@ -298,6 +300,16 @@ def compose(clips, target=150, base=1024, voice_over=None): }) used.append(clip) print("scene duration %0.3f (target: %0.3f, vo_min: %0.3f)" % (length, target, vo_min)) + if sub_offset < length: + delta = length - sub_offset + scene['audio-center']['A1'].append({ + 'blank': True, + 'duration': delta + }) + scene['audio-rear']['A1'].append({ + 'blank': True, + 'duration': delta + }) return scene, used def get_scene_duration(scene): @@ -321,7 +333,7 @@ def get_offset_duration(prefix): def render(root, scene, prefix=''): fps = 24 files = [] - scene_duration = int(get_scene_duration(scene) * 24) + scene_duration = int(get_scene_duration(scene) * fps) for timeline, data in scene.items(): if timeline == "subtitles": path = os.path.join(root, prefix + "front.srt") @@ -338,14 +350,14 @@ def render(root, scene, prefix=''): #print(track) for clip in clips: project.append_clip(track, clip) - track_durations[track] = int(sum([c['duration'] for c in clips]) * 24) + track_durations[track] = int(sum([c['duration'] for c in clips]) * fps) if timeline.startswith('audio-'): track_duration = project.get_duration() delta = scene_duration - track_duration if delta > 0: for track in track_durations: if track_durations[track] == track_duration: - project.append_clip(track, {'blank': True, "duration": delta/24}) + project.append_clip(track, {'blank': True, "duration": delta/fps}) break path = os.path.join(root, prefix + "%s.kdenlive" % timeline) project_xml = project.to_xml() From 95a41fc2e2a1ab7c1ec1ba4659a97124d4af5307 Mon Sep 17 00:00:00 2001 From: j Date: Tue, 3 Dec 2024 20:12:15 +0000 Subject: [PATCH 2/3] single video render --- management/commands/render.py | 3 + render.py | 133 ++++++++++++++++++++++++++++++++-- 2 files changed, 131 insertions(+), 5 deletions(-) diff --git a/management/commands/render.py b/management/commands/render.py index ace0d61..54e66c2 100644 --- a/management/commands/render.py +++ b/management/commands/render.py @@ -16,6 +16,9 @@ class Command(BaseCommand): parser.add_argument('--duration', action='store', dest='duration', default="3600", help='target duration of all fragments in seconds') parser.add_argument('--offset', action='store', dest='offset', default="1024", help='inital offset in pi') parser.add_argument('--no-video', action='store_true', dest='no_video', default=False, help='don\'t render video') + parser.add_argument('--single-file', action='store_true', dest='single_file', default=False, help='render to single video') + parser.add_argument('--keep-audio', action='store_true', dest='keep_audio', default=False, help='keep independent audio tracks') + parser.add_argument('--debug', action='store_true', dest='debug', default=False, help='output more info') def handle(self, **options): render_all(options) diff --git a/render.py b/render.py index 97d412a..32d43a7 100644 --- a/render.py +++ b/render.py @@ -313,6 +313,9 @@ def compose(clips, target=150, base=1024, voice_over=None): return scene, used def get_scene_duration(scene): + if isinstance(scene, str): + with open(scene) as fd: + scene = json.load(fd) duration = 0 for key, value in scene.items(): for name, clips in value.items(): @@ -325,8 +328,6 @@ def get_offset_duration(prefix): for root, folders, files in os.walk(prefix): for f in files: if f == 'scene.json': - path = os.path.join(root, f) - scene = json.load(open(path)) duration += get_scene_duration(scene) return duration @@ -414,6 +415,8 @@ def get_fragments(clips, voice_over, prefix): return fragments +def render_timeline(options): + def render_all(options): prefix = options['prefix'] duration = int(options['duration']) @@ -472,7 +475,7 @@ def render_all(options): scene_json = json.dumps(scene, indent=2, ensure_ascii=False) write_if_new(os.path.join(fragment_prefix, 'scene.json'), scene_json) - if not options['no_video']: + if not options['no_video'] and not options["single_file"]: for timeline in timelines: print(timeline) ext = '.mp4' @@ -502,8 +505,8 @@ def render_all(options): subprocess.call(cmd) os.unlink(timeline.replace('.kdenlive', ext)) - fragment_prefix = Path(fragment_prefix) cmds = [] + fragment_prefix = Path(fragment_prefix) for src, out1, out2 in ( ("audio-front.wav", "fl.wav", "fr.wav"), ("audio-center.wav", "fc.wav", "lfe.wav"), @@ -547,7 +550,8 @@ def render_all(options): fragment_prefix / "back-audio.mp4", ]) for cmd in cmds: - #print(" ".join([str(x) for x in cmd])) + if options["debug"]: + print(" ".join([str(x) for x in cmd])) subprocess.call(cmd) for a, b in ( @@ -562,6 +566,10 @@ def render_all(options): sys.exit(-1) shutil.move(fragment_prefix / "back-audio.mp4", fragment_prefix / "back.mp4") shutil.move(fragment_prefix / "front-5.1.mp4", fragment_prefix / "front.mp4") + if options["keep_audio"]: + shutil.move(fragment_prefix / "audio-center.wav", fragment_prefix / "vocals.wav") + shutil.move(fragment_prefix / "audio-front.wav", fragment_prefix / "foley.wav") + shutil.move(fragment_prefix / "audio-back.wav", fragment_prefix / "original.wav") for fn in ( "audio-5.1.mp4", "audio-center.wav", "audio-rear.wav", @@ -572,6 +580,109 @@ def render_all(options): if os.path.exists(fn): os.unlink(fn) + if options["single_file"]: + cmds = [] + base_prefix = Path(base_prefix) + for timeline in ( + "front", + "back", + "audio-back", + "audio-center", + "audio-front", + "audio-rear", + ): + timelines = list(sorted(glob('%s/*/%s.kdenlive' % (base_prefix, timeline)))) + ext = '.mp4' + if '/audio' in timelines[0]: + ext = '.wav' + out = base_prefix / (timeline + ext) + cmd = [ + 'xvfb-run', '-a', + 'melt' + ] + timelines + [ + '-quiet', + '-consumer', 'avformat:%s' % out, + ] + if ext == '.wav': + cmd += ['vn=1'] + else: + cmd += ['an=1'] + cmd += ['vcodec=libx264', 'x264opts=keyint=1', 'crf=15'] + cmds.append(cmd) + for src, out1, out2 in ( + ("audio-front.wav", "fl.wav", "fr.wav"), + ("audio-center.wav", "fc.wav", "lfe.wav"), + ("audio-rear.wav", "bl.wav", "br.wav"), + ): + cmds.append([ + "ffmpeg", "-y", + "-nostats", "-loglevel", "error", + "-i", base_prefix / src, + "-filter_complex", + "[0:0]pan=1|c0=c0[left]; [0:0]pan=1|c0=c1[right]", + "-map", "[left]", base_prefix / out1, + "-map", "[right]", base_prefix / out2, + ]) + cmds.append([ + "ffmpeg", "-y", + "-nostats", "-loglevel", "error", + "-i", base_prefix / "fl.wav", + "-i", base_prefix / "fr.wav", + "-i", base_prefix / "fc.wav", + "-i", base_prefix / "lfe.wav", + "-i", base_prefix / "bl.wav", + "-i", base_prefix / "br.wav", + "-filter_complex", "[0:a][1:a][2:a][3:a][4:a][5:a]amerge=inputs=6[a]", + "-map", "[a]", "-c:a", "aac", base_prefix / "audio-5.1.mp4" + ]) + cmds.append([ + "ffmpeg", "-y", + "-nostats", "-loglevel", "error", + "-i", base_prefix / "front.mp4", + "-i", base_prefix / "audio-5.1.mp4", + "-c", "copy", + base_prefix / "front-5.1.mp4", + ]) + cmds.append([ + "ffmpeg", "-y", + "-nostats", "-loglevel", "error", + "-i", base_prefix / "back.mp4", + "-i", base_prefix / "audio-back.wav", + "-c:v", "copy", + base_prefix / "back-audio.mp4", + ]) + for cmd in cmds: + if options["debug"]: + print(" ".join([str(x) for x in cmd])) + subprocess.call(cmd) + + for a, b in ( + ("back-audio.mp4", "back.mp4"), + ("front-5.1.mp4", "back.mp4"), + ): + duration_a = ox.avinfo(str(base_prefix / a))['duration'] + duration_b = ox.avinfo(str(base_prefix / b))['duration'] + if duration_a != duration_b: + print('!!', duration_a, base_prefix / a) + print('!!', duration_b, base_prefix / b) + sys.exit(-1) + shutil.move(base_prefix / "back-audio.mp4", base_prefix / "back.mp4") + shutil.move(base_prefix / "front-5.1.mp4", base_prefix / "front.mp4") + if options["keep_audio"]: + shutil.move(base_prefix / "audio-center.wav", base_prefix / "vocals.wav") + shutil.move(base_prefix / "audio-front.wav", base_prefix / "foley.wav") + shutil.move(base_prefix / "audio-back.wav", base_prefix / "original.wav") + for fn in ( + "audio-5.1.mp4", + "audio-center.wav", "audio-rear.wav", + "audio-front.wav", "audio-back.wav", "back-audio.mp4", + "fl.wav", "fr.wav", "fc.wav", "lfe.wav", "bl.wav", "br.wav", + ): + fn = base_prefix / fn + if os.path.exists(fn): + os.unlink(fn) + join_subtitles(base_prefix) + print("Duration - Target: %s Actual: %s" % (target_position, position)) print(json.dumps(dict(stats), sort_keys=True, indent=2)) with open(_cache, "w") as fd: @@ -727,3 +838,15 @@ def render_infinity(options): with open(state_f + "~", "w") as fd: json.dump(state, fd, indent=2) shutil.move(state_f + "~", state_f) + + +def join_subtitles(base_prefix): + subtitles = list(sorted(glob('%s/*/front.srt' % base_prefix))) + data = [] + position = 0 + for srt in subtitles: + scene = srt.replace('front.srt', 'scene.json') + data += ox.srt.load(srt, offset=position) + position += get_scene_duration(scene) + with open(base_prefix / 'front.srt', 'wb') as fd: + fd.write(ox.srt.encode(data)) From b2552d6059ed1fd51953e3cf3407371d9b5f47d5 Mon Sep 17 00:00:00 2001 From: j Date: Wed, 4 Dec 2024 09:16:24 +0000 Subject: [PATCH 3/3] make sure all tracks are exactly the same length --- management/commands/generate_clips.py | 9 ++- render.py | 79 ++++++++++++++++++++------- render_kdenlive.py | 12 +++- 3 files changed, 78 insertions(+), 22 deletions(-) diff --git a/management/commands/generate_clips.py b/management/commands/generate_clips.py index 23bc61c..e2ecca9 100644 --- a/management/commands/generate_clips.py +++ b/management/commands/generate_clips.py @@ -23,6 +23,9 @@ def resolve_roman(s): return s.replace(extra, new) return s +def format_duration(duration, fps): + return float('%0.5f' % (round(duration * fps) / fps)) + class Command(BaseCommand): help = 'generate symlinks to clips and clips.json' @@ -68,6 +71,10 @@ class Command(BaseCommand): if not clip["duration"]: print('!!', durations, clip) continue + cd = format_duration(clip["duration"], 24) + #if cd != clip["duration"]: + # print(clip["duration"], '->', cd, durations, clip) + clip["duration"] = cd clip['tags'] = i.data.get('tags', []) clip['editingtags'] = i.data.get('editingtags', []) name = os.path.basename(clip['original']) @@ -117,7 +124,7 @@ class Command(BaseCommand): subs.append(sdata) voice_over[fragment_id][batch] = { "src": target, - "duration": source.duration, + "duration": format_duration(source.duration, 24), "subs": subs } with open(os.path.join(prefix, 'voice_over.json'), 'w') as fd: diff --git a/render.py b/render.py index 32d43a7..36518bf 100644 --- a/render.py +++ b/render.py @@ -11,8 +11,10 @@ import time from pathlib import Path import ox +import lxml.etree + from .pi import random -from .render_kdenlive import KDEnliveProject, _CACHE +from .render_kdenlive import KDEnliveProject, _CACHE, melt_xml, get_melt def random_int(seq, length): @@ -64,6 +66,8 @@ def write_if_new(path, data, mode=''): with open(path, write_mode) as fd: fd.write(data) +def format_duration(duration, fps): + return float('%0.5f' % (round(duration * fps) / fps)) def compose(clips, target=150, base=1024, voice_over=None): fps = 24 @@ -120,7 +124,7 @@ def compose(clips, target=150, base=1024, voice_over=None): if vo_min > target: target = vo_min elif vo_min < target: - offset = int(((target - vo_min) / 2) * fps) / fps + offset = format_duration((target - vo_min) / 2, fps) scene['audio-center']['A1'].append({ 'blank': True, 'duration': offset @@ -188,7 +192,7 @@ def compose(clips, target=150, base=1024, voice_over=None): if length + clip['duration'] > target and length >= vo_min: break print('%06.3f %06.3f' % (length, clip['duration']), os.path.basename(clip['original'])) - length += clip['duration'] + length += int(clip['duration'] * fps) / fps if "foreground" not in clip and "animation" in clip: fg = clip['animation'] @@ -300,8 +304,11 @@ def compose(clips, target=150, base=1024, voice_over=None): }) used.append(clip) print("scene duration %0.3f (target: %0.3f, vo_min: %0.3f)" % (length, target, vo_min)) - if sub_offset < length: - delta = length - sub_offset + scene_duration = int(get_scene_duration(scene) * fps) + sub_offset = int(sub_offset * fps) + if sub_offset < scene_duration: + delta = format_duration((scene_duration - sub_offset) / fps, fps) + print(">> add %0.3f of silence.. %0.3f (scene_duration)" % (delta, scene_duration / fps)) scene['audio-center']['A1'].append({ 'blank': True, 'duration': delta @@ -310,8 +317,24 @@ def compose(clips, target=150, base=1024, voice_over=None): 'blank': True, 'duration': delta }) + elif sub_offset > scene_duration: + delta = format_duration((scene_duration - sub_offset) / fps, fps) + scene['audio-center']['A1'][-1]["duration"] += delta + scene['audio-rear']['A1'][-1]["duration"] += delta + print("WTF, needed to cut %s new duration: %s" % (delta, scene['audio-center']['A1'][-1]["duration"])) + print(scene['audio-center']['A1'][-1]) return scene, used +def get_track_duration(scene, k, n): + duration = 0 + for key, value in scene.items(): + if key == k: + for name, clips in value.items(): + if name == n: + for clip in clips: + duration += int(clip['duration'] * 24) + return duration / 24 + def get_scene_duration(scene): if isinstance(scene, str): with open(scene) as fd: @@ -320,8 +343,8 @@ def get_scene_duration(scene): for key, value in scene.items(): for name, clips in value.items(): for clip in clips: - duration += clip['duration'] - return duration + duration += int(clip['duration'] * 24) + return duration / 24 def get_offset_duration(prefix): duration = 0 @@ -331,7 +354,8 @@ def get_offset_duration(prefix): duration += get_scene_duration(scene) return duration -def render(root, scene, prefix=''): +def render(root, scene, prefix='', options=None): + if options is None: options = {} fps = 24 files = [] scene_duration = int(get_scene_duration(scene) * fps) @@ -351,7 +375,7 @@ def render(root, scene, prefix=''): #print(track) for clip in clips: project.append_clip(track, clip) - track_durations[track] = int(sum([c['duration'] for c in clips]) * fps) + track_durations[track] = sum([int(c['duration'] * fps) for c in clips]) if timeline.startswith('audio-'): track_duration = project.get_duration() delta = scene_duration - track_duration @@ -359,13 +383,34 @@ def render(root, scene, prefix=''): for track in track_durations: if track_durations[track] == track_duration: project.append_clip(track, {'blank': True, "duration": delta/fps}) - break + path = os.path.join(root, prefix + "%s.kdenlive" % timeline) project_xml = project.to_xml() write_if_new(path, project_xml) + + if options["debug"]: + # check duration + out_duration = get_project_duration(path) + p_duration = project.get_duration() + print(path, 'out: %s, project: %s, scene: %s' %(out_duration, p_duration, scene_duration)) + if p_duration != scene_duration: + print(path, 'FAIL project: %s, scene: %s' %(p_duration, scene_duration)) + _cache = os.path.join(root, "cache.json") + with open(_cache, "w") as fd: + json.dump(_CACHE, fd) + sys.exit(1) + if out_duration != p_duration: + print(path, 'fail got: %s expected: %s' %(out_duration, p_duration)) + sys.exit(1) + files.append(path) return files +def get_project_duration(file): + out = melt_xml(file) + chain = lxml.etree.fromstring(out).xpath('producer')[0] + duration = int(chain.attrib['out']) + 1 + return duration def get_fragments(clips, voice_over, prefix): import itemlist.models @@ -415,8 +460,6 @@ def get_fragments(clips, voice_over, prefix): return fragments -def render_timeline(options): - def render_all(options): prefix = options['prefix'] duration = int(options['duration']) @@ -470,7 +513,7 @@ def render_all(options): elif position < target_position: target = target + 0.1 * fragment_target - timelines = render(prefix, scene, fragment_prefix[len(prefix) + 1:] + '/') + timelines = render(prefix, scene, fragment_prefix[len(prefix) + 1:] + '/', options) scene_json = json.dumps(scene, indent=2, ensure_ascii=False) write_if_new(os.path.join(fragment_prefix, 'scene.json'), scene_json) @@ -481,9 +524,8 @@ def render_all(options): ext = '.mp4' if '/audio' in timeline: ext = '.wav' - cmd = [ - 'xvfb-run', '-a', - 'melt', timeline, + cmd = get_melt() + [ + timeline, '-quiet', '-consumer', 'avformat:%s' % timeline.replace('.kdenlive', ext), ] @@ -596,10 +638,7 @@ def render_all(options): if '/audio' in timelines[0]: ext = '.wav' out = base_prefix / (timeline + ext) - cmd = [ - 'xvfb-run', '-a', - 'melt' - ] + timelines + [ + cmd = get_melt() + timelines + [ '-quiet', '-consumer', 'avformat:%s' % out, ] diff --git a/render_kdenlive.py b/render_kdenlive.py index 2431500..cdf755b 100644 --- a/render_kdenlive.py +++ b/render_kdenlive.py @@ -4,6 +4,7 @@ import subprocess import lxml.etree import uuid import os +import sys _CACHE = {} _IDS = defaultdict(int) @@ -12,6 +13,14 @@ def get_propery(element, name): return element.xpath('property[@name="%s"]' % name)[0].text +def get_melt(): + cmd = ['melt'] + if 'XDG_RUNTIME_DIR' not in os.environ: + os.environ['XDG_RUNTIME_DIR'] = '/tmp/runtime-pandora' + if 'DISPLAY' not in os.environ: + cmd = ['xvfb-run', '-a'] + cmd + return cmd + def melt_xml(file): out = None real_path = os.path.realpath(file) @@ -20,7 +29,8 @@ def melt_xml(file): if os.stat(real_path).st_mtime != ts: out = None if not out: - out = subprocess.check_output(['melt', file, '-consumer', 'xml']).decode() + cmd = get_melt() + [file, '-consumer', 'xml'] + out = subprocess.check_output(cmd).decode() _CACHE[file] = [os.stat(real_path).st_mtime, out] return out