From 0c4f55006d71f72870e28095e2216077ad2fd68b Mon Sep 17 00:00:00 2001 From: j Date: Mon, 26 Jan 2026 18:34:27 +0100 Subject: [PATCH 1/3] fix scene duration --- render_utils.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/render_utils.py b/render_utils.py index 8a39d7e..21c86dd 100644 --- a/render_utils.py +++ b/render_utils.py @@ -5,6 +5,7 @@ import lxml.etree import ox from .render_kdenlive import melt_xml +from .utils import format_duration def parse_lang(lang): @@ -57,17 +58,20 @@ def get_clip_by_seqid(clips, seqid): return None -def get_scene_duration(scene): +def get_scene_duration(scene, fps=24, track=None): if isinstance(scene, str): with open(scene) as fd: scene = json.load(fd) duration = 0 for key, value in scene.items(): for name, clips in value.items(): + if track and '%s:%s' % (key, name) != track: + continue if clips: for clip in clips: - duration += int(clip["duration"] * 24) - return duration / 24 + duration += round(clip["duration"] * fps) + #print("scene duration based on %s:%s is %s %s" % (key, name, duration / fps, format_duration(duration / fps, fps))) + return duration / fps def get_offset_duration(prefix): From c47e6a5e15b0d53ec127f1b6aa6201e328ed561f Mon Sep 17 00:00:00 2001 From: j Date: Mon, 26 Jan 2026 18:34:55 +0100 Subject: [PATCH 2/3] source material might not be in same fps, import with profile --- render_kdenlive.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/render_kdenlive.py b/render_kdenlive.py index 1cdc6b3..245d8aa 100644 --- a/render_kdenlive.py +++ b/render_kdenlive.py @@ -21,7 +21,7 @@ def get_melt(): cmd = ['xvfb-run', '-a'] + cmd return cmd -def melt_xml(file): +def melt_xml(file, profile='atsc_1080p_24'): out = None real_path = os.path.realpath(file) if file in _CACHE and isinstance(_CACHE[file], list): @@ -29,7 +29,7 @@ def melt_xml(file): if os.stat(real_path).st_mtime != ts: out = None if not out: - cmd = get_melt() + [file, '-consumer', 'xml'] + cmd = get_melt() + [file, '-profile', profile, '-consumer', 'xml'] out = subprocess.check_output(cmd).decode() _CACHE[file] = [os.stat(real_path).st_mtime, out] return out @@ -73,6 +73,7 @@ class KDEnliveProject: self._width = int(width) self._height = int(height) self._fps = int(frame_rate_num) / int(frame_rate_den) + self.profile = 'atsc_1080p_24' self._tree = self.get_element("mlt", attrib={ "LC_NUMERIC": "C", @@ -444,7 +445,7 @@ class KDEnliveProject: return prefix + self.get_counter(prefix) def get_chain(self, file, kdenlive_id=None): - out = melt_xml(file) + out = melt_xml(file, self.profile) chain = lxml.etree.fromstring(out).xpath('producer')[0] chain.tag = 'chain' chain.attrib['id'] = self.get_id('chain') @@ -597,7 +598,11 @@ class KDEnliveProject: else: print('!!', track_id) - frames = int(self._fps * clip['duration']) + frames = int(round(self._fps * clip['duration'])) + if track_id[0] == "V": + if abs(self._fps * clip['duration'] - frames) > 1/48: + delta = abs(self._fps * clip['duration'] - frames) * 24 + print("Track alignment issues", self._fps * clip['duration'], frames, clip.get('src', clip), delta) self._duration[track_id] += frames if clip.get("blank"): From f8cbbd55c78df2353d600e68cb6464ecd41964c9 Mon Sep 17 00:00:00 2001 From: j Date: Mon, 26 Jan 2026 18:35:29 +0100 Subject: [PATCH 3/3] various duration issues, prepare for double vo special case --- render.py | 113 +++++++++++++++++++++++++++++++++++++++++------------- utils.py | 7 +++- 2 files changed, 92 insertions(+), 28 deletions(-) diff --git a/render.py b/render.py index 57925ef..2229bff 100644 --- a/render.py +++ b/render.py @@ -61,6 +61,7 @@ def compose(clips, fragment, target=150, base=1024, voice_over=None, options=Non }, 'audio-center': { 'A1': [], + 'A2': [], }, 'audio-front': { 'A1': [], @@ -132,7 +133,9 @@ def compose(clips, fragment, target=150, base=1024, voice_over=None, options=Non next_length = length + clip['duration'] if target - next_length < -target*0.1: break - clip_duration = int(clip['duration'] * fps) / fps + clip_duration = format_duration(clip['duration'], fps) + if clip['duration'] != clip_duration: + print("WTF", clip, clip['duration'], clip_duration) length += clip_duration # 50/50 source or ai @@ -145,13 +148,13 @@ def compose(clips, fragment, target=150, base=1024, voice_over=None, options=Non print('%07.3f-%07.3f %07.3f %s (%s)' % ( length-clip_duration, length, - clip['duration'], + clip_duration, os.path.basename(clip['source']), src.split('/')[-2] )) scene['front']['V2'].append({ - 'duration': clip['duration'], + 'duration': clip_duration, 'src': src, "filter": { } @@ -181,10 +184,21 @@ def compose(clips, fragment, target=150, base=1024, voice_over=None, options=Non 'fadein': '00:00:00.125' } scene['audio-front']['A2'].append({ - 'duration': clip['duration'], + 'duration': clip_duration, 'src': audio, 'filter': audio_filter.copy() }) + + length = format_duration(length, fps) + ad = get_scene_duration(scene, track='audio-front:A2') + vd = get_scene_duration(scene, track='front:V2') + if ad == vd and abs(ad-length) > 1/48: + print('v: ', vd, 'ad', ad, 'length:', length, 'fixup') + length = ad + if abs(length -vd) > 1/48 or abs(length - ad) > 1/48 or ad != vd: + print('vd: ', vd, 'ad', ad, 'length:', length) + print(clip) + sys.exit(-1) used.append(clip) if not clips and target - length > 0: print("not enough clips, need to reset") @@ -200,7 +214,7 @@ def compose(clips, fragment, target=150, base=1024, voice_over=None, options=Non if "ai" in clip: clip["use_ai"] = True - scene_duration = int(get_scene_duration(scene) * fps) + scene_duration = int(round(get_scene_duration(scene) * fps)) voice_overs = [] sub_offset = 0 subs = [] @@ -214,22 +228,28 @@ def compose(clips, fragment, target=150, base=1024, voice_over=None, options=Non else: gap = (2 * fps + random_int(seq, 5 * fps)) / fps gap = format_duration(gap, fps) - if int((sub_offset + gap)* fps) > scene_duration: + if int((sub_offset + gap) * fps) > scene_duration: gap = format_duration((scene_duration - int(sub_offset * fps)) / fps, fps) - scene['audio-center']['A1'].append({ - 'blank': True, - 'duration': gap - }) - scene['audio-rear']['A1'].append({ - 'blank': True, - 'duration': gap - }) + for tl, track in ( + ('audio-center', 'A1'), + ('audio-center', 'A2'), + ('audio-rear', 'A1'), + ('audio-rear', 'A2'), + ): + scene[tl][track].append({ + 'blank': True, + 'duration': gap + }) print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+gap, gap), 'silence') sub_offset += gap vo_key = random_choice(seq, vo_keys, pop=True) variant = random_int(seq, len(voice_over[vo_key])) vo = voice_over[vo_key][variant] + if isinstance(vo, list): + vo, vo_b = vo + else: + vo_b = None while int((vo['duration'] + sub_offset) * fps) > scene_duration: if not vo_keys: vo = None @@ -237,6 +257,10 @@ def compose(clips, fragment, target=150, base=1024, voice_over=None, options=Non vo_key = random_choice(seq, vo_keys, pop=True) variant = random_int(seq, len(voice_over[vo_key])) vo = voice_over[vo_key][variant] + if isinstance(vo, list): + vo, vo_b = vo + else: + vo_b = None if vo is None: break print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+vo["duration"], vo["duration"]), vo["src"].split('/')[-1]) @@ -255,13 +279,29 @@ def compose(clips, fragment, target=150, base=1024, voice_over=None, options=Non sub["in"] += sub_offset sub["out"] += sub_offset subs.append(sub) + if vo_b: + vo_b = vo_b.copy() + vo_b['filter'] = {'volume': a} + scene['audio-center']['A2'].append(vo_b) + vo_b = vo_b.copy() + vo_b['filter'] = {'volume': b} + scene['audio-rear']['A1'].append(vo_b) + else: + for tl, track in ( + ('audio-center', 'A2'), + ('audio-rear', 'A2'), + ): + scene[tl][track].append({ + 'blank': True, + 'duration': voc["duration"] + }) sub_offset += voc["duration"] if subs: scene["subtitles"] = subs + sub_offset = format_duration(sub_offset, fps) - sub_offset = int(sub_offset * fps) - if sub_offset < scene_duration: - gap = format_duration((scene_duration - sub_offset) / fps, fps) + if sub_offset < scene_duration/fps: + gap = scene_duration/fps - sub_offset print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+gap, gap), 'silence') scene['audio-center']['A1'].append({ 'blank': True, @@ -272,7 +312,18 @@ def compose(clips, fragment, target=150, base=1024, voice_over=None, options=Non 'duration': gap }) sub_offset += gap - print("scene duration %0.3f (target: %0.3f)" % (length, target)) + ''' + print("scene duration: %0.3f vo: %0.3f (length: %0.3f, target: %0.3f)" % ( + get_scene_duration(scene), + sub_offset, + length, + target + )) + ''' + print("scene duration: %0.3f (target: %0.3f)" % ( + get_scene_duration(scene), + target + )) return scene, used def write_subtitles(data, folder, options): @@ -560,10 +611,14 @@ def render_all(options): for a, b in ( ("front-mixed.mp4", "front.mp4"), + ("audio-center.wav", "front.mp4"), + ("audio-rear.wav", "front.mp4"), + ("audio-front.wav", "front.mp4"), + ("audio-5.1.mp4", "front.mp4"), ): duration_a = ox.avinfo(str(fragment_prefix / a))['duration'] duration_b = ox.avinfo(str(fragment_prefix / b))['duration'] - if duration_a != duration_b: + if abs(duration_a - duration_b) > 1/48: print('!!', duration_a, fragment_prefix / a) print('!!', duration_b, fragment_prefix / b) sys.exit(-1) @@ -854,6 +909,7 @@ def generate_clips(options): import item.models import itemlist.models + fps = 24 options = load_defaults(options) prefix = options['prefix'] lang, tlang = parse_lang(options["lang"]) @@ -873,7 +929,8 @@ def generate_clips(options): continue if not e.files.filter(selected=True).exists(): continue - source = e.files.filter(selected=True)[0].data.path + selected = e.files.filter(selected=True)[0] + source = selected.data.path ext = os.path.splitext(source)[1] type_ = e.data['type'][0].lower() if type_.startswith('ai:'): @@ -885,7 +942,7 @@ def generate_clips(options): ai_type = '%s-%s' % (type_[3:], n) n += 1 clip['ai'][ai_type] = target - type_ = ai_type + type_ = 'ai:' + ai_type target = os.path.join(prefix, 'video', type_, i.data['title'] + ext) if type_ == "source": source_target = target @@ -898,19 +955,22 @@ def generate_clips(options): if os.path.islink(target): os.unlink(target) os.symlink(source, target) - durations.append(e.files.filter(selected=True)[0].duration) + durations.append(selected.duration) if not durations: print(i.public_id, 'no duration!', clip) continue - clip["duration"] = min(durations) + if len(set(durations)) > 1: + print(clip, durations) + + clip["duration"] = min(durations) - 1/24 # trim to a multiple of the output fps - d1 = int(clip["duration"] * 24) / 24 + d1 = format_duration(clip["duration"], fps) if d1 != clip["duration"]: clip["duration"] = d1 if not clip["duration"]: print('!!', durations, clip) continue - cd = format_duration(clip["duration"], 24) + cd = format_duration(clip["duration"], fps) clip["duration"] = cd clip['tags'] = i.data.get('tags', []) adjust_volume = i.data.get('adjustvolume', '') @@ -971,7 +1031,8 @@ def generate_clips(options): voice_over[fragment][type].append({ "variant": variant, "src": target, - "duration": format_duration(source.duration, 24), + #"duration": format_duration(source.duration, fps, True), + "duration": source.duration, "subs": subs }) with open(os.path.join(prefix, 'voice_over.json'), 'w') as fd: diff --git a/utils.py b/utils.py index 8824d2b..1cdd826 100644 --- a/utils.py +++ b/utils.py @@ -57,6 +57,9 @@ def write_if_new(path, data, mode=''): with open(path, write_mode) as fd: fd.write(data) -def format_duration(duration, fps): - return float('%0.5f' % (round(duration * fps) / fps)) +def format_duration(duration, fps, audio=False): + if audio: + return float('%0.5f' % (int(duration * fps) / fps)) + else: + return float('%0.5f' % (round(duration * fps) / fps))