From f08520e47f5984ca2d9da730ea6193899e84e284 Mon Sep 17 00:00:00 2001
From: j <j@mailb.org>
Date: Tue, 27 Jan 2026 12:34:22 +0100
Subject: [PATCH] double VO

---
 generate.py | 12 ++++++++++--
 render.py   | 49 ++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 46 insertions(+), 15 deletions(-)

diff --git a/generate.py b/generate.py
index 74d531c..3ac3c65 100644
--- a/generate.py
+++ b/generate.py
@@ -1243,20 +1243,28 @@ def process_reshoot_firstframe(character='P1'):
             print('>> skip', item)
             continue
         if item.sort.duration > 30:
-            reshoot_item_segments(item, character)
+            pass
+            #reshoot_item_segments(item, character)
         else:
             cid = get_character_document(character).get_id()
             first_frame = item.documents.filter(
                 data__character=cid, data__position=position
             ).order_by('-created').first()
             if not first_frame:
-                first_frame = replace_character(item, character, position)
+                try:
+                    first_frame = replace_character(item, character, position)
+                except Exception:
+                    item.refresh_from_db()
+                    add_tag(item, 'ai-failed')
+                    print('>> failed', item)
+                    continue  # no first frame to reshoot from, go to the next item
             if first_frame.items.filter(data__type__icontains='ai:').exists():
                 continue
             print(item, first_frame)
             try:
                 reshoot_item(item, first_frame=first_frame)
             except:
+                item.refresh_from_db()
                 add_tag(item, 'ai-failed')
                 print('>> failed', item)
 
diff --git a/render.py b/render.py
index 2229bff..6497e0d 100644
--- a/render.py
+++ b/render.py
@@ -248,6 +248,7 @@ def compose(clips, fragment, target=150, base=1024, voice_over=None, options=Non
             vo = voice_over[vo_key][variant]
             if isinstance(vo, list):
                 vo, vo_b = vo
+
             else:
                 vo_b = None
             while int((vo['duration'] + sub_offset) * fps) > scene_duration:
@@ -285,7 +286,7 @@ def compose(clips, fragment, target=150, base=1024, voice_over=None, options=Non
                 scene['audio-center']['A2'].append(vo_b)
                 vo_b = vo_b.copy()
                 vo_b['filter'] = {'volume': b}
-                scene['audio-rear']['A1'].append(vo_b)
+                scene['audio-rear']['A2'].append(vo_b)
             else:
                 for tl, track in (
                     ('audio-center', 'A2'),
@@ -621,18 +622,24 @@ def render_all(options):
                 if abs(duration_a - duration_b) > 1/48:
                     print('!!', duration_a, fragment_prefix / a)
                     print('!!', duration_b, fragment_prefix / b)
-                    sys.exit(-1)
+                    #sys.exit(-1)
             shutil.move(fragment_prefix / "front-mixed.mp4", fragment_prefix / "front.mp4")
-            if options["keep_audio"]:
-                shutil.move(fragment_prefix / "audio-center.wav", fragment_prefix / "vocals.wav")
-                shutil.move(fragment_prefix / "audio-front.wav", fragment_prefix / "foley.wav")
-            for fn in (
+            cleanup = [
                 "audio-5.1.mp4",
                 "audio-center.wav", "audio-rear.wav",
                 "audio-front.wav",
                 "fl.wav", "fr.wav", "fc.wav", "lfe.wav", "bl.wav", "br.wav",
                 "audio-stereo.wav",
-            ):
+            ]
+            if options["keep_audio"]:
+                shutil.move(fragment_prefix / "audio-center.wav", fragment_prefix / "vocals.wav")
+                shutil.move(fragment_prefix / "audio-front.wav", fragment_prefix / "foley.wav")
+            else:
+                cleanup += [
+                    "vocals.wav",
+                    "foley.wav"
+                ]
+            for fn in cleanup:
                 fn = fragment_prefix / fn
                 if os.path.exists(fn):
                     os.unlink(fn)
@@ -959,9 +966,6 @@ def generate_clips(options):
             if not durations:
                 print(i.public_id, 'no duration!', clip)
                 continue
-            if len(set(durations)) > 1:
-                print(clip, durations)
-
             clip["duration"] = min(durations) - 1/24
             # trim to a multiple of the output fps
             d1 = format_duration(clip["duration"], fps)
@@ -1009,7 +1013,7 @@ def generate_clips(options):
 
         fragment = '%02d' % int(parts[0].replace('ch', ''))
         type = parts[1]
-        variant = '-'.join(parts[2:4])
+        variant = '-'.join(parts[2:]).split('-ElevenLabs')[0]
         source = vo.files.filter(selected=True)[0]
         src = source.data.path
         ext = src.split('.')[-1]
@@ -1028,12 +1032,31 @@ def generate_clips(options):
             voice_over[fragment] = {}
         if type not in voice_over[fragment]:
             voice_over[fragment][type] = []
-        voice_over[fragment][type].append({
+        vo_variant = {
             "variant": variant,
             "src": target,
             #"duration": format_duration(source.duration, fps, True),
             "duration": source.duration,
             "subs": subs
-        })
+        }
+        done = False
+        if type == 'quote':
+            if '-a-t' in variant:
+                b_variant = variant.replace('-a-t', '-b-t').split('-t')[0]
+                for old in voice_over[fragment][type]:
+                    # an "-a" entry goes in front of a matching "-b" list collected earlier
+                    if isinstance(old, list) and old[0]['variant'].startswith(b_variant):
+                        old.insert(0, vo_variant)
+                        done = True
+            elif '-b-t' in variant:
+                a_variant = variant.replace('-b-t', '-a-t').split('-t')[0]
+                for old in voice_over[fragment][type]:
+                    if isinstance(old, list) and old[0]['variant'].startswith(a_variant):
+                        old.append(vo_variant)
+                        done = True
+            if not done and ('-a-t' in variant or '-b-t' in variant):
+                vo_variant = [vo_variant]
+        if not done:
+            voice_over[fragment][type].append(vo_variant)
     with open(os.path.join(prefix, 'voice_over.json'), 'w') as fd:
         json.dump(voice_over, fd, indent=2, ensure_ascii=False)