a/v render. gen fixes

This commit is contained in:
j 2026-01-24 13:26:30 +01:00
commit d62d4c5746
6 changed files with 706 additions and 268 deletions

365
render.py
View file

@ -20,7 +20,36 @@ from .render_utils import *
default_prefix = "/srv/p_for_power"
def compose(clips, target=150, base=1024, voice_over=None, options=None):
def get_loudnorm(file):
    """Measure a file's loudness with ffmpeg's loudnorm filter and cache the result.

    Runs ffmpeg's two-pass loudnorm analysis (audio only, no output file) and
    formats the measured values as a tab-separated string
    ``"L: <integrated>\\tR: <range>\\tP <true peak>"``.  The string is stored in
    ``file.info["loudnorm"]`` and persisted via ``file.save()`` so subsequent
    calls return the cached value without re-running ffmpeg.

    Args:
        file: a media-file model object exposing ``info`` (dict), ``data.path``
              (source path on disk) and ``save()``.  # assumes Django-style file model — TODO confirm

    Returns:
        str: the formatted loudness measurement.

    Raises:
        RuntimeError: if no loudnorm JSON block can be found in ffmpeg's output.
    """
    # Cached from a previous run — skip the (slow) ffmpeg analysis pass.
    if "loudnorm" in file.info:
        return file.info["loudnorm"]
    source = file.data.path
    cmd = [
        "ffmpeg",
        "-i", source,
        "-vn",                                   # ignore video streams
        "-af", "loudnorm=print_format=json",     # print measurement as JSON
        "-f", "null",                            # analysis only, discard output
        "-"
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    # ffmpeg writes the loudnorm JSON block to stderr, after the usual log noise.
    json_match = re.search(r"\{[\s\S]*\}", result.stderr)
    if not json_match:
        raise RuntimeError("Could not find loudnorm JSON output in ffmpeg output")
    loudnorm_data = json.loads(json_match.group(0))
    input_i = float(loudnorm_data.get("input_i", 0))      # Integrated loudness
    input_lra = float(loudnorm_data.get("input_lra", 0))  # Loudness range
    input_tp = float(loudnorm_data.get("input_tp", 0))    # True peak
    # NOTE: "input_thresh" is also reported by ffmpeg but is not part of the
    # cached string format consumed downstream, so it is intentionally ignored.
    loudnorm = f"L: {input_i:.6f}\tR: {input_lra:.6f}\tP {input_tp:.6f}"
    file.info["loudnorm"] = loudnorm
    file.save()
    return loudnorm
def compose(clips, fragment, target=150, base=1024, voice_over=None, options=None):
if options is None:
options = {}
fps = 24
@ -50,40 +79,175 @@ def compose(clips, target=150, base=1024, voice_over=None, options=None):
seq = random(10000 + base * 1000)
used = []
selected_clips_length = 0
ai_length = 0
selected_clips = []
tags = []
while selected_clips_length < target:
if not tags:
tags = fragment["tags"].copy()
tag = random_choice(seq, tags, pop=True)
non_ai_clips = []
ai_clips = []
for clip in clips:
if tag in clip["tags"]:
if 'ai' in clip:
ai_clips.append(clip)
else:
non_ai_clips.append(clip)
if ai_length < target * 0.6 and ai_clips:
clip = random_choice(seq, ai_clips, pop=True)
clip["use_ai"] = True
selected_clips.append(clip)
selected_clips_length += clip['duration']
ai_length += clip['duration']
clips = [c for c in clips if c['id'] != clip['id']]
continue
available_clips = non_ai_clips + ai_clips
if available_clips:
clip = random_choice(seq, available_clips, pop=True)
clip["use_ai"] = False
selected_clips.append(clip)
selected_clips_length += clip['duration']
clips = [c for c in clips if c['id'] != clip['id']]
clips = selected_clips
clip = None
while target - length > 0 and clips:
'''
if clip:
if chance(seq, 0.5):
next_seqid = clip['seqid'] + 1
clip = get_clip_by_seqid(clips, next_seqid)
else:
clip = None
'''
clip = None
if not clip:
# FIXME: while not all clips have AI versions make sure we have one 50% of the time
clip = random_choice(seq, clips, True)
next_length = length + clip['duration']
if target - next_length < -target*0.1:
break
length += int(clip['duration'] * fps) / fps
# 50/50 source or ai
src = clip['source']
audio = clip['source']
# select ai if we have one
if 'ai' in clip:
if clip["use_ai"]:
src = random_choice(seq, list(clip['ai'].values()), False)
print('%07.3f-%07.3f %07.3f %s (%s)' % (
length-clip['duration'],
length,
clip['duration'],
os.path.basename(clip['source']),
src.split('/')[-2]
))
scene['front']['V2'].append({
'duration': clip['duration'],
'src': src,
"filter": {
}
})
volume_front = '-2.5'
volume_rear = '-8.5'
if clip.get('volume') is not None:
volume_front = '%0.2f' % (float(volume_front) + clip['volume'])
volume_rear = '%0.2f' % (float(volume_rear) + clip['volume'])
'''
'dynamic_loudness': [
["target_loudness", "-35"],
["min_gain", "-15"],
["max_gin", "15"],
],
'''
audio_filter = {
'mono': [
["channels", "2"],
],
'loudness': [
["program", "-17"],
["results", clip["loudnorm"]],
],
'volume': volume_front,
'fadein': '00:00:00.125'
}
scene['audio-front']['A2'].append({
'duration': clip['duration'],
'src': audio,
'filter': audio_filter.copy()
})
'''
audio_filter['volume'] = volume_rear
scene['audio-rear']['A2'].append({
'duration': clip['duration'],
'src': audio,
'filter': audio_filter.copy()
})
'''
used.append(clip)
if not clips and target - length > 0:
print("not enough clips, need to reset")
used_ids = {c['id'] for c in used}
clips = [c for c in all_clips if c != clip and c['id'] not in used_ids]
if not clips:
print("not enough clips, also consider used")
clips = [c for c in all_clips if c != clip]
if not clips:
print("not enough clips, also consider last clip")
clips = all_clips.copy()
scene_duration = int(get_scene_duration(scene) * fps)
voice_overs = []
sub_offset = 0
vo_min = 0
subs = []
print("--")
print("Voice Over:")
if voice_over:
vo_keys = list(sorted(voice_over))
if chance(seq, 0.5):
vo_key = vo_keys[random_int(seq, len(vo_keys))]
voice_overs.append(voice_over[vo_key])
elif len(vo_keys) >= 2:
vo1 = vo_keys.pop(random_int(seq, len(vo_keys)))
vo2 = vo_keys.pop(random_int(seq, len(vo_keys)))
voice_overs.append(voice_over[vo1])
if voice_over[vo1]["duration"] + voice_over[vo2]["duration"] < target:
print("adding second vo")
voice_overs.append(voice_over[vo2])
print("vo:", [x['src'] for x in voice_overs], list(sorted(voice_over)))
vo_min = sum([vo['duration'] for vo in voice_overs])
sub_offset = 0
if vo_min > target:
target = vo_min
elif vo_min < target:
offset = format_duration((target - vo_min) / 2, fps)
while int(sub_offset * fps) < scene_duration:
if sub_offset:
gap = (5 * fps + random_int(seq, 10 * fps)) / fps
else:
gap = (2 * fps + random_int(seq, 5 * fps)) / fps
if int((sub_offset + gap)* fps) > scene_duration:
gap = format_duration((scene_duration - int(sub_offset * fps)) / fps, fps)
scene['audio-center']['A1'].append({
'blank': True,
'duration': offset
'duration': gap
})
scene['audio-rear']['A1'].append({
'blank': True,
'duration': offset
'duration': gap
})
vo_min += offset
sub_offset = offset
subs = []
for vo in voice_overs:
print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+gap, gap), 'silence')
sub_offset += gap
vo_key = random_choice(seq, vo_keys, pop=True)
variant = random_int(seq, len(voice_over[vo_key]))
vo = voice_over[vo_key][variant]
while int((vo['duration'] + sub_offset) * fps) > scene_duration:
if not vo_keys:
vo = None
break
vo_key = random_choice(seq, vo_keys, pop=True)
variant = random_int(seq, len(voice_over[vo_key]))
vo = voice_over[vo_key][variant]
if vo is None:
break
print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+vo["duration"], vo["duration"]), vo["src"].split('/')[-1])
voice_overs.append(vo)
voc = vo.copy()
a, b = '-11', '-3'
if 'Whispered' in voc['src']:
@ -122,118 +286,20 @@ def compose(clips, target=150, base=1024, voice_over=None, options=None):
if subs:
scene["subtitles"] = subs
selected_clips_length = 0
selected_clips = []
non_ai_clips = []
for clip in clips:
if 'ai' in clip:
selected_clips.append(clip)
selected_clips_length += clip['duration']
else:
non_ai_clips.append(clip)
while selected_clips_length < target and non_ai_clips:
clip = random_choice(seq, non_ai_clips, pop=True)
selected_clips.append(clip)
selected_clips_length += clip['duration']
clips = selected_clips
clip = None
while target - length > 0 and clips:
'''
if clip:
if chance(seq, 0.5):
next_seqid = clip['seqid'] + 1
clip = get_clip_by_seqid(clips, next_seqid)
else:
clip = None
'''
clip = None
if not clip:
# FIXME: while not all clips have AI versions make sure we have one 50% of the time
clip = random_choice(seq, clips, True)
if not clips:
print("not enough clips, need to reset")
clips = [c for c in all_clips if c != clip and c not in used]
if not clips:
print("not enough clips, also consider used")
clips = [c for c in all_clips if c != clip]
if not clips:
print("not enough clips, also consider last clip")
clips = all_clips.copy()
if length + clip['duration'] > target and length >= vo_min:
break
length += int(clip['duration'] * fps) / fps
# 50/50 source or ai
src = clip['source']
audio = clip['source']
# select ai if we have one
if 'ai' in clip:
if True or chance(seq, 0.5):
src = random_choice(seq, list(clip['ai'].values()), False)
print('%07.3f %07.3f' % (length, clip['duration']), src.split('/')[-2], os.path.basename(clip['source']))
scene['front']['V2'].append({
'duration': clip['duration'],
'src': src,
"filter": {
}
})
volume_front = '-2.5'
volume_rear = '-8.5'
if clip.get('volume') is not None:
volume_front = '%0.2f' % (float(volume_front) + clip['volume'])
volume_rear = '%0.2f' % (float(volume_rear) + clip['volume'])
audio_filter = {
'mono': [
["channels", "2"],
],
'dynamic_loudness': [
["target_loudness", "-35"],
["min_gain", "-15"],
["max_gin", "15"],
],
'volume': volume_front,
'fadein': '00:00:00.125'
}
scene['audio-front']['A2'].append({
'duration': clip['duration'],
'src': audio,
'filter': audio_filter.copy()
})
'''
audio_filter['volume'] = volume_rear
scene['audio-rear']['A2'].append({
'duration': clip['duration'],
'src': audio,
'filter': audio_filter.copy()
})
'''
used.append(clip)
print("scene duration %0.3f (target: %0.3f, vo_min: %0.3f)" % (length, target, vo_min))
scene_duration = int(get_scene_duration(scene) * fps)
sub_offset = int(sub_offset * fps)
if sub_offset < scene_duration:
delta = format_duration((scene_duration - sub_offset) / fps, fps)
print(">> add %0.3f of silence.. %0.3f (scene_duration)" % (delta, scene_duration / fps))
gap = format_duration((scene_duration - sub_offset) / fps, fps)
print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+gap, gap), 'silence')
scene['audio-center']['A1'].append({
'blank': True,
'duration': delta
'duration': gap
})
scene['audio-rear']['A1'].append({
'blank': True,
'duration': delta
'duration': gap
})
elif sub_offset > scene_duration:
delta = format_duration((scene_duration - sub_offset) / fps, fps)
scene['audio-center']['A1'][-1]["duration"] += delta
scene['audio-rear']['A1'][-1]["duration"] += delta
print("WTF, needed to cut %s new duration: %s" % (delta, scene['audio-center']['A1'][-1]["duration"]))
print(scene['audio-center']['A1'][-1])
sub_offset += gap
print("scene duration %0.3f (target: %0.3f)" % (length, target))
return scene, used
def write_subtitles(data, folder, options):
@ -312,7 +378,9 @@ def get_fragments(clips, voice_over, prefix):
for l in itemlist.models.List.objects.filter(status='featured').order_by('name'):
if l.name.split(' ')[0].isdigit():
fragment_id = l.name.split(' ')[0]
fragment = {
'id': fragment_id,
'name': l.name,
'tags': [],
'anti-tags': [],
@ -344,7 +412,7 @@ def get_fragments(clips, voice_over, prefix):
print("FIXME", i)
continue
type_ = i.data['type'][0].lower()
target = os.path.join(prefix, type_, i.data['title'] + ext)
target = os.path.join(prefix, 'video', type_, i.data['title'] + ext)
sources.append(target)
fragment['clips'] = []
for clip in clips:
@ -353,7 +421,7 @@ def get_fragments(clips, voice_over, prefix):
source = clip['source']
if source in sources:
fragment['clips'].append(clip)
fragment["voice_over"] = voice_over.get(str(fragment["id"]), {})
fragment["voice_over"] = voice_over.get(fragment_id, {})
fragments.append(fragment)
fragments.sort(key=lambda f: ox.sort_string(f['name']))
return fragments
@ -386,21 +454,23 @@ def render_all(options):
for fragment in fragments:
fragment_base += 1
fragment_id = int(fragment['name'].split(' ')[0])
if options["chapter"] and int(options["chapter"]) != fragment_id:
if options["fragment"] and int(options["fragment"]) != fragment_id:
continue
name = fragment['name'].replace(' ', '_')
if fragment_id < 10:
name = '0' + name
if not fragment['clips']:
print("skipping empty fragment", name)
continue
fragment_prefix = os.path.join(base_prefix, name)
os.makedirs(fragment_prefix, exist_ok=True)
fragment_clips = fragment['clips']
unused_fragment_clips = [c for c in fragment_clips if c not in clips_used]
used_ids = {c['id'] for c in clips_used}
unused_fragment_clips = [c for c in fragment_clips if c['id'] not in clips_used]
print('fragment clips', len(fragment_clips), 'unused', len(unused_fragment_clips))
print('--')
print('Video:')
scene, used = compose(
unused_fragment_clips,
fragment=fragment,
target=target,
base=fragment_base,
voice_over=fragment['voice_over'],
@ -414,7 +484,7 @@ def render_all(options):
src = src[0]['src']
stats[src.split('/')[-2]] += 1
else:
print("!! fixme, chapter without VO")
print("!! fixme, fragment without VO")
position += scene_duration
target_position += fragment_target
@ -833,13 +903,14 @@ def generate_clips(options):
source = e.files.filter(selected=True)[0].data.path
ext = os.path.splitext(source)[1]
type_ = e.data['type'][0].lower()
target = os.path.join(prefix, type_, i.data['title'] + ext)
target = os.path.join(prefix, 'video', type_, i.data['title'] + ext)
os.makedirs(os.path.dirname(target), exist_ok=True)
if os.path.islink(target):
os.unlink(target)
os.symlink(source, target)
if type_ == "source":
source_target = target
clip['loudnorm'] = get_loudnorm(e.files.filter(selected=True)[0])
if type_.startswith('ai:'):
if 'ai' not in clip:
clip['ai'] = {}
@ -888,17 +959,20 @@ def generate_clips(options):
print("using", len(clips), "clips")
voice_over = defaultdict(dict)
voice_over = {}
for vo in item.models.Item.objects.filter(
data__type__icontains="voice over",
):
title = vo.get('title')
fragment_id = int(title.split('_')[2].replace('gen', ''))
parts = title.split('-')
fragment = '%02d' % int(parts[0].replace('ch', ''))
type = parts[1]
variant = '-'.join(parts[2:4])
source = vo.files.filter(selected=True)[0]
#batch = vo.get('batch')[0].replace('Text-', '')
batch = title.split('_')[3]
src = source.data.path
target = os.path.join(prefix, 'voice_over', batch, '%s.wav' % fragment_id)
ext = src.split('.')[-1]
target = os.path.join(prefix, 'voice_over', fragment, '%s-%s.%s' % (type, variant, ext))
os.makedirs(os.path.dirname(target), exist_ok=True)
if os.path.islink(target):
os.unlink(target)
@ -909,10 +983,15 @@ def generate_clips(options):
).exclude(value="").order_by("start"):
sdata = get_srt(sub, 0, lang, tlang)
subs.append(sdata)
voice_over[fragment_id][batch] = {
if fragment not in voice_over:
voice_over[fragment] = {}
if type not in voice_over[fragment]:
voice_over[fragment][type] = []
voice_over[fragment][type].append({
"variant": variant,
"src": target,
"duration": format_duration(source.duration, 24),
"subs": subs
}
})
with open(os.path.join(prefix, 'voice_over.json'), 'w') as fd:
json.dump(voice_over, fd, indent=2, ensure_ascii=False)