a/v render. gen fixes

This commit is contained in:
j 2026-01-24 13:26:30 +01:00
commit d62d4c5746
6 changed files with 706 additions and 268 deletions

365
render.py
View file

@ -20,7 +20,36 @@ from .render_utils import *
default_prefix = "/srv/p_for_power"
def compose(clips, target=150, base=1024, voice_over=None, options=None):
def get_loudnorm(file):
    """Measure a file's loudness with ffmpeg's loudnorm filter and cache the result.

    Runs ffmpeg's two-pass loudnorm analysis (audio only, no output file) and
    formats the measured values as a tab-separated string
    ``"L: <integrated>\\tR: <range>\\tP <true peak>"``.  The string is stored in
    ``file.info["loudnorm"]`` and persisted via ``file.save()`` so subsequent
    calls return the cached value without re-running ffmpeg.

    Args:
        file: a media-file model object exposing ``info`` (dict), ``data.path``
              (source path on disk) and ``save()``.  # assumes Django-style file model — TODO confirm

    Returns:
        str: the formatted loudness measurement.

    Raises:
        RuntimeError: if no loudnorm JSON block can be found in ffmpeg's output.
    """
    # Cached from a previous run — skip the (slow) ffmpeg analysis pass.
    if "loudnorm" in file.info:
        return file.info["loudnorm"]
    source = file.data.path
    cmd = [
        "ffmpeg",
        "-i", source,
        "-vn",                                   # ignore video streams
        "-af", "loudnorm=print_format=json",     # print measurement as JSON
        "-f", "null",                            # analysis only, discard output
        "-"
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    # ffmpeg writes the loudnorm JSON block to stderr, after the usual log noise.
    json_match = re.search(r"\{[\s\S]*\}", result.stderr)
    if not json_match:
        raise RuntimeError("Could not find loudnorm JSON output in ffmpeg output")
    loudnorm_data = json.loads(json_match.group(0))
    input_i = float(loudnorm_data.get("input_i", 0))      # Integrated loudness
    input_lra = float(loudnorm_data.get("input_lra", 0))  # Loudness range
    input_tp = float(loudnorm_data.get("input_tp", 0))    # True peak
    # NOTE: "input_thresh" is also reported by ffmpeg but is not part of the
    # cached string format consumed downstream, so it is intentionally ignored.
    loudnorm = f"L: {input_i:.6f}\tR: {input_lra:.6f}\tP {input_tp:.6f}"
    file.info["loudnorm"] = loudnorm
    file.save()
    return loudnorm
def compose(clips, fragment, target=150, base=1024, voice_over=None, options=None):
if options is None:
options = {}
fps = 24
@ -50,40 +79,175 @@ def compose(clips, target=150, base=1024, voice_over=None, options=None):
seq = random(10000 + base * 1000)
used = []
selected_clips_length = 0
ai_length = 0
selected_clips = []
tags = []
while selected_clips_length < target:
if not tags:
tags = fragment["tags"].copy()
tag = random_choice(seq, tags, pop=True)
non_ai_clips = []
ai_clips = []
for clip in clips:
if tag in clip["tags"]:
if 'ai' in clip:
ai_clips.append(clip)
else:
non_ai_clips.append(clip)
if ai_length < target * 0.6 and ai_clips:
clip = random_choice(seq, ai_clips, pop=True)
clip["use_ai"] = True
selected_clips.append(clip)
selected_clips_length += clip['duration']
ai_length += clip['duration']
clips = [c for c in clips if c['id'] != clip['id']]
continue
available_clips = non_ai_clips + ai_clips
if available_clips:
clip = random_choice(seq, available_clips, pop=True)
clip["use_ai"] = False
selected_clips.append(clip)
selected_clips_length += clip['duration']
clips = [c for c in clips if c['id'] != clip['id']]
clips = selected_clips
clip = None
while target - length > 0 and clips:
'''
if clip:
if chance(seq, 0.5):
next_seqid = clip['seqid'] + 1
clip = get_clip_by_seqid(clips, next_seqid)
else:
clip = None
'''
clip = None
if not clip:
# FIXME: while not all clips have AI versions make sure we have one 50% of the time
clip = random_choice(seq, clips, True)
next_length = length + clip['duration']
if target - next_length < -target*0.1:
break
length += int(clip['duration'] * fps) / fps
# 50/50 source or ai
src = clip['source']
audio = clip['source']
# select ai if we have one
if 'ai' in clip:
if clip["use_ai"]:
src = random_choice(seq, list(clip['ai'].values()), False)
print('%07.3f-%07.3f %07.3f %s (%s)' % (
length-clip['duration'],
length,
clip['duration'],
os.path.basename(clip['source']),
src.split('/')[-2]
))
scene['front']['V2'].append({
'duration': clip['duration'],
'src': src,
"filter": {
}
})
volume_front = '-2.5'
volume_rear = '-8.5'
if clip.get('volume') is not None:
volume_front = '%0.2f' % (float(volume_front) + clip['volume'])
volume_rear = '%0.2f' % (float(volume_rear) + clip['volume'])
'''
'dynamic_loudness': [
["target_loudness", "-35"],
["min_gain", "-15"],
["max_gin", "15"],
],
'''
audio_filter = {
'mono': [
["channels", "2"],
],
'loudness': [
["program", "-17"],
["results", clip["loudnorm"]],
],
'volume': volume_front,
'fadein': '00:00:00.125'
}
scene['audio-front']['A2'].append({
'duration': clip['duration'],
'src': audio,
'filter': audio_filter.copy()
})
'''
audio_filter['volume'] = volume_rear
scene['audio-rear']['A2'].append({
'duration': clip['duration'],
'src': audio,
'filter': audio_filter.copy()
})
'''
used.append(clip)
if not clips and target - length > 0:
print("not enough clips, need to reset")
used_ids = {c['id'] for c in used}
clips = [c for c in all_clips if c != clip and c['id'] not in used_ids]
if not clips:
print("not enough clips, also consider used")
clips = [c for c in all_clips if c != clip]
if not clips:
print("not enough clips, also consider last clip")
clips = all_clips.copy()
scene_duration = int(get_scene_duration(scene) * fps)
voice_overs = []
sub_offset = 0
vo_min = 0
subs = []
print("--")
print("Voice Over:")
if voice_over:
vo_keys = list(sorted(voice_over))
if chance(seq, 0.5):
vo_key = vo_keys[random_int(seq, len(vo_keys))]
voice_overs.append(voice_over[vo_key])
elif len(vo_keys) >= 2:
vo1 = vo_keys.pop(random_int(seq, len(vo_keys)))
vo2 = vo_keys.pop(random_int(seq, len(vo_keys)))
voice_overs.append(voice_over[vo1])
if voice_over[vo1]["duration"] + voice_over[vo2]["duration"] < target:
print("adding second vo")
voice_overs.append(voice_over[vo2])
print("vo:", [x['src'] for x in voice_overs], list(sorted(voice_over)))
vo_min = sum([vo['duration'] for vo in voice_overs])
sub_offset = 0
if vo_min > target:
target = vo_min
elif vo_min < target:
offset = format_duration((target - vo_min) / 2, fps)
while int(sub_offset * fps) < scene_duration:
if sub_offset:
gap = (5 * fps + random_int(seq, 10 * fps)) / fps
else:
gap = (2 * fps + random_int(seq, 5 * fps)) / fps
if int((sub_offset + gap)* fps) > scene_duration:
gap = format_duration((scene_duration - int(sub_offset * fps)) / fps, fps)
scene['audio-center']['A1'].append({
'blank': True,
'duration': offset
'duration': gap
})
scene['audio-rear']['A1'].append({
'blank': True,
'duration': offset
'duration': gap
})
vo_min += offset
sub_offset = offset
subs = []
for vo in voice_overs:
print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+gap, gap), 'silence')
sub_offset += gap
vo_key = random_choice(seq, vo_keys, pop=True)
variant = random_int(seq, len(voice_over[vo_key]))
vo = voice_over[vo_key][variant]
while int((vo['duration'] + sub_offset) * fps) > scene_duration:
if not vo_keys:
vo = None
break
vo_key = random_choice(seq, vo_keys, pop=True)
variant = random_int(seq, len(voice_over[vo_key]))
vo = voice_over[vo_key][variant]
if vo is None:
break
print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+vo["duration"], vo["duration"]), vo["src"].split('/')[-1])
voice_overs.append(vo)
voc = vo.copy()
a, b = '-11', '-3'
if 'Whispered' in voc['src']:
@ -122,118 +286,20 @@ def compose(clips, target=150, base=1024, voice_over=None, options=None):
if subs:
scene["subtitles"] = subs
selected_clips_length = 0
selected_clips = []
non_ai_clips = []
for clip in clips:
if 'ai' in clip:
selected_clips.append(clip)
selected_clips_length += clip['duration']
else:
non_ai_clips.append(clip)
while selected_clips_length < target and non_ai_clips:
clip = random_choice(seq, non_ai_clips, pop=True)
selected_clips.append(clip)
selected_clips_length += clip['duration']
clips = selected_clips
clip = None
while target - length > 0 and clips:
'''
if clip:
if chance(seq, 0.5):
next_seqid = clip['seqid'] + 1
clip = get_clip_by_seqid(clips, next_seqid)
else:
clip = None
'''
clip = None
if not clip:
# FIXME: while not all clips have AI versions make sure we have one 50% of the time
clip = random_choice(seq, clips, True)
if not clips:
print("not enough clips, need to reset")
clips = [c for c in all_clips if c != clip and c not in used]
if not clips:
print("not enough clips, also consider used")
clips = [c for c in all_clips if c != clip]
if not clips:
print("not enough clips, also consider last clip")
clips = all_clips.copy()
if length + clip['duration'] > target and length >= vo_min:
break
length += int(clip['duration'] * fps) / fps
# 50/50 source or ai
src = clip['source']
audio = clip['source']
# select ai if we have one
if 'ai' in clip:
if True or chance(seq, 0.5):
src = random_choice(seq, list(clip['ai'].values()), False)
print('%07.3f %07.3f' % (length, clip['duration']), src.split('/')[-2], os.path.basename(clip['source']))
scene['front']['V2'].append({
'duration': clip['duration'],
'src': src,
"filter": {
}
})
volume_front = '-2.5'
volume_rear = '-8.5'
if clip.get('volume') is not None:
volume_front = '%0.2f' % (float(volume_front) + clip['volume'])
volume_rear = '%0.2f' % (float(volume_rear) + clip['volume'])
audio_filter = {
'mono': [
["channels", "2"],
],
'dynamic_loudness': [
["target_loudness", "-35"],
["min_gain", "-15"],
["max_gin", "15"],
],
'volume': volume_front,
'fadein': '00:00:00.125'
}
scene['audio-front']['A2'].append({
'duration': clip['duration'],
'src': audio,
'filter': audio_filter.copy()
})
'''
audio_filter['volume'] = volume_rear
scene['audio-rear']['A2'].append({
'duration': clip['duration'],
'src': audio,
'filter': audio_filter.copy()
})
'''
used.append(clip)
print("scene duration %0.3f (target: %0.3f, vo_min: %0.3f)" % (length, target, vo_min))
scene_duration = int(get_scene_duration(scene) * fps)
sub_offset = int(sub_offset * fps)
if sub_offset < scene_duration:
delta = format_duration((scene_duration - sub_offset) / fps, fps)
print(">> add %0.3f of silence.. %0.3f (scene_duration)" % (delta, scene_duration / fps))
gap = format_duration((scene_duration - sub_offset) / fps, fps)
print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+gap, gap), 'silence')
scene['audio-center']['A1'].append({
'blank': True,
'duration': delta
'duration': gap
})
scene['audio-rear']['A1'].append({
'blank': True,
'duration': delta
'duration': gap
})
elif sub_offset > scene_duration:
delta = format_duration((scene_duration - sub_offset) / fps, fps)
scene['audio-center']['A1'][-1]["duration"] += delta
scene['audio-rear']['A1'][-1]["duration"] += delta
print("WTF, needed to cut %s new duration: %s" % (delta, scene['audio-center']['A1'][-1]["duration"]))
print(scene['audio-center']['A1'][-1])
sub_offset += gap
print("scene duration %0.3f (target: %0.3f)" % (length, target))
return scene, used
def write_subtitles(data, folder, options):
@ -312,7 +378,9 @@ def get_fragments(clips, voice_over, prefix):
for l in itemlist.models.List.objects.filter(status='featured').order_by('name'):
if l.name.split(' ')[0].isdigit():
fragment_id = l.name.split(' ')[0]
fragment = {
'id': fragment_id,
'name': l.name,
'tags': [],
'anti-tags': [],
@ -344,7 +412,7 @@ def get_fragments(clips, voice_over, prefix):
print("FIXME", i)
continue
type_ = i.data['type'][0].lower()
target = os.path.join(prefix, type_, i.data['title'] + ext)
target = os.path.join(prefix, 'video', type_, i.data['title'] + ext)
sources.append(target)
fragment['clips'] = []
for clip in clips:
@ -353,7 +421,7 @@ def get_fragments(clips, voice_over, prefix):
source = clip['source']
if source in sources:
fragment['clips'].append(clip)
fragment["voice_over"] = voice_over.get(str(fragment["id"]), {})
fragment["voice_over"] = voice_over.get(fragment_id, {})
fragments.append(fragment)
fragments.sort(key=lambda f: ox.sort_string(f['name']))
return fragments
@ -386,21 +454,23 @@ def render_all(options):
for fragment in fragments:
fragment_base += 1
fragment_id = int(fragment['name'].split(' ')[0])
if options["chapter"] and int(options["chapter"]) != fragment_id:
if options["fragment"] and int(options["fragment"]) != fragment_id:
continue
name = fragment['name'].replace(' ', '_')
if fragment_id < 10:
name = '0' + name
if not fragment['clips']:
print("skipping empty fragment", name)
continue
fragment_prefix = os.path.join(base_prefix, name)
os.makedirs(fragment_prefix, exist_ok=True)
fragment_clips = fragment['clips']
unused_fragment_clips = [c for c in fragment_clips if c not in clips_used]
used_ids = {c['id'] for c in clips_used}
unused_fragment_clips = [c for c in fragment_clips if c['id'] not in clips_used]
print('fragment clips', len(fragment_clips), 'unused', len(unused_fragment_clips))
print('--')
print('Video:')
scene, used = compose(
unused_fragment_clips,
fragment=fragment,
target=target,
base=fragment_base,
voice_over=fragment['voice_over'],
@ -414,7 +484,7 @@ def render_all(options):
src = src[0]['src']
stats[src.split('/')[-2]] += 1
else:
print("!! fixme, chapter without VO")
print("!! fixme, fragment without VO")
position += scene_duration
target_position += fragment_target
@ -833,13 +903,14 @@ def generate_clips(options):
source = e.files.filter(selected=True)[0].data.path
ext = os.path.splitext(source)[1]
type_ = e.data['type'][0].lower()
target = os.path.join(prefix, type_, i.data['title'] + ext)
target = os.path.join(prefix, 'video', type_, i.data['title'] + ext)
os.makedirs(os.path.dirname(target), exist_ok=True)
if os.path.islink(target):
os.unlink(target)
os.symlink(source, target)
if type_ == "source":
source_target = target
clip['loudnorm'] = get_loudnorm(e.files.filter(selected=True)[0])
if type_.startswith('ai:'):
if 'ai' not in clip:
clip['ai'] = {}
@ -888,17 +959,20 @@ def generate_clips(options):
print("using", len(clips), "clips")
voice_over = defaultdict(dict)
voice_over = {}
for vo in item.models.Item.objects.filter(
data__type__icontains="voice over",
):
title = vo.get('title')
fragment_id = int(title.split('_')[2].replace('gen', ''))
parts = title.split('-')
fragment = '%02d' % int(parts[0].replace('ch', ''))
type = parts[1]
variant = '-'.join(parts[2:4])
source = vo.files.filter(selected=True)[0]
#batch = vo.get('batch')[0].replace('Text-', '')
batch = title.split('_')[3]
src = source.data.path
target = os.path.join(prefix, 'voice_over', batch, '%s.wav' % fragment_id)
ext = src.split('.')[-1]
target = os.path.join(prefix, 'voice_over', fragment, '%s-%s.%s' % (type, variant, ext))
os.makedirs(os.path.dirname(target), exist_ok=True)
if os.path.islink(target):
os.unlink(target)
@ -909,10 +983,15 @@ def generate_clips(options):
).exclude(value="").order_by("start"):
sdata = get_srt(sub, 0, lang, tlang)
subs.append(sdata)
voice_over[fragment_id][batch] = {
if fragment not in voice_over:
voice_over[fragment] = {}
if type not in voice_over[fragment]:
voice_over[fragment][type] = []
voice_over[fragment][type].append({
"variant": variant,
"src": target,
"duration": format_duration(source.duration, 24),
"subs": subs
}
})
with open(os.path.join(prefix, 'voice_over.json'), 'w') as fd:
json.dump(voice_over, fd, indent=2, ensure_ascii=False)