Compare commits

...

3 commits

Author SHA1 Message Date
j
b2552d6059 make sure all tracks are exactly the same length 2024-12-04 09:16:24 +00:00
j
95a41fc2e2 single video render 2024-12-03 20:12:15 +00:00
j
2a2516bff9 pad audio tracks to scene duration 2024-12-03 19:35:37 +00:00
4 changed files with 215 additions and 21 deletions

View file

@ -23,6 +23,9 @@ def resolve_roman(s):
return s.replace(extra, new) return s.replace(extra, new)
return s return s
def format_duration(duration, fps):
    """Snap *duration* (seconds) to the nearest whole frame at *fps*.

    The duration is converted to a frame count with ``round()``, converted
    back to seconds and then limited to five decimal places.
    """
    frames = round(duration * fps)
    seconds = frames / fps
    return float(format(seconds, '.5f'))
class Command(BaseCommand): class Command(BaseCommand):
help = 'generate symlinks to clips and clips.json' help = 'generate symlinks to clips and clips.json'
@ -68,6 +71,10 @@ class Command(BaseCommand):
if not clip["duration"]: if not clip["duration"]:
print('!!', durations, clip) print('!!', durations, clip)
continue continue
cd = format_duration(clip["duration"], 24)
#if cd != clip["duration"]:
# print(clip["duration"], '->', cd, durations, clip)
clip["duration"] = cd
clip['tags'] = i.data.get('tags', []) clip['tags'] = i.data.get('tags', [])
clip['editingtags'] = i.data.get('editingtags', []) clip['editingtags'] = i.data.get('editingtags', [])
name = os.path.basename(clip['original']) name = os.path.basename(clip['original'])
@ -117,7 +124,7 @@ class Command(BaseCommand):
subs.append(sdata) subs.append(sdata)
voice_over[fragment_id][batch] = { voice_over[fragment_id][batch] = {
"src": target, "src": target,
"duration": source.duration, "duration": format_duration(source.duration, 24),
"subs": subs "subs": subs
} }
with open(os.path.join(prefix, 'voice_over.json'), 'w') as fd: with open(os.path.join(prefix, 'voice_over.json'), 'w') as fd:

View file

@ -16,6 +16,9 @@ class Command(BaseCommand):
parser.add_argument('--duration', action='store', dest='duration', default="3600", help='target duration of all fragments in seconds') parser.add_argument('--duration', action='store', dest='duration', default="3600", help='target duration of all fragments in seconds')
parser.add_argument('--offset', action='store', dest='offset', default="1024", help='inital offset in pi') parser.add_argument('--offset', action='store', dest='offset', default="1024", help='inital offset in pi')
parser.add_argument('--no-video', action='store_true', dest='no_video', default=False, help='don\'t render video') parser.add_argument('--no-video', action='store_true', dest='no_video', default=False, help='don\'t render video')
parser.add_argument('--single-file', action='store_true', dest='single_file', default=False, help='render to single video')
parser.add_argument('--keep-audio', action='store_true', dest='keep_audio', default=False, help='keep independent audio tracks')
parser.add_argument('--debug', action='store_true', dest='debug', default=False, help='output more info')
def handle(self, **options): def handle(self, **options):
render_all(options) render_all(options)

212
render.py
View file

@ -11,8 +11,10 @@ import time
from pathlib import Path from pathlib import Path
import ox import ox
import lxml.etree
from .pi import random from .pi import random
from .render_kdenlive import KDEnliveProject, _CACHE from .render_kdenlive import KDEnliveProject, _CACHE, melt_xml, get_melt
def random_int(seq, length): def random_int(seq, length):
@ -64,8 +66,11 @@ def write_if_new(path, data, mode=''):
with open(path, write_mode) as fd: with open(path, write_mode) as fd:
fd.write(data) fd.write(data)
def format_duration(duration, fps):
    """Return *duration* aligned to the frame grid of *fps*, in seconds.

    The value is rounded to the nearest frame, expressed in seconds again and
    cut to five decimal places.
    """
    frame_count = round(duration * fps)
    aligned = frame_count / fps
    return float(format(aligned, '.5f'))
def compose(clips, target=150, base=1024, voice_over=None): def compose(clips, target=150, base=1024, voice_over=None):
fps = 24
length = 0 length = 0
scene = { scene = {
'front': { 'front': {
@ -100,6 +105,7 @@ def compose(clips, target=150, base=1024, voice_over=None):
used = [] used = []
voice_overs = [] voice_overs = []
sub_offset = 0
if voice_over: if voice_over:
vo_keys = list(sorted(voice_over)) vo_keys = list(sorted(voice_over))
if chance(seq, 0.5): if chance(seq, 0.5):
@ -118,7 +124,7 @@ def compose(clips, target=150, base=1024, voice_over=None):
if vo_min > target: if vo_min > target:
target = vo_min target = vo_min
elif vo_min < target: elif vo_min < target:
offset = (target - vo_min) / 2 offset = format_duration((target - vo_min) / 2, fps)
scene['audio-center']['A1'].append({ scene['audio-center']['A1'].append({
'blank': True, 'blank': True,
'duration': offset 'duration': offset
@ -186,7 +192,7 @@ def compose(clips, target=150, base=1024, voice_over=None):
if length + clip['duration'] > target and length >= vo_min: if length + clip['duration'] > target and length >= vo_min:
break break
print('%06.3f %06.3f' % (length, clip['duration']), os.path.basename(clip['original'])) print('%06.3f %06.3f' % (length, clip['duration']), os.path.basename(clip['original']))
length += clip['duration'] length += int(clip['duration'] * fps) / fps
if "foreground" not in clip and "animation" in clip: if "foreground" not in clip and "animation" in clip:
fg = clip['animation'] fg = clip['animation']
@ -298,30 +304,61 @@ def compose(clips, target=150, base=1024, voice_over=None):
}) })
used.append(clip) used.append(clip)
print("scene duration %0.3f (target: %0.3f, vo_min: %0.3f)" % (length, target, vo_min)) print("scene duration %0.3f (target: %0.3f, vo_min: %0.3f)" % (length, target, vo_min))
scene_duration = int(get_scene_duration(scene) * fps)
sub_offset = int(sub_offset * fps)
if sub_offset < scene_duration:
delta = format_duration((scene_duration - sub_offset) / fps, fps)
print(">> add %0.3f of silence.. %0.3f (scene_duration)" % (delta, scene_duration / fps))
scene['audio-center']['A1'].append({
'blank': True,
'duration': delta
})
scene['audio-rear']['A1'].append({
'blank': True,
'duration': delta
})
elif sub_offset > scene_duration:
delta = format_duration((scene_duration - sub_offset) / fps, fps)
scene['audio-center']['A1'][-1]["duration"] += delta
scene['audio-rear']['A1'][-1]["duration"] += delta
print("WTF, needed to cut %s new duration: %s" % (delta, scene['audio-center']['A1'][-1]["duration"]))
print(scene['audio-center']['A1'][-1])
return scene, used return scene, used
def get_track_duration(scene, k, n, fps=24):
    """Return the duration in seconds of track *n* on timeline *k*.

    Args:
        scene: mapping of timeline name -> mapping of track name -> list of
            clip dicts, each carrying a ``'duration'`` in seconds.
        k: timeline name to look up in *scene*.
        n: track name to look up inside that timeline.
        fps: frame rate used to quantise clip durations (defaults to 24,
            the value that used to be hard-coded here).

    Returns:
        The summed track length in seconds; ``0.0`` when the timeline or
        the track does not exist.
    """
    clips = scene.get(k, {}).get(n, ())
    # Every clip is truncated to whole frames *before* summing, so the result
    # is always a multiple of 1/fps.
    frames = sum(int(clip['duration'] * fps) for clip in clips)
    return frames / fps
def get_scene_duration(scene): def get_scene_duration(scene):
if isinstance(scene, str):
with open(scene) as fd:
scene = json.load(fd)
duration = 0 duration = 0
for key, value in scene.items(): for key, value in scene.items():
for name, clips in value.items(): for name, clips in value.items():
for clip in clips: for clip in clips:
duration += clip['duration'] duration += int(clip['duration'] * 24)
return duration return duration / 24
def get_offset_duration(prefix): def get_offset_duration(prefix):
duration = 0 duration = 0
for root, folders, files in os.walk(prefix): for root, folders, files in os.walk(prefix):
for f in files: for f in files:
if f == 'scene.json': if f == 'scene.json':
path = os.path.join(root, f)
scene = json.load(open(path))
duration += get_scene_duration(scene) duration += get_scene_duration(scene)
return duration return duration
def render(root, scene, prefix=''): def render(root, scene, prefix='', options=None):
if options is None: options = {}
fps = 24 fps = 24
files = [] files = []
scene_duration = int(get_scene_duration(scene) * 24) scene_duration = int(get_scene_duration(scene) * fps)
for timeline, data in scene.items(): for timeline, data in scene.items():
if timeline == "subtitles": if timeline == "subtitles":
path = os.path.join(root, prefix + "front.srt") path = os.path.join(root, prefix + "front.srt")
@ -338,21 +375,42 @@ def render(root, scene, prefix=''):
#print(track) #print(track)
for clip in clips: for clip in clips:
project.append_clip(track, clip) project.append_clip(track, clip)
track_durations[track] = int(sum([c['duration'] for c in clips]) * 24) track_durations[track] = sum([int(c['duration'] * fps) for c in clips])
if timeline.startswith('audio-'): if timeline.startswith('audio-'):
track_duration = project.get_duration() track_duration = project.get_duration()
delta = scene_duration - track_duration delta = scene_duration - track_duration
if delta > 0: if delta > 0:
for track in track_durations: for track in track_durations:
if track_durations[track] == track_duration: if track_durations[track] == track_duration:
project.append_clip(track, {'blank': True, "duration": delta/24}) project.append_clip(track, {'blank': True, "duration": delta/fps})
break
path = os.path.join(root, prefix + "%s.kdenlive" % timeline) path = os.path.join(root, prefix + "%s.kdenlive" % timeline)
project_xml = project.to_xml() project_xml = project.to_xml()
write_if_new(path, project_xml) write_if_new(path, project_xml)
if options["debug"]:
# check duration
out_duration = get_project_duration(path)
p_duration = project.get_duration()
print(path, 'out: %s, project: %s, scene: %s' %(out_duration, p_duration, scene_duration))
if p_duration != scene_duration:
print(path, 'FAIL project: %s, scene: %s' %(p_duration, scene_duration))
_cache = os.path.join(root, "cache.json")
with open(_cache, "w") as fd:
json.dump(_CACHE, fd)
sys.exit(1)
if out_duration != p_duration:
print(path, 'fail got: %s expected: %s' %(out_duration, p_duration))
sys.exit(1)
files.append(path) files.append(path)
return files return files
def get_project_duration(file):
    """Return the length of the project *file* as reported by melt.

    The XML returned by ``melt_xml(file)`` is parsed and the ``out``
    attribute of the first top-level ``producer`` element is read.
    """
    root = lxml.etree.fromstring(melt_xml(file))
    producer = root.xpath('producer')[0]
    # NOTE(review): presumably ``out`` is the zero-based index of the last
    # frame, hence the +1 to get a frame count - confirm against MLT docs.
    return int(producer.attrib['out']) + 1
def get_fragments(clips, voice_over, prefix): def get_fragments(clips, voice_over, prefix):
import itemlist.models import itemlist.models
@ -455,20 +513,19 @@ def render_all(options):
elif position < target_position: elif position < target_position:
target = target + 0.1 * fragment_target target = target + 0.1 * fragment_target
timelines = render(prefix, scene, fragment_prefix[len(prefix) + 1:] + '/') timelines = render(prefix, scene, fragment_prefix[len(prefix) + 1:] + '/', options)
scene_json = json.dumps(scene, indent=2, ensure_ascii=False) scene_json = json.dumps(scene, indent=2, ensure_ascii=False)
write_if_new(os.path.join(fragment_prefix, 'scene.json'), scene_json) write_if_new(os.path.join(fragment_prefix, 'scene.json'), scene_json)
if not options['no_video']: if not options['no_video'] and not options["single_file"]:
for timeline in timelines: for timeline in timelines:
print(timeline) print(timeline)
ext = '.mp4' ext = '.mp4'
if '/audio' in timeline: if '/audio' in timeline:
ext = '.wav' ext = '.wav'
cmd = [ cmd = get_melt() + [
'xvfb-run', '-a', timeline,
'melt', timeline,
'-quiet', '-quiet',
'-consumer', 'avformat:%s' % timeline.replace('.kdenlive', ext), '-consumer', 'avformat:%s' % timeline.replace('.kdenlive', ext),
] ]
@ -490,8 +547,8 @@ def render_all(options):
subprocess.call(cmd) subprocess.call(cmd)
os.unlink(timeline.replace('.kdenlive', ext)) os.unlink(timeline.replace('.kdenlive', ext))
fragment_prefix = Path(fragment_prefix)
cmds = [] cmds = []
fragment_prefix = Path(fragment_prefix)
for src, out1, out2 in ( for src, out1, out2 in (
("audio-front.wav", "fl.wav", "fr.wav"), ("audio-front.wav", "fl.wav", "fr.wav"),
("audio-center.wav", "fc.wav", "lfe.wav"), ("audio-center.wav", "fc.wav", "lfe.wav"),
@ -535,7 +592,8 @@ def render_all(options):
fragment_prefix / "back-audio.mp4", fragment_prefix / "back-audio.mp4",
]) ])
for cmd in cmds: for cmd in cmds:
#print(" ".join([str(x) for x in cmd])) if options["debug"]:
print(" ".join([str(x) for x in cmd]))
subprocess.call(cmd) subprocess.call(cmd)
for a, b in ( for a, b in (
@ -550,6 +608,10 @@ def render_all(options):
sys.exit(-1) sys.exit(-1)
shutil.move(fragment_prefix / "back-audio.mp4", fragment_prefix / "back.mp4") shutil.move(fragment_prefix / "back-audio.mp4", fragment_prefix / "back.mp4")
shutil.move(fragment_prefix / "front-5.1.mp4", fragment_prefix / "front.mp4") shutil.move(fragment_prefix / "front-5.1.mp4", fragment_prefix / "front.mp4")
if options["keep_audio"]:
shutil.move(fragment_prefix / "audio-center.wav", fragment_prefix / "vocals.wav")
shutil.move(fragment_prefix / "audio-front.wav", fragment_prefix / "foley.wav")
shutil.move(fragment_prefix / "audio-back.wav", fragment_prefix / "original.wav")
for fn in ( for fn in (
"audio-5.1.mp4", "audio-5.1.mp4",
"audio-center.wav", "audio-rear.wav", "audio-center.wav", "audio-rear.wav",
@ -560,6 +622,106 @@ def render_all(options):
if os.path.exists(fn): if os.path.exists(fn):
os.unlink(fn) os.unlink(fn)
if options["single_file"]:
cmds = []
base_prefix = Path(base_prefix)
for timeline in (
"front",
"back",
"audio-back",
"audio-center",
"audio-front",
"audio-rear",
):
timelines = list(sorted(glob('%s/*/%s.kdenlive' % (base_prefix, timeline))))
ext = '.mp4'
if '/audio' in timelines[0]:
ext = '.wav'
out = base_prefix / (timeline + ext)
cmd = get_melt() + timelines + [
'-quiet',
'-consumer', 'avformat:%s' % out,
]
if ext == '.wav':
cmd += ['vn=1']
else:
cmd += ['an=1']
cmd += ['vcodec=libx264', 'x264opts=keyint=1', 'crf=15']
cmds.append(cmd)
for src, out1, out2 in (
("audio-front.wav", "fl.wav", "fr.wav"),
("audio-center.wav", "fc.wav", "lfe.wav"),
("audio-rear.wav", "bl.wav", "br.wav"),
):
cmds.append([
"ffmpeg", "-y",
"-nostats", "-loglevel", "error",
"-i", base_prefix / src,
"-filter_complex",
"[0:0]pan=1|c0=c0[left]; [0:0]pan=1|c0=c1[right]",
"-map", "[left]", base_prefix / out1,
"-map", "[right]", base_prefix / out2,
])
cmds.append([
"ffmpeg", "-y",
"-nostats", "-loglevel", "error",
"-i", base_prefix / "fl.wav",
"-i", base_prefix / "fr.wav",
"-i", base_prefix / "fc.wav",
"-i", base_prefix / "lfe.wav",
"-i", base_prefix / "bl.wav",
"-i", base_prefix / "br.wav",
"-filter_complex", "[0:a][1:a][2:a][3:a][4:a][5:a]amerge=inputs=6[a]",
"-map", "[a]", "-c:a", "aac", base_prefix / "audio-5.1.mp4"
])
cmds.append([
"ffmpeg", "-y",
"-nostats", "-loglevel", "error",
"-i", base_prefix / "front.mp4",
"-i", base_prefix / "audio-5.1.mp4",
"-c", "copy",
base_prefix / "front-5.1.mp4",
])
cmds.append([
"ffmpeg", "-y",
"-nostats", "-loglevel", "error",
"-i", base_prefix / "back.mp4",
"-i", base_prefix / "audio-back.wav",
"-c:v", "copy",
base_prefix / "back-audio.mp4",
])
for cmd in cmds:
if options["debug"]:
print(" ".join([str(x) for x in cmd]))
subprocess.call(cmd)
for a, b in (
("back-audio.mp4", "back.mp4"),
("front-5.1.mp4", "back.mp4"),
):
duration_a = ox.avinfo(str(base_prefix / a))['duration']
duration_b = ox.avinfo(str(base_prefix / b))['duration']
if duration_a != duration_b:
print('!!', duration_a, base_prefix / a)
print('!!', duration_b, base_prefix / b)
sys.exit(-1)
shutil.move(base_prefix / "back-audio.mp4", base_prefix / "back.mp4")
shutil.move(base_prefix / "front-5.1.mp4", base_prefix / "front.mp4")
if options["keep_audio"]:
shutil.move(base_prefix / "audio-center.wav", base_prefix / "vocals.wav")
shutil.move(base_prefix / "audio-front.wav", base_prefix / "foley.wav")
shutil.move(base_prefix / "audio-back.wav", base_prefix / "original.wav")
for fn in (
"audio-5.1.mp4",
"audio-center.wav", "audio-rear.wav",
"audio-front.wav", "audio-back.wav", "back-audio.mp4",
"fl.wav", "fr.wav", "fc.wav", "lfe.wav", "bl.wav", "br.wav",
):
fn = base_prefix / fn
if os.path.exists(fn):
os.unlink(fn)
join_subtitles(base_prefix)
print("Duration - Target: %s Actual: %s" % (target_position, position)) print("Duration - Target: %s Actual: %s" % (target_position, position))
print(json.dumps(dict(stats), sort_keys=True, indent=2)) print(json.dumps(dict(stats), sort_keys=True, indent=2))
with open(_cache, "w") as fd: with open(_cache, "w") as fd:
@ -715,3 +877,15 @@ def render_infinity(options):
with open(state_f + "~", "w") as fd: with open(state_f + "~", "w") as fd:
json.dump(state, fd, indent=2) json.dump(state, fd, indent=2)
shutil.move(state_f + "~", state_f) shutil.move(state_f + "~", state_f)
def join_subtitles(base_prefix):
    """Merge the per-fragment ``front.srt`` files into one subtitle file.

    Every ``<base_prefix>/*/front.srt`` is loaded in sorted order, shifted by
    the summed duration of the fragments before it, and the combined result
    is written to ``<base_prefix>/front.srt``.

    Args:
        base_prefix: folder containing the fragment sub-folders; may be a
            ``str`` or a ``pathlib.Path``.
    """
    # Accept plain strings as well: the ``/`` operator below needs a Path.
    base_prefix = Path(base_prefix)
    data = []
    position = 0
    for srt in sorted(glob('%s/*/front.srt' % base_prefix)):
        # Take scene.json from the same fragment folder instead of using
        # str.replace, which would also rewrite a matching folder name.
        scene = os.path.join(os.path.dirname(srt), 'scene.json')
        data += ox.srt.load(srt, offset=position)
        position += get_scene_duration(scene)
    with open(base_prefix / 'front.srt', 'wb') as fd:
        fd.write(ox.srt.encode(data))

View file

@ -4,6 +4,7 @@ import subprocess
import lxml.etree import lxml.etree
import uuid import uuid
import os import os
import sys
_CACHE = {} _CACHE = {}
_IDS = defaultdict(int) _IDS = defaultdict(int)
@ -12,6 +13,14 @@ def get_propery(element, name):
return element.xpath('property[@name="%s"]' % name)[0].text return element.xpath('property[@name="%s"]' % name)[0].text
def get_melt():
    """Return the argv prefix used to launch ``melt``.

    Side effect: sets ``XDG_RUNTIME_DIR`` in ``os.environ`` to
    ``/tmp/runtime-pandora`` when the variable is not defined.

    Returns:
        ``['melt']`` when ``DISPLAY`` is set in the environment, otherwise
        ``['xvfb-run', '-a', 'melt']``.
    """
    # Only fills in the default; an existing value is left untouched.
    os.environ.setdefault('XDG_RUNTIME_DIR', '/tmp/runtime-pandora')
    if 'DISPLAY' in os.environ:
        return ['melt']
    # No display available: run melt under xvfb-run.
    return ['xvfb-run', '-a', 'melt']
def melt_xml(file): def melt_xml(file):
out = None out = None
real_path = os.path.realpath(file) real_path = os.path.realpath(file)
@ -20,7 +29,8 @@ def melt_xml(file):
if os.stat(real_path).st_mtime != ts: if os.stat(real_path).st_mtime != ts:
out = None out = None
if not out: if not out:
out = subprocess.check_output(['melt', file, '-consumer', 'xml']).decode() cmd = get_melt() + [file, '-consumer', 'xml']
out = subprocess.check_output(cmd).decode()
_CACHE[file] = [os.stat(real_path).st_mtime, out] _CACHE[file] = [os.stat(real_path).st_mtime, out]
return out return out