Compare commits

...

3 commits

4 changed files with 108 additions and 35 deletions

113
render.py
View file

@ -61,6 +61,7 @@ def compose(clips, fragment, target=150, base=1024, voice_over=None, options=Non
}, },
'audio-center': { 'audio-center': {
'A1': [], 'A1': [],
'A2': [],
}, },
'audio-front': { 'audio-front': {
'A1': [], 'A1': [],
@ -132,7 +133,9 @@ def compose(clips, fragment, target=150, base=1024, voice_over=None, options=Non
next_length = length + clip['duration'] next_length = length + clip['duration']
if target - next_length < -target*0.1: if target - next_length < -target*0.1:
break break
clip_duration = int(clip['duration'] * fps) / fps clip_duration = format_duration(clip['duration'], fps)
if clip['duration'] != clip_duration:
print("WTF", clip, clip['duration'], clip_duration)
length += clip_duration length += clip_duration
# 50/50 source or ai # 50/50 source or ai
@ -145,13 +148,13 @@ def compose(clips, fragment, target=150, base=1024, voice_over=None, options=Non
print('%07.3f-%07.3f %07.3f %s (%s)' % ( print('%07.3f-%07.3f %07.3f %s (%s)' % (
length-clip_duration, length-clip_duration,
length, length,
clip['duration'], clip_duration,
os.path.basename(clip['source']), os.path.basename(clip['source']),
src.split('/')[-2] src.split('/')[-2]
)) ))
scene['front']['V2'].append({ scene['front']['V2'].append({
'duration': clip['duration'], 'duration': clip_duration,
'src': src, 'src': src,
"filter": { "filter": {
} }
@ -181,10 +184,21 @@ def compose(clips, fragment, target=150, base=1024, voice_over=None, options=Non
'fadein': '00:00:00.125' 'fadein': '00:00:00.125'
} }
scene['audio-front']['A2'].append({ scene['audio-front']['A2'].append({
'duration': clip['duration'], 'duration': clip_duration,
'src': audio, 'src': audio,
'filter': audio_filter.copy() 'filter': audio_filter.copy()
}) })
length = format_duration(length, fps)
ad = get_scene_duration(scene, track='audio-front:A2')
vd = get_scene_duration(scene, track='front:V2')
if ad == vd and abs(ad-length) > 1/48:
print('v: ', vd, 'ad', ad, 'length:', length, 'fixup')
length = ad
if abs(length -vd) > 1/48 or abs(length - ad) > 1/48 or ad != vd:
print('vd: ', vd, 'ad', ad, 'length:', length)
print(clip)
sys.exit(-1)
used.append(clip) used.append(clip)
if not clips and target - length > 0: if not clips and target - length > 0:
print("not enough clips, need to reset") print("not enough clips, need to reset")
@ -200,7 +214,7 @@ def compose(clips, fragment, target=150, base=1024, voice_over=None, options=Non
if "ai" in clip: if "ai" in clip:
clip["use_ai"] = True clip["use_ai"] = True
scene_duration = int(get_scene_duration(scene) * fps) scene_duration = int(round(get_scene_duration(scene) * fps))
voice_overs = [] voice_overs = []
sub_offset = 0 sub_offset = 0
subs = [] subs = []
@ -214,22 +228,28 @@ def compose(clips, fragment, target=150, base=1024, voice_over=None, options=Non
else: else:
gap = (2 * fps + random_int(seq, 5 * fps)) / fps gap = (2 * fps + random_int(seq, 5 * fps)) / fps
gap = format_duration(gap, fps) gap = format_duration(gap, fps)
if int((sub_offset + gap)* fps) > scene_duration: if int((sub_offset + gap) * fps) > scene_duration:
gap = format_duration((scene_duration - int(sub_offset * fps)) / fps, fps) gap = format_duration((scene_duration - int(sub_offset * fps)) / fps, fps)
scene['audio-center']['A1'].append({ for tl, track in (
'blank': True, ('audio-center', 'A1'),
'duration': gap ('audio-center', 'A2'),
}) ('audio-rear', 'A1'),
scene['audio-rear']['A1'].append({ ('audio-rear', 'A2'),
'blank': True, ):
'duration': gap scene[tl][track].append({
}) 'blank': True,
'duration': gap
})
print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+gap, gap), 'silence') print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+gap, gap), 'silence')
sub_offset += gap sub_offset += gap
vo_key = random_choice(seq, vo_keys, pop=True) vo_key = random_choice(seq, vo_keys, pop=True)
variant = random_int(seq, len(voice_over[vo_key])) variant = random_int(seq, len(voice_over[vo_key]))
vo = voice_over[vo_key][variant] vo = voice_over[vo_key][variant]
if isinstance(vo, list):
vo, vo_b = vo
else:
vo_b = None
while int((vo['duration'] + sub_offset) * fps) > scene_duration: while int((vo['duration'] + sub_offset) * fps) > scene_duration:
if not vo_keys: if not vo_keys:
vo = None vo = None
@ -237,6 +257,10 @@ def compose(clips, fragment, target=150, base=1024, voice_over=None, options=Non
vo_key = random_choice(seq, vo_keys, pop=True) vo_key = random_choice(seq, vo_keys, pop=True)
variant = random_int(seq, len(voice_over[vo_key])) variant = random_int(seq, len(voice_over[vo_key]))
vo = voice_over[vo_key][variant] vo = voice_over[vo_key][variant]
if isinstance(vo, list):
vo, vo_b = vo
else:
vo_b = None
if vo is None: if vo is None:
break break
print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+vo["duration"], vo["duration"]), vo["src"].split('/')[-1]) print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+vo["duration"], vo["duration"]), vo["src"].split('/')[-1])
@ -255,13 +279,29 @@ def compose(clips, fragment, target=150, base=1024, voice_over=None, options=Non
sub["in"] += sub_offset sub["in"] += sub_offset
sub["out"] += sub_offset sub["out"] += sub_offset
subs.append(sub) subs.append(sub)
if vo_b:
vo_b = vo_b.copy()
vo_b['filter'] = {'volume': a}
scene['audio-center']['A2'].append(vo_b)
vo_b = vo_b.copy()
vo_b['filter'] = {'volume': b}
scene['audio-rear']['A1'].append(vo_b)
else:
for tl, track in (
('audio-center', 'A2'),
('audio-rear', 'A2'),
):
scene[tl][track].append({
'blank': True,
'duration': voc["duration"]
})
sub_offset += voc["duration"] sub_offset += voc["duration"]
if subs: if subs:
scene["subtitles"] = subs scene["subtitles"] = subs
sub_offset = format_duration(sub_offset, fps)
sub_offset = int(sub_offset * fps) if sub_offset < scene_duration/fps:
if sub_offset < scene_duration: gap = scene_duration/fps - sub_offset
gap = format_duration((scene_duration - sub_offset) / fps, fps)
print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+gap, gap), 'silence') print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+gap, gap), 'silence')
scene['audio-center']['A1'].append({ scene['audio-center']['A1'].append({
'blank': True, 'blank': True,
@ -272,7 +312,18 @@ def compose(clips, fragment, target=150, base=1024, voice_over=None, options=Non
'duration': gap 'duration': gap
}) })
sub_offset += gap sub_offset += gap
print("scene duration %0.3f (target: %0.3f)" % (length, target)) '''
print("scene duration: %0.3f vo: %0.3f (length: %0.3f, target: %0.3f)" % (
get_scene_duration(scene),
sub_offset,
length,
target
))
'''
print("scene duration: %0.3f (target: %0.3f)" % (
get_scene_duration(scene),
target
))
return scene, used return scene, used
def write_subtitles(data, folder, options): def write_subtitles(data, folder, options):
@ -560,10 +611,14 @@ def render_all(options):
for a, b in ( for a, b in (
("front-mixed.mp4", "front.mp4"), ("front-mixed.mp4", "front.mp4"),
("audio-center.wav", "front.mp4"),
("audio-rear.wav", "front.mp4"),
("audio-front.wav", "front.mp4"),
("audio-5.1.mp4", "front.mp4"),
): ):
duration_a = ox.avinfo(str(fragment_prefix / a))['duration'] duration_a = ox.avinfo(str(fragment_prefix / a))['duration']
duration_b = ox.avinfo(str(fragment_prefix / b))['duration'] duration_b = ox.avinfo(str(fragment_prefix / b))['duration']
if duration_a != duration_b: if abs(duration_a - duration_b) > 1/48:
print('!!', duration_a, fragment_prefix / a) print('!!', duration_a, fragment_prefix / a)
print('!!', duration_b, fragment_prefix / b) print('!!', duration_b, fragment_prefix / b)
sys.exit(-1) sys.exit(-1)
@ -854,6 +909,7 @@ def generate_clips(options):
import item.models import item.models
import itemlist.models import itemlist.models
fps = 24
options = load_defaults(options) options = load_defaults(options)
prefix = options['prefix'] prefix = options['prefix']
lang, tlang = parse_lang(options["lang"]) lang, tlang = parse_lang(options["lang"])
@ -873,7 +929,8 @@ def generate_clips(options):
continue continue
if not e.files.filter(selected=True).exists(): if not e.files.filter(selected=True).exists():
continue continue
source = e.files.filter(selected=True)[0].data.path selected = e.files.filter(selected=True)[0]
source = selected.data.path
ext = os.path.splitext(source)[1] ext = os.path.splitext(source)[1]
type_ = e.data['type'][0].lower() type_ = e.data['type'][0].lower()
if type_.startswith('ai:'): if type_.startswith('ai:'):
@ -885,7 +942,7 @@ def generate_clips(options):
ai_type = '%s-%s' % (type_[3:], n) ai_type = '%s-%s' % (type_[3:], n)
n += 1 n += 1
clip['ai'][ai_type] = target clip['ai'][ai_type] = target
type_ = ai_type type_ = 'ai:' + ai_type
target = os.path.join(prefix, 'video', type_, i.data['title'] + ext) target = os.path.join(prefix, 'video', type_, i.data['title'] + ext)
if type_ == "source": if type_ == "source":
source_target = target source_target = target
@ -898,19 +955,22 @@ def generate_clips(options):
if os.path.islink(target): if os.path.islink(target):
os.unlink(target) os.unlink(target)
os.symlink(source, target) os.symlink(source, target)
durations.append(e.files.filter(selected=True)[0].duration) durations.append(selected.duration)
if not durations: if not durations:
print(i.public_id, 'no duration!', clip) print(i.public_id, 'no duration!', clip)
continue continue
clip["duration"] = min(durations) if len(set(durations)) > 1:
print(clip, durations)
clip["duration"] = min(durations) - 1/24
# trim to a multiple of the output fps # trim to a multiple of the output fps
d1 = int(clip["duration"] * 24) / 24 d1 = format_duration(clip["duration"], fps)
if d1 != clip["duration"]: if d1 != clip["duration"]:
clip["duration"] = d1 clip["duration"] = d1
if not clip["duration"]: if not clip["duration"]:
print('!!', durations, clip) print('!!', durations, clip)
continue continue
cd = format_duration(clip["duration"], 24) cd = format_duration(clip["duration"], fps)
clip["duration"] = cd clip["duration"] = cd
clip['tags'] = i.data.get('tags', []) clip['tags'] = i.data.get('tags', [])
adjust_volume = i.data.get('adjustvolume', '') adjust_volume = i.data.get('adjustvolume', '')
@ -971,7 +1031,8 @@ def generate_clips(options):
voice_over[fragment][type].append({ voice_over[fragment][type].append({
"variant": variant, "variant": variant,
"src": target, "src": target,
"duration": format_duration(source.duration, 24), #"duration": format_duration(source.duration, fps, True),
"duration": source.duration,
"subs": subs "subs": subs
}) })
with open(os.path.join(prefix, 'voice_over.json'), 'w') as fd: with open(os.path.join(prefix, 'voice_over.json'), 'w') as fd:

View file

@ -21,7 +21,7 @@ def get_melt():
cmd = ['xvfb-run', '-a'] + cmd cmd = ['xvfb-run', '-a'] + cmd
return cmd return cmd
def melt_xml(file): def melt_xml(file, profile='atsc_1080p_24'):
out = None out = None
real_path = os.path.realpath(file) real_path = os.path.realpath(file)
if file in _CACHE and isinstance(_CACHE[file], list): if file in _CACHE and isinstance(_CACHE[file], list):
@ -29,7 +29,7 @@ def melt_xml(file):
if os.stat(real_path).st_mtime != ts: if os.stat(real_path).st_mtime != ts:
out = None out = None
if not out: if not out:
cmd = get_melt() + [file, '-consumer', 'xml'] cmd = get_melt() + [file, '-profile', profile, '-consumer', 'xml']
out = subprocess.check_output(cmd).decode() out = subprocess.check_output(cmd).decode()
_CACHE[file] = [os.stat(real_path).st_mtime, out] _CACHE[file] = [os.stat(real_path).st_mtime, out]
return out return out
@ -73,6 +73,7 @@ class KDEnliveProject:
self._width = int(width) self._width = int(width)
self._height = int(height) self._height = int(height)
self._fps = int(frame_rate_num) / int(frame_rate_den) self._fps = int(frame_rate_num) / int(frame_rate_den)
self.profile = 'atsc_1080p_24'
self._tree = self.get_element("mlt", attrib={ self._tree = self.get_element("mlt", attrib={
"LC_NUMERIC": "C", "LC_NUMERIC": "C",
@ -444,7 +445,7 @@ class KDEnliveProject:
return prefix + self.get_counter(prefix) return prefix + self.get_counter(prefix)
def get_chain(self, file, kdenlive_id=None): def get_chain(self, file, kdenlive_id=None):
out = melt_xml(file) out = melt_xml(file, self.profile)
chain = lxml.etree.fromstring(out).xpath('producer')[0] chain = lxml.etree.fromstring(out).xpath('producer')[0]
chain.tag = 'chain' chain.tag = 'chain'
chain.attrib['id'] = self.get_id('chain') chain.attrib['id'] = self.get_id('chain')
@ -597,7 +598,11 @@ class KDEnliveProject:
else: else:
print('!!', track_id) print('!!', track_id)
frames = int(self._fps * clip['duration']) frames = int(round(self._fps * clip['duration']))
if track_id[0] == "V":
if abs(self._fps * clip['duration'] - frames) > 1/48:
delta = abs(self._fps * clip['duration'] - frames) * 24
print("Track alignment issues", self._fps * clip['duration'], frames, clip.get('src', clip), delta)
self._duration[track_id] += frames self._duration[track_id] += frames
if clip.get("blank"): if clip.get("blank"):

View file

@ -5,6 +5,7 @@ import lxml.etree
import ox import ox
from .render_kdenlive import melt_xml from .render_kdenlive import melt_xml
from .utils import format_duration
def parse_lang(lang): def parse_lang(lang):
@ -57,17 +58,20 @@ def get_clip_by_seqid(clips, seqid):
return None return None
def get_scene_duration(scene, fps=24, track=None):
    """Return the duration, in seconds, of one track of a scene.

    Each clip's duration is rounded to a whole number of frames at *fps*
    before being summed, so the result is always a multiple of ``1 / fps``.

    Args:
        scene: Either a scene mapping of the form
            ``{timeline: {track_name: [clip, ...]}}`` or a path to a JSON
            file containing such a mapping.
        fps: Frame rate used to quantize every clip duration.
        track: Optional ``"timeline:track_name"`` selector (for example
            ``"front:V2"``). When omitted, the first track that contains
            any clips is measured.

    Returns:
        The summed duration of the selected track in seconds, or ``0.0``
        when no matching track contains clips.
    """
    if isinstance(scene, str):
        with open(scene) as fd:
            scene = json.load(fd)
    for timeline, tracks in scene.items():
        # Scenes may carry non-track entries (e.g. a "subtitles" list);
        # those have no ``.items()`` and must not be treated as timelines.
        if not isinstance(tracks, dict):
            continue
        for name, clips in tracks.items():
            if track and '%s:%s' % (timeline, name) != track:
                continue
            if not clips:
                continue
            # NOTE(review): nesting was reconstructed from a flattened diff.
            # The return is assumed to belong to the first non-empty track
            # (all tracks of a scene are expected to be equally long), not
            # to a sum over every track -- confirm against the repository.
            frames = sum(round(clip["duration"] * fps) for clip in clips)
            return frames / fps
    # No matching track with clips: report an empty scene instead of
    # falling through to an implicit None that breaks callers' arithmetic.
    return 0.0
def get_offset_duration(prefix): def get_offset_duration(prefix):

View file

@ -57,6 +57,9 @@ def write_if_new(path, data, mode=''):
with open(path, write_mode) as fd: with open(path, write_mode) as fd:
fd.write(data) fd.write(data)
def format_duration(duration, fps, audio=False):
    """Snap *duration* (seconds) onto the frame grid defined by *fps*.

    Args:
        duration: Length in seconds.
        fps: Frames per second of the target timeline; must be non-zero.
        audio: When true, truncate to the last whole frame that fits
            inside *duration* instead of rounding to the nearest frame.

    Returns:
        The quantized duration in seconds as a float limited to five
        decimal places.
    """
    # int() truncates toward zero; round() picks the nearest frame (ties go
    # to the even frame, as Python's built-in round does).
    frames = int(duration * fps) if audio else round(duration * fps)
    # Formatting through '%0.5f' caps the precision at five decimals so
    # repeated quantization yields stable, comparable values.
    return float('%0.5f' % (frames / fps))