a/v render. gen fixes

This commit is contained in:
j 2026-01-24 13:26:30 +01:00
commit d62d4c5746
6 changed files with 706 additions and 268 deletions

View file

@ -1256,7 +1256,7 @@ examples (config.SITENAME.jsonc) that are part of this pan.do/ra distribution.
"entitiesType": "",
"filters": [
{"id": "type", "sort": [{"key": "items", "operator": "-"}]},
{"id": "chapter", "sort": [{"key": "name", "operator": "-"}]},
{"id": "chapter", "sort": [{"key": "name", "operator": "+"}]},
{"id": "tags", "sort": [{"key": "items", "operator": "-"}]},
{"id": "keywords", "sort": [{"key": "items", "operator": "-"}]},
{"id": "year", "sort": [{"key": "items", "operator": "-"}]}

View file

@ -3,6 +3,7 @@ import hashlib
import math
import os
import time
import shutil
import cv2
import ox
@ -29,6 +30,7 @@ headers = {
def public_url(path):
return path.replace("/srv/pandora/static/", settings.PUBLIC_URL + "static/")
def public_document_url(document):
url = "%sdocuments/%s/source.%s?token=%s" % (
settings.PUBLIC_URL,
@ -38,6 +40,7 @@ def public_document_url(document):
)
return url
def public_video_url(item):
url = "%s%s/download/source/?token=%s" % (
settings.PUBLIC_URL,
@ -47,6 +50,16 @@ def public_video_url(item):
return url
def public_frame_url(item, position):
    """Return the public PNG URL for the frame of *item* at *position*.

    The URL is token-authenticated via settings.PUBLIC_TOKEN.
    """
    base = settings.PUBLIC_URL
    token = settings.PUBLIC_TOKEN
    return "%s%s/source%s.png?token=%s" % (base, item.public_id, position, token)
def trim_video(src, dst, frames, start0=False):
cap = cv2.VideoCapture(src)
fps = cap.get(cv2.CAP_PROP_FPS)
@ -93,6 +106,8 @@ def bytedance_task(data):
for key, value in defaults.items():
if key not in data:
data[key] = value
if data["model"] in EP:
data["model"] = EP[data["model"]]
print(data)
r = requests.post(url, headers=headers, json=data).json()
print(r)
@ -112,6 +127,8 @@ def bytedance_response(data):
if key not in data:
data[key] = value
print(data)
if data["model"] in EP:
data["model"] = EP[data["model"]]
response = requests.post(url, headers=headers, json=data).json()
print(response)
return response
@ -134,7 +151,8 @@ def t2v_bytedance(prompt, duration, output):
)
return status
def i2v_bytedance(first_frame, prompt, duration, output, last_frame=None):
def i2v_bytedance(first_frame, prompt, duration, output, last_frame=None, seed=None):
nduration = max(4, int(math.ceil(duration)))
data = {
"duration": nduration,
@ -151,11 +169,15 @@ def i2v_bytedance(first_frame, prompt, duration, output, last_frame=None):
],
}
if last_frame:
data["content"].append({
data["content"].append(
{
"type": "image_url",
"role": "last_frame",
"image_url": {"url": last_frame},
})
}
)
if seed:
data["seed"] = seed
status = bytedance_task(data)
output_url = status["content"]["video_url"]
ox.net.save_url(output_url, output, overwrite=True)
@ -167,6 +189,7 @@ def i2v_bytedance(first_frame, prompt, duration, output, last_frame=None):
)
return status
def first_last(first_frame, last_frame, prompt, duration, output):
nduration = max(4, int(math.ceil(duration)))
data = {
@ -260,17 +283,16 @@ def remake_video(item_id, prompt):
prompt_hash = hashlib.sha1(prompt.encode()).hexdigest()
position = n = 0
processed = []
prefix = "/srv/pandora/static/power/cache/%s_%s" % (item.public_id, prompt_hash)
for segment in segments:
if isinstance(segment, list):
stype, segment = segment
else:
stype = "n"
duration = segment - position
if stype == "c":
first_frame_path = (
"/srv/pandora/static/power/cache/%s_%s/%06d.mp4.last_frame.png"
% (item.public_id, prompt_hash, n - 1)
)
first_frame_path = "%s/%06d.mp4.last_frame.png" % (prefix, n - 1)
first_frame = public_url(first_frame_path)
else:
first_frame = "%s%s/source%s.png?token=%s" % (
@ -286,18 +308,10 @@ def remake_video(item_id, prompt):
last_frame_position,
settings.PUBLIC_TOKEN,
)
output = "/srv/pandora/static/power/cache/%s_%s/%06d.mp4" % (
item.public_id,
prompt_hash,
n,
)
output = "%s/%06d.mp4" % (prefix, n)
if not os.path.exists(output):
first_last(first_frame, last_frame, prompt, duration, output)
trimmed = "/srv/pandora/static/power/cache/%s_%s/%06d_trimmed.mp4" % (
item.public_id,
prompt_hash,
n,
)
trimmed = "%s/%06d_trimmed.mp4" % (prefix, n)
frames = int(duration * 24)
if not os.path.exists(trimmed):
trim_video(output, trimmed, frames, stype == "c")
@ -305,45 +319,57 @@ def remake_video(item_id, prompt):
position = segment
n += 1
joined_output = "/srv/pandora/static/power/cache/%s_%s.mp4" % (
item.public_id,
prompt_hash,
)
joined_output = "%s/joined.mp4" % (prefix,)
join_segments(processed, joined_output)
return joined_output
def prepare_image(image, prompt, out=None):
# Map public BytePlus/Ark model names to provisioned endpoint ids.
# Request payloads whose "model" appears as a key here are rewritten to the
# endpoint id before being sent to the Ark API (see the `data["model"] in EP`
# checks in the bytedance_* helpers).
EP = {
    "seedream-4-5-251128": "ep-20260122071519-pbf7l",
    "seed-1-8-251228": "ep-20260122071243-8qfrk",
    "seedance-1-5-pro-251215": "ep-20260122071613-blmsd",
}
def bytedance_image_generation(data):
model = "seedream-4-5-251128"
if not image.startswith("http:"):
image = public_url(image)
data = {
url = "https://ark.ap-southeast.bytepluses.com/api/v3/images/generations"
defaults = {
"model": model,
"prompt": prompt,
"image": image,
"size": "2560x1440",
"watermark": False,
}
url = "https://ark.ap-southeast.bytepluses.com/api/v3/images/generations"
for key in defaults:
if key not in data:
data[key] = defaults[key]
if data["model"] in EP:
data["model"] = EP[data["model"]]
print("prepare_image", data)
r = requests.post(url, headers=headers, json=data).json()
print(r)
output_url = r["data"][0]["url"]
response = requests.post(url, headers=headers, json=data).json()
print(response)
return response["data"][0]["url"]
def prepare_image(image, prompt, out=None):
    """Generate an AI-restyled version of *image* with *prompt* and save it.

    image: local path under the static root or an http URL
    prompt: text prompt for the image-generation model
    out: target path; defaults to `<image>.ai.png`
    Returns the remote URL of the generated image.
    """
    if not image.startswith("http:"):
        image = public_url(image)
    data = {
        "prompt": prompt,
        "image": image,
        "size": "2560x1440",
    }
    output_url = bytedance_image_generation(data)
    if out is None:
        out = image + ".ai.png"
    ox.net.save_url(output_url, out, overwrite=True)
    # BUG FIX: the original ended with `return r`, an undefined name left
    # over from a refactor (the request object moved into
    # bytedance_image_generation); return the generated image URL instead.
    return output_url
def process_frame(item, prompt, character=None, position=0):
def process_frame(item, prompt, character=None, position=0, seed=None):
model = "seedream-4-5-251128"
if isinstance(item, str):
item = Item.objects.get(public_id=item)
image = "%s%s/source%s.png?token=%s" % (
settings.PUBLIC_URL,
item.public_id,
position,
settings.PUBLIC_TOKEN,
)
if isinstance(character, Document):
character = public_document_url(character)
image = public_frame_url(item, position)
if character is not None:
image = [image, character]
data = {
@ -351,34 +377,159 @@ def process_frame(item, prompt, character=None, position=0):
"prompt": prompt,
"image": image,
"size": "2560x1440",
"watermark": False,
}
url = "https://ark.ap-southeast.bytepluses.com/api/v3/images/generations"
print("prepare_image", data)
response = requests.post(url, headers=headers, json=data).json()
print(response)
url = response["data"][0]["url"]
if seed:
data["seed"] = seed
url = bytedance_image_generation(data)
img = add_ai_image(item, position, url)
img.refresh_from_db()
img.data['model'] = model
img.data['prompt'] = prompt
img.data['source'] = item.public_id
img.data["model"] = model
img.data["prompt"] = prompt
img.data["source"] = item.public_id
if character:
img.data['source'] += ' ' + character.split('?')[0]
img.data["source"] += " " + character.split("?")[0]
print(img, img.data)
img.save()
img.update_sort()
img.update_find()
return img
def replace_character(item, character, position=0):
prompt = "Replace the foreground character in image 1 with the character in image 2, keep the posture, clothing, background, light, atmosphere from image 1, but take the facial features and personality from image 2. Make sure the size of the character is adjusted since the new character is a child and make sure the size of the head matches the body. The quality of the image should be the same between foreground and background, adjust the quality of the character to match the background. Use the style of image 1 for the character: if image 1 is a photo make the character a real person, if image 1 is a drawing make the character a drawn character, if image 1 is a comic use a comic character and so on"
prompt = "Replace the foreground character in image 1 with the character in image 2, keep the posture, clothing, background, light, atmosphere from image 1, but take the facial features and personality from image 2. Make sure the size of the character is adjusted since the new character is a child and make sure the size of the head matches the body. The quality of the image should be the same between foreground and background, adjust the quality of the character to match the background. Use the style of image 1 for the character: if image 1 is a photo make the character a real person, if image 1 is a drawing make the character a drawn character, if image 1 is a comic use a comic character"
"""
REPLACE_CHARACTER_PROMPT = "Replace the foreground character in image 1 with the character in image 2, keep the posture, clothing, background, light, atmosphere from image 1, but take the facial features and personality from image 2. Make sure the size of the character is adjusted since the new character is a child and make sure the size of the head matches the body. The quality of the image should be the same between foreground and background, adjust the quality of the character to match the background. Use the style of image 1 for the character: if image 1 is a photo make the character a real person, if image 1 is a drawing make the character a drawn character, if image 1 is a comic use a comic character and so on"
"""
REPLACE_CHARACTER_PROMPT = "Replace the foreground character in image 1 with the character in image 2, keep the posture, clothing, background, light, atmosphere from image 1, but take the facial features and personality from image 2. Make sure the size of the character is adjusted since the new character is a child and make sure the size of the head matches the body. The quality of the image should be the same between foreground and background, adjust the quality of the character to match the background. Use the style of image 1 for the character: if image 1 is a photo make the character a real person, if image 1 is a drawing make the character a drawn character, if image 1 is a comic use a comic character"
def fal_replace_character(item, character, position=0):
if isinstance(item, str):
item = Item.objects.get(public_id=item)
prompt = REPLACE_CHARACTER_PROMPT.replace("image 1", "@image 1").replace(
"image 2", "@image 2"
)
if character == "P5":
prompt = prompt.replace('child', 'teenager')
prompt = prompt.replace("child", "teenager")
if character in ("P1", "P2", "P3", "P4", "P5"):
character = public_document_url(Document.objects.get(data__title="Character " + character))
return process_frame(item, prompt, character, position)
character = Document.objects.get(data__title="Character " + character)
if isinstance(character, Document):
character = public_document_url(character)
image = public_frame_url(item, position)
image = [image, character]
url = flux_edit_image(image, prompt)
img = add_ai_image(item, position, url)
img.refresh_from_db()
img.data["model"] = "flux-2-pro"
img.data["prompt"] = prompt
img.data["source"] = item.public_id
img.data["source"] += " " + character.split("?")[0]
print(img, img.data)
img.save()
img.update_sort()
img.update_find()
return img
def replace_character(item, character, position=0, seed=None):
    """Replace the foreground character in a frame of *item*.

    character: "P1".."P5" (resolved to the matching character document)
        or a reference image URL used directly.
    position: frame position in the source video.
    seed: optional generation seed, forwarded to process_frame.
    Returns the Document created by process_frame.
    """
    prompt = REPLACE_CHARACTER_PROMPT
    if character == "P5":
        # P5 is older than the other characters; adjust the prompt wording
        prompt = prompt.replace("child", "teenager")
    if character in ("P1", "P2", "P3", "P4", "P5"):
        doc = Document.objects.get(data__title="Character " + character)
        character = public_document_url(doc)
    return process_frame(item, prompt, character, position, seed=seed)
def kling_lipsync(audio_item, video_item, keep=False):
    """Lip-sync *video_item*'s video to *audio_item*'s audio via fal.ai Kling.

    audio_item / video_item: Item objects with public source URLs.
    keep: keep the downloaded render in the cache directory.
    Returns the new AI variant item.

    BUG FIX: the original body referenced several names that were never
    defined (`output`, `item`, `keep`, `img`) and would raise NameError;
    they are resolved here: the render is cached like other AI variants and
    attached to *video_item*. NOTE(review): the dropped `img.add(ai)` line
    had no `img` in scope — confirm whether a document link was intended.
    """
    video_url = public_video_url(video_item)
    audio_url = public_video_url(audio_item)
    model = "fal-ai/kling-video/lipsync/audio-to-video"
    data = {
        "video_url": video_url,
        "audio_url": audio_url
    }
    print(data)
    handler = fal_client.submit(model, arguments=data)
    request_id = handler.request_id
    print(request_id)
    result = fal_wait_for(model, request_id)
    print(result)
    output_url = result["video"]["url"]
    # cache path keyed on both items so reruns with other audio don't collide
    output = "/srv/pandora/static/power/cache/%s_%s_lipsync/ai.mp4" % (
        video_item.public_id,
        audio_item.public_id,
    )
    os.makedirs(os.path.dirname(output), exist_ok=True)
    ox.net.save_url(output_url, output, overwrite=True)
    ai = add_ai_variant(video_item, output, "ai:lipsync")
    ai.data["model"] = model
    ai.save()
    if not keep:
        shutil.rmtree(os.path.dirname(output))
    return ai
def kling_v2v_reference(item, character, keep=False):
    """Replace the main character in *item*'s video with *character* using
    Kling video-to-video reference.

    https://fal.ai/models/fal-ai/kling-video/o1/video-to-video/reference/api

    item: Item or public id string.
    character: "P1".."P5" (resolved to a character document URL) or an image URL.
    keep: keep the downloaded render in the cache directory.
    Returns the new AI variant item.
    """
    if isinstance(item, str):
        item = Item.objects.get(public_id=item)
    if character in ("P1", "P2", "P3", "P4", "P5"):
        character = public_document_url(
            Document.objects.get(data__title="Character " + character)
        )
    # BUG FIX: the original built the payload from an undefined `image_url`;
    # the resolved character reference image is what was intended.
    image_url = character
    video_url = public_video_url(item)
    prompt = "Replace the main character in @Video1 with the character from the reference images, adjust the style of the character to match the style of the video"
    model = "fal-ai/kling-video/o1/video-to-video/reference"
    prompt_hash = hashlib.sha1((prompt + character).encode()).hexdigest()
    output = "/srv/pandora/static/power/cache/%s_%s/ai.mp4" % (
        item.public_id,
        prompt_hash,
    )
    # pick the longest supported duration that still fits the source video;
    # BUG FIX: `duration` was unbound when the video is shorter than 3s
    duration = 3
    for d in [3, 4, 5, 6, 7, 8, 9, 10]:
        if d > item.sort.duration:
            break
        duration = d
    # BUG FIX: a first, immediately-overwritten `data` dict (motion-control
    # style payload) and a commented-out "elements" block were dead code
    # left over from experimentation; removed.
    data = {
        "prompt": prompt,
        "keep_audio": False,
        "aspect_ratio": "16:9",
        "video_url": video_url,
        "image_urls": [image_url],
        "duration": str(duration)
    }
    print(data)
    handler = fal_client.submit(model, arguments=data)
    request_id = handler.request_id
    print(request_id)
    result = fal_wait_for(model, request_id)
    print(result)
    output_url = result["video"]["url"]
    ox.net.save_url(output_url, output, overwrite=True)
    ai = add_ai_variant(item, output, "ai:v2v-replace")
    ai.data["prompt"] = ox.escape_html(prompt)
    ai.data["firstframe"] = image_url.split("?")[0]
    ai.data["model"] = model
    ai.save()
    if not keep:
        shutil.rmtree(os.path.dirname(output))
    # NOTE(review): the original ended with `img.add(ai)` but `img` was never
    # defined — dropped; confirm whether the character document should be
    # attached to the variant here.
    return ai
def replace_character_motion_control(item, character, keep=False):
if isinstance(item, str):
@ -394,7 +545,10 @@ def replace_character_motion_control(item, character, keep=False):
prompt = ""
model = "fal-ai/kling-video/v2.6/pro/motion-control"
prompt_hash = hashlib.sha1((prompt + image_url).encode()).hexdigest()
output = "/srv/pandora/static/power/cache/%s_%s/ai.mp4" % (item.public_id, prompt_hash)
output = "/srv/pandora/static/power/cache/%s_%s/ai.mp4" % (
item.public_id,
prompt_hash,
)
data = {
"prompt": prompt,
"image_url": image_url,
@ -412,7 +566,7 @@ def replace_character_motion_control(item, character, keep=False):
ox.net.save_url(output_url, output, overwrite=True)
ai = add_ai_variant(item, output, "ai:replace:p1:motion-control")
ai.data["prompt"] = ox.escape_html(prompt)
ai.data['firstframe'] = image_url.split('?')[0]
ai.data["firstframe"] = image_url.split("?")[0]
ai.data["model"] = model
ai.save()
if not keep:
@ -420,6 +574,7 @@ def replace_character_motion_control(item, character, keep=False):
img.add(ai)
return ai
def describe_video(url, neutral=False):
if neutral:
prompt = (
@ -453,15 +608,19 @@ def describe_video(url, neutral=False):
response = bytedance_response(data)
return response["output"][1]["content"][0]["text"]
def describe_item(item, neutral=False):
    """Return a text description of *item*'s source video.

    item: Item instance or public id string.
    neutral: forwarded to describe_video to request a neutral description.
    """
    if isinstance(item, str):
        item = Item.objects.get(public_id=item)
    return describe_video(public_video_url(item), neutral)
def reshoot_item(item, extra_prompt=None, first_frame=None, keep=False):
if isinstance(item, str):
item = Item.objects.get(public_id=item)
if isinstance(first_frame, Document):
first_frame = public_document_url(first_frame)
duration = item.sort.duration
frames = int(duration * 24)
prompt = describe_item(item, first_frame is not None)
@ -491,7 +650,7 @@ def reshoot_item(item, extra_prompt=None, first_frame=None, keep=False):
ai.data["model"] = status["model"]
ai.data["seed"] = status["seed"]
if first_frame:
ai.data["firstframe"] = first_frame.split('?')[0]
ai.data["firstframe"] = first_frame.split("?")[0]
if isinstance(first_frame, Document):
first_frame.add(ai)
ai.save()
@ -500,6 +659,75 @@ def reshoot_item(item, extra_prompt=None, first_frame=None, keep=False):
return ai
def reshoot_item_segments(item, character, keep=False):
    """Re-shoot *item* segment by segment with *character* replacing the lead.

    Each segment gets a neutral description, an AI first frame (character
    replaced), and an i2v render trimmed to the segment length; the trimmed
    renders are joined and registered as an AI variant of *item*.

    item: Item or public id string.
    character: character key / reference (see replace_character).
    keep: keep the per-segment cache directory.
    Returns the new AI variant item.
    """
    if isinstance(item, str):
        item = Item.objects.get(public_id=item)
    max_duration = 12
    source = item.files.all()[0].data.path
    info = ox.avinfo(source)
    duration = info["duration"]
    if duration < max_duration:
        segments = [duration]
    else:
        segments = get_item_segments(item, max_duration=max_duration)
    print(segments)
    prompt_hash = hashlib.sha1("reshoot_segment".encode()).hexdigest()
    processed = []
    prefix = "/srv/pandora/static/power/cache/%s_%s" % (item.public_id, prompt_hash)
    # side effect: writes the per-segment source files under prefix
    fragment_video(source, prefix, segments)
    prompts = []
    first_frames = []
    seed = None
    n = 0
    position = 0
    for segment in segments:
        if isinstance(segment, list):
            stype, segment = segment
        else:
            stype = "n"
        output = "%s/%06d.mp4" % (prefix, n)
        output_ai = "%s/%06d_ai.mp4" % (prefix, n)
        segment_duration = segment - position
        if os.path.exists(output):
            segment_video_url = public_url(output)
            prompt = describe_video(segment_video_url, neutral=True)
            prompts.append("Segment %s: " % (n + 1) + prompt)
            segment_character = character
            if position:
                # later segments reuse the previous AI first frame for continuity
                segment_character = segment_first_frame_url
            segment_first_frame = replace_character(
                item, segment_character, position, seed=seed
            )
            segment_first_frame_url = public_document_url(segment_first_frame)
            status = i2v_bytedance(
                segment_first_frame_url, prompt, segment_duration, output_ai, seed=seed
            )
            # BUG FIX: was `seeed = status["seed"]` — the typo meant the seed
            # never propagated to later segments or the saved metadata
            seed = status["seed"]
            trimmed = "%s/%06d_ai_trimmed.mp4" % (prefix, n)
            frames = int(segment_duration * 24)
            trim_video(output_ai, trimmed, frames, stype == "c")
            processed.append(trimmed)
            first_frames.append(segment_first_frame)
        n += 1
        position = segment
    joined_output = "%s/joined.mp4" % (prefix)
    join_segments(processed, joined_output)
    ai = add_ai_variant(item, joined_output, "ai:0:reshoot-firstframe")
    prompt = "\n\n".join(prompts)
    ai.data["prompt"] = ox.escape_html(prompt)
    ai.data["firstframe"] = " ".join([ox.toAZ(ff.id) for ff in first_frames])
    # NOTE(review): `status` is unbound if no segment file existed — assumes
    # fragment_video always produced at least one segment; confirm.
    ai.data["model"] = status["model"]
    ai.data["seed"] = seed
    ai.save()
    for first_frame in first_frames:
        first_frame.add(ai)
    if not keep:
        # BUG FIX: the original removed the cache directory twice (once via
        # os.path.dirname(joined_output), once via prefix — the same path),
        # so the second rmtree raised FileNotFoundError; remove it once,
        # after the first-frame documents were linked.
        shutil.rmtree(prefix)
    return ai
def describe_image(url):
system_prompt = ""
system_prompt = "You are an image analyst describing different aspects of an image. You are focused on the form, composition, and task shown in the image."
@ -527,6 +755,7 @@ def transform_remake_video(item_id, image_prompt, video_prompt):
prompt_hash = hashlib.sha1((image_prompt + video_prompt).encode()).hexdigest()
position = n = 0
processed = []
prefix = "/srv/pandora/static/power/cache/%s_%s" % (item.public_id, prompt_hash)
for segment in segments:
if isinstance(segment, list):
stype, segment = segment
@ -534,10 +763,7 @@ def transform_remake_video(item_id, image_prompt, video_prompt):
stype = "n"
duration = segment - position
if stype == "c":
first_frame_path = (
"/srv/pandora/static/power/cache/%s_%s/%06d.mp4.last_frame.png"
% (item.public_id, prompt_hash, n - 1)
)
first_frame_path = "%s/%06d.mp4.last_frame.png" % (prefix, n - 1)
first_frame = public_url(first_frame_path)
else:
first_frame = "%s%s/source%s.png?token=%s" % (
@ -546,10 +772,7 @@ def transform_remake_video(item_id, image_prompt, video_prompt):
position,
settings.PUBLIC_TOKEN,
)
first_frame_path = (
"/srv/pandora/static/power/cache/%s_%s/%06d.first_frame.png"
% (item.public_id, prompt_hash, n)
)
first_frame_path = "%s/%06d.first_frame.png" % (prefix, n)
if not os.path.exists(first_frame_path):
prepare_image(first_frame, image_prompt, first_frame_path)
first_frame = public_url(first_frame_path)
@ -560,26 +783,15 @@ def transform_remake_video(item_id, image_prompt, video_prompt):
last_frame_position,
settings.PUBLIC_TOKEN,
)
last_frame_path = (
"/srv/pandora/static/power/cache/%s_%s/%06d.last_frame.png"
% (item.public_id, prompt_hash, n)
)
last_frame_path = "%s/%06d.last_frame.png" % (prefix, n)
if not os.path.exists(last_frame_path):
prepare_image(last_frame, image_prompt, last_frame_path)
last_frame = public_url(last_frame_path)
output = "/srv/pandora/static/power/cache/%s_%s/%06d.mp4" % (
item.public_id,
prompt_hash,
n,
)
output = "%s/%06d.mp4" % (prefix, n)
if not os.path.exists(output):
first_last(first_frame, last_frame, video_prompt, duration, output)
trimmed = "/srv/pandora/static/power/cache/%s_%s/%06d_trimmed.mp4" % (
item.public_id,
prompt_hash,
n,
)
trimmed = "%s/%06d_trimmed.mp4" % (prefix, n)
frames = int(duration * 24)
if not os.path.exists(trimmed):
trim_video(output, trimmed, frames, stype == "c")
@ -587,10 +799,7 @@ def transform_remake_video(item_id, image_prompt, video_prompt):
position = segment
n += 1
joined_output = "/srv/pandora/static/power/cache/%s_%s.mp4" % (
item.public_id,
prompt_hash,
)
joined_output = "%s/joined.mp4" % (prefix,)
join_segments(processed, joined_output)
return joined_output
@ -625,6 +834,7 @@ def restyle_video(item_id, prompt):
ox.net.save_url(output_url, output_path, overwrite=True)
return output_path
def fal_wait_for(model, request_id):
status = fal_client.status(model, request_id, with_logs=True)
while isinstance(status, fal_client.InProgress):
@ -633,6 +843,7 @@ def fal_wait_for(model, request_id):
result = fal_client.result(model, request_id)
return result
def motion_control_preprocess_image(item_id, image_prompt, video_prompt):
item = Item.objects.get(public_id=item_id)
video_url = public_video_url(item)
@ -686,7 +897,7 @@ def luma_wait_for(id):
return status
def luma_modify_segment(video_url, prompt, first_frame=None, mode='flex_2'):
def luma_modify_segment(video_url, prompt, first_frame=None, mode="flex_2"):
# also got that at fal-ai/luma-dream-machine/ray-2/modify
url = "https://api.lumalabs.ai/dream-machine/v1/generations/video/modify"
payload = {
@ -791,6 +1002,7 @@ def in_the_style_of_fal(image, style):
prompt = "apply style from @image 2 to @image 1 keep the position of the person in @image 1 but take light, colors, clothing from @image 2"
return flux_edit_image([image, style], prompt)
def in_the_style_of_byte(image, style):
prompt = "apply style from image 2 to image 1 keep the position of the person in image 1 but take light, colors, clothing from image 2"
image_model_name = "seedream-4-5-251128"
@ -813,7 +1025,7 @@ def in_the_style_of_byte(image, style):
def luma_modify_item(item, prompt="", image_prompt=None, first_frame=None, keep=False):
mode = 'flex_2'
mode = "flex_2"
if isinstance(item, str):
item = Item.objects.get(public_id=item)
source = item.files.all()[0].data.path
@ -839,36 +1051,22 @@ def luma_modify_item(item, prompt="", image_prompt=None, first_frame=None, keep=
stype, segment = segment
else:
stype = "n"
output = "/srv/pandora/static/power/cache/%s_%s/%06d.mp4" % (
item.public_id,
prompt_hash,
n,
)
output_ai = "/srv/pandora/static/power/cache/%s_%s/%06d_ai.mp4" % (
item.public_id,
prompt_hash,
n,
)
output = "%s/%06d.mp4" % (prefix, n)
output_ai = "%s/%06d_ai.mp4" % (prefix, n)
if os.path.exists(output):
video_url = luma_modify_segment(
public_url(output),
prompt,
first_frame=first_frame_url,
mode=mode
public_url(output), prompt, first_frame=first_frame_url, mode=mode
)
ox.net.save_url(video_url, output_ai, overwrite=True)
processed.append(output_ai)
n += 1
joined_output = "/srv/pandora/static/power/cache/%s_%s/joined.mp4" % (
item.public_id,
prompt_hash,
)
joined_output = "%s/joined.mp4" % (prefix,)
join_segments(processed, joined_output)
ai = add_ai_variant(item, joined_output, "ai:replace:p1:luma")
ai.data["prompt"] = ox.escape_html(prompt)
if first_frame:
ai.data['firstframe'] = first_frame_url.split('?')[0]
ai.data["model"] = 'ray-2:%s' % mode
ai.data["firstframe"] = first_frame_url.split("?")[0]
ai.data["model"] = "ray-2:%s" % mode
ai.save()
if not keep:
shutil.rmtree(os.path.dirname(joined_output))
@ -885,6 +1083,7 @@ def add_ai_variant(item, video_path, type):
ai.user = item.user
ai.data["type"] = [type]
ai.data["title"] = item.data["title"]
ai.data["chapter"] = item.data.get("chapter", "")
ai.save()
file = File()
file.oshash = ox.oshash(video_path)
@ -904,20 +1103,22 @@ def add_ai_variant(item, video_path, type):
file.extract_stream()
return ai
def add_ai_image(item, position, url, extension=None):
if extension is None:
extension = url.split('.')[-1].split('?')[0]
if extension == 'jpeg': extension = 'jpg'
extension = url.split(".")[-1].split("?")[0]
if extension == "jpeg":
extension = "jpg"
file = Document(user=item.user)
file.data['title'] = '%s at %s' % (item.get('title'), position)
file.data['position'] = position
file.data["title"] = "%s at %s" % (item.get("title"), position)
file.data["position"] = position
file.extension = extension
file.width = -1
file.pages = -1
file.uploading = True
file.save()
file.uploading = True
name = 'data.%s' % file.extension
name = "data.%s" % file.extension
file.file.name = file.path(name)
ox.net.save_url(url, file.file.path, overwrite=True)
file.get_info()
@ -927,3 +1128,56 @@ def add_ai_image(item, position, url, extension=None):
file.update_sort()
file.add(item)
return file
def add_tag(item, tag):
    """Append *tag* to the item's tag list (creating it if missing) and save."""
    item.data.setdefault('tags', []).append(tag)
    item.save()
def process_motion_firstframe():
    """Run reshoot_item_segments('P1') for every item in the
    'Motion-Firstframe' list that has no AI variant yet and is not marked
    as failed; failures are tagged 'ai-failed' so they are skipped next run.
    """
    motion_list = itemlist.models.List.objects.get(name='Motion-Firstframe')
    for i in motion_list.items.all():
        ai = Item.objects.filter(data__type__icontains='ai').filter(data__title=i.data['title'])
        if ai.exists() or 'ai-failed' in i.data.get('tags', []):
            print('>> skip', i)
            continue
        print(i)
        try:
            reshoot_item_segments(i, 'P1', keep=True)
        # BUG FIX: bare `except:` also swallowed KeyboardInterrupt/SystemExit,
        # making the batch impossible to interrupt cleanly
        except Exception:
            add_tag(i, 'ai-failed')
            print('>> failed', i)
def extract_firstframe(character='P1'):
    """Create a replaced-character first frame (position 0) for every source
    item that has no documents yet; failures are tagged 'ai-failed'.

    character: character key passed through to replace_character.
    """
    for item in Item.objects.filter(data__type__icontains="source"):
        if 'ai-failed' in item.data.get('tags', []):
            continue
        if not item.documents.all().exists():
            print(item)
            try:
                # the created document is linked to the item by
                # replace_character; the unused local binding was dropped
                replace_character(item, character, 0)
            # BUG FIX: bare `except:` also swallowed KeyboardInterrupt/SystemExit
            except Exception:
                add_tag(item, 'ai-failed')
def process_reshoot_firstframe():
    """Run replace_character_motion_control for every eligible item in the
    'Reshoot-Firstframe' list (skipping long items, item 'HZ', items that
    already have an AI variant, and items tagged 'ai-failed').
    """
    reshoot_list = itemlist.models.List.objects.get(name='Reshoot-Firstframe')
    for i in reshoot_list.items.all():
        if i.sort.duration > 30:
            continue
        # 'HZ' is excluded explicitly; reason not recorded in the source
        if i.public_id == 'HZ':
            continue
        if i.documents.all().count():
            ai = Item.objects.filter(data__type__icontains='ai').filter(data__title=i.data['title'])
            if ai.exists() or 'ai-failed' in i.data.get('tags', []):
                print('>> skip', i)
                continue
        # reuse the newest document as first frame, or create one for P1
        first_frame = i.documents.all().order_by('-created').first()
        if not first_frame:
            first_frame = replace_character(i, 'P1', 0)
        print(i, first_frame, i.documents.all().count())
        try:
            replace_character_motion_control(i, first_frame)
        # BUG FIX: bare `except:` also swallowed KeyboardInterrupt/SystemExit
        except Exception:
            add_tag(i, 'ai-failed')
            print('>> failed', i)

View file

@ -16,7 +16,7 @@ class Command(BaseCommand):
parser.add_argument('--duration', action='store', dest='duration', default="3600", help='target duration of all fragments in seconds')
parser.add_argument('--offset', action='store', dest='offset', default="1024", help='inital offset in pi')
parser.add_argument('--no-video', action='store_true', dest='no_video', default=False, help='don\'t render video')
parser.add_argument('--chapter', action='store', dest='chapter', default=None, help='chapter')
parser.add_argument('--fragment', action='store', dest='fragment', default=None, help='fragment')
parser.add_argument('--single-file', action='store_true', dest='single_file', default=False, help='render to single video')
parser.add_argument('--keep-audio', action='store_true', dest='keep_audio', default=False, help='keep independent audio tracks')
parser.add_argument('--stereo-downmix', action='store_true', dest='stereo_downmix', default=False, help='stereo downmix')

View file

@ -0,0 +1,18 @@
import json
import os
import subprocess
from django.core.management.base import BaseCommand
from django.conf import settings
from ...render_sound import render_all
class Command(BaseCommand):
    """Management command: generate the kdenlive project and render it."""

    # typo fix: was 'genrate kdenlive porject and render'
    help = 'generate kdenlive project and render'

    def add_arguments(self, parser):
        parser.add_argument('--debug', action='store_true', dest='debug', default=False, help='output more info')

    def handle(self, **options):
        # delegate to render_sound.render_all with the parsed options
        render_all(options)

365
render.py
View file

@ -20,7 +20,36 @@ from .render_utils import *
default_prefix = "/srv/p_for_power"
def compose(clips, target=150, base=1024, voice_over=None, options=None):
def get_loudnorm(file):
    """Return the loudnorm measurement string for *file*, computing and
    caching it in file.info["loudnorm"] on first use.

    The value is a tab-separated summary of ffmpeg's loudnorm analysis:
    integrated loudness (L), loudness range (R) and true peak (P).

    Raises RuntimeError if ffmpeg's output contains no JSON block.
    """
    if "loudnorm" in file.info:
        return file.info["loudnorm"]
    source = file.data.path
    cmd = [
        "ffmpeg",
        "-i", source,
        "-vn",
        "-af", "loudnorm=print_format=json",
        "-f", "null",
        "-"
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    # ffmpeg writes the measurement JSON to stderr, after the progress log
    json_match = re.search(r"\{[\s\S]*\}", result.stderr)
    if not json_match:
        raise RuntimeError("Could not find loudnorm JSON output in ffmpeg output")
    loudnorm_data = json.loads(json_match.group(0))
    input_i = float(loudnorm_data.get("input_i", 0))  # integrated loudness (LUFS)
    input_lra = float(loudnorm_data.get("input_lra", 0))  # loudness range (LU)
    input_tp = float(loudnorm_data.get("input_tp", 0))  # true peak (dBTP)
    # NOTE: input_thresh was parsed but never used in the original; dropped.
    # The "P" field intentionally keeps the original (colon-less) format —
    # downstream consumers read this exact string from file.info.
    loudnorm = f"L: {input_i:.6f}\tR: {input_lra:.6f}\tP {input_tp:.6f}"
    file.info["loudnorm"] = loudnorm
    file.save()
    return loudnorm
def compose(clips, fragment, target=150, base=1024, voice_over=None, options=None):
if options is None:
options = {}
fps = 24
@ -50,40 +79,175 @@ def compose(clips, target=150, base=1024, voice_over=None, options=None):
seq = random(10000 + base * 1000)
used = []
selected_clips_length = 0
ai_length = 0
selected_clips = []
tags = []
while selected_clips_length < target:
if not tags:
tags = fragment["tags"].copy()
tag = random_choice(seq, tags, pop=True)
non_ai_clips = []
ai_clips = []
for clip in clips:
if tag in clip["tags"]:
if 'ai' in clip:
ai_clips.append(clip)
else:
non_ai_clips.append(clip)
if ai_length < target * 0.6 and ai_clips:
clip = random_choice(seq, ai_clips, pop=True)
clip["use_ai"] = True
selected_clips.append(clip)
selected_clips_length += clip['duration']
ai_length += clip['duration']
clips = [c for c in clips if c['id'] != clip['id']]
continue
available_clips = non_ai_clips + ai_clips
if available_clips:
clip = random_choice(seq, available_clips, pop=True)
clip["use_ai"] = False
selected_clips.append(clip)
selected_clips_length += clip['duration']
clips = [c for c in clips if c['id'] != clip['id']]
clips = selected_clips
clip = None
while target - length > 0 and clips:
'''
if clip:
if chance(seq, 0.5):
next_seqid = clip['seqid'] + 1
clip = get_clip_by_seqid(clips, next_seqid)
else:
clip = None
'''
clip = None
if not clip:
# FIXME: while not all clips have AI versions make sure we have one 50% of the time
clip = random_choice(seq, clips, True)
next_length = length + clip['duration']
if target - next_length < -target*0.1:
break
length += int(clip['duration'] * fps) / fps
# 50/50 source or ai
src = clip['source']
audio = clip['source']
# select ai if we have one
if 'ai' in clip:
if clip["use_ai"]:
src = random_choice(seq, list(clip['ai'].values()), False)
print('%07.3f-%07.3f %07.3f %s (%s)' % (
length-clip['duration'],
length,
clip['duration'],
os.path.basename(clip['source']),
src.split('/')[-2]
))
scene['front']['V2'].append({
'duration': clip['duration'],
'src': src,
"filter": {
}
})
volume_front = '-2.5'
volume_rear = '-8.5'
if clip.get('volume') is not None:
volume_front = '%0.2f' % (float(volume_front) + clip['volume'])
volume_rear = '%0.2f' % (float(volume_rear) + clip['volume'])
'''
'dynamic_loudness': [
["target_loudness", "-35"],
["min_gain", "-15"],
["max_gin", "15"],
],
'''
audio_filter = {
'mono': [
["channels", "2"],
],
'loudness': [
["program", "-17"],
["results", clip["loudnorm"]],
],
'volume': volume_front,
'fadein': '00:00:00.125'
}
scene['audio-front']['A2'].append({
'duration': clip['duration'],
'src': audio,
'filter': audio_filter.copy()
})
'''
audio_filter['volume'] = volume_rear
scene['audio-rear']['A2'].append({
'duration': clip['duration'],
'src': audio,
'filter': audio_filter.copy()
})
'''
used.append(clip)
if not clips and target - length > 0:
print("not enough clips, need to reset")
used_ids = {c['id'] for c in used}
clips = [c for c in all_clips if c != clip and c['id'] not in used_ids]
if not clips:
print("not enough clips, also consider used")
clips = [c for c in all_clips if c != clip]
if not clips:
print("not enough clips, also consider last clip")
clips = all_clips.copy()
scene_duration = int(get_scene_duration(scene) * fps)
voice_overs = []
sub_offset = 0
vo_min = 0
subs = []
print("--")
print("Voice Over:")
if voice_over:
vo_keys = list(sorted(voice_over))
if chance(seq, 0.5):
vo_key = vo_keys[random_int(seq, len(vo_keys))]
voice_overs.append(voice_over[vo_key])
elif len(vo_keys) >= 2:
vo1 = vo_keys.pop(random_int(seq, len(vo_keys)))
vo2 = vo_keys.pop(random_int(seq, len(vo_keys)))
voice_overs.append(voice_over[vo1])
if voice_over[vo1]["duration"] + voice_over[vo2]["duration"] < target:
print("adding second vo")
voice_overs.append(voice_over[vo2])
print("vo:", [x['src'] for x in voice_overs], list(sorted(voice_over)))
vo_min = sum([vo['duration'] for vo in voice_overs])
sub_offset = 0
if vo_min > target:
target = vo_min
elif vo_min < target:
offset = format_duration((target - vo_min) / 2, fps)
while int(sub_offset * fps) < scene_duration:
if sub_offset:
gap = (5 * fps + random_int(seq, 10 * fps)) / fps
else:
gap = (2 * fps + random_int(seq, 5 * fps)) / fps
if int((sub_offset + gap)* fps) > scene_duration:
gap = format_duration((scene_duration - int(sub_offset * fps)) / fps, fps)
scene['audio-center']['A1'].append({
'blank': True,
'duration': offset
'duration': gap
})
scene['audio-rear']['A1'].append({
'blank': True,
'duration': offset
'duration': gap
})
vo_min += offset
sub_offset = offset
subs = []
for vo in voice_overs:
print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+gap, gap), 'silence')
sub_offset += gap
vo_key = random_choice(seq, vo_keys, pop=True)
variant = random_int(seq, len(voice_over[vo_key]))
vo = voice_over[vo_key][variant]
while int((vo['duration'] + sub_offset) * fps) > scene_duration:
if not vo_keys:
vo = None
break
vo_key = random_choice(seq, vo_keys, pop=True)
variant = random_int(seq, len(voice_over[vo_key]))
vo = voice_over[vo_key][variant]
if vo is None:
break
print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+vo["duration"], vo["duration"]), vo["src"].split('/')[-1])
voice_overs.append(vo)
voc = vo.copy()
a, b = '-11', '-3'
if 'Whispered' in voc['src']:
@ -122,118 +286,20 @@ def compose(clips, target=150, base=1024, voice_over=None, options=None):
if subs:
scene["subtitles"] = subs
selected_clips_length = 0
selected_clips = []
non_ai_clips = []
for clip in clips:
if 'ai' in clip:
selected_clips.append(clip)
selected_clips_length += clip['duration']
else:
non_ai_clips.append(clip)
while selected_clips_length < target and non_ai_clips:
clip = random_choice(seq, non_ai_clips, pop=True)
selected_clips.append(clip)
selected_clips_length += clip['duration']
clips = selected_clips
clip = None
while target - length > 0 and clips:
'''
if clip:
if chance(seq, 0.5):
next_seqid = clip['seqid'] + 1
clip = get_clip_by_seqid(clips, next_seqid)
else:
clip = None
'''
clip = None
if not clip:
# FIXME: while not all clips have AI versions make sure we have one 50% of the time
clip = random_choice(seq, clips, True)
if not clips:
print("not enough clips, need to reset")
clips = [c for c in all_clips if c != clip and c not in used]
if not clips:
print("not enough clips, also consider used")
clips = [c for c in all_clips if c != clip]
if not clips:
print("not enough clips, also consider last clip")
clips = all_clips.copy()
if length + clip['duration'] > target and length >= vo_min:
break
length += int(clip['duration'] * fps) / fps
# 50/50 source or ai
src = clip['source']
audio = clip['source']
# select ai if we have one
if 'ai' in clip:
if True or chance(seq, 0.5):
src = random_choice(seq, list(clip['ai'].values()), False)
print('%07.3f %07.3f' % (length, clip['duration']), src.split('/')[-2], os.path.basename(clip['source']))
scene['front']['V2'].append({
'duration': clip['duration'],
'src': src,
"filter": {
}
})
volume_front = '-2.5'
volume_rear = '-8.5'
if clip.get('volume') is not None:
volume_front = '%0.2f' % (float(volume_front) + clip['volume'])
volume_rear = '%0.2f' % (float(volume_rear) + clip['volume'])
audio_filter = {
'mono': [
["channels", "2"],
],
'dynamic_loudness': [
["target_loudness", "-35"],
["min_gain", "-15"],
["max_gin", "15"],
],
'volume': volume_front,
'fadein': '00:00:00.125'
}
scene['audio-front']['A2'].append({
'duration': clip['duration'],
'src': audio,
'filter': audio_filter.copy()
})
'''
audio_filter['volume'] = volume_rear
scene['audio-rear']['A2'].append({
'duration': clip['duration'],
'src': audio,
'filter': audio_filter.copy()
})
'''
used.append(clip)
print("scene duration %0.3f (target: %0.3f, vo_min: %0.3f)" % (length, target, vo_min))
scene_duration = int(get_scene_duration(scene) * fps)
sub_offset = int(sub_offset * fps)
if sub_offset < scene_duration:
delta = format_duration((scene_duration - sub_offset) / fps, fps)
print(">> add %0.3f of silence.. %0.3f (scene_duration)" % (delta, scene_duration / fps))
gap = format_duration((scene_duration - sub_offset) / fps, fps)
print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+gap, gap), 'silence')
scene['audio-center']['A1'].append({
'blank': True,
'duration': delta
'duration': gap
})
scene['audio-rear']['A1'].append({
'blank': True,
'duration': delta
'duration': gap
})
elif sub_offset > scene_duration:
delta = format_duration((scene_duration - sub_offset) / fps, fps)
scene['audio-center']['A1'][-1]["duration"] += delta
scene['audio-rear']['A1'][-1]["duration"] += delta
print("WTF, needed to cut %s new duration: %s" % (delta, scene['audio-center']['A1'][-1]["duration"]))
print(scene['audio-center']['A1'][-1])
sub_offset += gap
print("scene duration %0.3f (target: %0.3f)" % (length, target))
return scene, used
def write_subtitles(data, folder, options):
@ -312,7 +378,9 @@ def get_fragments(clips, voice_over, prefix):
for l in itemlist.models.List.objects.filter(status='featured').order_by('name'):
if l.name.split(' ')[0].isdigit():
fragment_id = l.name.split(' ')[0]
fragment = {
'id': fragment_id,
'name': l.name,
'tags': [],
'anti-tags': [],
@ -344,7 +412,7 @@ def get_fragments(clips, voice_over, prefix):
print("FIXME", i)
continue
type_ = i.data['type'][0].lower()
target = os.path.join(prefix, type_, i.data['title'] + ext)
target = os.path.join(prefix, 'video', type_, i.data['title'] + ext)
sources.append(target)
fragment['clips'] = []
for clip in clips:
@ -353,7 +421,7 @@ def get_fragments(clips, voice_over, prefix):
source = clip['source']
if source in sources:
fragment['clips'].append(clip)
fragment["voice_over"] = voice_over.get(str(fragment["id"]), {})
fragment["voice_over"] = voice_over.get(fragment_id, {})
fragments.append(fragment)
fragments.sort(key=lambda f: ox.sort_string(f['name']))
return fragments
@ -386,21 +454,23 @@ def render_all(options):
for fragment in fragments:
fragment_base += 1
fragment_id = int(fragment['name'].split(' ')[0])
if options["chapter"] and int(options["chapter"]) != fragment_id:
if options["fragment"] and int(options["fragment"]) != fragment_id:
continue
name = fragment['name'].replace(' ', '_')
if fragment_id < 10:
name = '0' + name
if not fragment['clips']:
print("skipping empty fragment", name)
continue
fragment_prefix = os.path.join(base_prefix, name)
os.makedirs(fragment_prefix, exist_ok=True)
fragment_clips = fragment['clips']
unused_fragment_clips = [c for c in fragment_clips if c not in clips_used]
used_ids = {c['id'] for c in clips_used}
unused_fragment_clips = [c for c in fragment_clips if c['id'] not in clips_used]
print('fragment clips', len(fragment_clips), 'unused', len(unused_fragment_clips))
print('--')
print('Video:')
scene, used = compose(
unused_fragment_clips,
fragment=fragment,
target=target,
base=fragment_base,
voice_over=fragment['voice_over'],
@ -414,7 +484,7 @@ def render_all(options):
src = src[0]['src']
stats[src.split('/')[-2]] += 1
else:
print("!! fixme, chapter without VO")
print("!! fixme, fragment without VO")
position += scene_duration
target_position += fragment_target
@ -833,13 +903,14 @@ def generate_clips(options):
source = e.files.filter(selected=True)[0].data.path
ext = os.path.splitext(source)[1]
type_ = e.data['type'][0].lower()
target = os.path.join(prefix, type_, i.data['title'] + ext)
target = os.path.join(prefix, 'video', type_, i.data['title'] + ext)
os.makedirs(os.path.dirname(target), exist_ok=True)
if os.path.islink(target):
os.unlink(target)
os.symlink(source, target)
if type_ == "source":
source_target = target
clip['loudnorm'] = get_loudnorm(e.files.filter(selected=True)[0])
if type_.startswith('ai:'):
if 'ai' not in clip:
clip['ai'] = {}
@ -888,17 +959,20 @@ def generate_clips(options):
print("using", len(clips), "clips")
voice_over = defaultdict(dict)
voice_over = {}
for vo in item.models.Item.objects.filter(
data__type__icontains="voice over",
):
title = vo.get('title')
fragment_id = int(title.split('_')[2].replace('gen', ''))
parts = title.split('-')
fragment = '%02d' % int(parts[0].replace('ch', ''))
type = parts[1]
variant = '-'.join(parts[2:4])
source = vo.files.filter(selected=True)[0]
#batch = vo.get('batch')[0].replace('Text-', '')
batch = title.split('_')[3]
src = source.data.path
target = os.path.join(prefix, 'voice_over', batch, '%s.wav' % fragment_id)
ext = src.split('.')[-1]
target = os.path.join(prefix, 'voice_over', fragment, '%s-%s.%s' % (type, variant, ext))
os.makedirs(os.path.dirname(target), exist_ok=True)
if os.path.islink(target):
os.unlink(target)
@ -909,10 +983,15 @@ def generate_clips(options):
).exclude(value="").order_by("start"):
sdata = get_srt(sub, 0, lang, tlang)
subs.append(sdata)
voice_over[fragment_id][batch] = {
if fragment not in voice_over:
voice_over[fragment] = {}
if type not in voice_over[fragment]:
voice_over[fragment][type] = []
voice_over[fragment][type].append({
"variant": variant,
"src": target,
"duration": format_duration(source.duration, 24),
"subs": subs
}
})
with open(os.path.join(prefix, 'voice_over.json'), 'w') as fd:
json.dump(voice_over, fd, indent=2, ensure_ascii=False)

View file

@ -8,9 +8,93 @@ import item.models
from .render_kdenlive import KDEnliveProject, _CACHE
from .render import default_prefix as root
from .render import load_defaults
def render_all(options):
    """Render the static 5.1 background tracks, skipping ones that exist.

    Checks for the rendered forest and music 5.1 mixes under
    ``<root>/render/`` and only (re)renders the missing ones.
    """
    # NOTE: fixed missing closing parens on both os.path.exists(...) calls.
    if os.path.exists(os.path.join(root, "render/forest-5.1.mp4")):
        print("forest-5.1.mp4 exists, skipping")
    else:
        render_forest()
    if os.path.exists(os.path.join(root, "render/music-5.1.mp4")):
        print("music-5.1.mp4 exists, skipping")
    else:
        render_music()
def render_music():
    """Render the music stereo mix as a 5.1 file (render/music-5.1.mp4).

    The stereo music plays on the 5.1 front left/right channels; the other
    four channels are filled with silence. Intermediate wav files and the
    generated kdenlive project are removed afterwards.
    """
    # Build a kdenlive project concatenating all items tagged as music.
    project = KDEnliveProject(root)
    qs = item.models.Item.objects.filter(
        data__type__icontains='music'
    ).order_by('sort__title')
    for clip in qs:
        src = clip.files.all()[0].data.path
        project.append_clip('A1', {
            "src": src,
            "duration": clip.sort.duration,
            "filter": {
            },
        })
    path = os.path.join(root, "music.kdenlive")
    with open(path, 'w') as fd:
        fd.write(project.to_xml())
    os.chdir(root)
    # Render the project to a stereo wav with melt.
    cmd = [
        "melt", "music.kdenlive", '-quiet', '-consumer', 'avformat:music.wav'
    ]
    print(" ".join([str(x) for x in cmd]))
    subprocess.call(cmd)
    info = ox.avinfo('music.wav')
    cmds = []
    # Mono silence matching the mix duration, used to pad the other channels.
    cmds.append([
        "ffmpeg", "-y",
        "-nostats", "-loglevel", "error",
        "-f", "lavfi", "-i", "anullsrc=r=48000:cl=mono",
        "-t", str(info["duration"]),
        "music_silence.wav"
    ])
    # Split the stereo mix into separate left/right mono files.
    for src, out1, out2 in (
        ('music.wav', "music_left.wav", "music_right.wav"),
    ):
        cmds.append([
            "ffmpeg", "-y",
            "-nostats", "-loglevel", "error",
            "-i", src,
            "-filter_complex",
            "[0:0]pan=1|c0=c0[left]; [0:0]pan=1|c0=c1[right]",
            "-map", "[left]", out1,
            "-map", "[right]", out2,
        ])
    # Merge six mono inputs into 5.1: inputs 0/1 (music L/R) land on the
    # front left/right channels, inputs 2-5 (silence) fill the rest.
    cmds.append([
        "ffmpeg", "-y",
        "-nostats", "-loglevel", "error",
        "-i", "music_left.wav",
        "-i", "music_right.wav",
        "-i", "music_silence.wav",
        "-i", "music_silence.wav",
        "-i", "music_silence.wav",
        "-i", "music_silence.wav",
        "-filter_complex", "[0:a][1:a][2:a][3:a][4:a][5:a]amerge=inputs=6[a]",
        "-map", "[a]",
        "-ar", "48000",
        "-c:a", "aac", "render/music-5.1.mp4"
    ])
    for cmd in cmds:
        print(" ".join([str(x) for x in cmd]))
        subprocess.call(cmd)
    # Clean up intermediates (cwd is still root after os.chdir above).
    for name in (
        "music.kdenlive",
        "music.wav",
        "music_left.wav",
        "music_right.wav",
        "music_silence.wav",
    ):
        if os.path.exists(name):
            os.unlink(name)
def render_forest():
# Stereo Mix, playing on 5.1 rear left/right
@ -32,11 +116,14 @@ def render_forest():
with open(path, 'w') as fd:
fd.write(project.to_xml())
cmds = []
cmds.append([
os.chdir(root)
cmd = [
"melt", "forest.kdenlive", '-quiet', '-consumer', 'avformat:forest.wav'
])
]
print(" ".join([str(x) for x in cmd]))
subprocess.call(cmd)
info = ox.avinfo('forest.wav')
cmds = []
cmds.append([
"ffmpeg", "-y",
"-nostats", "-loglevel", "error",
@ -61,10 +148,10 @@ def render_forest():
cmds.append([
"ffmpeg", "-y",
"-nostats", "-loglevel", "error",
"-i", "silence.wav",
"-i", "silence.wav",
"-i", "silence.wav",
"-i", "silence.wav",
"-i", "forest_silence.wav",
"-i", "forest_silence.wav",
"-i", "forest_silence.wav",
"-i", "forest_silence.wav",
"-i", "forest_left.wav",
"-i", "forest_right.wav",
"-filter_complex", "[0:a][1:a][2:a][3:a][4:a][5:a]amerge=inputs=6[a]",
@ -72,15 +159,15 @@ def render_forest():
"-ar", "48000",
"-c:a", "aac", "render/forest-5.1.mp4"
])
os.chdir(root)
for cmd in cmds:
print(" ".join([str(x) for x in cmd]))
subprocess.call(cmd)
for name in (
"forest.kdenlive",
"forest.wav",
"forest_left.wav",
"forest_right.wav",
"silence.wav",
"forest_silence.wav",
):
if os.path.exists(name):
os.unlink(name)