a/v render. gen fixes

This commit is contained in:
j 2026-01-24 13:26:30 +01:00
commit d62d4c5746
6 changed files with 706 additions and 268 deletions

View file

@ -1256,7 +1256,7 @@ examples (config.SITENAME.jsonc) that are part of this pan.do/ra distribution.
"entitiesType": "",
"filters": [
{"id": "type", "sort": [{"key": "items", "operator": "-"}]},
{"id": "chapter", "sort": [{"key": "name", "operator": "-"}]},
{"id": "chapter", "sort": [{"key": "name", "operator": "+"}]},
{"id": "tags", "sort": [{"key": "items", "operator": "-"}]},
{"id": "keywords", "sort": [{"key": "items", "operator": "-"}]},
{"id": "year", "sort": [{"key": "items", "operator": "-"}]}

View file

@ -3,6 +3,7 @@ import hashlib
import math
import os
import time
import shutil
import cv2
import ox
@ -29,6 +30,7 @@ headers = {
def public_url(path):
return path.replace("/srv/pandora/static/", settings.PUBLIC_URL + "static/")
def public_document_url(document):
url = "%sdocuments/%s/source.%s?token=%s" % (
settings.PUBLIC_URL,
@ -38,6 +40,7 @@ def public_document_url(document):
)
return url
def public_video_url(item):
url = "%s%s/download/source/?token=%s" % (
settings.PUBLIC_URL,
@ -47,6 +50,16 @@ def public_video_url(item):
return url
def public_frame_url(item, position):
    """Return the public PNG URL for the frame of *item* at *position*.

    The URL is token-authenticated via settings.PUBLIC_TOKEN.
    """
    base = settings.PUBLIC_URL
    token = settings.PUBLIC_TOKEN
    return "%s%s/source%s.png?token=%s" % (base, item.public_id, position, token)
def trim_video(src, dst, frames, start0=False):
cap = cv2.VideoCapture(src)
fps = cap.get(cv2.CAP_PROP_FPS)
@ -93,6 +106,8 @@ def bytedance_task(data):
for key, value in defaults.items():
if key not in data:
data[key] = value
if data["model"] in EP:
data["model"] = EP[data["model"]]
print(data)
r = requests.post(url, headers=headers, json=data).json()
print(r)
@ -112,6 +127,8 @@ def bytedance_response(data):
if key not in data:
data[key] = value
print(data)
if data["model"] in EP:
data["model"] = EP[data["model"]]
response = requests.post(url, headers=headers, json=data).json()
print(response)
return response
@ -134,7 +151,8 @@ def t2v_bytedance(prompt, duration, output):
)
return status
def i2v_bytedance(first_frame, prompt, duration, output, last_frame=None):
def i2v_bytedance(first_frame, prompt, duration, output, last_frame=None, seed=None):
nduration = max(4, int(math.ceil(duration)))
data = {
"duration": nduration,
@ -151,11 +169,15 @@ def i2v_bytedance(first_frame, prompt, duration, output, last_frame=None):
],
}
if last_frame:
data["content"].append({
data["content"].append(
{
"type": "image_url",
"role": "last_frame",
"image_url": {"url": last_frame},
})
}
)
if seed:
data["seed"] = seed
status = bytedance_task(data)
output_url = status["content"]["video_url"]
ox.net.save_url(output_url, output, overwrite=True)
@ -167,6 +189,7 @@ def i2v_bytedance(first_frame, prompt, duration, output, last_frame=None):
)
return status
def first_last(first_frame, last_frame, prompt, duration, output):
nduration = max(4, int(math.ceil(duration)))
data = {
@ -260,17 +283,16 @@ def remake_video(item_id, prompt):
prompt_hash = hashlib.sha1(prompt.encode()).hexdigest()
position = n = 0
processed = []
prefix = "/srv/pandora/static/power/cache/%s_%s" % (item.public_id, prompt_hash)
for segment in segments:
if isinstance(segment, list):
stype, segment = segment
else:
stype = "n"
duration = segment - position
if stype == "c":
first_frame_path = (
"/srv/pandora/static/power/cache/%s_%s/%06d.mp4.last_frame.png"
% (item.public_id, prompt_hash, n - 1)
)
first_frame_path = "%s/%06d.mp4.last_frame.png" % (prefix, n - 1)
first_frame = public_url(first_frame_path)
else:
first_frame = "%s%s/source%s.png?token=%s" % (
@ -286,18 +308,10 @@ def remake_video(item_id, prompt):
last_frame_position,
settings.PUBLIC_TOKEN,
)
output = "/srv/pandora/static/power/cache/%s_%s/%06d.mp4" % (
item.public_id,
prompt_hash,
n,
)
output = "%s/%06d.mp4" % (prefix, n)
if not os.path.exists(output):
first_last(first_frame, last_frame, prompt, duration, output)
trimmed = "/srv/pandora/static/power/cache/%s_%s/%06d_trimmed.mp4" % (
item.public_id,
prompt_hash,
n,
)
trimmed = "%s/%06d_trimmed.mp4" % (prefix, n)
frames = int(duration * 24)
if not os.path.exists(trimmed):
trim_video(output, trimmed, frames, stype == "c")
@ -305,45 +319,57 @@ def remake_video(item_id, prompt):
position = segment
n += 1
joined_output = "/srv/pandora/static/power/cache/%s_%s.mp4" % (
item.public_id,
prompt_hash,
)
joined_output = "%s/joined.mp4" % (prefix,)
join_segments(processed, joined_output)
return joined_output
def prepare_image(image, prompt, out=None):
# Map public BytePlus/Ark model names to provisioned endpoint ids.
# Request payloads whose "model" appears as a key here are rewritten to the
# endpoint id before being sent to the Ark API (see the `data["model"] in EP`
# checks in the bytedance_* helpers).
EP = {
    "seedream-4-5-251128": "ep-20260122071519-pbf7l",
    "seed-1-8-251228": "ep-20260122071243-8qfrk",
    "seedance-1-5-pro-251215": "ep-20260122071613-blmsd",
}
def bytedance_image_generation(data):
model = "seedream-4-5-251128"
if not image.startswith("http:"):
image = public_url(image)
data = {
url = "https://ark.ap-southeast.bytepluses.com/api/v3/images/generations"
defaults = {
"model": model,
"prompt": prompt,
"image": image,
"size": "2560x1440",
"watermark": False,
}
url = "https://ark.ap-southeast.bytepluses.com/api/v3/images/generations"
for key in defaults:
if key not in data:
data[key] = defaults[key]
if data["model"] in EP:
data["model"] = EP[data["model"]]
print("prepare_image", data)
r = requests.post(url, headers=headers, json=data).json()
print(r)
output_url = r["data"][0]["url"]
response = requests.post(url, headers=headers, json=data).json()
print(response)
return response["data"][0]["url"]
def prepare_image(image, prompt, out=None):
    """Generate an AI-restyled version of *image* with *prompt* and save it.

    image: local path under the static root or an http URL
    prompt: text prompt for the image-generation model
    out: target path; defaults to `<image>.ai.png`
    Returns the remote URL of the generated image.
    """
    if not image.startswith("http:"):
        image = public_url(image)
    data = {
        "prompt": prompt,
        "image": image,
        "size": "2560x1440",
    }
    output_url = bytedance_image_generation(data)
    if out is None:
        out = image + ".ai.png"
    ox.net.save_url(output_url, out, overwrite=True)
    # BUG FIX: the original ended with `return r`, an undefined name left
    # over from a refactor (the request object moved into
    # bytedance_image_generation); return the generated image URL instead.
    return output_url
def process_frame(item, prompt, character=None, position=0):
def process_frame(item, prompt, character=None, position=0, seed=None):
model = "seedream-4-5-251128"
if isinstance(item, str):
item = Item.objects.get(public_id=item)
image = "%s%s/source%s.png?token=%s" % (
settings.PUBLIC_URL,
item.public_id,
position,
settings.PUBLIC_TOKEN,
)
if isinstance(character, Document):
character = public_document_url(character)
image = public_frame_url(item, position)
if character is not None:
image = [image, character]
data = {
@ -351,34 +377,159 @@ def process_frame(item, prompt, character=None, position=0):
"prompt": prompt,
"image": image,
"size": "2560x1440",
"watermark": False,
}
url = "https://ark.ap-southeast.bytepluses.com/api/v3/images/generations"
print("prepare_image", data)
response = requests.post(url, headers=headers, json=data).json()
print(response)
url = response["data"][0]["url"]
if seed:
data["seed"] = seed
url = bytedance_image_generation(data)
img = add_ai_image(item, position, url)
img.refresh_from_db()
img.data['model'] = model
img.data['prompt'] = prompt
img.data['source'] = item.public_id
img.data["model"] = model
img.data["prompt"] = prompt
img.data["source"] = item.public_id
if character:
img.data['source'] += ' ' + character.split('?')[0]
img.data["source"] += " " + character.split("?")[0]
print(img, img.data)
img.save()
img.update_sort()
img.update_find()
return img
def replace_character(item, character, position=0):
prompt = "Replace the foreground character in image 1 with the character in image 2, keep the posture, clothing, background, light, atmosphere from image 1, but take the facial features and personality from image 2. Make sure the size of the character is adjusted since the new character is a child and make sure the size of the head matches the body. The quality of the image should be the same between foreground and background, adjust the quality of the character to match the background. Use the style of image 1 for the character: if image 1 is a photo make the character a real person, if image 1 is a drawing make the character a drawn character, if image 1 is a comic use a comic character and so on"
prompt = "Replace the foreground character in image 1 with the character in image 2, keep the posture, clothing, background, light, atmosphere from image 1, but take the facial features and personality from image 2. Make sure the size of the character is adjusted since the new character is a child and make sure the size of the head matches the body. The quality of the image should be the same between foreground and background, adjust the quality of the character to match the background. Use the style of image 1 for the character: if image 1 is a photo make the character a real person, if image 1 is a drawing make the character a drawn character, if image 1 is a comic use a comic character"
"""
REPLACE_CHARACTER_PROMPT = "Replace the foreground character in image 1 with the character in image 2, keep the posture, clothing, background, light, atmosphere from image 1, but take the facial features and personality from image 2. Make sure the size of the character is adjusted since the new character is a child and make sure the size of the head matches the body. The quality of the image should be the same between foreground and background, adjust the quality of the character to match the background. Use the style of image 1 for the character: if image 1 is a photo make the character a real person, if image 1 is a drawing make the character a drawn character, if image 1 is a comic use a comic character and so on"
"""
REPLACE_CHARACTER_PROMPT = "Replace the foreground character in image 1 with the character in image 2, keep the posture, clothing, background, light, atmosphere from image 1, but take the facial features and personality from image 2. Make sure the size of the character is adjusted since the new character is a child and make sure the size of the head matches the body. The quality of the image should be the same between foreground and background, adjust the quality of the character to match the background. Use the style of image 1 for the character: if image 1 is a photo make the character a real person, if image 1 is a drawing make the character a drawn character, if image 1 is a comic use a comic character"
def fal_replace_character(item, character, position=0):
if isinstance(item, str):
item = Item.objects.get(public_id=item)
prompt = REPLACE_CHARACTER_PROMPT.replace("image 1", "@image 1").replace(
"image 2", "@image 2"
)
if character == "P5":
prompt = prompt.replace('child', 'teenager')
prompt = prompt.replace("child", "teenager")
if character in ("P1", "P2", "P3", "P4", "P5"):
character = public_document_url(Document.objects.get(data__title="Character " + character))
return process_frame(item, prompt, character, position)
character = Document.objects.get(data__title="Character " + character)
if isinstance(character, Document):
character = public_document_url(character)
image = public_frame_url(item, position)
image = [image, character]
url = flux_edit_image(image, prompt)
img = add_ai_image(item, position, url)
img.refresh_from_db()
img.data["model"] = "flux-2-pro"
img.data["prompt"] = prompt
img.data["source"] = item.public_id
img.data["source"] += " " + character.split("?")[0]
print(img, img.data)
img.save()
img.update_sort()
img.update_find()
return img
def replace_character(item, character, position=0, seed=None):
    """Replace the foreground character in a frame of *item*.

    character: "P1".."P5" (resolved to the matching character document)
        or a reference image URL used directly.
    position: frame position in the source video.
    seed: optional generation seed, forwarded to process_frame.
    Returns the Document created by process_frame.
    """
    prompt = REPLACE_CHARACTER_PROMPT
    if character == "P5":
        # P5 is older than the other characters; adjust the prompt wording
        prompt = prompt.replace("child", "teenager")
    if character in ("P1", "P2", "P3", "P4", "P5"):
        doc = Document.objects.get(data__title="Character " + character)
        character = public_document_url(doc)
    return process_frame(item, prompt, character, position, seed=seed)
def kling_lipsync(audio_item, video_item, keep=False):
    """Lip-sync *video_item*'s video to *audio_item*'s audio via fal.ai Kling.

    audio_item / video_item: Item objects with public source URLs.
    keep: keep the downloaded render in the cache directory.
    Returns the new AI variant item.

    BUG FIX: the original body referenced several names that were never
    defined (`output`, `item`, `keep`, `img`) and would raise NameError;
    they are resolved here: the render is cached like other AI variants and
    attached to *video_item*. NOTE(review): the dropped `img.add(ai)` line
    had no `img` in scope — confirm whether a document link was intended.
    """
    video_url = public_video_url(video_item)
    audio_url = public_video_url(audio_item)
    model = "fal-ai/kling-video/lipsync/audio-to-video"
    data = {
        "video_url": video_url,
        "audio_url": audio_url
    }
    print(data)
    handler = fal_client.submit(model, arguments=data)
    request_id = handler.request_id
    print(request_id)
    result = fal_wait_for(model, request_id)
    print(result)
    output_url = result["video"]["url"]
    # cache path keyed on both items so reruns with other audio don't collide
    output = "/srv/pandora/static/power/cache/%s_%s_lipsync/ai.mp4" % (
        video_item.public_id,
        audio_item.public_id,
    )
    os.makedirs(os.path.dirname(output), exist_ok=True)
    ox.net.save_url(output_url, output, overwrite=True)
    ai = add_ai_variant(video_item, output, "ai:lipsync")
    ai.data["model"] = model
    ai.save()
    if not keep:
        shutil.rmtree(os.path.dirname(output))
    return ai
def kling_v2v_reference(item, character, keep=False):
    """Replace the main character in *item*'s video with *character* using
    Kling video-to-video reference.

    https://fal.ai/models/fal-ai/kling-video/o1/video-to-video/reference/api

    item: Item or public id string.
    character: "P1".."P5" (resolved to a character document URL) or an image URL.
    keep: keep the downloaded render in the cache directory.
    Returns the new AI variant item.
    """
    if isinstance(item, str):
        item = Item.objects.get(public_id=item)
    if character in ("P1", "P2", "P3", "P4", "P5"):
        character = public_document_url(
            Document.objects.get(data__title="Character " + character)
        )
    # BUG FIX: the original built the payload from an undefined `image_url`;
    # the resolved character reference image is what was intended.
    image_url = character
    video_url = public_video_url(item)
    prompt = "Replace the main character in @Video1 with the character from the reference images, adjust the style of the character to match the style of the video"
    model = "fal-ai/kling-video/o1/video-to-video/reference"
    prompt_hash = hashlib.sha1((prompt + character).encode()).hexdigest()
    output = "/srv/pandora/static/power/cache/%s_%s/ai.mp4" % (
        item.public_id,
        prompt_hash,
    )
    # pick the longest supported duration that still fits the source video;
    # BUG FIX: `duration` was unbound when the video is shorter than 3s
    duration = 3
    for d in [3, 4, 5, 6, 7, 8, 9, 10]:
        if d > item.sort.duration:
            break
        duration = d
    # BUG FIX: a first, immediately-overwritten `data` dict (motion-control
    # style payload) and a commented-out "elements" block were dead code
    # left over from experimentation; removed.
    data = {
        "prompt": prompt,
        "keep_audio": False,
        "aspect_ratio": "16:9",
        "video_url": video_url,
        "image_urls": [image_url],
        "duration": str(duration)
    }
    print(data)
    handler = fal_client.submit(model, arguments=data)
    request_id = handler.request_id
    print(request_id)
    result = fal_wait_for(model, request_id)
    print(result)
    output_url = result["video"]["url"]
    ox.net.save_url(output_url, output, overwrite=True)
    ai = add_ai_variant(item, output, "ai:v2v-replace")
    ai.data["prompt"] = ox.escape_html(prompt)
    ai.data["firstframe"] = image_url.split("?")[0]
    ai.data["model"] = model
    ai.save()
    if not keep:
        shutil.rmtree(os.path.dirname(output))
    # NOTE(review): the original ended with `img.add(ai)` but `img` was never
    # defined — dropped; confirm whether the character document should be
    # attached to the variant here.
    return ai
def replace_character_motion_control(item, character, keep=False):
if isinstance(item, str):
@ -394,7 +545,10 @@ def replace_character_motion_control(item, character, keep=False):
prompt = ""
model = "fal-ai/kling-video/v2.6/pro/motion-control"
prompt_hash = hashlib.sha1((prompt + image_url).encode()).hexdigest()
output = "/srv/pandora/static/power/cache/%s_%s/ai.mp4" % (item.public_id, prompt_hash)
output = "/srv/pandora/static/power/cache/%s_%s/ai.mp4" % (
item.public_id,
prompt_hash,
)
data = {
"prompt": prompt,
"image_url": image_url,
@ -412,7 +566,7 @@ def replace_character_motion_control(item, character, keep=False):
ox.net.save_url(output_url, output, overwrite=True)
ai = add_ai_variant(item, output, "ai:replace:p1:motion-control")
ai.data["prompt"] = ox.escape_html(prompt)
ai.data['firstframe'] = image_url.split('?')[0]
ai.data["firstframe"] = image_url.split("?")[0]
ai.data["model"] = model
ai.save()
if not keep:
@ -420,6 +574,7 @@ def replace_character_motion_control(item, character, keep=False):
img.add(ai)
return ai
def describe_video(url, neutral=False):
if neutral:
prompt = (
@ -453,15 +608,19 @@ def describe_video(url, neutral=False):
response = bytedance_response(data)
return response["output"][1]["content"][0]["text"]
def describe_item(item, neutral=False):
    """Return a text description of *item*'s source video.

    item: Item instance or public id string.
    neutral: forwarded to describe_video to request a neutral description.
    """
    if isinstance(item, str):
        item = Item.objects.get(public_id=item)
    return describe_video(public_video_url(item), neutral)
def reshoot_item(item, extra_prompt=None, first_frame=None, keep=False):
if isinstance(item, str):
item = Item.objects.get(public_id=item)
if isinstance(first_frame, Document):
first_frame = public_document_url(first_frame)
duration = item.sort.duration
frames = int(duration * 24)
prompt = describe_item(item, first_frame is not None)
@ -491,7 +650,7 @@ def reshoot_item(item, extra_prompt=None, first_frame=None, keep=False):
ai.data["model"] = status["model"]
ai.data["seed"] = status["seed"]
if first_frame:
ai.data["firstframe"] = first_frame.split('?')[0]
ai.data["firstframe"] = first_frame.split("?")[0]
if isinstance(first_frame, Document):
first_frame.add(ai)
ai.save()
@ -500,6 +659,75 @@ def reshoot_item(item, extra_prompt=None, first_frame=None, keep=False):
return ai
def reshoot_item_segments(item, character, keep=False):
    """Re-shoot *item* segment by segment with *character* replacing the lead.

    Each segment gets a neutral description, an AI first frame (character
    replaced), and an i2v render trimmed to the segment length; the trimmed
    renders are joined and registered as an AI variant of *item*.

    item: Item or public id string.
    character: character key / reference (see replace_character).
    keep: keep the per-segment cache directory.
    Returns the new AI variant item.
    """
    if isinstance(item, str):
        item = Item.objects.get(public_id=item)
    max_duration = 12
    source = item.files.all()[0].data.path
    info = ox.avinfo(source)
    duration = info["duration"]
    if duration < max_duration:
        segments = [duration]
    else:
        segments = get_item_segments(item, max_duration=max_duration)
    print(segments)
    prompt_hash = hashlib.sha1("reshoot_segment".encode()).hexdigest()
    processed = []
    prefix = "/srv/pandora/static/power/cache/%s_%s" % (item.public_id, prompt_hash)
    # side effect: writes the per-segment source files under prefix
    fragment_video(source, prefix, segments)
    prompts = []
    first_frames = []
    seed = None
    n = 0
    position = 0
    for segment in segments:
        if isinstance(segment, list):
            stype, segment = segment
        else:
            stype = "n"
        output = "%s/%06d.mp4" % (prefix, n)
        output_ai = "%s/%06d_ai.mp4" % (prefix, n)
        segment_duration = segment - position
        if os.path.exists(output):
            segment_video_url = public_url(output)
            prompt = describe_video(segment_video_url, neutral=True)
            prompts.append("Segment %s: " % (n + 1) + prompt)
            segment_character = character
            if position:
                # later segments reuse the previous AI first frame for continuity
                segment_character = segment_first_frame_url
            segment_first_frame = replace_character(
                item, segment_character, position, seed=seed
            )
            segment_first_frame_url = public_document_url(segment_first_frame)
            status = i2v_bytedance(
                segment_first_frame_url, prompt, segment_duration, output_ai, seed=seed
            )
            # BUG FIX: was `seeed = status["seed"]` — the typo meant the seed
            # never propagated to later segments or the saved metadata
            seed = status["seed"]
            trimmed = "%s/%06d_ai_trimmed.mp4" % (prefix, n)
            frames = int(segment_duration * 24)
            trim_video(output_ai, trimmed, frames, stype == "c")
            processed.append(trimmed)
            first_frames.append(segment_first_frame)
        n += 1
        position = segment
    joined_output = "%s/joined.mp4" % (prefix)
    join_segments(processed, joined_output)
    ai = add_ai_variant(item, joined_output, "ai:0:reshoot-firstframe")
    prompt = "\n\n".join(prompts)
    ai.data["prompt"] = ox.escape_html(prompt)
    ai.data["firstframe"] = " ".join([ox.toAZ(ff.id) for ff in first_frames])
    # NOTE(review): `status` is unbound if no segment file existed — assumes
    # fragment_video always produced at least one segment; confirm.
    ai.data["model"] = status["model"]
    ai.data["seed"] = seed
    ai.save()
    for first_frame in first_frames:
        first_frame.add(ai)
    if not keep:
        # BUG FIX: the original removed the cache directory twice (once via
        # os.path.dirname(joined_output), once via prefix — the same path),
        # so the second rmtree raised FileNotFoundError; remove it once,
        # after the first-frame documents were linked.
        shutil.rmtree(prefix)
    return ai
def describe_image(url):
system_prompt = ""
system_prompt = "You are an image analyst describing different aspects of an image. You are focused on the form, composition, and task shown in the image."
@ -527,6 +755,7 @@ def transform_remake_video(item_id, image_prompt, video_prompt):
prompt_hash = hashlib.sha1((image_prompt + video_prompt).encode()).hexdigest()
position = n = 0
processed = []
prefix = "/srv/pandora/static/power/cache/%s_%s" % (item.public_id, prompt_hash)
for segment in segments:
if isinstance(segment, list):
stype, segment = segment
@ -534,10 +763,7 @@ def transform_remake_video(item_id, image_prompt, video_prompt):
stype = "n"
duration = segment - position
if stype == "c":
first_frame_path = (
"/srv/pandora/static/power/cache/%s_%s/%06d.mp4.last_frame.png"
% (item.public_id, prompt_hash, n - 1)
)
first_frame_path = "%s/%06d.mp4.last_frame.png" % (prefix, n - 1)
first_frame = public_url(first_frame_path)
else:
first_frame = "%s%s/source%s.png?token=%s" % (
@ -546,10 +772,7 @@ def transform_remake_video(item_id, image_prompt, video_prompt):
position,
settings.PUBLIC_TOKEN,
)
first_frame_path = (
"/srv/pandora/static/power/cache/%s_%s/%06d.first_frame.png"
% (item.public_id, prompt_hash, n)
)
first_frame_path = "%s/%06d.first_frame.png" % (prefix, n)
if not os.path.exists(first_frame_path):
prepare_image(first_frame, image_prompt, first_frame_path)
first_frame = public_url(first_frame_path)
@ -560,26 +783,15 @@ def transform_remake_video(item_id, image_prompt, video_prompt):
last_frame_position,
settings.PUBLIC_TOKEN,
)
last_frame_path = (
"/srv/pandora/static/power/cache/%s_%s/%06d.last_frame.png"
% (item.public_id, prompt_hash, n)
)
last_frame_path = "%s/%06d.last_frame.png" % (prefix, n)
if not os.path.exists(last_frame_path):
prepare_image(last_frame, image_prompt, last_frame_path)
last_frame = public_url(last_frame_path)
output = "/srv/pandora/static/power/cache/%s_%s/%06d.mp4" % (
item.public_id,
prompt_hash,
n,
)
output = "%s/%06d.mp4" % (prefix, n)
if not os.path.exists(output):
first_last(first_frame, last_frame, video_prompt, duration, output)
trimmed = "/srv/pandora/static/power/cache/%s_%s/%06d_trimmed.mp4" % (
item.public_id,
prompt_hash,
n,
)
trimmed = "%s/%06d_trimmed.mp4" % (prefix, n)
frames = int(duration * 24)
if not os.path.exists(trimmed):
trim_video(output, trimmed, frames, stype == "c")
@ -587,10 +799,7 @@ def transform_remake_video(item_id, image_prompt, video_prompt):
position = segment
n += 1
joined_output = "/srv/pandora/static/power/cache/%s_%s.mp4" % (
item.public_id,
prompt_hash,
)
joined_output = "%s/joined.mp4" % (prefix,)
join_segments(processed, joined_output)
return joined_output
@ -625,6 +834,7 @@ def restyle_video(item_id, prompt):
ox.net.save_url(output_url, output_path, overwrite=True)
return output_path
def fal_wait_for(model, request_id):
status = fal_client.status(model, request_id, with_logs=True)
while isinstance(status, fal_client.InProgress):
@ -633,6 +843,7 @@ def fal_wait_for(model, request_id):
result = fal_client.result(model, request_id)
return result
def motion_control_preprocess_image(item_id, image_prompt, video_prompt):
item = Item.objects.get(public_id=item_id)
video_url = public_video_url(item)
@ -686,7 +897,7 @@ def luma_wait_for(id):
return status
def luma_modify_segment(video_url, prompt, first_frame=None, mode='flex_2'):
def luma_modify_segment(video_url, prompt, first_frame=None, mode="flex_2"):
# also got that at fal-ai/luma-dream-machine/ray-2/modify
url = "https://api.lumalabs.ai/dream-machine/v1/generations/video/modify"
payload = {
@ -791,6 +1002,7 @@ def in_the_style_of_fal(image, style):
prompt = "apply style from @image 2 to @image 1 keep the position of the person in @image 1 but take light, colors, clothing from @image 2"
return flux_edit_image([image, style], prompt)
def in_the_style_of_byte(image, style):
prompt = "apply style from image 2 to image 1 keep the position of the person in image 1 but take light, colors, clothing from image 2"
image_model_name = "seedream-4-5-251128"
@ -813,7 +1025,7 @@ def in_the_style_of_byte(image, style):
def luma_modify_item(item, prompt="", image_prompt=None, first_frame=None, keep=False):
mode = 'flex_2'
mode = "flex_2"
if isinstance(item, str):
item = Item.objects.get(public_id=item)
source = item.files.all()[0].data.path
@ -839,36 +1051,22 @@ def luma_modify_item(item, prompt="", image_prompt=None, first_frame=None, keep=
stype, segment = segment
else:
stype = "n"
output = "/srv/pandora/static/power/cache/%s_%s/%06d.mp4" % (
item.public_id,
prompt_hash,
n,
)
output_ai = "/srv/pandora/static/power/cache/%s_%s/%06d_ai.mp4" % (
item.public_id,
prompt_hash,
n,
)
output = "%s/%06d.mp4" % (prefix, n)
output_ai = "%s/%06d_ai.mp4" % (prefix, n)
if os.path.exists(output):
video_url = luma_modify_segment(
public_url(output),
prompt,
first_frame=first_frame_url,
mode=mode
public_url(output), prompt, first_frame=first_frame_url, mode=mode
)
ox.net.save_url(video_url, output_ai, overwrite=True)
processed.append(output_ai)
n += 1
joined_output = "/srv/pandora/static/power/cache/%s_%s/joined.mp4" % (
item.public_id,
prompt_hash,
)
joined_output = "%s/joined.mp4" % (prefix,)
join_segments(processed, joined_output)
ai = add_ai_variant(item, joined_output, "ai:replace:p1:luma")
ai.data["prompt"] = ox.escape_html(prompt)
if first_frame:
ai.data['firstframe'] = first_frame_url.split('?')[0]
ai.data["model"] = 'ray-2:%s' % mode
ai.data["firstframe"] = first_frame_url.split("?")[0]
ai.data["model"] = "ray-2:%s" % mode
ai.save()
if not keep:
shutil.rmtree(os.path.dirname(joined_output))
@ -885,6 +1083,7 @@ def add_ai_variant(item, video_path, type):
ai.user = item.user
ai.data["type"] = [type]
ai.data["title"] = item.data["title"]
ai.data["chapter"] = item.data.get("chapter", "")
ai.save()
file = File()
file.oshash = ox.oshash(video_path)
@ -904,20 +1103,22 @@ def add_ai_variant(item, video_path, type):
file.extract_stream()
return ai
def add_ai_image(item, position, url, extension=None):
if extension is None:
extension = url.split('.')[-1].split('?')[0]
if extension == 'jpeg': extension = 'jpg'
extension = url.split(".")[-1].split("?")[0]
if extension == "jpeg":
extension = "jpg"
file = Document(user=item.user)
file.data['title'] = '%s at %s' % (item.get('title'), position)
file.data['position'] = position
file.data["title"] = "%s at %s" % (item.get("title"), position)
file.data["position"] = position
file.extension = extension
file.width = -1
file.pages = -1
file.uploading = True
file.save()
file.uploading = True
name = 'data.%s' % file.extension
name = "data.%s" % file.extension
file.file.name = file.path(name)
ox.net.save_url(url, file.file.path, overwrite=True)
file.get_info()
@ -927,3 +1128,56 @@ def add_ai_image(item, position, url, extension=None):
file.update_sort()
file.add(item)
return file
def add_tag(item, tag):
    """Append *tag* to the item's tag list (creating it if missing) and save."""
    item.data.setdefault('tags', []).append(tag)
    item.save()
def process_motion_firstframe():
    """Run reshoot_item_segments('P1') for every item in the
    'Motion-Firstframe' list that has no AI variant yet and is not marked
    as failed; failures are tagged 'ai-failed' so they are skipped next run.
    """
    motion_list = itemlist.models.List.objects.get(name='Motion-Firstframe')
    for i in motion_list.items.all():
        ai = Item.objects.filter(data__type__icontains='ai').filter(data__title=i.data['title'])
        if ai.exists() or 'ai-failed' in i.data.get('tags', []):
            print('>> skip', i)
            continue
        print(i)
        try:
            reshoot_item_segments(i, 'P1', keep=True)
        # BUG FIX: bare `except:` also swallowed KeyboardInterrupt/SystemExit,
        # making the batch impossible to interrupt cleanly
        except Exception:
            add_tag(i, 'ai-failed')
            print('>> failed', i)
def extract_firstframe(character='P1'):
    """Create a replaced-character first frame (position 0) for every source
    item that has no documents yet; failures are tagged 'ai-failed'.

    character: character key passed through to replace_character.
    """
    for item in Item.objects.filter(data__type__icontains="source"):
        if 'ai-failed' in item.data.get('tags', []):
            continue
        if not item.documents.all().exists():
            print(item)
            try:
                # the created document is linked to the item by
                # replace_character; the unused local binding was dropped
                replace_character(item, character, 0)
            # BUG FIX: bare `except:` also swallowed KeyboardInterrupt/SystemExit
            except Exception:
                add_tag(item, 'ai-failed')
def process_reshoot_firstframe():
    """Run replace_character_motion_control for every eligible item in the
    'Reshoot-Firstframe' list (skipping long items, item 'HZ', items that
    already have an AI variant, and items tagged 'ai-failed').
    """
    reshoot_list = itemlist.models.List.objects.get(name='Reshoot-Firstframe')
    for i in reshoot_list.items.all():
        if i.sort.duration > 30:
            continue
        # 'HZ' is excluded explicitly; reason not recorded in the source
        if i.public_id == 'HZ':
            continue
        if i.documents.all().count():
            ai = Item.objects.filter(data__type__icontains='ai').filter(data__title=i.data['title'])
            if ai.exists() or 'ai-failed' in i.data.get('tags', []):
                print('>> skip', i)
                continue
        # reuse the newest document as first frame, or create one for P1
        first_frame = i.documents.all().order_by('-created').first()
        if not first_frame:
            first_frame = replace_character(i, 'P1', 0)
        print(i, first_frame, i.documents.all().count())
        try:
            replace_character_motion_control(i, first_frame)
        # BUG FIX: bare `except:` also swallowed KeyboardInterrupt/SystemExit
        except Exception:
            add_tag(i, 'ai-failed')
            print('>> failed', i)

View file

@ -16,7 +16,7 @@ class Command(BaseCommand):
parser.add_argument('--duration', action='store', dest='duration', default="3600", help='target duration of all fragments in seconds')
parser.add_argument('--offset', action='store', dest='offset', default="1024", help='inital offset in pi')
parser.add_argument('--no-video', action='store_true', dest='no_video', default=False, help='don\'t render video')
parser.add_argument('--chapter', action='store', dest='chapter', default=None, help='chapter')
parser.add_argument('--fragment', action='store', dest='fragment', default=None, help='fragment')
parser.add_argument('--single-file', action='store_true', dest='single_file', default=False, help='render to single video')
parser.add_argument('--keep-audio', action='store_true', dest='keep_audio', default=False, help='keep independent audio tracks')
parser.add_argument('--stereo-downmix', action='store_true', dest='stereo_downmix', default=False, help='stereo downmix')

View file

@ -0,0 +1,18 @@
import json
import os
import subprocess
from django.core.management.base import BaseCommand
from django.conf import settings
from ...render_sound import render_all
class Command(BaseCommand):
    """Management command: generate the kdenlive project and render it."""

    # typo fix: was 'genrate kdenlive porject and render'
    help = 'generate kdenlive project and render'

    def add_arguments(self, parser):
        parser.add_argument('--debug', action='store_true', dest='debug', default=False, help='output more info')

    def handle(self, **options):
        # delegate to render_sound.render_all with the parsed options
        render_all(options)

365
render.py
View file

@ -20,7 +20,36 @@ from .render_utils import *
default_prefix = "/srv/p_for_power"
def compose(clips, target=150, base=1024, voice_over=None, options=None):
def get_loudnorm(file):
    """Return the loudnorm measurement string for *file*, computing and
    caching it in file.info["loudnorm"] on first use.

    The value is a tab-separated summary of ffmpeg's loudnorm analysis:
    integrated loudness (L), loudness range (R) and true peak (P).

    Raises RuntimeError if ffmpeg's output contains no JSON block.
    """
    if "loudnorm" in file.info:
        return file.info["loudnorm"]
    source = file.data.path
    cmd = [
        "ffmpeg",
        "-i", source,
        "-vn",
        "-af", "loudnorm=print_format=json",
        "-f", "null",
        "-"
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    # ffmpeg writes the measurement JSON to stderr, after the progress log
    json_match = re.search(r"\{[\s\S]*\}", result.stderr)
    if not json_match:
        raise RuntimeError("Could not find loudnorm JSON output in ffmpeg output")
    loudnorm_data = json.loads(json_match.group(0))
    input_i = float(loudnorm_data.get("input_i", 0))  # integrated loudness (LUFS)
    input_lra = float(loudnorm_data.get("input_lra", 0))  # loudness range (LU)
    input_tp = float(loudnorm_data.get("input_tp", 0))  # true peak (dBTP)
    # NOTE: input_thresh was parsed but never used in the original; dropped.
    # The "P" field intentionally keeps the original (colon-less) format —
    # downstream consumers read this exact string from file.info.
    loudnorm = f"L: {input_i:.6f}\tR: {input_lra:.6f}\tP {input_tp:.6f}"
    file.info["loudnorm"] = loudnorm
    file.save()
    return loudnorm
def compose(clips, fragment, target=150, base=1024, voice_over=None, options=None):
if options is None:
options = {}
fps = 24
@ -50,40 +79,175 @@ def compose(clips, target=150, base=1024, voice_over=None, options=None):
seq = random(10000 + base * 1000)
used = []
selected_clips_length = 0
ai_length = 0
selected_clips = []
tags = []
while selected_clips_length < target:
if not tags:
tags = fragment["tags"].copy()
tag = random_choice(seq, tags, pop=True)
non_ai_clips = []
ai_clips = []
for clip in clips:
if tag in clip["tags"]:
if 'ai' in clip:
ai_clips.append(clip)
else:
non_ai_clips.append(clip)
if ai_length < target * 0.6 and ai_clips:
clip = random_choice(seq, ai_clips, pop=True)
clip["use_ai"] = True
selected_clips.append(clip)
selected_clips_length += clip['duration']
ai_length += clip['duration']
clips = [c for c in clips if c['id'] != clip['id']]
continue
available_clips = non_ai_clips + ai_clips
if available_clips:
clip = random_choice(seq, available_clips, pop=True)
clip["use_ai"] = False
selected_clips.append(clip)
selected_clips_length += clip['duration']
clips = [c for c in clips if c['id'] != clip['id']]
clips = selected_clips
clip = None
while target - length > 0 and clips:
'''
if clip:
if chance(seq, 0.5):
next_seqid = clip['seqid'] + 1
clip = get_clip_by_seqid(clips, next_seqid)
else:
clip = None
'''
clip = None
if not clip:
# FIXME: while not all clips have AI versions make sure we have one 50% of the time
clip = random_choice(seq, clips, True)
next_length = length + clip['duration']
if target - next_length < -target*0.1:
break
length += int(clip['duration'] * fps) / fps
# 50/50 source or ai
src = clip['source']
audio = clip['source']
# select ai if we have one
if 'ai' in clip:
if clip["use_ai"]:
src = random_choice(seq, list(clip['ai'].values()), False)
print('%07.3f-%07.3f %07.3f %s (%s)' % (
length-clip['duration'],
length,
clip['duration'],
os.path.basename(clip['source']),
src.split('/')[-2]
))
scene['front']['V2'].append({
'duration': clip['duration'],
'src': src,
"filter": {
}
})
volume_front = '-2.5'
volume_rear = '-8.5'
if clip.get('volume') is not None:
volume_front = '%0.2f' % (float(volume_front) + clip['volume'])
volume_rear = '%0.2f' % (float(volume_rear) + clip['volume'])
'''
'dynamic_loudness': [
["target_loudness", "-35"],
["min_gain", "-15"],
["max_gin", "15"],
],
'''
audio_filter = {
'mono': [
["channels", "2"],
],
'loudness': [
["program", "-17"],
["results", clip["loudnorm"]],
],
'volume': volume_front,
'fadein': '00:00:00.125'
}
scene['audio-front']['A2'].append({
'duration': clip['duration'],
'src': audio,
'filter': audio_filter.copy()
})
'''
audio_filter['volume'] = volume_rear
scene['audio-rear']['A2'].append({
'duration': clip['duration'],
'src': audio,
'filter': audio_filter.copy()
})
'''
used.append(clip)
if not clips and target - length > 0:
print("not enough clips, need to reset")
used_ids = {c['id'] for c in used}
clips = [c for c in all_clips if c != clip and c['id'] not in used_ids]
if not clips:
print("not enough clips, also consider used")
clips = [c for c in all_clips if c != clip]
if not clips:
print("not enough clips, also consider last clip")
clips = all_clips.copy()
scene_duration = int(get_scene_duration(scene) * fps)
voice_overs = []
sub_offset = 0
vo_min = 0
subs = []
print("--")
print("Voice Over:")
if voice_over:
vo_keys = list(sorted(voice_over))
if chance(seq, 0.5):
vo_key = vo_keys[random_int(seq, len(vo_keys))]
voice_overs.append(voice_over[vo_key])
elif len(vo_keys) >= 2:
vo1 = vo_keys.pop(random_int(seq, len(vo_keys)))
vo2 = vo_keys.pop(random_int(seq, len(vo_keys)))
voice_overs.append(voice_over[vo1])
if voice_over[vo1]["duration"] + voice_over[vo2]["duration"] < target:
print("adding second vo")
voice_overs.append(voice_over[vo2])
print("vo:", [x['src'] for x in voice_overs], list(sorted(voice_over)))
vo_min = sum([vo['duration'] for vo in voice_overs])
sub_offset = 0
if vo_min > target:
target = vo_min
elif vo_min < target:
offset = format_duration((target - vo_min) / 2, fps)
while int(sub_offset * fps) < scene_duration:
if sub_offset:
gap = (5 * fps + random_int(seq, 10 * fps)) / fps
else:
gap = (2 * fps + random_int(seq, 5 * fps)) / fps
if int((sub_offset + gap)* fps) > scene_duration:
gap = format_duration((scene_duration - int(sub_offset * fps)) / fps, fps)
scene['audio-center']['A1'].append({
'blank': True,
'duration': offset
'duration': gap
})
scene['audio-rear']['A1'].append({
'blank': True,
'duration': offset
'duration': gap
})
vo_min += offset
sub_offset = offset
subs = []
for vo in voice_overs:
print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+gap, gap), 'silence')
sub_offset += gap
vo_key = random_choice(seq, vo_keys, pop=True)
variant = random_int(seq, len(voice_over[vo_key]))
vo = voice_over[vo_key][variant]
while int((vo['duration'] + sub_offset) * fps) > scene_duration:
if not vo_keys:
vo = None
break
vo_key = random_choice(seq, vo_keys, pop=True)
variant = random_int(seq, len(voice_over[vo_key]))
vo = voice_over[vo_key][variant]
if vo is None:
break
print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+vo["duration"], vo["duration"]), vo["src"].split('/')[-1])
voice_overs.append(vo)
voc = vo.copy()
a, b = '-11', '-3'
if 'Whispered' in voc['src']:
@ -122,118 +286,20 @@ def compose(clips, target=150, base=1024, voice_over=None, options=None):
if subs:
scene["subtitles"] = subs
selected_clips_length = 0
selected_clips = []
non_ai_clips = []
for clip in clips:
if 'ai' in clip:
selected_clips.append(clip)
selected_clips_length += clip['duration']
else:
non_ai_clips.append(clip)
while selected_clips_length < target and non_ai_clips:
clip = random_choice(seq, non_ai_clips, pop=True)
selected_clips.append(clip)
selected_clips_length += clip['duration']
clips = selected_clips
clip = None
while target - length > 0 and clips:
'''
if clip:
if chance(seq, 0.5):
next_seqid = clip['seqid'] + 1
clip = get_clip_by_seqid(clips, next_seqid)
else:
clip = None
'''
clip = None
if not clip:
# FIXME: while not all clips have AI versions make sure we have one 50% of the time
clip = random_choice(seq, clips, True)
if not clips:
print("not enough clips, need to reset")
clips = [c for c in all_clips if c != clip and c not in used]
if not clips:
print("not enough clips, also consider used")
clips = [c for c in all_clips if c != clip]
if not clips:
print("not enough clips, also consider last clip")
clips = all_clips.copy()
if length + clip['duration'] > target and length >= vo_min:
break
length += int(clip['duration'] * fps) / fps
# 50/50 source or ai
src = clip['source']
audio = clip['source']
# select ai if we have one
if 'ai' in clip:
if True or chance(seq, 0.5):
src = random_choice(seq, list(clip['ai'].values()), False)
print('%07.3f %07.3f' % (length, clip['duration']), src.split('/')[-2], os.path.basename(clip['source']))
scene['front']['V2'].append({
'duration': clip['duration'],
'src': src,
"filter": {
}
})
volume_front = '-2.5'
volume_rear = '-8.5'
if clip.get('volume') is not None:
volume_front = '%0.2f' % (float(volume_front) + clip['volume'])
volume_rear = '%0.2f' % (float(volume_rear) + clip['volume'])
audio_filter = {
'mono': [
["channels", "2"],
],
'dynamic_loudness': [
["target_loudness", "-35"],
["min_gain", "-15"],
["max_gin", "15"],
],
'volume': volume_front,
'fadein': '00:00:00.125'
}
scene['audio-front']['A2'].append({
'duration': clip['duration'],
'src': audio,
'filter': audio_filter.copy()
})
'''
audio_filter['volume'] = volume_rear
scene['audio-rear']['A2'].append({
'duration': clip['duration'],
'src': audio,
'filter': audio_filter.copy()
})
'''
used.append(clip)
print("scene duration %0.3f (target: %0.3f, vo_min: %0.3f)" % (length, target, vo_min))
scene_duration = int(get_scene_duration(scene) * fps)
sub_offset = int(sub_offset * fps)
if sub_offset < scene_duration:
delta = format_duration((scene_duration - sub_offset) / fps, fps)
print(">> add %0.3f of silence.. %0.3f (scene_duration)" % (delta, scene_duration / fps))
gap = format_duration((scene_duration - sub_offset) / fps, fps)
print('%07.3f-%07.3f %07.3f' % (sub_offset, sub_offset+gap, gap), 'silence')
scene['audio-center']['A1'].append({
'blank': True,
'duration': delta
'duration': gap
})
scene['audio-rear']['A1'].append({
'blank': True,
'duration': delta
'duration': gap
})
elif sub_offset > scene_duration:
delta = format_duration((scene_duration - sub_offset) / fps, fps)
scene['audio-center']['A1'][-1]["duration"] += delta
scene['audio-rear']['A1'][-1]["duration"] += delta
print("WTF, needed to cut %s new duration: %s" % (delta, scene['audio-center']['A1'][-1]["duration"]))
print(scene['audio-center']['A1'][-1])
sub_offset += gap
print("scene duration %0.3f (target: %0.3f)" % (length, target))
return scene, used
def write_subtitles(data, folder, options):
@ -312,7 +378,9 @@ def get_fragments(clips, voice_over, prefix):
for l in itemlist.models.List.objects.filter(status='featured').order_by('name'):
if l.name.split(' ')[0].isdigit():
fragment_id = l.name.split(' ')[0]
fragment = {
'id': fragment_id,
'name': l.name,
'tags': [],
'anti-tags': [],
@ -344,7 +412,7 @@ def get_fragments(clips, voice_over, prefix):
print("FIXME", i)
continue
type_ = i.data['type'][0].lower()
target = os.path.join(prefix, type_, i.data['title'] + ext)
target = os.path.join(prefix, 'video', type_, i.data['title'] + ext)
sources.append(target)
fragment['clips'] = []
for clip in clips:
@ -353,7 +421,7 @@ def get_fragments(clips, voice_over, prefix):
source = clip['source']
if source in sources:
fragment['clips'].append(clip)
fragment["voice_over"] = voice_over.get(str(fragment["id"]), {})
fragment["voice_over"] = voice_over.get(fragment_id, {})
fragments.append(fragment)
fragments.sort(key=lambda f: ox.sort_string(f['name']))
return fragments
@ -386,21 +454,23 @@ def render_all(options):
for fragment in fragments:
fragment_base += 1
fragment_id = int(fragment['name'].split(' ')[0])
if options["chapter"] and int(options["chapter"]) != fragment_id:
if options["fragment"] and int(options["fragment"]) != fragment_id:
continue
name = fragment['name'].replace(' ', '_')
if fragment_id < 10:
name = '0' + name
if not fragment['clips']:
print("skipping empty fragment", name)
continue
fragment_prefix = os.path.join(base_prefix, name)
os.makedirs(fragment_prefix, exist_ok=True)
fragment_clips = fragment['clips']
unused_fragment_clips = [c for c in fragment_clips if c not in clips_used]
used_ids = {c['id'] for c in clips_used}
unused_fragment_clips = [c for c in fragment_clips if c['id'] not in clips_used]
print('fragment clips', len(fragment_clips), 'unused', len(unused_fragment_clips))
print('--')
print('Video:')
scene, used = compose(
unused_fragment_clips,
fragment=fragment,
target=target,
base=fragment_base,
voice_over=fragment['voice_over'],
@ -414,7 +484,7 @@ def render_all(options):
src = src[0]['src']
stats[src.split('/')[-2]] += 1
else:
print("!! fixme, chapter without VO")
print("!! fixme, fragment without VO")
position += scene_duration
target_position += fragment_target
@ -833,13 +903,14 @@ def generate_clips(options):
source = e.files.filter(selected=True)[0].data.path
ext = os.path.splitext(source)[1]
type_ = e.data['type'][0].lower()
target = os.path.join(prefix, type_, i.data['title'] + ext)
target = os.path.join(prefix, 'video', type_, i.data['title'] + ext)
os.makedirs(os.path.dirname(target), exist_ok=True)
if os.path.islink(target):
os.unlink(target)
os.symlink(source, target)
if type_ == "source":
source_target = target
clip['loudnorm'] = get_loudnorm(e.files.filter(selected=True)[0])
if type_.startswith('ai:'):
if 'ai' not in clip:
clip['ai'] = {}
@ -888,17 +959,20 @@ def generate_clips(options):
print("using", len(clips), "clips")
voice_over = defaultdict(dict)
voice_over = {}
for vo in item.models.Item.objects.filter(
data__type__icontains="voice over",
):
title = vo.get('title')
fragment_id = int(title.split('_')[2].replace('gen', ''))
parts = title.split('-')
fragment = '%02d' % int(parts[0].replace('ch', ''))
type = parts[1]
variant = '-'.join(parts[2:4])
source = vo.files.filter(selected=True)[0]
#batch = vo.get('batch')[0].replace('Text-', '')
batch = title.split('_')[3]
src = source.data.path
target = os.path.join(prefix, 'voice_over', batch, '%s.wav' % fragment_id)
ext = src.split('.')[-1]
target = os.path.join(prefix, 'voice_over', fragment, '%s-%s.%s' % (type, variant, ext))
os.makedirs(os.path.dirname(target), exist_ok=True)
if os.path.islink(target):
os.unlink(target)
@ -909,10 +983,15 @@ def generate_clips(options):
).exclude(value="").order_by("start"):
sdata = get_srt(sub, 0, lang, tlang)
subs.append(sdata)
voice_over[fragment_id][batch] = {
if fragment not in voice_over:
voice_over[fragment] = {}
if type not in voice_over[fragment]:
voice_over[fragment][type] = []
voice_over[fragment][type].append({
"variant": variant,
"src": target,
"duration": format_duration(source.duration, 24),
"subs": subs
}
})
with open(os.path.join(prefix, 'voice_over.json'), 'w') as fd:
json.dump(voice_over, fd, indent=2, ensure_ascii=False)

View file

@ -8,9 +8,93 @@ import item.models
from .render_kdenlive import KDEnliveProject, _CACHE
from .render import default_prefix as root
from .render import load_defaults
def render_all(options):
    """Render the static 5.1 background tracks, skipping ones that exist.

    Checks for the rendered forest and music 5.1 mixes under
    ``<root>/render/`` and only (re)renders the missing ones.
    """
    # NOTE: fixed missing closing parens on both os.path.exists(...) calls.
    if os.path.exists(os.path.join(root, "render/forest-5.1.mp4")):
        print("forest-5.1.mp4 exists, skipping")
    else:
        render_forest()
    if os.path.exists(os.path.join(root, "render/music-5.1.mp4")):
        print("music-5.1.mp4 exists, skipping")
    else:
        render_music()
def render_music():
    """Render the music stereo mix as a 5.1 file (render/music-5.1.mp4).

    The stereo music plays on the 5.1 front left/right channels; the other
    four channels are filled with silence. Intermediate wav files and the
    generated kdenlive project are removed afterwards.
    """
    # Build a kdenlive project concatenating all items tagged as music.
    project = KDEnliveProject(root)
    qs = item.models.Item.objects.filter(
        data__type__icontains='music'
    ).order_by('sort__title')
    for clip in qs:
        src = clip.files.all()[0].data.path
        project.append_clip('A1', {
            "src": src,
            "duration": clip.sort.duration,
            "filter": {
            },
        })
    path = os.path.join(root, "music.kdenlive")
    with open(path, 'w') as fd:
        fd.write(project.to_xml())
    os.chdir(root)
    # Render the project to a stereo wav with melt.
    cmd = [
        "melt", "music.kdenlive", '-quiet', '-consumer', 'avformat:music.wav'
    ]
    print(" ".join([str(x) for x in cmd]))
    subprocess.call(cmd)
    info = ox.avinfo('music.wav')
    cmds = []
    # Mono silence matching the mix duration, used to pad the other channels.
    cmds.append([
        "ffmpeg", "-y",
        "-nostats", "-loglevel", "error",
        "-f", "lavfi", "-i", "anullsrc=r=48000:cl=mono",
        "-t", str(info["duration"]),
        "music_silence.wav"
    ])
    # Split the stereo mix into separate left/right mono files.
    for src, out1, out2 in (
        ('music.wav', "music_left.wav", "music_right.wav"),
    ):
        cmds.append([
            "ffmpeg", "-y",
            "-nostats", "-loglevel", "error",
            "-i", src,
            "-filter_complex",
            "[0:0]pan=1|c0=c0[left]; [0:0]pan=1|c0=c1[right]",
            "-map", "[left]", out1,
            "-map", "[right]", out2,
        ])
    # Merge six mono inputs into 5.1: inputs 0/1 (music L/R) land on the
    # front left/right channels, inputs 2-5 (silence) fill the rest.
    cmds.append([
        "ffmpeg", "-y",
        "-nostats", "-loglevel", "error",
        "-i", "music_left.wav",
        "-i", "music_right.wav",
        "-i", "music_silence.wav",
        "-i", "music_silence.wav",
        "-i", "music_silence.wav",
        "-i", "music_silence.wav",
        "-filter_complex", "[0:a][1:a][2:a][3:a][4:a][5:a]amerge=inputs=6[a]",
        "-map", "[a]",
        "-ar", "48000",
        "-c:a", "aac", "render/music-5.1.mp4"
    ])
    for cmd in cmds:
        print(" ".join([str(x) for x in cmd]))
        subprocess.call(cmd)
    # Clean up intermediates (cwd is still root after os.chdir above).
    for name in (
        "music.kdenlive",
        "music.wav",
        "music_left.wav",
        "music_right.wav",
        "music_silence.wav",
    ):
        if os.path.exists(name):
            os.unlink(name)
def render_forest():
# Stereo Mix, playing on 5.1 rear left/right
@ -32,11 +116,14 @@ def render_forest():
with open(path, 'w') as fd:
fd.write(project.to_xml())
cmds = []
cmds.append([
os.chdir(root)
cmd = [
"melt", "forest.kdenlive", '-quiet', '-consumer', 'avformat:forest.wav'
])
]
print(" ".join([str(x) for x in cmd]))
subprocess.call(cmd)
info = ox.avinfo('forest.wav')
cmds = []
cmds.append([
"ffmpeg", "-y",
"-nostats", "-loglevel", "error",
@ -61,10 +148,10 @@ def render_forest():
cmds.append([
"ffmpeg", "-y",
"-nostats", "-loglevel", "error",
"-i", "silence.wav",
"-i", "silence.wav",
"-i", "silence.wav",
"-i", "silence.wav",
"-i", "forest_silence.wav",
"-i", "forest_silence.wav",
"-i", "forest_silence.wav",
"-i", "forest_silence.wav",
"-i", "forest_left.wav",
"-i", "forest_right.wav",
"-filter_complex", "[0:a][1:a][2:a][3:a][4:a][5:a]amerge=inputs=6[a]",
@ -72,15 +159,15 @@ def render_forest():
"-ar", "48000",
"-c:a", "aac", "render/forest-5.1.mp4"
])
os.chdir(root)
for cmd in cmds:
print(" ".join([str(x) for x in cmd]))
subprocess.call(cmd)
for name in (
"forest.kdenlive",
"forest.wav",
"forest_left.wav",
"forest_right.wav",
"silence.wav",
"forest_silence.wav",
):
if os.path.exists(name):
os.unlink(name)