diff --git a/generate.py b/generate.py index 2ed5c83..9774c40 100644 --- a/generate.py +++ b/generate.py @@ -492,6 +492,38 @@ def process_frame(item, prompt, character=None, position=0, seed=None): img.update_find() return img +def replace_background(image, background, prompt=None, seed=None): + model = "seedream-4-5-251128" + position = 0 + if prompt is None: + prompt = "Place the character from image 2 into image 1" + if isinstance(background, Item): + background_url = public_frame_url(background, position) + else: + background_url = public_document_url(background) + images = [ + background_url, + public_document_url(image), + ] + data = { + "model": model, + "prompt": prompt, + "image": images, + "size": "2560x1440", + } + if seed: + data["seed"] = seed + url = bytedance_image_generation(data) + img = add_ai_image(image, position, url) + img.refresh_from_db() + img.data["model"] = model + img.data["prompt"] = prompt + img.data["source"] = image.get_id() + img.save() + img.update_sort() + img.update_find() + return img + def get_character_document(character, type="Single Character", age=None): if character in ("P1", "P2", "P3", "P4", "P5"): title = type + " " + character @@ -504,7 +536,7 @@ def get_character_document(character, type="Single Character", age=None): REPLACE_CHARACTER_PROMPT = "Replace the foreground character in image 1 with the character in image 2, keep the posture, clothing, background, light, atmosphere from image 1, but take the facial features and personality from image 2. Make sure the size of the character is adjusted since the new character is a child and make sure the size of the head matches the body. The quality of the image should be the same between foreground and background, adjust the quality of the character to match the background. Use the style of image 1 for the character: if image 1 is a photo make the character a real person, if image 1 is a drawing make the character a drawn character, if image 1 is a comic use a comic character and so on" """ -REPLACE_CHARACTER_PROMPT = "Replace the foreground character in image 1 with the character in image 2, keep the posture, clothing, background, light, atmosphere from image 1, but take the facial features and personality from image 2. Make sure the size of the character is adjusted since the new character is a child and make sure the size of the head matches the body. The quality of the image should be the same between foreground and background, adjust the quality of the character to match the background. Use the style of image 1 for the character: if image 1 is a photo make the character a real person, if image 1 is a drawing make the character a drawn character, if image 1 is a comic use a comic character, restore any blured out regions of the image" +REPLACE_CHARACTER_PROMPT = "Replace the foreground character in image 1 with the character in image 2, keep the posture, clothing, background, light, atmosphere from image 1, but take the facial features and personality from image 2. Make sure the size of the character is adjusted since the new character is a child and make sure the size of the head matches the body. The quality of the image should be the same between foreground and background, adjust the quality of the character to match the background. Use the style of image 1 for the character: if image 1 is a photo make the character a real person, if image 1 is a drawing make the character a drawn character, if image 1 is a comic use a comic character, restore any blurred out regions of the image" def fal_replace_character(item, character, position=0): @@ -540,8 +572,9 @@ def fal_replace_character(item, character, position=0): return img -def replace_character(item, character, position=0, seed=None, extra=None, age=None): - prompt = REPLACE_CHARACTER_PROMPT +def replace_character(item, character, position=0, seed=None, extra=None, age=None, prompt=None): + if prompt is None: + prompt = REPLACE_CHARACTER_PROMPT if age: prompt = prompt.replace("child", "person") elif character == "P5": @@ -707,16 +740,32 @@ def kling_v2v_edit(item, background, keep=False): return ai def wan_reference_to_video(foreground, background, keep=False): - foreground_url = public_video_url(foreground) - background_url = public_video_url(background) prompt = "Use the character from @Video1 and use @Video2 as background" + prompt = "Character1 dances in the foreground, Character2 as background" model = "wan/v2.6/reference-to-video" - prompt_hash = hashlib.sha1((prompt + foreground_url + background_url).encode()).hexdigest() + + foreground_url = public_video_url(foreground) + #background_url = public_video_url(background) + src = background.files.all()[0].data.path item = background - output = "/srv/pandora/static/power/cache/%s_%s/ai.mp4" % ( + prompt_hash = hashlib.sha1((prompt + foreground_url + src).encode()).hexdigest() + prefix = "/srv/pandora/static/power/cache/%s_%s" % ( item.public_id, prompt_hash, ) + os.makedirs(prefix, exist_ok=True) + frames = int(foreground.sort.duration * 24) + dst = prefix + "/background.mp4" + trim_video(src, dst, frames) + if not os.path.exists(dst): + raise Exception + + fg = prefix + "/foreground.mp4" + shutil.copy(foreground.files.all()[0].data.path, fg) + foreground_url = public_url(fg) + background_url = public_url(dst) + item = background + output = prefix + "/ai.mp4" for d in [5, 10]: if d > item.sort.duration: break @@ -730,7 +779,8 @@ def wan_reference_to_video(foreground, background, keep=False): ], "aspect_ratio": "16:9", "resolution": "720p", - "enable_prompt_expansion": False, + "duration": str(duration), + "enable_prompt_expansion": True, "multi_shots": True, "enable_safety_checker": False } @@ -846,12 +896,12 @@ def ltx_a2v(item, character, prompt=None, first_frame=None, keep=False, expand_p return ai -def vo2video(vo, item, character, position=0, prompt=None, expand_prompt=False): - first_frame = replace_character(item, charcater, position) +def vo2video(vo, item, character, position=0, prompt=None, age=None, expand_prompt=False): + first_frame = replace_character(item, character, position, age=age) if prompt is None: # the painting becomes animated and the girl looks into the camera and speaks - prompt = "the scene and character become animated, the character looks into the camera and speaks" - return ltx_a2v(audio, character=character, prompt=prompt, first_frame=first_frame, expand_prompt=expand_prompt) + prompt = "the scene and person become animated, the person looks into the camera and speaks" + return ltx_a2v(vo, character=character, prompt=prompt, first_frame=first_frame, expand_prompt=expand_prompt) def ltx_v2v(item, character, prompt=None, keep=False): @@ -901,16 +951,25 @@ def ltx_v2v(item, character, prompt=None, keep=False): shutil.rmtree(prefix) return ai -def replace_character_motion_control(item, character, keep=False): +def replace_character_motion_control(item, character, first_frame=None, background=None, keep=False): if isinstance(item, str): item = Item.objects.get(public_id=item) - # FIXME get character from documents - if isinstance(character, str): - img = replace_character(item, character, 0) + add = [] + if first_frame: + img = first_frame + image_url = public_document_url(first_frame) else: - img = character - image_url = public_document_url(img) + # FIXME get character from documents + if isinstance(character, str): + img = replace_character(item, character, 0) + else: + img = character + if background: + add.append(img) + img = replace_background(img, background) + image_url = public_document_url(img) + video_url = public_video_url(item) prompt = "" model = "fal-ai/kling-video/v2.6/pro/motion-control" @@ -942,6 +1001,8 @@ def replace_character_motion_control(item, character, keep=False): if not keep: shutil.rmtree(os.path.dirname(output)) img.add(ai) + for img_ in add: + img_.add(ai) return ai @@ -1045,7 +1106,7 @@ def reshoot_item(item, extra_prompt=None, first_frame=None, keep=False, prompt=N return ai -def reshoot_item_segments(item, character, keep=False): +def reshoot_item_segments(item, character, age=None, keep=False): if isinstance(item, str): item = Item.objects.get(public_id=item) max_duration = 12 @@ -1092,7 +1153,7 @@ def reshoot_item_segments(item, character, keep=False): segment_character = character if segment_character: segment_first_frame = replace_character( - item, segment_character, position, seed=seed + item, segment_character, position, seed=seed, age=age ) segment_first_frame_url = public_document_url(segment_first_frame) else: @@ -1510,7 +1571,7 @@ def add_ai_image(item, position, url, extension=None): extension = "jpg" file = Document(user=item.user) file.rightslevel = 2 - file.data["title"] = "%s at %s" % (item.get("title"), position) + file.data["title"] = "%s at %s" % (item.data["title"], position) file.data["position"] = position file.extension = extension file.width = -1 @@ -1527,7 +1588,8 @@ def add_ai_image(item, position, url, extension=None): file.save() file.update_sort() file.update_find() - file.add(item) + if isinstance(item, Item): + file.add(item) return file def add_tag(item, tag): @@ -1556,7 +1618,8 @@ def process_reshoot_firstframe(character='P1', age=None, l=None): if 'ai-failed' in item.data.get('tags', []): print('>> skip', item) continue - if item.sort.duration > 30: + if item.sort.duration > 12: + print("only up to 12 second for single shot") pass #reshoot_item_segments(item, character) else: diff --git a/render.py b/render.py index 7c9d2c9..3275cb0 100644 --- a/render.py +++ b/render.py @@ -560,7 +560,10 @@ def render_all(options): cmd += ['vn=1'] else: cmd += ['an=1'] - #cmd += ['vcodec=libx264', 'x264opts=keyint=1', 'crf=15'] + if options.get("use_qsv"): + cmd += ['vcodec=h264_qsv', 'pix_fmt=nv12', 'rc=icq', 'global_quality=17'] + elif options.get("only_keyframes"): + cmd += ['vcodec=libx264', 'x264opts=keyint=1', 'crf=15'] subprocess.call(cmd) if ext == '.wav' and timeline.endswith('audio.kdenlive'): cmd = [ @@ -689,7 +692,10 @@ def render_all(options): cmd += ['vn=1'] else: cmd += ['an=1'] - cmd += ['vcodec=libx264', 'x264opts=keyint=1', 'crf=15'] + if options.get("use_qsv"): + cmd += ['vcodec=h264_qsv', 'pix_fmt=nv12', 'rc=icq', 'global_quality=17'] + elif options.get("only_keyframes"): + cmd += ['vcodec=libx264', 'x264opts=keyint=1', 'crf=15'] cmds.append(cmd) for src, out1, out2 in ( ("audio-front.wav", "fl.wav", "fr.wav"), @@ -1196,8 +1202,15 @@ def unused_tags(): def fragment_statistics(): import itemlist.models - import item.models + from item.models import Item stats = {} + duration = {} + ai_duration = {} + prefix = default_prefix + + with open(os.path.join(prefix, "clips.json")) as fd: + clips = json.load(fd) + for l in itemlist.models.List.objects.filter(status='featured').order_by('name'): if l.name.split(' ')[0].isdigit(): fragment_id = l.name.split(' ')[0] @@ -1224,11 +1237,14 @@ def fragment_statistics(): elif con.get('key') == "tags" and con['operator'] == '!==': fragment['anti-tags'].append(con['value'].lower().strip()) + if fragment_id not in stats: stats[fragment_id] = {} for tag in fragment['tags']: stats[fragment_id][tag] = 0 + duration[fragment_id] = ai_duration[fragment_id] = 0 + for item in l.get_items(l.user).all(): item_tags = [t.lower().strip() for t in item.get('tags')] if set(item_tags) & set(fragment['anti-tags']): @@ -1236,9 +1252,16 @@ def fragment_statistics(): for tag in set(fragment['tags']): if tag in item_tags: stats[fragment_id][tag] += 1 + duration[fragment_id] += item.sort.duration + for ai in Item.objects.filter(data__title=item.data['title']).filter(data__type__icontains='ai:'): + ai_duration[fragment_id] += ai.sort.duration with open("/srv/pandora/static/power/fragments.txt", "w") as fd: for fragment, data in stats.items(): - fd.write("%s\n" % fragment) + fd.write("%s (%s source material, %s ai material)\n" % ( + fragment, + ox.format_duration(1000*duration[fragment], 1, milliseconds=False), + ox.format_duration(1000*ai_duration[fragment], 1, milliseconds=False)) + ) for tag in sorted(data): fd.write(" %s: %s\n" % (tag, data[tag])) return stats