diff --git a/generate.py b/generate.py index f359e38..dc0fce2 100644 --- a/generate.py +++ b/generate.py @@ -38,6 +38,14 @@ def public_document_url(document): ) return url +def public_video_url(item): + url = "%s%s/download/source/?token=%s" % ( + settings.PUBLIC_URL, + item.public_id, + settings.PUBLIC_TOKEN, + ) + return url + def trim_video(src, dst, frames, start0=False): cap = cv2.VideoCapture(src) @@ -379,11 +387,7 @@ def replace_character_motion_control(item, character, keep=False): else: img = character image_url = public_document_url(img) - video_url = "%s%s/download/source/?token=%s" % ( - settings.PUBLIC_URL, - item.public_id, - settings.PUBLIC_TOKEN, - ) + video_url = public_video_url(item) prompt = "" model = "fal-ai/kling-video/v2.6/pro/motion-control" prompt_hash = hashlib.sha1((prompt + image_url).encode()).hexdigest() @@ -413,6 +417,28 @@ def replace_character_motion_control(item, character, keep=False): img.add(ai) return ai +def describe_video_neutral(url): + prompt = ( + "Detect cuts or scene changes and describe each scene, use as much detail as you can. " + "Describe each person including detailed appearance, haircut in a gender neutral way, " + "describe each object, animal or plant, describe foreground and background, " + "describe from what angle the scene is filmed, include details about camera model, lens, depth of field used to film this scene. " + "Use the format: . CAMERA CUT TO . CAMERA CUT TO . " + "Don't mention it if you don't find a cut."
+ ) + data = { + "input": [ + { + "role": "user", + "content": [ + {"type": "input_video", "video_url": url, "fps": 1}, + {"type": "input_text", "text": prompt}, + ], + } + ], + } + response = bytedance_response(data) + return response["output"][1]["content"][0]["text"] def describe_video(url): prompt = ( @@ -441,11 +467,7 @@ def describe_video(url): def describe_item(item): if isinstance(item, str): item = Item.objects.get(public_id=item) - video_url = "%s%s/download/source/?token=%s" % ( - settings.PUBLIC_URL, - item.public_id, - settings.PUBLIC_TOKEN, - ) + video_url = public_video_url(item) return describe_video(video_url) @@ -454,7 +476,11 @@ def reshoot_item(item, extra_prompt=None, first_frame=None, keep=False): item = Item.objects.get(public_id=item) duration = item.sort.duration frames = int(duration * 24) - prompt = describe_item(item) + if first_frame: + prompt = describe_video_neutral(public_video_url(item)) + else: + prompt = describe_item(item) + if extra_prompt: prompt += " " + extra_prompt prompt_hash = hashlib.sha1((prompt).encode()).hexdigest() @@ -586,11 +612,7 @@ def transform_remake_video(item_id, image_prompt, video_prompt): def restyle_video(item_id, prompt): item = Item.objects.get(public_id=item_id) - video_url = "%s%s/download/source/?token=%s" % ( - settings.PUBLIC_URL, - item.public_id, - settings.PUBLIC_TOKEN, - ) + video_url = public_video_url(item) model = "decart/lucy-restyle" handler = fal_client.submit( model, @@ -628,11 +650,7 @@ def fal_wait_for(model, request_id): def motion_control_preprocess_image(item_id, image_prompt, video_prompt): item = Item.objects.get(public_id=item_id) - video_url = "%s%s/download/source/?token=%s" % ( - settings.PUBLIC_URL, - item.public_id, - settings.PUBLIC_TOKEN, - ) + video_url = public_video_url(item) model = "fal-ai/kling-video/v2.6/pro/motion-control" prompt_hash = hashlib.sha1((image_prompt + video_prompt).encode()).hexdigest() output = "/srv/pandora/static/power/cache/%s_%s.mp4" % (item.public_id, prompt_hash)