Review notes (text before the "diff --git" line is ignored by git apply / patch):
- describe_item: "video_url = public_video_url(item)" is kept as context. The patch
  as submitted deleted it while still passing video_url to describe_video(), which
  raises NameError on every call.
- reshoot_item: neutral=bool(first_frame) keeps the truthiness test of the removed
  "if first_frame:" branch instead of switching to "is not None".
- Line breaks and indentation were reconstructed from a whitespace-collapsed copy
  (4-space indents assumed) and the hunk 2 line counts were recomputed. The index
  line still carries the blob ids of the submitted patch. If context does not
  match, apply with "git apply --ignore-whitespace --recount".

diff --git a/generate.py b/generate.py
index 5c47fa3..0e2cce9 100644
--- a/generate.py
+++ b/generate.py
@@ -420,15 +420,25 @@ def replace_character_motion_control(item, character, keep=False):
     img.add(ai)
     return ai
 
-def describe_video_neutral(url):
-    prompt = (
-        "Detect cuts or scene changes and describe each scene, use as much details as you can. "
-        "Describe each person incudling detalied apreance, haircut in a gender neutral way, "
-        "describe each objects, animal or plant, describe foreground and backgroud, "
-        "describe from what angle the scene is filmed, incude details about camera model, lense, depth of field used to film this scene. "
-        "Use the format: . CAMERA CUT TO . CAMERA CUT TO . "
-        "Don't mention it if you don't find a cut."
-    )
+def describe_video(url, neutral=False):
+    if neutral:
+        prompt = (
+            "Detect cuts or scene changes and describe each scene, use as much details as you can. "
+            "Describe each person incudling detalied apreance, haircut in a gender neutral way, "
+            "describe each objects, animal or plant, describe foreground and backgroud, "
+            "describe from what angle the scene is filmed, incude details about camera model, lense, depth of field used to film this scene. "
+            "Use the format: . CAMERA CUT TO . CAMERA CUT TO . "
+            "Don't mention it if you don't find a cut."
+        )
+    else:
+        prompt = (
+            "Detect cuts or scene changes and describe each scene, use as much details as you can. "
+            "Describe each person incudling detalied apreance, ethnicity, haircolor, haircut, "
+            "describe each objects, animal or plant, describe foreground and backgroud, "
+            "describe from what angle the scene is filmed, incude details about camera model, lense, depth of field used to film this scene. "
+            "Use the format: . CAMERA CUT TO . CAMERA CUT TO . "
+            "Don't mention it if you don't find a cut."
+        )
     data = {
         "input": [
             {
@@ -443,45 +453,17 @@ def describe_video_neutral(url):
     response = bytedance_response(data)
     return response["output"][1]["content"][0]["text"]
 
-def describe_video(url):
-    prompt = (
-        "Detect cuts or scene changes and describe each scene, use as much details as you can. "
-        "Describe each person incudling detalied apreance, ethnicity, haircolor, haircut, "
-        "describe each objects, animal or plant, describe foreground and backgroud, "
-        "describe from what angle the scene is filmed, incude details about camera model, lense, depth of field used to film this scene. "
-        "Use the format: . CAMERA CUT TO . CAMERA CUT TO . "
-        "Don't mention it if you don't find a cut."
-    )
-    data = {
-        "input": [
-            {
-                "role": "user",
-                "content": [
-                    {"type": "input_video", "video_url": url, "fps": 1},
-                    {"type": "input_text", "text": prompt},
-                ],
-            }
-        ],
-    }
-    response = bytedance_response(data)
-    return response["output"][1]["content"][0]["text"]
-
-
-def describe_item(item):
+def describe_item(item, neutral=False):
     if isinstance(item, str):
         item = Item.objects.get(public_id=item)
     video_url = public_video_url(item)
-    return describe_video(video_url)
-
+    return describe_video(video_url, neutral)
 
 def reshoot_item(item, extra_prompt=None, first_frame=None, keep=False):
     if isinstance(item, str):
         item = Item.objects.get(public_id=item)
     duration = item.sort.duration
     frames = int(duration * 24)
-    if first_frame:
-        prompt = describe_item_neutral(item)
-    else:
-        prompt = describe_item(item)
+    prompt = describe_item(item, neutral=bool(first_frame))
     if extra_prompt:
         prompt += " " + extra_prompt