2025-01-24 11:51:00 +05:30
|
|
|
import json
|
2024-07-07 15:32:37 +01:00
|
|
|
import logging
|
|
|
|
import os
|
|
|
|
import shutil
|
|
|
|
import signal
|
|
|
|
import subprocess
|
|
|
|
import tempfile
|
2024-07-07 15:46:41 +01:00
|
|
|
import time
|
2024-07-07 15:32:37 +01:00
|
|
|
|
|
|
|
import ox
|
|
|
|
import ox.iso
|
|
|
|
|
|
|
|
from django.conf import settings
|
|
|
|
|
|
|
|
from annotation import tasks
|
|
|
|
from item import utils
|
2024-07-07 15:46:41 +01:00
|
|
|
from itemlist.models import List
|
|
|
|
from item.models import Item
|
|
|
|
from user.models import User
|
2024-07-07 15:32:37 +01:00
|
|
|
|
|
|
|
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Default for prepare_joint_annotations' ``target_length`` (compared against
# the character length of the sentence being built). Read from the Django
# setting TRANSCRIBE_TARGET_LENGTH, falling back to 200 when it is not set.
TARGET_LENGTH = getattr(settings, 'TRANSCRIBE_TARGET_LENGTH', 200)
|
2025-01-24 08:56:51 +05:30
|
|
|
|
|
|
|
def prepare_annotations(result, join_sentences=False):
    """Turn a transcription result into a list of annotation dicts.

    ``result`` must contain a "segments" list whose entries carry "start",
    "end" and "text". With ``join_sentences`` set, the work is delegated to
    prepare_joint_annotations(); otherwise one annotation is produced per
    segment, with the out point pushed 0.3 later than the segment end and
    the text stripped of surrounding whitespace.
    """
    if join_sentences:
        return prepare_joint_annotations(result)
    return [
        {
            "in": seg["start"],
            "out": seg["end"] + 0.3,
            "value": seg["text"].strip(),
        }
        for seg in result["segments"]
    ]
|
|
|
|
|
|
|
|
|
2025-01-25 11:43:27 +05:30
|
|
|
def prepare_joint_annotations(result, target_length=TARGET_LENGTH):
    """Regroup word-level timings into sentence-sized annotations.

    ``result`` must contain a "segments" list; every segment needs a "words"
    list of dicts with "text", "start" and "end". Words are concatenated
    (space separated) until a word ending in "." closes the sentence.

    A sentence is kept open across a full stop when all of these hold:
    the word is not the last one, the next word starts less than 0.8 after
    this one ends, and the sentence built so far is shorter than
    ``target_length`` characters. Words listed in ``abbrevs`` and words of
    two characters or fewer never close a sentence.

    Returns a list of {"in", "out", "value"} dicts; "out" is the end of the
    closing word plus 0.3.

    NOTE(review): words after the last sentence-closing full stop are never
    flushed, so trailing text without a final "." is not returned. This
    matches the previous behaviour; confirm whether it is intended.
    """
    abbrevs = ["Mr.", "Mrs.", "Dr."]
    # Word texts to drop. The list is empty, so every ``ignore`` check below
    # is currently a no-op; the checks are kept so entries can be added.
    ignore = []

    all_words = []
    for s in result["segments"]:
        all_words.extend(s["words"])
    last_index = len(all_words) - 1

    new_segs = []
    sentence = ""
    # Look ahead by position instead of all_words.index(w): index() is O(n)
    # per word and returns the first *equal* dict, which is the wrong
    # neighbour when two words compare equal.
    for index, w in enumerate(all_words):
        is_last = index == last_index
        next_w = None if is_last else all_words[index + 1]

        if is_last and sentence == "" and w["text"] in ignore:
            continue
        if sentence == "":
            # first word of a sentence: remember where the sentence starts
            in_ = w["start"]
            if (
                w["text"] in ignore
                and next_w is not None
                and next_w["text"][0].isupper()
            ):
                continue

        # NOTE(review): next_w["text"][0] is a single character, so comparing
        # it with "The" can never be true and this branch never runs. Left
        # unchanged on purpose; decide whether the intent was
        # next_w["text"] == "The" before enabling it.
        if (
            sentence == "The music "
            and next_w is not None
            and next_w["text"][0] == "The"
        ):
            sentence = ""
            continue
        sentence += w["text"] + " "

        # if this is a short sentence and the next word starts shortly after,
        # and this is not the last word of the entire text
        if (
            w["text"].endswith(".")
            and not is_last
            and (next_w["start"] - w["end"]) < 0.8
            and len(sentence) < target_length
            and next_w["text"] not in ignore
        ):
            # then do not end this sentence yet
            continue
        if (
            w["text"].endswith(".") and w["text"] not in abbrevs and len(w["text"]) > 2
        ) or (
            w["text"] in ignore
            and sentence.strip() == w["text"]
            and (is_last or next_w["text"][0].isupper())
        ):
            # end the sentence, delay end a bit
            out_ = w["end"] + 0.3
            new_segs.append({"in": in_, "out": out_, "value": sentence.strip()})
            sentence = ""

    return [seg for seg in new_segs if seg["value"].strip() not in ignore]
|
|
|
|
|
|
|
|
|
2025-01-24 18:00:58 +05:30
|
|
|
def run_demucs(src, output):
    """Separate the vocals of ``src`` with demucs and return the wav path.

    Runs the demucs executable with ``--two-stems vocals`` and writes below
    the directory ``output``. Returns the first file matching
    ``<output>/htdemucs/*/vocals.wav``.

    Raises subprocess.CalledProcessError when demucs exits non-zero and
    FileNotFoundError when no vocals.wav was produced.
    """
    # Function-scope import so this block does not depend on a module-level
    # glob import.
    from glob import glob

    cmd = [
        # The trailing comma matters: without it the path and "--two-stems"
        # are joined into one bogus argument by implicit string concatenation.
        "/opt/whisper-timestamped/bin/demucs",
        "--two-stems", "vocals",
        "-o", output,
        src,
    ]
    subprocess.check_call(cmd)
    matches = glob("%s/htdemucs/*/vocals.wav" % output)
    if not matches:
        raise FileNotFoundError("demucs produced no vocals.wav below %s" % output)
    return matches[0]
|
2025-01-24 08:56:51 +05:30
|
|
|
|
2025-01-24 18:00:58 +05:30
|
|
|
|
|
|
|
def run_whisper(src, language=None, translate=False, gpu=False, model="small", demucs=False):
    """Transcribe ``src`` with the run_whisper.py helper and return its JSON.

    The helper script next to this module is run with the interpreter in
    /opt/whisper-timestamped; it is asked to write JSON to a file inside a
    temporary directory, which is then loaded and returned.

    Parameters:
        src: path of the media file to transcribe.
        language: optional language code passed as ``--language``.
        translate: falsy, or a container of language codes; ``--translate``
            is added when ``language`` is in it.
        gpu: accepted for interface compatibility, currently not used here.
        model: model name passed as ``--model``.
        demucs: when true, run_demucs() is applied first and its vocals
            file is transcribed instead of ``src``.

    Returns the decoded JSON, or None when demucs, the helper script or
    reading its output fails (the failure is logged).
    """
    tmp = tempfile.mkdtemp()
    try:
        if demucs:
            try:
                src = run_demucs(src, tmp)
            # Exception, not a bare except: Ctrl-C must still reach the caller.
            except Exception:
                logger.exception("failed to run demucs for %s", src)
                return None

        output = os.path.join(tmp, "output.json")
        run_py = os.path.join(os.path.dirname(os.path.abspath(__file__)), "run_whisper.py")
        cmd = ["/opt/whisper-timestamped/bin/python", run_py]
        cmd += ["--model", model]
        if language:
            cmd += ["--language", language]
        if translate and language in translate:
            cmd += ["--translate"]
        cmd += [src, output]

        try:
            subprocess.check_call(cmd)
        except Exception:
            logger.exception("failed to run: %s", cmd)
            return None

        try:
            with open(output) as fd:
                return json.load(fd)
        except (OSError, ValueError):
            logger.exception("failed to read whisper output of: %s", cmd)
            return None
    finally:
        # Everything needed has been loaded into memory (or has failed) by
        # now; always drop the scratch directory, including the demucs wav
        # files it may hold.
        shutil.rmtree(tmp, ignore_errors=True)
|
|
|
|
|
|
|
|
def extract_subtitles(item, user, layer, translate, gpu=False, join_sentences=False, model="small"):
    """Transcribe the first stream of ``item`` and queue the annotations.

    Parameters:
        item: object exposing ``data``, ``streams()`` and ``public_id``.
        user: object exposing ``username``; recorded with the annotations.
        layer: id of the layer the annotations are added to.
        translate: falsy, or a container of language codes that should be
            translated (forwarded to run_whisper()).
        gpu, model: forwarded to run_whisper().
        join_sentences: forwarded to prepare_annotations().

    Returns True when annotations were handed to tasks.add_annotations,
    False when the item has no media, transcription failed or nothing was
    transcribed.
    """
    if "language" not in item.data:
        language = None
    else:
        language = ox.iso.langTo2Code(item.data["language"][0])

    streams = item.streams()
    if not streams:
        # One placeholder for one argument; the previous message had two.
        logger.error("skip item without media %s", item.public_id)
        return False
    src = streams[0].media.path

    response = run_whisper(src, language, translate, gpu, model)
    if not response:
        logger.error("extract failed for %s", item.public_id)
        return False

    annotations = prepare_annotations(response, join_sentences=join_sentences)
    if not annotations:
        logger.error("no subtitles detected %s", item.public_id)
        return False

    # run_whisper() is asked to translate under exactly this condition, so
    # the text is English then and must not be tagged with the source
    # language (same rule as extract_subtitles_cmd).
    if translate and language in translate:
        language = "en"

    if language and language != "en":
        for annotation in annotations:
            annotation["value"] = '<span lang="%s">%s</span>' % (
                language,
                annotation["value"],
            )

    tasks.add_annotations.delay(
        {
            "item": item.public_id,
            "layer": layer,
            "user": user.username,
            "annotations": annotations,
        }
    )
    return True
|
|
|
|
|
|
|
|
|
2025-01-24 11:52:29 +05:30
|
|
|
def extract_subtitles_cmd(item, user, layer, translate, gpu=False, model="small"):
    """Transcribe ``item`` with the whisper_timestamped CLI via an srt file.

    The CLI writes srt output into a temporary directory; the sentence-level
    srt (any ``*.srt`` whose name does not contain "words.srt") is loaded
    with ox.srt.load() and handed to tasks.add_annotations.

    Parameters:
        item: object exposing ``data``, ``streams()`` and ``public_id``.
        user: object exposing ``username``; recorded with the annotations.
        layer: id of the layer the annotations are added to.
        translate: falsy, or a container of language codes that should be
            translated; matching items are run with ``--task translate``.
        gpu: when false, ``--fp16 False`` is passed to the CLI.
        model: model name passed as ``--model``.

    Returns False when the item has no media or the CLI fails, True
    otherwise (also when no subtitles were detected).
    """
    if "language" not in item.data:
        language = None
    else:
        language = ox.iso.langTo2Code(item.data["language"][0])

    streams = item.streams()
    if not streams:
        # One placeholder for one argument; the previous message had two.
        logger.error("skip item without media %s", item.public_id)
        return False
    src = streams[0].media.path

    tmp = tempfile.mkdtemp()
    try:
        cmd = ["/opt/whisper-timestamped/bin/whisper_timestamped", "--model", model]
        if language:
            cmd += ["--language", language]
        if translate and language in translate:
            cmd += ["--task", "translate"]
            # translated text is English, so it gets no lang span below
            language = "en"
        if not gpu:
            cmd += ["--fp16", "False"]
        cmd += ["-f", "srt", "--accurate", "--output_dir", tmp, src]

        try:
            subprocess.check_output(cmd)
        # Exception, not a bare except: Ctrl-C must still reach the caller.
        except Exception:
            logger.error(
                "failed to extract subtitles from item %s\n%s", item.public_id, cmd
            )
            return False

        annotations = []
        for f in os.listdir(tmp):
            if f.endswith(".srt") and "words.srt" not in f:
                srt = os.path.join(tmp, f)
                annotations = ox.srt.load(srt)

        if not annotations:
            logger.error("no subtitles detected %s", item.public_id)
            return True

        if language and language != "en":
            for annotation in annotations:
                annotation["value"] = '<span lang="%s">%s</span>' % (
                    language,
                    annotation["value"],
                )

        tasks.add_annotations.delay(
            {
                "item": item.public_id,
                "layer": layer,
                "user": user.username,
                "annotations": annotations,
            }
        )
        return True
    finally:
        # Runs on every exit path, so the scratch directory no longer leaks
        # on the "no subtitles" return or on an unexpected exception.
        shutil.rmtree(tmp, ignore_errors=True)
|
|
|
|
|
|
|
|
|
|
|
|
def main(**kwargs):
    """Poll a queue list and transcribe its items until interrupted.

    Keyword arguments read:
        user: username owning both lists (required).
        queue / done: names of the list to process and the list that
            successfully processed items are moved to (required).
        layer: target layer id; when missing, the configured layer with
            ``isSubtitles`` set is used.
        translate: optional "a:b,c:d" string, parsed into a dict.
        gpu, join_sentences, model: forwarded to extract_subtitles().

    Each pass walks the queue; if no item was processed successfully the
    loop sleeps five minutes before the next pass. Ctrl-C ends the loop.
    """
    owner = User.objects.get(username=kwargs["user"])
    pending = List.objects.get(user=owner, name=kwargs["queue"])
    finished = List.objects.get(user=owner, name=kwargs["done"])

    layer = kwargs.get("layer")
    translate = kwargs.get("translate")
    if translate:
        translate = dict(pair.split(":") for pair in translate.split(","))

    if not layer:
        subtitle_layer = utils.get_by_key(settings.CONFIG["layers"], "isSubtitles", True)
        if not subtitle_layer:
            logger.error("no layer defined and config has no subtitle layer")
            return
        layer = subtitle_layer["id"]

    try:
        while True:
            idle = True
            for entry in pending.get_items(pending.user).all():
                succeeded = extract_subtitles(
                    entry,
                    owner,
                    layer,
                    translate,
                    kwargs.get("gpu"),
                    join_sentences=kwargs.get("join_sentences"),
                    model=kwargs.get("model", "small"),
                )
                if not succeeded:
                    continue
                finished.items.add(entry)
                pending.items.remove(entry)
                idle = False
            if idle:
                # nothing was processed in this pass: back off for 5 minutes
                time.sleep(5 * 60)
    except KeyboardInterrupt:
        pass
|