cleanups, add demucs option

This commit is contained in:
j 2025-01-24 18:00:58 +05:30
commit cb7514d988

View file

@ -26,12 +26,12 @@ def prepare_annotations(result, join_sentences=False):
if join_sentences: if join_sentences:
return prepare_joint_annotations(result) return prepare_joint_annotations(result)
annotations = [] annotations = []
for segments in result["segments"]: for segment in result["segments"]:
annotations.append( annotations.append(
{ {
"in": segment["start"], "in": segment["start"],
"end": segment["end"] + 0.3, "out": segment["end"] + 0.3,
"value": segment["text"], "value": segment["text"].strip(),
} }
) )
return annotations return annotations
@ -92,20 +92,30 @@ def prepare_joint_annotations(result, target_length=200):
return annotations return annotations
def extract_subtitles(item, user, layer, translate, gpu=False, join_sentences=False, model="small"): def run_demucs(src, output):
language = None cmd = [
if "language" not in item.data: "/opt/whisper-timestamped/bin/demucs"
language = None "--two-stems", "vocals",
else: "-o", output,
language = ox.iso.langTo2Code(item.data["language"][0]) src
if not item.streams(): ]
logger.error("skip item without media %s: %s", item.public_id) subprocess.check_call(cmd)
return False wav = glob("%s/htdemucs/*/vocals.wav" % output)[0]
src = item.streams()[0].media.path return wav
def run_whisper(src, language=None, translate=False, gpu=False, model="small", demucs=False):
tmp = tempfile.mkdtemp() tmp = tempfile.mkdtemp()
output = os.path.join(tmp, "output.json")
if demucs:
try:
src = run_demucs(src, tmp)
except:
logger.error("failed to run demucs for %s", src)
shutil.rmtree(tmp)
return None
output = os.path.join(tmp, "output.json")
run_py = os.path.join(os.path.dirname(os.path.abspath(__file__)), "run_whisper.py") run_py = os.path.join(os.path.dirname(os.path.abspath(__file__)), "run_whisper.py")
cmd = ["/opt/whisper-timestamped/bin/python", run_py] cmd = ["/opt/whisper-timestamped/bin/python", run_py]
cmd += ["--model", model] cmd += ["--model", model]
@ -120,13 +130,30 @@ def extract_subtitles(item, user, layer, translate, gpu=False, join_sentences=Fa
try: try:
subprocess.check_call(cmd) subprocess.check_call(cmd)
except: except:
logger.error( logger.error("failed to run: %s", cmd)
"failed to extract subtitles from item %s\n%s", item.public_id, cmd shutil.rmtree(tmp)
) return None
return False
with open(output) as fd: with open(output) as fd:
response = json.load(fd) response = json.load(fd)
shutil.rmtree(tmp)
#shutil.rmtree(tmp)
return response
def extract_subtitles(item, user, layer, translate, gpu=False, join_sentences=False, model="small"):
language = None
if "language" not in item.data:
language = None
else:
language = ox.iso.langTo2Code(item.data["language"][0])
if not item.streams():
logger.error("skip item without media %s: %s", item.public_id)
return False
src = item.streams()[0].media.path
response = run_whisper(src, language, translate, gpu, model)
if not response:
logger.error("extract failed for %s", item.public_id)
return False
annotations = prepare_annotations(response, join_sentences=join_sentences) annotations = prepare_annotations(response, join_sentences=join_sentences)
if not annotations: if not annotations:
return False return False