Compare commits

...

3 commits

Author SHA1 Message Date
j
98c680e89c re-enable language/translate 2025-01-24 11:51:00 +05:30
j
7f11a62243 select model 2025-01-24 11:50:49 +05:30
j
4853d9bfb7 update depends 2025-01-24 11:50:41 +05:30
3 changed files with 9 additions and 8 deletions

View file

@ -14,8 +14,8 @@ add "transcribe" to LOCAL_APPS in local_settings.py
apt install portaudio19-dev apt install portaudio19-dev
python3 -m venv venv /opt/whisper-timestamped/ python3 -m venv venv /opt/whisper-timestamped/
/opt/whisper-timestamped/bin/pip install \ /opt/whisper-timestamped/bin/pip install \
torch==1.13.1+cpu \ torch==2.3.1+cpu \
torchaudio==0.13.1+cpu \ torchaudio==2.3.1+cpu \
-f https://download.pytorch.org/whl/torch_stable.html -f https://download.pytorch.org/whl/torch_stable.html
/opt/whisper-timestamped/bin/pip install whisper-timestamped transformers auditok /opt/whisper-timestamped/bin/pip install whisper-timestamped transformers auditok

View file

@ -6,8 +6,9 @@ import sys
import whisper_timestamped as whisper import whisper_timestamped as whisper
def transcribe(path, language=None, translate=None): def transcribe(path, language=None, translate=None, model="small"):
model = whisper.load_model("large-v3-turbo") #model = whisper.load_model("large-v3-turbo")
model = whisper.load_model(model)
audio = whisper.load_audio(path) audio = whisper.load_audio(path)
if translate: if translate:
task = "translate" task = "translate"
@ -33,10 +34,11 @@ if __name__ == "__main__":
usage = "usage: %(prog)s [options] path output" usage = "usage: %(prog)s [options] path output"
parser = ArgumentParser(usage=usage, prog="run_whisper") parser = ArgumentParser(usage=usage, prog="run_whisper")
parser.add_argument("-l", "--language", dest="language", default=None) parser.add_argument("-l", "--language", dest="language", default=None)
parser.add_argument("-t", "--translate", dest="translate", default=None) parser.add_argument("-m", "--model", dest="model", default="small")
parser.add_argument("-t", "--translate", dest="translate", action='store_true', default=False)
parser.add_argument("path", metavar="path", type=str, help="media input") parser.add_argument("path", metavar="path", type=str, help="media input")
parser.add_argument("output", metavar="output", type=str, help="json output") parser.add_argument("output", metavar="output", type=str, help="json output")
opts = parser.parse_args() opts = parser.parse_args()
result = transcribe(opts.path, opts.language, opts.translate) result = transcribe(opts.path, opts.language, opts.translate, opts.model)
with open(opts.output, "w") as fd: with open(opts.output, "w") as fd:
json.dump(result, fd, indent=2, ensure_ascii=False) json.dump(result, fd, indent=2, ensure_ascii=False)

View file

@ -1,3 +1,4 @@
import json
import logging import logging
import os import os
import shutil import shutil
@ -93,12 +94,10 @@ def prepare_joint_annotations(result, target_length=200):
def extract_subtitles(item, user, layer, translate, gpu=False, join_sentences=False): def extract_subtitles(item, user, layer, translate, gpu=False, join_sentences=False):
language = None language = None
"""
if "language" not in item.data: if "language" not in item.data:
language = None language = None
else: else:
language = ox.iso.langTo2Code(item.data["language"][0]) language = ox.iso.langTo2Code(item.data["language"][0])
"""
if not item.streams(): if not item.streams():
logger.error("skip item without media %s: %s", item.public_id) logger.error("skip item without media %s: %s", item.public_id)
return False return False