diff --git a/README.md b/README.md index 932199e..2c7eb6a 100644 --- a/README.md +++ b/README.md @@ -14,8 +14,8 @@ add "transcribe" to LOCAL_APPS in local_setttings.py apt install portaudio19-dev python3 -m venv venv /opt/whisper-timestamped/ /opt/whisper-timestamped/bin/pip install \ - torch==2.3.1+cpu \ - torchaudio==2.3.1+cpu \ + torch==1.13.1+cpu \ + torchaudio==0.13.1+cpu \ -f https://download.pytorch.org/whl/torch_stable.html /opt/whisper-timestamped/bin/pip install whisper-timestamped transformers auditok diff --git a/run_whisper.py b/run_whisper.py index 471347e..e56b988 100644 --- a/run_whisper.py +++ b/run_whisper.py @@ -6,9 +6,8 @@ import sys import whisper_timestamped as whisper -def transcribe(path, language=None, translate=None, model="small"): - #model = whisper.load_model("large-v3-turbo") - model = whisper.load_model(model) +def transcribe(path, language=None, translate=None): + model = whisper.load_model("large-v3-turbo") audio = whisper.load_audio(path) if translate: task = "translate" @@ -34,11 +33,10 @@ if __name__ == "__main__": usage = "usage: %(prog)s [options] path output" parser = ArgumentParser(usage=usage, prog="run_whisper") parser.add_argument("-l", "--language", dest="language", default=None) - parser.add_argument("-m", "--model", dest="model", default="small") - parser.add_argument("-t", "--translate", dest="translate", action='store_true', default=False) + parser.add_argument("-t", "--translate", dest="translate", default=None) parser.add_argument("path", metavar="path", type=str, help="media input") parser.add_argument("output", metavar="output", type=str, help="json output") opts = parser.parse_args() - result = transcribe(opts.path, opts.language, opts.translate, opts.model) + result = transcribe(opts.path, opts.language, opts.translate) with open(opts.output, "w") as fd: json.dump(result, fd, indent=2, ensure_ascii=False) diff --git a/transcribe.py b/transcribe.py index b750f8c..8d31085 100644 --- a/transcribe.py +++ b/transcribe.py @@ -1,4 +1,3 @@ -import json import logging import os import shutil @@ -94,10 +93,12 @@ def prepare_joint_annotations(result, target_length=200): def extract_subtitles(item, user, layer, translate, gpu=False, join_sentences=False): language = None + """ if "language" not in item.data: language = None else: language = ox.iso.langTo2Code(item.data["language"][0]) + """ if not item.streams(): logger.error("skip item without media %s: %s", item.public_id) return False