#!/usr/bin/python3
"""Command-line wrapper around ``whisper_timestamped``.

Transcribes (or translates) a media file and prints the result to standard
output as indented JSON.
"""
from argparse import ArgumentParser
import json
import sys


def transcribe(path, language=None, translate=None):
    """Run whisper-timestamped on the media file at *path*.

    Args:
        path: Location of the media file; handed to ``whisper.load_audio``.
        language: Forwarded unchanged as the ``language`` argument of
            ``whisper.transcribe``. Defaults to ``None``.
        translate: Any truthy value selects the ``"translate"`` task;
            a falsy value (the default) selects ``"transcribe"``.

    Returns:
        The object returned by ``whisper_timestamped.transcribe``.
    """
    # Imported here rather than at module level so that ``--help`` and
    # argument errors work without importing whisper_timestamped.
    import whisper_timestamped as whisper

    model = whisper.load_model("turbo")
    audio = whisper.load_audio(path)
    task = "translate" if translate else "transcribe"

    return whisper.transcribe(
        model,
        audio,
        language=language,
        compute_word_confidence=False,
        beam_size=5,
        best_of=5,
        temperature=(0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
        fp16=False,
        task=task,
        vad="auditok",
    )


def build_parser():
    """Return the ``ArgumentParser`` describing the command-line interface."""
    # argparse prepends "usage: " on its own; repeating it in the template
    # would print "usage: usage: run_whisper ...".
    parser = ArgumentParser(usage="%(prog)s [options] path", prog="run_whisper")
    parser.add_argument(
        "-l",
        "--language",
        dest="language",
        default=None,
        help="language passed to the transcriber (default: not set)",
    )
    # NOTE: this option takes a value, and the value is only checked for
    # truthiness, so "-t false" or "-t 0" still enables translation.
    parser.add_argument(
        "-t",
        "--translate",
        dest="translate",
        default=None,
        help="any non-empty value selects translation instead of transcription",
    )
    parser.add_argument("path", metavar="path", type=str, help="media")
    return parser


def main(argv=None):
    """Parse *argv* (``sys.argv[1:]`` when ``None``) and print the JSON result."""
    opts = build_parser().parse_args(argv)
    result = transcribe(opts.path, opts.language, opts.translate)
    print(json.dumps(result, indent=2, ensure_ascii=False))


if __name__ == "__main__":
    main()