pandora_transcribe/run_whisper.py
2025-01-24 11:50:49 +05:30

44 lines
1.4 KiB
Python

#!/usr/bin/python3
from argparse import ArgumentParser
import json
import sys
import whisper_timestamped as whisper
def transcribe(path, language=None, translate=None, model="small"):
    """Run whisper-timestamped on a media file and return its raw result.

    Args:
        path: Media file handed to ``whisper.load_audio``.
        language: Forwarded unchanged to ``whisper.transcribe``; ``None`` is
            passed through as-is (presumably the library then detects the
            language itself -- confirm against its documentation).
        translate: When truthy the task is "translate", otherwise
            "transcribe".
        model: Model name handed to ``whisper.load_model``.

    Returns:
        Whatever ``whisper.transcribe`` returns, unmodified.
    """
    # Load the model first, then the audio, mirroring the call order the
    # script has always used.
    loaded_model = whisper.load_model(model)
    samples = whisper.load_audio(path)
    task = "translate" if translate else "transcribe"
    # Decoding options are fixed here rather than exposed to callers.
    return whisper.transcribe(
        loaded_model,
        samples,
        language=language,
        compute_word_confidence=False,
        beam_size=5,
        best_of=5,
        temperature=(0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
        fp16=False,
        task=task,
        vad="auditok",
    )
if __name__ == "__main__":
    # Command-line entry point: transcribe (or translate) one media file and
    # write the result to a JSON file.
    #
    # argparse prepends "usage: " to a custom usage string on its own, so the
    # string must not repeat that prefix.
    usage = "%(prog)s [options] path output"
    parser = ArgumentParser(usage=usage, prog="run_whisper")
    parser.add_argument("-l", "--language", dest="language", default=None)
    parser.add_argument("-m", "--model", dest="model", default="small")
    parser.add_argument("-t", "--translate", dest="translate", action='store_true', default=False)
    parser.add_argument("path", metavar="path", type=str, help="media input")
    parser.add_argument("output", metavar="output", type=str, help="json output")
    opts = parser.parse_args()
    result = transcribe(opts.path, opts.language, opts.translate, opts.model)
    # ensure_ascii=False writes non-ASCII characters verbatim, so pin the file
    # encoding to UTF-8 instead of relying on the locale's default encoding.
    with open(opts.output, "w", encoding="utf-8") as fd:
        json.dump(result, fd, indent=2, ensure_ascii=False)