41 lines
1.1 KiB
Python
41 lines
1.1 KiB
Python
|
#!/usr/bin/python3
|
||
|
from argparse import ArgumentParser
|
||
|
import json
|
||
|
import sys
|
||
|
|
||
|
import whisper_timestamped as whisper
|
||
|
|
||
|
|
||
|
def transcribe(path, language=None, translate=None):
    """Transcribe (or translate) the media file at *path*.

    Args:
        path: Media location handed to ``whisper.load_audio``.
        language: Forwarded unchanged as the ``language`` option of
            ``whisper.transcribe``.
        translate: When truthy the task is ``"translate"``; otherwise it
            is ``"transcribe"``.

    Returns:
        The value returned by ``whisper.transcribe`` for the loaded audio.
    """
    # Fixed decoding settings; only the language and the task vary per call.
    options = {
        "language": language,
        "compute_word_confidence": False,
        "beam_size": 5,
        "best_of": 5,
        "temperature": (0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
        "fp16": False,
        "task": "translate" if translate else "transcribe",
        "vad": "auditok",
    }

    model = whisper.load_model("turbo")
    audio = whisper.load_audio(path)
    return whisper.transcribe(model, audio, **options)
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
    # Command-line entry point: parse the options, run transcribe() on the
    # given media path and print the result as indented JSON on stdout.

    # argparse prepends "usage: " itself, so the template must not repeat it
    # (otherwise help/error output reads "usage: usage: run_whisper ...").
    usage = "%(prog)s [options] path"
    parser = ArgumentParser(usage=usage, prog="run_whisper")
    parser.add_argument(
        "-l",
        "--language",
        dest="language",
        default=None,
        help="language option forwarded to the transcriber (default: unset)",
    )
    parser.add_argument(
        "-t",
        "--translate",
        dest="translate",
        default=None,
        metavar="FLAG",
        help="enable the translate task; an empty value, 0, false, no or off "
        "keeps plain transcription (default: transcription)",
    )
    parser.add_argument("path", metavar="path", type=str, help="media")
    opts = parser.parse_args()

    # The option arrives as a string and every non-empty string is truthy, so
    # "-t false" or "-t 0" would otherwise switch translation ON. Map the
    # explicit "off" spellings to False; any other value still enables it.
    translate = opts.translate
    if translate is not None:
        translate = translate.strip().lower() not in {"", "0", "false", "no", "off"}

    result = transcribe(opts.path, opts.language, translate)
    # ensure_ascii=False keeps non-ASCII text readable instead of \u escapes.
    print(json.dumps(result, indent=2, ensure_ascii=False))
|