#!/usr/bin/python3
"""Command-line wrapper around whisper_timestamped.

Transcribes (or translates) a media file and writes the result as JSON.
"""

from argparse import ArgumentParser
import json
import sys

import whisper_timestamped as whisper


def transcribe(path, language=None, translate=None, model="small"):
    """Run whisper_timestamped on the media file at *path*.

    Args:
        path: Path of the media file, handed to ``whisper.load_audio``.
        language: Language passed through to ``whisper.transcribe``;
            ``None`` passes no explicit language.
        translate: When truthy the task is ``"translate"``, otherwise
            ``"transcribe"``.
        model: Model name handed to ``whisper.load_model``.

    Returns:
        Whatever ``whisper.transcribe`` returns; the CLI below serializes
        it with ``json.dump``.
    """
    # Keep the loaded model under its own name instead of rebinding the
    # ``model`` parameter, which holds the model *name*.
    loaded_model = whisper.load_model(model)
    audio = whisper.load_audio(path)
    task = "translate" if translate else "transcribe"

    return whisper.transcribe(
        loaded_model,
        audio,
        language=language,
        compute_word_confidence=False,
        beam_size=5,
        best_of=5,
        temperature=(0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
        fp16=False,
        task=task,
        vad="auditok",
    )


def main():
    """Parse the command line, transcribe the input and write the JSON."""
    # argparse prepends "usage: " on its own, so the template must not
    # repeat it (otherwise the help text reads "usage: usage: ...").
    usage = "%(prog)s [options] path output"
    parser = ArgumentParser(usage=usage, prog="run_whisper")
    parser.add_argument("-l", "--language", dest="language", default=None)
    parser.add_argument("-m", "--model", dest="model", default="small")
    parser.add_argument(
        "-t",
        "--translate",
        dest="translate",
        action="store_true",
        default=False,
    )
    parser.add_argument("path", metavar="path", type=str, help="media input")
    parser.add_argument(
        "output", metavar="output", type=str, help="json output"
    )
    opts = parser.parse_args()

    result = transcribe(opts.path, opts.language, opts.translate, opts.model)

    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
    # opened as UTF-8 explicitly rather than with the locale's default
    # encoding, which may be unable to encode the transcript.
    with open(opts.output, "w", encoding="utf-8") as fd:
        json.dump(result, fd, indent=2, ensure_ascii=False)


if __name__ == "__main__":
    main()