Compare commits
3 commits
5e53ad8556
...
98c680e89c
| Author | SHA1 | Date | |
|---|---|---|---|
| | 98c680e89c | | |
| | 7f11a62243 | | |
| | 4853d9bfb7 | | |
3 changed files with 9 additions and 8 deletions
|
|
@ -14,8 +14,8 @@ add "transcribe" to LOCAL_APPS in local_settings.py
|
||||||
apt install portaudio19-dev
|
apt install portaudio19-dev
|
||||||
python3 -m venv venv /opt/whisper-timestamped/
|
python3 -m venv venv /opt/whisper-timestamped/
|
||||||
/opt/whisper-timestamped/bin/pip install \
|
/opt/whisper-timestamped/bin/pip install \
|
||||||
torch==1.13.1+cpu \
|
torch==2.3.1+cpu \
|
||||||
torchaudio==0.13.1+cpu \
|
torchaudio==2.3.1+cpu \
|
||||||
-f https://download.pytorch.org/whl/torch_stable.html
|
-f https://download.pytorch.org/whl/torch_stable.html
|
||||||
/opt/whisper-timestamped/bin/pip install whisper-timestamped transformers auditok
|
/opt/whisper-timestamped/bin/pip install whisper-timestamped transformers auditok
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,8 +6,9 @@ import sys
|
||||||
import whisper_timestamped as whisper
|
import whisper_timestamped as whisper
|
||||||
|
|
||||||
|
|
||||||
def transcribe(path, language=None, translate=None):
|
def transcribe(path, language=None, translate=None, model="small"):
|
||||||
model = whisper.load_model("large-v3-turbo")
|
#model = whisper.load_model("large-v3-turbo")
|
||||||
|
model = whisper.load_model(model)
|
||||||
audio = whisper.load_audio(path)
|
audio = whisper.load_audio(path)
|
||||||
if translate:
|
if translate:
|
||||||
task = "translate"
|
task = "translate"
|
||||||
|
|
@ -33,10 +34,11 @@ if __name__ == "__main__":
|
||||||
usage = "usage: %(prog)s [options] path output"
|
usage = "usage: %(prog)s [options] path output"
|
||||||
parser = ArgumentParser(usage=usage, prog="run_whisper")
|
parser = ArgumentParser(usage=usage, prog="run_whisper")
|
||||||
parser.add_argument("-l", "--language", dest="language", default=None)
|
parser.add_argument("-l", "--language", dest="language", default=None)
|
||||||
parser.add_argument("-t", "--translate", dest="translate", default=None)
|
parser.add_argument("-m", "--model", dest="model", default="small")
|
||||||
|
parser.add_argument("-t", "--translate", dest="translate", action='store_true', default=False)
|
||||||
parser.add_argument("path", metavar="path", type=str, help="media input")
|
parser.add_argument("path", metavar="path", type=str, help="media input")
|
||||||
parser.add_argument("output", metavar="output", type=str, help="json output")
|
parser.add_argument("output", metavar="output", type=str, help="json output")
|
||||||
opts = parser.parse_args()
|
opts = parser.parse_args()
|
||||||
result = transcribe(opts.path, opts.language, opts.translate)
|
result = transcribe(opts.path, opts.language, opts.translate, opts.model)
|
||||||
with open(opts.output, "w") as fd:
|
with open(opts.output, "w") as fd:
|
||||||
json.dump(result, fd, indent=2, ensure_ascii=False)
|
json.dump(result, fd, indent=2, ensure_ascii=False)
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
|
@ -93,12 +94,10 @@ def prepare_joint_annotations(result, target_length=200):
|
||||||
|
|
||||||
def extract_subtitles(item, user, layer, translate, gpu=False, join_sentences=False):
|
def extract_subtitles(item, user, layer, translate, gpu=False, join_sentences=False):
|
||||||
language = None
|
language = None
|
||||||
"""
|
|
||||||
if "language" not in item.data:
|
if "language" not in item.data:
|
||||||
language = None
|
language = None
|
||||||
else:
|
else:
|
||||||
language = ox.iso.langTo2Code(item.data["language"][0])
|
language = ox.iso.langTo2Code(item.data["language"][0])
|
||||||
"""
|
|
||||||
if not item.streams():
|
if not item.streams():
|
||||||
logger.error("skip item without media %s: %s", item.public_id)
|
logger.error("skip item without media %s: %s", item.public_id)
|
||||||
return False
|
return False
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue