Compare commits
2 commits
d2da5ae581
...
eb2d12a905
| Author | SHA1 | Date | |
|---|---|---|---|
| eb2d12a905 | |||
| f28d37e33e |
1 changed file with 17 additions and 8 deletions
|
|
@ -21,6 +21,7 @@ from user.models import User
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
TARGET_LENGTH = getattr(settings, 'TRANSCRIBE_TARGET_LENGTH', 200)
|
||||||
|
|
||||||
def prepare_annotations(result, join_sentences=False):
|
def prepare_annotations(result, join_sentences=False):
|
||||||
if join_sentences:
|
if join_sentences:
|
||||||
|
|
@ -37,7 +38,7 @@ def prepare_annotations(result, join_sentences=False):
|
||||||
return annotations
|
return annotations
|
||||||
|
|
||||||
|
|
||||||
def prepare_joint_annotations(result, target_length=200):
|
def prepare_joint_annotations(result, target_length=TARGET_LENGTH):
|
||||||
abbrevs = ["Mr.", "Mrs.", "Dr."]
|
abbrevs = ["Mr.", "Mrs.", "Dr."]
|
||||||
ignore = []
|
ignore = []
|
||||||
phrase_sounds = []
|
phrase_sounds = []
|
||||||
|
|
@ -149,15 +150,23 @@ def extract_subtitles(item, user, layer, translate, gpu=False, join_sentences=Fa
|
||||||
if not item.streams():
|
if not item.streams():
|
||||||
logger.error("skip item without media %s: %s", item.public_id)
|
logger.error("skip item without media %s: %s", item.public_id)
|
||||||
return False
|
return False
|
||||||
src = item.streams()[0].media.path
|
|
||||||
response = run_whisper(src, language, translate, gpu, model)
|
offset = 0
|
||||||
if not response:
|
annotations = []
|
||||||
logger.error("extract failed for %s", item.public_id)
|
for stream in item.streams():
|
||||||
return False
|
src = stream.media.path
|
||||||
annotations = prepare_annotations(response, join_sentences=join_sentences)
|
response = run_whisper(src, language, translate, gpu, model)
|
||||||
|
if not response:
|
||||||
|
logger.error("extract failed for %s", item.public_id)
|
||||||
|
return False
|
||||||
|
for annotation in prepare_annotations(response, join_sentences=join_sentences):
|
||||||
|
if offset:
|
||||||
|
annotation['in'] += offset
|
||||||
|
annotation['out'] += offset
|
||||||
|
annotations.append(annotation)
|
||||||
|
offset += stream.duration
|
||||||
if not annotations:
|
if not annotations:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if language and language != "en":
|
if language and language != "en":
|
||||||
for annotation in annotations:
|
for annotation in annotations:
|
||||||
annotation["value"] = '<span lang="%s">%s</span>' % (
|
annotation["value"] = '<span lang="%s">%s</span>' % (
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue