diff --git a/transcribe.py b/transcribe.py index 622feb3..58520bd 100644 --- a/transcribe.py +++ b/transcribe.py @@ -150,15 +150,23 @@ def extract_subtitles(item, user, layer, translate, gpu=False, join_sentences=Fa if not item.streams(): logger.error("skip item without media %s: %s", item.public_id) return False - src = item.streams()[0].media.path - response = run_whisper(src, language, translate, gpu, model) - if not response: - logger.error("extract failed for %s", item.public_id) - return False - annotations = prepare_annotations(response, join_sentences=join_sentences) + + offset = 0 + annotations = [] + for stream in item.streams(): + src = stream.media.path + response = run_whisper(src, language, translate, gpu, model) + if not response: + logger.error("extract failed for %s", item.public_id) + return False + for annotation in prepare_annotations(response, join_sentences=join_sentences): + if offset: + annotation['in'] += offset + annotation['out'] += offset + annotations.append(annotation) + offset += stream.duration if not annotations: return False - if language and language != "en": for annotation in annotations: annotation["value"] = '%s' % (