From f28d37e33e3662072eac44e3dcb09750bad87968 Mon Sep 17 00:00:00 2001
From: j <j@mailb.org>
Date: Sat, 25 Jan 2025 11:43:27 +0530
Subject: [PATCH 1/2] add TRANSCRIBE_TARGET_LENGTH setting

---
 transcribe.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/transcribe.py b/transcribe.py
index 748a80c..622feb3 100644
--- a/transcribe.py
+++ b/transcribe.py
@@ -21,6 +21,7 @@ from user.models import User
 
 logger = logging.getLogger(__name__)
 
+TARGET_LENGTH = getattr(settings, 'TRANSCRIBE_TARGET_LENGTH', 200)
 
 def prepare_annotations(result, join_sentences=False):
     if join_sentences:
@@ -37,7 +38,7 @@ def prepare_annotations(result, join_sentences=False):
     return annotations
 
 
-def prepare_joint_annotations(result, target_length=200):
+def prepare_joint_annotations(result, target_length=TARGET_LENGTH):
     abbrevs = ["Mr.", "Mrs.", "Dr."]
     ignore = []
     phrase_sounds = []

From eb2d12a905464ffe3b67361279c4ef89c77c5a98 Mon Sep 17 00:00:00 2001
From: j <j@mailb.org>
Date: Sat, 25 Jan 2025 14:36:15 +0530
Subject: [PATCH 2/2] support multipart items

---
 transcribe.py | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/transcribe.py b/transcribe.py
index 622feb3..58520bd 100644
--- a/transcribe.py
+++ b/transcribe.py
@@ -150,15 +150,23 @@ def extract_subtitles(item, user, layer, translate, gpu=False, join_sentences=Fa
     if not item.streams():
         logger.error("skip item without media %s: %s", item.public_id)
         return False
-    src = item.streams()[0].media.path
-    response = run_whisper(src, language, translate, gpu, model)
-    if not response:
-        logger.error("extract failed for  %s", item.public_id)
-        return False
-    annotations = prepare_annotations(response, join_sentences=join_sentences)
+
+    offset = 0
+    annotations = []
+    for stream in item.streams():
+        src = stream.media.path
+        response = run_whisper(src, language, translate, gpu, model)
+        if not response:
+            logger.error("extract failed for  %s", item.public_id)
+            return False
+        for annotation in prepare_annotations(response, join_sentences=join_sentences):
+            if offset:
+                annotation['in'] += offset
+                annotation['out'] += offset
+            annotations.append(annotation)
+        offset += stream.duration
     if not annotations:
         return False
-
     if language and language != "en":
         for annotation in annotations:
             annotation["value"] = '<span lang="%s">%s</span>' % (