srt parsing moved to ox.srt.load

2012-01-02 22:15:01 +05:30 · 2012-01-02 22:15:01 +05:30 · 7757582aaf
commit 7757582aaf
parent 4c25291c88
1 changed files with 5 additions and 61 deletions
--- a/pandora/archive/models.py
+++ b/pandora/archive/models.py
@@ -171,67 +171,11 @@ class File(models.Model):
        return None

    def srt(self, offset=0):
-
-        def _detectEncoding(fp):
-            bomDict={ # bytepattern : name
-                      (0x00, 0x00, 0xFE, 0xFF): "utf_32_be",
-                      (0xFF, 0xFE, 0x00, 0x00): "utf_32_le",
-                      (0xFE, 0xFF, None, None): "utf_16_be",
-                      (0xFF, 0xFE, None, None): "utf_16_le",
-                      (0xEF, 0xBB, 0xBF, None): "utf_8",
-                    }
-
-            # go to beginning of file and get the first 4 bytes
-            oldFP = fp.tell()
-            fp.seek(0)
-            (byte1, byte2, byte3, byte4) = tuple(map(ord, fp.read(4)))
-
-            # try bom detection using 4 bytes, 3 bytes, or 2 bytes
-            bomDetection = bomDict.get((byte1, byte2, byte3, byte4))
-            if not bomDetection:
-                bomDetection = bomDict.get((byte1, byte2, byte3, None))
-                if not bomDetection:
-                    bomDetection = bomDict.get((byte1, byte2, None, None))
-            ## if BOM detected, we're done :-)
-            fp.seek(oldFP)
-            if bomDetection:
-                return bomDetection
-
-            encoding = 'latin-1'
-            #more character detecting magick using http://chardet.feedparser.org/
-            fp.seek(0)
-            rawdata = fp.read()
-            encoding = chardet.detect(rawdata)['encoding']
-            fp.seek(oldFP)
-            return encoding
-
-        def parseTime(t):
-            return offset + ox.time2ms(t.replace(',', '.')) / 1000
-
-        srt = []
-
-        f = open(self.data.path)
-        encoding = _detectEncoding(f)
-        data = f.read()
-        f.close()
-        try:
-            data = unicode(data, encoding)
-        except:
-            try:
-                data = unicode(data, 'latin-1')
-            except:
-                print "failed to detect encoding, giving up"
-                return srt
-        data = data.replace('\r\n', '\n')
-        srts = re.compile('(\d\d:\d\d:\d\d[,.]\d\d\d)\s*-->\s*(\d\d:\d\d:\d\d[,.]\d\d\d)\s*(.+?)\n\n', re.DOTALL)
-        i = 0
-        for s in srts.findall(data):
-            _s = {'id': str(i),
-                  'in': parseTime(s[0]), 'out': parseTime(s[1]), 'value': s[2].strip()}
-            if srt and srt[-1]['out'] > _s['in']:
-                srt[-1]['out'] = _s['in']
-            srt.append(_s)
-            i += 1
+        srt = ox.load_srt(self.data.path)
+        #subtitles should not overlap
+        for i in range(1, len(srt)):
+            if srt[i-1]['out'] > srt[i]['in']:
+                srt[i-1]['out'] = srt[i]['in']
        return srt

    def editable(self, user):