From 7757582aaf88da4275fb461beb8199c4be9c6ae8 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Mon, 2 Jan 2012 22:15:01 +0530
Subject: [PATCH] srt parsing moved to ox.srt.load

---
Review notes (text between "---" and the diffstat is ignored by git am):

File.srt() accepts an offset argument. The removed parseTime() helper
added that offset to every in/out point; the loader call that replaces
the inline parser is not given it. To keep the method's contract, the
offset is applied to the loaded subtitles before the overlap fix-up.
NOTE(review): this assumes ox.load_srt() returns numeric 'in'/'out'
values in the same unit as offset -- confirm against the ox version in
use.

Leading whitespace in this patch was reconstructed from the Python
structure. If it does not apply cleanly, retry with
`git apply --ignore-whitespace`.

 pandora/archive/models.py | 71 ++++++----------------------------------
 1 file changed, 10 insertions(+), 61 deletions(-)

diff --git a/pandora/archive/models.py b/pandora/archive/models.py
index c8c2de376..a1308d032 100644
--- a/pandora/archive/models.py
+++ b/pandora/archive/models.py
@@ -171,67 +171,16 @@ class File(models.Model):
         return None
 
     def srt(self, offset=0):
-
-        def _detectEncoding(fp):
-            bomDict={ # bytepattern : name
-                (0x00, 0x00, 0xFE, 0xFF): "utf_32_be",
-                (0xFF, 0xFE, 0x00, 0x00): "utf_32_le",
-                (0xFE, 0xFF, None, None): "utf_16_be",
-                (0xFF, 0xFE, None, None): "utf_16_le",
-                (0xEF, 0xBB, 0xBF, None): "utf_8",
-            }
-
-            # go to beginning of file and get the first 4 bytes
-            oldFP = fp.tell()
-            fp.seek(0)
-            (byte1, byte2, byte3, byte4) = tuple(map(ord, fp.read(4)))
-
-            # try bom detection using 4 bytes, 3 bytes, or 2 bytes
-            bomDetection = bomDict.get((byte1, byte2, byte3, byte4))
-            if not bomDetection:
-                bomDetection = bomDict.get((byte1, byte2, byte3, None))
-                if not bomDetection:
-                    bomDetection = bomDict.get((byte1, byte2, None, None))
-            ## if BOM detected, we're done :-)
-            fp.seek(oldFP)
-            if bomDetection:
-                return bomDetection
-
-            encoding = 'latin-1'
-            #more character detecting magick using http://chardet.feedparser.org/
-            fp.seek(0)
-            rawdata = fp.read()
-            encoding = chardet.detect(rawdata)['encoding']
-            fp.seek(oldFP)
-            return encoding
-
-        def parseTime(t):
-            return offset + ox.time2ms(t.replace(',', '.')) / 1000
-
-        srt = []
-
-        f = open(self.data.path)
-        encoding = _detectEncoding(f)
-        data = f.read()
-        f.close()
-        try:
-            data = unicode(data, encoding)
-        except:
-            try:
-                data = unicode(data, 'latin-1')
-            except:
-                print "failed to detect encoding, giving up"
-                return srt
-        data = data.replace('\r\n', '\n')
-        srts = re.compile('(\d\d:\d\d:\d\d[,.]\d\d\d)\s*-->\s*(\d\d:\d\d:\d\d[,.]\d\d\d)\s*(.+?)\n\n', re.DOTALL)
-        i = 0
-        for s in srts.findall(data):
-            _s = {'id': str(i),
-                  'in': parseTime(s[0]), 'out': parseTime(s[1]), 'value': s[2].strip()}
-            if srt and srt[-1]['out'] > _s['in']:
-                srt[-1]['out'] = _s['in']
-            srt.append(_s)
-            i += 1
+        srt = ox.load_srt(self.data.path)
+        #shift in and out points by offset, as the parser replaced here did
+        if offset:
+            for s in srt:
+                s['in'] += offset
+                s['out'] += offset
+        #subtitles should not overlap
+        for i in range(1, len(srt)):
+            if srt[i-1]['out'] > srt[i]['in']:
+                srt[i-1]['out'] = srt[i]['in']
         return srt
 
     def editable(self, user):