subtitles
This commit is contained in:
parent
c5b74c6f77
commit
e64dd48ee2
4 changed files with 115 additions and 85 deletions
|
|
@ -1,5 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from __future__ import division
|
||||
from datetime import datetime
|
||||
import os.path
|
||||
import random
|
||||
|
|
@ -19,6 +20,7 @@ import ox
|
|||
from ox import stripTags
|
||||
from ox.normalize import canonicalTitle, canonicalName
|
||||
from firefogg import Firefogg
|
||||
import chardet
|
||||
|
||||
from backend import utils
|
||||
from pandora.backend.models import Movie
|
||||
|
|
@ -149,6 +151,65 @@ class File(models.Model):
|
|||
return self.data.read()
|
||||
return None
|
||||
|
||||
def srt(self):
    """Parse the attached SubRip (.srt) file into a list of subtitle cues.

    The file at ``self.data.path`` is read as raw bytes. Its encoding is
    taken from a leading byte-order mark when present, otherwise guessed by
    ``chardet``; if decoding with that guess fails the bytes are decoded as
    latin-1, which accepts any byte sequence.

    Returns:
        A list of dicts, one per cue, with the keys ``'in'`` and ``'out'``
        (``ox.time2ms`` of the cue timestamps divided by 1000, presumably
        seconds) and ``'text'`` (the cue text with surrounding whitespace
        stripped). The list is empty when the file is empty or holds no
        cues.
    """
    def _detectEncoding(raw):
        """Return the codec name for ``raw`` from its BOM or from chardet."""
        # Longest marks first: the UTF-32 LE mark starts with the UTF-16 LE one.
        boms = (
            (b'\x00\x00\xfe\xff', 'utf_32_be'),
            (b'\xff\xfe\x00\x00', 'utf_32_le'),
            (b'\xef\xbb\xbf', 'utf_8'),
            (b'\xfe\xff', 'utf_16_be'),
            (b'\xff\xfe', 'utf_16_le'),
        )
        for bom, name in boms:
            # startswith() copes with files shorter than the mark itself.
            if raw.startswith(bom):
                return name
        # more character detecting magick using http://chardet.feedparser.org/
        # chardet reports None when it cannot make a guess.
        return chardet.detect(raw)['encoding'] or 'latin-1'

    def parseTime(t):
        """Convert an ``HH:MM:SS,mmm`` (or ``.mmm``) timestamp to a float."""
        return ox.time2ms(t.replace(',', '.')) / 1000.0

    # Binary mode keeps the bytes untouched for BOM and charset detection;
    # the context manager closes the file even if reading fails.
    with open(self.data.path, 'rb') as f:
        raw = f.read()
    if not raw:
        return []

    encoding = _detectEncoding(raw)
    try:
        data = raw.decode(encoding)
    except (LookupError, UnicodeError):
        # Unknown codec name or undecodable bytes: latin-1 maps every byte
        # to a code point, so this fallback cannot fail.
        data = raw.decode('latin-1')

    # Cues are terminated by a blank line. Normalise CRLF / CR line endings
    # so Windows files match, and append a terminator so the final cue is
    # found even when the file does not end with an empty line.
    data = data.replace('\r\n', '\n').replace('\r', '\n') + '\n\n'

    srts = re.compile(r'(\d\d:\d\d:\d\d[,.]\d\d\d)\s*-->\s*(\d\d:\d\d:\d\d[,.]\d\d\d)\s*(.+?)\n\n', re.DOTALL)
    return [
        {'in': parseTime(start), 'out': parseTime(end), 'text': text.strip()}
        for start, end, text in srts.findall(data)
    ]
|
||||
|
||||
def editable(self, user):
    """Report whether ``user`` may edit this file.

    No ownership check is performed yet, so the answer is always True.
    """
    # FIXME: check that user has instance of this file
    allowed = True
    return allowed
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue