UTF-8 .srt files are more common

This commit is contained in:
j 2012-01-28 13:56:42 +05:30
parent b7bb4a4e80
commit ca7bfd06d5

View file

@ -34,11 +34,16 @@ def _detectEncoding(fp):
fp.seek(oldFP) fp.seek(oldFP)
if bomDetection: if bomDetection:
return bomDetection return bomDetection
encoding = 'latin-1' encoding = 'latin-1'
#more character detecting magick using http://chardet.feedparser.org/ #more character detecting magick using http://chardet.feedparser.org/
fp.seek(0) fp.seek(0)
rawdata = fp.read() rawdata = fp.read()
# If the data can be decoded as UTF-8, use that; otherwise fall back to chardet.
# chardet misdetects UTF-8 as ISO-8859-2 most of the time.
try:
data = unicode(rawdata, 'utf-8')
encoding = 'utf-8'
except:
encoding = chardet.detect(rawdata)['encoding'] encoding = chardet.detect(rawdata)['encoding']
fp.seek(oldFP) fp.seek(oldFP)
return encoding return encoding