UTF-8 SRT files are more common
This commit is contained in:
parent
b7bb4a4e80
commit
ca7bfd06d5
1 changed file with 7 additions and 2 deletions
|
@ -34,11 +34,16 @@ def _detectEncoding(fp):
|
||||||
fp.seek(oldFP)
|
fp.seek(oldFP)
|
||||||
if bomDetection:
|
if bomDetection:
|
||||||
return bomDetection
|
return bomDetection
|
||||||
|
|
||||||
encoding = 'latin-1'
|
encoding = 'latin-1'
|
||||||
#more character detecting magick using http://chardet.feedparser.org/
|
#more character detecting magick using http://chardet.feedparser.org/
|
||||||
fp.seek(0)
|
fp.seek(0)
|
||||||
rawdata = fp.read()
|
rawdata = fp.read()
|
||||||
|
#if data can be decoded as utf-8 use that, try chardet otherwise
|
||||||
|
#chardet detects utf-8 as ISO-8859-2 most of the time
|
||||||
|
try:
|
||||||
|
data = unicode(rawdata, 'utf-8')
|
||||||
|
encoding = 'utf-8'
|
||||||
|
except:
|
||||||
encoding = chardet.detect(rawdata)['encoding']
|
encoding = chardet.detect(rawdata)['encoding']
|
||||||
fp.seek(oldFP)
|
fp.seek(oldFP)
|
||||||
return encoding
|
return encoding
|
||||||
|
|
Loading…
Reference in a new issue