get basic utf-8 case right
This commit is contained in:
parent
556f878fc4
commit
d0d5ee8132
1 changed file with 2 additions and 0 deletions
|
@@ -64,6 +64,8 @@ def getUrlUnicode(url):
|
||||||
return unicode(data, encoding)
|
return unicode(data, encoding)
|
||||||
|
|
||||||
def getEncoding(data):
|
def getEncoding(data):
|
||||||
|
if 'content="text/html; charset=utf-8"' in data:
|
||||||
|
return 'utf-8'
|
||||||
detector = UniversalDetector()
|
detector = UniversalDetector()
|
||||||
for line in data.split('\n'):
|
for line in data.split('\n'):
|
||||||
detector.feed(line)
|
detector.feed(line)
|
||||||
|
|
Loading…
Reference in a new issue