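Get the basic UTF-8 case right: return 'utf-8' directly when the HTML meta tag declares charset=utf-8, before running charset detection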

This commit is contained in:
j 2009-03-14 22:02:20 +01:00
parent 556f878fc4
commit d0d5ee8132

View file

@ -64,6 +64,8 @@ def getUrlUnicode(url):
return unicode(data, encoding) return unicode(data, encoding)
def getEncoding(data): def getEncoding(data):
if 'content="text/html; charset=utf-8"' in data:
return 'utf-8'
detector = UniversalDetector() detector = UniversalDetector()
for line in data.split('\n'): for line in data.split('\n'):
detector.feed(line) detector.feed(line)