get basic utf-8 case right

This commit is contained in:
j 2009-03-14 22:02:20 +01:00
parent 556f878fc4
commit d0d5ee8132
1 changed file with 2 additions and 0 deletions

View File

@@ -64,6 +64,8 @@ def getUrlUnicode(url):
return unicode(data, encoding)
def getEncoding(data):
if 'content="text/html; charset=utf-8"' in data:
return 'utf-8'
detector = UniversalDetector()
for line in data.split('\n'):
detector.feed(line)