Detect HTML5 encoding declaration (meta charset="utf-8")

This commit is contained in:
j 2012-09-22 20:57:40 +02:00
parent 2d1ad2785d
commit c69a2ee825

View file

@@ -70,7 +70,8 @@ def read_url(url, data=None, headers=DEFAULT_HEADERS, return_headers=False, unic
return result return result
def detect_encoding(data): def detect_encoding(data):
if 'content="text/html; charset=utf-8"' in data: if 'content="text/html; charset=utf-8"' in data or \
'meta charset="utf-8"' in data:
return 'utf-8' return 'utf-8'
elif 'content="text/html; charset=iso-8859-1"' in data: elif 'content="text/html; charset=iso-8859-1"' in data:
return 'iso-8859-1' return 'iso-8859-1'