fix unicode detection

This commit is contained in:
j 2013-06-01 13:21:13 +02:00
parent 7d7c7c9407
commit 3165e3a8b1
2 changed files with 2 additions and 2 deletions

View file

@@ -70,7 +70,7 @@ def read_url(url, data=None, headers=DEFAULT_HEADERS, return_headers=False, unic
return result
def detect_encoding(data):
if 'content="text/html; charset=utf-8"' in data or \
if 'content="text/html; charset=utf-8"' in data.lower() or \
'meta charset="utf-8"' in data.lower():
return 'utf-8'
elif 'content="text/html; charset=iso-8859-1"' in data:

View file

@@ -13,7 +13,7 @@ def find(query, timeout=ox.cache.cache_timeout):
query = query.encode('utf-8')
params = urllib.urlencode({'q': query})
url = 'http://duckduckgo.com/html/?' + params
data = read_url(url, timeout=timeout, unicode=True)
data = read_url(url, timeout=timeout).decode('utf-8')
results = []
regex = '<a .*?class="large" href="(.+?)">(.*?)</a>.*?<div class="snippet">(.*?)</div>'
for r in re.compile(regex, re.DOTALL).findall(data):