Fix Unicode detection (match the UTF-8 charset declaration case-insensitively)
This commit is contained in:
parent
7d7c7c9407
commit
3165e3a8b1
2 changed files with 2 additions and 2 deletions
|
@@ -70,7 +70,7 @@ def read_url(url, data=None, headers=DEFAULT_HEADERS, return_headers=False, unic
|
|||
return result
|
||||
|
||||
def detect_encoding(data):
|
||||
if 'content="text/html; charset=utf-8"' in data or \
|
||||
if 'content="text/html; charset=utf-8"' in data.lower() or \
|
||||
'meta charset="utf-8"' in data.lower():
|
||||
return 'utf-8'
|
||||
elif 'content="text/html; charset=iso-8859-1"' in data:
|
||||
|
|
|
@@ -13,7 +13,7 @@ def find(query, timeout=ox.cache.cache_timeout):
|
|||
query = query.encode('utf-8')
|
||||
params = urllib.urlencode({'q': query})
|
||||
url = 'http://duckduckgo.com/html/?' + params
|
||||
data = read_url(url, timeout=timeout, unicode=True)
|
||||
data = read_url(url, timeout=timeout).decode('utf-8')
|
||||
results = []
|
||||
regex = '<a .*?class="large" href="(.+?)">(.*?)</a>.*?<div class="snippet">(.*?)</div>'
|
||||
for r in re.compile(regex, re.DOTALL).findall(data):
|
||||
|
|
Loading…
Reference in a new issue