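Fix unicode detection: match the utf-8 charset meta tag case-insensitively in detect_encoding, and decode DuckDuckGo search results as UTF-8 explicitly in find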
This commit is contained in:
parent
7d7c7c9407
commit
3165e3a8b1
2 changed files with 2 additions and 2 deletions
|
@ -70,7 +70,7 @@ def read_url(url, data=None, headers=DEFAULT_HEADERS, return_headers=False, unic
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def detect_encoding(data):
|
def detect_encoding(data):
|
||||||
if 'content="text/html; charset=utf-8"' in data or \
|
if 'content="text/html; charset=utf-8"' in data.lower() or \
|
||||||
'meta charset="utf-8"' in data.lower():
|
'meta charset="utf-8"' in data.lower():
|
||||||
return 'utf-8'
|
return 'utf-8'
|
||||||
elif 'content="text/html; charset=iso-8859-1"' in data:
|
elif 'content="text/html; charset=iso-8859-1"' in data:
|
||||||
|
|
|
@ -13,7 +13,7 @@ def find(query, timeout=ox.cache.cache_timeout):
|
||||||
query = query.encode('utf-8')
|
query = query.encode('utf-8')
|
||||||
params = urllib.urlencode({'q': query})
|
params = urllib.urlencode({'q': query})
|
||||||
url = 'http://duckduckgo.com/html/?' + params
|
url = 'http://duckduckgo.com/html/?' + params
|
||||||
data = read_url(url, timeout=timeout, unicode=True)
|
data = read_url(url, timeout=timeout).decode('utf-8')
|
||||||
results = []
|
results = []
|
||||||
regex = '<a .*?class="large" href="(.+?)">(.*?)</a>.*?<div class="snippet">(.*?)</div>'
|
regex = '<a .*?class="large" href="(.+?)">(.*?)</a>.*?<div class="snippet">(.*?)</div>'
|
||||||
for r in re.compile(regex, re.DOTALL).findall(data):
|
for r in re.compile(regex, re.DOTALL).findall(data):
|
||||||
|
|
Loading…
Add table
Reference in a new issue