From 3165e3a8b1806dee7bb47e900afb7cec113cac18 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Sat, 1 Jun 2013 13:21:13 +0200
Subject: [PATCH] fix unicode detection

---
 ox/net.py            | 2 +-
 ox/web/duckduckgo.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ox/net.py b/ox/net.py
index 5a5557a..1c37ae3 100644
--- a/ox/net.py
+++ b/ox/net.py
@@ -70,7 +70,7 @@ def read_url(url, data=None, headers=DEFAULT_HEADERS, return_headers=False, unic
     return result
 
 def detect_encoding(data):
-    if 'content="text/html; charset=utf-8"' in data or \
+    if 'content="text/html; charset=utf-8"' in data.lower() or \
        'meta charset="utf-8"' in data.lower():
         return 'utf-8'
     elif 'content="text/html; charset=iso-8859-1"' in data:
diff --git a/ox/web/duckduckgo.py b/ox/web/duckduckgo.py
index 415576f..d60578a 100644
--- a/ox/web/duckduckgo.py
+++ b/ox/web/duckduckgo.py
@@ -13,7 +13,7 @@ def find(query, timeout=ox.cache.cache_timeout):
         query = query.encode('utf-8')
     params = urllib.urlencode({'q': query})
     url = 'http://duckduckgo.com/html/?' + params
-    data = read_url(url, timeout=timeout, unicode=True)
+    data = read_url(url, timeout=timeout).decode('utf-8')
     results = []
     regex = '(.*?).*?
(.*?)
'
     for r in re.compile(regex, re.DOTALL).findall(data):