From 9fc6425a9ecca6d9e97c52246155df0b42452138 Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Fri, 31 Dec 2010 12:53:24 +0530 Subject: [PATCH] search cleanup --- ox/web/duckduckgo.py | 7 ++++--- ox/web/google.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ox/web/duckduckgo.py b/ox/web/duckduckgo.py index c11b2dc..4209bb5 100644 --- a/ox/web/duckduckgo.py +++ b/ox/web/duckduckgo.py @@ -5,14 +5,15 @@ import urllib import ox from ox import stripTags, decodeHtml from ox.utils import json -from ox.cache import readUrl +from ox.cache import readUrlUnicode def find(query, timeout=ox.cache.cache_timeout): params = urllib.urlencode({'q': query}) + if isinstance(query, unicode): + query = query.encode('utf-8') url = 'http://duckduckgo.com/html/?' + params - print url - data = readUrl(url, timeout=timeout) + data = readUrlUnicode(url, timeout=timeout) results = [] regex = '(.*?).*?
(.*?)
' for r in re.compile(regex, re.DOTALL).findall(data): diff --git a/ox/web/google.py b/ox/web/google.py index c8c8c50..e29f754 100644 --- a/ox/web/google.py +++ b/ox/web/google.py @@ -66,6 +66,6 @@ def _find(query, timeout=DEFAULT_TIMEOUT): u'http://www.imdb.com/title/tt0133093/' """ url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=%s' % quote_plus(query) - results = json.loads(ox.cache.readUrlUnicode(url, timeout=timeout))['responseData']['results'] + results = json.loads(ox.cache.readUrl(url, timeout=timeout))['responseData']['results'] return results