search cleanup

This commit is contained in:
j 2010-12-31 12:53:24 +05:30
parent 2892f3d12e
commit 9fc6425a9e
2 changed files with 5 additions and 4 deletions

View file

@@ -5,14 +5,15 @@ import urllib
import ox import ox
from ox import stripTags, decodeHtml from ox import stripTags, decodeHtml
from ox.utils import json from ox.utils import json
from ox.cache import readUrl from ox.cache import readUrlUnicode
def find(query, timeout=ox.cache.cache_timeout): def find(query, timeout=ox.cache.cache_timeout):
params = urllib.urlencode({'q': query}) params = urllib.urlencode({'q': query})
if isinstance(query, unicode):
query = query.encode('utf-8')
url = 'http://duckduckgo.com/html/?' + params url = 'http://duckduckgo.com/html/?' + params
print url data = readUrlUnicode(url, timeout=timeout)
data = readUrl(url, timeout=timeout)
results = [] results = []
regex = '<a .*?class="l le" href="(.+?)">(.*?)</a>.*?<div class="cra">(.*?)</div>' regex = '<a .*?class="l le" href="(.+?)">(.*?)</a>.*?<div class="cra">(.*?)</div>'
for r in re.compile(regex, re.DOTALL).findall(data): for r in re.compile(regex, re.DOTALL).findall(data):

View file

@@ -66,6 +66,6 @@ def _find(query, timeout=DEFAULT_TIMEOUT):
u'http://www.imdb.com/title/tt0133093/' u'http://www.imdb.com/title/tt0133093/'
""" """
url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=%s' % quote_plus(query) url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=%s' % quote_plus(query)
results = json.loads(ox.cache.readUrlUnicode(url, timeout=timeout))['responseData']['results'] results = json.loads(ox.cache.readUrl(url, timeout=timeout))['responseData']['results']
return results return results