From 2892f3d12eab1963b3023f017d262cdacb9a3e18 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Wed, 29 Dec 2010 17:36:14 +0530
Subject: [PATCH] add duckduckgo search

---
 ox/web/duckduckgo.py |   31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 ox/web/duckduckgo.py

diff --git a/ox/web/duckduckgo.py b/ox/web/duckduckgo.py
new file mode 100644
index 0000000..c11b2dc
--- /dev/null
+++ b/ox/web/duckduckgo.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+import re
+import urllib
+import ox
+from ox import stripTags, decodeHtml
+from ox.utils import json
+from ox.cache import readUrl
+
+
+def find(query, timeout=ox.cache.cache_timeout):
+    """
+    Search DuckDuckGo's HTML interface for query.
+
+    query is url-encoded as the 'q' parameter and timeout is passed
+    through to readUrl. Returns a list of (title, url, description)
+    tuples; title and description go through decodeHtml and stripTags.
+    """
+    params = urllib.urlencode({'q': query})
+    url = 'http://duckduckgo.com/html/?' + params
+    data = readUrl(url, timeout=timeout)
+    # NOTE(review): the markup inside this pattern was stripped from the
+    # copy of the patch under review; tags and class names are
+    # reconstructed - confirm against the upstream commit.
+    # Groups: href, link text, snippet.
+    regex = '<a .*?class="l le" href="(.+?)">(.*?)</a>.*?<div class="cra">(.*?)</div>'
+    # DOTALL so that a single result may span several lines of the page
+    matches = re.compile(regex, re.DOTALL).findall(data)
+    return [(stripTags(decodeHtml(title)), href, stripTags(decodeHtml(snippet)))
+            for href, title, snippet in matches]
+