diff --git a/ox/web/duckduckgo.py b/ox/web/duckduckgo.py index a8f7869..b4b3494 100644 --- a/ox/web/duckduckgo.py +++ b/ox/web/duckduckgo.py @@ -6,17 +6,25 @@ from six.moves import urllib import ox from ox import strip_tags, decode_html from ox.cache import read_url +import lxml.html def find(query, timeout=ox.cache.cache_timeout): + """ + Returns tuples with title, url, description + """ if not isinstance(query, bytes): query = query.encode('utf-8') params = urllib.parse.urlencode({'q': query}) url = 'http://duckduckgo.com/html/?' + params data = read_url(url, timeout=timeout).decode('utf-8') + doc = lxml.html.document_fromstring(data) results = [] - regex = '(.*?).*?