From 98ab0e29db397257efb62b374ad663041505f406 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Sun, 8 Sep 2013 15:56:57 +0200
Subject: [PATCH] support returning more than 10 results

---
 ox/web/google.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/ox/web/google.py b/ox/web/google.py
index 91fccf1..c247ad6 100644
--- a/ox/web/google.py
+++ b/ox/web/google.py
@@ -27,13 +27,18 @@ def find(query, max_results=DEFAULT_MAX_RESULTS, timeout=DEFAULT_TIMEOUT):
     >>> find("The Matrix site:imdb.com", 1)[0][1]
     u'http://www.imdb.com/title/tt0133093/'
     """
-    url = 'http://google.com/search?q=%s' % quote_plus(query)
-    data = read_url(url, timeout=timeout)
     results = []
-    data = re.sub('(.*?)', '\\1', data)
-    for a in re.compile('(.*?).*?(.*?)<\/span>').findall(data):
-        results.append((strip_tags(decode_html(a[1])), a[0], strip_tags(decode_html(a[2]))))
-        if len(results) >= max_results:
-            break
+    offset = 0
+    while len(results) < max_results:
+        url = 'http://google.com/search?q=%s' % quote_plus(query)
+        if offset:
+            url += '&start=%d' % offset
+        data = read_url(url, timeout=timeout)
+        data = re.sub('(.*?)', '\\1', data)
+        for a in re.compile('(.*?).*?(.*?)<\/span>').findall(data):
+            results.append((strip_tags(decode_html(a[1])), a[0], strip_tags(decode_html(a[2]))))
+            if len(results) >= max_results:
+                break
+        offset += 10
     return results