From 98ab0e29db397257efb62b374ad663041505f406 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Sun, 8 Sep 2013 15:56:57 +0200
Subject: [PATCH] support returning more than 10 results
---
ox/web/google.py | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/ox/web/google.py b/ox/web/google.py
index 91fccf1..c247ad6 100644
--- a/ox/web/google.py
+++ b/ox/web/google.py
@@ -27,13 +27,18 @@ def find(query, max_results=DEFAULT_MAX_RESULTS, timeout=DEFAULT_TIMEOUT):
>>> find("The Matrix site:imdb.com", 1)[0][1]
u'http://www.imdb.com/title/tt0133093/'
"""
- url = 'http://google.com/search?q=%s' % quote_plus(query)
- data = read_url(url, timeout=timeout)
results = []
- data = re.sub('(.*?)', '\\1', data)
- for a in re.compile('(.*?).*?(.*?)<\/span>').findall(data):
- results.append((strip_tags(decode_html(a[1])), a[0], strip_tags(decode_html(a[2]))))
- if len(results) >= max_results:
- break
+ offset = 0
+ while len(results) < max_results:
+ url = 'http://google.com/search?q=%s' % quote_plus(query)
+ if offset:
+ url += '&start=%d' % offset
+ data = read_url(url, timeout=timeout)
+ data = re.sub('(.*?)', '\\1', data)
+ for a in re.compile('(.*?).*?(.*?)<\/span>').findall(data):
+ results.append((strip_tags(decode_html(a[1])), a[0], strip_tags(decode_html(a[2]))))
+ if len(results) >= max_results:
+ break
+ offset += 10
return results