_find() method (using ajax api) added to google module

This commit is contained in:
Rolux 2009-07-04 12:37:25 +02:00
parent e01af3fdd7
commit bb8db87d02
1 changed files with 26 additions and 19 deletions

View File

@ -1,13 +1,15 @@
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
import time
import urllib
import urllib2
import weakref
import threading
import Queue
import re
import time
import urllib
import urllib2
import weakref
import threading
import Queue
import simplejson
import oxlib
from oxlib import stripTags
@ -25,26 +27,31 @@ google.find(query, max_results)
FIXME: how to search deeper than the first page?
'''
# Default cap on the number of results find() returns.
DEFAULT_MAX_RESULTS = 10
def getUrl(url, data=None, headers=oxlib.net.DEFAULT_HEADERS):
    """Fetch *url* via ``oxlib.cache.getUrl`` with a fixed 24*60*60 timeout.

    ``url``, ``data`` and ``headers`` are forwarded unchanged; the result of
    ``oxlib.cache.getUrl`` is returned as-is.
    """
    # Passed as the fourth positional argument. Presumably the cache timeout
    # in seconds (i.e. one day) -- confirm against oxlib.cache.getUrl.
    one_day = 24 * 60 * 60
    return oxlib.cache.getUrl(url, data, headers, one_day)
def quote_plus(s):
    """Return *s* encoded as UTF-8 and escaped with ``urllib.quote_plus``."""
    utf8_bytes = s.encode('utf-8')
    return urllib.quote_plus(utf8_bytes)
def find(query, max_results=DEFAULT_MAX_RESULTS):
    """Search google.com for *query* and scrape the returned page.

    Returns a list of ``(name, url, description)`` tuples in page order,
    cut down to ``max_results`` entries when more were matched. ``name`` and
    ``description`` are passed through ``stripTags``; ``url`` is returned
    exactly as matched.
    """
    search_url = "http://www.google.com/search?q=%s" % quote_plus(query)
    page = getUrl(search_url)
    # One hit: the result anchor (class=l), then the text that follows a
    # <br> or <table ...> up to a <font color=#008000> tag or the next "<a".
    pattern = re.compile(
        r'<a href="(?P<url>[^"]*?)" class=l.*?>(?P<name>.*?)</a>'
        r'.*?(?:<br>|<table.*?>)'
        r'(?P<desc>.*?)'
        '(?:<font color=#008000>|<a)',
        re.DOTALL)
    hits = [
        (stripTags(m.group('name')), m.group('url'), stripTags(m.group('desc')))
        for m in pattern.finditer(page)
    ]
    return hits[:max_results] if len(hits) > max_results else hits
def _find(query):
    """Query the Google AJAX web-search endpoint for *query*.

    Returns the ``results`` entry of the ``responseData`` object in the
    decoded JSON reply.
    """
    # NOTE(review): getUrlUnicode is not defined or imported in the visible
    # part of this module (only getUrl is) -- confirm it is in scope.
    endpoint = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=%s'
    reply = simplejson.loads(getUrlUnicode(endpoint % quote_plus(query)))
    return reply['responseData']['results']