rate limit google requests

This commit is contained in:
j 2016-01-23 18:06:40 +05:30
parent 609ff07214
commit 216fd0c232
2 changed files with 34 additions and 1 deletions

View File

@ -59,7 +59,11 @@ def lookup(key, value):
return oml.metaremote.lookup(key, value)
'''
if key == 'isbn':
data = google.info(key, value)
try:
data = google.info(key, value)
except:
logger.debug('google.info failed %s=%s', key, value, exc_info=True)
data = {}
else:
data = {key: [value]}
ids = set([(key, value)])

View File

@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from time import time, sleep
from ox.cache import get_json, store
import ox.web.google
@ -39,6 +40,9 @@ def info(key, value):
if key not in ('isbn', 'lccn', 'oclc'):
raise IOError('unknwon key %s' % key)
url = 'https://www.googleapis.com/books/v1/volumes?q=%s:%s' % (key, value)
while not api_limit.consume(1):
logger.debug('hitting google api to fast, waiting 1 second')
sleep(1)
r = get_json(url, timeout=-1)
if 'error' in r:
store.delete(url)
@ -94,3 +98,28 @@ def info(key, value):
data = decode_html_data(data)
return data
class Limit(object):
    """Token-bucket rate limiter.

    The bucket starts full with ``capacity`` tokens and is refilled at
    ``fill_rate`` tokens per second, never exceeding ``capacity``. Callers
    ask for tokens with :meth:`consume` and must wait and retry when it
    returns ``False``.
    """

    def __init__(self, fill_rate, capacity):
        """Create a full bucket.

        fill_rate -- tokens added per second of elapsed time
        capacity  -- maximum (and initial) number of tokens in the bucket
        """
        self.timestamp = time()
        self.fill_rate = fill_rate
        self.capacity = capacity
        self._tokens = capacity

    def consume(self, tokens):
        """Take ``tokens`` out of the bucket if that many are available.

        Returns True when the tokens were taken, False (leaving the bucket
        unchanged apart from the refill) when there are not enough.
        """
        # Reading self.tokens refills the bucket before the comparison.
        if tokens > self.tokens:
            return False
        self._tokens -= tokens
        return True

    def get_tokens(self):
        """Refill the bucket for the time elapsed and return its level."""
        now = time()
        if self._tokens < self.capacity:
            # time() is wall-clock time and may step backwards (e.g. a clock
            # adjustment); clamp so a negative interval never drains tokens.
            elapsed = max(0.0, now - self.timestamp)
            refilled = self._tokens + self.fill_rate * elapsed
            self._tokens = min(self.capacity, refilled)
        # Always advance the reference point so time spent with a full
        # bucket is not credited again after the next consume().
        self.timestamp = now
        return self._tokens

    tokens = property(get_tokens)
# Module-wide limiter that info() consumes one token from before each Google
# Books request: bursts of up to 10 requests, refilled at 1 token per second.
api_limit = Limit(fill_rate=1, capacity=10)