# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
"""Book metadata lookup via Google web search and the Google Books API."""

from time import time, sleep

from ox.cache import get_json, store
import ox.web.google
import stdnum.isbn

from .utils import find_isbns, get_language, decode_html_data, to_isbn13

import logging
logger = logging.getLogger(__name__)

# Identifier types accepted by info(); each is used as the query prefix
# in the Google Books volumes search (``q=<key>:<value>``).
_SUPPORTED_KEYS = ('isbn', 'lccn', 'oclc')

# volumeInfo fields copied into the result, in the order they are read.
_VOLUME_FIELDS = (
    'authors',
    'categories',
    'description',
    'pageCount',
    'publishedDate',
    'publisher',
    'title',
)

# volumeInfo field name -> key used in the returned dict (others keep
# their original name).
_FIELD_NAMES = {
    'authors': 'author',
    'pageCount': 'pages',
    'publishedDate': 'date',
}

# imageLinks sizes, tried from largest to smallest.
_IMAGE_SIZES = (
    'extraLarge', 'large', 'medium', 'small', 'thumbnail', 'smallThumbnail',
)


def find(query):
    """Search Google for *query* and return the ISBNs found in the results.

    ``' isbn'`` is appended to the query, every search result is joined
    into one string and scanned with ``find_isbns``.

    Returns a list of ``{'isbn': ..., 'primaryid': ['isbn', ...]}`` dicts,
    one per distinct book: once an ISBN has been emitted, its ISBN-10 /
    ISBN-13 counterpart is treated as already seen.
    """
    logger.debug('find %s', query)
    query += ' isbn'
    isbns = []
    for r in ox.web.google.find(query):
        # assumes each result is an iterable of strings -- TODO confirm
        isbns += find_isbns(' '.join(r))
    # The original call had no placeholder for its argument, which makes
    # the logging module report a formatting error instead of the list.
    logger.debug('isbns %s', isbns)
    results = []
    done = set()
    for isbn in isbns:
        if isbn in done:
            continue
        results.append({
            'isbn': isbn,
            'primaryid': ['isbn', isbn],
        })
        done.add(isbn)
        # Also mark the other representation of the same number as seen.
        if len(isbn) == 10:
            done.add(stdnum.isbn.to_isbn13(isbn))
        if len(isbn) == 13 and isbn.startswith('978'):
            # only 978-prefixed ISBN-13s are converted to ISBN-10
            done.add(stdnum.isbn.to_isbn10(isbn))
    return results


def _cover_url(item, volume):
    """Return a cover URL for a volumes-API *item*, or None if there is none.

    *volume* is the item's ``volumeInfo`` dict.
    """
    # A missing accessInfo block is treated like 'NO_PAGES' so that we fall
    # back to imageLinks instead of raising KeyError.
    viewability = item.get('accessInfo', {}).get('viewability', 'NO_PAGES')
    if viewability != 'NO_PAGES':
        # previously used form:
        # 'https://books.google.com/books?id=%s&pg=PP1&img=1&zoom=0&hl=en' % id
        return 'https://books.google.com/books/content/images/frontcover/%s?fife=w600-rw' % item['id']
    links = volume.get('imageLinks')
    if links:
        for size in _IMAGE_SIZES:
            if size in links:
                return links[size]
    return None


def info(key, value):
    """Fetch metadata for one book from the Google Books volumes API.

    key   -- identifier type, one of 'isbn', 'lccn', 'oclc'
    value -- the identifier itself

    Returns a dict built from the first matching volume (possible keys:
    author, categories, description, pages, date, publisher, title, cover,
    isbn, language), passed through ``decode_html_data``; returns ``{}``
    when the API response contains no items.

    Raises IOError for an unsupported *key*, while the module-level
    ``api_limit`` is in its error back-off period, or when the API response
    contains an ``error`` entry.

    Blocks (sleeping one second at a time) until ``api_limit`` hands out a
    token.
    """
    if key not in _SUPPORTED_KEYS:
        raise IOError('unknwon key %s' % key)
    url = 'https://www.googleapis.com/books/v1/volumes?q=%s:%s' % (key, value)
    if api_limit.error:
        raise IOError(url)
    while not api_limit.consume(1):
        logger.debug('hitting google api to fast, waiting 1 second')
        sleep(1)
    r = get_json(url, timeout=-1)
    if 'error' in r:
        logger.debug('got google api error, dont call for 10 minutes')
        # presumably evicts the cached error response so a later call
        # fetches again -- verify against ox.cache
        store.delete(url)
        api_limit.error = True
        raise IOError(url, r)
    if 'items' not in r:
        logger.debug('unknown %s: %s [%s]', key, value, r)
        return {}

    item = r['items'][0]
    _data = item['volumeInfo']
    data = {}
    # 'field' rather than 'key': do not shadow the function parameter.
    for field in _VOLUME_FIELDS:
        if field in _data:
            data[_FIELD_NAMES.get(field, field)] = _data[field]

    # Only merge the subtitle when there is a title to merge it into.
    if 'title' in _data and 'subtitle' in _data and _data['subtitle'].strip():
        data['title'] = '{title}: {subtitle}'.format(**_data)

    cover = _cover_url(item, _data)
    if cover is not None:
        data['cover'] = cover

    isbns = []
    for k in _data.get('industryIdentifiers', ()):
        if k['type'].startswith('ISBN'):
            isbns.append(k['identifier'])
        else:
            logger.debug('unknown identifier %s', k)
    if isbns:
        # Only the first listed ISBN is kept, normalised to ISBN-13.
        data['isbn'] = to_isbn13(isbns[0])

    if 'publisher' in data and isinstance(data['publisher'], str):
        data['publisher'] = [data['publisher']]
    if 'language' in _data:
        data['language'] = [get_language(_data['language'])]
    data = decode_html_data(data)
    return data


class Limit(object):
    """Token bucket with an additional timed "error" back-off flag.

    fill_rate -- tokens added per second
    capacity  -- maximum number of tokens; the bucket starts full
    """

    # How long (seconds) the error flag stays set before it expires.
    error_timeout = 10 * 60

    # False, or the time() at which the error flag was set.
    _error = False

    def __init__(self, fill_rate, capacity):
        self.timestamp = time()
        self.fill_rate = fill_rate
        self.capacity = capacity
        self._tokens = capacity

    def consume(self, tokens):
        """Take *tokens* from the bucket; return False if not enough are left."""
        if tokens > self.tokens:
            return False
        self._tokens -= tokens
        return True

    def get_tokens(self):
        """Refill according to the elapsed time and return the token count."""
        now = time()
        if self._tokens < self.capacity:
            delta = self.fill_rate * (now - self.timestamp)
            self._tokens = min(self.capacity, self._tokens + delta)
        self.timestamp = now
        return self._tokens
    tokens = property(get_tokens)

    def get_error(self):
        """Return True while the error flag is set and has not yet expired."""
        if self._error and self._error < (time() - self.error_timeout):
            self._error = False
        return self._error is not False

    def set_error(self, value):
        """Set the error flag (truthy *value*) or clear it (falsy *value*).

        Setting it records the current time so the flag can expire after
        ``error_timeout`` seconds.
        """
        self._error = time() if value else False
    error = property(get_error, set_error)


# Shared limiter for Google Books API calls: bursts of up to 25 requests,
# refilled at one token every two seconds.
api_limit = Limit(fill_rate=0.5, capacity=25)