Use the new Google Books API (v1 volumes endpoint) to find books by title/author

This commit is contained in:
j 2019-02-01 15:33:06 +05:30
parent ac84e82a52
commit 70beacd848
1 changed file with 46 additions and 54 deletions

View File

@ -17,7 +17,7 @@ import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
NAMESPACES = { NAMESPACES = {
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/', 'openSearch': 'http://a9.com/-/spec/opensearchrss/1.0/',
'atom' : 'http://www.w3.org/2005/Atom', 'atom' : 'http://www.w3.org/2005/Atom',
'dc' : 'http://purl.org/dc/terms', 'dc' : 'http://purl.org/dc/terms',
'gbs' : 'http://schemas.google.com/books/2008', 'gbs' : 'http://schemas.google.com/books/2008',
@ -25,29 +25,6 @@ NAMESPACES = {
} }
XPath = partial(etree.XPath, namespaces=NAMESPACES) XPath = partial(etree.XPath, namespaces=NAMESPACES)
def find_(query):
    """Find ISBNs for ``query`` via a Google web search.

    The string ' isbn' is appended to ``query``, every search result is
    joined into one string and scanned with ``find_isbns``.

    Returns a list of dicts of the form
    ``{'isbn': <isbn>, 'primaryid': ['isbn', <isbn>]}``. Each ISBN is
    reported once; an ISBN-10 and its ISBN-13 counterpart (978 prefix)
    count as the same identifier, and only the first one seen is kept.
    """
    logger.debug('find %s', query)
    query += ' isbn'
    isbns = []
    for r in ox.web.google.find(query):
        isbns += find_isbns(' '.join(r))
    # The format string needs a placeholder, otherwise logging fails to
    # format the record instead of printing the list.
    logger.debug('isbns %s', isbns)
    results = []
    done = set()
    for isbn in isbns:
        if isbn in done:
            continue
        results.append({
            'isbn': isbn,
            'primaryid': ['isbn', isbn]
        })
        done.add(isbn)
        # Also mark the equivalent ISBN of the other length as seen so the
        # same book is not returned twice.
        if len(isbn) == 10:
            done.add(stdnum.isbn.to_isbn13(isbn))
        # Only 978-prefixed ISBN-13s are converted back to ISBN-10.
        if len(isbn) == 13 and isbn.startswith('978'):
            done.add(stdnum.isbn.to_isbn10(isbn))
    return results
def parse_entry(entry_): def parse_entry(entry_):
entry_id = XPath('descendant::atom:id') entry_id = XPath('descendant::atom:id')
creator = XPath('descendant::dc:creator') creator = XPath('descendant::dc:creator')
@ -90,7 +67,7 @@ def parse_entry(entry_):
info = decode_html_data(info) info = decode_html_data(info)
return info return info
def find(title=None, author=None): def find_feeds(title=None, author=None):
''' '''
parts = [] parts = []
if title: if title:
@ -107,36 +84,48 @@ def find(title=None, author=None):
url = 'http://books.google.com/books/feeds/volumes?' + urlencode({ url = 'http://books.google.com/books/feeds/volumes?' + urlencode({
'q': q.strip(), 'q': q.strip(),
'max-results': 20, 'max-results': 20,
'start-index':1, 'start-index': 1,
'min-viewability':'none', 'min-viewability': 'none',
}) })
data = read_url(url) data = read_url(url)
feed = etree.fromstring(data, feed = etree.fromstring(data, parser=etree.XMLParser(recover=True, no_network=True))
parser=etree.XMLParser(recover=True, no_network=True))
results = [] results = []
isbns = set() isbns = set()
for entry_ in XPath('//atom:entry')(feed): for entry_ in XPath('//atom:entry')(feed):
info = parse_entry(entry_) entry = parse_entry(entry_)
if 'isbn' in info and not 'isbn' in isbns: if 'isbn' in entry and 'isbn' not in isbns:
results.append(info) results.append(info(entry['isbn']))
isbns.add(info['isbn']) isbns.add(info['isbn'])
return results return results
def info_old(isbn):
    """Look up one ISBN in the Google Books volumes feed.

    Requests a single result for ``isbn`` and parses the first atom entry
    with ``parse_entry``.

    Returns the parsed entry with its 'isbn' key set to the requested
    ``isbn``, or an empty dict when the feed contains no entry.
    """
    url = 'http://books.google.com/books/feeds/volumes?' + urlencode({
        # 'isbn:' is the search keyword; the previous 'isnb:' was a typo.
        'q': 'isbn:' + isbn,
        'max-results': 1,
        'start-index': 1,
        'min-viewability': 'none',
    })
    data = read_url(url)
    feed = etree.fromstring(
        data, parser=etree.XMLParser(recover=True, no_network=True))
    for entry_ in XPath('//atom:entry')(feed):
        # Named 'entry' so the module-level info() is not shadowed.
        entry = parse_entry(entry_)
        entry['isbn'] = isbn
        return entry
    return {}


def find(title=None, author=None):
    """Search the Google Books v1 volumes API by title and/or author.

    ``title`` and ``author`` are combined into one free-text query. If
    ``settings.server`` has a 'google_api_key' it is sent as ``key``.

    Returns a list with one ``parse_item`` result per item in the
    response; an empty list when the response has no 'items' or when
    neither ``title`` nor ``author`` is given.

    Raises IOError when ``api_limit.error`` is already set, or when the
    response contains an 'error' key (which also sets ``api_limit.error``).
    """
    q = ' '.join(part for part in (title, author) if part).strip()
    if not q:
        # Nothing to search for; avoid a request that can only come back
        # as an error and would then block further API calls.
        return []
    params = {'q': q}
    api_key = settings.server.get('google_api_key')
    if api_key:
        params['key'] = api_key
    # Encode the query so spaces, '&', '#' etc. in a title or author do not
    # break or truncate the request.
    url = 'https://www.googleapis.com/books/v1/volumes?' + urlencode(params)
    if api_limit.error:
        raise IOError(url)
    # Wait until the rate limiter hands out a token.
    while not api_limit.consume(1):
        logger.debug('hitting google api to fast, waiting 1 second')
        sleep(1)
    r = get_json(url, timeout=-1)
    if 'error' in r:
        logger.debug('got google api error, dont call for 10 minutes')
        # Remove the error response from the store so it is not served
        # again for this url.
        store.delete(url)
        api_limit.error = True
        raise IOError(url, r)
    if 'items' not in r:
        # 'key' and 'value' do not exist in this function; log the query.
        logger.debug('no results for %s [%s]', q, r)
        return []
    return [parse_item(item) for item in r['items']]
def info(value): def info(value):
key = 'isbn' key = 'isbn'
@ -155,11 +144,14 @@ def info(value):
store.delete(url) store.delete(url)
api_limit.error = True api_limit.error = True
raise IOError(url, r) raise IOError(url, r)
if not 'items' in r: if 'items' not in r:
logger.debug('unknown %s: %s [%s]', key, value, r) logger.debug('unknown %s: %s [%s]', key, value, r)
return {} return {}
_data = r['items'][0]['volumeInfo'] return parse_item(r['items'][0])
_id = r['items'][0]['id']
def parse_item(item):
_data = item['volumeInfo']
_id = item['id']
data = {} data = {}
for key in [ for key in [
'authors', 'authors',
@ -169,17 +161,17 @@ def info(value):
'publishedDate', 'publishedDate',
'publisher', 'publisher',
'title', 'title',
]: ]:
if key in _data: if key in _data:
data[{ data[{
'authors': 'author', 'authors': 'author',
'pageCount': 'pages', 'pageCount': 'pages',
'publishedDate': 'date', 'publishedDate': 'date',
}.get(key,key)] = _data[key] }.get(key, key)] = _data[key]
if 'subtitle' in _data and _data['subtitle'].strip(): if 'subtitle' in _data and _data['subtitle'].strip():
data['title'] = '{title}: {subtitle}'.format(**_data) data['title'] = '{title}: {subtitle}'.format(**_data)
if r['items'][0]['accessInfo']['viewability'] != 'NO_PAGES': if item['accessInfo']['viewability'] != 'NO_PAGES':
#data['cover'] = 'https://books.google.com/books?id=%s&pg=PP1&img=1&zoom=0&hl=en' % _id #data['cover'] = 'https://books.google.com/books?id=%s&pg=PP1&img=1&zoom=0&hl=en' % _id
data['cover'] = 'https://books.google.com/books/content/images/frontcover/%s?fife=w600-rw' % _id data['cover'] = 'https://books.google.com/books/content/images/frontcover/%s?fife=w600-rw' % _id
@ -191,7 +183,7 @@ def info(value):
if 'industryIdentifiers' in _data: if 'industryIdentifiers' in _data:
for k in _data['industryIdentifiers']: for k in _data['industryIdentifiers']:
if k['type'].startswith('ISBN'): if k['type'].startswith('ISBN'):
if not 'isbn' in data: if 'isbn' not in data:
data['isbn'] = [] data['isbn'] = []
data['isbn'].append(k['identifier']) data['isbn'].append(k['identifier'])
else: else: