use new google api to find books by title/author
parent ac84e82a52
commit 70beacd848
1 changed file with 46 additions and 54 deletions
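The "new google api" is the Google Books API v1 JSON endpoint at www.googleapis.com/books/v1/volumes, replacing the old GData Atom feeds under books.google.com/books/feeds/. A minimal stdlib-only sketch of the kind of title/author lookup the new find() below performs — the endpoint, the concatenated q string, and the optional key parameter come from the diff; the lookup() helper name and everything else here is illustrative:

    import json
    from urllib.parse import urlencode
    from urllib.request import urlopen

    def lookup(title=None, author=None, api_key=None):
        # Build the query the way the new find() does: title and author
        # concatenated into a single q string.
        q = ' '.join(p for p in (title, author) if p)
        params = {'q': q}
        if api_key:
            params['key'] = api_key  # optional API key, as in the diff
        url = 'https://www.googleapis.com/books/v1/volumes?' + urlencode(params)
        with urlopen(url) as response:
            return json.loads(response.read().decode('utf-8'))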
@@ -17,7 +17,7 @@ import logging
 logger = logging.getLogger(__name__)
 
 NAMESPACES = {
-    'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
+    'openSearch': 'http://a9.com/-/spec/opensearchrss/1.0/',
     'atom' : 'http://www.w3.org/2005/Atom',
     'dc' : 'http://purl.org/dc/terms',
     'gbs' : 'http://schemas.google.com/books/2008',
@@ -25,29 +25,6 @@ NAMESPACES = {
 }
 XPath = partial(etree.XPath, namespaces=NAMESPACES)
 
-def find_(query):
-    logger.debug('find %s', query)
-    query += ' isbn'
-    isbns = []
-    for r in ox.web.google.find(query):
-        isbns += find_isbns(' '.join(r))
-    logger.debug('isbns', isbns)
-    results = []
-    done = set()
-    for isbn in isbns:
-        if isbn not in done:
-            r = {
-                'isbn': isbn,
-                'primaryid': ['isbn', isbn]
-            }
-            results.append(r)
-            done.add(isbn)
-            if len(isbn) == 10:
-                done.add(stdnum.isbn.to_isbn13(isbn))
-            if len(isbn) == 13 and isbn.startswith('978'):
-                done.add(stdnum.isbn.to_isbn10(isbn))
-    return results
-
 def parse_entry(entry_):
     entry_id = XPath('descendant::atom:id')
     creator = XPath('descendant::dc:creator')
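The deleted find_() scraped Google web search results for ISBNs, then avoided returning the same book twice under both its ISBN-10 and ISBN-13 forms. Its dedup idea in isolation, as a self-contained sketch (assumes the python-stdnum package the old code used; the dedup_isbns name is made up):

    import stdnum.isbn

    def dedup_isbns(isbns):
        results, done = [], set()
        for isbn in isbns:
            if isbn in done:
                continue
            results.append(isbn)
            done.add(isbn)
            if len(isbn) == 10:
                # also block the equivalent ISBN-13
                done.add(stdnum.isbn.to_isbn13(isbn))
            if len(isbn) == 13 and isbn.startswith('978'):
                # only 978-prefixed ISBN-13s have an ISBN-10 equivalent
                done.add(stdnum.isbn.to_isbn10(isbn))
        return results

    # dedup_isbns(['0306406152', '9780306406157']) -> ['0306406152']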
@@ -90,7 +67,7 @@ def parse_entry(entry_):
     info = decode_html_data(info)
     return info
 
-def find(title=None, author=None):
+def find_feeds(title=None, author=None):
     '''
     parts = []
     if title:
@@ -107,36 +84,48 @@ def find(title=None, author=None):
     url = 'http://books.google.com/books/feeds/volumes?' + urlencode({
         'q': q.strip(),
         'max-results': 20,
-        'start-index':1,
-        'min-viewability':'none',
+        'start-index': 1,
+        'min-viewability': 'none',
     })
     data = read_url(url)
-    feed = etree.fromstring(data,
-        parser=etree.XMLParser(recover=True, no_network=True))
+    feed = etree.fromstring(data, parser=etree.XMLParser(recover=True, no_network=True))
     results = []
     isbns = set()
     for entry_ in XPath('//atom:entry')(feed):
-        info = parse_entry(entry_)
-        if 'isbn' in info and not 'isbn' in isbns:
-            results.append(info)
+        entry = parse_entry(entry_)
+        if 'isbn' in entry and 'isbn' not in isbns:
+            results.append(info(entry['isbn']))
             isbns.add(info['isbn'])
     return results
 
-def info_old(isbn):
-    url = 'http://books.google.com/books/feeds/volumes?' + urlencode({
-        'q': 'isnb:' + isbn,
-        'max-results':1,
-        'start-index':1,
-        'min-viewability':'none',
-    })
-    data = read_url(url)
-    feed = etree.fromstring(data,
-        parser=etree.XMLParser(recover=True, no_network=True))
-    for entry_ in XPath('//atom:entry')(feed):
-        info = parse_entry(entry_)
-        info['isbn'] = isbn
-        return info
-    return {}
+def find(title=None, author=None):
+    q = ''
+    if title:
+        q += title + ' '
+    if author:
+        q += author
+    url = 'https://www.googleapis.com/books/v1/volumes?q=%s' % q
+    api_key = settings.server.get('google_api_key')
+    if api_key:
+        url += '&key=' + api_key
+    if api_limit.error:
+        raise IOError(url)
+    while not api_limit.consume(1):
+        logger.debug('hitting google api to fast, waiting 1 second')
+        sleep(1)
+    r = get_json(url, timeout=-1)
+    if 'error' in r:
+        logger.debug('got google api error, dont call for 10 minutes')
+        store.delete(url)
+        api_limit.error = True
+        raise IOError(url, r)
+    if 'items' not in r:
+        logger.debug('unknown %s: %s [%s]', key, value, r)
+        return []
+    results = []
+    for item in r['items']:
+        results.append(parse_item(item))
+    return results
 
 def info(value):
     key = 'isbn'
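api_limit, defined elsewhere in this module, is used here as a token bucket with an error latch: consume(1) paces requests, and error, once set after an API error, short-circuits further calls. A minimal sketch of an object with that interface — the consume()/error names come from the diff; the rate, capacity, and implementation are assumptions:

    from time import monotonic

    class RateLimit:
        def __init__(self, rate=1.0, capacity=5):
            self.rate = rate          # tokens refilled per second
            self.capacity = capacity  # maximum burst
            self.tokens = capacity
            self.updated = monotonic()
            self.error = False        # latched when the API reports an error

        def consume(self, n=1):
            # Refill proportionally to elapsed time, then try to spend n tokens.
            now = monotonic()
            self.tokens = min(self.capacity,
                              self.tokens + (now - self.updated) * self.rate)
            self.updated = now
            if self.tokens >= n:
                self.tokens -= n
                return True
            return False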
@@ -155,11 +144,14 @@ def info(value):
         store.delete(url)
         api_limit.error = True
         raise IOError(url, r)
-    if not 'items' in r:
+    if 'items' not in r:
         logger.debug('unknown %s: %s [%s]', key, value, r)
         return {}
-    _data = r['items'][0]['volumeInfo']
-    _id = r['items'][0]['id']
+    return parse_item(r['items'][0])
+
+def parse_item(item):
+    _data = item['volumeInfo']
+    _id = item['id']
     data = {}
     for key in [
         'authors',
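The extracted parse_item() consumes one element of the response's items array. A hand-written sample of such an item, trimmed to the fields this file actually reads (the shape follows the Books API v1; the values are illustrative):

    item = {
        'id': 'zyTCAlFPjgYC',
        'volumeInfo': {
            'title': 'The Google Story',
            'subtitle': 'Inside the Hottest Business, Media and Technology Success of Our Time',
            'authors': ['David A. Vise', 'Mark Malseed'],
            'publishedDate': '2005-11-15',
            'pageCount': 207,
            'industryIdentifiers': [
                {'type': 'ISBN_10', 'identifier': '055380457X'},
                {'type': 'ISBN_13', 'identifier': '9780553804577'},
            ],
        },
        'accessInfo': {'viewability': 'PARTIAL'},
    }
    # parse_item(item) renames volumeInfo keys onto the module's own names:
    # authors -> author, pageCount -> pages, publishedDate -> date.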
@@ -175,11 +167,11 @@ def info(value):
             'authors': 'author',
             'pageCount': 'pages',
             'publishedDate': 'date',
-        }.get(key,key)] = _data[key]
+        }.get(key, key)] = _data[key]
 
     if 'subtitle' in _data and _data['subtitle'].strip():
         data['title'] = '{title}: {subtitle}'.format(**_data)
-    if r['items'][0]['accessInfo']['viewability'] != 'NO_PAGES':
+    if item['accessInfo']['viewability'] != 'NO_PAGES':
         #data['cover'] = 'https://books.google.com/books?id=%s&pg=PP1&img=1&zoom=0&hl=en' % _id
         data['cover'] = 'https://books.google.com/books/content/images/frontcover/%s?fife=w600-rw' % _id
 
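The cover URL is derived from the volume id alone; the viewability check skips NO_PAGES volumes, which generally have no usable front-cover image. For the sample item above:

    cover = 'https://books.google.com/books/content/images/frontcover/%s?fife=w600-rw' % item['id']
    # -> https://books.google.com/books/content/images/frontcover/zyTCAlFPjgYC?fife=w600-rw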
@@ -191,7 +183,7 @@ def info(value):
     if 'industryIdentifiers' in _data:
         for k in _data['industryIdentifiers']:
             if k['type'].startswith('ISBN'):
-                if not 'isbn' in data:
+                if 'isbn' not in data:
                     data['isbn'] = []
                 data['isbn'].append(k['identifier'])
             else:
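A standalone version of the identifier loop above, runnable against the sample item (the extract_isbns name is made up): it keeps both ISBN_10 and ISBN_13 identifiers and ignores OTHER-typed ones.

    def extract_isbns(volume_info):
        isbns = []
        for k in volume_info.get('industryIdentifiers', []):
            if k['type'].startswith('ISBN'):  # matches ISBN_10 and ISBN_13
                isbns.append(k['identifier'])
        return isbns

    # extract_isbns(item['volumeInfo']) -> ['055380457X', '9780553804577']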