use old google api
This commit is contained in:
parent
1412cb4e39
commit
404842f849
4 changed files with 119 additions and 20 deletions
|
@ -242,11 +242,7 @@ def findMetadata(data):
|
|||
if r:
|
||||
response['items'].append(r)
|
||||
elif key == 'author,title':
|
||||
value = ' '.join(data.values())
|
||||
for isbn in meta.find(value):
|
||||
r = meta.lookup('isbn', isbn)
|
||||
if r:
|
||||
response['items'].append(r)
|
||||
response['items'] = meta.find(**data)
|
||||
elif key == 'id':
|
||||
import user.models
|
||||
items = {}
|
||||
|
|
|
@ -101,6 +101,7 @@ def run_scan():
|
|||
return
|
||||
position += 1
|
||||
with db.session():
|
||||
if os.path.exists(f):
|
||||
id = media.get_id(f)
|
||||
file = File.get(id)
|
||||
if not file:
|
||||
|
|
|
@ -31,9 +31,9 @@ providers = [
|
|||
('abebooks', 'isbn')
|
||||
]
|
||||
|
||||
def find(query):
|
||||
#results = google.find(query)
|
||||
results = duckduckgo.find(query)
|
||||
def find(title=None, author=None):
|
||||
results = google.find(title=title, author=author)
|
||||
#results = duckduckgo.find(query)
|
||||
'''
|
||||
results = openlibrary.find(query)
|
||||
for r in results:
|
||||
|
@ -55,7 +55,7 @@ def lookup(key, value):
|
|||
return {}
|
||||
if key == 'isbn':
|
||||
try:
|
||||
data = google.info(key, value)
|
||||
data = google.info(value)
|
||||
except:
|
||||
logger.debug('google.info failed %s=%s', key, value, exc_info=True)
|
||||
data = {}
|
||||
|
|
|
@ -2,10 +2,14 @@
|
|||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
|
||||
from time import time, sleep
|
||||
from urllib.parse import urlencode
|
||||
import re
|
||||
from functools import partial
|
||||
|
||||
from ox.cache import get_json, store
|
||||
from ox.cache import get_json, store, read_url
|
||||
import ox.web.google
|
||||
import stdnum.isbn
|
||||
from lxml import etree
|
||||
|
||||
from .utils import find_isbns, get_language, decode_html_data, to_isbn13
|
||||
import settings
|
||||
|
@ -13,8 +17,16 @@ import settings
|
|||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Prefix -> namespace URI map used by the XPath expressions in this module.
NAMESPACES = {
    'openSearch': 'http://a9.com/-/spec/opensearchrss/1.0/',
    'atom': 'http://www.w3.org/2005/Atom',
    'dc': 'http://purl.org/dc/terms',
    'gbs': 'http://schemas.google.com/books/2008',
    'gd': 'http://schemas.google.com/g/2005',
}

# etree.XPath with the namespace map pre-bound, so call sites only pass the
# expression string.
XPath = partial(etree.XPath, namespaces=NAMESPACES)
|
||||
|
||||
def find(query):
|
||||
def find_(query):
|
||||
logger.debug('find %s', query)
|
||||
query += ' isbn'
|
||||
isbns = []
|
||||
|
@ -37,9 +49,99 @@ def find(query):
|
|||
done.add(stdnum.isbn.to_isbn10(isbn))
|
||||
return results
|
||||
|
||||
def info(key, value):
|
||||
if key not in ('isbn', 'lccn', 'oclc'):
|
||||
raise IOError('unknwon key %s' % key)
|
||||
def parse_entry(entry_):
    """Convert one ``atom:entry`` element of a books feed into a metadata dict.

    ``entry_`` is an element that the module-level ``XPath`` helper can be
    evaluated against. The returned dict always carries ``title``,
    ``description``, ``date``, ``categories``, ``publisher`` and ``language``;
    ``author``, ``cover``, ``pages`` and ``isbn`` are added only when the
    entry provides usable values for them. The dict is passed through
    ``decode_html_data`` before being returned.

    Elements without text are ignored, so an empty ``<dc:title/>`` (or an
    entry without ``atom:id``) no longer raises.
    """
    def texts(expression):
        # Text of every element matched by the expression, skipping elements
        # whose text is None/empty (joining None would raise TypeError).
        return [x.text for x in XPath(expression)(entry_) if x.text]

    info = {}
    info['title'] = ': '.join(texts('descendant::dc:title')).strip()
    authors = [a.strip() for a in texts('descendant::dc:creator')]
    if authors:
        info['author'] = authors
    info['description'] = '\n\n'.join(texts('descendant::dc:description')).strip()
    info['date'] = ''.join(texts('descendant::dc:date')).strip()
    info['categories'] = texts('descendant::dc:subject')
    info['publisher'] = texts('descendant::dc:publisher')
    info['language'] = [get_language(t) for t in texts('descendant::dc:language')]

    # The volume id is the last path segment of the atom:id URL; it is only
    # needed to build the cover URL, so a missing id just means no cover.
    ids = texts('descendant::atom:id')
    v = XPath('descendant::gbs:viewability')(entry_)
    if ids and v and v[0].attrib.get('value') != 'http://schemas.google.com/books/2008#view_no_pages':
        _id = ids[0].split('/')[-1]
        info['cover'] = 'https://books.google.com/books/content/images/frontcover/%s?fife=w600-rw' % _id

    # dc:format is free text; the first run of digits in it is used as the
    # page count.
    format_ = ''.join(texts('descendant::dc:format'))
    pages = re.findall(r'\d+', format_)
    if pages:
        info['pages'] = int(pages[0])

    # Use the first identifier of the form "ISBN:..." that to_isbn13 accepts
    # (presumably it returns a falsy value for invalid input - confirm in utils).
    for t in texts('descendant::dc:identifier'):
        t = t.strip()
        if t[:5].upper() == 'ISBN:':
            t = to_isbn13(t[5:])
            if t:
                info['isbn'] = t
                break

    info = decode_html_data(info)
    return info
|
||||
|
||||
def find(title=None, author=None):
    """Search the Google Books volumes feed by title and/or author.

    ``title`` and ``author`` are combined into one free-text query (whichever
    of them is set). Up to 20 feed entries are requested and parsed with
    ``parse_entry``.

    Returns a list of metadata dicts, one per distinct ISBN, in feed order.
    Entries without an ``isbn`` are dropped.
    """
    # NOTE: a fielded query (one intitle:/inauthor: term per word, joined
    # with '+') was tried here previously; plain free text is used instead.
    q = ' '.join(part for part in (title, author) if part).strip()
    url = 'http://books.google.com/books/feeds/volumes?' + urlencode({
        'q': q,
        'max-results': 20,
        'start-index': 1,
        'min-viewability': 'none',
    })
    logger.debug('find %s', url)
    data = read_url(url)
    feed = etree.fromstring(data,
        parser=etree.XMLParser(recover=True, no_network=True))
    results = []
    # NOTE(review): a recovering parser may yield None for an unusable
    # document - treat that as "no results" instead of failing in XPath.
    if feed is None:
        return results
    isbns = set()
    for entry_ in XPath('//atom:entry')(feed):
        entry = parse_entry(entry_)
        # Compare the entry's ISBN value (not the literal key name) against
        # the ISBNs already collected, so duplicates are actually skipped.
        if 'isbn' in entry and entry['isbn'] not in isbns:
            results.append(entry)
            isbns.add(entry['isbn'])
    return results
|
||||
|
||||
def info(isbn):
    """Look up a single volume in the Google Books volumes feed by ISBN.

    Requests one result for the query ``isbn:<isbn>`` and parses the first
    feed entry with ``parse_entry``. The returned dict's ``isbn`` is set to
    the ``isbn`` argument, replacing whatever the entry itself listed.

    Returns an empty dict when the feed contains no entry.
    """
    url = 'http://books.google.com/books/feeds/volumes?' + urlencode({
        # 'isbn:' is the search keyword; the former 'isnb:' spelling made
        # this a plain free-text search.
        'q': 'isbn:' + isbn,
        'max-results': 1,
        'start-index': 1,
        'min-viewability': 'none',
    })
    data = read_url(url)
    feed = etree.fromstring(data,
        parser=etree.XMLParser(recover=True, no_network=True))
    # NOTE(review): a recovering parser may yield None for an unusable
    # document - treat that as "not found".
    if feed is None:
        return {}
    entries = XPath('//atom:entry')(feed)
    if not entries:
        return {}
    result = parse_entry(entries[0])
    result['isbn'] = isbn
    return result
|
||||
|
||||
def info_newapi(value):
|
||||
key = 'isbn'
|
||||
url = 'https://www.googleapis.com/books/v1/volumes?q=%s:%s' % (key, value)
|
||||
api_key = settings.server.get('google_api_key')
|
||||
if api_key:
|
||||
|
|
Loading…
Reference in a new issue