use old google api

This commit is contained in:
j 2016-02-03 01:00:40 +05:30
parent 1412cb4e39
commit 404842f849
4 changed files with 119 additions and 20 deletions

View file

@ -242,11 +242,7 @@ def findMetadata(data):
if r:
response['items'].append(r)
elif key == 'author,title':
value = ' '.join(data.values())
for isbn in meta.find(value):
r = meta.lookup('isbn', isbn)
if r:
response['items'].append(r)
response['items'] = meta.find(**data)
elif key == 'id':
import user.models
items = {}

View file

@ -101,6 +101,7 @@ def run_scan():
return
position += 1
with db.session():
if os.path.exists(f):
id = media.get_id(f)
file = File.get(id)
if not file:

View file

@ -31,9 +31,9 @@ providers = [
('abebooks', 'isbn')
]
def find(query):
#results = google.find(query)
results = duckduckgo.find(query)
def find(title=None, author=None):
results = google.find(title=title, author=author)
#results = duckduckgo.find(query)
'''
results = openlibrary.find(query)
for r in results:
@ -55,7 +55,7 @@ def lookup(key, value):
return {}
if key == 'isbn':
try:
data = google.info(key, value)
data = google.info(value)
except:
logger.debug('google.info failed %s=%s', key, value, exc_info=True)
data = {}

View file

@ -2,10 +2,14 @@
# vi:si:et:sw=4:sts=4:ts=4
from time import time, sleep
from urllib.parse import urlencode
import re
from functools import partial
from ox.cache import get_json, store
from ox.cache import get_json, store, read_url
import ox.web.google
import stdnum.isbn
from lxml import etree
from .utils import find_isbns, get_language, decode_html_data, to_isbn13
import settings
@ -13,8 +17,16 @@ import settings
import logging
logger = logging.getLogger(__name__)
# Prefix -> namespace URI table handed to every XPath expression compiled
# through the ``XPath`` factory defined right after it.
NAMESPACES = {
    'openSearch': 'http://a9.com/-/spec/opensearchrss/1.0/',
    'atom': 'http://www.w3.org/2005/Atom',
    'dc': 'http://purl.org/dc/terms',
    'gbs': 'http://schemas.google.com/books/2008',
    'gd': 'http://schemas.google.com/g/2005',
}

# etree.XPath with the namespace table pre-bound, so call sites only pass
# the expression string.
XPath = partial(etree.XPath, namespaces=NAMESPACES)
def find(query):
def find_(query):
logger.debug('find %s', query)
query += ' isbn'
isbns = []
@ -37,9 +49,99 @@ def find(query):
done.add(stdnum.isbn.to_isbn10(isbn))
return results
def info(key, value):
if key not in ('isbn', 'lccn', 'oclc'):
raise IOError('unknwon key %s' % key)
def parse_entry(entry_):
    """Turn one ``atom:entry`` element of a volumes feed into a metadata dict.

    Always sets ``title``, ``description``, ``date`` (strings) and
    ``categories``, ``publisher``, ``language`` (lists). Sets ``author``
    only when at least one non-empty ``dc:creator`` exists, ``cover`` only
    when the entry's viewability is not ``view_no_pages`` (and an id is
    available), ``pages`` only when ``dc:format`` contains a number, and
    ``isbn`` only when a ``dc:identifier`` starting with ``ISBN:`` converts
    via ``to_isbn13``.

    Elements without text are ignored, so an empty tag in the feed no
    longer raises ``TypeError`` while joining.

    :param entry_: lxml element for a single ``atom:entry``.
    :return: the result of ``decode_html_data`` applied to the collected dict.
    """
    def texts(expression):
        # Text of every matching descendant; empty elements have
        # ``.text is None`` and are skipped.
        return [node.text for node in XPath(expression)(entry_) if node.text]

    # The volume id is the last path segment of the atom:id URL; it is only
    # needed to build the cover URL, so a missing id must not abort parsing.
    id_texts = texts('descendant::atom:id')
    _id = id_texts[0].split('/')[-1] if id_texts else ''

    result = {}
    result['title'] = ': '.join(texts('descendant::dc:title')).strip()
    authors = [name.strip() for name in texts('descendant::dc:creator')]
    if authors:
        result['author'] = authors
    result['description'] = '\n\n'.join(texts('descendant::dc:description')).strip()
    result['date'] = ''.join(texts('descendant::dc:date')).strip()
    result['categories'] = texts('descendant::dc:subject')
    result['publisher'] = texts('descendant::dc:publisher')
    result['language'] = [get_language(code) for code in texts('descendant::dc:language')]

    v = XPath('descendant::gbs:viewability')(entry_)
    if _id and v and v[0].attrib.get('value') != 'http://schemas.google.com/books/2008#view_no_pages':
        result['cover'] = 'https://books.google.com/books/content/images/frontcover/%s?fife=w600-rw' % _id

    # dc:format carries free text; the first run of digits is taken as the
    # page count.
    format_ = ''.join(texts('descendant::dc:format'))
    if format_:
        pages = re.findall(r'\d+', format_)
        if pages:
            result['pages'] = int(pages[0])

    # Use the first ISBN identifier that to_isbn13 accepts.
    for x in XPath('descendant::dc:identifier')(entry_):
        t = str(x.text).strip()
        if t[:5].upper() == 'ISBN:':
            t = to_isbn13(t[5:])
            if t:
                result['isbn'] = t
                break

    return decode_html_data(result)
def find(title=None, author=None):
    """Search the Google Books volumes feed by title and/or author.

    The query is plain free text: ``title`` and ``author`` joined by a
    space (no ``intitle:``/``inauthor:`` field operators). The feed is
    fetched with ``read_url`` and parsed with a recovering XML parser.

    :param title: optional title text.
    :param author: optional author text.
    :return: list of ``parse_entry`` dicts, limited to entries that have an
        ``isbn`` and containing each ISBN at most once, in feed order.
    """
    q = ' '.join(part for part in (title, author) if part)
    url = 'http://books.google.com/books/feeds/volumes?' + urlencode({
        'q': q.strip(),
        'max-results': 20,
        'start-index': 1,
        'min-viewability': 'none',
    })
    logger.debug('find %s', url)
    data = read_url(url)
    feed = etree.fromstring(data,
        parser=etree.XMLParser(recover=True, no_network=True))
    results = []
    isbns = set()
    for entry_ in XPath('//atom:entry')(feed):
        entry = parse_entry(entry_)
        isbn = entry.get('isbn')
        # Compare the entry's actual ISBN against those already seen so that
        # several feed entries for the same book yield a single result.
        if isbn and isbn not in isbns:
            results.append(entry)
            isbns.add(isbn)
    return results
def info(isbn):
    """Look up a single book by ISBN in the Google Books volumes feed.

    Queries the feed with the ``isbn:`` search keyword, asking for one
    result, and parses the first entry with ``parse_entry``.

    :param isbn: ISBN string to look up.
    :return: metadata dict of the first matching entry with ``isbn`` set to
        the requested value, or ``{}`` when the feed has no entry.
    """
    url = 'http://books.google.com/books/feeds/volumes?' + urlencode({
        'q': 'isbn:' + isbn,
        'max-results': 1,
        'start-index': 1,
        'min-viewability': 'none',
    })
    data = read_url(url)
    feed = etree.fromstring(data,
        parser=etree.XMLParser(recover=True, no_network=True))
    entries = XPath('//atom:entry')(feed)
    if not entries:
        return {}
    result = parse_entry(entries[0])
    # Report the ISBN the caller asked for, whatever form the feed used.
    result['isbn'] = isbn
    return result
def info_newapi(value):
key = 'isbn'
url = 'https://www.googleapis.com/books/v1/volumes?q=%s:%s' % (key, value)
api_key = settings.server.get('google_api_key')
if api_key: