2014-05-14 09:57:11 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
# vi:si:et:sw=4:sts=4:ts=4
|
2014-09-02 22:32:44 +00:00
|
|
|
|
2014-05-14 09:57:11 +00:00
|
|
|
|
2014-05-18 23:24:04 +00:00
|
|
|
import stdnum.isbn
|
2014-05-21 00:02:21 +00:00
|
|
|
import ox
|
2014-05-17 14:26:59 +00:00
|
|
|
|
2014-09-02 22:32:44 +00:00
|
|
|
from . import abebooks
|
|
|
|
from . import loc
|
|
|
|
from . import lookupbyisbn
|
|
|
|
from . import openlibrary
|
|
|
|
from . import worldcat
|
|
|
|
from . import google
|
|
|
|
from . import duckduckgo
|
2014-05-14 09:57:11 +00:00
|
|
|
|
2016-01-08 10:22:07 +00:00
|
|
|
from .utils import decode_html_data
|
|
|
|
|
2016-01-05 16:14:57 +00:00
|
|
|
from oml import settings
|
|
|
|
|
2014-05-18 23:24:04 +00:00
|
|
|
import logging
|
2015-11-29 14:56:38 +00:00
|
|
|
logger = logging.getLogger(__name__)
|
2014-05-18 23:24:04 +00:00
|
|
|
|
|
|
|
|
2014-05-14 09:57:11 +00:00
|
|
|
# (provider module name, id key) pairs, in the order they are consulted.
# Each module name must match one of the provider modules imported above,
# since the lookup functions resolve it through globals().
providers = [
    ('openlibrary', 'olid'),
    ('loc', 'lccn'),
    ('worldcat', 'oclc'),
    ('worldcat', 'isbn'),
    ('lookupbyisbn', 'asin'),
    ('lookupbyisbn', 'isbn'),
    ('abebooks', 'isbn'),
]
|
|
|
|
|
2016-02-02 19:30:40 +00:00
|
|
|
def find(title=None, author=None):
    """Search for items by title and/or author.

    The query is delegated to ``google.find`` and its result is returned
    unchanged.

    :param title: title to search for, or None
    :param author: author to search for, or None
    :return: whatever ``google.find`` returns for the given arguments
    """
    return google.find(title=title, author=author)
|
2014-05-14 09:57:11 +00:00
|
|
|
|
2015-11-03 22:36:19 +00:00
|
|
|
def lookup_provider(arg):
    """Ask one provider for identifiers related to the ones already known.

    :param arg: 5-tuple ``(provider, id, ids, key, value)``.  ``provider``
        is the name of a provider module available in this module's
        globals, ``id`` is the identifier type that provider is registered
        for and ``ids`` is an iterable of ``(key, value)`` pairs.  The last
        two items are accepted for call compatibility but not used.
    :return: set of everything the provider's ``get_ids`` yielded
        (presumably ``(key, value)`` pairs -- verify against the providers)
    """
    provider, wanted_key, ids, _unused_key, _unused_value = arg
    # openlibrary is queried with every known identifier, the other
    # providers only with identifiers of the type they are registered for.
    accepts_any_key = provider in ('openlibrary', )
    found = set()
    for id_key, id_value in ids:
        if not (id_key == wanted_key or accepts_any_key):
            continue
        # Resolve the module lazily so nothing is looked up when no
        # identifier qualifies.
        found.update(globals()[provider].get_ids(id_key, id_value))
    return found
|
|
|
|
|
2014-05-14 09:57:11 +00:00
|
|
|
def lookup(key, value):
    """Collect metadata for the item identified by ``key`` / ``value``.

    An identifier rejected by ``isvalid_id`` yields an empty dict.  For
    ISBNs the metadata is taken from ``google.info`` (an empty dict if
    that call fails).  For every other identifier type the configured
    ``providers`` are used: first to discover related identifiers, then to
    fetch metadata for each identifier that was found.

    :param key: identifier type, e.g. ``'isbn'``, ``'olid'``, ``'lccn'``
    :param value: identifier value
    :return: metadata dict, passed through ``decode_html_data``; values of
        keys whose configured type is a list are always lists
    """
    # NOTE(review): the nesting of the provider lookup under the non-ISBN
    # branch was reconstructed from history -- confirm against the repo.
    if not isvalid_id(key, value):
        return {}
    if key == 'isbn':
        try:
            data = google.info(value)
        except Exception:
            # Best effort: a failing lookup yields no metadata, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            logger.debug('google.info failed %s=%s', key, value, exc_info=True)
            data = {}
    else:
        data = {key: [value]}
        ids = {(key, value)}
        provider_data = {}

        # Keep querying until a complete pass over all providers finds no
        # identifier that was not already known.  A new identifier from
        # *any* provider triggers another pass, not just from the last one.
        done = False
        while not done:
            done = True
            for provider, id_key in providers:
                result = lookup_provider((provider, id_key, ids, key, value))
                if result - ids:
                    done = False
                ids.update(result)
        logger.debug('FIXME: sort ids')
        ids = sorted(ids, key=lambda i: ox.sort_string(''.join(i)))
        logger.debug('IDS %s', ids)

        # Fetch metadata from each provider registered for an identifier
        # type we hold; the first value seen for a field wins per provider.
        for k, v in ids:
            for provider, id_key in providers:
                if id_key == k:
                    fields = provider_data.setdefault(provider, {})
                    for k_, v_ in globals()[provider].lookup(v).items():
                        if k_ not in fields:
                            fields[k_] = v_

        # Merge provider results, the provider with the most fields first;
        # values already present in data are never overwritten.
        for provider in sorted(provider_data, key=lambda x: -len(provider_data[x])):
            logger.debug('%s %s %s', provider, len(provider_data[provider]), list(provider_data[provider].keys()))
            for k_, v_ in provider_data[provider].items():
                if k_ not in data:
                    data[k_] = v_

        # Make sure every discovered identifier is part of the result.
        for k, v in ids:
            if k not in data:
                data[k] = []
            if v not in data[k]:
                data[k].append(v)

    # Keys configured with a list type must always hold a list.
    list_keys = [k['id'] for k in settings.config['itemKeys'] if isinstance(k['type'], list)]
    for list_key in list_keys:
        if list_key in data and not isinstance(data[list_key], list):
            data[list_key] = [data[list_key]]
    data = decode_html_data(data)
    return data
|
|
|
|
|
2014-05-18 23:24:04 +00:00
|
|
|
def isvalid_id(key, value):
    """Check whether ``value`` is a plausible identifier of type ``key``.

    - ``isbn``: must be 10 or 13 characters long and pass
      ``stdnum.isbn.is_valid``
    - ``asin``: must be exactly 10 characters long
    - ``olid``: must start with ``OL`` and end with ``M``

    Identifier types without a rule are always accepted.

    :param key: identifier type
    :param value: identifier value
    :return: True if the value is acceptable, False otherwise
    """
    if key == 'isbn':
        # The length test comes first so malformed input never reaches stdnum.
        return len(value) in (10, 13) and bool(stdnum.isbn.is_valid(value))
    if key == 'asin':
        return len(value) == 10
    if key == 'olid':
        return value.startswith('OL') and value.endswith('M')
    return True
|
2014-05-14 09:57:11 +00:00
|
|
|
|