openmedialibrary/oml/meta/__init__.py

89 lines
2.4 KiB
Python
Raw Normal View History

2014-05-14 09:57:11 +00:00
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division
2014-05-18 23:24:04 +00:00
import stdnum.isbn
2014-05-21 00:02:21 +00:00
import ox
2014-05-17 14:26:59 +00:00
2014-05-14 09:57:11 +00:00
import abebooks
import loc
import lookupbyisbn
import openlibrary
import worldcat
2014-05-16 08:06:11 +00:00
import google
2014-05-17 09:19:32 +00:00
import duckduckgo
2014-05-14 09:57:11 +00:00
2014-05-18 23:24:04 +00:00
import logging
# Module-level logger, registered under the fixed name 'meta'.
logger = logging.getLogger('meta')
2014-05-14 09:57:11 +00:00
# Metadata providers as (module name, id key) pairs.
# The module name is resolved through globals() in lookup(), so each entry
# must match a module imported above; the id key names the identifier type
# that is passed to that module's lookup().
providers = [
    ('openlibrary', 'olid'),
    ('loc', 'lccn'),
    ('worldcat', 'oclc'),
    ('lookupbyisbn', 'asin'),
    ('abebooks', 'isbn'),
]
2014-05-21 00:02:21 +00:00
def find(query):
    """Search for items matching *query*.

    The search is delegated to ``duckduckgo.find`` and its result is
    returned unchanged.

    Earlier revisions tried ``google.find`` and ``openlibrary.find`` here
    (the latter tagging every hit with ``primaryid = 'olid'``); that disabled
    code has been removed, see the version history if it is needed again.
    """
    return duckduckgo.find(query)
2014-05-14 09:57:11 +00:00
def lookup(key, value):
    """Collect metadata for one identifier from all configured providers.

    Steps:

    1. Reject the identifier if ``isvalid_id(key, value)`` fails.
    2. Ask every provider's ``get_ids`` for related identifiers, repeating
       until a full pass discovers nothing new.
    3. For every identifier whose key matches a provider's id key, call that
       provider's ``lookup`` and keep the first value seen for each field.
    4. Merge the per-provider results into one dict; providers that returned
       more fields are merged first and therefore win on conflicts.
    5. Make sure every discovered identifier is listed under its key.

    :param key: identifier type, e.g. ``'isbn'`` or ``'olid'``.
    :param value: identifier value.
    :returns: ``{}`` for an invalid identifier, otherwise a dict of merged
        metadata that always contains ``key`` itself.
    """
    if not isvalid_id(key, value):
        return {}

    data = {key: [value]}
    ids = [(key, value)]
    provider_data = {}

    # Expand the id set to a fixed point.
    done = False
    while not done:
        done = True
        for provider, _ in providers:
            module = globals()[provider]
            # ``ids`` is appended to while it is being iterated on purpose:
            # newly found ids are then queried within the same pass.
            for id_key, id_value in ids:
                for kv in module.get_ids(id_key, id_value):
                    if kv not in ids:
                        ids.append(kv)
                        done = False
    logger.debug('FIXME: sort ids')
    ids.sort(key=lambda i: ox.sort_string(u''.join(i)))
    logger.debug('IDS %s', ids)

    # Query each provider with the ids of the type it understands.
    for id_key, id_value in ids:
        for provider, provider_key in providers:
            if provider_key != id_key:
                continue
            found = provider_data.setdefault(provider, {})
            # .items() instead of .iteritems(): same result on Python 2,
            # and does not break on Python 3.
            for field, field_value in globals()[provider].lookup(id_value).items():
                if field not in found:
                    found[field] = field_value

    # Merge, richest provider first; existing fields are never overwritten.
    for provider in sorted(
        provider_data,
        key=lambda name: -len(provider_data[name])
    ):
        fields = provider_data[provider]
        logger.debug('%s %s %s', provider, len(fields), list(fields))
        for field, field_value in fields.items():
            if field not in data:
                data[field] = field_value

    # NOTE(review): this assumes any provider-supplied value stored under an
    # id key is a list -- confirm against the provider modules.
    for id_key, id_value in ids:
        if id_key not in data:
            data[id_key] = []
        if id_value not in data[id_key]:
            data[id_key].append(id_value)
    return data
2014-05-18 23:24:04 +00:00
def isvalid_id(key, value):
    """Return True if *value* is an acceptable identifier of type *key*.

    Rules applied per key:

    * ``'isbn'``: length 10 or 13 and accepted by ``stdnum.isbn.is_valid``.
    * ``'asin'``: length exactly 10.
    * ``'olid'``: starts with ``'OL'`` and ends with ``'M'``.

    Any other key is accepted without checks.
    """
    if key == 'isbn':
        # The length test comes first so stdnum is only consulted for
        # plausible candidates.
        return len(value) in (10, 13) and bool(stdnum.isbn.is_valid(value))
    if key == 'asin':
        return len(value) == 10
    if key == 'olid':
        return value.startswith('OL') and value.endswith('M')
    return True
2014-05-14 09:57:11 +00:00