meta
This commit is contained in:
parent
d385853186
commit
0e6b9533b4
12 changed files with 521 additions and 154 deletions
|
|
@ -2,12 +2,60 @@
|
|||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from __future__ import division
|
||||
|
||||
from urllib import urlencode
|
||||
from ox.cache import read_url
|
||||
import json
|
||||
|
||||
from marc_countries import COUNTRIES
|
||||
from utils import normalize_isbn
|
||||
|
||||
# Maps Open Library edition field names (left) to the metadata keys used by
# this module (right).
KEYS = dict([
    ('authors', 'author'),
    ('covers', 'cover'),
    ('dewey_decimal_class', 'classification'),
    ('isbn_10', 'isbn10'),
    ('isbn_13', 'isbn13'),
    ('languages', 'language'),
    ('lccn', 'lccn'),
    ('number_of_pages', 'pages'),
    ('oclc_numbers', 'oclc'),
    ('publish_country', 'country'),
    ('publish_date', 'date'),
    ('publishers', 'publisher'),
    ('publish_places', 'place'),
    ('series', 'series'),
    ('title', 'title'),
])
|
||||
|
||||
def find(*args, **kargs):
    """Search Open Library and return formatted metadata for matching books.

    Positional arguments are free-text query terms. Each keyword argument
    carries a string or a list of strings; every non-empty value is added
    to the query as a quoted phrase. The keyword name itself is not sent,
    so the values are matched as plain text. ``date`` and ``publisher``
    are dropped with a notice on stdout before the query is built.

    Returns a list of dicts as produced by ``format()``, each extended with
    an ``olid`` entry taken from the last path segment of the book key.
    An empty list is returned when the search yields no ``/books`` keys.
    """
    # Colons are replaced by spaces in every term -- presumably because the
    # search syntax treats ':' as a field separator (TODO confirm).
    terms = [a.replace(':', ' ') for a in args]
    for k in ('date', 'publisher'):
        if k in kargs:
            print('ignoring %s on openlibrary %s' % (k, kargs[k]))
            del kargs[k]
    for v in kargs.values():
        if not v:
            continue
        if not isinstance(v, list):
            v = [v]
        terms += ['"%s"' % value.replace(':', ' ') for value in v]
    query = ' '.join(terms).strip()
    print('openlibrary.find %s' % query)
    r = api.search(query)
    ids = [b for b in r.get('result', []) if b.startswith('/books')]
    if not ids:
        # Nothing to resolve; skip the second API round trip.
        return []
    # Fall back to an empty dict (not a list) so the loop below is safe
    # when the response carries no 'result'.
    books = api.get_many(ids).get('result', {})
    results = []
    for olid, value in books.items():
        book = format(value)
        book['olid'] = olid.split('/')[-1]
        results.append(book)
    return results
|
||||
|
||||
|
||||
def get_ids(key, value):
|
||||
ids = []
|
||||
if key == 'olid':
|
||||
|
|
@ -17,15 +65,13 @@ def get_ids(key, value):
|
|||
for v in data[id]:
|
||||
if (id, v) not in ids:
|
||||
ids.append((id, v))
|
||||
elif key in ('isbn10', 'isbn13'):
|
||||
elif key in ('isbn10', 'isbn13', 'oclc', 'lccn'):
|
||||
print 'openlibraryid.get_ids', key, value
|
||||
r = find('isbn:%s' % value)
|
||||
for d in sorted(r.get('docs', []), key=lambda d: -d['last_modified_i']):
|
||||
if 'edition_key' in d:
|
||||
v = d['edition_key']
|
||||
if isinstance(v, list):
|
||||
v = v[0]
|
||||
for kv in [('olid', v)] + get_ids('olid', v):
|
||||
r = api.things({'type': '/type/edition', key.replace('isbn', 'isbn_'): value})
|
||||
for b in r.get('result', []):
|
||||
if b.startswith('/books'):
|
||||
olid = b.split('/')[-1]
|
||||
for kv in [('olid', olid)] + get_ids('olid', olid):
|
||||
if kv not in ids:
|
||||
ids.append(kv)
|
||||
if ids:
|
||||
|
|
@ -35,38 +81,29 @@ def get_ids(key, value):
|
|||
|
||||
def lookup(id, return_all=False):
|
||||
#print 'openlibrary.lookup', id
|
||||
data = {
|
||||
'olid': id
|
||||
}
|
||||
url = 'https://openlibrary.org/books/%s.json' % id
|
||||
info = json.loads(read_url(url))
|
||||
keys = {
|
||||
'title': 'title',
|
||||
'authors': 'author',
|
||||
'publishers': 'publisher',
|
||||
'languages': 'language',
|
||||
'publish_places': 'place',
|
||||
'publish_country': 'country',
|
||||
'covers': 'cover',
|
||||
'isbn_10': 'isbn10',
|
||||
'isbn_13': 'isbn13',
|
||||
'lccn': 'lccn',
|
||||
'oclc_numbers': 'oclc',
|
||||
'dewey_decimal_class': 'classification',
|
||||
'number_of_pages': 'pages',
|
||||
}
|
||||
for key in keys:
|
||||
info = api.get('/books/' + id).get('result', {})
|
||||
#url = 'https://openlibrary.org/books/%s.json' % id
|
||||
#info = json.loads(read_url(url))
|
||||
data = format(info, return_all)
|
||||
data['olid'] = id
|
||||
print 'openlibrary.lookup', id, data.keys()
|
||||
return data
|
||||
|
||||
def format(info, return_all=False):
|
||||
data = {}
|
||||
for key in KEYS:
|
||||
if key in info:
|
||||
value = info[key]
|
||||
if key == 'authors':
|
||||
value = authors(value)
|
||||
value = resolve_names(value)
|
||||
elif key == 'publish_country':
|
||||
value = value.strip()
|
||||
value = COUNTRIES.get(value, value)
|
||||
elif key == 'covers':
|
||||
value = 'https://covers.openlibrary.org/b/id/%s.jpg' % value[0]
|
||||
value = COUNTRIES.get(value, value)
|
||||
elif key == 'languages':
|
||||
value = languages(value)
|
||||
value = resolve_names(value)
|
||||
elif not return_all and isinstance(value, list) and key not in ('publish_places'):
|
||||
value = value[0]
|
||||
if key in ('isbn_10', 'isbn_13'):
|
||||
|
|
@ -74,27 +111,52 @@ def lookup(id, return_all=False):
|
|||
value = map(normalize_isbn, value)
|
||||
else:
|
||||
value = normalize_isbn(value)
|
||||
data[keys[key]] = value
|
||||
data[KEYS[key]] = value
|
||||
return data
|
||||
|
||||
info = lookup
|
||||
|
||||
def find(query):
|
||||
url = 'https://openlibrary.org/search.json?q=%s' % query
|
||||
data = json.loads(read_url(url))
|
||||
return data
|
||||
|
||||
def authors(authors):
|
||||
return resolve_names(authors)
|
||||
|
||||
def resolve_names(objects, key='name'):
|
||||
r = []
|
||||
for o in objects:
|
||||
url = 'https://openlibrary.org%s.json' % o['key']
|
||||
data = json.loads(read_url(url))
|
||||
r.append(data[key])
|
||||
data = api.get_many([k['key'] for k in objects]).get('result', {})
|
||||
for k, value in data.iteritems():
|
||||
if 'location' in value and value.get('type', {}).get('key') == '/type/redirect':
|
||||
value = api.get(value['location']).get('result', {})
|
||||
r.append(value[key])
|
||||
return r
|
||||
|
||||
def languages(languages):
|
||||
return resolve_names(languages)
|
||||
class API(object):
    """Thin client for the JSON endpoints below ``base``.

    Every public method issues one request through ``_request`` and returns
    the decoded JSON response unchanged.
    """

    # Common prefix of every endpoint; the action name is appended to it.
    base = 'https://openlibrary.org/api'

    def _request(self, action, data):
        """Fetch ``<base>/<action>?<data>`` and return the decoded JSON.

        ``data`` maps query parameter names to values. Strings are sent as
        they are; any other value is serialised to JSON first. The mapping
        passed in is not modified.

        Responses that look like errors are reported on stdout but are
        still returned, so callers have to inspect the result themselves.
        """
        # Encode into a copy so the caller's dict keeps its original values.
        params = dict(data)
        for key, value in data.items():
            if not isinstance(value, basestring):
                params[key] = json.dumps(value)
        url = self.base + '/' + action + '?' + urlencode(params)
        result = json.loads(read_url(url))
        failed = ('status' in result and result['status'] == 'error') \
            or 'error' in result
        if failed:
            print('FAILED %s %s' % (action, params))
            print('URL %s' % url)
        return result

    def get(self, key):
        """Return the response of the ``get`` action for a single key."""
        return self._request('get', {'key': key})

    def get_many(self, keys):
        """Return the response of the ``get_many`` action for ``keys``."""
        return self._request('get_many', {'keys': keys})

    def search(self, query):
        """Run the ``search`` action.

        ``query`` may be a plain string, which is wrapped as
        ``{'query': query}``, or an already built query object that is
        passed through as is.
        """
        if isinstance(query, basestring):
            query = {'query': query}
        data = self._request('search', {'q': query})
        if 'status' in data and data['status'] == 'error':
            print('FAILED %s' % (query,))
        return data

    def things(self, query):
        """Return the response of the ``things`` action for ``query``."""
        return self._request('things', {'query': query})


# Shared module-level client instance.
api = API()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue