better metadata lookup
This commit is contained in:
parent
2298fe68b9
commit
7e37713c95
3 changed files with 20 additions and 10 deletions
|
@ -21,6 +21,7 @@ providers = [
|
|||
('openlibrary', 'olid'),
|
||||
('loc', 'lccn'),
|
||||
('worldcat', 'oclc'),
|
||||
('worldcat', 'isbn'),
|
||||
('lookupbyisbn', 'asin'),
|
||||
('lookupbyisbn', 'isbn'),
|
||||
('abebooks', 'isbn')
|
||||
|
@ -36,23 +37,31 @@ def find(query):
|
|||
'''
|
||||
return results
|
||||
|
||||
def lookup_provider(arg):
    '''
    Collect the ids that one provider reports for the ids known so far.

    arg is a single 5-tuple (provider, id, ids, key, value):
        provider    name of a module-level object exposing get_ids(key, value)
        id          the id type this provider is queried with
        ids         iterable of (key, value) pairs
        key, value  accepted so existing callers keep working; not used

    The provider is queried with every pair in ids whose key equals the
    given id type; the 'openlibrary' provider is queried with every pair
    regardless of its key.

    Returns a set of all items yielded by the get_ids() calls.
    '''
    # The last two slots are unpacked only to keep the 5-tuple contract;
    # the loop below supplies its own key/value pairs.
    provider, id_type, ids, _key, _value = arg
    query_all = provider in ('openlibrary', )
    values = set()
    for key, value in ids:
        if query_all or key == id_type:
            # Resolve the provider lazily so an unknown provider name only
            # fails when there is actually something to look up.
            values.update(globals()[provider].get_ids(key, value))
    return values
|
||||
|
||||
def lookup(key, value):
|
||||
if not isvalid_id(key, value):
|
||||
return {}
|
||||
data = {key: [value]}
|
||||
ids = [(key, value)]
|
||||
ids = set([(key, value)])
|
||||
provider_data = {}
|
||||
done = False
|
||||
|
||||
while not done:
|
||||
done = True
|
||||
for provider, id in providers:
|
||||
for key, value in ids:
|
||||
for kv in globals()[provider].get_ids(key, value):
|
||||
if not kv in ids:
|
||||
ids.append(kv)
|
||||
done = False
|
||||
result = lookup_provider((provider, id, ids, key, value))
|
||||
done = not result - ids
|
||||
ids.update(result)
|
||||
logger.debug('FIXME: sort ids')
|
||||
ids.sort(key=lambda i: ox.sort_string(''.join(i)))
|
||||
ids = sorted(ids, key=lambda i: ox.sort_string(''.join(i)))
|
||||
logger.debug('IDS %s', ids)
|
||||
for k, v in ids:
|
||||
for provider, id in providers:
|
||||
|
|
|
@ -15,7 +15,7 @@ base = 'http://www.abebooks.com'
|
|||
def get_ids(key, value):
|
||||
ids = []
|
||||
if key == 'isbn':
|
||||
url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, id)
|
||||
url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, value)
|
||||
data = read_url(url, unicode=True)
|
||||
urls = re.compile('href="(/servlet/BookDetailsPL[^"]+)"').findall(data)
|
||||
if urls:
|
||||
|
|
|
@ -31,7 +31,7 @@ def get_ids(key, value):
|
|||
m = re.compile('href="(/Lookup/Book/[^"]+?)"').findall(data)
|
||||
if m:
|
||||
asin = m[0].split('/')[-3]
|
||||
if not stdnum.isbn.is_valid(asin):
|
||||
if stdnum.isbn.to_isbn10(asin) or not stdnum.isbn.is_valid(asin):
|
||||
ids.append(('asin', asin))
|
||||
if key == 'isbn':
|
||||
add_other_isbn(value)
|
||||
|
@ -89,5 +89,6 @@ def lookup(id):
|
|||
return r
|
||||
|
||||
def amazon_lookup(asin):
    '''
    Return the ISBNs listed on the Amazon product page for asin.

    Fetches http://www.amazon.com/dp/<asin> once, decodes it as UTF-8
    (undecodable bytes are ignored), narrows the page with find_re() to
    the 'Formats</h3>.*?</table' section and returns whatever find_isbns()
    extracts from it as a de-duplicated list (order is not defined).
    '''
    url = 'http://www.amazon.com/dp/%s' % asin
    # NOTE(review): timeout=-1 presumably makes read_url reuse a cached
    # copy indefinitely - confirm against read_url's documentation.
    html = read_url(url, timeout=-1).decode('utf-8', 'ignore')
    return list(set(find_isbns(find_re(html, 'Formats</h3>.*?</table'))))
|
||||
|
|
Loading…
Reference in a new issue