def get_ids(key, value):
    """Collect alternative identifiers for a book via lookupbyisbn.com.

    ``key`` names the kind of identifier held in ``value``.  For
    ``'isbn10'``, ``'isbn13'`` and ``'asin'`` the site's search page is
    fetched and, when it contains a link to a book page, an ASIN is read
    from the first such link.  In addition, an ISBN-10 is converted to its
    ISBN-13 form, and an ASIN that validates as an ISBN is reported as an
    ``'isbn10'`` as well.

    Returns a list of ``(key, value)`` tuples.  The list is empty when
    nothing was found or when ``key`` is not one of the handled kinds.
    """
    found = []

    if key in ('isbn10', 'isbn13', 'asin'):
        search_url = '%s/Search/Book/%s/1' % (base, value)
        html = read_url(search_url).decode('utf-8')
        book_links = re.findall('href="(/Lookup/Book/[^"]+?)"', html)
        if book_links:
            # The ASIN is the third path segment from the end of the
            # first book link on the search page.
            path_parts = book_links[0].split('/')
            found.append(('asin', path_parts[-3]))

    if key == 'isbn10':
        found.append(('isbn13', stdnum.isbn.to_isbn13(value)))
    elif key == 'asin' and stdnum.isbn.is_valid(value):
        found.append(('isbn10', value))

    # Only log when the lookup actually produced something.
    if found:
        logger.debug('get_ids %s, %s => %s', key, value, found)
    return found

(.*?)

") keys = { 'author': 'Author(s)', 'publisher': 'Publisher', 'date': 'Publication date', 'edition': 'Edition', 'binding': 'Binding', 'volume': 'Volume(s)', 'pages': 'Pages', } for key in keys: r[key] = find_re(data, '%s:(.*?)'% re.escape(keys[key])) if r[key] == '--' or not r[key]: del r[key] if key == 'pages' and key in r: r[key] = int(r[key]) desc = find_re(data, '

(.*?)

Description:<\/h2>(.*?)', ' ').replace(' ', ' ').replace('', ' ') r['description'] = decode_html(strip_tags(desc)) r['cover'] = find_re(data, '

Description:<\/h2>(.*?)

', ' ').replace('
', ' ').replace('
', ' ') r['description'] = decode_html(strip_tags(desc)) r['cover'] = find_re(data, '