From cdc56bc63f1c3dbe0ba6d975a6c21a0892945b2f Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Thu, 3 Apr 2014 12:15:30 +0200 Subject: [PATCH] add lookupbyisbn --- ox/web/lookupbyisbn.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 ox/web/lookupbyisbn.py diff --git a/ox/web/lookupbyisbn.py b/ox/web/lookupbyisbn.py new file mode 100644 index 0000000..af5d5f3 --- /dev/null +++ b/ox/web/lookupbyisbn.py @@ -0,0 +1,40 @@ +from ox.cache import read_url +from ox import find_re, strip_tags +import re + +base = 'http://www.lookupbyisbn.com' + +def get_data(isbn): + r = {} + url = '%s/Search/Book/%s/1' % (base, isbn) + + data = read_url(url).decode('utf-8') + m = re.compile('href="(/Lookup/Book/[^"]+?)"').findall(data) + if m: + ids = m[0].split('/') + r['isbn'] = ids[-2] + r['asin'] = ids[-3] + url = '%s%s' % (base, m[0]) + data = read_url(url).decode('utf-8') + r["title"] = find_re(data, "

(.*?)

") + keys = { + 'author': 'Author(s)', + 'publisher': 'Publisher', + 'date': 'Publication date', + 'edition': 'Edition', + 'binding': 'Binding', + 'volume': 'Volume(s)', + 'pages': 'Pages', + } + for key in keys: + r[key] = find_re(data, '%s:(.*?)'% re.escape(keys[key])) + if r[key] == '--': + r[key] = '' + if key == 'pages' and r[key]: + r[key] = int(r[key]) + r['description'] = strip_tags(find_re(data, '

Description:<\/h2>(.*?)