diff --git a/oml/meta/amazon.py b/oml/meta/amazon.py index 8fd8d92..273ace6 100644 --- a/oml/meta/amazon.py +++ b/oml/meta/amazon.py @@ -27,8 +27,13 @@ def info(key, value): info['title'] = strip_tags(decode_html(doc.xpath('//span[@id="productTitle"]')[0].text)) info['title'] = re.sub(' \([^\)]+? Classics\)', '', info['title']) info['title'] = re.sub(' \([^\)]+? Collection\)', '', info['title']) - info['description'] = strip_tags(decode_html(unquote(re.compile('encodedDescription\' : "(.*?)",').findall(data)[0]))) - info['description'] = fix_bad_unicode(info['description']) + d = re.compile('encodedDescription\' : "(.*?)",').findall(data) + if d: + info['description'] = strip_tags(decode_html(unquote(d[0]))) + info['description'] = fix_bad_unicode(info['description']) + else: + info['description'] = '' + content = doc.xpath('//div[@class="content"]')[0] content_info = {} for li in content.xpath('.//li'): @@ -47,7 +52,6 @@ def info(key, value): if 'ISBN-13' in content_info: info['isbn'] = content_info['ISBN-13'].replace('-', '') - info['isbn'].append(content_info['ISBN-13'].replace('-', '')) elif 'ISBN-10' in content_info: info['isbn'] = stdnum.isbn.to_isbn13(content_info['ISBN-10']) @@ -56,24 +60,42 @@ def info(key, value): for span in a: r = span.getchildren()[0].text.strip() role = get_role(r) - if not role in info: info[role] = [] + if role not in info: + info[role] = [] info[role].append(span.text.strip()) else: for span in doc.xpath('//span[@class="author notFaded"]'): author = [x.strip() for x in span.text_content().strip().split('\n') if x.strip()] role = get_role(author[-1]) - if not role in info: info[role] = [] + if role not in info: + info[role] = [] info[role].append(author[0]) covers = re.compile('data-a-dynamic-image="({.+?})"').findall(data)[0] covers = json.loads(decode_html(covers)) - last = [0,0] + last = [0, 0] for url in covers: if covers[url] > last: last = covers[url] info['cover'] = re.sub('(\._SX.+?_\.)', '.', url) return info +def get_price(asin, currency='EUR'): + if currency == 'EUR': + url = 'http://www.amazon.de/dp/' + asin + else: + url = 'http://www.amazon.com/dp/' + asin + data = read_url(url).decode() + doc = lxml.html.document_fromstring(data) + for price in doc.xpath("//span[contains(@class, 'a-color-price')]"): + price = price.text_content().strip() + if currency == 'EUR': + price = price.replace('EUR ', '').replace(',', '.') + else: + price = price.replace('$', '').strip() + price = float(price) + return price + def get_role(value): if 'Translator' in value: role = 'translator'