az fixes
This commit is contained in:
parent
e0c2ccbb24
commit
6211253675
1 changed file with 28 additions and 6 deletions
@@ -27,8 +27,13 @@ def info(key, value):
     info['title'] = strip_tags(decode_html(doc.xpath('//span[@id="productTitle"]')[0].text))
     info['title'] = re.sub(' \([^\)]+? Classics\)', '', info['title'])
     info['title'] = re.sub(' \([^\)]+? Collection\)', '', info['title'])
-    info['description'] = strip_tags(decode_html(unquote(re.compile('encodedDescription\' : "(.*?)",').findall(data)[0])))
-    info['description'] = fix_bad_unicode(info['description'])
+    d = re.compile('encodedDescription\' : "(.*?)",').findall(data)
+    if d:
+        info['description'] = strip_tags(decode_html(unquote(d[0])))
+        info['description'] = fix_bad_unicode(info['description'])
+    else:
+        info['description'] = ''
+
     content = doc.xpath('//div[@class="content"]')[0]
     content_info = {}
     for li in content.xpath('.//li'):
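
Note (not part of the diff): the guard matters because re.findall returns an empty list when the pattern does not match, so the old unconditional [0] raised IndexError on pages without an encodedDescription field. A minimal standalone sketch, with a made-up HTML string for illustration:

    import re

    data = '<html>page without an encoded description</html>'
    d = re.compile('encodedDescription\' : "(.*?)",').findall(data)
    print(d)  # [] -- indexing d[0] here would raise IndexError,
              # hence the new `if d:` / `else: info['description'] = ''` fallback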
@@ -47,7 +52,6 @@ def info(key, value):
 
     if 'ISBN-13' in content_info:
-        info['isbn'] = content_info['ISBN-13'].replace('-', '')
-        info['isbn'].append(content_info['ISBN-13'].replace('-', ''))
+        info['isbn'] = content_info['ISBN-13'].replace('-', '')
     elif 'ISBN-10' in content_info:
         info['isbn'] = stdnum.isbn.to_isbn13(content_info['ISBN-10'])
 
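
Note (not part of the diff): the dropped .append call is presumably a leftover from a version where info['isbn'] was a list; after the assignment on the line above it is a plain str, so the call could only fail. A quick sketch:

    isbn = '978-0-14-143956-4'.replace('-', '')
    print(isbn)  # '9780141439564'
    # isbn.append(isbn)  # AttributeError: 'str' object has no attribute 'append'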
@@ -56,13 +60,15 @@ def info(key, value):
         for span in a:
             r = span.getchildren()[0].text.strip()
             role = get_role(r)
-            if not role in info: info[role] = []
+            if role not in info:
+                info[role] = []
             info[role].append(span.text.strip())
     else:
         for span in doc.xpath('//span[@class="author notFaded"]'):
             author = [x.strip() for x in span.text_content().strip().split('\n') if x.strip()]
             role = get_role(author[-1])
-            if not role in info: info[role] = []
+            if role not in info:
+                info[role] = []
             info[role].append(author[0])
 
     covers = re.compile('data-a-dynamic-image="({.+?})"').findall(data)[0]
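
Note (not part of the diff): both hunks are the same fix, replacing the one-liner `if not role in info: info[role] = []` with the PEP 8 spelling `role not in info` split over two lines. For comparison only, dict.setdefault would collapse the membership test and the append into a single call:

    info = {}
    info.setdefault('author', []).append('Jane Doe')
    print(info)  # {'author': ['Jane Doe']}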
@@ -74,6 +80,22 @@ def info(key, value):
     info['cover'] = re.sub('(\._SX.+?_\.)', '.', url)
     return info
 
+def get_price(asin, currency='EUR'):
+    if currency == 'EUR':
+        url = 'http://www.amazon.de/dp/' + asin
+    else:
+        url = 'http://www.amazon.com/dp/' + asin
+    data = read_url(url).decode()
+    doc = lxml.html.document_fromstring(data)
+    for price in doc.xpath("//span[contains(@class, 'a-color-price')]"):
+        price = price.text_content().strip()
+        if currency == 'EUR':
+            price = price.replace('EUR ', '').replace(',', '.')
+        else:
+            price = price.replace('$', '').strip()
+        price = float(price)
+        return price
+
 def get_role(value):
     if 'Translator' in value:
         role = 'translator'