catch more 404s

This commit is contained in:
j 2016-01-05 14:00:25 +05:30
parent 9996c6b603
commit 9aff4766e5
1 changed file with 2 additions and 1 deletion

View File

@@ -13,13 +13,14 @@ def info(key, value):
value = stdnum.isbn.to_isbn10(value)
if len(value) != 10:
raise IOError('invalid isbn %s' % value)
url = 'http://www.amazon.com/dp/' + value
data = read_url(url).decode()
doc = lxml.html.document_fromstring(data)
info = {}
if '<title>404 - Document Not Found</title>' in data:
return info
if 'To discuss automated access to Amazon data please' in data:
return info
for l in doc.xpath('//link[@rel="canonical" and @href]'):
info['asin'] = [l.get('href').rpartition('/')[-1]]
break