catch more 404s
This commit is contained in:
parent
9996c6b603
commit
9aff4766e5
1 changed file with 2 additions and 1 deletion
|
@@ -13,13 +13,14 @@ def info(key, value):
|
||||||
value = stdnum.isbn.to_isbn10(value)
|
value = stdnum.isbn.to_isbn10(value)
|
||||||
if len(value) != 10:
|
if len(value) != 10:
|
||||||
raise IOError('invalid isbn %s' % value)
|
raise IOError('invalid isbn %s' % value)
|
||||||
|
|
||||||
url = 'http://www.amazon.com/dp/' + value
|
url = 'http://www.amazon.com/dp/' + value
|
||||||
data = read_url(url).decode()
|
data = read_url(url).decode()
|
||||||
doc = lxml.html.document_fromstring(data)
|
doc = lxml.html.document_fromstring(data)
|
||||||
info = {}
|
info = {}
|
||||||
if '<title>404 - Document Not Found</title>' in data:
|
if '<title>404 - Document Not Found</title>' in data:
|
||||||
return info
|
return info
|
||||||
|
if 'To discuss automated access to Amazon data please' in data:
|
||||||
|
return info
|
||||||
for l in doc.xpath('//link[@rel="canonical" and @href]'):
|
for l in doc.xpath('//link[@rel="canonical" and @href]'):
|
||||||
info['asin'] = [l.get('href').rpartition('/')[-1]]
|
info['asin'] = [l.get('href').rpartition('/')[-1]]
|
||||||
break
|
break
|
||||||
|
|
Loading…
Reference in a new issue