catch more 404s

2016-01-05 14:00:25 +05:30 · 2016-01-05 14:00:25 +05:30 · 9aff4766e5
commit 9aff4766e5
parent 9996c6b603
1 changed file with 2 additions and 1 deletion
--- a/oml/meta/amazon.py
+++ b/oml/meta/amazon.py
@@ -13,13 +13,14 @@ def info(key, value):
        value = stdnum.isbn.to_isbn10(value)
    if len(value) != 10:
        raise IOError('invalid isbn %s' % value)
    url = 'http://www.amazon.com/dp/' + value
    data = read_url(url).decode()
    doc = lxml.html.document_fromstring(data)
    info = {}
    if '<title>404 - Document Not Found</title>' in data:
        return info
    if 'To discuss automated access to Amazon data please' in data:
        return info
    for l in doc.xpath('//link[@rel="canonical" and @href]'):
        info['asin'] = [l.get('href').rpartition('/')[-1]]
        break