From bcd0c528fd8e4a36b7a0398d1114af814ce9c705 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Tue, 26 Oct 2010 19:33:32 +0200
Subject: [PATCH] add amazon

---
 ox/web/amazon.py | 69 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 ox/web/amazon.py

diff --git a/ox/web/amazon.py b/ox/web/amazon.py
new file mode 100644
index 0000000..69acafb
--- /dev/null
+++ b/ox/web/amazon.py
@@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+import re
+from urllib import quote
+
+from ox import findRe, stripTags, decodeHtml
+from ox.cache import readUrlUnicode
+
+
+def findISBN(title, author):
+    '''
+    Search amazon.com books for "title author" and return the getData()
+    result for the first hit, or {} if there is no hit or author is not
+    in that result's authors.
+    '''
+    q = '%s %s' % (title, author)
+    url = "http://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Dstripbooks&field-keywords=" + "%s&x=0&y=0" % quote(q)
+    data = readUrlUnicode(url)
+    links = re.compile('href="(http://www.amazon.com/.*?/dp/.*?)"').findall(data)
+    # an empty result page used to raise IndexError on [0]; treat it as no match
+    if not links:
+        return {}
+    item_id = findRe(links[0], '/dp/(.*?)/')
+    data = getData(item_id)
+    if author in data['authors']:
+        return data
+    return {}
+
+def getData(id):
+    url = "http://www.amazon.com/title/dp/%s/" % id
+    data = readUrlUnicode(url)
+
+
+    def findData(key):
+        return findRe(data, '