add abebooks

This commit is contained in:
j 2014-05-06 00:24:13 +02:00
parent 8212c28ac7
commit 73a60e73d7

20
ox/web/abebooks.py Normal file
View file

@ -0,0 +1,20 @@
from ox.cache import read_url
import re
import lxml.html
def get_data(id):
    """Collect bibliographic fields for a book from abebooks.com.

    ``id`` is interpolated into the ``isbn`` query parameter of the
    AbeBooks search URL. The first ``/servlet/BookDetailsPL`` link found
    in the search result page is fetched, and every element whose ``id``
    attribute contains ``biblio`` contributes one entry to the result.

    Returns a dict mapping the element id (with ``biblio-`` removed) to
    the element's text content. Entries with empty text and the
    ``bookcondition`` / ``binding`` keys are left out. The dict is empty
    when the search page contains no details link.
    """
    base = 'http://www.abebooks.com'
    skipped_keys = ('bookcondition', 'binding')
    details_link = re.compile('href="(/servlet/BookDetailsPL[^"]+)"')

    info = {}
    search_url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, id)
    matches = details_link.findall(read_url(search_url))
    if not matches:
        # No listing found for this id: nothing to scrape.
        return info

    # Only the first search hit is followed.
    details_page = read_url(base + matches[0])
    tree = lxml.html.document_fromstring(details_page)
    for element in tree.xpath("//*[contains(@id, 'biblio')]"):
        key = element.attrib['id'].replace('biblio-', '')
        value = element.text_content()
        if not value or key in skipped_keys:
            continue
        info[key] = value
    return info