add abebooks
This commit is contained in:
parent
8212c28ac7
commit
73a60e73d7
1 changed files with 20 additions and 0 deletions
20
ox/web/abebooks.py
Normal file
20
ox/web/abebooks.py
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
from ox.cache import read_url
|
||||||
|
import re
|
||||||
|
import lxml.html
|
||||||
|
|
||||||
|
def get_data(id):
    """Collect bibliographic fields for a book from abebooks.com.

    The site's search page is requested with *id* passed as the ``isbn``
    query parameter.  The first ``/servlet/BookDetailsPL`` link found in
    that page is then fetched and parsed with lxml.  Every element whose
    ``id`` attribute contains ``biblio`` contributes one entry: the key is
    the element id with ``biblio-`` removed, the value is the element's
    text content.  Entries with empty text, and the ``bookcondition`` and
    ``binding`` keys, are left out.

    Returns a dict of the collected fields; the dict is empty when the
    search page contains no details link.
    """
    site = 'http://www.abebooks.com'
    result = {}

    search_page = read_url('%s/servlet/SearchResults?isbn=%s&sts=t' % (site, id))
    detail_links = re.compile('href="(/servlet/BookDetailsPL[^"]+)"').findall(search_page)
    if not detail_links:
        # No listing for this id: nothing to scrape.
        return result

    # Only the first search hit is followed.
    detail_page = read_url('%s%s' % (site, detail_links[0]))
    tree = lxml.html.document_fromstring(detail_page)

    skipped = ('bookcondition', 'binding')
    for node in tree.xpath("//*[contains(@id, 'biblio')]"):
        field = node.attrib['id'].replace('biblio-', '')
        text = node.text_content()
        if not text or field in skipped:
            continue
        result[field] = text
    return result
|
Loading…
Reference in a new issue