Clean up meta parser

This commit is contained in:
j 2014-05-26 10:23:10 +02:00
commit 67d1814192
6 changed files with 49 additions and 6 deletions

View file

@@ -38,7 +38,12 @@ def lookup(id):
doc = lxml.html.document_fromstring(html)
for e in doc.xpath("//*[contains(@id, 'biblio')]"):
key = e.attrib['id'].replace('biblio-', '')
value = e.text_content()
value = e.text_content().strip()
k = keys.get(key, key)
if k == 'date' and value == 'Publication Date:':
value = ''
elif k == 'publisher' and value == 'Publisher:':
value = ''
if value and key not in ('bookcondition', 'binding', 'edition-amz'):
data[keys.get(key, key)] = value
data[k] = value
return data