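Fix EPUB metadata parser: decode only bytes values, strip whitespace from OPF metadata text, and skip whitespace-only entries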
This commit is contained in:
parent
20864c591f
commit
81cd9c2337
2 changed files with 6 additions and 4 deletions
|
@ -48,7 +48,7 @@ def metadata(f, from_=None):
|
|||
):
|
||||
if key in info:
|
||||
value = info[key]
|
||||
if isinstance(value, str):
|
||||
if isinstance(value, bytes):
|
||||
try:
|
||||
value = value.decode('utf-8')
|
||||
except:
|
||||
|
|
|
@ -77,18 +77,20 @@ def info(epub):
|
|||
info = ET.fromstring(z.read(opf[0]))
|
||||
metadata = info.findall('{http://www.idpf.org/2007/opf}metadata')[0]
|
||||
for e in metadata.getchildren():
|
||||
if e.text and e.text not in ('unknown', 'none'):
|
||||
if e.text and e.text.strip() and e.text not in ('unknown', 'none'):
|
||||
key = e.tag.split('}')[-1]
|
||||
key = {
|
||||
'creator': 'author',
|
||||
}.get(key, key)
|
||||
value = e.text
|
||||
value = e.text.strip()
|
||||
if key == 'identifier':
|
||||
value = normalize_isbn(value)
|
||||
if stdnum.isbn.is_valid(value):
|
||||
data['isbn'] = [value]
|
||||
elif key == 'author':
|
||||
data[key] = value.split(', ')
|
||||
else:
|
||||
data[key] = e.text
|
||||
data[key] = value
|
||||
text = extract_text(epub)
|
||||
data['textsize'] = len(text)
|
||||
if not 'isbn' in data:
|
||||
|
|
Loading…
Reference in a new issue