Fix EPUB metadata parser
This commit is contained in:
parent
20864c591f
commit
81cd9c2337
2 changed files with 6 additions and 4 deletions
|
@@ -48,7 +48,7 @@ def metadata(f, from_=None):
|
||||||
):
|
):
|
||||||
if key in info:
|
if key in info:
|
||||||
value = info[key]
|
value = info[key]
|
||||||
if isinstance(value, str):
|
if isinstance(value, bytes):
|
||||||
try:
|
try:
|
||||||
value = value.decode('utf-8')
|
value = value.decode('utf-8')
|
||||||
except:
|
except:
|
||||||
|
|
|
@@ -77,18 +77,20 @@ def info(epub):
|
||||||
info = ET.fromstring(z.read(opf[0]))
|
info = ET.fromstring(z.read(opf[0]))
|
||||||
metadata = info.findall('{http://www.idpf.org/2007/opf}metadata')[0]
|
metadata = info.findall('{http://www.idpf.org/2007/opf}metadata')[0]
|
||||||
for e in metadata.getchildren():
|
for e in metadata.getchildren():
|
||||||
if e.text and e.text not in ('unknown', 'none'):
|
if e.text and e.text.strip() and e.text not in ('unknown', 'none'):
|
||||||
key = e.tag.split('}')[-1]
|
key = e.tag.split('}')[-1]
|
||||||
key = {
|
key = {
|
||||||
'creator': 'author',
|
'creator': 'author',
|
||||||
}.get(key, key)
|
}.get(key, key)
|
||||||
value = e.text
|
value = e.text.strip()
|
||||||
if key == 'identifier':
|
if key == 'identifier':
|
||||||
value = normalize_isbn(value)
|
value = normalize_isbn(value)
|
||||||
if stdnum.isbn.is_valid(value):
|
if stdnum.isbn.is_valid(value):
|
||||||
data['isbn'] = [value]
|
data['isbn'] = [value]
|
||||||
|
elif key == 'author':
|
||||||
|
data[key] = value.split(', ')
|
||||||
else:
|
else:
|
||||||
data[key] = e.text
|
data[key] = value
|
||||||
text = extract_text(epub)
|
text = extract_text(epub)
|
||||||
data['textsize'] = len(text)
|
data['textsize'] = len(text)
|
||||||
if not 'isbn' in data:
|
if not 'isbn' in data:
|
||||||
|
|
Loading…
Add table
Reference in a new issue