diff --git a/oml/media/__init__.py b/oml/media/__init__.py index a4d0655..307c695 100644 --- a/oml/media/__init__.py +++ b/oml/media/__init__.py @@ -48,7 +48,7 @@ def metadata(f, from_=None): ): if key in info: value = info[key] - if isinstance(value, str): + if isinstance(value, bytes): try: value = value.decode('utf-8') except: diff --git a/oml/media/epub.py b/oml/media/epub.py index a25b0f5..7865a56 100644 --- a/oml/media/epub.py +++ b/oml/media/epub.py @@ -77,18 +77,20 @@ def info(epub): info = ET.fromstring(z.read(opf[0])) metadata = info.findall('{http://www.idpf.org/2007/opf}metadata')[0] for e in metadata.getchildren(): - if e.text and e.text not in ('unknown', 'none'): + if e.text and e.text.strip() and e.text not in ('unknown', 'none'): key = e.tag.split('}')[-1] key = { 'creator': 'author', }.get(key, key) - value = e.text + value = e.text.strip() if key == 'identifier': value = normalize_isbn(value) if stdnum.isbn.is_valid(value): data['isbn'] = [value] + elif key == 'author': + data[key] = value.split(', ') else: - data[key] = e.text + data[key] = value text = extract_text(epub) data['textsize'] = len(text) if not 'isbn' in data: