Parse EPUBs without a manifest

This commit is contained in:
j 2016-01-06 18:40:23 +05:30
parent c86ba8ac99
commit d866b4de91

View file

@ -39,8 +39,13 @@ def cover(path):
if opf: if opf:
#logger.debug('opf: %s', z.read(opf[0]).decode()) #logger.debug('opf: %s', z.read(opf[0]).decode())
info = ET.fromstring(z.read(opf[0])) info = ET.fromstring(z.read(opf[0]))
metadata = info.findall('{http://www.idpf.org/2007/opf}metadata')[0] metadata = info.findall('{http://www.idpf.org/2007/opf}metadata')
manifest = info.findall('{http://www.idpf.org/2007/opf}manifest')[0] if metadata:
metadata = metadata[0]
manifest = info.findall('{http://www.idpf.org/2007/opf}manifest')
if manifest:
manifest = manifest[0]
if metadata and manifest:
for e in metadata.getchildren(): for e in metadata.getchildren():
if e.tag == '{http://www.idpf.org/2007/opf}meta' and e.attrib.get('name') == 'cover': if e.tag == '{http://www.idpf.org/2007/opf}meta' and e.attrib.get('name') == 'cover':
cover_id = e.attrib['content'] cover_id = e.attrib['content']
@ -50,6 +55,7 @@ def cover(path):
filename = os.path.normpath(os.path.join(os.path.dirname(opf[0]), filename)) filename = os.path.normpath(os.path.join(os.path.dirname(opf[0]), filename))
if filename in files: if filename in files:
return use(filename) return use(filename)
if manifest:
images = [e for e in manifest.getchildren() if 'image' in e.attrib['media-type']] images = [e for e in manifest.getchildren() if 'image' in e.attrib['media-type']]
if images: if images:
image_data = [] image_data = []
@ -94,7 +100,9 @@ def info(epub):
opf = [f.filename for f in z.filelist if f.filename.endswith('opf')] opf = [f.filename for f in z.filelist if f.filename.endswith('opf')]
if opf: if opf:
info = ET.fromstring(z.read(opf[0])) info = ET.fromstring(z.read(opf[0]))
metadata = info.findall('{http://www.idpf.org/2007/opf}metadata')[0] metadata = info.findall('{http://www.idpf.org/2007/opf}metadata')
if metadata:
metadata = metadata[0]
for e in metadata.getchildren(): for e in metadata.getchildren():
if e.text and e.text.strip() and e.text not in ('unknown', 'none'): if e.text and e.text.strip() and e.text not in ('unknown', 'none'):
key = e.tag.split('}')[-1] key = e.tag.split('}')[-1]