parse epubs without manifest
This commit is contained in:
parent
c86ba8ac99
commit
d866b4de91
1 changed file with 61 additions and 53 deletions
|
@ -39,8 +39,13 @@ def cover(path):
|
||||||
if opf:
|
if opf:
|
||||||
#logger.debug('opf: %s', z.read(opf[0]).decode())
|
#logger.debug('opf: %s', z.read(opf[0]).decode())
|
||||||
info = ET.fromstring(z.read(opf[0]))
|
info = ET.fromstring(z.read(opf[0]))
|
||||||
metadata = info.findall('{http://www.idpf.org/2007/opf}metadata')[0]
|
metadata = info.findall('{http://www.idpf.org/2007/opf}metadata')
|
||||||
manifest = info.findall('{http://www.idpf.org/2007/opf}manifest')[0]
|
if metadata:
|
||||||
|
metadata = metadata[0]
|
||||||
|
manifest = info.findall('{http://www.idpf.org/2007/opf}manifest')
|
||||||
|
if manifest:
|
||||||
|
manifest = manifest[0]
|
||||||
|
if metadata and manifest:
|
||||||
for e in metadata.getchildren():
|
for e in metadata.getchildren():
|
||||||
if e.tag == '{http://www.idpf.org/2007/opf}meta' and e.attrib.get('name') == 'cover':
|
if e.tag == '{http://www.idpf.org/2007/opf}meta' and e.attrib.get('name') == 'cover':
|
||||||
cover_id = e.attrib['content']
|
cover_id = e.attrib['content']
|
||||||
|
@ -50,6 +55,7 @@ def cover(path):
|
||||||
filename = os.path.normpath(os.path.join(os.path.dirname(opf[0]), filename))
|
filename = os.path.normpath(os.path.join(os.path.dirname(opf[0]), filename))
|
||||||
if filename in files:
|
if filename in files:
|
||||||
return use(filename)
|
return use(filename)
|
||||||
|
if manifest:
|
||||||
images = [e for e in manifest.getchildren() if 'image' in e.attrib['media-type']]
|
images = [e for e in manifest.getchildren() if 'image' in e.attrib['media-type']]
|
||||||
if images:
|
if images:
|
||||||
image_data = []
|
image_data = []
|
||||||
|
@ -94,7 +100,9 @@ def info(epub):
|
||||||
opf = [f.filename for f in z.filelist if f.filename.endswith('opf')]
|
opf = [f.filename for f in z.filelist if f.filename.endswith('opf')]
|
||||||
if opf:
|
if opf:
|
||||||
info = ET.fromstring(z.read(opf[0]))
|
info = ET.fromstring(z.read(opf[0]))
|
||||||
metadata = info.findall('{http://www.idpf.org/2007/opf}metadata')[0]
|
metadata = info.findall('{http://www.idpf.org/2007/opf}metadata')
|
||||||
|
if metadata:
|
||||||
|
metadata = metadata[0]
|
||||||
for e in metadata.getchildren():
|
for e in metadata.getchildren():
|
||||||
if e.text and e.text.strip() and e.text not in ('unknown', 'none'):
|
if e.text and e.text.strip() and e.text not in ('unknown', 'none'):
|
||||||
key = e.tag.split('}')[-1]
|
key = e.tag.split('}')[-1]
|
||||||
|
|
Loading…
Reference in a new issue