diff --git a/oml/media/epub.py b/oml/media/epub.py index 7f1b081..ca4a6b2 100644 --- a/oml/media/epub.py +++ b/oml/media/epub.py @@ -89,7 +89,8 @@ def info(epub): except zipfile.BadZipFile: logger.debug('invalid epub file %s', epub) return data - opf = [f.filename for f in z.filelist if f.filename.endswith('opf')] + files = [f.filename for f in z.filelist] + opf = [f for f in files if f.endswith('opf')] if opf: info = ET.fromstring(z.read(opf[0])) metadata = info.findall('{http://www.idpf.org/2007/opf}metadata') @@ -114,12 +115,13 @@ def info(epub): if guide: for ref in guide[0].findall('{http://www.idpf.org/2007/opf}reference'): if ref.attrib.get('type') == 'toc': - filename = unquote(ref.attrib['href']) + filename = unquote(ref.attrib['href']).split('#')[0] filename = os.path.normpath(os.path.join(os.path.dirname(opf[0]), filename)) - toc = z.read(filename) - if toc: - doc = lxml.html.document_fromstring(toc) - data['tableofcontents'] = '\n'.join([a.text_content() for a in doc.xpath('//a')]) + if filename in files: + toc = z.read(filename) + if toc: + doc = lxml.html.document_fromstring(toc) + data['tableofcontents'] = '\n'.join([a.text_content() for a in doc.xpath('//a')]) if 'description' in data: data['description'] = strip_tags(decode_html(data['description']))