TOC href can contain a '#' fragment: strip it and only read the file if it exists in the archive

This commit is contained in:
j 2016-01-11 19:25:33 +05:30
parent 02e040d9f5
commit bb09596566

View file

@ -89,7 +89,8 @@ def info(epub):
except zipfile.BadZipFile:
logger.debug('invalid epub file %s', epub)
return data
opf = [f.filename for f in z.filelist if f.filename.endswith('opf')]
files = [f.filename for f in z.filelist]
opf = [f for f in files if f.endswith('opf')]
if opf:
info = ET.fromstring(z.read(opf[0]))
metadata = info.findall('{http://www.idpf.org/2007/opf}metadata')
@ -114,8 +115,9 @@ def info(epub):
if guide:
for ref in guide[0].findall('{http://www.idpf.org/2007/opf}reference'):
if ref.attrib.get('type') == 'toc':
filename = unquote(ref.attrib['href'])
filename = unquote(ref.attrib['href']).split('#')[0]
filename = os.path.normpath(os.path.join(os.path.dirname(opf[0]), filename))
if filename in files:
toc = z.read(filename)
if toc:
doc = lxml.html.document_fromstring(toc)