diff --git a/oml/media/epub.py b/oml/media/epub.py index e9dacde..6e83864 100644 --- a/oml/media/epub.py +++ b/oml/media/epub.py @@ -11,6 +11,7 @@ from urllib.parse import unquote from PIL import Image import stdnum.isbn +from ox import strip_tags, decode_html from utils import normalize_isbn, find_isbns, get_language @@ -36,27 +37,33 @@ def cover(path): if opf: info = ET.fromstring(z.read(opf[0])) manifest = info.findall('{http://www.idpf.org/2007/opf}manifest')[0] - for e in manifest.getchildren(): - if 'image' in e.attrib['media-type']: + images = [e for e in manifest.getchildren() if 'image' in e.attrib['media-type']] + if images: + image_data = [] + for e in images: filename = unquote(e.attrib['href']) filename = os.path.normpath(os.path.join(os.path.dirname(opf[0]), filename)) if filename in files: - data = z.read(filename) - break - elif 'html' in e.attrib['media-type']: - filename = unquote(e.attrib['href']) - filename = os.path.normpath(os.path.join(os.path.dirname(opf[0]), filename)) - html = z.read(filename).decode('utf-8', 'ignore') - img = re.compile('