diff --git a/oml/media/epub.py b/oml/media/epub.py index 4934940..c367283 100644 --- a/oml/media/epub.py +++ b/oml/media/epub.py @@ -178,7 +178,9 @@ def extract_text(path): for f in z.filelist: if '/._' in f.filename or f.filename.startswith('._'): continue - if f.filename.endswith('html'): + if 'META-INF' in f.filename: + continue + if f.filename.split('.')[-1] in ('html', 'xml', 'htm'): data += z.read(f.filename).decode('utf-8', 'ignore') return data