From 36fc6e7e735be74e3b26bb33605a4e57dab20b09 Mon Sep 17 00:00:00 2001 From: j Date: Thu, 28 Jan 2016 17:53:50 +0530 Subject: [PATCH] pages can also end with xml --- oml/media/epub.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/oml/media/epub.py b/oml/media/epub.py index 4934940..c367283 100644 --- a/oml/media/epub.py +++ b/oml/media/epub.py @@ -178,7 +178,9 @@ def extract_text(path): for f in z.filelist: if '/._' in f.filename or f.filename.startswith('._'): continue - if f.filename.endswith('html'): + if 'META-INF' in f.filename: + continue + if f.filename.split('.')[-1] in ('html', 'xml', 'htm'): data += z.read(f.filename).decode('utf-8', 'ignore') return data