From 8c5f21c83d2d11f3ae1790ef30ceae0cb662b93b Mon Sep 17 00:00:00 2001
From: j
Date: Thu, 4 Feb 2016 15:25:27 +0530
Subject: [PATCH] strip html tags from book metadata

---
 oml/media/__init__.py |  3 ++-
 oml/meta/utils.py     | 10 ++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/oml/media/__init__.py b/oml/media/__init__.py
index 428a37d..6dc2f22 100644
--- a/oml/media/__init__.py
+++ b/oml/media/__init__.py
@@ -15,7 +15,7 @@ from . import epub
 from . import txt
 from . import opf
 
-from meta.utils import decode_html_data, to_isbn13
+from meta.utils import decode_html_data, strip_tags_data, to_isbn13
 import settings
 
 import logging
@@ -115,6 +115,7 @@ def metadata(f, from_=None):
         if not data['title'].strip():
             del data['title']
     data = decode_html_data(data)
+    data = strip_tags_data(data)
     for key in list(data):
         if not data[key]:
             del data[key]
diff --git a/oml/meta/utils.py b/oml/meta/utils.py
index e8a92f8..3868468 100644
--- a/oml/meta/utils.py
+++ b/oml/meta/utils.py
@@ -41,3 +41,13 @@ def decode_html_data(data):
     elif isinstance(data, str):
         data = ox.decode_html(data)
     return data
+
+def strip_tags_data(data):
+    if isinstance(data, dict):
+        for key in data:
+            data[key] = strip_tags_data(data[key])
+    elif isinstance(data, list):
+        data = [strip_tags_data(v) for v in data]
+    elif isinstance(data, str):
+        data = ox.strip_tags(data)
+    return data