Strip HTML tags from book metadata

This commit is contained in:
j 2016-02-04 15:25:27 +05:30
parent b8fc91142a
commit 8c5f21c83d
2 changed files with 12 additions and 1 deletions

View File

@ -15,7 +15,7 @@ from . import epub
from . import txt
from . import opf
from meta.utils import decode_html_data, to_isbn13
from meta.utils import decode_html_data, strip_tags_data, to_isbn13
import settings
import logging
@ -115,6 +115,7 @@ def metadata(f, from_=None):
if not data['title'].strip():
del data['title']
data = decode_html_data(data)
data = strip_tags_data(data)
for key in list(data):
if not data[key]:
del data[key]

View File

@ -41,3 +41,13 @@ def decode_html_data(data):
elif isinstance(data, str):
data = ox.decode_html(data)
return data
def strip_tags_data(data):
    """Recursively run ``ox.strip_tags`` over every string found in *data*.

    Strings are passed through ``ox.strip_tags`` and the result returned.
    Lists are rebuilt as a new list of processed items. Dicts are updated
    in place, value by value, and the same dict object is returned. Any
    other value (numbers, ``None``, ...) is returned untouched.
    """
    if isinstance(data, str):
        return ox.strip_tags(data)
    if isinstance(data, list):
        return list(map(strip_tags_data, data))
    if isinstance(data, dict):
        # Only existing keys are reassigned, so the dict's size never
        # changes while it is being iterated.
        for name in data:
            data[name] = strip_tags_data(data[name])
    return data