strip html tags from book metadata
This commit is contained in:
parent
b8fc91142a
commit
8c5f21c83d
2 changed files with 12 additions and 1 deletions
|
@ -15,7 +15,7 @@ from . import epub
|
||||||
from . import txt
|
from . import txt
|
||||||
from . import opf
|
from . import opf
|
||||||
|
|
||||||
from meta.utils import decode_html_data, to_isbn13
|
from meta.utils import decode_html_data, strip_tags_data, to_isbn13
|
||||||
import settings
|
import settings
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
@ -115,6 +115,7 @@ def metadata(f, from_=None):
|
||||||
if not data['title'].strip():
|
if not data['title'].strip():
|
||||||
del data['title']
|
del data['title']
|
||||||
data = decode_html_data(data)
|
data = decode_html_data(data)
|
||||||
|
data = strip_tags_data(data)
|
||||||
for key in list(data):
|
for key in list(data):
|
||||||
if not data[key]:
|
if not data[key]:
|
||||||
del data[key]
|
del data[key]
|
||||||
|
|
|
@ -41,3 +41,13 @@ def decode_html_data(data):
|
||||||
elif isinstance(data, str):
|
elif isinstance(data, str):
|
||||||
data = ox.decode_html(data)
|
data = ox.decode_html(data)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
def strip_tags_data(data):
    """Recursively strip HTML tags from every string found in *data*.

    Companion to ``decode_html_data``: it walks nested metadata and runs
    each string through ``ox.strip_tags``.

    Behavior by type:

    * ``dict`` -- each value is replaced by its stripped version. The dict
      is updated in place and the same object is returned; keys are left
      untouched.
    * ``list`` -- a new list holding the stripped items is returned; the
      original list object is not modified.
    * ``str`` -- the result of ``ox.strip_tags(data)`` is returned.
    * anything else (numbers, ``None``, ...) -- returned unchanged.
    """
    if isinstance(data, dict):
        # Only existing keys are reassigned, so the dict's size never
        # changes while it is being iterated.
        for key in data:
            data[key] = strip_tags_data(data[key])
    elif isinstance(data, list):
        data = [strip_tags_data(v) for v in data]
    elif isinstance(data, str):
        data = ox.strip_tags(data)
    return data
|
||||||
|
|
Loading…
Add table
Reference in a new issue