fix sanitize_fragment('\ufeff')

This commit is contained in:
j 2023-07-27 18:35:33 +02:00
parent 6b4a307e23
commit d03a6b120d

View file

@@ -440,7 +440,12 @@ def sanitize_fragment(html):
if not html.strip():
return html
import lxml.html
try:
body = lxml.html.document_fromstring(html).find('body')
except lxml.etree.ParserError as e:
if e.args and e.args[0] == 'Document is empty':
return html
raise e
html = lxml.html.tostring(body, encoding='utf-8')[6:-7].decode('utf-8')
if html.startswith('<p>') and html.endswith('</p>'):
html = html[3:-4]