fix sanitize_fragment('\ufeff')
This commit is contained in:
parent
6b4a307e23
commit
d03a6b120d
1 changed file with 6 additions and 1 deletion
|
@@ -440,7 +440,12 @@ def sanitize_fragment(html):
|
||||||
if not html.strip():
|
if not html.strip():
|
||||||
return html
|
return html
|
||||||
import lxml.html
|
import lxml.html
|
||||||
|
try:
|
||||||
body = lxml.html.document_fromstring(html).find('body')
|
body = lxml.html.document_fromstring(html).find('body')
|
||||||
|
except lxml.etree.ParserError as e:
|
||||||
|
if e.args and e.args[0] == 'Document is empty':
|
||||||
|
return html
|
||||||
|
raise e
|
||||||
html = lxml.html.tostring(body, encoding='utf-8')[6:-7].decode('utf-8')
|
html = lxml.html.tostring(body, encoding='utf-8')[6:-7].decode('utf-8')
|
||||||
if html.startswith('<p>') and html.endswith('</p>'):
|
if html.startswith('<p>') and html.endswith('</p>'):
|
||||||
html = html[3:-4]
|
html = html[3:-4]
|
||||||
|
|
Loading…
Reference in a new issue