fix sanitize_fragment('\ufeff')
This commit is contained in:
parent
6b4a307e23
commit
d03a6b120d
1 changed file with 6 additions and 1 deletion
|
@@ -440,7 +440,12 @@ def sanitize_fragment(html):
|
|||
if not html.strip():
|
||||
return html
|
||||
import lxml.html
|
||||
try:
|
||||
body = lxml.html.document_fromstring(html).find('body')
|
||||
except lxml.etree.ParserError as e:
|
||||
if e.args and e.args[0] == 'Document is empty':
|
||||
return html
|
||||
raise e
|
||||
html = lxml.html.tostring(body, encoding='utf-8')[6:-7].decode('utf-8')
|
||||
if html.startswith('<p>') and html.endswith('</p>'):
|
||||
html = html[3:-4]
|
||||
|
|
Loading…
Reference in a new issue