Fix HTML cleanup of empty string
This commit is contained in:
parent
6f68729b6f
commit
37cd92dfba
1 changed file with 2 additions and 0 deletions
|
@@ -395,6 +395,8 @@ def sanitize_fragment(html):
|
|||
import html5lib
|
||||
return html5lib.parseFragment(html).toxml().decode('utf-8')
|
||||
'''
|
||||
if not html:
|
||||
return u''
|
||||
import lxml.html
|
||||
body = lxml.html.document_fromstring(html).find('body')
|
||||
html = lxml.html.tostring(body, encoding='utf-8')[6:-7].decode('utf-8')
|
||||
|
|
Loading…
Reference in a new issue