fix html cleanup of empty string
This commit is contained in:
parent
6f68729b6f
commit
37cd92dfba
1 changed file with 2 additions and 0 deletions
|
@@ -395,6 +395,8 @@ def sanitize_fragment(html):
|
||||||
import html5lib
|
import html5lib
|
||||||
return html5lib.parseFragment(html).toxml().decode('utf-8')
|
return html5lib.parseFragment(html).toxml().decode('utf-8')
|
||||||
'''
|
'''
|
||||||
|
if not html:
|
||||||
|
return u''
|
||||||
import lxml.html
|
import lxml.html
|
||||||
body = lxml.html.document_fromstring(html).find('body')
|
body = lxml.html.document_fromstring(html).find('body')
|
||||||
html = lxml.html.tostring(body, encoding='utf-8')[6:-7].decode('utf-8')
|
html = lxml.html.tostring(body, encoding='utf-8')[6:-7].decode('utf-8')
|
||||||
|
|
Loading…
Reference in a new issue