Use lxml.html.fragment_fromstring instead of document_fromstring in sanitize_fragment

This commit is contained in:
j 2014-02-04 10:40:01 +00:00
parent 7577b319ce
commit 8bda86c17d

View file

@ -398,8 +398,5 @@ def sanitize_fragment(html):
if not html:
return u''
import lxml.html
body = lxml.html.document_fromstring(html).find('body')
html = lxml.html.tostring(body, encoding='utf-8')[6:-7].decode('utf-8')
if html.startswith('<p>') and html.endswith('</p>'):
html = html[3:-4]
return html
html = lxml.html.fragment_fromstring(html, create_parent=False)
return lxml.html.tostring(html, encoding='utf-8').decode('utf-8')