Revert change: fragment_fromstring only parses a single element

This commit is contained in:
j 2014-02-04 10:44:51 +00:00
parent 8bda86c17d
commit 34691832eb

View file

@@ -398,5 +398,8 @@ def sanitize_fragment(html):
if not html: if not html:
return u'' return u''
import lxml.html import lxml.html
html = lxml.html.fragment_fromstring(html, create_parent=False) body = lxml.html.document_fromstring(html).find('body')
return lxml.html.tostring(html, encoding='utf-8').decode('utf-8') html = lxml.html.tostring(body, encoding='utf-8')[6:-7].decode('utf-8')
if html.startswith('<p>') and html.endswith('</p>'):
html = html[3:-4]
return html