From 8bda86c17dd3ca501c167ac3b883c872c399bb20 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Tue, 4 Feb 2014 10:40:01 +0000
Subject: [PATCH] use fragment_fromstring instead of document_fromstring

---
 ox/html.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/ox/html.py b/ox/html.py
index 549e32f..7da078f 100644
--- a/ox/html.py
+++ b/ox/html.py
@@ -398,8 +398,5 @@ def sanitize_fragment(html):
     if not html:
         return u''
     import lxml.html
-    body = lxml.html.document_fromstring(html).find('body')
-    html = lxml.html.tostring(body, encoding='utf-8')[6:-7].decode('utf-8')
-    if html.startswith('<p>') and html.endswith('</p>'):
-        html = html[3:-4]
-    return html
+    html = lxml.html.fragment_fromstring(html, create_parent=False)
+    return lxml.html.tostring(html, encoding='utf-8').decode('utf-8')