From d38da54a17357e20e6f56e29f369127d1071ff6b Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Thu, 31 Oct 2013 13:49:55 +0100
Subject: [PATCH] strip

---
 ox/html.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/ox/html.py b/ox/html.py
index bae9bd9..8f30609 100644
--- a/ox/html.py
+++ b/ox/html.py
@@ -318,5 +318,7 @@ def sanitize_fragment(html):
     '''
     import lxml.html
     body = lxml.html.document_fromstring(html).find('body')
-    return lxml.html.tostring(body, encoding='utf-8')[6:-7].decode('utf-8')
-
+    html = lxml.html.tostring(body, encoding='utf-8')[6:-7].decode('utf-8')
+    if html.startswith('<p>') and html.endswith('</p>'):
+        html = html[3:-4]
+    return html