diff --git a/ox/html.py b/ox/html.py
index 5fcc6c1..5286116 100644
--- a/ox/html.py
+++ b/ox/html.py
@@ -440,7 +440,12 @@ def sanitize_fragment(html):
     if not html.strip():
         return html
     import lxml.html
-    body = lxml.html.document_fromstring(html).find('body')
+    try:
+        body = lxml.html.document_fromstring(html).find('body')
+    except lxml.etree.ParserError as e:
+        if e.args and e.args[0] == 'Document is empty':
+            return html
+        raise e
     html = lxml.html.tostring(body, encoding='utf-8')[6:-7].decode('utf-8')
     if html.startswith('<p>') and html.endswith('</p>'):
         html = html[3:-4]