From 533a1a627e54e9bfb5acc420ce6a6e0c4882fa9c Mon Sep 17 00:00:00 2001 From: Will Thompson Date: Tue, 24 Nov 2015 18:05:27 +0000 Subject: [PATCH] ox.html.sanitize_fragment: documentation, tests --- ox/html.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ox/html.py b/ox/html.py index d69c2df..202a036 100644 --- a/ox/html.py +++ b/ox/html.py @@ -393,6 +393,21 @@ def split_tags(string): return [item for sublist in zip(strings, tags) for item in sublist][:-1] def sanitize_fragment(html): + ''' + Ensures that tags are closed (or not, as appropriate), attributes + are quoted, etc. Does not strip potentially malicious HTML: use + sanitize_html() for that. + + >>> sanitize_fragment(u'') + u'' + >>> sanitize_fragment(u'') + u'' + >>> sanitize_fragment(u'


') + u'

' + >>> sanitize_fragment(u'foo') + u'foo' + ''' + ''' #html5lib reorders arguments, so not usable import html5lib