diff --git a/ox/html.py b/ox/html.py index d69c2df..202a036 100644 --- a/ox/html.py +++ b/ox/html.py @@ -393,6 +393,21 @@ def split_tags(string): return [item for sublist in zip(strings, tags) for item in sublist][:-1] def sanitize_fragment(html): + ''' + Ensures that tags are closed (or not, as appropriate), attributes + are quoted, etc. Does not strip potentially-malicious HTML: use + sanitize_html() for that. + + >>> sanitize_fragment(u'') + u'' + >>> sanitize_fragment(u'') + u'' + >>> sanitize_fragment(u'


') + u'

' + >>> sanitize_fragment(u'foo') + u'foo' + ''' + ''' #html5lib reorders arguments, so not usable import html5lib