ox.html.sanitize_fragment: documentation, tests
This commit is contained in:
parent
5448aec902
commit
533a1a627e
1 changed file with 15 additions and 0 deletions
15
ox/html.py
15
ox/html.py
|
@@ -393,6 +393,21 @@ def split_tags(string):
|
||||||
return [item for sublist in zip(strings, tags) for item in sublist][:-1]
|
return [item for sublist in zip(strings, tags) for item in sublist][:-1]
|
||||||
|
|
||||||
def sanitize_fragment(html):
|
def sanitize_fragment(html):
|
||||||
|
'''
|
||||||
|
Ensures that tags are closed (or not, as appropriate), attributes
|
||||||
|
are quoted, etc. Does not strip potentially-malicious HTML: use
|
||||||
|
sanitize_html() for that.
|
||||||
|
|
||||||
|
>>> sanitize_fragment(u'<span lang="en">')
|
||||||
|
u'<span lang="en"></span>'
|
||||||
|
>>> sanitize_fragment(u'<span lang=en></span>')
|
||||||
|
u'<span lang="en"></span>'
|
||||||
|
>>> sanitize_fragment(u'<br><br/></br>')
|
||||||
|
u'<br><br>'
|
||||||
|
>>> sanitize_fragment(u'<a href="javascript:alert()">foo</a>')
|
||||||
|
u'<a href="javascript:alert()">foo</a>'
|
||||||
|
'''
|
||||||
|
|
||||||
'''
|
'''
|
||||||
#html5lib reorders arguments, so not usable
|
#html5lib reorders arguments, so not usable
|
||||||
import html5lib
|
import html5lib
|
||||||
|
|
Loading…
Reference in a new issue