ox.html.sanitize_fragment: documentation, tests
This commit is contained in:
parent
5448aec902
commit
533a1a627e
1 changed file with 15 additions and 0 deletions
15
ox/html.py
15
ox/html.py
|
@@ -393,6 +393,21 @@ def split_tags(string):
|
|||
return [item for sublist in zip(strings, tags) for item in sublist][:-1]
|
||||
|
||||
def sanitize_fragment(html):
|
||||
'''
|
||||
Ensures that tags are closed (or not, as appropriate), attributes
|
||||
are quoted, etc. Does not strip potentially-malicious HTML: use
|
||||
sanitize_html() for that.
|
||||
|
||||
>>> sanitize_fragment(u'<span lang="en">')
|
||||
u'<span lang="en"></span>'
|
||||
>>> sanitize_fragment(u'<span lang=en></span>')
|
||||
u'<span lang="en"></span>'
|
||||
>>> sanitize_fragment(u'<br><br/></br>')
|
||||
u'<br><br>'
|
||||
>>> sanitize_fragment(u'<a href="javascript:alert()">foo</a>')
|
||||
u'<a href="javascript:alert()">foo</a>'
|
||||
'''
|
||||
|
||||
'''
|
||||
#html5lib reorders arguments, so not usable
|
||||
import html5lib
|
||||
|
|
Loading…
Reference in a new issue