/gi,
- ''
- ],
- [
- /<\/rtl>/gi,
- '
'
- ]
- ],
- '*': function(tag) {
- return [
- [
- new RegExp('?' + tag + ' ?/?>', 'gi'),
- '{0}'
- ]
- ];
- }
- },
salt = Ox.range(2).map(function(){
return Ox.range(16).map(function() {
return Ox.char(65 + Ox.random(26));
@@ -491,16 +504,16 @@
> Ox.sanitizeHTML('foo')
'foo'
> Ox.sanitizeHTML('foo')
- '<a href="javascript:alert()">foo'
+ '<a href="javascript:alert()">foo</a>'
> Ox.sanitizeHTML('foo')
- '<a href="foo">foo'
+ '<a href="foo">foo</a>'
> Ox.sanitizeHTML('foo')
'foo'
> Ox.sanitizeHTML('foo')
'foo'
> Ox.sanitizeHTML('[http://foo.com foo]')
'foo'
- > Ox.sanitizeHTML('foo')
+ > Ox.sanitizeHTML('foo
')
'foo
'
> Ox.sanitizeHTML('')
'<script>alert()</script>'
@@ -514,40 +527,105 @@
'&&'
> Ox.sanitizeHTML('')
'<http://foo.com>'
+ > Ox.sanitizeHTML('')
+ '"<foo value="http://foo.com"></foo>"'
@*/
- Ox.sanitizeHTML = function(html, tags, replaceTags) {
- var matches = [];
+ Ox.sanitizeHTML = function(html, tags, globalAttributes) {
tags = tags || defaultTags;
- replaceTags = replaceTags || {};
+ globalAttributes = globalAttributes || [];
+ var escaped = {},
+ level = 0,
+ matches = [],
+ nonClosingTags = ['img', 'br'],
+ validAttributes = {}, requiredAttributes = {}, validation = {},
+ validTags = tags.map(function(tag) {
+ validAttributes[tag.name] = globalAttributes
+ .concat(tag.required || [])
+ .concat(tag.optional || []);
+ requiredAttributes[tag.name] = tag.required || [];
+ validation[tag.name] = tag.validation || {};
+ return tag.name;
+ });
+
// html = Ox.clean(html); fixme: can this be a parameter?
- if (tags.indexOf('[]') > -1) {
+ if (validTags.indexOf('[]') > -1) {
html = html.replace(
/\[((\/|https?:\/\/|mailto:).+?) (.+?)\]/gi,
'$3'
);
- tags = tags.filter(function(tag) {
+ validTags = validTags.filter(function(tag) {
return tag != '[]';
});
}
- tags.forEach(function(tag) {
- var array = replaceTags[tag] || replace[tag] || replace['*'](tag);
- Ox.forEach(array, function(value) {
- html = html.replace(value[0], function() {
- var match;
- if (Ox.isFunction(value[1])) {
- match = value[1].apply(null, arguments);
- } else {
- match = Ox.formatString(value[1], arguments);
+
+ html = splitHTMLTags(html).map(function(string, i) {
+ var attributes,
+ attrs = {},
+ attrRegexp = /([^=\ ]+)="([^"]+)"/g,
+ isClosing,
+ isTag = i % 2,
+ isValid = true,
+ name,
+ match,
+ tag,
+ tagRegexp = /<(\/)?([^\ \/]+)(.*?)(\/)?>/g;
+
+ if (isTag) {
+ tag = tagRegexp.exec(string);
+ if (tag) {
+ isClosing = !Ox.isUndefined(tag[1]);
+ name = tag[2];
+ attributes = tag[3].trim();
+ while(match = attrRegexp.exec(attributes)) {
+ if (validAttributes[name] && validAttributes[name].indexOf(match[1]) > -1) {
+ attrs[match[1]] = match[2];
+ }
}
- matches.push(match);
- return salt.join(matches.length - 1);
- });
- });
- });
- html = Ox.encodeHTMLEntities(Ox.decodeHTMLEntities(html));
- matches.forEach(function(match, i) {
- html = html.replace(new RegExp(salt.join(i)), match);
- });
+ if (!isClosing && nonClosingTags.indexOf(name) == -1) {
+ level++;
+ }
+ if (Ox.isEmpty(attrs) && attributes.length || validTags.indexOf(name) == -1) {
+ isValid = false;
+ } else if(!isClosing && requiredAttributes[name]) {
+ requiredAttributes[name].forEach(function(attr) {
+ if (Ox.isUndefined(attrs[attr])) {
+ isValid = false;
+ }
+ });
+ }
+ if (isValid && !Ox.isEmpty(attrs)) {
+ Ox.forEach(attrs, function(value, key) {
+ if (!Ox.isUndefined(validation[name][key])
+ && !validation[name][key].exec(value)) {
+ isValid = false;
+ return false;
+ }
+ });
+ }
+ if (isValid && isClosing) {
+ isValid = !escaped[level];
+ } else {
+ escaped[level] = !isValid;
+ }
+ if (isClosing) {
+ level --;
+ }
+ if (isValid) {
+ return '<'
+ + (isClosing ? '/' : '')
+ + name
+ + (!isClosing && !Ox.isEmpty(attrs)
+ ? ' ' + Ox.values(Ox.map(attrs, function(value, key) {
+ return key + '="' + value + '"';
+ })).join(' ')
+ : '')
+ + '>';
+ }
+ }
+ }
+ return Ox.encodeHTMLEntities(Ox.decodeHTMLEntities(string));
+ }).join('');
+ //FIXME: don't add links to urls inside of escaped tags
html = Ox.addLinks(html, true);
html = html.replace(/\n\n/g, '
');
// Close extra opening and remove extra closing tags.