cleanup sanitizeHTML

This commit is contained in:
rolux 2014-01-19 12:39:53 +05:30
parent 89f5329c8b
commit b06386a00b

View file

@ -22,7 +22,7 @@
{ {
'name': 'div', 'name': 'div',
'optional': ['style'], 'optional': ['style'],
'validation': { 'validate': {
'style': /^direction: rtl$/ 'style': /^direction: rtl$/
} }
}, },
@ -51,7 +51,7 @@
{ {
'name': 'a', 'name': 'a',
'required': ['href'], 'required': ['href'],
'validation': { 'validate': {
'href': /^((https?:\/\/|\/|mailto:).*?)/ 'href': /^((https?:\/\/|\/|mailto:).*?)/
} }
}, },
@ -60,7 +60,7 @@
'name': 'iframe', 'name': 'iframe',
'optional': ['width', 'height'], 'optional': ['width', 'height'],
'required': ['src'], 'required': ['src'],
'validation': { 'validate': {
'width': /^\d+$/, 'width': /^\d+$/,
'height': /^\d+$/, 'height': /^\d+$/,
'src': /^((https?:\/\/|\/|mailto:).*?)/ 'src': /^((https?:\/\/|\/|mailto:).*?)/
@ -70,7 +70,7 @@
'name': 'img', 'name': 'img',
'optional': ['width', 'height'], 'optional': ['width', 'height'],
'required': ['src'], 'required': ['src'],
'validation': { 'validate': {
'width': /^\d+$/, 'width': /^\d+$/,
'height': /^\d+$/, 'height': /^\d+$/,
'src': /^((https?:\/\/|\/|mailto:).*?)/ 'src': /^((https?:\/\/|\/|mailto:).*?)/
@ -531,24 +531,26 @@
'"<foo value="http://foo.com"></foo>"' '"<foo value="http://foo.com"></foo>"'
@*/ @*/
Ox.sanitizeHTML = function(html, tags, globalAttributes) { Ox.sanitizeHTML = function(html, tags, globalAttributes) {
tags = tags || defaultTags; tags = tags || defaultTags;
globalAttributes = globalAttributes || []; globalAttributes = globalAttributes || [];
var escaped = {}, var escaped = {},
level = 0, level = 0,
matches = [], matches = [],
nonClosingTags = ['img', 'br'], selfClosingTags = ['img', 'br'],
validAttributes = {}, requiredAttributes = {}, validation = {}, validAttributes = {}, requiredAttributes = {}, validate = {},
validTags = tags.map(function(tag) { validTags = tags.map(function(tag) {
validAttributes[tag.name] = globalAttributes validAttributes[tag.name] = globalAttributes
.concat(tag.required || []) .concat(tag.required || [])
.concat(tag.optional || []); .concat(tag.optional || []);
requiredAttributes[tag.name] = tag.required || []; requiredAttributes[tag.name] = tag.required || [];
validation[tag.name] = tag.validation || {}; validate[tag.name] = tag.validate || {};
return tag.name; return tag.name;
}); });
// html = Ox.clean(html); fixme: can this be a parameter? // html = Ox.clean(html); fixme: can this be a parameter?
if (validTags.indexOf('[]') > -1) { if (Ox.contains(validTags, '[]')) {
html = html.replace( html = html.replace(
/\[((\/|https?:\/\/|mailto:).+?) (.+?)\]/gi, /\[((\/|https?:\/\/|mailto:).+?) (.+?)\]/gi,
'<a href="$1">$3</a>' '<a href="$1">$3</a>'
@ -559,35 +561,42 @@
} }
html = splitHTMLTags(html).map(function(string, i) { html = splitHTMLTags(html).map(function(string, i) {
var attributes,
attrs = {}, var attrs = {},
attrMatch,
attrRegexp = /([^=\ ]+)="([^"]+)"/g, attrRegexp = /([^=\ ]+)="([^"]+)"/g,
attrString,
isClosing, isClosing,
isTag = i % 2, isTag = i % 2,
isValid = true, isValid = true,
name,
match,
tag, tag,
tagMatch,
tagRegexp = /<(\/)?([^\ \/]+)(.*?)(\/)?>/g; tagRegexp = /<(\/)?([^\ \/]+)(.*?)(\/)?>/g;
if (isTag) { if (isTag) {
tag = tagRegexp.exec(string); tagMatch = tagRegexp.exec(string);
if (tag) { if (tagMatch) {
isClosing = !Ox.isUndefined(tag[1]); isClosing = !Ox.isUndefined(tagMatch[1]);
name = tag[2]; tag = tagMatch[2];
attributes = tag[3].trim(); attrString = tagMatch[3].trim();
while(match = attrRegexp.exec(attributes)) { while (attrMatch = attrRegexp.exec(attrString)) {
if (validAttributes[name] && validAttributes[name].indexOf(match[1]) > -1) { if (
attrs[match[1]] = match[2]; validAttributes[tag]
&& Ox.contains(validAttributes[tag], attrMatch[1])
) {
attrs[attrMatch[1]] = attrMatch[2];
} }
} }
if (!isClosing && nonClosingTags.indexOf(name) == -1) { if (!isClosing && !Ox.contains(selfClosingTags, tag)) {
level++; level++;
} }
if (Ox.isEmpty(attrs) && attributes.length || validTags.indexOf(name) == -1) { if (
!Ox.contains(validTags, tag)
|| (attrString.length && Ox.isEmpty(attrs))
) {
isValid = false; isValid = false;
} else if(!isClosing && requiredAttributes[name]) { } else if (!isClosing && requiredAttributes[tag]) {
requiredAttributes[name].forEach(function(attr) { requiredAttributes[tag].forEach(function(attr) {
if (Ox.isUndefined(attrs[attr])) { if (Ox.isUndefined(attrs[attr])) {
isValid = false; isValid = false;
} }
@ -595,10 +604,12 @@
} }
if (isValid && !Ox.isEmpty(attrs)) { if (isValid && !Ox.isEmpty(attrs)) {
Ox.forEach(attrs, function(value, key) { Ox.forEach(attrs, function(value, key) {
if (!Ox.isUndefined(validation[name][key]) if (
&& !validation[name][key].exec(value)) { !Ox.isUndefined(validate[tag][key])
&& !validate[tag][key].exec(value)
) {
isValid = false; isValid = false;
return false; return false; // break
} }
}); });
} }
@ -608,12 +619,12 @@
escaped[level] = !isValid; escaped[level] = !isValid;
} }
if (isClosing) { if (isClosing) {
level --; level--;
} }
if (isValid) { if (isValid) {
return '<' return '<'
+ (isClosing ? '/' : '') + (isClosing ? '/' : '')
+ name + tag
+ (!isClosing && !Ox.isEmpty(attrs) + (!isClosing && !Ox.isEmpty(attrs)
? ' ' + Ox.values(Ox.map(attrs, function(value, key) { ? ' ' + Ox.values(Ox.map(attrs, function(value, key) {
return key + '="' + value + '"'; return key + '="' + value + '"';
@ -623,14 +634,18 @@
} }
} }
} }
return Ox.encodeHTMLEntities(Ox.decodeHTMLEntities(string)); return Ox.encodeHTMLEntities(Ox.decodeHTMLEntities(string));
}).join(''); }).join('');
//FIXME: dont add links to urls inside of escaped tags //FIXME: dont add links to urls inside of escaped tags
html = Ox.addLinks(html, true); html = Ox.addLinks(html, true);
html = html.replace(/\n\n/g, '<br/><br/>'); html = html.replace(/\n\n/g, '<br/><br/>');
// Close extra opening and remove extra closing tags. // Close extra opening and remove extra closing tags.
// Note: this converts '&apos;' to "'" and '&quot;' to '"' // Note: this converts '&apos;' to "'" and '&quot;' to '"'
return Ox.normalizeHTML(html); return Ox.normalizeHTML(html);
}; };
/*@ /*@