'use strict';

// NOTE(review): the copy of this file that was reviewed had been collapsed
// onto a few lines and had markup stripped out of its string and regular
// expression literals. Every literal flagged with NOTE(review) below was
// reconstructed from the surviving fragments and from how it is used, and
// should be confirmed against the upstream file. Doctest examples are only
// given where they can be derived mechanically from the code.

(function() {

    var defaultTags = [
            // inline formatting
            'b', 'code', 'i', 's', 'sub', 'sup', 'u',
            // block formatting
            'blockquote', 'h1', 'h2', 'h3', 'p', 'pre',
            // lists
            'li', 'ol', 'ul',
            // tables
            'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr',
            // other
            'a', 'br', 'img',
            // special
            'rtl', '[]'
        ],
        // character -> named entity
        // NOTE(review): entity names reconstructed (values had been decoded)
        htmlEntities = {
            '"': '&quot;', '&': '&amp;', "'": '&apos;', '<': '&lt;', '>': '&gt;'
        },
        regexp = {
            // anything that looks like an entity
            entity: /&[^\s]*;/g,
            // attribute-less inline tags (opening, closing or self-closing)
            tag: new RegExp('<\\/?(' + [
                'a', 'b', 'br', 'code', 'i', 's', 'span', 'u'
            ].join('|') + ')\\/?>', 'gi')
        },
        // [pattern, replacement] pairs
        replace = {
            mail: [
                /\b([0-9A-Z\.\+\-_]+@(?:[0-9A-Z\-]+\.)+[A-Z]{2,6})\b/gi,
                // NOTE(review): reconstructed, only '$1' survived
                '<a href="mailto:$1">$1</a>'
            ],
            namedEntity: [
                new RegExp('(' + Ox.values(htmlEntities).join('|') + ')', 'g'),
                function(match) {
                    return Ox.keyOf(htmlEntities, match);
                }
            ],
            numericEntity: [
                /&#([0-9A-FX]+);/gi,
                function(match, code) {
                    // a leading 'x' marks a hexadecimal code point
                    return Ox.char(
                        /^X/i.test(code)
                            ? parseInt(code.slice(1), 16)
                            : parseInt(code, 10)
                    );
                }
            ],
            // per-tag whitelist patterns used by Ox.sanitizeHTML, the
            // replacement is a template for Ox.formatString, which receives
            // the match arguments ({0} is the full match, {1} the first group)
            tag: {
                a: [
                    [
                        // NOTE(review): leading '<a [^<>' reconstructed
                        /<a [^<>]*?href="((\/|https?:\/\/|mailto:).+?)".*?>/gi,
                        // NOTE(review): reconstructed
                        '<a href="{1}">'
                    ],
                    [
                        /<\/a>/gi,
                        // NOTE(review): reconstructed
                        '</a>'
                    ]
                ],
                img: [
                    [
                        // NOTE(review): leading '<img [^<>' reconstructed
                        /<img [^<>]*?src="((\/|https?:\/\/).+?)".*?>/gi,
                        // NOTE(review): reconstructed
                        '<img src="{1}">'
                    ]
                ],
                rtl: [
                    [
                        // NOTE(review): pattern and replacement reconstructed,
                        // confirm the exact style attribute
                        /<rtl>/gi,
                        '<div style="direction: rtl">'
                    ],
                    [
                        /<\/rtl>/gi,
                        // NOTE(review): reconstructed
                        '</div>'
                    ]
                ],
                // any other whitelisted tag: attribute-less form only
                '*': function(tag) {
                    return [
                        [
                            // NOTE(review): pattern reconstructed
                            new RegExp('</?' + tag + ' ?/?>', 'gi'),
                            '{0}'
                        ]
                    ];
                }
            },
            url: [
                /\b((https?:\/\/|www\.).+?)([\.,:;!\?\)\]]*?(\s|$))/gi,
                function(string, url, prefix, end) {
                    // 'www.' URLs have no scheme, so add one to the href
                    prefix = prefix.toLowerCase() === 'www.' ? 'http://' : '';
                    return Ox.formatString(
                        // NOTE(review): reconstructed, only '{url}{end}'
                        // survived
                        '<a href="{prefix}{url}">{url}</a>{end}',
                        {end: end, prefix: prefix, url: url}
                    );
                }
            ]
        },
        // two random 16-letter strings, used by Ox.sanitizeHTML to build
        // placeholders (salt[0] + index + salt[1]) for whitelisted tags
        salt = Ox.range(2).map(function() {
            return Ox.range(16).map(function() {
                return Ox.char(65 + Ox.random(26));
            }).join('');
        });

    // Returns the offset argument passed to a String.prototype.replace
    // callback. The offset follows the capture groups, which are strings or
    // undefined, so it is the first numeric argument after the match itself.
    function getMatchOffset(args) {
        var i = 1;
        while (i < args.length && typeof args[i] !== 'number') {
            i++;
        }
        return args[i];
    }

    // Splits a string into text (even indices) and tags (odd indices), ignoring
    // tags with starting positions that are included in the ignore array
    function splitHTMLTags(string, ignore) {
        var isTag = false, ret = [''];
        ignore = ignore || [];
        Ox.forEach(string, function(char, i) {
            if (!isTag && char === '<' && ignore.indexOf(i) === -1) {
                isTag = true;
                ret.push('');
            }
            ret[ret.length - 1] += char;
            if (isTag && char === '>') {
                isTag = false;
                ret.push('');
            }
        });
        return ret;
    }

    /*@
    Ox.addLinks <f> Takes a string and adds links for e-mail addresses and URLs
        (string[, isHTML]) -> <s> Formatted string
        string <s> String
        isHTML <b|false> If true, ignore matches in tags or enclosed by links
        > Ox.addLinks('foo bar <foo@bar.com>')
        'foo bar &lt;<a href="mailto:foo@bar.com">foo@bar.com</a>&gt;'
        > Ox.addLinks('www.foo.com/bar#baz, etc.')
        '<a href="http://www.foo.com/bar#baz">www.foo.com/bar#baz</a>, etc.'
    @*/
    Ox.addLinks = function(string, isHTML) {
        var isLink = false;
        function replaceString(string) {
            return string
                .replace(replace.mail[0], replace.mail[1])
                .replace(replace.url[0], replace.url[1]);
        }
        // NOTE(review): everything after "if (/^" in this function was lost
        // and has been reconstructed from the surviving locals (isLink, isTag,
        // replaceString) and the documented contract, confirm against upstream
        return isHTML
            ? splitHTMLTags(string).map(function(string, i) {
                var isTag = i % 2;
                if (isTag) {
                    // keep track of whether we are inside an existing link
                    if (/^<a/.test(string)) {
                        isLink = true;
                    } else if (/^<\/a/.test(string)) {
                        isLink = false;
                    }
                }
                // leave tags and link text alone
                return isTag || isLink ? string : replaceString(string);
            }).join('')
            : Ox.normalizeHTML(replaceString(string));
    };

    /*@
    Ox.encodeHTMLEntities <f> Encodes HTML entities
        (string[, encodeAll]) -> <s> String
        string <s> String
        encodeAll <b|false> If true, encode characters > 127 as numeric entities
        > Ox.encodeHTMLEntities('<\'&"> äbçdê')
        '&lt;&apos;&amp;&quot;&gt; äbçdê'
    @*/
    Ox.encodeHTMLEntities = function(string, encodeAll) {
        return Ox.map(String(string), function(char) {
            var code = char.charCodeAt(0);
            if (code < 128) {
                char = char in htmlEntities ? htmlEntities[char] : char;
            } else if (encodeAll) {
                char = '&#x'
                    + Ox.pad(code.toString(16).toUpperCase(), 4)
                    + ';';
            }
            return char;
        });
    };

    /*@
    Ox.decodeHTMLEntities <f> Decodes HTML entities
        (string[, decodeAll]) -> <s> String
        string <s> String
        decodeAll <b|false> If true, decode named entities for characters > 127
            Note that decodeAll relies on `Ox.normalizeHTML`, which uses the
            DOM and may transform the string
        > Ox.decodeHTMLEntities('&lt;&apos;&amp;&quot;&gt;')
        '<\'&">'
    @*/
    Ox.decodeHTMLEntities = function(string, decodeAll) {
        return decodeAll
            // let the DOM resolve the entities first, then decode what is
            // left with the regular (non-DOM) branch
            ? Ox.decodeHTMLEntities(Ox.normalizeHTML(string))
            : String(string)
                .replace(replace.namedEntity[0], replace.namedEntity[1])
                .replace(replace.numericEntity[0], replace.numericEntity[1]);
    };

    /*@
    Ox.highlightHTML <f> Highlight matches in string
        (string, query, classname[, isHTML]) -> <s> Output string
        string <s> Input string
        query <s|r> Case-insensitive query string, or regular expression
        classname <s> Class name for matches
        isHTML <b|false> If true, the input string is treated as HTML
        > Ox.highlightHTML('<name>', 'name', 'c')
        '&lt;<span class="c">name</span>&gt;'
    @*/
    Ox.highlightHTML = function(string, query, classname, isHTML) {
        var cursor = 0,
            entities = [],
            matches = [],
            re = Ox.isRegExp(query) ? query
                : new RegExp(Ox.escapeRegExp(query), 'gi'),
            // NOTE(review): reconstructed, the literals had been emptied
            span = ['<span class="' + classname + '">', '</span>'],
            tags = [];
        function insert(array) {
            // for each replacement
            array.forEach(function(v) {
                // replace the modified value with the original value
                string = Ox.splice(string, v.position, v.length, v.value);
                // for each match
                matches.forEach(function(match) {
                    if (v.position < match.position) {
                        // replacement is before match, update match position
                        match.position += v.value.length - v.length;
                    } else if (
                        v.position < match.position + match.value.length
                    ) {
                        // replacement is inside match, update match value
                        match.value = Ox.splice(
                            match.value, v.position - match.position,
                            v.length, v.value
                        );
                    }
                });
            });
        }
        if (isHTML) {
            string = Ox.normalizeHTML(string)
                // remove inline tags
                .replace(regexp.tag, function(value, tag, position) {
                    tags.push({
                        length: 0, position: position, value: value
                    });
                    return '';
                })
                // decode html entities
                .replace(regexp.entity, function(value, position) {
                    var ret = Ox.decodeHTMLEntities(value, true);
                    entities.push({
                        length: ret.length, position: position, value: value
                    });
                    return ret;
                });
            // if decoding entities has created new tags, ignore them
            splitHTMLTags(string, entities.map(function(entity) {
                return entity.position;
            })).forEach(function(v, i) {
                if (i % 2 === 0) {
                    // outside tags, find matches and save position and value
                    v.replace(re, function(value) {
                        matches.push({
                            // a query regexp may contain capture groups, so
                            // the offset is not always the second argument
                            position: cursor + getMatchOffset(arguments),
                            value: value
                        });
                    });
                }
                cursor += v.length;
            });
            insert(entities);
            insert(tags);
            // for each match (in reverse order, so that positions are correct)
            matches.reverse().forEach(function(match) {
                // wrap it in a span
                string = Ox.splice(
                    string, match.position, match.value.length,
                    span.join(match.value)
                );
            });
        } else {
            string = string.replace(re, function(value) {
                return span.join(value);
            });
        }
        // if isHTML, we may have enclosed single opening or closing tags in a
        // span, if not isHTML, the string may contain '<', '>' or '&', so in
        // both cases, we have to normalize
        return Ox.normalizeHTML(string);
    };

    /*@
    Ox.normalizeHTML <f> Normalize HTML (using the DOM)
        (html) -> <s> Normalized HTML
        html <s> HTML
        > Ox.normalizeHTML('<b>foo')
        '<b>foo</b>'
    @*/
    Ox.normalizeHTML = function(html) {
        // round-trip through a detached element
        // NOTE(review): element name reconstructed
        return Ox.$('<div>').html(html).html();
    };

    /*@
    Ox.sanitizeHTML <f> Takes untrusted HTML and returns something trustworthy
        (html[, tags]) -> <s> Sanitized HTML
        html <s> Untrusted HTML
        tags <[s]> Allowed tags (defaults to the built-in list), '[]' enables
            the '[url text]' link syntax
        > Ox.sanitizeHTML('[http://foo.com foo]')
        '<a href="http://foo.com">foo</a>'
        > Ox.sanitizeHTML('<script>alert()</script>')
        '&lt;script&gt;alert()&lt;/script&gt;'
    @*/
    Ox.sanitizeHTML = function(html, tags) {
        var matches = [];
        tags = tags || defaultTags;
        // html = Ox.clean(html); fixme: can this be a parameter?
        if (tags.indexOf('[]') > -1) {
            html = html.replace(
                /\[((\/|https?:\/\/|mailto:).+?) (.+?)\]/gi,
                // NOTE(review): reconstructed, only '$3' survived
                '<a href="$1">$3</a>'
            );
            tags = tags.filter(function(tag) {
                return tag !== '[]';
            });
        }
        // replace each whitelisted tag with a placeholder, so that it
        // survives the entity encoding below
        tags.forEach(function(tag) {
            var array = replace.tag[tag] || replace.tag['*'](tag);
            Ox.forEach(array, function(value) {
                html = html.replace(value[0], function() {
                    matches.push(Ox.formatString(value[1], arguments));
                    return salt.join(matches.length - 1);
                });
            });
        });
        html = Ox.addLinks(Ox.encodeHTMLEntities(html), true);
        matches.forEach(function(match, i) {
            // use a function, so that '$&', '$1' etc. in the saved tag are
            // inserted literally and not treated as replacement patterns
            html = html.replace(new RegExp(salt.join(i)), function() {
                return match;
            });
        });
        // NOTE(review): replacement reconstructed, confirm the exact markup
        html = html.replace(/\n\n/g, '<br/><br/>');
        // Close extra opening and remove extra closing tags.
        // Note: this converts '&apos;' to "'" and '&quot;' to '"'
        return Ox.normalizeHTML(html);
    };

}());