/*@ Ox.parseEmailAddresses Takes HTML and turns e-mail addresses into links @*/ // fixme: no tests Ox.parseEmailAddresses = function(html) { return html.replace( /\b([0-9A-Z\.\+\-_]+@(?:[0-9A-Z\-]+\.)+[A-Z]{2,6})\b/gi, '$1' ); }; /*@ Ox.parseHTML Takes HTML from an untrusted source and returns something sane > Ox.parseHTML('http://foo.com, bar') 'foo.com, bar' > Ox.parseHTML('(see: www.foo.com)') '(see: www.foo.com)' > Ox.parseHTML('foo@bar.com') 'foo@bar.com' > Ox.parseHTML('foo') 'foo' > Ox.parseHTML('foo') '<a href="javascript:alert()">foo' > Ox.parseHTML('[http://foo.com foo]') 'foo' > Ox.parseHTML('foo') '
foo
' > Ox.parseHTML('') '<script>alert()</script>' > Ox.parseHTML('\'foo\' < \'bar\' && "foo" > "bar"') '\'foo\' < \'bar\' && "foo" > "bar"' > Ox.parseHTML('foo') 'foo' > Ox.parseHTML('foo') 'foo' @*/ Ox.parseHTML = (function() { var defaultTags = [ // fixme: why not p? 'a', 'b', 'blockquote', 'cite', 'code', 'del', 'em', 'i', 'img', 'ins', 'li', 'ol', 'q', 'rtl', 's', 'strong', 'sub', 'sup', 'ul', '[]' ], parse = { a: { ']*?href="(https?:\/\/.+?)".*?>': '', '<\/a>': '' }, img: { ']*?src="(https?:\/\/.+?)".*?>': '' }, rtl: { '': '
', '<\/rtl>': '
' }, '*': function(tag) { var ret = {}; ret['<(/?' + tag + ')>'] = '<{1}>'; return ret; } }, tab = '\t'; return function(html, tags, wikilinks) { var matches = [], tags = tags || defaultTags; // html = Ox.clean(html); fixme: can this be a parameter? if (tags.indexOf('[]') > -1) { html = html.replace(/\[(https?:\/\/.+?) (.+?)\]/gi, '$2'); tags = tags.filter(function(tag) { return tag != '[]'; }); } tags.forEach(function(tag) { var p = parse[tag] || parse['*'](tag); Ox.forEach(p, function(replace, regexp) { html = html.replace(new RegExp(regexp, 'gi'), function() { matches.push(Ox.formatString(replace, arguments)); return tab + (matches.length - 1) + tab; }); }); }); html = Ox.encodeHTML(html); html = Ox.parseURLs(html); html = Ox.parseEmailAddresses(html); //Ox.print('Ox.parseHTML', html, 'matches', matches); matches.forEach(function(match, i) { html = html.replace(new RegExp(tab + i + tab, 'gi'), match); }); html = html.replace(/\n/g, '
\n'); // close extra opening (and remove extra closing) tags // note: this converts '"' to '"' return Ox.element('
').html(html).html(); } }()); /*@ Ox.parseURL Takes a URL, returns its components (url) -> URL components url URL > Ox.test.object.hash '#c' > Ox.test.object.host 'www.foo.com:8080' > Ox.test.object.hostname 'www.foo.com' > Ox.test.object.origin 'http://www.foo.com:8080' > Ox.test.object.pathname '/bar/index.html' > Ox.test.object.port '8080' > Ox.test.object.protocol 'http:' > Ox.test.object.search '?a=0&b=1' @*/ Ox.parseURL = (function() { // fixme: leak memory, like now, or create every time? ... benchmark?? var a = document.createElement('a'), keys = ['hash', 'host', 'hostname', 'origin', 'pathname', 'port', 'protocol', 'search']; return function(str) { var ret = {}; a.href = str; keys.forEach(function(key) { ret[key] = a[key]; }); return ret; }; }()); /*@ Ox.parseURLs Takes HTML and turns URLs into links @*/ // fixme: is parseURLs the right name? // fixme: no tests Ox.parseURLs = function(html) { return html.replace( /\b((https?:\/\/|www\.).+?)([\.,:;!\?\)\]]*?(\s|$))/gi, function(str, url, pre, end) { url = (pre == 'www.' ? 'http://' : '' ) + url; return Ox.formatString( '{host}{end}', { end: end, host: Ox.parseURL(url).hostname, url: url } ); } ); };