'use strict';

// NOTE(review): in the reviewed copy of this file, all HTML markup inside
// string literals, regular expressions and doc examples was missing (it looks
// like the text had been run through a tag stripper and entity decoder). The
// markup below was reconstructed from the surrounding code and doc examples;
// the exact attributes (e.g. `title`) and the OxDoc type markers should be
// confirmed against version control before merging.

/*@
Ox.parseEmailAddresses <f> Takes HTML and turns e-mail addresses into links
    (html) -> <s> HTML in which e-mail addresses are wrapped in mailto: links
    html <s> HTML
@*/
// fixme: no tests
Ox.parseEmailAddresses = function(html) {
    return html.replace(
        /\b([0-9A-Z\.\+\-_]+@(?:[0-9A-Z\-]+\.)+[A-Z]{2,6})\b/gi,
        '<a href="mailto:$1" title="mailto:$1">$1</a>'
    );
};

/*@
Ox.parseHTML <f> Takes HTML from an untrusted source and returns something sane
    (html[, tags[, wikilinks]]) -> <s> Sanitized HTML
    html <s> HTML
    tags <[s]> Allowed tags (defaults to the built-in whitelist)
    wikilinks <b> Currently unused
    > Ox.parseHTML('http://foo.com, bar')
    '<a href="http://foo.com" title="http://foo.com">foo.com</a>, bar'
    > Ox.parseHTML('(see: www.foo.com)')
    '(see: <a href="http://www.foo.com" title="http://www.foo.com">www.foo.com</a>)'
    > Ox.parseHTML('foo@bar.com')
    '<a href="mailto:foo@bar.com" title="mailto:foo@bar.com">foo@bar.com</a>'
    > Ox.parseHTML('<a href="http://foo.com" onmouseover="alert()">foo</a>')
    '<a href="http://foo.com" title="http://foo.com">foo</a>'
    > Ox.parseHTML('<a href="javascript:alert()">foo</a>')
    '&lt;a href="javascript:alert()"&gt;foo'
    > Ox.parseHTML('[http://foo.com foo]')
    '<a href="http://foo.com" title="http://foo.com">foo</a>'
    > Ox.parseHTML('<rtl>foo</rtl>')
    '<div style="direction: rtl">foo</div>'
    > Ox.parseHTML('<script>alert()</script>')
    '&lt;script&gt;alert()&lt;/script&gt;'
    > Ox.parseHTML('\'foo\' < \'bar\' && "foo" > "bar"')
    '\'foo\' &lt; \'bar\' &amp;&amp; "foo" &gt; "bar"'
    > Ox.parseHTML('<b>foo')
    '<b>foo</b>'
    > Ox.parseHTML('<b>foo</b></b>')
    '<b>foo</b>'
@*/
Ox.parseHTML = (function() {
    var defaultTags = [
            // inline formatting
            'b', 'code', 'i', 'q', 's', 'sub', 'sup', 'u',
            // block
            'blockquote', 'h1', 'p', 'pre',
            // lists
            'li', 'ol', 'ul',
            // tables
            'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr',
            // other
            'a', 'br', 'img',
            // special
            'rtl', '[]'
        ],
        // Maps a tag name to {regexp source: replacement template} pairs.
        // Templates are filled in by Ox.formatString with the match
        // arguments, so '{1}' is the first capture group.
        parse = {
            a: {
                // only absolute http(s) URLs and root-relative paths are
                // accepted; all other attributes are dropped
                '<a [^<>]*?href="((https?:\/\/|\/).+?)".*?>': '<a href="{1}" title="{1}">',
                '<\/a>': '</a>'
            },
            img: {
                '<img [^<>]*?src="(https?:\/\/.+?)".*?>': '<img src="{1}">'
            },
            rtl: {
                '<rtl>': '<div style="direction: rtl">',
                '<\/rtl>': '</div>'
            },
            // any other allowed tag: attribute-less opening or closing form
            '*': function(tag) {
                var ret = {};
                ret['<(/?' + tag + ')>'] = '<{1}>';
                return ret;
            }
        },
        tab = '\t';
    return function(html, tags, wikilinks) {
        var allowed = tags || defaultTags,
            matches = [];
        // html = Ox.clean(html); fixme: can this be a parameter?
        if (allowed.indexOf('[]') > -1) {
            // '[http://foo.com foo]' becomes a plain link, which the 'a'
            // rule below then sanitizes like any other link
            html = html.replace(
                /\[(https?:\/\/.+?) (.+?)\]/gi, '<a href="$1">$2</a>'
            );
            allowed = allowed.filter(function(tag) {
                return tag !== '[]';
            });
        }
        // Swap every allowed tag for a tab-delimited index, so that the
        // encoding and auto-linking steps below leave it untouched
        allowed.forEach(function(tag) {
            var p = parse[tag] || parse['*'](tag);
            Ox.forEach(p, function(replace, regexp) {
                html = html.replace(new RegExp(regexp, 'gi'), function() {
                    matches.push(Ox.formatString(replace, arguments));
                    return tab + (matches.length - 1) + tab;
                });
            });
        });
        html = Ox.encodeHTML(html);
        html = Ox.parseURLs(html);
        html = Ox.parseEmailAddresses(html);
        // Put the sanitized tags back. split/join inserts them literally; a
        // string passed to replace() would have '$&', '$1' etc. (which can
        // occur in a URL) interpreted as replacement patterns
        matches.forEach(function(match, i) {
            html = html.split(tab + i + tab).join(match);
        });
        // only blank lines become breaks, single newlines are left alone
        html = html.replace(/\n\n/g, '<br/><br/>');
        // close extra opening (and remove extra closing) tags
        // note: this converts '&quot;' to '"'
        return Ox.element('<div>').html(html).html();
    };
}());

/*@
Ox.parseURL <f> Takes a URL, returns its components
    (url) -> <o> URL components
    url <s> URL
    <script>
        Ox.test.object = Ox.parseURL('http://www.foo.com:8080/bar/index.html?a=0&b=1#c');
    </script>
    > Ox.test.object.hash
    '#c'
    > Ox.test.object.host
    'www.foo.com:8080'
    > Ox.test.object.hostname
    'www.foo.com'
    > Ox.test.object.origin
    'http://www.foo.com:8080'
    > Ox.test.object.pathname
    '/bar/index.html'
    > Ox.test.object.port
    '8080'
    > Ox.test.object.protocol
    'http:'
    > Ox.test.object.search
    '?a=0&b=1'
@*/
Ox.parseURL = (function() {
    // fixme: leak memory, like now, or create every time? ... benchmark??
    // A single anchor element is shared by all calls: assigning its href
    // makes the browser parse the URL, and the components are read back
    var a = document.createElement('a'),
        keys = ['hash', 'host', 'hostname', 'origin',
            'pathname', 'port', 'protocol', 'search'];
    return function(str) {
        var ret = {};
        a.href = str;
        keys.forEach(function(key) {
            ret[key] = a[key];
        });
        return ret;
    };
}());

/*@
Ox.parseURLs <f> Takes HTML and turns URLs into links
    (html) -> <s> HTML in which URLs are wrapped in links
    html <s> HTML
@*/
// fixme: is parseURLs the right name?
// fixme: no tests
Ox.parseURLs = function(html) {
    return html.replace(
        // url: 'http://', 'https://' or 'www.' plus the shortest run that is
        // followed by optional trailing punctuation and whitespace or the
        // end of the string; the punctuation stays outside the link
        /\b((https?:\/\/|www\.).+?)([\.,:;!\?\)\]]*?(\s|$))/gi,
        function(str, url, pre, end) {
            // the regexp is case-insensitive, so 'WWW.' needs a scheme too
            url = (pre.toLowerCase() === 'www.' ? 'http://' : '') + url;
            return Ox.formatString(
                '<a href="{url}" title="{url}">{host}</a>{end}',
                {
                    end: end,
                    host: Ox.parseURL(url).hostname,
                    url: url
                }
            );
        }
    );
};