'use strict';

/*@
Ox.parseEmailAddresses <f> Takes HTML and turns e-mail addresses into links
    (html) -> <s> HTML with e-mail addresses wrapped in mailto: links
    html <s> HTML
@*/
// fixme: no tests
// fixme: shouldn't this be formatEmailAddresses?
// fixme: fails for linked emails
Ox.parseEmailAddresses = function(html) {
    // $1 is the complete address, used both as link target and link text
    return html.replace(
        /\b([0-9A-Z\.\+\-_]+@(?:[0-9A-Z\-]+\.)+[A-Z]{2,6})\b/gi,
        '<a href="mailto:$1">$1</a>'
    );
};

/*@
Ox.parseHTML <f> Takes HTML from an untrusted source and returns something sane
    (html[, tags[, wikilinks]]) -> <s> Sanitized HTML
    html <s> HTML
    tags <[s]> Allowed tags (defaults to a built-in list), '[]' enables
        [url text] links
    wikilinks <*> Currently unused
    > Ox.parseHTML('http://foo.com, bar')
    '<a href="http://foo.com">http://foo.com</a>, bar'
    > Ox.parseHTML('http://foo.com/foobar?foo, bar')
    '<a href="http://foo.com/foobar?foo">http://foo.com/foobar?foo</a>, bar'
    > Ox.parseHTML('(see: www.foo.com)')
    '(see: <a href="http://www.foo.com">www.foo.com</a>)'
    > Ox.parseHTML('foo@bar.com')
    '<a href="mailto:foo@bar.com">foo@bar.com</a>'
    > Ox.parseHTML('<a href="http://foo.com" onmouseover="alert()">foo</a>')
    '<a href="http://foo.com">foo</a>'
    > Ox.parseHTML('<a href="javascript:alert()">foo</a>')
    '&lt;a href="javascript:alert()"&gt;foo'
    > Ox.parseHTML('[http://foo.com foo]')
    '<a href="http://foo.com">foo</a>'
    > Ox.parseHTML('<rtl>foo</rtl>')
    '<div style="direction: rtl">foo</div>'
    > Ox.parseHTML('<script>alert()</script>')
    '&lt;script&gt;alert()&lt;/script&gt;'
    > Ox.parseHTML('\'foo\' < \'bar\' && "foo" > "bar"')
    '\'foo\' &lt; \'bar\' &amp;&amp; "foo" &gt; "bar"'
    > Ox.parseHTML('<b>foo')
    '<b>foo</b>'
    > Ox.parseHTML('<b>foo</b></b>')
    '<b>foo</b>'
@*/
// fixme: the first four tests rely on the URL/e-mail linking
// that is currently commented out below
Ox.parseHTML = (function() {
    var defaultTags = [
            // inline formatting
            'b', 'code', 'i', 's', 'sub', 'sup', 'u',
            // block formatting
            'blockquote', 'h1', 'p', 'pre',
            // lists
            'li', 'ol', 'ul',
            // tables
            'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr',
            // other
            'a', 'br', 'img',
            // special
            'rtl', '[]'
        ],
        // maps tag names to {regexp source: replacement template} objects,
        // '{1}' in a template stands for the first capture group
        parse = {
            // only http(s) and root-relative targets are accepted,
            // all attributes other than href are dropped
            a: {
                '<a [^<>]*?href="((https?:\/\/|\/).+?)".*?>': '<a href="{1}">',
                '<\/a>': '</a>'
            },
            img: {
                '<img [^<>]*?src="((https?:\/\/|\/).+?)".*?>': '<img src="{1}">'
            },
            rtl: {
                '<rtl>': '<div style="direction: rtl">',
                '<\/rtl>': '</div>'
            },
            // any other tag: opening or closing form, without attributes
            '*': function(tag) {
                var ret = {};
                ret['<(/?' + tag + ') ?/?>'] = '<{1}>';
                return ret;
            }
        },
        // delimiter for the numeric placeholders of accepted tags
        tab = '\t';
    return function(html, tags, wikilinks) {
        var matches = [];
        tags = tags || defaultTags;
        // html = Ox.clean(html); fixme: can this be a parameter?
        if (tags.indexOf('[]') > -1) {
            // turn [url text] into a regular link, to be checked below
            html = html.replace(
                /\[((https?:\/\/|\/).+?) (.+?)\]/gi,
                '<a href="$1">$3</a>'
            );
            tags = tags.filter(function(tag) {
                return tag !== '[]';
            });
        }
        // replace every accepted tag with a placeholder (tab + index + tab)
        // and remember its sanitized form
        tags.forEach(function(tag) {
            var p = parse[tag] || parse['*'](tag);
            Ox.forEach(p, function(replace, regexp) {
                html = html.replace(new RegExp(regexp, 'gi'), function() {
                    matches.push(Ox.formatString(replace, arguments));
                    return tab + (matches.length - 1) + tab;
                });
            });
        });
        // encode whatever is left, i.e. everything that was not accepted
        html = Ox.encodeHTML(html);
        //fixme: both fail if urls/emails are already links
        //html = Ox.parseURLs(html);
        //html = Ox.parseEmailAddresses(html);
        // put the sanitized tags back in place
        matches.forEach(function(match, i) {
            // the replacement has to be a function, since in a replacement
            // string, sequences like '$&' or '$1' (which may occur in URLs)
            // would be treated as special patterns and corrupt the output
            html = html.replace(new RegExp(tab + i + tab, 'gi'), function() {
                return match;
            });
        });
        html = html.replace(/\n\n/g, '<br/><br/>');
        // close extra opening (and remove extra closing) tags
        // note: this converts '&quot;' to '"'
        return Ox.element('<div>').html(html).html();
    };
}());

/*@
Ox.parseURL <f> Takes a URL, returns its components
    (url) -> <o> URL components
    url <s> URL
    <script>
        Ox.test.object = Ox.parseURL('http://www.foo.com:8080/bar/index.html?a=0&b=1#c');
    </script>
    > Ox.test.object.hash
    '#c'
    > Ox.test.object.host
    'www.foo.com:8080'
    > Ox.test.object.hostname
    'www.foo.com'
    > Ox.test.object.origin
    'http://www.foo.com:8080'
    > Ox.test.object.pathname
    '/bar/index.html'
    > Ox.test.object.port
    '8080'
    > Ox.test.object.protocol
    'http:'
    > Ox.test.object.search
    '?a=0&b=1'
@*/
Ox.parseURL = (function() {
    // a single anchor element is shared by all calls,
    // assigning its href makes the URL components readable as properties
    var a = document.createElement('a'),
        keys = ['hash', 'host', 'hostname', 'origin',
            'pathname', 'port', 'protocol', 'search'];
    return function(str) {
        var ret = {};
        a.href = str;
        keys.forEach(function(key) {
            ret[key] = a[key];
        });
        return ret;
    };
}());

/*@
Ox.parseURLs <f> Takes HTML and turns URLs into links
    (html) -> <s> HTML with URLs wrapped in links
    html <s> HTML
    > Ox.parseURLs('http://foo.com, bar')
    '<a href="http://foo.com">http://foo.com</a>, bar'
    > Ox.parseURLs('http://foo.com/foobar?foo, bar')
    '<a href="http://foo.com/foobar?foo">http://foo.com/foobar?foo</a>, bar'
    > Ox.parseURLs('www.foo.com, bar')
    '<a href="http://www.foo.com">www.foo.com</a>, bar'
    > Ox.parseURLs('<a href="http://foo.com">http://foo.com</a> etc')
    '<a href="http://foo.com">http://foo.com</a> etc'
@*/
// fixme: shouldn't this be formatURLs?
// fixme: fails for urls inside links
Ox.parseURLs = function(html) {
    return html.replace(
        // url: the URL itself, pre: its 'http://', 'https://' or 'www.'
        // prefix, end: trailing punctuation plus whitespace or end of string
        /\b((https?:\/\/|www\.).+?)([\.,:;!\?\)\]]*?(\s|$))/gi,
        function(str, url, pre, end) {
            // the regexp is case-insensitive, so the prefix may be 'WWW.',
            // which needs a protocol just like 'www.' does
            url = (pre.toLowerCase() === 'www.' ? 'http://' : '') + url;
            return Ox.formatString(
                '<a href="{url}">{url}</a>{end}',
                {
                    end: end,
                    url: url
                }
            );
        }
    );
};