oxjs/source/Ox/js/HTML.js

'use strict';

/*@
Ox.parseEmailAddresses <f> Takes HTML and turns e-mail addresses into links
    (html) -> <s> HTML with e-mail addresses wrapped in mailto: links
    html <s> HTML
    > Ox.parseEmailAddresses('foo@bar.com')
    '<a href="mailto:foo@bar.com">foo@bar.com</a>'
    > Ox.parseEmailAddresses('mail foo@bar.com now')
    'mail <a href="mailto:foo@bar.com">foo@bar.com</a> now'
    > Ox.parseEmailAddresses('<a href="mailto:foo@bar.com">foo@bar.com</a>')
    '<a href="mailto:foo@bar.com">foo@bar.com</a>'
@*/
// fixme: shouldn't this be formatEmailAddresses?
Ox.parseEmailAddresses = function(html) {
    return html.replace(
        // The first alternative ("markup") consumes complete <a>...</a>
        // elements and any other well-formed tag, so that addresses that are
        // already linked, or that sit inside attribute values, are skipped.
        // A tag name must be followed by whitespace, '/' or '>', which keeps
        // plain-text forms like '<foo@bar.com>' from being mistaken for a tag.
        // The second alternative is the e-mail address itself.
        /(<a(?:\s[^>]*)?>[\s\S]*?<\/a>|<\/?[a-z][a-z0-9]*(?:\s[^>]*)?\/?>)|\b([0-9A-Z\.\+\-_]+@(?:[0-9A-Z\-]+\.)+[A-Z]{2,6})\b/gi,
        function(str, markup, address) {
            // address is undefined whenever the markup alternative matched;
            // in that case the matched text is passed through unchanged
            return address === void 0
                ? str
                : '<a href="mailto:' + address + '">' + address + '</a>';
        }
    );
};

/*@
Ox.parseHTML <f> Takes HTML from an untrusted source and returns something sane
    > Ox.parseHTML('http://foo.com, bar')
    '<a href="http://foo.com">http://foo.com</a>, bar'
    > Ox.parseHTML('http://foo.com/foobar?foo, bar')
    '<a href="http://foo.com/foobar?foo">http://foo.com/foobar?foo</a>, bar'
    > Ox.parseHTML('(see: www.foo.com)')
    '(see: <a href="http://www.foo.com">www.foo.com</a>)'
    > Ox.parseHTML('foo@bar.com')
    '<a href="mailto:foo@bar.com">foo@bar.com</a>'
    > Ox.parseHTML('<a href="http://foo.com" onmouseover="alert()">foo</a>')
    '<a href="http://foo.com">foo</a>'
    > Ox.parseHTML('<a href="javascript:alert()">foo</a>')
    '&lt;a href="javascript:alert()"&gt;foo'
    > Ox.parseHTML('[http://foo.com foo]')
    '<a href="http://foo.com">foo</a>'
    > Ox.parseHTML('<rtl>foo</rtl>')
    '<div style="direction: rtl">foo</div>'
    > Ox.parseHTML('<script>alert()</script>')
    '&lt;script&gt;alert()&lt;/script&gt;'
    > Ox.parseHTML('\'foo\' < \'bar\' && "foo" > "bar"')
    '\'foo\' &lt; \'bar\' &amp;&amp; "foo" &gt; "bar"'
    > Ox.parseHTML('<b>foo')
    '<b>foo</b>'
    > Ox.parseHTML('<b>foo</b></b>')
    '<b>foo</b>'
@*/

Ox.parseHTML = (function() {
    // Tags that are let through when the caller does not pass its own list
    var defaultTags = [
            // inline formatting
            'b', 'code', 'i', 's', 'sub', 'sup', 'u',
            // block formatting
            'blockquote', 'h1', 'h2', 'h3', 'p', 'pre',
            // lists
            'li', 'ol', 'ul',
            // tables
            'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr',
            // other
            'a', 'br', 'img',
            // special
            'rtl', '[]'
        ],
        // Per tag: {regexp source: replacement template}. '{1}' in a template
        // is filled with the first capture group of the match. Tags without
        // an entry of their own are handled by '*', which only accepts the
        // bare tag (no attributes).
        parse = {
            a: {
                // keeps only href, and only if it is http(s) or starts with '/'
                '<a [^<>]*?href="((https?:\/\/|\/).+?)".*?>': '<a href="{1}">',
                '<\/a>': '</a>'
            },
            img: {
                // keeps only src, and only if it is http(s) or starts with '/'
                '<img [^<>]*?src="((https?:\/\/|\/).+?)".*?>': '<img src="{1}">'
            },
            rtl: {
                '<rtl>': '<div style="direction: rtl">',
                '<\/rtl>': '</div>'
            },
            '*': function(tag) {
                var ret = {};
                ret['<(/?' + tag + ') ?/?>'] = '<{1}>';
                return ret;
            }
        },
        // delimiter of the numeric placeholders that stand in for accepted tags
        tab = '\t';
    /*
    html      <s> HTML from an untrusted source
    tags      <[s]> allowed tags, defaults to defaultTags; the pseudo-tag '[]'
              enables '[url text]' links
    wikilinks currently unused, kept so that existing calls keep working
    returns   <s> HTML in which only the allowed tags are left as markup
    */
    return function(html, tags, wikilinks) {
        // sanitized tags, indexed by placeholder number
        var matches = [];
        tags = tags || defaultTags;
        // html = Ox.clean(html); fixme: can this be a parameter?
        if (tags.indexOf('[]') > -1) {
            // rewrite '[url text]' as an <a> tag, which then has to pass the
            // same 'a' rule as any other link
            html = html.replace(/\[((https?:\/\/|\/).+?) (.+?)\]/gi, '<a href="$1">$3</a>');
            tags = tags.filter(function(tag) {
                return tag !== '[]';
            });
        }
        // step 1: swap every accepted tag for a placeholder (tab, index, tab)
        tags.forEach(function(tag) {
            var p = parse[tag] || parse['*'](tag);
            Ox.forEach(p, function(replace, regexp) {
                html = html.replace(new RegExp(regexp, 'gi'), function() {
                    matches.push(Ox.formatString(replace, arguments));
                    return tab + (matches.length - 1) + tab;
                });
            });
        });
        // step 2: encode whatever is left, so that tags that were not
        // accepted end up as text
        html = Ox.encodeHTML(html);
        //fixme: both fail if urls/emails are already links
        //html = Ox.parseURLs(html);
        //html = Ox.parseEmailAddresses(html);
        // step 3: put the sanitized tags back
        matches.forEach(function(match, i) {
            // The replacement has to be a function: a replacement *string* is
            // scanned for '$$', '$&', '$`' and '$\'', so a url containing '$'
            // would be altered, and '$\'' would copy the remaining markup
            // into the attribute value.
            html = html.replace(new RegExp(tab + i + tab), function() {
                return match;
            });
        });
        html = html.replace(/\n\n/g, '<br/><br/>');
        // close extra opening (and remove extra closing) tags
        // note: this converts '&quot;' to '"'
        return Ox.element('<div>').html(html).html();
    };
}());

/*@
Ox.parseURL <f> Takes a URL, returns its components
    (url) -> <o> URL components
    url <s> URL
    <script>
        Ox.test.object = Ox.parseURL('http://www.foo.com:8080/bar/index.html?a=0&b=1#c');
    </script>
    > Ox.test.object.hash
    '#c'
    > Ox.test.object.host
    'www.foo.com:8080'
    > Ox.test.object.hostname
    'www.foo.com'
    > Ox.test.object.origin
    'http://www.foo.com:8080'
    > Ox.test.object.pathname
    '/bar/index.html'
    > Ox.test.object.port
    '8080'
    > Ox.test.object.protocol
    'http:'
    > Ox.test.object.search
    '?a=0&b=1'
@*/
Ox.parseURL = (function() {
    // Anchor element that does the actual parsing. It is created on first
    // use, not when this file is loaded, so that loading the file where
    // `document` does not exist does not throw here and keep the definitions
    // further down from being evaluated.
    var a,
        // properties that get copied from the anchor to the result
        keys = ['hash', 'host', 'hostname', 'origin',
            'pathname', 'port', 'protocol', 'search'];
    /*
    str     <s> URL
    returns <o> object with one property per entry in keys, each read from an
            anchor element whose href has been set to str
    */
    return function(str) {
        var ret = {};
        // the same element is reused for all calls
        a = a || document.createElement('a');
        a.href = str;
        keys.forEach(function(key) {
            ret[key] = a[key];
        });
        return ret;
    };
}());

/*@
Ox.parseURLs <f> Takes HTML and turns URLs into links
    (html) -> <s> HTML with URLs wrapped in links
    html <s> HTML
    > Ox.parseURLs('http://foo.com, bar')
    '<a href="http://foo.com">http://foo.com</a>, bar'
    > Ox.parseURLs('http://foo.com/foobar?foo, bar')
    '<a href="http://foo.com/foobar?foo">http://foo.com/foobar?foo</a>, bar'
    > Ox.parseURLs('www.foo.com, bar')
    '<a href="http://www.foo.com">www.foo.com</a>, bar'
    > Ox.parseURLs('<a href="http://foo.com">http://foo.com</a> etc')
    '<a href="http://foo.com">http://foo.com</a> etc'
@*/
// fixme: shouldn't this be formatURLs?
Ox.parseURLs = function(html) {
    return html.replace(
        // The first alternative ("markup") consumes complete <a>...</a>
        // elements and any other well-formed tag, so that URLs that are
        // already linked, or that sit inside attribute values, are skipped.
        // A tag name must be followed by whitespace, '/' or '>', which keeps
        // plain-text forms like '<http://foo.com>' from being mistaken for a
        // tag. The second alternative is the URL (which cannot contain
        // whitespace, '<', '>' or '"'), followed by "end": optional trailing
        // punctuation that is not part of the URL, up to whitespace, the end
        // of the input, or the next '<', '>' or '"'.
        /(<a(?:\s[^>]*)?>[\s\S]*?<\/a>|<\/?[a-z][a-z0-9]*(?:\s[^>]*)?\/?>)|\b((https?:\/\/|www\.)[^\s<>"]+?)([\.,:;!\?\)\]]*?(?:\s|$|(?=[<>"])))/gi,
        function(str, markup, url, pre, end) {
            var href;
            if (url === void 0) {
                // the markup alternative matched, pass it through unchanged
                return str;
            }
            // URLs without protocol need one in href, but keep their original
            // text; the regexp is case-insensitive, so the test has to be too
            href = (pre.toLowerCase() === 'www.' ? 'http://' : '') + url;
            return '<a href="' + href + '">' + url + '</a>' + end;
        }
    );
};
'use strict'; 2011-11-05 16:46:53 +00:00			`'use strict';`

new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`/*@`
			`Ox.parseEmailAddresses <f> Takes HTML and turns e-mail addresses into links`
			`@*/`
			`// fixme: no tests`
minor changes in OxJS 2012-01-06 11:57:54 +00:00			`// fixme: shouldn't this be formatEmailAddresses?`
- allow links to local images - allow wiki links to local urls - disable automatic url / email linking since its broken right now 2012-02-11 12:19:25 +00:00			`// fixme: fails for linked emails`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`Ox.parseEmailAddresses = function(html) {`
			`return html.replace(`
			`/\b([0-9A-Z\.\+\-_]+@(?:[0-9A-Z\-]+\.)+[A-Z]{2,6})\b/gi,`
fix Ox.Edidable textarea height, dont add title to links in parseHTML 2012-01-19 16:37:39 +00:00			`'<a href="mailto:$1">$1</a>'`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`);`
			`};`

			`/*@`
			`Ox.parseHTML <f> Takes HTML from an untrusted source and returns something sane`
			`> Ox.parseHTML('http://foo.com, bar')`
do not shorten urls, just turn them into links 2012-02-10 09:52:09 +00:00			`'<a href="http://foo.com">http://foo.com</a>, bar'`
			`> Ox.parseHTML('http://foo.com/foobar?foo, bar')`
			`'<a href="http://foo.com/foobar?foo">http://foo.com/foobar?foo</a>, bar'`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`> Ox.parseHTML('(see: www.foo.com)')`
fix Ox.Edidable textarea height, dont add title to links in parseHTML 2012-01-19 16:37:39 +00:00			`'(see: <a href="http://www.foo.com">www.foo.com</a>)'`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`> Ox.parseHTML('foo@bar.com')`
fix Ox.Edidable textarea height, dont add title to links in parseHTML 2012-01-19 16:37:39 +00:00			`'<a href="mailto:foo@bar.com">foo@bar.com</a>'`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`> Ox.parseHTML('<a href="http://foo.com" onmouseover="alert()">foo</a>')`
fix Ox.Edidable textarea height, dont add title to links in parseHTML 2012-01-19 16:37:39 +00:00			`'<a href="http://foo.com">foo</a>'`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`> Ox.parseHTML('<a href="javascript:alert()">foo</a>')`
			`'<a href="javascript:alert()">foo'`
			`> Ox.parseHTML('[http://foo.com foo]')`
fix Ox.Edidable textarea height, dont add title to links in parseHTML 2012-01-19 16:37:39 +00:00			`'<a href="http://foo.com">foo</a>'`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`> Ox.parseHTML('<rtl>foo</rtl>')`
			`'<div style="direction: rtl">foo</div>'`
			`> Ox.parseHTML('<script>alert()</script>')`
			`'<script>alert()</script>'`
			`> Ox.parseHTML('\'foo\' < \'bar\' && "foo" > "bar"')`
			`'\'foo\' < \'bar\' && "foo" > "bar"'`
			`> Ox.parseHTML('<b>foo')`
			`'<b>foo</b>'`
			`> Ox.parseHTML('<b>foo</b></b>')`
			`'<b>foo</b>'`
			`@*/`

			`Ox.parseHTML = (function() {`
			`var defaultTags = [`
updates for html parsing, request handling, and editable elements 2011-10-27 18:50:23 +00:00			`// inline formatting`
misc updates; add geo demo 2011-12-31 12:57:02 +00:00			`'b', 'code', 'i', 's', 'sub', 'sup', 'u',`
			`// block formatting`
support h1-h6, fixes #516 2012-02-18 15:40:41 +00:00			`'blockquote', 'h1', 'h2', 'h3', 'p', 'pre',`
updates for html parsing, request handling, and editable elements 2011-10-27 18:50:23 +00:00			`// lists`
			`'li', 'ol', 'ul',`
			`// tables`
			`'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr',`
			`// other`
			`'a', 'br', 'img',`
			`// special`
			`'rtl', '[]'`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`],`
			`parse = {`
			`a: {`
fix Ox.Edidable textarea height, dont add title to links in parseHTML 2012-01-19 16:37:39 +00:00			`'<a [^<>]?href="((https?:\/\/\|\/).+?)".?>': '<a href="{1}">',`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`'<\/a>': '</a>'`
			`},`
			`img: {`
- allow links to local images - allow wiki links to local urls - disable automatic url / email linking since its broken right now 2012-02-11 12:19:25 +00:00			`'<img [^<>]?src="((https?:\/\/\|\/).+?)".?>': '<img src="{1}">'`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`},`
			`rtl: {`
			`'<rtl>': '<div style="direction: rtl">',`
			`'<\/rtl>': '</div>'`
			`},`
			`'*': function(tag) {`
			`var ret = {};`
update video editor (editables) 2012-01-16 11:22:34 +00:00			`ret['<(/?' + tag + ') ?/?>'] = '<{1}>';`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`return ret;`
			`}`
			`},`
			`tab = '\t';`
			`return function(html, tags, wikilinks) {`
do not shorten urls, just turn them into links 2012-02-10 09:52:09 +00:00			`var matches = [];`
			`tags = tags \|\| defaultTags;`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`// html = Ox.clean(html); fixme: can this be a parameter?`
			`if (tags.indexOf('[]') > -1) {`
- allow links to local images - allow wiki links to local urls - disable automatic url / email linking since its broken right now 2012-02-11 12:19:25 +00:00			`html = html.replace(/\[((https?:\/\/\|\/).+?) (.+?)\]/gi, '<a href="$1">$3</a>');`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`tags = tags.filter(function(tag) {`
			`return tag != '[]';`
			`});`
			`}`
			`tags.forEach(function(tag) {`
			`var p = parse[tag] \|\| parse['*'](tag);`
			`Ox.forEach(p, function(replace, regexp) {`
			`html = html.replace(new RegExp(regexp, 'gi'), function() {`
			`matches.push(Ox.formatString(replace, arguments));`
			`return tab + (matches.length - 1) + tab;`
			`});`
			`});`
			`});`
			`html = Ox.encodeHTML(html);`
- allow links to local images - allow wiki links to local urls - disable automatic url / email linking since its broken right now 2012-02-11 12:19:25 +00:00			`//fixme: both fail if urls/emails are already links`
			`//html = Ox.parseURLs(html);`
			`//html = Ox.parseEmailAddresses(html);`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`matches.forEach(function(match, i) {`
- add loadAsync and use in Ox.loadFile, Ox.getJSON - add Ox.getJSONP - fix Ox.parseHTML - fix Ox.Doc - add more documentation 2012-05-22 23:17:17 +00:00			`html = html.replace(new RegExp(tab + i + tab), match);`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`});`
updates for html parsing, request handling, and editable elements 2011-10-27 18:50:23 +00:00			`html = html.replace(/\n\n/g, '<br/><br/>');`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`// close extra opening (and remove extra closing) tags`
			`// note: this converts '"' to '"'`
			`return Ox.element('<div>').html(html).html();`
do not shorten urls, just turn them into links 2012-02-10 09:52:09 +00:00			`};`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`}());`

			`/*@`
			`Ox.parseURL <f> Takes a URL, returns its components`
			`(url) -> <o> URL components`
			`url <s> URL`
			`<script>`
			`Ox.test.object = Ox.parseURL('http://www.foo.com:8080/bar/index.html?a=0&b=1#c');`
			`</script>`
			`> Ox.test.object.hash`
			`'#c'`
			`> Ox.test.object.host`
			`'www.foo.com:8080'`
			`> Ox.test.object.hostname`
			`'www.foo.com'`
			`> Ox.test.object.origin`
			`'http://www.foo.com:8080'`
			`> Ox.test.object.pathname`
			`'/bar/index.html'`
			`> Ox.test.object.port`
			`'8080'`
			`> Ox.test.object.protocol`
			`'http:'`
			`> Ox.test.object.search`
			`'?a=0&b=1'`
			`@*/`
			`Ox.parseURL = (function() {`
			`var a = document.createElement('a'),`
			`keys = ['hash', 'host', 'hostname', 'origin',`
			`'pathname', 'port', 'protocol', 'search'];`
			`return function(str) {`
			`var ret = {};`
			`a.href = str;`
			`keys.forEach(function(key) {`
			`ret[key] = a[key];`
			`});`
			`return ret;`
			`};`
			`}());`

			`/*@`
			`Ox.parseURLs <f> Takes HTML and turns URLs into links`
- allow links to local images - allow wiki links to local urls - disable automatic url / email linking since its broken right now 2012-02-11 12:19:25 +00:00			`> Ox.parseURLs('http://foo.com, bar')`
			`'<a href="http://foo.com">http://foo.com</a>, bar'`
			`> Ox.parseURLs('http://foo.com/foobar?foo, bar')`
			`'<a href="http://foo.com/foobar?foo">http://foo.com/foobar?foo</a>, bar'`
			`> Ox.parseURLs('www.foo.com, bar')`
			`'<a href="http://www.foo.com">www.foo.com</a>, bar'`
			`> Ox.parseURLs('<a href="http://foo.com">http://foo.com</a> etc')`
			`'<a href="http://foo.com">http://foo.com</a> etc'`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`@*/`
minor changes in OxJS 2012-01-06 11:57:54 +00:00			`// fixme: shouldn't this be formatURLs?`
- allow links to local images - allow wiki links to local urls - disable automatic url / email linking since its broken right now 2012-02-11 12:19:25 +00:00			`// fixme: fails for urls inside links`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`Ox.parseURLs = function(html) {`
			`return html.replace(`
			`/\b((https?:\/\/\|www\.).+?)([\.,:;!\?\)\]]*?(\s\|$))/gi,`
			`function(str, url, pre, end) {`
			`url = (pre == 'www.' ? 'http://' : '' ) + url;`
			`return Ox.formatString(`
do not shorten urls, just turn them into links 2012-02-10 09:52:09 +00:00			`'<a href="{url}">{url}</a>{end}',`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`{`
			`end: end,`
			`url: url`
			`}`
			`);`
			`}`
			`);`
add some ;, remove print statement 2011-10-24 13:13:00 +00:00			`};`