oxjs/source/Ox/js/HTML.js

'use strict';

(function() {

    // Module-private data shared by the HTML helpers in this file.
    // NOTE(review): Ox.sanitizeHTML declares its own identical defaultTags
    // list, which shadows the one below.
    var defaultTags = [
            // inline formatting
            'b', 'code', 'i', 's', 'sub', 'sup', 'u',
            // block formatting
            'blockquote', 'h1', 'h2', 'h3', 'p', 'pre',
            // lists
            'li', 'ol', 'ul',
            // tables
            'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr',
            // other
            'a', 'br', 'img',
            // special
            'rtl', '[]'
        ],
        // Characters that get encoded as named entities, and those entities
        htmlEntities = {
            '"': '&quot;', '&': '&amp;', "'": '&apos;', '<': '&lt;', '>': '&gt;'
        },
        regexp = {
            // A single entity. Excluding '&' and ';' (and not just whitespace)
            // from the name keeps one match from swallowing several entities,
            // as in 'x&amp;y&amp;z', which would break the position
            // bookkeeping in Ox.highlightHTML
            entity: /&[^\s&;]+;/g,
            // Inline tags (without attributes) that Ox.highlightHTML removes
            // before searching, so that matches can span them
            tag: new RegExp('<\\/?(' + [
                'a', 'b', 'br', 'code', 'i', 's', 'span', 'u'
            ].join('|') + ')\\/?>', 'gi')
        },
        // [pattern, replacement] pairs
        replace = {
            // e-mail addresses (top-level domains of 2 to 6 letters)
            mail: [
                /\b([0-9A-Z\.\+\-_]+@(?:[0-9A-Z\-]+\.)+[A-Z]{2,6})\b/gi,
                '<a href="mailto:$1">$1</a>'
            ],
            // the named entities from htmlEntities, mapped back to characters
            namedEntity: [
                new RegExp('(' + Ox.values(htmlEntities).join('|') + ')', 'g'),
                function(match) {
                    return Ox.keyOf(htmlEntities, match);
                }
            ],
            // decimal (&#228;) and hexadecimal (&#xE4;) entities
            numericEntity: [
                /&#([0-9A-FX]+);/gi,
                function(match, code) {
                    return Ox.char(
                        /^X/i.test(code)
                            ? parseInt(code.slice(1), 16)
                            : parseInt(code, 10)
                    );
                }
            ],
            // Per-tag rules for Ox.sanitizeHTML. The replacements are templates
            // for Ox.formatString, which receives the arguments of the replace
            // callback: '{0}' stands for the whole match, '{1}' for the first
            // capture. '$1' is NOT substituted there, so it must not be used.
            tag: {
                a: [
                    [
                        // keep only href, and only http(s) or absolute paths
                        /<a [^<>]*?href="((https?:\/\/|\/).+?)".*?>/gi,
                        '<a href="{1}">'
                    ],
                    [
                        /<\/a>/gi,
                        '</a>'
                    ]
                ],
                img: [
                    [
                        // keep only src, and only http(s) or absolute paths
                        /<img [^<>]*?src="((https?:\/\/|\/).+?)".*?>/gi,
                        '<img src="{1}">'
                    ]
                ],
                rtl: [
                    [
                        /<rtl>/gi,
                        '<div style="direction: rtl">'
                    ],
                    [
                        /<\/rtl>/gi,
                        '</div>'
                    ]
                ],
                // any other allowed tag: attribute-less opening, closing or
                // self-closing form, passed through unchanged
                '*': function(tag) {
                    return [
                        [
                            new RegExp('</?' + tag + ' ?/?>', 'gi'),
                            '{0}'
                        ]
                    ];
                }
            },
            // URLs starting with http://, https:// or www., up to whitespace or
            // the end of the string; trailing punctuation is kept outside of
            // the link
            url: [
                /\b((https?:\/\/|www\.).+?)([\.,:;!\?\)\]]*?(\s|$))/gi,
                function(string, url, prefix, end) {
                    // 'www.' URLs need a protocol to become a valid href
                    prefix = prefix.toLowerCase() == 'www.' ? 'http://' : '';
                    return Ox.formatString(
                        '<a href="{prefix}{url}">{url}</a>{end}',
                        {end: end, prefix: prefix, url: url}
                    );
                }
            ]
        },
        // Two strings of 16 characters each (presumably random upper-case
        // letters, given Ox.char(65 + Ox.random(26))). Ox.sanitizeHTML joins
        // them around an index to build placeholders for sanitized tags.
        salt = Ox.range(2).map(function(){
            return Ox.range(16).map(function() {
                return Ox.char(65 + Ox.random(26));
            }).join('');
        });

    // Breaks a string into alternating segments: text at even indices, tags at
    // odd indices. A '<' whose index is listed in the ignore array does not
    // open a tag and stays part of the surrounding text.
    function splitHTMLTags(string, ignore) {
        var insideTag = false,
            segments = [''],
            skip = ignore || [];
        Ox.forEach(string, function(character, index) {
            var opensTag = !insideTag && character == '<'
                && skip.indexOf(index) == -1;
            if (opensTag) {
                // start a new (tag) segment
                insideTag = true;
                segments.push('');
            }
            segments[segments.length - 1] += character;
            if (insideTag && character == '>') {
                // the tag is complete, start a new (text) segment
                insideTag = false;
                segments.push('');
            }
        });
        return segments;
    }

    /*@
    Ox.addLinks <f> Takes a string and adds links for e-mail addresses and URLs
        (string[, isHTML]) -> <s> Formatted string
        string <s> String
        isHTML <b|false> If true, ignore matches in tags or enclosed by links
        > Ox.addLinks('foo bar <foo@bar.com>')
        'foo bar &lt;<a href="mailto:foo@bar.com">foo@bar.com</a>&gt;'
        > Ox.addLinks('www.foo.com/bar#baz, etc.')
        '<a href="http://www.foo.com/bar#baz">www.foo.com/bar#baz</a>, etc.'
        > Ox.addLinks('<a href="http://foo.com">foo.com</a>', true)
        '<a href="http://foo.com">foo.com</a>'
    @*/
    Ox.addLinks = function(string, isHTML) {
        // true while the segments being visited are enclosed by an <a> element
        var isLink = false;
        // turns e-mail addresses and URLs in a piece of text into links
        function replaceString(string) {
            return string
                .replace(replace.mail[0], replace.mail[1])
                .replace(replace.url[0], replace.url[1]);
        }
        return isHTML
            ? splitHTMLTags(string).map(function(string, i) {
                // odd segments are tags, even segments are text
                var isTag = i % 2;
                if (isTag) {
                    // Match the anchor tag only: the tag name has to end right
                    // after the 'a', so that <abbr>, <area>, <article> etc. are
                    // not mistaken for links, and the i flag covers upper-case
                    // markup like <A HREF="...">
                    if (/^<a(\s|\/|>|$)/i.test(string)) {
                        isLink = true;
                    } else if (/^<\/a(\s|>|$)/i.test(string)) {
                        isLink = false;
                    }
                }
                // leave tags and link text alone, add links everywhere else
                return isTag || isLink ? string : replaceString(string);
            }).join('')
            // NOTE(review): in this branch, the string is passed through the
            // DOM (see Ox.normalizeHTML) without being encoded first, so any
            // markup it contains is parsed as HTML
            : Ox.normalizeHTML(replaceString(string));
    };

    /*@
    Ox.encodeHTMLEntities <f> Encodes HTML entities
        (string[, encodeAll]) -> <s> String
        string <s> String
        encodeAll <b|false> If true, encode characters > 127 as numeric entities
        > Ox.encodeHTMLEntities('<\'&"> äbçdê')
        '&lt;&apos;&amp;&quot;&gt; äbçdê'
        > Ox.encodeHTMLEntities('<\'&"> äbçdê', true)
        '&lt;&apos;&amp;&quot;&gt; &#x00E4;b&#x00E7;d&#x00EA;'
    @*/
    Ox.encodeHTMLEntities = function(string, encodeAll) {
        // Returns the encoded form of a single character
        function encodeCharacter(character) {
            var codeUnit = character.charCodeAt(0);
            if (codeUnit < 128) {
                // ASCII: only the characters listed in htmlEntities change
                return character in htmlEntities
                    ? htmlEntities[character] : character;
            }
            if (encodeAll) {
                // everything else becomes a hexadecimal numeric entity
                return '&#x'
                    + Ox.pad(codeUnit.toString(16).toUpperCase(), 4) + ';';
            }
            return character;
        }
        return Ox.map(String(string), function(char) {
            return encodeCharacter(char);
        });
    };

    /*@
    Ox.decodeHTMLEntities <f> Decodes HTML entities
        (string[, decodeAll]) -> <s> String
        string <s> String
        decodeAll <b|false> If true, decode named entities for characters > 127
            Note that <code>decodeAll</code> relies on
            <code>Ox.normalizeHTML</code>, which uses the DOM and may transform
            the string
        > Ox.decodeHTMLEntities('&#x003C;&#x0027;&#x0026;&#x0022;&#x003E;')
        '<\'&">'
        > Ox.decodeHTMLEntities('&lt;&apos;&amp;&quot;&gt;')
        '<\'&">'
        > Ox.decodeHTMLEntities('&#x00E4;b&#x00E7;d&#x00EA;')
        'äbçdê'
        > Ox.decodeHTMLEntities('&auml;b&ccedil;d&ecirc;')
        '&auml;b&ccedil;d&ecirc;'
        > Ox.decodeHTMLEntities('&auml;b&ccedil;d&ecirc;', true)
        'äbçdê'
        > Ox.decodeHTMLEntities('<b>&beta;')
        '<b>&beta;'
        > Ox.decodeHTMLEntities('<b>&beta;', true)
        '<b>β</b>'
        > Ox.decodeHTMLEntities('&lt;b&gt;')
        '<b>'
    @*/
    Ox.decodeHTMLEntities = (function() {
        // One pattern that covers both the named entities (first capture) and
        // numeric entities (second capture). Decoding has to happen in a
        // single pass: running two passes in sequence decodes twice, so that
        // '&amp;#60;' would first become '&#60;' and then '<'.
        var entity = new RegExp(
                replace.namedEntity[0].source + '|&#([0-9A-Fa-fXx]+);', 'g'
            );
        // (string[, decodeAll]) -> decoded string. If decodeAll is true, the
        // string is normalized via the DOM first (which resolves all named
        // entities, but may transform the string), then decoded as usual.
        return function(string, decodeAll) {
            return decodeAll
                ? Ox.decodeHTMLEntities(Ox.normalizeHTML(string))
                : String(string).replace(entity, function(match, named, code) {
                    // the first capture is only set for named entities
                    return named
                        ? replace.namedEntity[1](named)
                        : replace.numericEntity[1](match, code);
                });
        };
    }());

    /*@
    Ox.highlightHTML <f> Highlight matches in string
        (string, query, classname[, isHTML]) -> Output string
        string <s> Input string
        query <r|s> Case-insensitive query string, or regular expression
        classname <s> Class name for matches
        isHTML <b|false> If true, the input string is treated as HTML
        > Ox.highlightHTML('<name>', 'name', 'c')
        '&lt;<span class="c">name</span>&gt;'
        > Ox.highlightHTML('<span class="name">name</span>', 'name', 'c', true)
        '<span class="name"><span class="c">name</span></span>'
        > Ox.highlightHTML('amp &amp; amp', 'amp', 'c', true)
        '<span class="c">amp</span> &amp; <span class="c">amp</span>'
        > Ox.highlightHTML('amp &amp; amp', 'amp & amp', 'c', true)
        '<span class="c">amp &amp; amp</span>'
        > Ox.highlightHTML('<b>&lt;b&gt;</b>', '<b>', 'c', true)
        '<span class="c"><b>&lt;b&gt;</b></span>'
        > Ox.highlightHTML('<b>&lt;b&gt;</b>', '&lt;b&gt;', 'c', true)
        '<b>&lt;b&gt;</b>'
        > Ox.highlightHTML('foo<b>bar</b>baz', 'foobar', 'c', true)
        '<span class="c">foo<b>bar</b></span>baz'
        > Ox.highlightHTML('foo<p>bar</p>baz', 'foobar', 'c', true)
        'foo<p>bar</p>baz'
        > Ox.highlightHTML('foo <br/>bar baz', 'foo bar', 'c', true)
        '<span class="c">foo <br>bar</span> baz'
    @*/
    Ox.highlightHTML = function(string, query, classname, isHTML) {
        // Strategy for HTML input: strip inline tags and decode entities (while
        // recording what was changed, and where), search the remaining text
        // outside of tags, restore entities and tags (while updating the
        // recorded matches), and finally wrap each match in a span.
        // NOTE(review): a regular expression passed as query should have the
        // 'g' flag, otherwise String.prototype.replace stops after the first
        // match (per text segment, if isHTML).
        var cursor = 0,
            // decoded entities: original value, original position, and length
            // of the decoded value
            entities = [],
            // indices, in the decoded string, of all characters that were
            // produced by decoding an entity
            ignore = [],
            // matches: position in the (progressively restored) string, value
            matches = [],
            // number of characters removed so far by decoding entities
            offset = 0,
            re = Ox.isRegExp(query) ? query
                : new RegExp(Ox.escapeRegExp(query), 'gi'),
            span = ['<span class="' + classname + '">', '</span>'],
            // removed inline tags: original value and position, length 0
            tags = [];
        // Returns the match offset from the arguments of a replace callback.
        // The offset is the first numeric argument; it only is the second
        // argument if the regular expression has no capture groups.
        function getPosition(args) {
            var i;
            for (i = 1; i < args.length; i++) {
                if (typeof args[i] == 'number') {
                    return args[i];
                }
            }
        }
        function insert(array) {
            // for each replacement
            array.forEach(function(v) {
                // replace the modified value with the original value
                string = Ox.splice(string, v.position, v.length, v.value);
                // for each match
                matches.forEach(function(match) {
                    if (v.position < match.position) {
                        // replacement is before match, update match position
                        match.position += v.value.length - v.length;
                    } else if (
                        v.position < match.position + match.value.length
                    ) {
                        // replacement is inside match, update match value
                        match.value = Ox.splice(
                            match.value, v.position - match.position, v.length,
                            v.value
                        );
                    }
                });
            });
        }
        if (isHTML) {
            string = Ox.normalizeHTML(string)
                // remove inline tags
                .replace(regexp.tag, function(value, tag, position) {
                    tags.push({
                        length: 0, position: position, value: value
                    });
                    return '';
                })
                // decode html entities
                .replace(regexp.entity, function(value, position) {
                    var i, ret = Ox.decodeHTMLEntities(value, true);
                    entities.push({
                        length: ret.length, position: position, value: value
                    });
                    // The position refers to the string before decoding, but
                    // the tag splitter below runs on the decoded string, so
                    // subtract what earlier entities have lost in length
                    for (i = 0; i < ret.length; i++) {
                        ignore.push(position - offset + i);
                    }
                    offset += value.length - ret.length;
                    return ret;
                });
            // if decoding entities has created new tags, ignore them
            splitHTMLTags(string, ignore).forEach(function(v, i) {
                if (i % 2 == 0) {
                    // outside tags, find matches and save position and value
                    v.replace(re, function(value) {
                        matches.push({
                            position: cursor + getPosition(arguments),
                            value: value
                        });
                    });
                }
                cursor += v.length;
            });
            // restore in this order: tag positions refer to the string that
            // still had its entities encoded
            insert(entities);
            insert(tags);
            // for each match (in reverse order, so that positions are correct)
            matches.reverse().forEach(function(match) {
                // wrap it in a span
                string = Ox.splice(
                    string, match.position, match.value.length,
                    span.join(match.value)
                );
            });
        } else {
            string = string.replace(re, function(value) {
                return span.join(value);
            });
        }
        // if isHTML, we may have enclosed single opening or closing tags in a
        // span, if not isHTML, the string may contain '<', '>' or '&', so in
        // both cases, we have to normalize
        return Ox.normalizeHTML(string);
    };

    /*@
    Ox.normalizeHTML <f> Normalize HTML (using the DOM)
        > Ox.normalizeHTML('<b>foo')
        '<b>foo</b>'
        > Ox.normalizeHTML('<b>foo</b></b>')
        '<b>foo</b>'
        > Ox.normalizeHTML('&lt;&apos;&amp;&quot;&gt; &#x00E4;b&#x00E7;d&#x00EA;')
        '&lt;\'&amp;"&gt; äbçdê'
    @*/
    Ox.normalizeHTML = function(html) {
        // Hand the markup to a new <div>, then read the markup back from
        // whatever the setter returns (the original code chains the two calls)
        var $element = Ox.$('<div>'),
            $filled = $element.html(html);
        return $filled.html();
    };

    /*@
    Ox.sanitizeHTML <f> Takes untrusted HTML and returns something trustworthy
        > Ox.sanitizeHTML('http://foo.com, bar')
        '<a href="http://foo.com">http://foo.com</a>, bar'
        > Ox.sanitizeHTML('http://foo.com/foobar?foo, bar')
        '<a href="http://foo.com/foobar?foo">http://foo.com/foobar?foo</a>, bar'
        > Ox.sanitizeHTML('(see: www.foo.com)')
        '(see: <a href="http://www.foo.com">www.foo.com</a>)'
        > Ox.sanitizeHTML('foo@bar.com')
        '<a href="mailto:foo@bar.com">foo@bar.com</a>'
        > Ox.sanitizeHTML('<a href="http://foo.com" onclick="alert()">foo</a>')
        '<a href="http://foo.com">foo</a>'
        > Ox.sanitizeHTML('<a href="javascript:alert()">foo</a>')
        '&lt;a href="javascript:alert()"&gt;foo'
        > Ox.sanitizeHTML('[http://foo.com foo]')
        '<a href="http://foo.com">foo</a>'
        > Ox.sanitizeHTML('<rtl>foo</rtl>')
        '<div style="direction: rtl">foo</div>'
        > Ox.sanitizeHTML('<script>alert()</script>')
        '&lt;script&gt;alert()&lt;/script&gt;'
        > Ox.sanitizeHTML('\'foo\' < \'bar\' && "foo" > "bar"')
        '\'foo\' &lt; \'bar\' &amp;&amp; "foo" &gt; "bar"'
        > Ox.sanitizeHTML('<b>foo')
        '<b>foo</b>'
        > Ox.sanitizeHTML('<b>foo</b></b>')
        '<b>foo</b>'
    @*/
    Ox.sanitizeHTML = (function() {
        // Tags that are allowed if the caller does not pass a list. '[]' is
        // not a tag, but enables [url text] wiki-style links.
        var defaultTags = [
                // inline formatting
                'b', 'code', 'i', 's', 'sub', 'sup', 'u',
                // block formatting
                'blockquote', 'h1', 'h2', 'h3', 'p', 'pre',
                // lists
                'li', 'ol', 'ul',
                // tables
                'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr',
                // other
                'a', 'br', 'img',
                // special
                'rtl', '[]'
            ];
        // (html[, tags[, wikilinks]]) -> sanitized HTML
        // html <s> Untrusted HTML
        // tags <[s]> Allowed tags (defaults to defaultTags)
        // wikilinks Currently unused, kept for backward compatibility
        return function(html, tags, wikilinks) {
            // sanitized versions of all allowed tags found in the input, in
            // the order of their placeholders
            var matches = [];
            tags = tags || defaultTags;
            // html = Ox.clean(html); fixme: can this be a parameter?
            if (tags.indexOf('[]') > -1) {
                // turn [url text] into a regular link, which then has to pass
                // the rules for the 'a' tag like any other link
                html = html.replace(/\[((https?:\/\/|\/).+?) (.+?)\]/gi, '<a href="$1">$3</a>');
                tags = tags.filter(function(tag) {
                    return tag != '[]';
                });
            }
            // replace each allowed tag with a placeholder (the index of its
            // sanitized version, enclosed by the two salt strings)
            tags.forEach(function(tag) {
                var array = replace.tag[tag] || replace.tag['*'](tag);
                Ox.forEach(array, function(value) {
                    html = html.replace(value[0], function() {
                        matches.push(Ox.formatString(value[1], arguments));
                        return salt.join(matches.length - 1);
                    });
                });
            });
            // whatever markup is left is not allowed, so encode it
            html = Ox.addLinks(Ox.encodeHTMLEntities(html), true);
            // put the sanitized tags back in
            matches.forEach(function(match, i) {
                // Use a function as replacement: in a replacement string,
                // sequences like '$&' or '$`' (which may occur in a URL) would
                // be expanded instead of being inserted literally
                html = html.replace(new RegExp(salt.join(i)), function() {
                    return match;
                });
            });
            html = html.replace(/\n\n/g, '<br/><br/>');
            // Close extra opening and remove extra closing tags.
            // Note: this converts '&apos;' to "'" and '&quot;' to '"'
            return Ox.normalizeHTML(html);
        };
    }());

}());
'use strict'; 2011-11-05 16:46:53 +00:00			`'use strict';`

replace Ox.parseEmailAddresses and Ox.parseURLs with Ox.addLinks; rename Ox.parseHTML to Ox.sanitizeHTML; add Ox.normalizeHTML; update Ox.encodeHTMLEntities and Ox.decodeHTMLEntities so that they also cover the previous Ox.encodeHTML and Ox.decodeHTML 2012-05-27 10:36:16 +00:00			`(function() {`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00
			`var defaultTags = [`
updates for html parsing, request handling, and editable elements 2011-10-27 18:50:23 +00:00			`// inline formatting`
misc updates; add geo demo 2011-12-31 12:57:02 +00:00			`'b', 'code', 'i', 's', 'sub', 'sup', 'u',`
			`// block formatting`
support h1-h6, fixes #516 2012-02-18 15:40:41 +00:00			`'blockquote', 'h1', 'h2', 'h3', 'p', 'pre',`
updates for html parsing, request handling, and editable elements 2011-10-27 18:50:23 +00:00			`// lists`
			`'li', 'ol', 'ul',`
			`// tables`
			`'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr',`
			`// other`
			`'a', 'br', 'img',`
			`// special`
			`'rtl', '[]'`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`],`
replace Ox.parseEmailAddresses and Ox.parseURLs with Ox.addLinks; rename Ox.parseHTML to Ox.sanitizeHTML; add Ox.normalizeHTML; update Ox.encodeHTMLEntities and Ox.decodeHTMLEntities so that they also cover the previous Ox.encodeHTML and Ox.decodeHTML 2012-05-27 10:36:16 +00:00			`htmlEntities = {`
			`'"': '"', '&': '&', "'": ''', '<': '<', '>': '>'`
			`},`
			`regexp = {`
			`entity: /&[^\s]*;/g,`
			`tag: new RegExp('<\\/?(' + [`
			`'a', 'b', 'br', 'code', 'i', 's', 'span', 'u'`
			`].join('\|') + ')\\/?>', 'gi')`
			`},`
			`replace = {`
			`mail: [`
			`/\b([0-9A-Z\.\+\-_]+@(?:[0-9A-Z\-]+\.)+[A-Z]{2,6})\b/gi,`
			`'<a href="mailto:$1">$1</a>'`
			`],`
			`namedEntity: [`
			`new RegExp('(' + Ox.values(htmlEntities).join('\|') + ')', 'g'),`
			`function(match) {`
			`return Ox.keyOf(htmlEntities, match);`
			`}`
			`],`
			`numericEntity: [`
			`/&#([0-9A-FX]+);/gi,`
			`function(match, code) {`
			`return Ox.char(`
			`/^X/i.test(code)`
			`? parseInt(code.slice(1), 16)`
			`: parseInt(code, 10)`
			`);`
			`}`
			`],`
			`tag: {`
			`a: [`
			`[`
			`/<a [^<>]?href="((https?:\/\/\|\/).+?)".?>/gi,`
			`'<a href="{1}">',`
			`],`
			`[`
			`/<\/a>/gi,`
			`'</a>'`
			`]`
			`],`
			`img: [`
			`[`
			`/<img [^<>]?src="((https?:\/\/\|\/).+?)".?>/gi,`
			`'<img src="$1">'`
			`]`
			`],`
			`rtl: [`
			`[`
			`/<rtl>/gi,`
			`'<div style="direction: rtl">'`
			`],`
			`[`
			`/<\/rtl>/gi,`
			`'</div>'`
			`]`
			`],`
			`'*': function(tag) {`
			`return [`
			`[`
			`new RegExp('</?' + tag + ' ?/?>', 'gi'),`
			`'{0}'`
			`]`
			`];`
			`}`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`},`
replace Ox.parseEmailAddresses and Ox.parseURLs with Ox.addLinks; rename Ox.parseHTML to Ox.sanitizeHTML; add Ox.normalizeHTML; update Ox.encodeHTMLEntities and Ox.decodeHTMLEntities so that they also cover the previous Ox.encodeHTML and Ox.decodeHTML 2012-05-27 10:36:16 +00:00			`url: [`
			`/\b((https?:\/\/\|www\.).+?)([\.,:;!\?\)\]]*?(\s\|$))/gi,`
			`function(string, url, prefix, end) {`
			`prefix = prefix.toLowerCase() == 'www.' ? 'http://' : '';`
			`return Ox.formatString(`
			`'<a href="{prefix}{url}">{url}</a>{end}',`
			`{end: end, prefix: prefix, url: url}`
			`);`
			`}`
			`]`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`},`
replace Ox.parseEmailAddresses and Ox.parseURLs with Ox.addLinks; rename Ox.parseHTML to Ox.sanitizeHTML; add Ox.normalizeHTML; update Ox.encodeHTMLEntities and Ox.decodeHTMLEntities so that they also cover the previous Ox.encodeHTML and Ox.decodeHTML 2012-05-27 10:36:16 +00:00			`salt = Ox.range(2).map(function(){`
			`return Ox.range(16).map(function() {`
			`return Ox.char(65 + Ox.random(26));`
			`}).join('');`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`});`
replace Ox.parseEmailAddresses and Ox.parseURLs with Ox.addLinks; rename Ox.parseHTML to Ox.sanitizeHTML; add Ox.normalizeHTML; update Ox.encodeHTMLEntities and Ox.decodeHTMLEntities so that they also cover the previous Ox.encodeHTML and Ox.decodeHTML 2012-05-27 10:36:16 +00:00
			`// Splits a string into text (even indices) and tags (odd indices), ignoring`
			`// tags with starting positions that are included in the ignore array`
			`function splitHTMLTags(string, ignore) {`
			`var isTag = false, ret = [''];`
			`ignore = ignore \|\| [];`
			`Ox.forEach(string, function(char, i) {`
			`if (!isTag && char == '<' && ignore.indexOf(i) == -1) {`
			`isTag = true;`
			`ret.push('');`
			`}`
			`ret[ret.length - 1] += char;`
			`if (isTag && char == '>') {`
			`isTag = false;`
			`ret.push('');`
			`}`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`});`
replace Ox.parseEmailAddresses and Ox.parseURLs with Ox.addLinks; rename Ox.parseHTML to Ox.sanitizeHTML; add Ox.normalizeHTML; update Ox.encodeHTMLEntities and Ox.decodeHTMLEntities so that they also cover the previous Ox.encodeHTML and Ox.decodeHTML 2012-05-27 10:36:16 +00:00			`return ret;`
			`};`

			`/*@`
			`Ox.addLinks <f> Takes a string and adds links for e-mail addresses and URLs`
			`(string[, isHTML]) -> <s> Formatted string`
			`string <s> String`
			`isHTML <b\|false> If true, ignore matches in tags or enclosed by links`
			`> Ox.addLinks('foo bar <foo@bar.com>')`
			`'foo bar <<a href="mailto:foo@bar.com">foo@bar.com</a>>'`
			`> Ox.addLinks('www.foo.com/bar#baz, etc.')`
			`'<a href="http://www.foo.com/bar#baz">www.foo.com/bar#baz</a>, etc.'`
			`> Ox.addLinks('<a href="http://foo.com">foo.com</a>', true)`
			`'<a href="http://foo.com">foo.com</a>'`
			`@*/`
			`Ox.addLinks = function(string, isHTML) {`
			`var isLink = false;`
			`function replaceString(string) {`
			`return string`
			`.replace(replace.mail[0], replace.mail[1])`
			`.replace(replace.url[0], replace.url[1]);`
			`}`
			`return isHTML`
			`? splitHTMLTags(string).map(function(string, i) {`
			`var isTag = i % 2;`
			`if (isTag) {`
			`if (/^<a/.test(string)) {`
			`isLink = true;`
			`} else if (/^<\/a/.test(string)) {`
			`isLink = false;`
			`}`
			`}`
			`return isTag \|\| isLink ? string : replaceString(string);`
			`}).join('')`
			`: Ox.normalizeHTML(replaceString(string));`
do not shorten urls, just turn them into links 2012-02-10 09:52:09 +00:00			`};`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00
replace Ox.parseEmailAddresses and Ox.parseURLs with Ox.addLinks; rename Ox.parseHTML to Ox.sanitizeHTML; add Ox.normalizeHTML; update Ox.encodeHTMLEntities and Ox.decodeHTMLEntities so that they also cover the previous Ox.encodeHTML and Ox.decodeHTML 2012-05-27 10:36:16 +00:00			`/*@`
			`Ox.encodeHTMLEntities <f> Encodes HTML entities`
			`(string[, encodeAll]) -> <s> String`
			`string <s> String`
			`encodeAll <b\|false> If true, encode characters > 127 as numeric entities`
			`> Ox.encodeHTMLEntities('<\'&"> äbçdê')`
			`'<'&"> äbçdê'`
			`> Ox.encodeHTMLEntities('<\'&"> äbçdê', true)`
			`'<'&"> äbçdê'`
			`@*/`
			`Ox.encodeHTMLEntities = function(string, encodeAll) {`
			`return Ox.map(String(string), function(char) {`
			`var code = char.charCodeAt(0);`
			`if (code < 128) {`
			`char = char in htmlEntities ? htmlEntities[char] : char;`
			`} else if (encodeAll) {`
			`char = '&#x' + Ox.pad(code.toString(16).toUpperCase(), 4) + ';';`
			`}`
			`return char;`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`});`
			`};`

replace Ox.parseEmailAddresses and Ox.parseURLs with Ox.addLinks; rename Ox.parseHTML to Ox.sanitizeHTML; add Ox.normalizeHTML; update Ox.encodeHTMLEntities and Ox.decodeHTMLEntities so that they also cover the previous Ox.encodeHTML and Ox.decodeHTML 2012-05-27 10:36:16 +00:00			`/*@`
			`Ox.decodeHTMLEntities <f> Decodes HTML entities`
			`(string[, decodeAll]) -> <s> String`
			`string <s> String`
			`decodeAll <b\|false> If true, decode named entities for characters > 127`
			`Note that <code>decodeAll</code> relies on`
			`<code>Ox.normalizeHTML</code>, which uses the DOM and may transform`
			`the string`
			`> Ox.decodeHTMLEntities('<'&">')`
			`'<\'&">'`
			`> Ox.decodeHTMLEntities('<'&">')`
			`'<\'&">'`
			`> Ox.decodeHTMLEntities('äbçdê')`
			`'äbçdê'`
			`> Ox.decodeHTMLEntities('äbçdê')`
			`'äbçdê'`
			`> Ox.decodeHTMLEntities('äbçdê', true)`
			`'äbçdê'`
			`> Ox.decodeHTMLEntities('<b>β')`
			`'<b>β'`
			`> Ox.decodeHTMLEntities('<b>β', true)`
			`'<b>β</b>'`
			`> Ox.decodeHTMLEntities('<b>')`
			`'<b>'`
			`@*/`
			`Ox.decodeHTMLEntities = function(string, decodeAll) {`
			`return decodeAll`
			`? Ox.decodeHTMLEntities(Ox.normalizeHTML(string))`
			`: String(string)`
			`.replace(replace.namedEntity[0], replace.namedEntity[1])`
			`.replace(replace.numericEntity[0], replace.numericEntity[1]);`
			`};`

			`/*@`
			`Ox.highlightHTML <f> Highlight matches in string`
			`(string, query, classname[, isHTML]) -> Output string`
			`string <s> Input string`
			`query <r\|s> Case-insentitive query string, or regular expression`
			`classname <s> Class name for matches`
			`isHTML <b\|false> If true, the input string is treated as HTML`
			`> Ox.highlightHTML('<name>', 'name', 'c')`
			`'<<span class="c">name</span>>'`
			`> Ox.highlightHTML('<span class="name">name</span>', 'name', 'c', true)`
			`'<span class="name"><span class="c">name</span></span>'`
			`> Ox.highlightHTML('amp & amp', 'amp', 'c', true)`
			`'<span class="c">amp</span> & <span class="c">amp</span>'`
			`> Ox.highlightHTML('amp & amp', 'amp & amp', 'c', true)`
			`'<span class="c">amp & amp</span>'`
			`> Ox.highlightHTML('<b><b></b>', '<b>', 'c', true)`
			`'<span class="c"><b><b></b></span>'`
			`> Ox.highlightHTML('<b><b></b>', '<b>', 'c', true)`
			`'<b><b></b>'`
			`> Ox.highlightHTML('foo<b>bar</b>baz', 'foobar', 'c', true)`
			`'<span class="c">foo<b>bar</b></span>baz'`
			`> Ox.highlightHTML('foo<p>bar</p>baz', 'foobar', 'c', true)`
			`'foo<p>bar</p>baz'`
			`> Ox.highlightHTML('foo <br/>bar baz', 'foo bar', 'c', true)`
			`'<span class="c">foo <br>bar</span> baz'`
			`@*/`
			`Ox.highlightHTML = function(string, query, classname, isHTML) {`
			`var cursor = 0,`
			`entities = [],`
			`matches = [],`
			`re = Ox.isRegExp(query) ? query`
			`: new RegExp(Ox.escapeRegExp(query), 'gi'),`
			`span = ['<span class="' + classname + '">', '</span>'],`
			`tags = [];`
			`function insert(array) {`
			`// for each replacement`
			`array.forEach(function(v) {`
			`// replace the modified value with the original value`
			`string = Ox.splice(string, v.position, v.length, v.value);`
			`// for each match`
			`matches.forEach(function(match) {`
			`if (v.position < match.position) {`
			`// replacement is before match, update match position`
			`match.position += v.value.length - v.length;`
			`} else if (`
			`v.position < match.position + match.value.length`
			`) {`
			`// replacement is inside match, update match value`
			`match.value = Ox.splice(`
			`match.value, v.position - match.position, v.length,`
			`v.value`
			`);`
			`}`
			`});`
			`});`
			`}`
			`if (isHTML) {`
			`string = Ox.normalizeHTML(string)`
			`// remove inline tags`
			`.replace(regexp.tag, function(value, tag, position) {`
			`tags.push({`
			`length: 0, position: position, value: value`
			`});`
			`return '';`
			`})`
			`// decode html entities`
			`.replace(regexp.entity, function(value, position) {`
			`var ret = Ox.decodeHTMLEntities(value, true);`
			`entities.push({`
			`length: ret.length, position: position, value: value`
			`});`
			`return ret;`
			`});`
			`// if decoding entities has created new tags, ignore them`
			`splitHTMLTags(string, entities.map(function(entity) {`
			`return entity.position;`
			`})).forEach(function(v, i) {`
			`if (i % 2 == 0) {`
			`// outside tags, find matches and save position and value`
			`v.replace(re, function(value, position) {`
			`matches.push(`
			`{position: cursor + position, value: value}`
			`);`
			`});`
			`}`
			`cursor += v.length;`
			`});`
			`insert(entities);`
			`insert(tags);`
			`// for each match (in reverse order, so that positions are correct)`
			`matches.reverse().forEach(function(match) {`
			`// wrap it in a span`
			`string = Ox.splice(`
			`string, match.position, match.value.length,`
			`span.join(match.value)`
			`);`
			`});`

			`} else {`
			`string = string.replace(re, function(value) {`
			`return span.join(value);`
			`});`
new build system: build in /build, dev version in /dev; split up Ox.js; fix tests 2011-10-07 01:04:47 +00:00			`}`
replace Ox.parseEmailAddresses and Ox.parseURLs with Ox.addLinks; rename Ox.parseHTML to Ox.sanitizeHTML; add Ox.normalizeHTML; update Ox.encodeHTMLEntities and Ox.decodeHTMLEntities so that they also cover the previous Ox.encodeHTML and Ox.decodeHTML 2012-05-27 10:36:16 +00:00			`// if isHTML, we may have enclosed single opening or closing tags in a`
			`// span, if not isHTML, the string may contain '<', '>' or '&', so in`
			`// both cases, we have to normalize`
			`return Ox.normalizeHTML(string);`
			`};`

			`/*@`
			`Ox.normalizeHTML <f> Normalize HTML (using the DOM)`
			`> Ox.normalizeHTML('<b>foo')`
			`'<b>foo</b>'`
			`> Ox.normalizeHTML('<b>foo</b></b>')`
			`'<b>foo</b>'`
			`> Ox.normalizeHTML('<'&"> äbçdê')`
			`'<\'&"> äbçdê'`
			`@*/`
			`Ox.normalizeHTML = function(html) {`
			`return Ox.$('<div>').html(html).html();`
			`};`

			`/*@`
			`Ox.sanitizeHTML <f> Takes untrusted HTML and returns something trustworthy`
			`> Ox.sanitizeHTML('http://foo.com, bar')`
			`'<a href="http://foo.com">http://foo.com</a>, bar'`
			`> Ox.sanitizeHTML('http://foo.com/foobar?foo, bar')`
			`'<a href="http://foo.com/foobar?foo">http://foo.com/foobar?foo</a>, bar'`
			`> Ox.sanitizeHTML('(see: www.foo.com)')`
			`'(see: <a href="http://www.foo.com">www.foo.com</a>)'`
			`> Ox.sanitizeHTML('foo@bar.com')`
			`'<a href="mailto:foo@bar.com">foo@bar.com</a>'`
			`> Ox.sanitizeHTML('<a href="http://foo.com" onclick="alert()">foo</a>')`
			`'<a href="http://foo.com">foo</a>'`
			`> Ox.sanitizeHTML('<a href="javascript:alert()">foo</a>')`
			`'<a href="javascript:alert()">foo'`
			`> Ox.sanitizeHTML('[http://foo.com foo]')`
			`'<a href="http://foo.com">foo</a>'`
			`> Ox.sanitizeHTML('<rtl>foo</rtl>')`
			`'<div style="direction: rtl">foo</div>'`
			`> Ox.sanitizeHTML('<script>alert()</script>')`
			`'<script>alert()</script>'`
			`> Ox.sanitizeHTML('\'foo\' < \'bar\' && "foo" > "bar"')`
			`'\'foo\' < \'bar\' && "foo" > "bar"'`
			`> Ox.sanitizeHTML('<b>foo')`
			`'<b>foo</b>'`
			`> Ox.sanitizeHTML('<b>foo</b></b>')`
			`'<b>foo</b>'`
			`@*/`
			`Ox.sanitizeHTML = (function() {`
			`var defaultTags = [`
			`// inline formatting`
			`'b', 'code', 'i', 's', 'sub', 'sup', 'u',`
			`// block formatting`
			`'blockquote', 'h1', 'h2', 'h3', 'p', 'pre',`
			`// lists`
			`'li', 'ol', 'ul',`
			`// tables`
			`'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr',`
			`// other`
			`'a', 'br', 'img',`
			`// special`
			`'rtl', '[]'`
			`],`
			`parse = {`
			`a: {`
			`'<a [^<>]?href="((https?:\/\/\|\/).+?)".?>': '<a href="{1}">',`
			`'<\/a>': '</a>'`
			`},`
			`img: {`
			`'<img [^<>]?src="((https?:\/\/\|\/).+?)".?>': '<img src="{1}">'`
			`},`
			`rtl: {`
			`'<rtl>': '<div style="direction: rtl">',`
			`'<\/rtl>': '</div>'`
			`},`
			`'*': function(tag) {`
			`var ret = {};`
			`ret['<(/?' + tag + ') ?/?>'] = '<{1}>';`
			`return ret;`
			`}`
			`},`
			`tab = '\t';`
			`return function(html, tags, wikilinks) {`
			`var matches = [];`
			`tags = tags \|\| defaultTags;`
			`// html = Ox.clean(html); fixme: can this be a parameter?`
			`if (tags.indexOf('[]') > -1) {`
			`html = html.replace(/\[((https?:\/\/\|\/).+?) (.+?)\]/gi, '<a href="$1">$3</a>');`
			`tags = tags.filter(function(tag) {`
			`return tag != '[]';`
			`});`
			`}`
			`tags.forEach(function(tag) {`
			`var array = replace.tag[tag] \|\| replace.tag['*'](tag);`
			`Ox.forEach(array, function(value) {`
			`html = html.replace(value[0], function() {`
			`matches.push(Ox.formatString(value[1], arguments));`
			`return salt.join(matches.length - 1);`
			`});`
			`});`
			`});`
in Ox.sanitizeHTML, fix a bug with adding links 2012-05-27 11:11:49 +00:00			`html = Ox.addLinks(Ox.encodeHTMLEntities(html), true);`
replace Ox.parseEmailAddresses and Ox.parseURLs with Ox.addLinks; rename Ox.parseHTML to Ox.sanitizeHTML; add Ox.normalizeHTML; update Ox.encodeHTMLEntities and Ox.decodeHTMLEntities so that they also cover the previous Ox.encodeHTML and Ox.decodeHTML 2012-05-27 10:36:16 +00:00			`matches.forEach(function(match, i) {`
			`html = html.replace(new RegExp(salt.join(i)), match);`
			`});`
			`html = html.replace(/\n\n/g, '<br/><br/>');`
			`// Close extra opening and remove extra closing tags.`
			`// Note: this converts ''' to "'" and '"' to '"'`
			`return Ox.normalizeHTML(html);`
			`};`
			`}());`

			`}());`