highlight: several bugfixes and performance improvements; normalizeHTML: only use dom if needed

2012-06-16 20:45:05 +00:00 · 2012-06-16 20:45:05 +00:00 · 898cc702f5
commit 898cc702f5
parent 743a36e308
1 changed files with 30 additions and 13 deletions
--- a/source/Ox/js/HTML.js
+++ b/source/Ox/js/HTML.js
@ -20,7 +20,8 @@
            '"': '&quot;', '&': '&amp;', "'": '&apos;', '<': '&lt;', '>': '&gt;'
        },
        regexp = {
-            entity: /&[^\s]*;/g,
+            entity: /&[^\s]+?;/g,
+            html: /[<&]/,
            tag: new RegExp('<\\/?(' + [
                'a', 'b', 'br', 'code', 'i', 's', 'span', 'u'
            ].join('|') + ')\\/?>', 'gi')
@ -234,8 +235,14 @@
        query <r|s> Case-insensitive query string, or regular expression
        classname <s> Class name for matches
        isHTML <b|false> If true, the input string is treated as HTML
-        > Ox.highlight('<name>', 'name', 'c')
-        '&lt;<span class="c">name</span>&gt;'
+        > Ox.highlight('<foo><bar>', 'foo', 'c')
+        '&lt;<span class="c">foo</span>&gt;&lt;bar&gt;'
+        > Ox.highlight('&amp;', '&amp;', 'c')
+        '<span class="c">&amp;amp;</span>'
+        > Ox.highlight('&', '&amp;', 'c')
+        '&amp;'
+        > Ox.highlight('&lt;foo&gt; &lt;foo&gt;', '<foo>', 'c', true)
+        '<span class="c">&lt;foo&gt;</span> <span class="c">&lt;foo&gt;</span>'
        > Ox.highlight('<span class="name">name</span>', 'name', 'c', true)
        '<span class="name"><span class="c">name</span></span>'
        > Ox.highlight('amp &amp; amp', 'amp', 'c', true)
@ -260,6 +267,7 @@
        var cursor = 0,
            entities = [],
            matches = [],
+            offset = 0,
            re = Ox.isRegExp(query) ? query
                : new RegExp(Ox.escapeRegExp(query), 'gi'),
            span = ['<span class="' + classname + '">', '</span>'],
@ -286,8 +294,8 @@
                });
            });
        }
-        if (isHTML) {
-            string = Ox.normalizeHTML(string)
+        if (isHTML && regexp.html.test(string)) {
+            string = string // Ox.normalizeHTML(string)
                // remove inline tags
                .replace(regexp.tag, function(value, tag, position) {
                    tags.push({
@ -305,7 +313,9 @@
                });
            // if decoding entities has created new tags, ignore them
            splitHTMLTags(string, entities.map(function(entity) {
-                return entity.position;
+                var ret = entity.position + offset;
+                offset += entity.length - entity.value.length
+                return ret;
            })).forEach(function(v, i) {
                if (i % 2 == 0) {
                    // outside tags, find matches and save position and value
@ -327,15 +337,22 @@
                    span.join(match.value)
                );
            });
+            // we may have enclosed single opening or closing tags in a span
+            if (matches.length && tags.length) {
+                string = Ox.normalizeHTML(string);
+            }
        } else {
-            string = string.replace(re, function(value) {
-                return span.join(value);
+            string = Ox.encodeHTMLEntities(
+                string.replace(re, function(value) {
+                    matches.push(span.join(Ox.encodeHTMLEntities(value)));
+                    return salt.join(matches.length - 1);
+                })
+            );
+            matches.forEach(function(match, i) {
+                string = string.replace(new RegExp(salt.join(i)), match);
            });
        }
-        // if isHTML, we may have enclosed single opening or closing tags in a 
-        // span, if not isHTML, the string may contain '<', '>' or '&', so in
-        // both cases, we have to normalize
-        return Ox.normalizeHTML(string);
+        return string;
    };

    /*@
@ -348,7 +365,7 @@
        '&lt;\'&amp;"&gt; äbçdê'
    @*/
    Ox.normalizeHTML = function(html) {
-        return Ox.$('<div>').html(html).html();
+        return regexp.html.test(html) ? Ox.$('<div>').html(html).html() : html;
    };

    /*@