From 5a2ecca0f197cb10f74968e75246c34e4c3500d8 Mon Sep 17 00:00:00 2001
From: rolux <rolux@rolux.org>
Date: Sat, 26 May 2012 12:54:52 +0200
Subject: [PATCH] improve Ox.tokenize, add Ox.identify

---
 source/Ox/js/JavaScript.js | 600 +++++++++++++++++--------------------
 1 file changed, 275 insertions(+), 325 deletions(-)
diff --git a/source/Ox/js/JavaScript.js b/source/Ox/js/JavaScript.js
index 12891043..8bddd034 100644
--- a/source/Ox/js/JavaScript.js
+++ b/source/Ox/js/JavaScript.js
@@ -134,10 +134,9 @@ Ox.doc = (function() {
             tokens = [];
         Ox.tokenize(source).forEach(function(token) {
             var match;
-            token.source = source.substr(token.offset, token.length);
             if (token.type == 'comment' && (
-                match = re.multiline.exec(token.source)
-                || re.singleline.exec(token.source)
+                match = re.multiline.exec(token.value)
+                || re.singleline.exec(token.value)
             )) {
                 blocks.push(match[1]);
                 tokens.push([]);
@@ -162,7 +161,9 @@ Ox.doc = (function() {
                 if (/^[A-Z]/.test(item.name)) {
                     // main item
                     // include leading whitespace
-                    item.source = parseTokens(tokens[i]);
+                    item.source = parseTokens(tokens[i]).map(function(token) {
+                        return token.value;
+                    }).join('');
                     item.line = source.slice(0, item.source[0].offset)
                         .split('\n').length;
                     items.push(item);
@@ -303,6 +304,164 @@ Ox.doc = (function() {
     }
 }());
 
+/*@
+Ox.identify <f> Returns the type of a JavaScript identifier
+    (str) -> <s> Type
+        Type can be <code>constant</code>, <code>identifier</code>,
+        <code>keyword</code>, <code>method</code>, <code>object</code> or
+        <code>property</code>
+@*/
+Ox.identify = (function() {
+    // see https://developer.mozilla.org/en/JavaScript/Reference
+    var identifiers = {
+        constant: [
+            // Math
+            'E', 'LN2', 'LN10', 'LOG2E', 'LOG10E', 'PI', 'SQRT1_2', 'SQRT2',
+            // Number
+            'MAX_VALUE', 'MIN_VALUE', 'NEGATIVE_INFINITY', 'POSITIVE_INFINITY'
+        ],
+        method: [
+            // Array
+            'concat',
+            'every',
+            'filter', 'forEach',
+            'join',
+            'lastIndexOf',
+            'indexOf', 'isArray',
+            'map',
+            'pop', 'push',
+            'reduce', 'reduceRight', 'reverse',
+            'shift', 'slice', 'some', 'sort', 'splice',
+            'unshift',
+            // Date
+            'getDate', 'getDay', 'getFullYear', 'getHours',
+            'getMilliseconds', 'getMinutes', 'getMonth', 'getSeconds',
+            'getTime', 'getTimezoneOffset',
+            'getUTCDate', 'getUTCDay', 'getUTCFullYear', 'getUTCHours',
+            'getUTCMilliseconds', 'getUTCMinutes', 'getUTCMonth', 'getUTCSeconds',
+            'now',
+            'parse',
+            'setDate', 'setFullYear', 'setHours', 'setMilliseconds',
+            'setMinutes', 'setMonth', 'setSeconds', 'setTime',
+            'setUTCDate', 'setUTCFullYear', 'setUTCHours', 'setUTCMilliseconds',
+            'setUTCMinutes', 'setUTCMonth', 'setUTCSeconds',
+            'toDateString', 'toJSON', 'toLocaleDateString', 'toLocaleString',
+            'toLocaleTimeString', 'toTimeString', 'toUTCString',
+            'UTC',
+            // Function
+            'apply', 'bind', 'call', 'isGenerator',
+            // JSON
+            'parse', 'stringify',
+            // Math
+            'abs', 'acos', 'asin', 'atan', 'atan2',
+            'ceil', 'cos',
+            'exp',
+            'floor',
+            'log',
+            'max', 'min',
+            'pow',
+            'random', 'round',
+            'sin', 'sqrt',
+            'tan',
+            // Number
+            'toExponential', 'toFixed', 'toLocaleString', 'toPrecision',
+            // Object
+            'create',
+            'defineProperty', 'defineProperties',
+            'freeze',
+            'getOwnPropertyDescriptor', 'getOwnPropertyNames', 'getPrototypeOf',
+            'hasOwnProperty',
+            'isExtensible', 'isFrozen', 'isPrototypeOf', 'isSealed',
+            'keys',
+            'preventExtensions', 'propertyIsEnumerable',
+            'seal',
+            'toLocaleString', 'toString',
+            'valueOf',
+            // RegExp
+            'exec', 'test',
+            // String
+            'charAt', 'charCodeAt', 'concat',
+            'fromCharCode',
+            'indexOf',
+            'lastIndexOf', 'localeCompare',
+            'match',
+            'replace',
+            'search', 'slice', 'split', 'substr', 'substring',
+            'toLocaleLowerCase', 'toLocaleUpperCase',
+            'toLowerCase', 'toUpperCase', 'trim',
+            // Window
+            'addEventListener', 'alert', 'atob',
+            'blur', 'btoa',
+            'clearInterval', 'clearTimeout', 'close', 'confirm',
+            'dispatchEvent',
+            'escape',
+            'find', 'focus',
+            'getComputedStyle', 'getSelection',
+            'moveBy', 'moveTo',
+            'open',
+            'postMessage', 'print', 'prompt',
+            'removeEventListener', 'resizeBy', 'resizeTo',
+            'scroll', 'scrollBy', 'scrollTo',
+            'setCursor', 'setInterval', 'setTimeout', 'stop',
+            'unescape'
+        ],
+        object: [
+            'Array',
+            'Boolean',
+            'Date', 'decodeURI', 'decodeURIComponent',
+            'encodeURI', 'encodeURIComponent', 'Error', 'eval', 'EvalError',
+            'Function',
+            'Infinity', 'isFinite', 'isNaN',
+            'JSON',
+            'Math',
+            'NaN', 'Number',
+            'Object',
+            'parseFloat', 'parseInt',
+            'RangeError', 'ReferenceError', 'RegExp',
+            'String', 'SyntaxError',
+            'TypeError',
+            'undefined', 'URIError',
+            'window'
+        ],
+        property: [
+            // Function
+            'constructor', 'length', 'prototype',
+            // RegExp
+            'global', 'ignoreCase', 'lastIndex', 'multiline', 'source',
+            // Window
+            'applicationCache',
+            'closed', 'console', 'content', 'crypto',
+            'defaultStatus', 'document',
+            'frameElement', 'frames',
+            'history',
+            'innerHeight', 'innerWidth',
+            'length', 'location', 'locationbar', 'localStorage',
+            'menubar',
+            'name', 'navigator',
+            'opener', 'outerHeight', 'outerWidth',
+            'pageXOffset', 'pageYOffset', 'parent', 'personalbar',
+            'screen', 'screenX', 'screenY', 'scrollbars', 'scrollX', 'scrollY',
+            'self', 'sessionStorage', 'status', 'statusbar',
+            'toolbar', 'top'
+        ]
+    };
+    return function(identifier) {
+        var ret = 'identifier';
+        if (Ox.KEYWORDS.indexOf(identifier) > -1) {
+            ret = 'keyword';
+        } else {
+            // The first word list that contains the identifier sets the type
+            Ox.forEach(identifiers, function(words, type) {
+                if (words.indexOf(identifier) > -1) {
+                    ret = type;
+                    Ox.Break();
+                }
+            });
+        }
+        return ret;
+    };
+}());
+
 /*@
 Ox.minify <f> Minifies JavaScript
     (source) -> <s> Minified JavaScript
@@ -327,36 +486,18 @@ Ox.minify = function() {
             arguments[1](minify(source));
         });
     }
-    function isCommentOrLinebreakOrWhitespace(token) {
-        return token.type == 'comment' || isLinebreakOrWhitespace(token);
-    }
-    function isIdentifierOrNumber(token) {
-        return Ox.contains([
-            'constant', 'identifier', 'keyword',
-            'number', 'method', 'object', 'property'
-        ], token.type);
-    }
-    function isIdentifierOrNumberOrString(token) {
-        return isIdentifierOrNumber(token) || token.type == 'string';
-    }
-    function isLinebreakOrWhitespace(token) {
-        return Ox.contains(['linebreak', 'whitespace'], token.type);
-    }
     function minify(source) {
         var tokens = Ox.tokenize(source),
             length = tokens.length,
             ret = '';
-        function getValue(token) {
-            return source.substr(token.offset, token.length);
-        }
         tokens.forEach(function(token, i) {
-            var next, nextToken, previousToken;
-            if (isLinebreakOrWhitespace(token)) {
-                previousToken = i == 0 ? null : tokens[i - 1];
+            var next, nextToken, prevToken;
+            if (['linebreak', 'whitespace'].indexOf(token.type) > -1) {
+                prevToken = i == 0 ? null : tokens[i - 1];
                 next = i + 1;
                 while (
-                    next < length
-                    && isCommentOrLinebreakOrWhitespace(tokens[next])
+                    next < length && ['comment', 'linebreak', 'whitespace']
+                        .indexOf(tokens[next].type) > -1
                 ) {
                     next++;
                 }
@@ -367,16 +508,12 @@ Ox.minify = function() {
                 // or numbers or strings or unary operators or grouping
                 // operators with a single newline, otherwise remove it
                 if (
-                    previousToken && nextToken && (
-                        isIdentifierOrNumberOrString(previousToken)
-                        || Ox.contains([
-                            '++', '--', ')', ']', '}'
-                        ], getValue(previousToken))
+                    prevToken && nextToken && (
+                        ['identifier', 'number', 'string'].indexOf(prevToken.type) > -1
+                        || ['++', '--', ')', ']', '}'].indexOf(prevToken.value) > -1
                     ) && (
-                        isIdentifierOrNumberOrString(nextToken)
-                        || Ox.contains([
-                            '+', '-', '++', '--', '~', '!', '(', '[', '{'
-                        ], getValue(nextToken))
+                        ['identifier', 'number', 'string'].indexOf(nextToken.type) > -1
+                        || ['+', '-', '++', '--', '~', '!', '(', '[', '{'].indexOf(nextToken.value) > -1
                     )
                 ) {
                     ret += '\n';
@@ -387,23 +524,19 @@ Ox.minify = function() {
                 // that begins with "+" or "-", with a single space, otherwise
                 // remove it
                 if (
-                    previousToken && nextToken && ((
-                        isIdentifierOrNumber(previousToken)
-                        && isIdentifierOrNumber(nextToken)
+                    prevToken && nextToken && ((
+                        ['identifier', 'number'].indexOf(prevToken.type) > -1
+                        && ['identifier', 'number'].indexOf(nextToken.type) > -1
                     ) || (
-                        Ox.contains([
-                           '+', '-', '++', '--'
-                        ], getValue(previousToken))
-                        && Ox.contains([
-                           '+', '-', '++', '--'
-                        ], getValue(nextToken))
+                        ['+', '-', '++', '--'].indexOf(prevToken.value) > -1
+                        && ['+', '-', '++', '--'].indexOf(nextToken.value) > -1
                     ))
                 ) {
                     ret += ' ';
                 }
             } else if (token.type != 'comment') {
                 // remove comments and leave all other tokens untouched
-                ret += getValue(token);
+                ret += token.value;
             }
         });
         return ret;
@@ -443,28 +576,23 @@ Ox.test = function(file, callback) {
 /*@
 Ox.tokenize <f> Tokenizes JavaScript
     (source) -> <[o]> Array of tokens
-        length <n> Length of the token
-        offset <n> Offset of the token
+        column <n> Column of the token (zero-based)
+        line <n> Line of the token (zero-based)
         type <s> Type of the token
-            Type can be <code>"comment"</code>, <code>"constant"</code>,
-            <code>"identifier"</code>, <code>"keyword"</code>,
-            <code>"linebreak"</code>, <code>"method"</code>,
-            <code>"number"</code>, <code>"object"</code>, 
-            <code>"operator"</code>, <code>"property"</code>,
-            <code>"regexp"</code>, <code>"string"</code>
-            or <code>"whitespace"</code>
+            Type can be <code>"comment"</code>, <code>"identifier"</code>,
+            <code>"linebreak"</code>, <code>"number"</code>,
+            <code>"operator"</code>, <code>"regexp"</code>,
+            <code>"string"</code> or <code>"whitespace"</code>
+        value <s> Value of the token
     source <s> JavaScript source code
 @*/
-// FIXME: constant/method/object/property is of interest
-// for syntax highlighting, but may not belong here
-// FIXME: backport python version
 // FIXME: numbers (hex, exp, etc.)
 Ox.tokenize = (function() {
 
-    // see https://github.com/mozilla/narcissus/blob/master/lib/jslex.js
-    // and https://developer.mozilla.org/en/JavaScript/Reference
+    // see https://github.com/mozilla/narcissus/blob/master/lib/lexer.js
 
-    var identifier = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$_',
+    var comment = ['//', '/*'],
+        identifier = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$_',
         linebreak = '\n\r',
         number = '0123456789',
         operator = [
@@ -488,286 +616,108 @@ Ox.tokenize = (function() {
         ],
         regexp = 'abcdefghijklmnopqrstuvwxyz',
         string = '\'"',
-        whitespace = ' \t',
-        word = {
-            constant: [
-                // Math
-                'E', 'LN2', 'LN10', 'LOG2E', 'LOG10E', 'PI', 'SQRT1_2', 'SQRT2',
-                // Number
-                'MAX_VALUE', 'MIN_VALUE', 'NEGATIVE_INFINITY', 'POSITIVE_INFINITY'
-            ],
-            keyword: [
-                'break',
-                'case', 'catch', 'class', 'const', 'continue',
-                'debugger', 'default', 'delete', 'do',
-                'else', 'enum', 'export', 'extends',
-                'false', 'finally', 'for', 'function',
-                'if', 'implements', 'import', 'in', 'instanceof', 'interface',
-                'let', 'module',
-                'new', 'null',
-                'package', 'private', 'protected', 'public',
-                'return',
-                'super', 'switch', 'static',
-                'this', 'throw', 'true', 'try', 'typeof',
-                'var', 'void',
-                'yield',
-                'while', 'with',
-            ],
-            method: [
-                // Array
-                'concat',
-                'every',
-                'filter', 'forEach',
-                'join',
-                'lastIndexOf',
-                'indexOf', 'isArray',
-                'map',
-                'pop', 'push',
-                'reduce', 'reduceRight', 'reverse',
-                'shift', 'slice', 'some', 'sort', 'splice',
-                'unshift',
-                // Date
-                'getDate', 'getDay', 'getFullYear', 'getHours',
-                'getMilliseconds', 'getMinutes', 'getMonth', 'getSeconds',
-                'getTime', 'getTimezoneOffset',
-                'getUTCDate', 'getUTCDay', 'getUTCFullYear', 'getUTCHours',
-                'getUTCMilliseconds', 'getUTCMinutes', 'getUTCMonth', 'getUTCSeconds',
-                'now',
-                'parse',
-                'setDate', 'setFullYear', 'setHours', 'setMilliseconds',
-                'setMinutes', 'setMonth', 'setSeconds', 'setTime',
-                'setUTCDate', 'setUTCFullYear', 'setUTCHours', 'setUTCMilliseconds',
-                'setUTCMinutes', 'setUTCMonth', 'setUTCSeconds',
-                'toDateString', 'toJSON', 'toLocaleDateString', 'toLocaleString',
-                'toLocaleTimeString', 'toTimeString', 'toUTCString',
-                'UTC',
-                // Function
-                'apply', 'bind', 'call', 'isGenerator',
-                // JSON
-                'parse', 'stringify',
-                // Math
-                'abs', 'acos', 'asin', 'atan', 'atan2',
-                'ceil', 'cos',
-                'exp',
-                'floor',
-                'log',
-                'max', 'min',
-                'pow',
-                'random', 'round',
-                'sin', 'sqrt',
-                'tan',
-                // Number
-                'toExponential', 'toFixed', 'toLocaleString', 'toPrecision',
-                // Object
-                'create',
-                'defineProperty', 'defineProperties',
-                'freeze',
-                'getOwnPropertyDescriptor', 'getOwnPropertyNames', 'getPrototypeOf',
-                'hasOwnProperty',
-                'isExtensible', 'isFrozen', 'isPrototypeOf', 'isSealed',
-                'keys',
-                'preventExtensions', 'propertyIsEnumerable',
-                'seal',
-                'toLocaleString', 'toString',
-                'valueOf',
-                // RegExp
-                'exec', 'test',
-                // String
-                'charAt', 'charCodeAt', 'concat',
-                'fromCharCode',
-                'indexOf',
-                'lastIndexOf', 'localeCompare',
-                'match',
-                'replace',
-                'search', 'slice', 'split', 'substr', 'substring',
-                'toLocaleLowerCase', 'toLocaleUpperCase', 'toLowerCase', 'toUpperCase', 'trim',
-                // Window
-                'addEventListener', 'alert', 'atob',
-                'blur', 'btoa',
-                'clearInterval', 'clearTimeout', 'close', 'confirm',
-                'dispatchEvent',
-                'escape',
-                'find', 'focus',
-                'getComputedStyle', 'getSelection',
-                'moveBy', 'moveTo',
-                'open',
-                'postMessage', 'print', 'prompt',
-                'removeEventListener', 'resizeBy', 'resizeTo',
-                'scroll', 'scrollBy', 'scrollTo',
-                'setCursor', 'setInterval', 'setTimeout', 'stop',
-                'unescape'
-            ],
-            object: [
-                'Array',
-                'Boolean',
-                'Date', 'decodeURI', 'decodeURIComponent',
-                'encodeURI', 'encodeURIComponent', 'Error', 'eval', 'EvalError',
-                'Function',
-                'Infinity', 'isFinite', 'isNaN',
-                'JSON',
-                'Math',
-                'NaN', 'Number',
-                'Object',
-                'parseFloat', 'parseInt',
-                'RangeError', 'ReferenceError', 'RegExp',
-                'String', 'SyntaxError',
-                'TypeError',
-                'undefined', 'URIError',
-                'window'
-            ],
-            property: [
-                // Function
-                'constructor', 'length', 'prototype',
-                // RegExp
-                'global', 'ignoreCase', 'lastIndex', 'multiline', 'source',
-                // Window
-                'applicationCache',
-                'closed', 'console', 'content', 'crypto',
-                'defaultStatus', 'document',
-                'frameElement', 'frames',
-                'history',
-                'innerHeight', 'innerWidth',
-                'length', 'location', 'locationbar', 'localStorage',
-                'menubar',
-                'name', 'navigator',
-                'opener', 'outerHeight', 'outerWidth',
-                'pageXOffset', 'pageYOffset', 'parent', 'personalbar',
-                'screen', 'screenX', 'screenY', 'scrollbars', 'scrollX', 'scrollY',
-                'self', 'sessionStorage', 'status', 'statusbar',
-                'toolbar', 'top'
-            ]
-        };
+        whitespace = ' \t';
+
+    function isRegExp(tokens) {
+        // Returns true if the current token, a forward slash, is the beginning
+        // of a RegExp, as opposed to the beginning of an operator. tokens is
+        // the array of tokens found so far, i.e. everything before the slash.
+        var i = tokens.length - 1, token;
+        // Scan back to the previous significant token, or to the beginning of
+        // the source
+        while (i >= 0 && [
+            'comment', 'linebreak', 'whitespace'
+        ].indexOf(tokens[i].type) > -1) {
+            i--;
+        }
+        // If the source begins with a forward slash, i is -1 and token is
+        // undefined
+        token = tokens[i];
+        // A slash begins a RegExp at the beginning of the source, after a
+        // keyword that is not a value ("false", "null", "this", "true"), or
+        // after an operator other than "++", "--", ")", "]" or "}"
+        return !token || (
+            token.type == 'identifier'
+            && Ox.identify(token.value) == 'keyword'
+            && ['false', 'null', 'this', 'true'].indexOf(token.value) == -1
+        ) || (
+            token.type == 'operator'
+            && ['++', '--', ')', ']', '}'].indexOf(token.value) == -1
+        );
+    }
 
     return function(source) {
-
-        source = source.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
-
-        var cursor = 0,
-            tokenize = {
-                comment: function() {
-                    while (char = source[++cursor]) {
-                        if (next == '/' && char == '\n') {
-                            break;
-                        } else if (next == '*' && char + source[cursor + 1] == '*/') {
-                            cursor += 2;
-                            break;
-                        }
-                    }
-                },
-                identifier: function() {
-                    var str;
-                    while ((identifier + number).indexOf(source[++cursor]) > -1) {}
-                    str = source.slice(start, cursor);
-                    Ox.forEach(word, function(value, key) {
-                        if (value.indexOf(str) > -1) {
-                            type = key;
-                            Ox.Break();
-                        }
-                    });
-                },
-                linebreak: function() {
-                    while (linebreak.indexOf(source[++cursor]) > -1) {}
-                },
-                number: function() {
-                    while ((number + '.').indexOf(source[++cursor]) > -1) {}
-                },
-                operator: function() {
-                    while (operator.indexOf(char += source[++cursor]) > -1) {}
-                },
-                regexp: function() {
-                    while ((char = source[++cursor]) != '/') {
-                        char == '\\' && ++cursor;
-                        if (cursor == source.length) {
-                            break;
-                        }
-                    }
-                    while (regexp.indexOf(source[++cursor]) > -1) {}
-                },
-                string: function() {
-                    var delimiter = char;
-                    while ((char = source[++cursor]) != delimiter) {
-                        char == '\\' && ++cursor;
-                        if (cursor == source.length) {
-                            break;
-                        }
-                    }
-                    ++cursor;
-                },
-                whitespace: function() {
-                    while (whitespace.indexOf(source[++cursor]) > -1) {}
-                }
-            },
+        // Normalize linebreaks first, so that length matches the scanned text
+        source = source.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
+        var char,
+            column = 0,
+            cursor = 0,
+            delimiter,
+            length = source.length,
+            line = 0,
+            lines,
             tokens = [],
-            type;
-
-        while (cursor < source.length) {
-            var char = source[cursor],
-                next = source[cursor + 1],
-                start = cursor;
-            if (char == '/' && (next == '/' || next == '*')) {
+            start,
+            type,
+            value;
+        while (cursor < length) {
+            start = cursor;
+            char = source[cursor];
+            if (comment.indexOf(delimiter = char + source[cursor + 1]) > -1) {
                 type = 'comment';
+                ++cursor;
+                while (char = source[++cursor]) {
+                    if (delimiter == '//' && char == '\n') {
+                        break;
+                    } else if (delimiter == '/*' && char + source[cursor + 1] == '*/') {
+                        cursor += 2;
+                        break;
+                    }
+                }
             } else if (identifier.indexOf(char) > -1) {
                 type = 'identifier';
+                while ((identifier + number).indexOf(source[++cursor]) > -1) {}
             } else if (linebreak.indexOf(char) > -1) {
                 type = 'linebreak';
+                while (linebreak.indexOf(source[++cursor]) > -1) {}
             } else if (number.indexOf(char) > -1) {
                 type = 'number';
-            } else if (string.indexOf(char) > -1) {
-                type = 'string';
-            } else if (whitespace.indexOf(char) > -1) {
-                type = 'whitespace';
-            } else if (char == '/') {
-                type = isRegExp() ? 'regexp' : 'operator';
+                while ((number + '.').indexOf(source[++cursor]) > -1) {}
+            } else if (char == '/' && isRegExp(tokens)) {
+                type = 'regexp';
+                while ((char = source[++cursor]) != '/' && cursor < length) {
+                    char == '\\' && ++cursor;
+                }
+                while (regexp.indexOf(source[++cursor]) > -1) {}
             } else if (operator.indexOf(char) > -1) {
                 type = 'operator';
-            }
-            tokenize[type]();
-            tokens.push({
-                length: cursor - start,
-                offset: start,
-                type: type
-            });
-        }
-
-        function isRegExp() {
-            // checks if a forward slash is the beginning of a regexp,
-            // as opposed to the beginning of an operator
-            // see http://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html#regular-expressions
-            var index = tokens.length,
-                isRegExp = false,
-                offset = 0,
-                prevToken,
-                prevString;
-            // scan back to the previous significant token,
-            // or the beginning of the source
-            while (
-                tokens[--index] !== void 0 && [
-                    'comment', 'linebreak', 'whitespace'
-                ].indexOf(tokens[index].type) > -1
-            ) {
-                offset += tokens[index].length;
-            }
-            if (typeof tokens[index] == 'undefined') {
-                // source begins with forward slash
-                isRegExp = true;
+                while (operator.indexOf(char += source[++cursor]) > -1 && cursor < length) {}
+            } else if (string.indexOf(delimiter = char) > -1) {
+                type = 'string';
+                while ((char = source[++cursor]) != delimiter && cursor < length) {
+                    char == '\\' && ++cursor;
+                }
+                ++cursor;
+            } else if (whitespace.indexOf(char) > -1) {
+                type = 'whitespace';
+                while (whitespace.indexOf(source[++cursor]) > -1) {}
             } else {
-                prevToken = tokens[index];
-                prevString = source.substr(
-                    cursor - prevToken.length - offset, prevToken.length
-                );
-                isRegExp = (
-                    prevToken.type == 'keyword'
-                    && ['false', 'null', 'true'].indexOf(prevString) == -1
-                ) || (
-                    prevToken.type == 'operator'
-                    && ['++', '--', ')', ']', '}'].indexOf(prevString) == -1
-                );
+                break; // unknown character, return the tokens found so far
+            }
+            value = source.slice(start, cursor);
+            tokens.push({column: column, line: line, type: type, value: value});
+            // Advance line and column past the token. Any token whose value
+            // contains linebreaks (not just comments) starts a new line, and
+            // a single-line comment advances the column like any other token
+            lines = value.split('\n');
+            if (lines.length > 1) {
+                column = lines[lines.length - 1].length;
+                line += lines.length - 1;
+            } else {
+                column += value.length;
             }
-            return isRegExp;
         }
-
         return tokens;
-
     };
 
 }());