Ox.tokenize, Ox.SyntaxHighlighter (+demo)

This commit is contained in:
rolux 2011-04-28 20:34:19 +02:00
commit 74b9a25387
8 changed files with 632 additions and 0 deletions

View file

@ -2701,6 +2701,286 @@ Ox.toDashes = function(str) {
});
};
Ox.tokenize = (function() {
// see https://github.com/mozilla/narcissus/blob/master/lib/jslex.js
var identifier = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$_',
// see https://developer.mozilla.org/en/JavaScript/Reference/Reserved_Words
linebreak = '\n\r',
number = '0123456789',
// see https://developer.mozilla.org/en/JavaScript/Reference
operator = [
// arithmetic
'+', '-', '*', '/', '%', '++', '--',
// assignment
'=', '+=', '-=', '*=', '/=', '%=',
'&=', '|=', '^=', '<<=', '>>=', '>>>=',
// bitwise
'&', '|', '^', '~', '<<', '>>', '>>>',
// comparison
'==', '!=', '===', '!==', '>', '>=', '<', '<=',
// conditional
'?', ':',
// grouping
'(', ')', '[', ']', '{', '}',
// logical
'&&', '||', '!',
// other
'.', ',', ';'
],
whitespace = ' \t',
word = {
constant: [
// Math
'E', 'LN2', 'LN10', 'LOG2E', 'LOG10E', 'PI', 'SQRT1_2', 'SQRT2',
// Number
'MAX_VALUE', 'MIN_VALUE', 'NEGATIVE_INFINITY', 'POSITIVE_INFINITY'
],
keyword: [
'break',
'case', 'catch', 'class', 'const', 'continue',
'debugger', 'default', 'delete', 'do',
'else', 'enum', 'export', 'extends',
'false', 'finally', 'for', 'function',
'if', 'implements', 'import', 'in', 'instanceof', 'interface',
'let', 'module',
'new', 'null',
'package', 'private', 'protected', 'public',
'return',
'super', 'switch', 'static',
'this', 'throw', 'true', 'try', 'typeof',
'var', 'void',
'yield',
'while', 'with',
],
method: [
// Array
'concat',
'every',
'filter', 'forEach',
'join',
'lastIndexOf',
'indexOf', 'isArray',
'map',
'pop', 'push',
'reduce', 'reduceRight', 'reverse',
'shift', 'slice', 'some', 'sort', 'splice',
'unshift',
// Date
'getDate', 'getDay', 'getFullYear', 'getHours', 'getMilliseconds',
'getMinutes', 'getMonth', 'getSeconds', 'getTime', 'getTimezoneOffset',
'getUTCDate', 'getUTCDay', 'getUTCFullYear', 'getUTCHours', 'getUTCMilliseconds',
'getUTCMinutes', 'getUTCMonth', 'getUTCSeconds',
'now',
'parse',
'setDate', 'setFullYear', 'setHours', 'setMilliseconds', 'setMinutes',
'setMonth', 'setSeconds', 'setTime',
'setUTCDate', 'setUTCFullYear', 'setUTCHours', 'setUTCMilliseconds', 'setUTCMinutes',
'setUTCMonth', 'setUTCSeconds',
'toDateString', 'toJSON', 'toLocaleDateString', 'toLocaleString', 'toLocaleTimeString',
'toTimeString', 'toUTCString',
'UTC',
// Function
'apply', 'bind', 'call', 'isGenerator',
// JSON
'parse', 'stringify',
// Math
'abs', 'acos', 'asin', 'atan', 'atan2',
'ceil', 'cos',
'exp',
'floor',
'log',
'max', 'min',
'pow',
'random', 'round',
'sin', 'sqrt',
'tan',
// Number
'toExponential', 'toFixed', 'toLocaleString', 'toPrecision',
// Object
'create',
'defineProperty', 'defineProperties',
'freeze',
'getOwnPropertyDescriptor', 'getOwnPropertyNames', 'getPrototypeOf',
'hasOwnProperty',
'isExtensible', 'isFrozen', 'isPrototypeOf', 'isSealed',
'keys',
'preventExtensions', 'propertyIsEnumerable',
'seal',
'toLocaleString', 'toString',
'valueOf',
// RegExp
'exec', 'test',
// String
'charAt', 'charCodeAt', 'concat',
'fromCharCode',
'indexOf',
'lastIndexOf', 'localeCompare',
'match',
'replace',
'search', 'slice', 'split', 'substr', 'substring',
'toLocaleLowerCase', 'toLocaleUpperCase', 'toLowerCase', 'toUpperCase', 'trim'
],
object: [
'Array',
'Boolean',
'Date', 'decodeURI', 'decodeURIComponent',
'encodeURI', 'encodeURIComponent', 'Error', 'eval', 'EvalError',
'Function',
'Infinity', 'isFinite', 'isNaN',
'JSON',
'Math',
'NaN', 'Number',
'Object',
'parseFloat', 'parseInt',
'RangeError', 'ReferenceError', 'RegExp',
'String', 'SyntaxError',
'TypeError',
'undefined', 'URIError'
],
property: [
// Function
'constructor', 'length', 'prototype',
// RegExp
'global', 'ignoreCase', 'lastIndex', 'multiline', 'source'
]
};
return function(source) {
//source = source.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
var cursor = 0,
tokenize = {
comment: function() {
while (char = source[++cursor]) {
if (next == '/' && char == '\n') {
break;
} else if (next == '*' && char == '*' && source[cursor + 1] == '/') {
cursor += 2;
break;
}
}
},
identifier: function() {
var str;
while (identifier.indexOf(source[++cursor]) > -1) {}
str = source.substring(start, cursor);
Ox.forEach(word, function(value, key) {
if (value.indexOf(str) > -1) {
type = key;
return false;
}
});
},
linebreak: function() {
while (linebreak.indexOf(source[++cursor]) > -1) {}
},
number: function() {
while ((number + '.').indexOf(source[++cursor]) > -1) {}
},
operator: function() {
if (operator.indexOf(char + source[++cursor]) > -1) {
if (operator.indexOf(char + next + source[++cursor]) > 1) {
++cursor;
}
}
},
regexp: function() {
while ((char = source[++cursor]) != '/') {
char == '\\' && ++cursor;
if (cursor == source.length) {
break;
}
}
while (identifier.indexOf(source[++cursor]) > -1) {}
},
string: function() {
var delimiter = char;
while ((char = source[++cursor]) != delimiter) {
char == '\\' && ++cursor;
if (cursor == source.length) {
break;
}
}
++cursor;
},
whitespace: function() {
while (whitespace.indexOf(source[++cursor]) > -1) {}
}
},
tokens = [],
type;
while (cursor < source.length) {
var char = source[cursor],
next = source[cursor + 1],
start = cursor;
if (char == '/' && (next == '/' || next == '*')) {
type = 'comment';
} else if (identifier.indexOf(char) > -1) {
type = 'identifier';
} else if (linebreak.indexOf(char) > -1) {
type = 'linebreak';
} else if (number.indexOf(char) > -1) {
type = 'number';
} else if (char == "'" || char == '"') {
type = 'string';
} else if (whitespace.indexOf(char) > -1) {
type = 'whitespace';
} else if (char == '/') {
type = isRegExp() ? 'regexp' : 'operator';
} else if (operator.indexOf(char) > -1) {
type = 'operator';
}
tokenize[type]();
tokens.push({
length: cursor - start,
type: type,
});
}
function isRegExp() {
// checks if a forward slash is the beginning of a regexp,
// as opposed to the beginning of an operator
// see http://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html#regular-expressions
var index = tokens.length,
isRegExp = false
offset = 0;
// scan back to the previous significant token,
// or the beginning of the source
while (
typeof tokens[--index] != 'undefined' &&
['comment', 'linebreak', 'whitespace'].indexOf(tokens[index].type) > -1
) {
offset += tokens[index].length;
}
if (typeof tokens[index] == 'undefined') {
// source begins with forward slash
isRegExp = true;
} else {
prevToken = tokens[index];
prevString = source.substr(cursor - prevToken.length - offset, prevToken.length);
Ox.print('forward slash |', prevToken, prevToken.type, '"'+prevString+'"');
isRegExp = (
prevToken.type == 'keyword' &&
['false', 'null', 'true'].indexOf(prevString) == -1
) || (
prevToken.type == 'operator' &&
['++', '--', ')', ']', '}'].indexOf(prevString) == -1
);
}
return isRegExp;
}
return tokens;
};
}());
Ox.toSlashes = function(str) {
/*
>>> Ox.toSlashes("fooBarBaz")