1
0
Fork 0
forked from 0x2620/oxjs

Ox.tokenize, Ox.SyntaxHighlighter (+demo)

This commit is contained in:
rolux 2011-04-28 20:34:19 +02:00
commit 74b9a25387
8 changed files with 632 additions and 0 deletions

View file

@ -2701,6 +2701,286 @@ Ox.toDashes = function(str) {
});
};
/*
Ox.tokenize(source) splits a string of JavaScript source code into tokens.

source  the source code (null or undefined is treated as an empty string)
returns an array of {length, type} objects, in source order, where length
        is the number of characters of the token, and type is one of
        'comment', 'identifier', 'linebreak', 'number', 'operator',
        'regexp', 'string' or 'whitespace', or -- for identifiers found in
        the word lists below -- 'constant', 'keyword', 'method', 'object'
        or 'property'. The token lengths always add up to source.length.
*/
Ox.tokenize = (function() {
    // see https://github.com/mozilla/narcissus/blob/master/lib/jslex.js
    var identifier = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$_',
        // token types that are skipped when looking for the previous token
        insignificant = ['comment', 'linebreak', 'whitespace'],
        linebreak = '\n\r',
        number = '0123456789',
        // see https://developer.mozilla.org/en/JavaScript/Reference
        operator = [
            // arithmetic
            '+', '-', '*', '/', '%', '++', '--',
            // assignment
            '=', '+=', '-=', '*=', '/=', '%=',
            '&=', '|=', '^=', '<<=', '>>=', '>>>=',
            // bitwise
            '&', '|', '^', '~', '<<', '>>', '>>>',
            // comparison
            '==', '!=', '===', '!==', '>', '>=', '<', '<=',
            // conditional
            '?', ':',
            // grouping
            '(', ')', '[', ']', '{', '}',
            // logical
            '&&', '||', '!',
            // other
            '.', ',', ';'
        ],
        // length of the longest entry in operator ('>>>=')
        operatorLength = 4,
        whitespace = ' \t',
        // see https://developer.mozilla.org/en/JavaScript/Reference/Reserved_Words
        word = {
            constant: [
                // Math
                'E', 'LN2', 'LN10', 'LOG2E', 'LOG10E', 'PI', 'SQRT1_2', 'SQRT2',
                // Number
                'MAX_VALUE', 'MIN_VALUE', 'NEGATIVE_INFINITY', 'POSITIVE_INFINITY'
            ],
            keyword: [
                'break',
                'case', 'catch', 'class', 'const', 'continue',
                'debugger', 'default', 'delete', 'do',
                'else', 'enum', 'export', 'extends',
                'false', 'finally', 'for', 'function',
                'if', 'implements', 'import', 'in', 'instanceof', 'interface',
                'let', 'module',
                'new', 'null',
                'package', 'private', 'protected', 'public',
                'return',
                'super', 'switch', 'static',
                'this', 'throw', 'true', 'try', 'typeof',
                'var', 'void',
                'yield',
                'while', 'with'
            ],
            method: [
                // Array
                'concat',
                'every',
                'filter', 'forEach',
                'join',
                'lastIndexOf',
                'indexOf', 'isArray',
                'map',
                'pop', 'push',
                'reduce', 'reduceRight', 'reverse',
                'shift', 'slice', 'some', 'sort', 'splice',
                'unshift',
                // Date
                'getDate', 'getDay', 'getFullYear', 'getHours', 'getMilliseconds',
                'getMinutes', 'getMonth', 'getSeconds', 'getTime', 'getTimezoneOffset',
                'getUTCDate', 'getUTCDay', 'getUTCFullYear', 'getUTCHours', 'getUTCMilliseconds',
                'getUTCMinutes', 'getUTCMonth', 'getUTCSeconds',
                'now',
                'parse',
                'setDate', 'setFullYear', 'setHours', 'setMilliseconds', 'setMinutes',
                'setMonth', 'setSeconds', 'setTime',
                'setUTCDate', 'setUTCFullYear', 'setUTCHours', 'setUTCMilliseconds', 'setUTCMinutes',
                'setUTCMonth', 'setUTCSeconds',
                'toDateString', 'toJSON', 'toLocaleDateString', 'toLocaleString', 'toLocaleTimeString',
                'toTimeString', 'toUTCString',
                'UTC',
                // Function
                'apply', 'bind', 'call', 'isGenerator',
                // JSON
                'parse', 'stringify',
                // Math
                'abs', 'acos', 'asin', 'atan', 'atan2',
                'ceil', 'cos',
                'exp',
                'floor',
                'log',
                'max', 'min',
                'pow',
                'random', 'round',
                'sin', 'sqrt',
                'tan',
                // Number
                'toExponential', 'toFixed', 'toLocaleString', 'toPrecision',
                // Object
                'create',
                'defineProperty', 'defineProperties',
                'freeze',
                'getOwnPropertyDescriptor', 'getOwnPropertyNames', 'getPrototypeOf',
                'hasOwnProperty',
                'isExtensible', 'isFrozen', 'isPrototypeOf', 'isSealed',
                'keys',
                'preventExtensions', 'propertyIsEnumerable',
                'seal',
                'toLocaleString', 'toString',
                'valueOf',
                // RegExp
                'exec', 'test',
                // String
                'charAt', 'charCodeAt', 'concat',
                'fromCharCode',
                'indexOf',
                'lastIndexOf', 'localeCompare',
                'match',
                'replace',
                'search', 'slice', 'split', 'substr', 'substring',
                'toLocaleLowerCase', 'toLocaleUpperCase', 'toLowerCase', 'toUpperCase', 'trim'
            ],
            object: [
                'Array',
                'Boolean',
                'Date', 'decodeURI', 'decodeURIComponent',
                'encodeURI', 'encodeURIComponent', 'Error', 'eval', 'EvalError',
                'Function',
                'Infinity', 'isFinite', 'isNaN',
                'JSON',
                'Math',
                'NaN', 'Number',
                'Object',
                'parseFloat', 'parseInt',
                'RangeError', 'ReferenceError', 'RegExp',
                'String', 'SyntaxError',
                'TypeError',
                'undefined', 'URIError'
            ],
            property: [
                // Function
                'constructor', 'length', 'prototype',
                // RegExp
                'global', 'ignoreCase', 'lastIndex', 'multiline', 'source'
            ]
        },
        // word categories, in the order in which they are searched
        wordTypes = Object.keys(word),
        // characters allowed after the first character of an identifier
        identifierPart = identifier + number;
    return function(source) {
        source = source == null ? '' : String(source);
        var cursor = 0,
            length = source.length,
            // text and type of the last token that is not a comment,
            // linebreak or whitespace ('' if there is none yet)
            prevString = '',
            prevType = '',
            tokens = [],
            char, next, start, type,
            // each tokenizer is called with cursor == start and advances
            // cursor by at least one character, never beyond length
            tokenize = {
                comment: function() {
                    // skip the opening '//' or '/*' first, so that '/*/'
                    // is not mistaken for a complete block comment
                    var end;
                    cursor += 2;
                    if (next == '*') {
                        end = source.indexOf('*/', cursor);
                        // an unterminated comment extends to the end
                        cursor = end == -1 ? length : end + 2;
                    } else {
                        // the linebreak is not part of the line comment
                        while (
                            cursor < length
                            && linebreak.indexOf(source[cursor]) == -1
                        ) {
                            cursor++;
                        }
                    }
                },
                identifier: function() {
                    var i, str;
                    consume(identifierPart);
                    str = source.substring(start, cursor);
                    // refine the type if the identifier is a known word
                    for (i = 0; i < wordTypes.length; i++) {
                        if (word[wordTypes[i]].indexOf(str) > -1) {
                            type = wordTypes[i];
                            break;
                        }
                    }
                },
                linebreak: function() {
                    consume(linebreak);
                },
                number: function() {
                    consume(number + '.');
                },
                operator: function() {
                    // longest match wins, a single character always matches
                    var size = Math.min(operatorLength, length - cursor);
                    while (
                        size > 1
                        && operator.indexOf(source.substr(cursor, size)) == -1
                    ) {
                        size--;
                    }
                    cursor += size;
                },
                regexp: function() {
                    // a slash inside a character class, like in /[/]/,
                    // does not terminate the literal
                    var inClass = false;
                    while (++cursor < length) {
                        char = source[cursor];
                        if (char == '\\') {
                            // skip the escaped character
                            cursor++;
                        } else if (char == '[') {
                            inClass = true;
                        } else if (char == ']') {
                            inClass = false;
                        } else if (char == '/' && !inClass) {
                            break;
                        }
                    }
                    if (cursor < length) {
                        // skip the closing slash and any flags
                        consume(identifier);
                    } else {
                        // unterminated literal
                        cursor = length;
                    }
                },
                string: function() {
                    var delimiter = char;
                    while (++cursor < length && source[cursor] != delimiter) {
                        // skip the escaped character
                        source[cursor] == '\\' && cursor++;
                    }
                    // skip the closing delimiter, unless unterminated
                    cursor = Math.min(cursor + 1, length);
                },
                whitespace: function() {
                    consume(whitespace);
                }
            };
        function consume(chars) {
            // skips the current character,
            // and all following characters that are contained in chars
            do {
                cursor++;
            } while (cursor < length && chars.indexOf(source[cursor]) > -1);
        }
        function isRegExp() {
            // checks if a forward slash is the beginning of a regexp,
            // as opposed to the beginning of an operator
            // see http://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html#regular-expressions
            if (prevType == '') {
                // no significant token before the forward slash
                return true;
            }
            return (
                prevType == 'keyword' &&
                ['false', 'null', 'this', 'true'].indexOf(prevString) == -1
            ) || (
                prevType == 'operator' &&
                ['++', '--', ')', ']', '}'].indexOf(prevString) == -1
            );
        }
        while (cursor < length) {
            char = source[cursor];
            next = source[cursor + 1];
            start = cursor;
            if (char == '/' && (next == '/' || next == '*')) {
                type = 'comment';
            } else if (identifier.indexOf(char) > -1) {
                type = 'identifier';
            } else if (linebreak.indexOf(char) > -1) {
                type = 'linebreak';
            } else if (number.indexOf(char) > -1) {
                type = 'number';
            } else if (char == "'" || char == '"') {
                type = 'string';
            } else if (whitespace.indexOf(char) > -1) {
                type = 'whitespace';
            } else if (char == '/') {
                type = isRegExp() ? 'regexp' : 'operator';
            } else if (operator.indexOf(char) > -1) {
                type = 'operator';
            } else {
                // any other character (like '#', '@' or a non-ASCII letter)
                // is treated as the beginning of an identifier, so that
                // the type of the previous token is never reused
                type = 'identifier';
            }
            tokenize[type]();
            tokens.push({
                length: cursor - start,
                type: type
            });
            if (insignificant.indexOf(type) == -1) {
                prevString = source.substring(start, cursor);
                prevType = type;
            }
        }
        return tokens;
    };
}());
Ox.toSlashes = function(str) {
/*
>>> Ox.toSlashes("fooBarBaz")