// oxjs/source/Ox/js/JavaScript.js
'use strict';
/*@
Ox.doc <f> Generates documentation for annotated JavaScript
    (source) -> <[o]> Array of doc objects
    (file, callback) -> <u> undefined
    (files, callback) -> <u> undefined
    source <s> JavaScript source code
    file <s> JavaScript file
    files <[s]> Array of JavaScript files
    callback <f> Callback function
        doc <[o]> Array of doc objects
            arguments <[o]|u> Arguments (array of doc objects)
                Present if the <code>type</code> of the item is
                <code>"function"</code>.
            description <s|u> Multi-line description with optional markup
                See Ox.sanitizeHTML for details
            events <[o]|u> Events (array of doc objects)
                Present if the item fires any events
            file <s> File name
            line <n> Line number
            name <s> Name of the item
            order <[s]> Order of returns, arguments, properties
                Present if the type of the item is "function"
            properties <[o]|u> Properties (array of doc objects)
                Present if the <code>type</code> of the item is
                <code>"event"</code>, <code>"function"</code>
                or <code>"object"</code>.
            section <s|u> Section in the file
            source <[o]> Source code (array of tokens)
                column <n> Column
                line <n> Line
                type <s> Type (see Ox.tokenize for a list of types)
                value <s> Value
            returns <[o]> Return values (array of doc objects)
                Present if the <code>type</code> of the item is
                <code>"function"</code>.
            summary <s> One-line summary
            tests <[o]> Tests (array of test objects)
                expected <s> Expected result
                statement <s> Statement
            type <s> Type of the item
    <script>
        Ox.test.doc = Ox.doc([
            '//@ My.FOO <n> Magic constant',
            'My.FOO = 23;',
            '',
            '/*@',
            'My.foo <f> Returns an items\'s bar per baz',
            ' Bar per baz is a good indicator of an item\'s foo-ness.',
            ' (item) -> <n> Bar per baz, or NaN',
            ' item <o> Any item',
            ' > My.foo({bar: 1, baz: 10})',
            ' 0.1',
            ' > My.foo({})',
            ' NaN',
            '@*' + '/',
            'My.foo = function(item) {',
            ' return item.bar / item.baz;',
            '};'
        ].join('\n'));
    </script>
    > Ox.test.doc[0].name
    'My.FOO'
    > Ox.test.doc[0].types
    ['number']
    > Ox.test.doc[0].summary
    'Magic constant'
    > Ox.test.doc[1].description
    'Bar per baz is a good indicator of an item\'s foo-ness.'
    > Ox.test.doc[1].returns[0].types
    ['number']
    > Ox.test.doc[1].returns[0].summary
    'Bar per baz, or NaN'
    > Ox.test.doc[1].tests[1]
    {expected: 'NaN', statement: 'My.foo({})'}
@*/
Ox.doc = (function() {
// Regular expressions used to find and dissect doc comments, plus the table
// that expands one-letter type abbreviations.
var re = {
        // "name <types> summary"
        item: /^(.+?) <(.+?)> (.+?)$/,
        // body of a "/*@ ... @*/" (or "/*@ ... */") comment
        multiline: /^\/\*\@.*?\n([\w\W]+)\n.*?\@?\*\/$/,
        // a <script>...</script> section, from the linebreak before the
        // opening tag up to the closing tag. Only blanks and tabs are allowed
        // after the tags: matching "\s*" after the closing tag would also
        // swallow the linebreak and indentation of the following line.
        script: /\n(\s*<script>[ \t]*\n[\w\W]+\n\s*<\/script>[ \t]*)/g,
        // "(arguments) -> name"
        signature: /(\(.*?\)) \->(.*)/,
        // body of a "//@ ..." comment
        singleline: /^\/\/@\s*(.*?)\s*$/,
        // a "> statement" line plus the linebreak and the first character of
        // the line that follows it (the expected result)
        test: /\n(\s*> .+\n.+?)/g
    },
    types = {
        a: 'array', b: 'boolean', d: 'date',
        e: 'element', f: 'function', n: 'number',
        o: 'object', r: 'regexp', s: 'string',
        u: 'undefined', '*': 'value', '!': 'event'
    };
// Turns every placeholder character (U+21A9) back into a linebreak. Can be
// called directly with one string, or used as a String.replace callback, in
// which case the first captured group (if non-empty) is decoded instead of the
// whole match.
function decodeLinebreaks(match, submatch) {
    var encoded = submatch ? submatch : match;
    return encoded.split('\u21A9').join('\n');
}
// String.replace callback that collapses a multi-line match into one line:
// every linebreak inside the first captured group (or inside the whole match
// if the group is empty) becomes the placeholder character U+21A9, and a
// single leading linebreak is put in front of the result.
function encodeLinebreaks(match, submatch) {
    var raw = submatch ? submatch : match;
    return '\n' + raw.split('\n').join('\u21A9');
}
// Returns the number of space characters at the beginning of a string.
function getIndent(string) {
    var indent = 0;
    while (string.charAt(indent) === ' ') {
        indent++;
    }
    return indent;
}
// Parses a line of the form "name <types> summary". Returns an object that
// merges the results of parseName and parseTypes with the trimmed summary, or
// null if the line is not an item (i.e. it is a line of description).
function parseItem(string) {
    var matches = re.item.exec(string);
    // to tell a variable with default value, like
    //     name <string|'<a href="...">foo</a>'> summary
    // from a line of description with tags, like
    //     some <a href="...">description</a> text
    // we need to check if there is either no forward slash
    // or if the second last char is a single or double quote
    // NOTE(review): matches[2] does not include the closing ">", so
    // slice(-2, -1) inspects the character before the last one of the type
    // string -- confirm that this is the intended character.
    return matches && (
        matches[2].indexOf('/') == -1
        || '\'"'.indexOf(matches[2].slice(-2, -1)) > -1
    ) ? Ox.extend(
        parseName(matches[1]),
        parseTypes(matches[2]),
        {summary: matches[3].trim()}
    ) : null;
}
// Splits the name part of an item line. For a signature like "(a, b) -> foo"
// the result is {signature: '(a, b)', name: 'foo'}; any other string is
// returned unchanged as {name: string}.
function parseName(string) {
    var parts = re.signature.exec(string);
    if (!parts) {
        return {name: string};
    }
    return {signature: parts[1], name: parts[2].trim()};
}
// Recursively turns a tree node, as returned by parseTree, into a doc item.
// The node's own line must be an item line. Its child lines are sorted into
// tests, returns, events, arguments, properties or description text.
function parseNode(node) {
    var item = parseItem(node.line),
        order = [];
    // properties are written with a leading dot, which is not part of the name
    item.name = item.name.replace(/^\./, '');
    if (node.nodes) {
        node.nodes.forEach(function(node) {
            var key, line = node.line, subitem;
            if (/^#/.test(line)) {
                // lines that start with "#" are ignored
                return;
            }
            if (/^<script>/.test(line)) {
                item.tests = [parseScript(line)];
            } else if (/^>/.test(line)) {
                item.tests = item.tests || [];
                item.tests.push(parseTest(line));
            } else if ((subitem = parseItem(line))) {
                if (subitem.signature) {
                    item.returns = item.returns || [];
                    item.returns.push(parseNode(node));
                    order.push('returns');
                } else if (subitem.types[0] == 'event') {
                    item.events = item.events || [];
                    item.events.push(parseNode(node));
                    order.push('events');
                } else {
                    // children of a function are arguments, unless their
                    // name starts with a dot
                    key = item.types[0] == 'function'
                        && !/^\./.test(subitem.name)
                        ? 'arguments' : 'properties';
                    item[key] = item[key] || [];
                    item[key].push(parseNode(node));
                    order.push(key);
                }
            } else {
                // anything else is description, joined with single spaces
                item.description = item.description
                    ? item.description + ' ' + line : line;
            }
        });
    }
    item.summary = Ox.parseMarkdown(item.summary);
    if (item.description) {
        item.description = Ox.parseMarkdown(item.description);
    }
    if (item.types[0] == 'function') {
        item.order = Ox.unique(order);
    }
    return item;
}
// Turns an encoded "<script>...</script>" line into a test object that only
// has a statement: the lines between the two tags, with the indentation of the
// first script line removed from each of them.
function parseScript(string) {
    var lines = decodeLinebreaks(string).split('\n'),
        // a script written on a single line has no lines between the tags
        indent = lines.length > 1 ? getIndent(lines[1]) : 0;
    return {
        statement: lines.slice(1, -1).map(function(line) {
            return line.slice(indent);
        }).join('\n')
    };
}
// Parses JavaScript source code and returns an array of doc items.
// source  JavaScript source code
// file    optional file name, stored as item.file ('' if missing)
// Every doc comment ("//@ ..." or "/*@ ... @*/") starts a new block; all
// tokens up to the next doc comment are kept as the source of that block.
function parseSource(source, file) {
    var blocks = [],
        items = [],
        section = '',
        tokens = [];
    Ox.tokenize(source).forEach(function(token) {
        var match;
        if (token.type == 'comment' && (
            match = re.multiline.exec(token.value)
            || re.singleline.exec(token.value)
        )) {
            blocks.push(match[1]);
            tokens.push([]);
        } else if (tokens.length) {
            // tokens before the first doc comment are dropped
            tokens[tokens.length - 1].push(token);
        }
    });
    blocks.forEach(function(block, i) {
        var item, lastItem,
            // scripts and tests span several lines, collapse each of them
            // into a single line before splitting
            lines = block
                .replace(re.script, encodeLinebreaks)
                .replace(re.test, encodeLinebreaks)
                .split('\n'),
            parent,
            tree = parseTree(lines);
        if (re.item.test(tree.line)) {
            // parse the tree's root node
            item = parseNode(tree);
            item.file = file || '';
            if (section) {
                item.section = section;
            }
            if (/^[A-Z]/.test(item.name)) {
                // main item
                // include leading whitespace
                item.source = parseTokens(tokens[i]);
                item.line = item.source[0].line;
                items.push(item);
            } else {
                // property of a function item
                lastItem = items[items.length - 1];
                // if the function returns an object, the property belongs
                // to that object
                parent = lastItem.types[0] == 'function'
                    && lastItem.returns
                    && lastItem.returns[0].types[0] == 'object'
                    ? lastItem.returns[0] : lastItem;
                parent.properties = parent.properties || [];
                parent.properties.push(item);
                if (
                    parent.order && !Ox.contains(parent.order, 'properties')
                ) {
                    parent.order.push('properties');
                }
                // include leading linebreaks and whitespace
                lastItem.source = lastItem.source.concat(
                    parseTokens(tokens[i], true)
                );
            }
        } else {
            // a doc comment that is not an item starts a new section, named
            // after its first word
            section = tree.line.split(' ')[0];
        }
    });
    return items;
}
// Turns an encoded test line ("> statement", followed by an encoded linebreak
// and the expected result) into {statement, expected}.
// The string is split at the first linebreak, so the expected result does not
// have to be indented by a space, and a test line without a following line
// yields an empty expected result instead of throwing.
function parseTest(string) {
    var decoded = decodeLinebreaks(string),
        index = decoded.indexOf('\n'),
        hasExpected = index > -1;
    return {
        // remove the leading "> "
        statement: (hasExpected ? decoded.slice(0, index) : decoded).slice(2),
        expected: hasExpected ? decoded.slice(index + 1).trim() : ''
    };
}
// Returns a trimmed copy of an array of tokens.
// Trailing linebreaks and whitespace are removed if there is at least one
// other token. Leading whitespace is kept. Leading linebreaks are kept if
// includeLeadingLinebreaks is true, otherwise everything up to and including
// the last leading linebreak is dropped.
function parseTokens(tokens, includeLeadingLinebreaks) {
    var blank = ['linebreak', 'whitespace'],
        first = 0,
        i,
        last,
        leading = [];
    function isBlank(token) {
        return blank.indexOf(token.type) > -1;
    }
    // find the first token that is neither linebreak nor whitespace
    while (first < tokens.length && isBlank(tokens[first])) {
        first++;
    }
    for (i = 0; i < first; i++) {
        if (tokens[i].type == 'linebreak' && !includeLeadingLinebreaks) {
            // start over after each leading linebreak
            leading = [];
        } else {
            leading.push(tokens[i]);
        }
    }
    if (first == tokens.length) {
        // nothing but linebreaks and whitespace
        return leading;
    }
    last = tokens.length - 1;
    while (isBlank(tokens[last])) {
        last--;
    }
    return leading.concat(tokens.slice(first, last + 1));
}
// Parses indented lines into a tree structure, like
// {line: "...", nodes: [{line: "...", nodes: [...]}]}
// The first line is the root and is removed from the array that was passed.
// Every following line that has the same indentation as the second line
// starts a new child, all other lines belong to the child before them.
// "nodes" is only present if the root has children.
function parseTree(lines) {
    var childIndent,
        groups = [],
        i,
        // chop the root line
        tree = {line: lines.shift().trim()};
    if (lines.length == 0) {
        return tree;
    }
    childIndent = getIndent(lines[0]);
    for (i = 0; i < lines.length; i++) {
        if (getIndent(lines[i]) == childIndent) {
            // the line is a child, it becomes the root line of a new group
            groups.push([lines[i]]);
        } else {
            // the line is a descendant of the last child
            groups[groups.length - 1].push(lines[i]);
        }
    }
    tree.nodes = groups.map(function(group) {
        return parseTree(group);
    });
    return tree;
}
// Parses the type part of an item line (the text between "<" and ">").
// returns {types: [""]}
// or {types: [""], default: ""}
// or {types: [""], super: ""}
// Types are separated by "|". Each type is either a key of the types table
// or the beginning of one of its values, and can be wrapped in brackets to
// denote an array ("[s]" becomes "[strings]"). Anything else is taken as the
// default value.
function parseTypes(string) {
    var array,
        isArray,
        ret = {types: []},
        type;
    // only split by ':' if there is no default string value
    if ('\'"'.indexOf(string.slice(-2, -1)) == -1) {
        array = string.split(':');
        string = array[0];
        if (array.length == 2) {
            ret['super'] = array[1];
        }
    }
    string.split('|').forEach(function(string) {
        var unwrapped = unwrap(string);
        // own keys only: with the "in" operator, inherited names like
        // "toString" or "constructor" would be looked up in the types table
        if (Object.prototype.hasOwnProperty.call(types, unwrapped)) {
            ret.types.push(wrap(types[unwrapped]));
        } else if (
            (type = Ox.filter(Ox.values(types), function(type) {
                return Ox.startsWith(type, unwrapped);
            })).length
        ) {
            ret.types.push(wrap(type[0]));
        } else {
            ret['default'] = string;
        }
    });
    // removes the brackets and remembers, for the following call of wrap,
    // whether there were any
    function unwrap(string) {
        return (isArray = /^\[.+\]$/.test(string))
            ? string.slice(1, -1) : string;
    }
    function wrap(string) {
        return isArray ? '[' + string + 's' + ']' : string;
    }
    return ret;
}
return function(argument, callback) {
var counter = 0, items = [], ret;
if (arguments.length == 1) {
ret = parseSource(argument);
} else {
argument = Ox.makeArray(argument);
argument.forEach(function(file) {
Ox.get(file, function(source) {
items = items.concat(
parseSource(source, file.split('?')[0])
);
++counter == argument.length && callback(items);
});
})
}
return ret;
}
}());
/*@
Ox.identify <f> Returns the type of a JavaScript identifier
    (str) -> <s> Type
        Type can be <code>constant</code>, <code>identifier</code>,
        <code>keyword</code>, <code>method</code>, <code>object</code> or
        <code>property</code>
@*/
Ox.identify = (function() {
    // Names of built-in constants, methods, objects and properties,
    // see https://developer.mozilla.org/en/JavaScript/Reference
    var identifiers = {
        constant: [
            // Math
            'E', 'LN2', 'LN10', 'LOG2E', 'LOG10E', 'PI', 'SQRT1_2', 'SQRT2',
            // Number
            'MAX_VALUE', 'MIN_VALUE', 'NEGATIVE_INFINITY', 'POSITIVE_INFINITY'
        ],
        method: [
            // Array
            'concat',
            'every',
            'filter', 'forEach',
            'join',
            'lastIndexOf',
            'indexOf', 'isArray',
            'map',
            'pop', 'push',
            'reduce', 'reduceRight', 'reverse',
            'shift', 'slice', 'some', 'sort', 'splice',
            'unshift',
            // Date
            'getDate', 'getDay', 'getFullYear', 'getHours',
            'getMilliseconds', 'getMinutes', 'getMonth', 'getSeconds',
            'getTime', 'getTimezoneOffset',
            'getUTCDate', 'getUTCDay', 'getUTCFullYear', 'getUTCHours',
            'getUTCMilliseconds', 'getUTCMinutes', 'getUTCMonth', 'getUTCSeconds',
            'now',
            'parse',
            'setDate', 'setFullYear', 'setHours', 'setMilliseconds',
            'setMinutes', 'setMonth', 'setSeconds', 'setTime',
            'setUTCDate', 'setUTCFullYear', 'setUTCHours', 'setUTCMilliseconds',
            'setUTCMinutes', 'setUTCMonth', 'setUTCSeconds',
            'toDateString', 'toJSON', 'toLocaleDateString', 'toLocaleString',
            'toLocaleTimeString', 'toTimeString', 'toUTCString',
            'UTC',
            // Function
            'apply', 'bind', 'call', 'isGenerator',
            // JSON
            'parse', 'stringify',
            // Math
            'abs', 'acos', 'asin', 'atan', 'atan2',
            'ceil', 'cos',
            'exp',
            'floor',
            'log',
            'max', 'min',
            'pow',
            'random', 'round',
            'sin', 'sqrt',
            'tan',
            // Number
            'toExponential', 'toFixed', 'toLocaleString', 'toPrecision',
            // Object
            'create',
            'defineProperty', 'defineProperties',
            'freeze',
            'getOwnPropertyDescriptor', 'getOwnPropertyNames', 'getPrototypeOf',
            'hasOwnProperty',
            'isExtensible', 'isFrozen', 'isPrototypeOf', 'isSealed',
            'keys',
            'preventExtensions', 'propertyIsEnumerable',
            'seal',
            'toLocaleString', 'toString',
            'valueOf',
            // RegExp
            'exec', 'test',
            // String
            'charAt', 'charCodeAt', 'concat',
            'fromCharCode',
            'indexOf',
            'lastIndexOf', 'localeCompare',
            'match',
            'replace',
            'search', 'slice', 'split', 'substr', 'substring',
            'toLocaleLowerCase', 'toLocaleUpperCase',
            'toLowerCase', 'toUpperCase', 'trim',
            // Window
            'addEventListener', 'alert', 'atob',
            'blur', 'btoa',
            'clearInterval', 'clearTimeout', 'close', 'confirm',
            'dispatchEvent',
            'escape',
            'find', 'focus',
            'getComputedStyle', 'getSelection',
            'moveBy', 'moveTo',
            'open',
            'postMessage', 'print', 'prompt',
            'removeEventListener', 'resizeBy', 'resizeTo',
            'scroll', 'scrollBy', 'scrollTo',
            'setCursor', 'setInterval', 'setTimeout', 'stop',
            'unescape'
        ],
        object: [
            'Array',
            'Boolean',
            'Date', 'decodeURI', 'decodeURIComponent',
            'encodeURI', 'encodeURIComponent', 'Error', 'eval', 'EvalError',
            'Function',
            'Infinity', 'isFinite', 'isNaN',
            'JSON',
            'Math',
            'NaN', 'Number',
            'Object',
            'parseFloat', 'parseInt',
            'RangeError', 'ReferenceError', 'RegExp',
            'String', 'SyntaxError',
            'TypeError',
            'undefined', 'URIError',
            'window'
        ],
        property: [
            // Function
            'constructor', 'length', 'prototype',
            // RegExp
            'global', 'ignoreCase', 'lastIndex', 'multiline', 'source',
            // Window
            'applicationCache',
            'closed', 'console', 'content', 'crypto',
            'defaultStatus', 'document',
            'frameElement', 'frames',
            'history',
            'innerHeight', 'innerWidth',
            'length', 'location', 'locationbar', 'localStorage',
            'menubar',
            'name', 'navigator',
            'opener', 'outerHeight', 'outerWidth',
            'pageXOffset', 'pageYOffset', 'parent', 'personalbar',
            'screen', 'screenX', 'screenY', 'scrollbars', 'scrollX', 'scrollY',
            'self', 'sessionStorage', 'status', 'statusbar',
            'toolbar', 'top'
        ]
    };
    // Returns 'keyword' for anything listed in Ox.KEYWORDS, otherwise the
    // name of the first list above that contains the identifier, otherwise
    // 'identifier'.
    return function(identifier) {
        var kind = 'identifier';
        if (Ox.KEYWORDS.indexOf(identifier) > -1) {
            return 'keyword';
        }
        Ox.forEach(identifiers, function(words, type) {
            if (words.indexOf(identifier) > -1) {
                kind = type;
                Ox.Break();
            }
        });
        return kind;
    };
}());
/*@
Ox.minify <f> Minifies JavaScript
    (source) -> <s> Minified JavaScript
    (file, callback) -> <u> undefined
    source <s> JavaScript source
    file <s> JavaScript file
    callback <f> Callback function
    > Ox.minify('for (a in b)\n{\t\tc = void 0;\n}')
    'for(a in b)\n{c=void 0;}'
    > Ox.minify('return a; return 0; return "";')
    'return a;return 0;return"";'
    > Ox.minify('return\na;\nreturn\n0;\nreturn\n"";')
    'return\na;return\n0;return\n"";'
@*/
// Minifies JavaScript.
// (source)          returns the minified source code
// (file, callback)  loads the file with Ox.get and passes the minified source
//                   code to the callback; returns undefined
Ox.minify = function() {
    // see https://github.com/douglascrockford/JSMin/blob/master/README
    // and http://inimino.org/~inimino/blog/javascript_semicolons
    var callback;
    if (arguments.length == 1) {
        return minify(arguments[0]);
    }
    // "arguments" has to be read here: inside the function that is passed to
    // Ox.get, it would refer to the arguments of that function
    callback = arguments[1];
    Ox.get(arguments[0], function(source) {
        callback(minify(source));
    });
    function minify(source) {
        var tokens = Ox.tokenize(source),
            length = tokens.length,
            ret = '';
        tokens.forEach(function(token, i) {
            var next, nextToken, prevToken;
            if (['linebreak', 'whitespace'].indexOf(token.type) > -1) {
                // the previous token is the token right before this one, the
                // next token is the first one after it that is not a comment,
                // linebreak or whitespace
                prevToken = i == 0 ? null : tokens[i - 1];
                next = i + 1;
                while (
                    next < length && ['comment', 'linebreak', 'whitespace']
                        .indexOf(tokens[next].type) > -1
                ) {
                    next++;
                }
                nextToken = next == length ? null : tokens[next];
            }
            if (token.type == 'linebreak') {
                // replace a linebreak between two tokens that are identifiers
                // or numbers or strings or unary operators or grouping
                // operators with a single newline, otherwise remove it
                if (
                    prevToken && nextToken && (
                        ['identifier', 'number', 'string']
                            .indexOf(prevToken.type) > -1
                        || ['++', '--', ')', ']', '}']
                            .indexOf(prevToken.value) > -1
                    ) && (
                        ['identifier', 'number', 'string']
                            .indexOf(nextToken.type) > -1
                        || ['+', '-', '++', '--', '~', '!', '(', '[', '{']
                            .indexOf(nextToken.value) > -1
                    )
                ) {
                    ret += '\n';
                }
            } else if (token.type == 'whitespace') {
                // replace whitespace between two tokens that are identifiers
                // or numbers, or between a token that ends with "+" or "-"
                // and one that begins with "+" or "-", with a single space,
                // otherwise remove it
                if (
                    prevToken && nextToken && ((
                        ['identifier', 'number'].indexOf(prevToken.type) > -1
                        && ['identifier', 'number'].indexOf(nextToken.type) > -1
                    ) || (
                        ['+', '-', '++', '--'].indexOf(prevToken.value) > -1
                        && ['+', '-', '++', '--'].indexOf(nextToken.value) > -1
                    ))
                ) {
                    ret += ' ';
                }
            } else if (token.type != 'comment') {
                // remove comments and leave all other tokens untouched
                ret += token.value;
            }
        });
        return ret;
    }
};
/*@
Ox.test <f> Takes JavaScript, runs inline tests, returns results
    (source, callback) -> <u> undefined
    (file, callback) -> <u> undefined
    (files, callback) -> <u> undefined
    (doc, callback) -> <u> undefined
    (docs, callback) -> <u> undefined
    source <s> JavaScript source
    file <s> JavaScript file
    files <[s]> Array of JavaScript files
    doc <o> Documentation object (as returned by Ox.doc)
    docs <[o]> Array of documentation objects (as returned by Ox.doc)
    callback <f> Callback function
        results <[o]> Array of results
            actual <s> Actual result
            expected <s> Expected result
            name <s> Item name
            section <s|u> Section in the file
            statement <s> Test statement
            passed <b> True if actual result and expected result are equal
    .data <o> undocumented
    <script>
        Ox.test.foo = function(item) {
            return item.bar / item.baz;
        };
        Ox.test.source = [
            '/*@',
            'Ox.test.foo <f> Returns an items\'s bar per baz',
            ' Bar per baz is a good indicator of an item\'s foo-ness.',
            ' (item) -> <n> Bar per baz, or NaN',
            ' item <o> Any item',
            ' > Ox.test.foo({bar: 1, baz: 10})',
            ' 0.1',
            ' > Ox.test.foo({})',
            ' NaN',
            '@*' + '/',
            'Ox.test.foo = function(item) {',
            ' return item.bar / item.baz;',
            '};'
        ].join('\n');
    </script>
    > Ox.test(Ox.test.source, function(r) { Ox.test(r[0].passed, true); })
    undefined
@*/
// Runs the inline tests of doc items and passes an array of results to the
// callback.
// Called with two arguments (argument, callback), argument is either source
// code (a string that contains a linebreak), one or more file names (strings)
// or one or more doc objects. Called with any other number of arguments, the
// arguments are read as (statement, result, expected), which reports the
// result of a pending asynchronous test (see below).
Ox.test = function(argument, callback) {
// Asynchronous functions can be tested by calling Ox.test(actual,
// expected) in the callback. If Ox.test is called inside a test statement
// (unless at the beginning of the statement, which is a test for Ox.test),
// the call to Ox.test is patched by inserting the test statement string as
// the first argument of the Ox.test call, and Ox.test will branch when
// called with three arguments.
// Runs all tests of the given doc items. Statements are evaluated with a
// direct eval inside this function.
function runTests(items) {
// regexp matches "Ox.test(" only if at least one character precedes it
var id = Ox.uid(), regexp = /(.+Ox\.test\()/, results = [];
// We have to create a globally accessible object so that synchronous
// and asynchronous tests can read, write and return the same data.
Ox.test.data[id] = {
callback: callback,
done: false,
results: results,
tests: {}
};
items.forEach(function(item) {
// Items are skipped unless at least one of their tests has an expected
// result
item.tests && item.tests.some(function(test) {
return test.expected;
}) && item.tests.forEach(function(test) {
var actual, isAsync = regexp.test(test.statement);
if (isAsync) {
// Add a pending test
Ox.test.data[id].tests[test.statement] = {
name: item.name,
section: item.section
};
// Patch the test statement
// (the original statement, in single quotes and with its single quotes
// escaped, becomes the first argument of the Ox.test call)
test.statement = test.statement.replace(
regexp,
"$1'" + test.statement.replace(/'/g, "\\'") + "', "
);
}
if (test.expected || test.statement.match(/Ox\.test\./)) {
// Eval the statement, unless it's a script tag that doesn't
// add a property to Ox.test
Ox.Log('TEST', test.statement);
actual = eval(test.statement);
}
// Synchronous tests are recorded right away, asynchronous ones when their
// patched Ox.test call runs
if (!isAsync && test.expected) {
Ox.test.data[id].results.push({
actual: JSON.stringify(actual),
expected: test.expected,
name: item.name,
section: item.section,
statement: test.statement,
passed: Ox.isEqual(
actual, eval('(' + test.expected + ')')
)
});
}
});
});
// All statements have been evaluated. If no asynchronous test is pending,
// report now, otherwise the last pending test triggers the callback.
Ox.test.data[id].done = true;
if (Ox.isEmpty(Ox.test.data[id].tests)) {
callback(Ox.test.data[id].results);
}
}
if (arguments.length == 2) {
if (Ox.typeOf(argument) == 'string' && Ox.contains(argument, '\n')) {
// source code
runTests(Ox.doc(argument))
} else {
argument = Ox.makeArray(argument);
if (Ox.typeOf(argument[0]) == 'string') {
// files
Ox.doc(argument, runTests);
} else {
// doc objects
runTests(argument);
}
}
} else {
// Result of an asynchronous test: (statement, result, expected)
var statement = arguments[0],
result = arguments[1],
expected = arguments[2],
id, test;
// Find the test run that is waiting for this statement
Ox.forEach(Ox.test.data, function(v, k) {
if (v.tests[statement]) {
id = k;
test = v.tests[statement];
Ox.Break();
}
});
// NOTE(review): if no test run is waiting for this statement, id is
// undefined and the next line throws -- confirm that this cannot happen.
// Unlike for synchronous tests, the actual result is stored as passed, not
// as a JSON string.
Ox.test.data[id].results.push(Ox.extend(test, {
actual: result,
expected: expected,
statement: statement,
passed: Ox.isEqual(result, expected)
}));
delete Ox.test.data[id].tests[statement];
if (Ox.test.data[id].done && Ox.isEmpty(Ox.test.data[id].tests)) {
Ox.test.data[id].callback(Ox.test.data[id].results);
}
}
};
// Data of all test runs, by id. Each entry has callback, done, results and
// tests (the pending asynchronous tests, by statement).
Ox.test.data = {};
/*@
Ox.tokenize <f> Tokenizes JavaScript
    (source) -> <[o]> Array of tokens
        column <n> Column of the token
        line <n> Line of the token
        type <s> Type of the token
            Type can be <code>"comment"</code>, <code>"error"</code>,
            <code>"identifier"</code>, <code>"linebreak"</code>,
            <code>"number"</code>, <code>"operator"</code>,
            <code>"regexp"</code>, <code>"string"</code> or
            <code>"whitespace"</code>
        value <s> Value of the token
    source <s> JavaScript source code
    > Ox.tokenize('// comment\nvar foo = bar / baz;').length
    14
    > Ox.tokenize('return /foo/g;')[2].value.length
    6
@*/
// FIXME: numbers (hex, exp, etc.)
Ox.tokenize = (function() {
    // see https://github.com/mozilla/narcissus/blob/master/lib/lexer.js
    var comment = ['//', '/*'],
        identifier = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$_',
        linebreak = '\n\r',
        number = '0123456789',
        operator = [
            // arithmetic
            '+', '-', '*', '/', '%', '++', '--',
            // assignment
            '=', '+=', '-=', '*=', '/=', '%=',
            '&=', '|=', '^=', '<<=', '>>=', '>>>=',
            // bitwise
            '&', '|', '^', '~', '<<', '>>', '>>>',
            // comparison
            '==', '!=', '===', '!==', '>', '>=', '<', '<=',
            // conditional
            '?', ':',
            // grouping
            '(', ')', '[', ']', '{', '}',
            // logical
            '&&', '||', '!',
            // other
            '.', ',', ';'
        ],
        // characters that can be regexp flags
        regexp = 'abcdefghijklmnopqrstuvwxyz',
        string = '\'"',
        whitespace = ' \t';

    function isRegExp(tokens) {
        // Returns true if the current token is the beginning of a RegExp, as
        // opposed to the beginning of an operator
        var i = tokens.length - 1, ret, token;
        // Scan back to the previous significant token, or to the beginning of
        // the source
        while (i >= 0 && [
            'comment', 'linebreak', 'whitespace'
        ].indexOf(tokens[i].type) > -1) {
            i--;
        }
        if (i == -1) {
            // Source begins with a forward slash
            ret = true;
        } else {
            // A slash starts a RegExp after a keyword (other than false, null
            // and true) or after an operator that cannot end an expression
            token = tokens[i];
            ret = (
                token.type == 'identifier'
                && Ox.identify(token.value) == 'keyword'
                && ['false', 'null', 'true'].indexOf(token.value) == -1
            ) || (
                token.type == 'operator'
                && ['++', '--', ')', ']', '}'].indexOf(token.value) == -1
            );
        }
        return ret;
    }

    // Returns an array of tokens, each {column, line, type, value}, with
    // columns and lines starting at 1. Characters that do not start any known
    // token become "error" tokens, consecutive ones are merged.
    return function(source) {
        var char,
            column = 1,
            cursor = 0,
            delimiter,
            length,
            line = 1,
            lines,
            tokens = [],
            start,
            type,
            value;
        source = source.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
        // has to be the length of the normalized source, otherwise the loop
        // runs past its end and emits an empty error token
        length = source.length;
        while (cursor < length) {
            start = cursor;
            char = source[cursor];
            if (comment.indexOf(delimiter = char + source[cursor + 1]) > -1) {
                type = 'comment';
                ++cursor;
                while (char = source[++cursor]) {
                    if (delimiter == '//' && char == '\n') {
                        // the linebreak is not part of the comment
                        break;
                    } else if (
                        delimiter == '/*' && char + source[cursor + 1] == '*/'
                    ) {
                        cursor += 2;
                        break;
                    }
                }
            } else if (identifier.indexOf(char) > -1) {
                type = 'identifier';
                while ((identifier + number).indexOf(source[++cursor]) > -1) {}
            } else if (linebreak.indexOf(char) > -1) {
                type = 'linebreak';
                while (linebreak.indexOf(source[++cursor]) > -1) {}
            } else if (number.indexOf(char) > -1) {
                type = 'number';
                while ((number + '.').indexOf(source[++cursor]) > -1) {}
            } else if (char == '/' && isRegExp(tokens)) {
                type = 'regexp';
                while ((char = source[++cursor]) != '/' && cursor < length) {
                    // skip the character after a backslash
                    char == '\\' && ++cursor;
                }
                // flags
                while (regexp.indexOf(source[++cursor]) > -1) {}
            } else if (operator.indexOf(char) > -1) {
                type = 'operator';
                // extend the operator for as long as it is a known operator
                while (
                    operator.indexOf(char += source[++cursor]) > -1
                    && cursor < length
                ) {}
            } else if (string.indexOf(delimiter = char) > -1) {
                type = 'string';
                while (
                    (char = source[++cursor]) != delimiter && cursor < length
                ) {
                    // skip the character after a backslash
                    char == '\\' && ++cursor;
                }
                ++cursor;
            } else if (whitespace.indexOf(char) > -1) {
                type = 'whitespace';
                while (whitespace.indexOf(source[++cursor]) > -1) {}
            } else {
                type = 'error';
                ++cursor;
            }
            value = source.slice(start, cursor);
            if (
                type == 'error' && tokens.length
                && tokens[tokens.length - 1].type == 'error'
            ) {
                tokens[tokens.length - 1].value += value;
            } else {
                tokens.push(
                    {column: column, line: line, type: type, value: value}
                );
            }
            if (type == 'comment') {
                lines = value.split('\n');
                if (lines.length == 1) {
                    // the comment ends on the line it started on
                    column += value.length;
                } else {
                    // the next token starts right after the last line of the
                    // comment
                    column = lines[lines.length - 1].length + 1;
                }
                line += lines.length - 1;
            } else if (type == 'linebreak') {
                column = 1;
                line += value.length;
            } else {
                column += value.length;
            }
        }
        return tokens;
    };
}());