acorn.js | |
|---|---|
| Acorn is a tiny, fast JavaScript parser written in JavaScript. Acorn was written by Marijn Haverbeke and released under an MIT license. The Unicode regexps (for identifiers and whitespace) were taken from Esprima by Ariya Hidayat. Git repositories for Acorn are available at
Please use the GitHub bug tracker to report issues. This file defines the main parser interface. The library also comes with an error-tolerant parser and an abstract syntax tree walker, defined in other files. | (function(root, mod) {
if (typeof exports == "object" && typeof module == "object") return mod(exports); // CommonJS
if (typeof define == "function" && define.amd) return define(["exports"], mod); // AMD
mod(root.acorn || (root.acorn = {})); // Plain browser env
})(this, function(exports) {
"use strict";
exports.version = "0.3.2"; |
| The main exported interface (under | var options, input, inputLen, sourceFile;
// Main entry point. Parses `inpt` and returns the value produced by
// `parseTopLevel` (defined outside this excerpt). `opts` is optional; any
// option it leaves out is filled in from `defaultOptions` by `setOptions`.
exports.parse = function(inpt, opts) {
// Coerce to a string so `length` and `charCodeAt` are always available.
input = String(inpt); inputLen = input.length;
setOptions(opts);
// Rewind the tokenizer state and skip leading whitespace and comments.
initTokenState();
// `options.program` defaults to null (see `defaultOptions`).
return parseTopLevel(options.program);
}; |
| A second optional argument can be given to further configure the parser process. These options are recognized: | var defaultOptions = exports.defaultOptions = { |
|
| ecmaVersion: 5, |
| Turn on | strictSemicolons: false, |
| When | allowTrailingCommas: true, |
| By default, reserved words are not enforced. Enable
| forbidReserved: false, |
| When | locations: false, |
| A function can be passed as | onComment: null, |
| Nodes have their start and end characters offsets recorded in
| ranges: false, |
| It is possible to parse multiple files into a single AST by
passing the tree produced by parsing the first file as
| program: null, |
| When | sourceFile: null
};
// Installs the options for the current run in the closure-level `options`
// variable and caches `options.sourceFile` in `sourceFile`.
//
// `opts` may be omitted. Its own properties are copied into a fresh object
// and every option it does not define itself is taken from `defaultOptions`,
// so the object passed in by the caller is never modified and can safely be
// reused, shared or frozen.
function setOptions(opts) {
  var hasOwn = Object.prototype.hasOwnProperty;
  var merged = {};
  var opt;
  if (opts) {
    for (opt in opts) {
      if (hasOwn.call(opts, opt)) merged[opt] = opts[opt];
    }
  }
  for (opt in defaultOptions) {
    if (!hasOwn.call(merged, opt)) merged[opt] = defaultOptions[opt];
  }
  options = merged;
  sourceFile = options.sourceFile || null;
}
| The | var getLineInfo = exports.getLineInfo = function(input, offset) {
// Translates a character offset in `input` into a `{line, column}` object.
// Lines are counted from 1; the column is the distance from the start of
// that line. Note that the `input` parameter shadows the closure-level
// `input` variable.
// `cur` is the offset at which the line currently being examined starts.
for (var line = 1, cur = 0;;) {
// `lineBreak` is a shared regexp with the global flag, so its scan position
// has to be set explicitly before each use.
lineBreak.lastIndex = cur;
var match = lineBreak.exec(input);
// Only a line break that starts before `offset` moves the result to a later line.
if (match && match.index < offset) {
++line;
cur = match.index + match[0].length;
} else break;
}
return {line: line, column: offset - cur};
}; |
| Acorn is organized as a tokenizer and a recursive-descent parser.
The | exports.tokenize = function(inpt, opts) {
// Sets up the tokenizer for `inpt` (with the same option handling as
// `exports.parse`) and returns a `getToken` function that reads one token
// per call.
input = String(inpt); inputLen = input.length;
setOptions(opts);
initTokenState();
// A single token object is reused: every call to `getToken` overwrites its
// fields and returns the same object, so copy it if it must be kept.
var t = {};
function getToken(forceRegexp) {
readToken(forceRegexp);
t.start = tokStart; t.end = tokEnd;
t.startLoc = tokStartLoc; t.endLoc = tokEndLoc;
t.type = tokType; t.value = tokVal;
return t;
}
// Repositions the tokenizer at offset `pos`. `reAllowed` becomes the new
// value of `tokRegexpAllowed`.
getToken.jumpTo = function(pos, reAllowed) {
tokPos = pos;
if (options.locations) {
// Recount the line number and line start by scanning the line breaks from
// the beginning of the input up to `pos`.
tokCurLine = 1;
tokLineStart = lineBreak.lastIndex = 0;
var match;
while ((match = lineBreak.exec(input)) && match.index < pos) {
++tokCurLine;
tokLineStart = match.index + match[0].length;
}
}
tokRegexpAllowed = reAllowed;
// Move on to the start of the next token.
skipSpace();
};
return getToken;
}; |
| State is kept in (closure-)global variables. We already saw the
| |
| The current position of the tokenizer in the input. | var tokPos; |
| The start and end offsets of the current token. | var tokStart, tokEnd; |
| When | var tokStartLoc, tokEndLoc; |
| The type and value of the current token. Token types are objects,
named by variables against which they can be compared, and
holding properties that describe them (indicating, for example,
the precedence of an infix operator, and the original name of a
keyword token). The kind of value that's held in | var tokType, tokVal; |
| Internal state for the tokenizer. To distinguish between division
operators and regular expressions, it remembers whether the last
token was one that is allowed to be followed by an expression.
(If it is, a slash is probably a regexp, if it isn't it's a
division operator. See the | var tokRegexpAllowed; |
| When | var tokCurLine, tokLineStart; |
| These store the position of the previous token, which is useful
when finishing a node and assigning its | var lastStart, lastEnd, lastEndLoc; |
| This is the parser's state. | var inFunction, labels, strict; |
| This function is used to raise exceptions on parse errors. It
takes an offset integer (into the current | function raise(pos, message) {
// Throws a SyntaxError for a problem at offset `pos` of the current `input`.
// The line and column are appended to the message as " (line:column)".
var loc = getLineInfo(input, pos);
message += " (" + loc.line + ":" + loc.column + ")";
var err = new SyntaxError(message);
// `pos` and `loc` locate the error itself; `raisedAt` records how far the
// tokenizer had advanced when the error was thrown.
err.pos = pos; err.loc = loc; err.raisedAt = tokPos;
throw err;
} |
| Reused empty array added for node fields that are always empty. | var empty = []; |
Token types | |
| The assignment of fine-grained, information-carrying type objects allows the tokenizer to store the information it has about a token in a way that is very cheap for the parser to look up. | |
| All token type variables start with an underscore, to make them easy to recognize. | |
| These are the general types. The | var _num = {type: "num"}, _regexp = {type: "regexp"}, _string = {type: "string"};
var _name = {type: "name"}, _eof = {type: "eof"}; |
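| Keyword tokens. The `keyword` property records the word a token was read from. The `beforeExpr` property is set on every token type that can be followed by an expression, so that a slash after such a token is read as the start of a regular expression rather than as a division. `isLoop` marks the keywords that start a loop.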
| var _break = {keyword: "break"}, _case = {keyword: "case", beforeExpr: true}, _catch = {keyword: "catch"};
var _continue = {keyword: "continue"}, _debugger = {keyword: "debugger"}, _default = {keyword: "default"};
var _do = {keyword: "do", isLoop: true}, _else = {keyword: "else", beforeExpr: true};
var _finally = {keyword: "finally"}, _for = {keyword: "for", isLoop: true}, _function = {keyword: "function"};
var _if = {keyword: "if"}, _return = {keyword: "return", beforeExpr: true}, _switch = {keyword: "switch"};
var _throw = {keyword: "throw", beforeExpr: true}, _try = {keyword: "try"}, _var = {keyword: "var"};
var _while = {keyword: "while", isLoop: true}, _with = {keyword: "with"}, _new = {keyword: "new", beforeExpr: true};
var _this = {keyword: "this"}; |
| The keywords that denote values. | var _null = {keyword: "null", atomValue: null}, _true = {keyword: "true", atomValue: true};
var _false = {keyword: "false", atomValue: false}; |
| Some keywords are treated as regular operators. | var _in = {keyword: "in", binop: 7, beforeExpr: true}; |
| Map keyword names to token types. | var keywordTypes = {"break": _break, "case": _case, "catch": _catch,
"continue": _continue, "debugger": _debugger, "default": _default,
"do": _do, "else": _else, "finally": _finally, "for": _for,
"function": _function, "if": _if, "return": _return, "switch": _switch,
"throw": _throw, "try": _try, "var": _var, "while": _while, "with": _with,
"null": _null, "true": _true, "false": _false, "new": _new, "in": _in,
"instanceof": {keyword: "instanceof", binop: 7, beforeExpr: true}, "this": _this,
"typeof": {keyword: "typeof", prefix: true, beforeExpr: true},
"void": {keyword: "void", prefix: true, beforeExpr: true},
"delete": {keyword: "delete", prefix: true, beforeExpr: true}}; |
| Punctuation token types. Again, the | var _bracketL = {type: "[", beforeExpr: true}, _bracketR = {type: "]"}, _braceL = {type: "{", beforeExpr: true};
var _braceR = {type: "}"}, _parenL = {type: "(", beforeExpr: true}, _parenR = {type: ")"};
var _comma = {type: ",", beforeExpr: true}, _semi = {type: ";", beforeExpr: true};
var _colon = {type: ":", beforeExpr: true}, _dot = {type: "."}, _question = {type: "?", beforeExpr: true}; |
| Operators. These carry several kinds of properties to help the parser use them properly (the presence of these properties is what categorizes them as operators).
| var _slash = {binop: 10, beforeExpr: true}, _eq = {isAssign: true, beforeExpr: true};
var _assign = {isAssign: true, beforeExpr: true}, _plusmin = {binop: 9, prefix: true, beforeExpr: true};
var _incdec = {postfix: true, prefix: true, isUpdate: true}, _prefix = {prefix: true, beforeExpr: true};
var _bin1 = {binop: 1, beforeExpr: true}, _bin2 = {binop: 2, beforeExpr: true};
var _bin3 = {binop: 3, beforeExpr: true}, _bin4 = {binop: 4, beforeExpr: true};
var _bin5 = {binop: 5, beforeExpr: true}, _bin6 = {binop: 6, beforeExpr: true};
var _bin7 = {binop: 7, beforeExpr: true}, _bin8 = {binop: 8, beforeExpr: true};
var _bin10 = {binop: 10, beforeExpr: true}; |
| Provide access to the token types for external users of the tokenizer. | exports.tokTypes = {bracketL: _bracketL, bracketR: _bracketR, braceL: _braceL, braceR: _braceR,
parenL: _parenL, parenR: _parenR, comma: _comma, semi: _semi, colon: _colon,
dot: _dot, question: _question, slash: _slash, eq: _eq, name: _name, eof: _eof,
num: _num, regexp: _regexp, string: _string};
for (var kw in keywordTypes) exports.tokTypes["_" + kw] = keywordTypes[kw]; |
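| This is a trick taken from Esprima. It turns out that, on
non-Chrome browsers, to check whether a string is in a set, a
predicate containing a big ugly `switch` statement is faster than a regular expression. This function uses `new Function` to build such a predicate from a space-separated string of words. It starts by grouping the words by length. | function makePredicate(words) {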
words = words.split(" ");
var f = "", cats = [];
out: for (var i = 0; i < words.length; ++i) {
for (var j = 0; j < cats.length; ++j)
if (cats[j][0].length == words[i].length) {
cats[j].push(words[i]);
continue out;
}
cats.push([words[i]]);
}
function compareTo(arr) {
if (arr.length == 1) return f += "return str === " + JSON.stringify(arr[0]) + ";";
f += "switch(str){";
for (var i = 0; i < arr.length; ++i) f += "case " + JSON.stringify(arr[i]) + ":";
f += "return true}return false;";
} |
| When there are more than three length categories, an outer switch first dispatches on the lengths, to save on comparisons. | if (cats.length > 3) {
cats.sort(function(a, b) {return b.length - a.length;});
f += "switch(str.length){";
for (var i = 0; i < cats.length; ++i) {
var cat = cats[i];
f += "case " + cat[0].length + ":";
compareTo(cat);
}
f += "}"; |
| Otherwise, simply generate a flat | } else {
compareTo(words);
}
return new Function("str", f);
} |
| The ECMAScript 3 reserved word list. | var isReservedWord3 = makePredicate("abstract boolean byte char class double enum export extends final float goto implements import int interface long native package private protected public short static super synchronized throws transient volatile"); |
| ECMAScript 5 reserved words. | var isReservedWord5 = makePredicate("class enum extends super const export import"); |
| The additional reserved words in strict mode. | var isStrictReservedWord = makePredicate("implements interface let package private protected public static yield"); |
| The forbidden variable names in strict mode. | var isStrictBadIdWord = makePredicate("eval arguments"); |
| And the keywords. | var isKeyword = makePredicate("break case catch continue debugger default do else finally for function if return switch throw try var while with null true false instanceof typeof void delete new in this"); |
Character categories | |
| Big ugly regular expressions that match characters in the whitespace, identifier, and identifier-start categories. These are only applied when a character is found to actually have a code point above 128. | var nonASCIIwhitespace = /[\u1680\u180e\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]/;
var nonASCIIidentifierStartChars = "\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\u02c1\u02c6-\u02d1\u02e0-\u02e4\u02ec\u02ee\u0370-\u0374\u0376\u0377\u037a-\u037d\u0386\u0388-\u038a\u038c\u038e-\u03a1\u03a3-\u03f5\u03f7-\u0481\u048a-\u0527\u0531-\u0556\u0559\u0561-\u0587\u05d0-\u05ea\u05f0-\u05f2\u0620-\u064a\u066e\u066f\u0671-\u06d3\u06d5\u06e5\u06e6\u06ee\u06ef\u06fa-\u06fc\u06ff\u0710\u0712-\u072f\u074d-\u07a5\u07b1\u07ca-\u07ea\u07f4\u07f5\u07fa\u0800-\u0815\u081a\u0824\u0828\u0840-\u0858\u08a0\u08a2-\u08ac\u0904-\u0939\u093d\u0950\u0958-\u0961\u0971-\u0977\u0979-\u097f\u0985-\u098c\u098f\u0990\u0993-\u09a8\u09aa-\u09b0\u09b2\u09b6-\u09b9\u09bd\u09ce\u09dc\u09dd\u09df-\u09e1\u09f0\u09f1\u0a05-\u0a0a\u0a0f\u0a10\u0a13-\u0a28\u0a2a-\u0a30\u0a32\u0a33\u0a35\u0a36\u0a38\u0a39\u0a59-\u0a5c\u0a5e\u0a72-\u0a74\u0a85-\u0a8d\u0a8f-\u0a91\u0a93-\u0aa8\u0aaa-\u0ab0\u0ab2\u0ab3\u0ab5-\u0ab9\u0abd\u0ad0\u0ae0\u0ae1\u0b05-\u0b0c\u0b0f\u0b10\u0b13-\u0b28\u0b2a-\u0b30\u0b32\u0b33\u0b35-\u0b39\u0b3d\u0b5c\u0b5d\u0b5f-\u0b61\u0b71\u0b83\u0b85-\u0b8a\u0b8e-\u0b90\u0b92-\u0b95\u0b99\u0b9a\u0b9c\u0b9e\u0b9f\u0ba3\u0ba4\u0ba8-\u0baa\u0bae-\u0bb9\u0bd0\u0c05-\u0c0c\u0c0e-\u0c10\u0c12-\u0c28\u0c2a-\u0c33\u0c35-\u0c39\u0c3d\u0c58\u0c59\u0c60\u0c61\u0c85-\u0c8c\u0c8e-\u0c90\u0c92-\u0ca8\u0caa-\u0cb3\u0cb5-\u0cb9\u0cbd\u0cde\u0ce0\u0ce1\u0cf1\u0cf2\u0d05-\u0d0c\u0d0e-\u0d10\u0d12-\u0d3a\u0d3d\u0d4e\u0d60\u0d61\u0d7a-\u0d7f\u0d85-\u0d96\u0d9a-\u0db1\u0db3-\u0dbb\u0dbd\u0dc0-\u0dc6\u0e01-\u0e30\u0e32\u0e33\u0e40-\u0e46\u0e81\u0e82\u0e84\u0e87\u0e88\u0e8a\u0e8d\u0e94-\u0e97\u0e99-\u0e9f\u0ea1-\u0ea3\u0ea5\u0ea7\u0eaa\u0eab\u0ead-\u0eb0\u0eb2\u0eb3\u0ebd\u0ec0-\u0ec4\u0ec6\u0edc-\u0edf\u0f00\u0f40-\u0f47\u0f49-\u0f6c\u0f88-\u0f8c\u1000-\u102a\u103f\u1050-\u1055\u105a-\u105d\u1061\u1065\u1066\u106e-\u1070\u1075-\u1081\u108e\u10a0-\u10c5\u10c7\u10cd\u10d0-\u10fa\u10fc-\u1248\u124a-\u124d\u1250-\u1256\u1258\u125a-\u125d\u1260-\u1288\u128a-\u128d\u1290-\u12b0\u12b2-\u12b5\u12b8-\u12be\u12c0\u12c2-\u12c5\u
12c8-\u12d6\u12d8-\u1310\u1312-\u1315\u1318-\u135a\u1380-\u138f\u13a0-\u13f4\u1401-\u166c\u166f-\u167f\u1681-\u169a\u16a0-\u16ea\u16ee-\u16f0\u1700-\u170c\u170e-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176c\u176e-\u1770\u1780-\u17b3\u17d7\u17dc\u1820-\u1877\u1880-\u18a8\u18aa\u18b0-\u18f5\u1900-\u191c\u1950-\u196d\u1970-\u1974\u1980-\u19ab\u19c1-\u19c7\u1a00-\u1a16\u1a20-\u1a54\u1aa7\u1b05-\u1b33\u1b45-\u1b4b\u1b83-\u1ba0\u1bae\u1baf\u1bba-\u1be5\u1c00-\u1c23\u1c4d-\u1c4f\u1c5a-\u1c7d\u1ce9-\u1cec\u1cee-\u1cf1\u1cf5\u1cf6\u1d00-\u1dbf\u1e00-\u1f15\u1f18-\u1f1d\u1f20-\u1f45\u1f48-\u1f4d\u1f50-\u1f57\u1f59\u1f5b\u1f5d\u1f5f-\u1f7d\u1f80-\u1fb4\u1fb6-\u1fbc\u1fbe\u1fc2-\u1fc4\u1fc6-\u1fcc\u1fd0-\u1fd3\u1fd6-\u1fdb\u1fe0-\u1fec\u1ff2-\u1ff4\u1ff6-\u1ffc\u2071\u207f\u2090-\u209c\u2102\u2107\u210a-\u2113\u2115\u2119-\u211d\u2124\u2126\u2128\u212a-\u212d\u212f-\u2139\u213c-\u213f\u2145-\u2149\u214e\u2160-\u2188\u2c00-\u2c2e\u2c30-\u2c5e\u2c60-\u2ce4\u2ceb-\u2cee\u2cf2\u2cf3\u2d00-\u2d25\u2d27\u2d2d\u2d30-\u2d67\u2d6f\u2d80-\u2d96\u2da0-\u2da6\u2da8-\u2dae\u2db0-\u2db6\u2db8-\u2dbe\u2dc0-\u2dc6\u2dc8-\u2dce\u2dd0-\u2dd6\u2dd8-\u2dde\u2e2f\u3005-\u3007\u3021-\u3029\u3031-\u3035\u3038-\u303c\u3041-\u3096\u309d-\u309f\u30a1-\u30fa\u30fc-\u30ff\u3105-\u312d\u3131-\u318e\u31a0-\u31ba\u31f0-\u31ff\u3400-\u4db5\u4e00-\u9fcc\ua000-\ua48c\ua4d0-\ua4fd\ua500-\ua60c\ua610-\ua61f\ua62a\ua62b\ua640-\ua66e\ua67f-\ua697\ua6a0-\ua6ef\ua717-\ua71f\ua722-\ua788\ua78b-\ua78e\ua790-\ua793\ua7a0-\ua7aa\ua7f8-\ua801\ua803-\ua805\ua807-\ua80a\ua80c-\ua822\ua840-\ua873\ua882-\ua8b3\ua8f2-\ua8f7\ua8fb\ua90a-\ua925\ua930-\ua946\ua960-\ua97c\ua984-\ua9b2\ua9cf\uaa00-\uaa28\uaa40-\uaa42\uaa44-\uaa4b\uaa60-\uaa76\uaa7a\uaa80-\uaaaf\uaab1\uaab5\uaab6\uaab9-\uaabd\uaac0\uaac2\uaadb-\uaadd\uaae0-\uaaea\uaaf2-\uaaf4\uab01-\uab06\uab09-\uab0e\uab11-\uab16\uab20-\uab26\uab28-\uab2e\uabc0-\uabe2\uac00-\ud7a3\ud7b0-\ud7c6\ud7cb-\ud7fb\uf900-\ufa6d\ufa70-\ufad9\ufb00-\ufb06\ufb13-\ufb17\ufb1d\ufb1f-\ufb28\ufb2a
-\ufb36\ufb38-\ufb3c\ufb3e\ufb40\ufb41\ufb43\ufb44\ufb46-\ufbb1\ufbd3-\ufd3d\ufd50-\ufd8f\ufd92-\ufdc7\ufdf0-\ufdfb\ufe70-\ufe74\ufe76-\ufefc\uff21-\uff3a\uff41-\uff5a\uff66-\uffbe\uffc2-\uffc7\uffca-\uffcf\uffd2-\uffd7\uffda-\uffdc";
var nonASCIIidentifierChars = "\u0300-\u036f\u0483-\u0487\u0591-\u05bd\u05bf\u05c1\u05c2\u05c4\u05c5\u05c7\u0610-\u061a\u0620-\u0649\u0672-\u06d3\u06e7-\u06e8\u06fb-\u06fc\u0730-\u074a\u0800-\u0814\u081b-\u0823\u0825-\u0827\u0829-\u082d\u0840-\u0857\u08e4-\u08fe\u0900-\u0903\u093a-\u093c\u093e-\u094f\u0951-\u0957\u0962-\u0963\u0966-\u096f\u0981-\u0983\u09bc\u09be-\u09c4\u09c7\u09c8\u09d7\u09df-\u09e0\u0a01-\u0a03\u0a3c\u0a3e-\u0a42\u0a47\u0a48\u0a4b-\u0a4d\u0a51\u0a66-\u0a71\u0a75\u0a81-\u0a83\u0abc\u0abe-\u0ac5\u0ac7-\u0ac9\u0acb-\u0acd\u0ae2-\u0ae3\u0ae6-\u0aef\u0b01-\u0b03\u0b3c\u0b3e-\u0b44\u0b47\u0b48\u0b4b-\u0b4d\u0b56\u0b57\u0b5f-\u0b60\u0b66-\u0b6f\u0b82\u0bbe-\u0bc2\u0bc6-\u0bc8\u0bca-\u0bcd\u0bd7\u0be6-\u0bef\u0c01-\u0c03\u0c46-\u0c48\u0c4a-\u0c4d\u0c55\u0c56\u0c62-\u0c63\u0c66-\u0c6f\u0c82\u0c83\u0cbc\u0cbe-\u0cc4\u0cc6-\u0cc8\u0cca-\u0ccd\u0cd5\u0cd6\u0ce2-\u0ce3\u0ce6-\u0cef\u0d02\u0d03\u0d46-\u0d48\u0d57\u0d62-\u0d63\u0d66-\u0d6f\u0d82\u0d83\u0dca\u0dcf-\u0dd4\u0dd6\u0dd8-\u0ddf\u0df2\u0df3\u0e34-\u0e3a\u0e40-\u0e45\u0e50-\u0e59\u0eb4-\u0eb9\u0ec8-\u0ecd\u0ed0-\u0ed9\u0f18\u0f19\u0f20-\u0f29\u0f35\u0f37\u0f39\u0f41-\u0f47\u0f71-\u0f84\u0f86-\u0f87\u0f8d-\u0f97\u0f99-\u0fbc\u0fc6\u1000-\u1029\u1040-\u1049\u1067-\u106d\u1071-\u1074\u1082-\u108d\u108f-\u109d\u135d-\u135f\u170e-\u1710\u1720-\u1730\u1740-\u1750\u1772\u1773\u1780-\u17b2\u17dd\u17e0-\u17e9\u180b-\u180d\u1810-\u1819\u1920-\u192b\u1930-\u193b\u1951-\u196d\u19b0-\u19c0\u19c8-\u19c9\u19d0-\u19d9\u1a00-\u1a15\u1a20-\u1a53\u1a60-\u1a7c\u1a7f-\u1a89\u1a90-\u1a99\u1b46-\u1b4b\u1b50-\u1b59\u1b6b-\u1b73\u1bb0-\u1bb9\u1be6-\u1bf3\u1c00-\u1c22\u1c40-\u1c49\u1c5b-\u1c7d\u1cd0-\u1cd2\u1d00-\u1dbe\u1e01-\u1f15\u200c\u200d\u203f\u2040\u2054\u20d0-\u20dc\u20e1\u20e5-\u20f0\u2d81-\u2d96\u2de0-\u2dff\u3021-\u3028\u3099\u309a\ua640-\ua66d\ua674-\ua67d\ua69f\ua6f0-\ua6f1\ua7f8-\ua800\ua806\ua80b\ua823-\ua827\ua880-\ua881\ua8b4-\ua8c4\ua8d0-\ua8d9\ua8f3-\ua8f7\ua900-\ua909\ua926-\ua92d\ua930-\ua945\ua980-\ua983\ua
9b3-\ua9c0\uaa00-\uaa27\uaa40-\uaa41\uaa4c-\uaa4d\uaa50-\uaa59\uaa7b\uaae0-\uaae9\uaaf2-\uaaf3\uabc0-\uabe1\uabec\uabed\uabf0-\uabf9\ufb20-\ufb28\ufe00-\ufe0f\ufe20-\ufe26\ufe33\ufe34\ufe4d-\ufe4f\uff10-\uff19\uff3f";
var nonASCIIidentifierStart = new RegExp("[" + nonASCIIidentifierStartChars + "]");
var nonASCIIidentifier = new RegExp("[" + nonASCIIidentifierStartChars + nonASCIIidentifierChars + "]"); |
| Whether a single character denotes a newline. | var newline = /[\n\r\u2028\u2029]/; |
| Matches a whole line break (where CRLF is considered a single line break). Used to count lines. | var lineBreak = /\r\n|[\n\r\u2028\u2029]/g; |
| Test whether a given character code starts an identifier. | var isIdentifierStart = exports.isIdentifierStart = function(code) {
// `code` is a character code. The ASCII range is decided with plain
// comparisons; the regexp is only consulted for codes of 0xaa and above.
// Below 'A' (65), only '$' (36) qualifies.
if (code < 65) return code === 36;
// 'A'-'Z' (65-90).
if (code < 91) return true;
// Between 'Z' and 'a', only '_' (95) qualifies.
if (code < 97) return code === 95;
// 'a'-'z' (97-122).
if (code < 123)return true;
// 0xaa is the first character listed in `nonASCIIidentifierStartChars`.
return code >= 0xaa && nonASCIIidentifierStart.test(String.fromCharCode(code));
}; |
| Test whether a given character is part of an identifier. | var isIdentifierChar = exports.isIdentifierChar = function(code) {
// `code` is a character code. The ASCII range is decided with plain
// comparisons; the regexp is only consulted for codes of 0xaa and above.
// Below '0' (48), only '$' (36) qualifies.
if (code < 48) return code === 36;
// '0'-'9' (48-57).
if (code < 58) return true;
// Nothing between '9' and 'A' qualifies.
if (code < 65) return false;
// 'A'-'Z' (65-90).
if (code < 91) return true;
// Between 'Z' and 'a', only '_' (95) qualifies.
if (code < 97) return code === 95;
// 'a'-'z' (97-122).
if (code < 123)return true;
// `nonASCIIidentifier` accepts the identifier-start characters plus the
// additional characters in `nonASCIIidentifierChars`.
return code >= 0xaa && nonASCIIidentifier.test(String.fromCharCode(code));
}; |
Tokenizer | |
| These are used when | function line_loc_t() {
// Constructor that snapshots the tokenizer's current position: `line` is the
// current line number and `column` is the offset of `tokPos` within that line.
this.line = tokCurLine;
this.column = tokPos - tokLineStart;
} |
| Reset the token state. Used at the start of a parse. | function initTokenState() {
// Start on line 1, at offset 0, with the line start at offset 0 as well.
tokCurLine = 1;
tokPos = tokLineStart = 0;
// At the very start of the input a '/' is read as the start of a regexp.
tokRegexpAllowed = true;
// Move on to the first token.
skipSpace();
} |
| Called at the end of every token. Sets | function finishToken(type, val) {
// Records the token that ends at the current `tokPos`: `type` becomes
// `tokType` and `val` becomes `tokVal`.
tokEnd = tokPos;
// The end location has to be captured before `skipSpace` moves `tokPos`.
if (options.locations) tokEndLoc = new line_loc_t;
tokType = type;
// Advance to the start of the next token; skipping comments may call
// `options.onComment`.
skipSpace();
tokVal = val;
// A '/' directly after this token is read as a regexp only when the token
// type has `beforeExpr` set (see `readToken_slash`).
tokRegexpAllowed = type.beforeExpr;
}
// Skips the `/* ... */` comment that starts at `tokPos`, leaving `tokPos`
// just past the closing `*/`. Raises "Unterminated comment" when no closing
// delimiter exists. With `options.locations` on, the line counters are
// advanced over every line break inside the comment. With `options.onComment`
// set, it is called with (true, text, start, end, startLoc, endLoc).
function skipBlockComment() {
  var openLoc = options.onComment && options.locations && new line_loc_t;
  var openPos = tokPos;
  tokPos += 2;
  var closePos = input.indexOf("*/", tokPos);
  if (closePos === -1) raise(tokPos - 2, "Unterminated comment");
  tokPos = closePos + 2;

  if (options.locations) {
    // Count the line breaks between the opening delimiter and the new position.
    lineBreak.lastIndex = openPos;
    for (var br = lineBreak.exec(input); br && br.index < tokPos; br = lineBreak.exec(input)) {
      tokCurLine += 1;
      tokLineStart = br.index + br[0].length;
    }
  }

  if (options.onComment) {
    var closeLoc = options.locations && new line_loc_t;
    options.onComment(true, input.slice(openPos + 2, closePos), openPos, tokPos, openLoc, closeLoc);
  }
}
// Skips the `//` line comment that starts at `tokPos`. On return `tokPos`
// points at the line terminator that ends the comment (the terminator itself
// is not consumed) or at the end of the input. With `options.onComment` set,
// it is called with (false, text, start, end, startLoc, endLoc); the location
// arguments are only objects when `options.locations` is on.
function skipLineComment() {
  var start = tokPos;
  var startLoc = options.onComment && options.locations && new line_loc_t;
  var ch = input.charCodeAt(tokPos += 2);
  // The four ECMAScript line terminators:
  // 10 = "\n", 13 = "\r", 8232 = U+2028, 8233 = U+2029.
  while (tokPos < inputLen && ch !== 10 && ch !== 13 && ch !== 8232 && ch !== 8233) {
    ++tokPos;
    ch = input.charCodeAt(tokPos);
  }
  if (options.onComment)
    options.onComment(false, input.slice(start + 2, tokPos), start, tokPos,
                      startLoc, options.locations && new line_loc_t);
}
| Called at the start of the parse and after every token. Skips whitespace and comments, and. | function skipSpace() {
while (tokPos < inputLen) {
var ch = input.charCodeAt(tokPos);
if (ch === 32) { // ' '
++tokPos;
} else if (ch === 13) {
++tokPos;
var next = input.charCodeAt(tokPos);
if (next === 10) {
++tokPos;
}
if (options.locations) {
++tokCurLine;
tokLineStart = tokPos;
}
} else if (ch === 10) {
++tokPos;
++tokCurLine;
tokLineStart = tokPos;
} else if (ch > 8 && ch < 14) {
++tokPos;
} else if (ch === 47) { // '/'
var next = input.charCodeAt(tokPos + 1);
if (next === 42) { // '*'
skipBlockComment();
} else if (next === 47) { // '/'
skipLineComment();
} else break;
} else if (ch === 160) { // '\xa0'
++tokPos;
} else if (ch >= 5760 && nonASCIIwhitespace.test(String.fromCharCode(ch))) {
++tokPos;
} else {
break;
}
}
} |
Token reading | |
| This is the function that is called to fetch the next token. It is somewhat obscure, because it works in character codes rather than characters, and because operator parsing has been inlined into it. All in the name of speed. The | function readToken_dot() {
// Handles a token that starts with '.'. When a digit follows, the dot starts
// a number such as `.5`; otherwise it is the dot punctuator.
var next = input.charCodeAt(tokPos + 1);
// 48-57 are the character codes of '0'-'9'. `readNumber` is defined outside
// this excerpt; presumably `true` tells it that the number starts with a dot.
if (next >= 48 && next <= 57) return readNumber(true);
++tokPos;
return finishToken(_dot);
}
// Reads a token that starts with '/'. Where a regular expression is allowed
// the slash opens one; otherwise it is either the `/=` assignment operator
// or a plain division.
function readToken_slash() {
  if (tokRegexpAllowed) {
    tokPos += 1;
    return readRegexp();
  }
  var followedByEquals = input.charCodeAt(tokPos + 1) === 61; // '='
  return followedByEquals ? finishOp(_assign, 2) : finishOp(_slash, 1);
}
// Reads a token that starts with '*' or '%': the compound assignment when an
// '=' follows, otherwise the binary operator itself.
function readToken_mult_modulo() {
  var followedByEquals = input.charCodeAt(tokPos + 1) === 61; // '='
  if (followedByEquals) {
    return finishOp(_assign, 2);
  } else {
    return finishOp(_bin10, 1);
  }
}
// Reads a token that starts with '|' or '&'; `code` is the character code of
// that first character (124 for '|'). A doubled character is the logical
// operator, a following '=' makes it a compound assignment, and anything
// else leaves the single-character bitwise operator.
function readToken_pipe_amp(code) {
  var isPipe = code === 124;
  switch (input.charCodeAt(tokPos + 1)) {
    case code:
      return finishOp(isPipe ? _bin1 : _bin2, 2);
    case 61: // '='
      return finishOp(_assign, 2);
    default:
      return finishOp(isPipe ? _bin3 : _bin5, 1);
  }
}
// Reads a token that starts with '^': `^=` is a compound assignment,
// a lone '^' is the binary operator.
function readToken_caret() {
  var followedByEquals = input.charCodeAt(tokPos + 1) === 61; // '='
  return followedByEquals ? finishOp(_assign, 2) : finishOp(_bin4, 1);
}
// Reads a token that starts with '+' or '-'; `code` is the character code of
// that first character. Produces `++`/`--`, a compound assignment, or the
// plain operator. A `-->` whose previous token ended before the start of the
// current line is treated as a line comment: it is skipped and the token
// after it is read instead.
function readToken_plus_min(code) { // '+-'
  var next = input.charCodeAt(tokPos + 1);
  if (next === code) {
    if (next === 45 && input.charCodeAt(tokPos + 2) === 62 && lastEnd < tokLineStart) {
      // A '-->' line comment
      // `skipLineComment` itself steps over two characters (it is written for
      // `//`) before it starts looking for the end of the line. Advancing by
      // one here therefore makes it begin right after the three-character
      // `-->`, so a line break directly following the marker still ends the
      // comment. The start offset handed to `options.onComment` is the
      // second '-' of the marker.
      tokPos += 1;
      skipLineComment();
      skipSpace();
      return readToken();
    }
    return finishOp(_incdec, 2);
  }
  if (next === 61) return finishOp(_assign, 2); // '='
  return finishOp(_plusmin, 1);
}
function readToken_lt_gt(code) { // '<>'
var next = input.charCodeAt(tokPos + 1);
var size = 1;
if (next === code) {
size = code === 62 && input.charCodeAt(tokPos+2) === 62 ? 3 : 2;
if (input.charCodeAt(tokPos + size) === 61) return finishOp(_assign, size + 1);
return finishOp(_bin8, size);
}
if (next == 33 && code == 60 && input.charCodeAt(tokPos + 2) == 45 && input.charCodeAt(tokPos + 3) == 45) { |
| ' |