diff --git a/acorn.js b/acorn.js index a2f4aa1a91..f7959aef8e 100644 --- a/acorn.js +++ b/acorn.js @@ -136,8 +136,12 @@ getToken.jumpTo = function(pos, reAllowed) { tokPos = pos; if (options.locations) { - tokCurLine = tokLineStart = 0; - tokLineStartNext = nextLineStart(); + tokCurLine = tokLineStart = lineBreak.lastIndex = 0; + var match; + while ((match = lineBreak.exec(input)) && match.index < pos) { + ++tokCurLine; + tokLineStart = match.index + match[0].length; + } } var ch = input.charAt(pos - 1); tokRegexpAllowed = reAllowed; @@ -183,9 +187,9 @@ // When `options.locations` is true, these are used to keep // track of the current line, and know when a new line has been - // entered. See the `curLineLoc` function. + // entered. - var tokCurLine, tokLineStart, tokLineStartNext; + var tokCurLine, tokLineStart; // These store the position of the previous token, which is useful // when finishing a node and assigning its `end` position. @@ -429,36 +433,19 @@ // ## Tokenizer - // These are used when `options.locations` is on, in order to track - // the current line number and start of line offset, in order to set - // `tokStartLoc` and `tokEndLoc`. - - function nextLineStart() { - lineBreak.lastIndex = tokLineStart; - var match = lineBreak.exec(input); - return match ? match.index + match[0].length : input.length + 1; - } + // These are used when `options.locations` is on, for the + // `tokStartLoc` and `tokEndLoc` properties. function line_loc_t() { this.line = tokCurLine; this.column = tokPos - tokLineStart; } - function curLineLoc() { - while (tokLineStartNext <= tokPos) { - ++tokCurLine; - tokLineStart = tokLineStartNext; - tokLineStartNext = nextLineStart(); - } - return new line_loc_t(); - } - // Reset the token state. Used at the start of a parse. 
function initTokenState() { tokCurLine = 1; tokPos = tokLineStart = 0; - tokLineStartNext = nextLineStart(); tokRegexpAllowed = true; skipSpace(); } @@ -469,7 +456,7 @@ function finishToken(type, val) { tokEnd = tokPos; - if (options.locations) tokEndLoc = curLineLoc(); + if (options.locations) tokEndLoc = new line_loc_t; tokType = type; skipSpace(); tokVal = val; @@ -477,20 +464,26 @@ } function skipBlockComment() { - if (options.onComment && options.locations) - var startLoc = curLineLoc(); + var startLoc = options.onComment && options.locations && new line_loc_t; var start = tokPos, end = input.indexOf("*/", tokPos += 2); if (end === -1) raise(tokPos - 2, "Unterminated comment"); tokPos = end + 2; + if (options.locations) { + lineBreak.lastIndex = start; + var match; + while ((match = lineBreak.exec(input)) && match.index < tokPos) { + ++tokCurLine; + tokLineStart = match.index + match[0].length; + } + } if (options.onComment) options.onComment(true, input.slice(start + 2, end), start, tokPos, - startLoc, options.locations && curLineLoc()); + startLoc, options.locations && new line_loc_t); } function skipLineComment() { var start = tokPos; - if (options.onComment && options.locations) - var startLoc = curLineLoc(); + var startLoc = options.onComment && options.locations && new line_loc_t; var ch = input.charCodeAt(tokPos+=2); while (tokPos < inputLen && ch !== 10 && ch !== 13 && ch !== 8232 && ch !== 8329) { ++tokPos; @@ -498,7 +491,7 @@ } if (options.onComment) options.onComment(false, input.slice(start + 2, tokPos), start, tokPos, - startLoc, options.locations && curLineLoc()); + startLoc, options.locations && new line_loc_t); } // Called at the start of the parse and after every token. 
Skips @@ -507,7 +500,25 @@ function skipSpace() { while (tokPos < inputLen) { var ch = input.charCodeAt(tokPos); - if (ch === 47) { // '/' + if (ch === 32) { // ' ' + ++tokPos; + } else if(ch === 13) { + ++tokPos; + var next = input.charCodeAt(tokPos); + if(next === 10) { + ++tokPos; + } + if(options.locations) { + ++tokCurLine; + tokLineStart = tokPos; + } + } else if (ch === 10) { + ++tokPos; + ++tokCurLine; + tokLineStart = tokPos; + } else if(ch < 14 && ch > 8) { + ++tokPos; + } else if (ch === 47) { // '/' var next = input.charCodeAt(tokPos+1); if (next === 42) { // '*' skipBlockComment(); @@ -662,7 +673,7 @@ function readToken(forceRegexp) { tokStart = tokPos; - if (options.locations) tokStartLoc = curLineLoc(); + if (options.locations) tokStartLoc = new line_loc_t; if (forceRegexp) return readRegexp(); if (tokPos >= inputLen) return finishToken(_eof); @@ -809,7 +820,9 @@ case 102: rs_str.push(12); break; // 'f' -> '\f' case 48: rs_str.push(0); break; // 0 -> '\0' case 13: if (input.charCodeAt(tokPos) === 10) ++tokPos; // '\r\n' - case 10: break; // ' \n' + case 10: // ' \n' + if (options.locations) { tokLineStart = tokPos; ++tokCurLine; } + break; default: rs_str.push(ch); break; } } @@ -1042,7 +1055,7 @@ function parseTopLevel(program) { lastStart = lastEnd = tokPos; - if (options.locations) lastEndLoc = curLineLoc(); + if (options.locations) lastEndLoc = new line_loc_t; inFunction = strict = null; labels = []; readToken(); diff --git a/index.html b/index.html index 7b308ba7b8..4e3d209011 100644 --- a/index.html +++ b/index.html @@ -95,8 +95,12 @@ reset the internal state, and invalidate existing tokenizers.

getToken.jumpTo = function(pos, reAllowed) { tokPos = pos; if (options.locations) { - tokCurLine = tokLineStart = 0; - tokLineStartNext = nextLineStart(); + tokCurLine = tokLineStart = lineBreak.lastIndex = 0; + var match; + while ((match = lineBreak.exec(input)) && match.index < pos) { + ++tokCurLine; + tokLineStart = match.index + match[0].length; + } } var ch = input.charAt(pos - 1); tokRegexpAllowed = reAllowed; @@ -118,7 +122,7 @@ token was one that is allowed to be followed by an expression. division operator. See the parseStatement function for a caveat.)

  var tokRegexpAllowed;

When options.locations is true, these are used to keep track of the current line, and know when a new line has been -entered. See the curLineLoc function.

  var tokCurLine, tokLineStart, tokLineStartNext;

These store the position of the previous token, which is useful +entered.

  var tokCurLine, tokLineStart;

These store the position of the previous token, which is useful when finishing a node and assigning its end position.

  var lastStart, lastEnd, lastEndLoc;

This is the parser's state. inFunction is used to reject return statements outside of functions, labels to verify that break and continue have somewhere to jump to, and strict @@ -254,37 +258,20 @@ line break). Used to count lines.

if (code < 97) return code === 95; if (code < 123)return true; return code >= 0xaa && nonASCIIidentifier.test(String.fromCharCode(code)); - }

Tokenizer

These are used when options.locations is on, in order to track -the current line number and start of line offset, in order to set -tokStartLoc and tokEndLoc.

  function nextLineStart() {
-    lineBreak.lastIndex = tokLineStart;
-    var match = lineBreak.exec(input);
-    return match ? match.index + match[0].length : input.length + 1;
-  }
-
-  function line_loc_t() {
+  }

Tokenizer

These are used when options.locations is on, for the +tokStartLoc and tokEndLoc properties.

  function line_loc_t() {
     this.line = tokCurLine;
     this.column = tokPos - tokLineStart;
-  }
-
-  function curLineLoc() {
-    while (tokLineStartNext <= tokPos) {
-      ++tokCurLine;
-      tokLineStart = tokLineStartNext;
-      tokLineStartNext = nextLineStart();
-    }
-    return new line_loc_t();
   }

Reset the token state. Used at the start of a parse.

  function initTokenState() {
     tokCurLine = 1;
     tokPos = tokLineStart = 0;
-    tokLineStartNext = nextLineStart();
     tokRegexpAllowed = true;
     skipSpace();
   }

Called at the end of every token. Sets tokEnd, tokVal, and tokRegexpAllowed, and skips the space after the token, so that the next one's tokStart will point at the right position.

  function finishToken(type, val) {
     tokEnd = tokPos;
-    if (options.locations) tokEndLoc = curLineLoc();
+    if (options.locations) tokEndLoc = new line_loc_t;
     tokType = type;
     skipSpace();
     tokVal = val;
@@ -292,20 +279,26 @@ the next one's tokStart will point at the right position.

} function skipBlockComment() { - if (options.onComment && options.locations) - var startLoc = curLineLoc(); + var startLoc = options.onComment && options.locations && new line_loc_t; var start = tokPos, end = input.indexOf("*/", tokPos += 2); if (end === -1) raise(tokPos - 2, "Unterminated comment"); tokPos = end + 2; + if (options.locations) { + lineBreak.lastIndex = start; + var match; + while ((match = lineBreak.exec(input)) && match.index < tokPos) { + ++tokCurLine; + tokLineStart = match.index + match[0].length; + } + } if (options.onComment) options.onComment(true, input.slice(start + 2, end), start, tokPos, - startLoc, options.locations && curLineLoc()); + startLoc, options.locations && new line_loc_t); } function skipLineComment() { var start = tokPos; - if (options.onComment && options.locations) - var startLoc = curLineLoc(); + var startLoc = options.onComment && options.locations && new line_loc_t; var ch = input.charCodeAt(tokPos+=2); while (tokPos < inputLen && ch !== 10 && ch !== 13 && ch !== 8232 && ch !== 8329) { ++tokPos; @@ -313,12 +306,30 @@ the next one's tokStart will point at the right position.

} if (options.onComment) options.onComment(false, input.slice(start + 2, tokPos), start, tokPos, - startLoc, options.locations && curLineLoc()); + startLoc, options.locations && new line_loc_t); }

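Called at the start of the parse and after every token. Skips whitespace and comments, and updates the line-tracking state (tokCurLine and tokLineStart) that is used when options.locations is on.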

  function skipSpace() {
     while (tokPos < inputLen) {
       var ch = input.charCodeAt(tokPos);
-      if (ch === 47) { // '/'
+      if (ch === 32) { // ' '
+        ++tokPos;
+      } else if(ch === 13) {
+        ++tokPos;
+        var next = input.charCodeAt(tokPos);
+        if(next === 10) {
+          ++tokPos;
+        }
+        if(options.locations) {
+          ++tokCurLine;
+          tokLineStart = tokPos;
+        }
+      } else if (ch === 10) {
+        ++tokPos;
+        ++tokCurLine;
+        tokLineStart = tokPos;
+      } else if(ch < 14 && ch > 8) {
+        ++tokPos;
+      } else if (ch === 47) { // '/'
         var next = input.charCodeAt(tokPos+1);
         if (next === 42) { // '*'
           skipBlockComment();
@@ -450,7 +461,7 @@ of the type given by its first argument.

function readToken(forceRegexp) { tokStart = tokPos; - if (options.locations) tokStartLoc = curLineLoc(); + if (options.locations) tokStartLoc = new line_loc_t; if (forceRegexp) return readRegexp(); if (tokPos >= inputLen) return finishToken(_eof); @@ -576,7 +587,9 @@ will return null unless the integer has exactly len di case 102: rs_str.push(12); break; // 'f' -> '\f' case 48: rs_str.push(0); break; // 0 -> '\0' case 13: if (input.charCodeAt(tokPos) === 10) ++tokPos; // '\r\n' - case 10: break; // ' \n' + case 10: // ' \n' + if (options.locations) { tokLineStart = tokPos; ++tokCurLine; } + break; default: rs_str.push(ch); break; } } @@ -732,7 +745,7 @@ statements, and wraps them in a Program node. Optionally takes a program argument. If present, the statements will be appended to its body instead of creating a new node.

  function parseTopLevel(program) {
     lastStart = lastEnd = tokPos;
-    if (options.locations) lastEndLoc = curLineLoc();
+    if (options.locations) lastEndLoc = new line_loc_t;
     inFunction = strict = null;
     labels = [];
     readToken();