Faster identifier tokenizing (#13262)

* add benchmark * perf: faster identifier tokenizing - Move iterator identifier parsing to the Flow plugin - If the character is an identifier start, pass it to readWord1
2021-05-06 18:47:19 -04:00 · 2021-05-06 18:47:19 -04:00 · a8fea4037d
commit a8fea4037d
parent 1879491af7
6 changed files with 79 additions and 28 deletions
--- a/packages/babel-parser/benchmark/many-identifiers/1-length.bench.mjs
+++ b/packages/babel-parser/benchmark/many-identifiers/1-length.bench.mjs
@ -0,0 +1,23 @@
 import Benchmark from "benchmark";
 import baseline from "@babel-baseline/parser";
 import current from "../../lib/index.js";
 import { report } from "../util.mjs";
 // Suite that collects every benchmark case registered by `benchCases`.
 const suite = new Benchmark.Suite();
 // Build the benchmark source text: `length` copies of the statement "a;",
 // i.e. that many one-character identifiers.
 function createInput(length) {
  const statement = "a;";
  return statement.repeat(length);
 }
 // NOTE(review): presumably a warm-up parse of the current build before any
 // case is timed — confirm.
 current.parse("a");
 // Register one case on `suite` per input size for the given parser.
 //   name           - label placed at the front of each case title
 //   implementation - object exposing `parse(input, options)`
 //   options        - forwarded unchanged to every `parse` call
 // The input string is built once per size, outside the measured callback.
 function benchCases(name, implementation, options) {
  const lengths = [64, 128, 256, 512, 1024];
  lengths.forEach(length => {
    const input = createInput(length);
    const title = `${name} ${length} length-1 identifiers`;
    suite.add(title, () => {
      implementation.parse(input, options);
    });
  });
 }
 // Register the same cases for the `baseline` and `current` parsers (in that
 // order), then run the suite, passing each "cycle" event to `report`.
 for (const [label, parser] of [
  ["baseline", baseline],
  ["current", current],
 ]) {
  benchCases(label, parser);
 }
 suite.on("cycle", report).run();
--- a/packages/babel-parser/benchmark/many-identifiers/2-length.bench.mjs
+++ b/packages/babel-parser/benchmark/many-identifiers/2-length.bench.mjs
@ -0,0 +1,23 @@
 import Benchmark from "benchmark";
 import baseline from "@babel-baseline/parser";
 import current from "../../lib/index.js";
 import { report } from "../util.mjs";
 // Suite that collects every benchmark case registered by `benchCases`.
 const suite = new Benchmark.Suite();
 // Build the benchmark source text: `length` copies of the statement "aa;",
 // i.e. that many two-character identifiers.
 function createInput(length) {
  const statement = "aa;";
  return statement.repeat(length);
 }
 // NOTE(review): presumably a warm-up parse of the current build before any
 // case is timed — confirm.
 current.parse("a");
 // Register one case on `suite` per input size for the given parser.
 //   name           - label placed at the front of each case title
 //   implementation - object exposing `parse(input, options)`
 //   options        - forwarded unchanged to every `parse` call
 // The input string is built once per size, outside the measured callback.
 function benchCases(name, implementation, options) {
  const lengths = [64, 128, 256, 512, 1024];
  lengths.forEach(length => {
    const input = createInput(length);
    const title = `${name} ${length} length-2 identifiers`;
    suite.add(title, () => {
      implementation.parse(input, options);
    });
  });
 }
 // Register the same cases for the `baseline` and `current` parsers (in that
 // order), then run the suite, passing each "cycle" event to `report`.
 for (const [label, parser] of [
  ["baseline", baseline],
  ["current", current],
 ]) {
  benchCases(label, parser);
 }
 suite.on("cycle", report).run();
--- a/packages/babel-parser/src/plugins/flow/index.js
+++ b/packages/babel-parser/src/plugins/flow/index.js
@ -2219,6 +2219,22 @@ export default (superClass: Class<Parser>): Class<Parser> =>
      }
    }
    isIterator(word: string): boolean {
      return word === "iterator" || word === "asyncIterator";
    }
    readIterator(): void {
      const word = super.readWord1();
      const fullWord = "@@" + word;
      // Allow @@iterator and @@asyncIterator as a identifier only inside type
      if (!this.isIterator(word) || !this.state.inType) {
        this.raise(this.state.pos, Errors.InvalidIdentifier, fullWord);
      }
      this.finishToken(tt.name, fullWord);
    }
    // ensure that inside flow types, we bypass the jsx parser plugin
    getTokenFromCode(code: number): void {
      const next = this.input.charCodeAt(this.state.pos + 1);
@ -2236,8 +2252,8 @@ export default (superClass: Class<Parser>): Class<Parser> =>
        // allow double nullable types in Flow: ??string
        return this.finishOp(tt.question, 1);
      } else if (isIteratorStart(code, next)) {
-        this.state.isIterator = true;
+        this.state.pos += 2; // eat "@@"
-        return super.readWord();
+        return this.readIterator();
      } else {
        return super.getTokenFromCode(code);
      }
--- a/packages/babel-parser/src/tokenizer/context.js
+++ b/packages/babel-parser/src/tokenizer/context.js
@ -76,10 +76,6 @@ tt.name.updateContext = function (prevType) {
    }
  }
  this.state.exprAllowed = allowed;
  if (this.state.isIterator) {
    this.state.isIterator = false;
  }
 };
 tt.braceL.updateContext = function (prevType) {
--- a/packages/babel-parser/src/tokenizer/index.js
+++ b/packages/babel-parser/src/tokenizer/index.js
@ -453,7 +453,10 @@ export default class Tokenizer extends ParserErrors {
        this.finishToken(tt.bracketHashL);
      }
      this.state.pos += 2;
-    } else if (isIdentifierStart(next) || next === charCodes.backslash) {
+    } else if (isIdentifierStart(next)) {
      ++this.state.pos;
      this.finishToken(tt.privateName, this.readWord1(next));
    } else if (next === charCodes.backslash) {
      ++this.state.pos;
      this.finishToken(tt.privateName, this.readWord1());
    } else {
@ -920,7 +923,7 @@ export default class Tokenizer extends ParserErrors {
      default:
        if (isIdentifierStart(code)) {
-          this.readWord();
+          this.readWord(code);
          return;
        }
    }
@ -1457,19 +1460,23 @@ export default class Tokenizer extends ParserErrors {
  //
  // Incrementally adds only escaped chars, adding other chunks as-is
  // as a micro-optimization.
  //
  // When `firstCode` is given, it is assumed to be an identifier start, so the
  // character at the start position is not read again.
-  readWord1(): string {
+  readWord1(firstCode: number | void): string {
    let word = "";
    this.state.containsEsc = false;
    let word = "";
    const start = this.state.pos;
    let chunkStart = this.state.pos;
    if (firstCode !== undefined) {
      this.state.pos += firstCode <= 0xffff ? 1 : 2;
    }
    while (this.state.pos < this.length) {
      const ch = this.codePointAtPos(this.state.pos);
      if (isIdentifierChar(ch)) {
        this.state.pos += ch <= 0xffff ? 1 : 2;
      } else if (this.state.isIterator && ch === charCodes.atSign) {
        ++this.state.pos;
      } else if (ch === charCodes.backslash) {
        this.state.containsEsc = true;
@ -1501,25 +1508,12 @@ export default class Tokenizer extends ParserErrors {
    return word + this.input.slice(chunkStart, this.state.pos);
  }
  isIterator(word: string): boolean {
    return word === "@@iterator" || word === "@@asyncIterator";
  }
  // Read an identifier or keyword token. Will check for reserved
  // words when necessary.
-  readWord(): void {
+  readWord(firstCode: number | void): void {
-    const word = this.readWord1();
+    const word = this.readWord1(firstCode);
    // A word with no entry in `keywordTypes` is tokenized as a plain name.
    const type = keywordTypes.get(word) || tt.name;
    // Allow @@iterator and @@asyncIterator as an identifier only inside a type
    if (
      this.state.isIterator &&
      (!this.isIterator(word) || !this.state.inType)
    ) {
      this.raise(this.state.pos, Errors.InvalidIdentifier, word);
    }
    this.finishToken(type, word);
  }
--- a/packages/babel-parser/src/tokenizer/state.js
+++ b/packages/babel-parser/src/tokenizer/state.js
@ -64,7 +64,6 @@ export default class State {
  noAnonFunctionType: boolean = false;
  inPropertyName: boolean = false;
  hasFlowComment: boolean = false;
  isIterator: boolean = false;
  isAmbientContext: boolean = false;
  inAbstractClass: boolean = false;