From a8fea4037dc194c4c220c15a0982c70cc12dce7d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= <jlhwung@gmail.com>
Date: Thu, 6 May 2021 18:47:19 -0400
Subject: [PATCH] Faster identifier tokenizing (#13262)

* add benchmark

* perf: faster identifier tokenizing

- Move iterator identifier parsing to the Flow plugin
- If the character is an identifier start, pass it to readWord1
---
 .../many-identifiers/1-length.bench.mjs       | 23 ++++++++++++
 .../many-identifiers/2-length.bench.mjs       | 23 ++++++++++++
 .../babel-parser/src/plugins/flow/index.js    | 20 +++++++++--
 .../babel-parser/src/tokenizer/context.js     |  4 ---
 packages/babel-parser/src/tokenizer/index.js  | 36 ++++++++-----------
 packages/babel-parser/src/tokenizer/state.js  |  1 -
 6 files changed, 79 insertions(+), 28 deletions(-)
 create mode 100644 packages/babel-parser/benchmark/many-identifiers/1-length.bench.mjs
 create mode 100644 packages/babel-parser/benchmark/many-identifiers/2-length.bench.mjs
diff --git a/packages/babel-parser/benchmark/many-identifiers/1-length.bench.mjs b/packages/babel-parser/benchmark/many-identifiers/1-length.bench.mjs
new file mode 100644
index 0000000000..df6605795a
--- /dev/null
+++ b/packages/babel-parser/benchmark/many-identifiers/1-length.bench.mjs
@@ -0,0 +1,23 @@
+import Benchmark from "benchmark";
+import baseline from "@babel-baseline/parser";
+import current from "../../lib/index.js";
+import { report } from "../util.mjs";
+
+const suite = new Benchmark.Suite();
+function createInput(length) {
+  return "a;".repeat(length);
+}
+current.parse("a");
+function benchCases(name, implementation, options) {
+  for (const length of [64, 128, 256, 512, 1024]) {
+    const input = createInput(length);
+    suite.add(`${name} ${length} length-1 identifiers`, () => {
+      implementation.parse(input, options);
+    });
+  }
+}
+
+benchCases("baseline", baseline);
+benchCases("current", current);
+
+suite.on("cycle", report).run();
diff --git a/packages/babel-parser/benchmark/many-identifiers/2-length.bench.mjs b/packages/babel-parser/benchmark/many-identifiers/2-length.bench.mjs
new file mode 100644
index 0000000000..4d6453c6f4
--- /dev/null
+++ b/packages/babel-parser/benchmark/many-identifiers/2-length.bench.mjs
@@ -0,0 +1,23 @@
+import Benchmark from "benchmark";
+import baseline from "@babel-baseline/parser";
+import current from "../../lib/index.js";
+import { report } from "../util.mjs";
+
+const suite = new Benchmark.Suite();
+function createInput(length) {
+  return "aa;".repeat(length);
+}
+current.parse("a");
+function benchCases(name, implementation, options) {
+  for (const length of [64, 128, 256, 512, 1024]) {
+    const input = createInput(length);
+    suite.add(`${name} ${length} length-2 identifiers`, () => {
+      implementation.parse(input, options);
+    });
+  }
+}
+
+benchCases("baseline", baseline);
+benchCases("current", current);
+
+suite.on("cycle", report).run();
diff --git a/packages/babel-parser/src/plugins/flow/index.js b/packages/babel-parser/src/plugins/flow/index.js
index a237acffa5..94169d697e 100644
--- a/packages/babel-parser/src/plugins/flow/index.js
+++ b/packages/babel-parser/src/plugins/flow/index.js
@@ -2219,6 +2219,22 @@ export default (superClass: Class<Parser>): Class<Parser> =>
       }
     }
 
+    isIterator(word: string): boolean {
+      return word === "iterator" || word === "asyncIterator";
+    }
+
+    readIterator(): void {
+      const word = super.readWord1();
+      const fullWord = "@@" + word;
+
+      // Allow @@iterator and @@asyncIterator as an identifier only inside type
+      if (!this.isIterator(word) || !this.state.inType) {
+        this.raise(this.state.pos, Errors.InvalidIdentifier, fullWord);
+      }
+
+      this.finishToken(tt.name, fullWord);
+    }
+
     // ensure that inside flow types, we bypass the jsx parser plugin
     getTokenFromCode(code: number): void {
       const next = this.input.charCodeAt(this.state.pos + 1);
@@ -2236,8 +2252,8 @@ export default (superClass: Class<Parser>): Class<Parser> =>
         // allow double nullable types in Flow: ??string
         return this.finishOp(tt.question, 1);
       } else if (isIteratorStart(code, next)) {
-        this.state.isIterator = true;
-        return super.readWord();
+        this.state.pos += 2; // eat "@@"
+        return this.readIterator();
       } else {
         return super.getTokenFromCode(code);
       }
diff --git a/packages/babel-parser/src/tokenizer/context.js b/packages/babel-parser/src/tokenizer/context.js
index 5581e6b05e..854db1ccbf 100644
--- a/packages/babel-parser/src/tokenizer/context.js
+++ b/packages/babel-parser/src/tokenizer/context.js
@@ -76,10 +76,6 @@ tt.name.updateContext = function (prevType) {
     }
   }
   this.state.exprAllowed = allowed;
-
-  if (this.state.isIterator) {
-    this.state.isIterator = false;
-  }
 };
 
 tt.braceL.updateContext = function (prevType) {
diff --git a/packages/babel-parser/src/tokenizer/index.js b/packages/babel-parser/src/tokenizer/index.js
index b5c6b68e06..f891934329 100644
--- a/packages/babel-parser/src/tokenizer/index.js
+++ b/packages/babel-parser/src/tokenizer/index.js
@@ -453,7 +453,10 @@ export default class Tokenizer extends ParserErrors {
         this.finishToken(tt.bracketHashL);
       }
       this.state.pos += 2;
-    } else if (isIdentifierStart(next) || next === charCodes.backslash) {
+    } else if (isIdentifierStart(next)) {
+      ++this.state.pos;
+      this.finishToken(tt.privateName, this.readWord1(next));
+    } else if (next === charCodes.backslash) {
       ++this.state.pos;
       this.finishToken(tt.privateName, this.readWord1());
     } else {
@@ -920,7 +923,7 @@ export default class Tokenizer extends ParserErrors {
 
       default:
         if (isIdentifierStart(code)) {
-          this.readWord();
+          this.readWord(code);
           return;
         }
     }
@@ -1457,19 +1460,23 @@ export default class Tokenizer extends ParserErrors {
   //
   // Incrementally adds only escaped chars, adding other chunks as-is
   // as a micro-optimization.
+  //
+  // When `firstCode` is given, it assumes it is always an identifier start and
+  // will skip reading start position again
 
-  readWord1(): string {
-    let word = "";
+  readWord1(firstCode: number | void): string {
     this.state.containsEsc = false;
+    let word = "";
     const start = this.state.pos;
     let chunkStart = this.state.pos;
+    if (firstCode !== undefined) {
+      this.state.pos += firstCode <= 0xffff ? 1 : 2;
+    }
 
     while (this.state.pos < this.length) {
       const ch = this.codePointAtPos(this.state.pos);
       if (isIdentifierChar(ch)) {
         this.state.pos += ch <= 0xffff ? 1 : 2;
-      } else if (this.state.isIterator && ch === charCodes.atSign) {
-        ++this.state.pos;
       } else if (ch === charCodes.backslash) {
         this.state.containsEsc = true;
 
@@ -1501,25 +1508,12 @@ export default class Tokenizer extends ParserErrors {
     return word + this.input.slice(chunkStart, this.state.pos);
   }
 
-  isIterator(word: string): boolean {
-    return word === "@@iterator" || word === "@@asyncIterator";
-  }
-
   // Read an identifier or keyword token. Will check for reserved
   // words when necessary.
 
-  readWord(): void {
-    const word = this.readWord1();
+  readWord(firstCode: number | void): void {
+    const word = this.readWord1(firstCode);
     const type = keywordTypes.get(word) || tt.name;
-
-    // Allow @@iterator and @@asyncIterator as a identifier only inside type
-    if (
-      this.state.isIterator &&
-      (!this.isIterator(word) || !this.state.inType)
-    ) {
-      this.raise(this.state.pos, Errors.InvalidIdentifier, word);
-    }
-
     this.finishToken(type, word);
   }
 
diff --git a/packages/babel-parser/src/tokenizer/state.js b/packages/babel-parser/src/tokenizer/state.js
index 2dd357c191..9eb9c533d0 100644
--- a/packages/babel-parser/src/tokenizer/state.js
+++ b/packages/babel-parser/src/tokenizer/state.js
@@ -64,7 +64,6 @@ export default class State {
   noAnonFunctionType: boolean = false;
   inPropertyName: boolean = false;
   hasFlowComment: boolean = false;
-  isIterator: boolean = false;
   isAmbientContext: boolean = false;
   inAbstractClass: boolean = false;