From a8fea4037dc194c4c220c15a0982c70cc12dce7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= Date: Thu, 6 May 2021 18:47:19 -0400 Subject: [PATCH] Faster identifier tokenizing (#13262) * add benchmark * perf: faster identifier tokenizing - Move iterator identifier parsing to the Flow plugin - If the character is an identifier start, pass it to readWord1 --- .../many-identifiers/1-length.bench.mjs | 23 ++++++++++++ .../many-identifiers/2-length.bench.mjs | 23 ++++++++++++ .../babel-parser/src/plugins/flow/index.js | 20 +++++++++-- .../babel-parser/src/tokenizer/context.js | 4 --- packages/babel-parser/src/tokenizer/index.js | 36 ++++++++----------- packages/babel-parser/src/tokenizer/state.js | 1 - 6 files changed, 79 insertions(+), 28 deletions(-) create mode 100644 packages/babel-parser/benchmark/many-identifiers/1-length.bench.mjs create mode 100644 packages/babel-parser/benchmark/many-identifiers/2-length.bench.mjs diff --git a/packages/babel-parser/benchmark/many-identifiers/1-length.bench.mjs b/packages/babel-parser/benchmark/many-identifiers/1-length.bench.mjs new file mode 100644 index 0000000000..df6605795a --- /dev/null +++ b/packages/babel-parser/benchmark/many-identifiers/1-length.bench.mjs @@ -0,0 +1,23 @@ +import Benchmark from "benchmark"; +import baseline from "@babel-baseline/parser"; +import current from "../../lib/index.js"; +import { report } from "../util.mjs"; + +const suite = new Benchmark.Suite(); +function createInput(length) { + return "a;".repeat(length); +} +current.parse("a"); +function benchCases(name, implementation, options) { + for (const length of [64, 128, 256, 512, 1024]) { + const input = createInput(length); + suite.add(`${name} ${length} length-1 identifiers`, () => { + implementation.parse(input, options); + }); + } +} + +benchCases("baseline", baseline); +benchCases("current", current); + +suite.on("cycle", report).run(); diff --git 
a/packages/babel-parser/benchmark/many-identifiers/2-length.bench.mjs b/packages/babel-parser/benchmark/many-identifiers/2-length.bench.mjs new file mode 100644 index 0000000000..4d6453c6f4 --- /dev/null +++ b/packages/babel-parser/benchmark/many-identifiers/2-length.bench.mjs @@ -0,0 +1,23 @@ +import Benchmark from "benchmark"; +import baseline from "@babel-baseline/parser"; +import current from "../../lib/index.js"; +import { report } from "../util.mjs"; + +const suite = new Benchmark.Suite(); +function createInput(length) { + return "aa;".repeat(length); +} +current.parse("a"); +function benchCases(name, implementation, options) { + for (const length of [64, 128, 256, 512, 1024]) { + const input = createInput(length); + suite.add(`${name} ${length} length-2 identifiers`, () => { + implementation.parse(input, options); + }); + } +} + +benchCases("baseline", baseline); +benchCases("current", current); + +suite.on("cycle", report).run(); diff --git a/packages/babel-parser/src/plugins/flow/index.js b/packages/babel-parser/src/plugins/flow/index.js index a237acffa5..94169d697e 100644 --- a/packages/babel-parser/src/plugins/flow/index.js +++ b/packages/babel-parser/src/plugins/flow/index.js @@ -2219,6 +2219,22 @@ export default (superClass: Class): Class => } } + isIterator(word: string): boolean { + return word === "iterator" || word === "asyncIterator"; + } + + readIterator(): void { + const word = super.readWord1(); + const fullWord = "@@" + word; + + // Allow @@iterator and @@asyncIterator as a identifier only inside type + if (!this.isIterator(word) || !this.state.inType) { + this.raise(this.state.pos, Errors.InvalidIdentifier, fullWord); + } + + this.finishToken(tt.name, fullWord); + } + // ensure that inside flow types, we bypass the jsx parser plugin getTokenFromCode(code: number): void { const next = this.input.charCodeAt(this.state.pos + 1); @@ -2236,8 +2252,8 @@ export default (superClass: Class): Class => // allow double nullable types in Flow: ??string 
return this.finishOp(tt.question, 1); } else if (isIteratorStart(code, next)) { - this.state.isIterator = true; - return super.readWord(); + this.state.pos += 2; // eat "@@" + return this.readIterator(); } else { return super.getTokenFromCode(code); } diff --git a/packages/babel-parser/src/tokenizer/context.js b/packages/babel-parser/src/tokenizer/context.js index 5581e6b05e..854db1ccbf 100644 --- a/packages/babel-parser/src/tokenizer/context.js +++ b/packages/babel-parser/src/tokenizer/context.js @@ -76,10 +76,6 @@ tt.name.updateContext = function (prevType) { } } this.state.exprAllowed = allowed; - - if (this.state.isIterator) { - this.state.isIterator = false; - } }; tt.braceL.updateContext = function (prevType) { diff --git a/packages/babel-parser/src/tokenizer/index.js b/packages/babel-parser/src/tokenizer/index.js index b5c6b68e06..f891934329 100644 --- a/packages/babel-parser/src/tokenizer/index.js +++ b/packages/babel-parser/src/tokenizer/index.js @@ -453,7 +453,10 @@ export default class Tokenizer extends ParserErrors { this.finishToken(tt.bracketHashL); } this.state.pos += 2; - } else if (isIdentifierStart(next) || next === charCodes.backslash) { + } else if (isIdentifierStart(next)) { + ++this.state.pos; + this.finishToken(tt.privateName, this.readWord1(next)); + } else if (next === charCodes.backslash) { ++this.state.pos; this.finishToken(tt.privateName, this.readWord1()); } else { @@ -920,7 +923,7 @@ export default class Tokenizer extends ParserErrors { default: if (isIdentifierStart(code)) { - this.readWord(); + this.readWord(code); return; } } @@ -1457,19 +1460,23 @@ export default class Tokenizer extends ParserErrors { // // Incrementally adds only escaped chars, adding other chunks as-is // as a micro-optimization. 
+ // + // When `firstCode` is given, it assumes it is always an identifier start and + // will skip reading start position again - readWord1(): string { - let word = ""; + readWord1(firstCode: number | void): string { this.state.containsEsc = false; + let word = ""; const start = this.state.pos; let chunkStart = this.state.pos; + if (firstCode !== undefined) { + this.state.pos += firstCode <= 0xffff ? 1 : 2; + } while (this.state.pos < this.length) { const ch = this.codePointAtPos(this.state.pos); if (isIdentifierChar(ch)) { this.state.pos += ch <= 0xffff ? 1 : 2; - } else if (this.state.isIterator && ch === charCodes.atSign) { - ++this.state.pos; } else if (ch === charCodes.backslash) { this.state.containsEsc = true; @@ -1501,25 +1508,12 @@ export default class Tokenizer extends ParserErrors { return word + this.input.slice(chunkStart, this.state.pos); } - isIterator(word: string): boolean { - return word === "@@iterator" || word === "@@asyncIterator"; - } - // Read an identifier or keyword token. Will check for reserved // words when necessary. 
- readWord(): void { - const word = this.readWord1(); + readWord(firstCode: number | void): void { + const word = this.readWord1(firstCode); const type = keywordTypes.get(word) || tt.name; - - // Allow @@iterator and @@asyncIterator as a identifier only inside type - if ( - this.state.isIterator && - (!this.isIterator(word) || !this.state.inType) - ) { - this.raise(this.state.pos, Errors.InvalidIdentifier, word); - } - this.finishToken(type, word); } diff --git a/packages/babel-parser/src/tokenizer/state.js b/packages/babel-parser/src/tokenizer/state.js index 2dd357c191..9eb9c533d0 100644 --- a/packages/babel-parser/src/tokenizer/state.js +++ b/packages/babel-parser/src/tokenizer/state.js @@ -64,7 +64,6 @@ export default class State { noAnonFunctionType: boolean = false; inPropertyName: boolean = false; hasFlowComment: boolean = false; - isIterator: boolean = false; isAmbientContext: boolean = false; inAbstractClass: boolean = false;