Faster identifier tokenizing (#13262)

* add benchmark

* perf: faster identifier tokenizing

- Move iterator identifier parsing to the Flow plugin
- If the character is an identifier start, pass it to readWord1
This commit is contained in:
Huáng Jùnliàng 2021-05-06 18:47:19 -04:00 committed by GitHub
parent 1879491af7
commit a8fea4037d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 79 additions and 28 deletions

View File

@ -0,0 +1,23 @@
import Benchmark from "benchmark";
import baseline from "@babel-baseline/parser";
import current from "../../lib/index.js";
import { report } from "../util.mjs";
// Single benchmark suite for this file; benchCases() adds its cases to it.
const suite = new Benchmark.Suite();
// Produce benchmark source text: the statement `a;` written `length` times
// back to back, i.e. `length` one-character identifier statements.
function createInput(length) {
  const statement = "a;";
  return statement.repeat(length);
}
// Parse a trivial program once with the local build before any case is added.
// NOTE(review): presumably a warm-up so one-time initialization cost is not
// attributed to the first measured case — confirm.
current.parse("a");
// Add one case per input size to the shared suite. Each case parses a
// pre-built input with `implementation.parse`, forwarding `options` unchanged.
// `name` prefixes the case title so results from different parsers can be
// told apart.
function benchCases(name, implementation, options) {
  const sizes = [64, 128, 256, 512, 1024];
  sizes.forEach(size => {
    // Build the input outside the measured callback so only parsing is timed.
    const source = createInput(size);
    const title = `${name} ${size} length-1 identifiers`;
    suite.add(title, () => {
      implementation.parse(source, options);
    });
  });
}
// Add the same cases for the `@babel-baseline/parser` import and for the local
// `../../lib` build (no parser options are passed), then call `report` on
// every "cycle" event and start the suite.
benchCases("baseline", baseline);
benchCases("current", current);
suite.on("cycle", report).run();

View File

@ -0,0 +1,23 @@
import Benchmark from "benchmark";
import baseline from "@babel-baseline/parser";
import current from "../../lib/index.js";
import { report } from "../util.mjs";
// Single benchmark suite for this file; benchCases() adds its cases to it.
const suite = new Benchmark.Suite();
// Produce benchmark source text: the statement `aa;` written `length` times
// back to back, i.e. `length` two-character identifier statements.
function createInput(length) {
  const statement = "aa;";
  return statement.repeat(length);
}
// Parse a trivial program once with the local build before any case is added.
// NOTE(review): presumably a warm-up so one-time initialization cost is not
// attributed to the first measured case — confirm.
current.parse("a");
// Add one case per input size to the shared suite. Each case parses a
// pre-built input with `implementation.parse`, forwarding `options` unchanged.
// `name` prefixes the case title so results from different parsers can be
// told apart.
function benchCases(name, implementation, options) {
  const sizes = [64, 128, 256, 512, 1024];
  sizes.forEach(size => {
    // Build the input outside the measured callback so only parsing is timed.
    const source = createInput(size);
    const title = `${name} ${size} length-2 identifiers`;
    suite.add(title, () => {
      implementation.parse(source, options);
    });
  });
}
// Add the same cases for the `@babel-baseline/parser` import and for the local
// `../../lib` build (no parser options are passed), then call `report` on
// every "cycle" event and start the suite.
benchCases("baseline", baseline);
benchCases("current", current);
suite.on("cycle", report).run();

View File

@ -2219,6 +2219,22 @@ export default (superClass: Class<Parser>): Class<Parser> =>
} }
} }
// Returns true only when `word` is exactly "iterator" or "asyncIterator",
// i.e. the bare name without any "@@" prefix.
isIterator(word: string): boolean {
return word === "iterator" || word === "asyncIterator";
}
// Tokenizes an "@@"-prefixed name. Reads the word at the current position with
// the parent class's readWord1() (the visible caller in getTokenFromCode
// advances pos past "@@" first), then finishes a tt.name token whose value is
// "@@" + word. this.raise() is called with Errors.InvalidIdentifier unless the
// word passes isIterator() and this.state.inType is truthy.
readIterator(): void {
const word = super.readWord1();
const fullWord = "@@" + word;
// Allow @@iterator and @@asyncIterator as an identifier only inside type
if (!this.isIterator(word) || !this.state.inType) {
// The "@@"-prefixed spelling is what gets passed along to raise().
this.raise(this.state.pos, Errors.InvalidIdentifier, fullWord);
}
this.finishToken(tt.name, fullWord);
}
// ensure that inside flow types, we bypass the jsx parser plugin // ensure that inside flow types, we bypass the jsx parser plugin
getTokenFromCode(code: number): void { getTokenFromCode(code: number): void {
const next = this.input.charCodeAt(this.state.pos + 1); const next = this.input.charCodeAt(this.state.pos + 1);
@ -2236,8 +2252,8 @@ export default (superClass: Class<Parser>): Class<Parser> =>
// allow double nullable types in Flow: ??string // allow double nullable types in Flow: ??string
return this.finishOp(tt.question, 1); return this.finishOp(tt.question, 1);
} else if (isIteratorStart(code, next)) { } else if (isIteratorStart(code, next)) {
this.state.isIterator = true; this.state.pos += 2; // eat "@@"
return super.readWord(); return this.readIterator();
} else { } else {
return super.getTokenFromCode(code); return super.getTokenFromCode(code);
} }

View File

@ -76,10 +76,6 @@ tt.name.updateContext = function (prevType) {
} }
} }
this.state.exprAllowed = allowed; this.state.exprAllowed = allowed;
if (this.state.isIterator) {
this.state.isIterator = false;
}
}; };
tt.braceL.updateContext = function (prevType) { tt.braceL.updateContext = function (prevType) {

View File

@ -453,7 +453,10 @@ export default class Tokenizer extends ParserErrors {
this.finishToken(tt.bracketHashL); this.finishToken(tt.bracketHashL);
} }
this.state.pos += 2; this.state.pos += 2;
} else if (isIdentifierStart(next) || next === charCodes.backslash) { } else if (isIdentifierStart(next)) {
++this.state.pos;
this.finishToken(tt.privateName, this.readWord1(next));
} else if (next === charCodes.backslash) {
++this.state.pos; ++this.state.pos;
this.finishToken(tt.privateName, this.readWord1()); this.finishToken(tt.privateName, this.readWord1());
} else { } else {
@ -920,7 +923,7 @@ export default class Tokenizer extends ParserErrors {
default: default:
if (isIdentifierStart(code)) { if (isIdentifierStart(code)) {
this.readWord(); this.readWord(code);
return; return;
} }
} }
@ -1457,19 +1460,23 @@ export default class Tokenizer extends ParserErrors {
// //
// Incrementally adds only escaped chars, adding other chunks as-is // Incrementally adds only escaped chars, adding other chunks as-is
// as a micro-optimization. // as a micro-optimization.
//
// When `firstCode` is given, it assumes it is always an identifier start and
// will skip reading start position again
readWord1(): string { readWord1(firstCode: number | void): string {
let word = "";
this.state.containsEsc = false; this.state.containsEsc = false;
let word = "";
const start = this.state.pos; const start = this.state.pos;
let chunkStart = this.state.pos; let chunkStart = this.state.pos;
if (firstCode !== undefined) {
this.state.pos += firstCode <= 0xffff ? 1 : 2;
}
while (this.state.pos < this.length) { while (this.state.pos < this.length) {
const ch = this.codePointAtPos(this.state.pos); const ch = this.codePointAtPos(this.state.pos);
if (isIdentifierChar(ch)) { if (isIdentifierChar(ch)) {
this.state.pos += ch <= 0xffff ? 1 : 2; this.state.pos += ch <= 0xffff ? 1 : 2;
} else if (this.state.isIterator && ch === charCodes.atSign) {
++this.state.pos;
} else if (ch === charCodes.backslash) { } else if (ch === charCodes.backslash) {
this.state.containsEsc = true; this.state.containsEsc = true;
@ -1501,25 +1508,12 @@ export default class Tokenizer extends ParserErrors {
return word + this.input.slice(chunkStart, this.state.pos); return word + this.input.slice(chunkStart, this.state.pos);
} }
isIterator(word: string): boolean {
return word === "@@iterator" || word === "@@asyncIterator";
}
// Read an identifier or keyword token. Will check for reserved // Read an identifier or keyword token. Will check for reserved
// words when necessary. // words when necessary.
readWord(): void { readWord(firstCode: number | void): void {
const word = this.readWord1(); const word = this.readWord1(firstCode);
const type = keywordTypes.get(word) || tt.name; const type = keywordTypes.get(word) || tt.name;
// Allow @@iterator and @@asyncIterator as a identifier only inside type
if (
this.state.isIterator &&
(!this.isIterator(word) || !this.state.inType)
) {
this.raise(this.state.pos, Errors.InvalidIdentifier, word);
}
this.finishToken(type, word); this.finishToken(type, word);
} }

View File

@ -64,7 +64,6 @@ export default class State {
noAnonFunctionType: boolean = false; noAnonFunctionType: boolean = false;
inPropertyName: boolean = false; inPropertyName: boolean = false;
hasFlowComment: boolean = false; hasFlowComment: boolean = false;
isIterator: boolean = false;
isAmbientContext: boolean = false; isAmbientContext: boolean = false;
inAbstractClass: boolean = false; inAbstractClass: boolean = false;