diff --git a/packages/workspace/src/core/hasher/file-hasher.ts b/packages/workspace/src/core/hasher/file-hasher.ts index 8b64fdff65..604cbff5ff 100644 --- a/packages/workspace/src/core/hasher/file-hasher.ts +++ b/packages/workspace/src/core/hasher/file-hasher.ts @@ -20,18 +20,28 @@ export class FileHasher { this.usesGitForHashing = false; } - init(): void { + /** + * For the project graph daemon server use-case we can potentially skip expensive work + * by leveraging knowledge of the uncommitted and untracked files, so the init() method + * returns a Map containing this data. + */ + init(): Map<string, string> { performance.mark('init hashing:start'); this.clear(); - this.getHashesFromGit(); + + const getFileHashesResult = getFileHashes(appRootPath); + this.applyFileHashes(getFileHashesResult.allFiles); this.usesGitForHashing = Object.keys(this.fileHashes).length > 0; this.isInitialized = true; + performance.mark('init hashing:end'); performance.measure( 'init hashing', 'init hashing:start', 'init hashing:end' ); + + return getFileHashesResult.untrackedUncommittedFiles; } /** @@ -42,8 +52,10 @@ export class FileHasher { * For example, the daemon server can cache the last known commit SHA in * memory and avoid calling init() by using this method instead when that * SHA is unchanged. 
+ * + * @returns The Map of filenames to hashes returned by getUntrackedAndUncommittedFileHashes() */ - incrementalUpdate() { + incrementalUpdate(): Map<string, string> { performance.mark('incremental hashing:start'); const untrackedAndUncommittedFileHashes = @@ -64,6 +76,8 @@ export class FileHasher { 'incremental hashing:start', 'incremental hashing:end' ); + + return untrackedAndUncommittedFileHashes; } hashFile(path: string): string { @@ -84,9 +98,9 @@ } } - private getHashesFromGit(): void { + private applyFileHashes(allFiles: Map<string, string>): void { const sliceIndex = appRootPath.length + 1; - getFileHashes(appRootPath).forEach((hash, filename) => { + allFiles.forEach((hash, filename) => { this.fileHashes[filename.substr(sliceIndex)] = hash; /** * we have to store it separately because fileHashes can be modified diff --git a/packages/workspace/src/core/hasher/git-hasher.spec.ts b/packages/workspace/src/core/hasher/git-hasher.spec.ts index c613a79203..05110df7b9 100644 --- a/packages/workspace/src/core/hasher/git-hasher.spec.ts +++ b/packages/workspace/src/core/hasher/git-hasher.spec.ts @@ -25,54 +25,58 @@ describe('git-hasher', () => { run(`echo AAA > a.txt`); run(`git add .`); run(`git commit -am init`); - const hashes = getFileHashes(dir); + const hashes = getFileHashes(dir).allFiles; expect([...hashes.keys()]).toEqual([`${dir}/a.txt`]); expect(hashes.get(`${dir}/a.txt`)).toBeDefined(); // should handle additions run(`echo BBB > b.txt`); - expect([...getFileHashes(dir).keys()]).toEqual([ + expect([...getFileHashes(dir).allFiles.keys()]).toEqual([ `${dir}/a.txt`, `${dir}/b.txt`, ]); run(`git add .`); - expect([...getFileHashes(dir).keys()]).toEqual([ + expect([...getFileHashes(dir).allFiles.keys()]).toEqual([ `${dir}/a.txt`, `${dir}/b.txt`, ]); run(`git commit -am second`); - expect([...getFileHashes(dir).keys()]).toEqual([ + expect([...getFileHashes(dir).allFiles.keys()]).toEqual([ `${dir}/a.txt`, `${dir}/b.txt`, ]); // should handle removals run(`rm 
b.txt`); - expect([...getFileHashes(dir).keys()]).toEqual([`${dir}/a.txt`]); + expect([...getFileHashes(dir).allFiles.keys()]).toEqual([`${dir}/a.txt`]); run(`git add .`); - expect([...getFileHashes(dir).keys()]).toEqual([`${dir}/a.txt`]); + expect([...getFileHashes(dir).allFiles.keys()]).toEqual([`${dir}/a.txt`]); run(`git commit -am third`); - expect([...getFileHashes(dir).keys()]).toEqual([`${dir}/a.txt`]); + expect([...getFileHashes(dir).allFiles.keys()]).toEqual([`${dir}/a.txt`]); // should handle moves run(`mv a.txt newa.txt`); - expect([...getFileHashes(dir).keys()]).toEqual([`${dir}/newa.txt`]); + expect([...getFileHashes(dir).allFiles.keys()]).toEqual([ + `${dir}/newa.txt`, + ]); run(`git add .`); - expect([...getFileHashes(dir).keys()]).toEqual([`${dir}/newa.txt`]); + expect([...getFileHashes(dir).allFiles.keys()]).toEqual([ + `${dir}/newa.txt`, + ]); run(`echo AAAA > a.txt`); - expect([...getFileHashes(dir).keys()]).toEqual([ + expect([...getFileHashes(dir).allFiles.keys()]).toEqual([ `${dir}/a.txt`, `${dir}/newa.txt`, ]); run(`git add .`); - expect([...getFileHashes(dir).keys()]).toEqual([ + expect([...getFileHashes(dir).allFiles.keys()]).toEqual([ `${dir}/a.txt`, `${dir}/newa.txt`, ]); @@ -83,27 +87,29 @@ describe('git-hasher', () => { run(`git add .`); run(`git commit -am init`); run(`touch "x y z.txt"`); // unstaged - expect([...getFileHashes(dir).keys()]).toEqual([ + expect([...getFileHashes(dir).allFiles.keys()]).toEqual([ `${dir}/a b.txt`, `${dir}/x y z.txt`, ]); run(`git add .`); - expect([...getFileHashes(dir).keys()]).toEqual([ + expect([...getFileHashes(dir).allFiles.keys()]).toEqual([ `${dir}/a b.txt`, `${dir}/x y z.txt`, ]); run(`mv "a b.txt" "a b moved.txt"`); - expect([...getFileHashes(dir).keys()]).toEqual([ + expect([...getFileHashes(dir).allFiles.keys()]).toEqual([ `${dir}/x y z.txt`, `${dir}/a b moved.txt`, ]); run(`git add .`); - expect([...getFileHashes(dir).keys()]).toEqual([ + 
expect([...getFileHashes(dir).allFiles.keys()]).toEqual([ `${dir}/a b moved.txt`, `${dir}/x y z.txt`, ]); run(`rm "x y z.txt"`); - expect([...getFileHashes(dir).keys()]).toEqual([`${dir}/a b moved.txt`]); + expect([...getFileHashes(dir).allFiles.keys()]).toEqual([ + `${dir}/a b moved.txt`, + ]); }); it('should handle renames and modifications', () => { @@ -113,7 +119,9 @@ describe('git-hasher', () => { run(`mv a.txt moda.txt`); run(`git add .`); run(`echo modified >> moda.txt`); - expect([...getFileHashes(dir).keys()]).toEqual([`${dir}/moda.txt`]); + expect([...getFileHashes(dir).allFiles.keys()]).toEqual([ + `${dir}/moda.txt`, + ]); }); it('should handle special characters in filenames', () => { @@ -121,7 +129,7 @@ describe('git-hasher', () => { run(`echo BBB > "b-ū".txt`); run(`git add .`); run(`git commit -am init`); - expect([...getFileHashes(dir).keys()]).toEqual([ + expect([...getFileHashes(dir).allFiles.keys()]).toEqual([ `${dir}/a-ū.txt`, `${dir}/b-ū.txt`, ]); @@ -129,13 +137,13 @@ describe('git-hasher', () => { run(`mv a-ū.txt moda-ū.txt`); run(`git add .`); run(`echo modified >> moda-ū.txt`); - expect([...getFileHashes(dir).keys()]).toEqual([ + expect([...getFileHashes(dir).allFiles.keys()]).toEqual([ `${dir}/b-ū.txt`, `${dir}/moda-ū.txt`, ]); run(`rm "moda-ū.txt"`); - expect([...getFileHashes(dir).keys()]).toEqual([`${dir}/b-ū.txt`]); + expect([...getFileHashes(dir).allFiles.keys()]).toEqual([`${dir}/b-ū.txt`]); }); it('should work with sub-directories', () => { @@ -145,10 +153,12 @@ describe('git-hasher', () => { run(`echo BBB > sub/b.txt`); run(`git add --all`); run(`git commit -am init`); - expect([...getFileHashes(subDir).keys()]).toEqual([`${subDir}/b.txt`]); + expect([...getFileHashes(subDir).allFiles.keys()]).toEqual([ + `${subDir}/b.txt`, + ]); run(`echo CCC > sub/c.txt`); - expect([...getFileHashes(subDir).keys()]).toEqual([ + expect([...getFileHashes(subDir).allFiles.keys()]).toEqual([ `${subDir}/b.txt`, `${subDir}/c.txt`, ]); diff --git 
a/packages/workspace/src/core/hasher/git-hasher.ts b/packages/workspace/src/core/hasher/git-hasher.ts index 02372396de..ab0efb5001 100644 --- a/packages/workspace/src/core/hasher/git-hasher.ts +++ b/packages/workspace/src/core/hasher/git-hasher.ts @@ -135,21 +135,38 @@ function checkForDeletedFiles( return { filesToHash, deletedFiles }; } -export function getFileHashes(path: string): Map<string, string> { - const res = new Map(); +/** + * getFileHashes() figures out both committed changes to the git tree as well as untracked + * and uncommitted file changes. + * + * For some utilities the origin of a file hash (i.e. was it committed or not) is unimportant, + * but for other tooling like the project graph daemon server it can leverage this distinction + * when figuring out what expensive work to skip during project graph construction. + * + * We therefore return both a Map of all filenames to their hashes, as well as a Map of just + * the uncommitted/untracked filenames to hashes. + */ +export function getFileHashes(path: string): { + allFiles: Map<string, string>; + untrackedUncommittedFiles: Map<string, string>; +} { + const allFiles = new Map(); try { const { deletedFiles, status } = gitStatus(path); const m1 = gitLsTree(path); m1.forEach((hash: string, filename: string) => { if (deletedFiles.indexOf(filename) === -1) { - res.set(`${path}/${filename}`, hash); + allFiles.set(`${path}/${filename}`, hash); } }); status.forEach((hash: string, filename: string) => { - res.set(`${path}/${filename}`, hash); + allFiles.set(`${path}/${filename}`, hash); }); - return res; + return { + allFiles, + untrackedUncommittedFiles: status, + }; } catch (e) { // this strategy is only used for speeding things up. 
// ignoring all the errors @@ -157,7 +174,10 @@ export function getFileHashes(path: string): Map<string, string> { console.error(`Internal error:`); console.error(e); } - return new Map(); + return { + allFiles: new Map(), + untrackedUncommittedFiles: new Map(), + }; } } diff --git a/packages/workspace/src/core/project-graph/daemon/server.ts b/packages/workspace/src/core/project-graph/daemon/server.ts index 911726a90e..313b14b016 100644 --- a/packages/workspace/src/core/project-graph/daemon/server.ts +++ b/packages/workspace/src/core/project-graph/daemon/server.ts @@ -7,6 +7,7 @@ import { join, resolve } from 'path'; import { performance, PerformanceObserver } from 'perf_hooks'; import { defaultFileHasher } from '../../hasher/file-hasher'; import { gitRevParseHead } from '../../hasher/git-hasher'; +import { defaultHashing } from '../../hasher/hashing-impl'; import { createProjectGraph } from '../project-graph'; /** @@ -66,11 +67,28 @@ function formatLogMessage(message) { } /** - * We cache the latest known HEAD value on the server so that we can potentially skip - * some work initializing file hashes. If the HEAD value has not changed since we last - * initialized the hashes, then we can move straight on to hashing uncommitted changes. + * We cache the latest known HEAD value and an overall hash of the state of the untracked + * and uncommitted files so that we can potentially skip some initialization work. */ let cachedGitHead: string | undefined; +let cachedUntrackedUncommittedState: string | undefined; + +function hashAndCacheUntrackedUncommittedState( + untrackedAndUncommittedFileHashes: Map<string, string> +): void { + const fileHashesMapAsFlatArray = [].concat( + ...Array.from(untrackedAndUncommittedFileHashes) + ); + cachedUntrackedUncommittedState = defaultHashing.hashArray( + fileHashesMapAsFlatArray + ); +} + +/** + * We cache the latest copy of the project graph itself in memory so that in the best case + * scenario we can skip all graph construction work entirely. 
+ */ +let cachedProjectGraph: ProjectGraph | undefined; /** * For now we just invoke the existing `createProjectGraph()` utility and return the project @@ -91,19 +109,70 @@ const server = createServer((socket) => { serverLog('Connection Received'); const currentGitHead = gitRevParseHead(appRootPath); - if (currentGitHead === cachedGitHead) { - defaultFileHasher.incrementalUpdate(); - } else { - defaultFileHasher.init(); + + let projectGraph: ProjectGraph | undefined; + + /** + * Cached HEAD has changed, we must perform full file-hashing initialization work and + * recompute the project graph + */ + if (currentGitHead !== cachedGitHead) { + serverLog( + ` [SERVER STATE]: Cached HEAD does not match current (${currentGitHead}), performing full file hash init and recomputing project graph...` + ); + /** + * Update the cached values for the HEAD and untracked and uncommitted state which was computed + * as part of full init() + */ + const untrackedAndUncommittedFileHashes = defaultFileHasher.init(); + hashAndCacheUntrackedUncommittedState(untrackedAndUncommittedFileHashes); cachedGitHead = currentGitHead; + projectGraph = createProjectGraph(undefined, undefined, undefined, '4.0'); + } else { + /** + * We know at this point that the cached HEAD has not changed but we must still always use git + * to check for untracked and uncommitted changes (and we then create and cache a hash which + * represents their overall state). + * + * We cannot ever skip this particular git operation, but we can compare its result to our + * previously cached hash which represents the overall state for untracked and uncommitted changes + * and then potentially skip project graph creation altogether if it is unchanged and we have an + * existing cached graph. 
+ */ + const previousUntrackedUncommittedState = cachedUntrackedUncommittedState; + const untrackedAndUncommittedFileHashes = + defaultFileHasher.incrementalUpdate(); + hashAndCacheUntrackedUncommittedState(untrackedAndUncommittedFileHashes); + + /** + * Skip project graph creation if the untracked and uncommitted state is unchanged and we have + * a cached version of the graph available in memory. + */ + if ( + previousUntrackedUncommittedState === cachedUntrackedUncommittedState && + cachedProjectGraph + ) { + serverLog( + ` [SERVER STATE]: State unchanged since last request, resolving in-memory cached project graph...` + ); + projectGraph = cachedProjectGraph; + } else { + serverLog( + ` [SERVER STATE]: Hashed untracked/uncommitted file state changed (now ${cachedUntrackedUncommittedState}), recomputing project graph...` + ); + projectGraph = createProjectGraph(undefined, undefined, undefined, '4.0'); + } } - const projectGraph = createProjectGraph( - undefined, - undefined, - undefined, - '4.0' - ); + /** + * Cache the latest version of the project graph in memory so that we can potentially skip a lot + * of expensive work on the next client request. + * + * For reference, on the very large test repo https://github.com/vsavkin/interstellar the project + * graph nxdeps.json file is about 24MB, so memory utilization should not be a huge concern. + */ + cachedProjectGraph = projectGraph; + performance.mark('project-graph-created'); performance.measure( 'createProjectGraph() total',