chore(core): daemon uses cached project graph whenever possible

This commit is contained in:
James Henry 2021-09-22 16:06:15 +04:00 committed by Victor Savkin
parent 84d64284f8
commit 98527acabc
4 changed files with 159 additions and 46 deletions

View File

@ -20,18 +20,28 @@ export class FileHasher {
this.usesGitForHashing = false;
}
init(): void {
/**
* For the project graph daemon server use-case we can potentially skip expensive work
* by leveraging knowledge of the uncommitted and untracked files, so the init() method
* returns a Map containing this data.
*/
init(): Map<string, string> {
performance.mark('init hashing:start');
this.clear();
this.getHashesFromGit();
const getFileHashesResult = getFileHashes(appRootPath);
this.applyFileHashes(getFileHashesResult.allFiles);
this.usesGitForHashing = Object.keys(this.fileHashes).length > 0;
this.isInitialized = true;
performance.mark('init hashing:end');
performance.measure(
'init hashing',
'init hashing:start',
'init hashing:end'
);
return getFileHashesResult.untrackedUncommittedFiles;
}
/**
@ -42,8 +52,10 @@ export class FileHasher {
* For example, the daemon server can cache the last known commit SHA in
* memory and avoid calling init() by using this method instead when that
* SHA is unchanged.
*
* @returns The Map of filenames to hashes returned by getUntrackedAndUncommittedFileHashes()
*/
incrementalUpdate() {
incrementalUpdate(): Map<string, string> {
performance.mark('incremental hashing:start');
const untrackedAndUncommittedFileHashes =
@ -64,6 +76,8 @@ export class FileHasher {
'incremental hashing:start',
'incremental hashing:end'
);
return untrackedAndUncommittedFileHashes;
}
hashFile(path: string): string {
@ -84,9 +98,9 @@ export class FileHasher {
}
}
private getHashesFromGit(): void {
private applyFileHashes(allFiles: Map<string, string>): void {
const sliceIndex = appRootPath.length + 1;
getFileHashes(appRootPath).forEach((hash, filename) => {
allFiles.forEach((hash, filename) => {
this.fileHashes[filename.substr(sliceIndex)] = hash;
/**
* we have to store it separately because fileHashes can be modified

View File

@ -25,54 +25,58 @@ describe('git-hasher', () => {
run(`echo AAA > a.txt`);
run(`git add .`);
run(`git commit -am init`);
const hashes = getFileHashes(dir);
const hashes = getFileHashes(dir).allFiles;
expect([...hashes.keys()]).toEqual([`${dir}/a.txt`]);
expect(hashes.get(`${dir}/a.txt`)).toBeDefined();
// should handle additions
run(`echo BBB > b.txt`);
expect([...getFileHashes(dir).keys()]).toEqual([
expect([...getFileHashes(dir).allFiles.keys()]).toEqual([
`${dir}/a.txt`,
`${dir}/b.txt`,
]);
run(`git add .`);
expect([...getFileHashes(dir).keys()]).toEqual([
expect([...getFileHashes(dir).allFiles.keys()]).toEqual([
`${dir}/a.txt`,
`${dir}/b.txt`,
]);
run(`git commit -am second`);
expect([...getFileHashes(dir).keys()]).toEqual([
expect([...getFileHashes(dir).allFiles.keys()]).toEqual([
`${dir}/a.txt`,
`${dir}/b.txt`,
]);
// should handle removals
run(`rm b.txt`);
expect([...getFileHashes(dir).keys()]).toEqual([`${dir}/a.txt`]);
expect([...getFileHashes(dir).allFiles.keys()]).toEqual([`${dir}/a.txt`]);
run(`git add .`);
expect([...getFileHashes(dir).keys()]).toEqual([`${dir}/a.txt`]);
expect([...getFileHashes(dir).allFiles.keys()]).toEqual([`${dir}/a.txt`]);
run(`git commit -am third`);
expect([...getFileHashes(dir).keys()]).toEqual([`${dir}/a.txt`]);
expect([...getFileHashes(dir).allFiles.keys()]).toEqual([`${dir}/a.txt`]);
// should handle moves
run(`mv a.txt newa.txt`);
expect([...getFileHashes(dir).keys()]).toEqual([`${dir}/newa.txt`]);
expect([...getFileHashes(dir).allFiles.keys()]).toEqual([
`${dir}/newa.txt`,
]);
run(`git add .`);
expect([...getFileHashes(dir).keys()]).toEqual([`${dir}/newa.txt`]);
expect([...getFileHashes(dir).allFiles.keys()]).toEqual([
`${dir}/newa.txt`,
]);
run(`echo AAAA > a.txt`);
expect([...getFileHashes(dir).keys()]).toEqual([
expect([...getFileHashes(dir).allFiles.keys()]).toEqual([
`${dir}/a.txt`,
`${dir}/newa.txt`,
]);
run(`git add .`);
expect([...getFileHashes(dir).keys()]).toEqual([
expect([...getFileHashes(dir).allFiles.keys()]).toEqual([
`${dir}/a.txt`,
`${dir}/newa.txt`,
]);
@ -83,27 +87,29 @@ describe('git-hasher', () => {
run(`git add .`);
run(`git commit -am init`);
run(`touch "x y z.txt"`); // unstaged
expect([...getFileHashes(dir).keys()]).toEqual([
expect([...getFileHashes(dir).allFiles.keys()]).toEqual([
`${dir}/a b.txt`,
`${dir}/x y z.txt`,
]);
run(`git add .`);
expect([...getFileHashes(dir).keys()]).toEqual([
expect([...getFileHashes(dir).allFiles.keys()]).toEqual([
`${dir}/a b.txt`,
`${dir}/x y z.txt`,
]);
run(`mv "a b.txt" "a b moved.txt"`);
expect([...getFileHashes(dir).keys()]).toEqual([
expect([...getFileHashes(dir).allFiles.keys()]).toEqual([
`${dir}/x y z.txt`,
`${dir}/a b moved.txt`,
]);
run(`git add .`);
expect([...getFileHashes(dir).keys()]).toEqual([
expect([...getFileHashes(dir).allFiles.keys()]).toEqual([
`${dir}/a b moved.txt`,
`${dir}/x y z.txt`,
]);
run(`rm "x y z.txt"`);
expect([...getFileHashes(dir).keys()]).toEqual([`${dir}/a b moved.txt`]);
expect([...getFileHashes(dir).allFiles.keys()]).toEqual([
`${dir}/a b moved.txt`,
]);
});
it('should handle renames and modifications', () => {
@ -113,7 +119,9 @@ describe('git-hasher', () => {
run(`mv a.txt moda.txt`);
run(`git add .`);
run(`echo modified >> moda.txt`);
expect([...getFileHashes(dir).keys()]).toEqual([`${dir}/moda.txt`]);
expect([...getFileHashes(dir).allFiles.keys()]).toEqual([
`${dir}/moda.txt`,
]);
});
it('should handle special characters in filenames', () => {
@ -121,7 +129,7 @@ describe('git-hasher', () => {
run(`echo BBB > "b-ū".txt`);
run(`git add .`);
run(`git commit -am init`);
expect([...getFileHashes(dir).keys()]).toEqual([
expect([...getFileHashes(dir).allFiles.keys()]).toEqual([
`${dir}/a-ū.txt`,
`${dir}/b-ū.txt`,
]);
@ -129,13 +137,13 @@ describe('git-hasher', () => {
run(`mv a-ū.txt moda-ū.txt`);
run(`git add .`);
run(`echo modified >> moda-ū.txt`);
expect([...getFileHashes(dir).keys()]).toEqual([
expect([...getFileHashes(dir).allFiles.keys()]).toEqual([
`${dir}/b-ū.txt`,
`${dir}/moda-ū.txt`,
]);
run(`rm "moda-ū.txt"`);
expect([...getFileHashes(dir).keys()]).toEqual([`${dir}/b-ū.txt`]);
expect([...getFileHashes(dir).allFiles.keys()]).toEqual([`${dir}/b-ū.txt`]);
});
it('should work with sub-directories', () => {
@ -145,10 +153,12 @@ describe('git-hasher', () => {
run(`echo BBB > sub/b.txt`);
run(`git add --all`);
run(`git commit -am init`);
expect([...getFileHashes(subDir).keys()]).toEqual([`${subDir}/b.txt`]);
expect([...getFileHashes(subDir).allFiles.keys()]).toEqual([
`${subDir}/b.txt`,
]);
run(`echo CCC > sub/c.txt`);
expect([...getFileHashes(subDir).keys()]).toEqual([
expect([...getFileHashes(subDir).allFiles.keys()]).toEqual([
`${subDir}/b.txt`,
`${subDir}/c.txt`,
]);

View File

@ -135,21 +135,38 @@ function checkForDeletedFiles(
return { filesToHash, deletedFiles };
}
export function getFileHashes(path: string): Map<string, string> {
const res = new Map<string, string>();
/**
* getFileHashes() figures out both committed changes to the git tree as well as untracked
* and uncommitted file changes.
*
* For some utilities the origin of a file hash (i.e. was it committed or not) is unimportant,
* but for other tooling like the project graph daemon server it can leverage this distinction
* when figuring out what expensive work to skip during project graph construction.
*
* We therefore return both a Map of all filenames to their hashes, as well as a Map of just
* the uncommitted/untracked filenames to hashes.
*/
export function getFileHashes(path: string): {
allFiles: Map<string, string>;
untrackedUncommittedFiles: Map<string, string>;
} {
const allFiles = new Map<string, string>();
try {
const { deletedFiles, status } = gitStatus(path);
const m1 = gitLsTree(path);
m1.forEach((hash: string, filename: string) => {
if (deletedFiles.indexOf(filename) === -1) {
res.set(`${path}/${filename}`, hash);
allFiles.set(`${path}/${filename}`, hash);
}
});
status.forEach((hash: string, filename: string) => {
res.set(`${path}/${filename}`, hash);
allFiles.set(`${path}/${filename}`, hash);
});
return res;
return {
allFiles,
untrackedUncommittedFiles: status,
};
} catch (e) {
// this strategy is only used for speeding things up.
// ignoring all the errors
@ -157,7 +174,10 @@ export function getFileHashes(path: string): Map<string, string> {
console.error(`Internal error:`);
console.error(e);
}
return new Map<string, string>();
return {
allFiles: new Map<string, string>(),
untrackedUncommittedFiles: new Map<string, string>(),
};
}
}

View File

@ -7,6 +7,7 @@ import { join, resolve } from 'path';
import { performance, PerformanceObserver } from 'perf_hooks';
import { defaultFileHasher } from '../../hasher/file-hasher';
import { gitRevParseHead } from '../../hasher/git-hasher';
import { defaultHashing } from '../../hasher/hashing-impl';
import { createProjectGraph } from '../project-graph';
/**
@ -66,11 +67,28 @@ function formatLogMessage(message) {
}
/**
* We cache the latest known HEAD value on the server so that we can potentially skip
* some work initializing file hashes. If the HEAD value has not changed since we last
* initialized the hashes, then we can move straight on to hashing uncommitted changes.
* We cache the latest known HEAD value and an overall hash of the state of the untracked
* and uncommitted files so that we can potentially skip some initialization work.
*/
let cachedGitHead: string | undefined;
let cachedUntrackedUncommittedState: string | undefined;
/**
 * Computes a single hash representing the overall state of the given untracked
 * and uncommitted files and stores it in `cachedUntrackedUncommittedState`, so
 * that a later request can cheaply compare states and potentially skip project
 * graph recomputation.
 *
 * @param untrackedAndUncommittedFileHashes Map of filenames to their content hashes
 */
function hashAndCacheUntrackedUncommittedState(
  untrackedAndUncommittedFileHashes: Map<string, string>
): void {
  /**
   * Flatten the Map entries into [file1, hash1, file2, hash2, ...]. Map
   * iteration order is insertion order, so the resulting overall hash is
   * deterministic for a given sequence of entries. Using Array#flat avoids
   * the `[].concat(...spread)` pattern, which both infers `never[]` under
   * strict TypeScript and spreads a potentially large array into a single
   * argument list (risking engine argument-count limits).
   */
  const fileHashesMapAsFlatArray: string[] = Array.from(
    untrackedAndUncommittedFileHashes
  ).flat();
  cachedUntrackedUncommittedState = defaultHashing.hashArray(
    fileHashesMapAsFlatArray
  );
}
/**
* We cache the latest copy of the project graph itself in memory so that in the best case
* scenario we can skip all graph construction work entirely.
*/
let cachedProjectGraph: ProjectGraph | undefined;
/**
* For now we just invoke the existing `createProjectGraph()` utility and return the project
@ -91,19 +109,70 @@ const server = createServer((socket) => {
serverLog('Connection Received');
const currentGitHead = gitRevParseHead(appRootPath);
if (currentGitHead === cachedGitHead) {
defaultFileHasher.incrementalUpdate();
} else {
defaultFileHasher.init();
let projectGraph: ProjectGraph | undefined;
/**
* Cached HEAD has changed, we must perform full file-hashing initialization work and
* recompute the project graph
*/
if (currentGitHead !== cachedGitHead) {
serverLog(
` [SERVER STATE]: Cached HEAD does not match current (${currentGitHead}), performing full file hash init and recomputing project graph...`
);
/**
* Update the cached values for the HEAD and untracked and uncommitted state which was computed
* as part of full init()
*/
const untrackedAndUncommittedFileHashes = defaultFileHasher.init();
hashAndCacheUntrackedUncommittedState(untrackedAndUncommittedFileHashes);
cachedGitHead = currentGitHead;
projectGraph = createProjectGraph(undefined, undefined, undefined, '4.0');
} else {
/**
* We know at this point that the cached HEAD has not changed but we must still always use git
* to check for untracked and uncommitted changes (and we then create and cache a hash which
* represents their overall state).
*
* We cannot ever skip this particular git operation, but we can compare its result to our
* previously cached hash which represents the overall state for untracked and uncommitted changes
* and then potentially skip project graph creation altogether if it is unchanged and we have an
* existing cached graph.
*/
const previousUntrackedUncommittedState = cachedUntrackedUncommittedState;
const untrackedAndUncommittedFileHashes =
defaultFileHasher.incrementalUpdate();
hashAndCacheUntrackedUncommittedState(untrackedAndUncommittedFileHashes);
/**
* Skip project graph creation if the untracked and uncommitted state is unchanged and we have
* a cached version of the graph available in memory.
*/
if (
previousUntrackedUncommittedState === cachedUntrackedUncommittedState &&
cachedProjectGraph
) {
serverLog(
` [SERVER STATE]: State unchanged since last request, resolving in-memory cached project graph...`
);
projectGraph = cachedProjectGraph;
} else {
serverLog(
` [SERVER STATE]: Hashed untracked/uncommitted file state changed (now ${cachedUntrackedUncommittedState}), recomputing project graph...`
);
projectGraph = createProjectGraph(undefined, undefined, undefined, '4.0');
}
}
const projectGraph = createProjectGraph(
undefined,
undefined,
undefined,
'4.0'
);
/**
* Cache the latest version of the project graph in memory so that we can potentially skip a lot
* of expensive work on the next client request.
*
* For reference, on the very large test repo https://github.com/vsavkin/interstellar the project
* graph nxdeps.json file is about 24MB, so memory utilization should not be a huge concern.
*/
cachedProjectGraph = projectGraph;
performance.mark('project-graph-created');
performance.measure(
'createProjectGraph() total',