fix(core): improve file gathering performance (#20377)

This commit is contained in:
Jonathan Cammisuli 2023-11-27 09:38:27 -05:00 committed by GitHub
parent ff5d1bef83
commit cc8dbef25e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 103 additions and 106 deletions

42
Cargo.lock generated
View File

@ -449,9 +449,9 @@ dependencies = [
[[package]] [[package]]
name = "futures" name = "futures"
version = "0.3.28" version = "0.3.29"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" checksum = "da0290714b38af9b4a7b094b8a37086d1b4e61f2df9122c3cad2577669145335"
dependencies = [ dependencies = [
"futures-channel", "futures-channel",
"futures-core", "futures-core",
@ -464,9 +464,9 @@ dependencies = [
[[package]] [[package]]
name = "futures-channel" name = "futures-channel"
version = "0.3.28" version = "0.3.29"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" checksum = "ff4dd66668b557604244583e3e1e1eada8c5c2e96a6d0d6653ede395b78bbacb"
dependencies = [ dependencies = [
"futures-core", "futures-core",
"futures-sink", "futures-sink",
@ -474,15 +474,15 @@ dependencies = [
[[package]] [[package]]
name = "futures-core" name = "futures-core"
version = "0.3.28" version = "0.3.29"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" checksum = "eb1d22c66e66d9d72e1758f0bd7d4fd0bee04cad842ee34587d68c07e45d088c"
[[package]] [[package]]
name = "futures-executor" name = "futures-executor"
version = "0.3.28" version = "0.3.29"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" checksum = "0f4fb8693db0cf099eadcca0efe2a5a22e4550f98ed16aba6c48700da29597bc"
dependencies = [ dependencies = [
"futures-core", "futures-core",
"futures-task", "futures-task",
@ -491,15 +491,15 @@ dependencies = [
[[package]] [[package]]
name = "futures-io" name = "futures-io"
version = "0.3.28" version = "0.3.29"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" checksum = "8bf34a163b5c4c52d0478a4d757da8fb65cabef42ba90515efee0f6f9fa45aaa"
[[package]] [[package]]
name = "futures-macro" name = "futures-macro"
version = "0.3.28" version = "0.3.29"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -508,21 +508,21 @@ dependencies = [
[[package]] [[package]]
name = "futures-sink" name = "futures-sink"
version = "0.3.28" version = "0.3.29"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" checksum = "e36d3378ee38c2a36ad710c5d30c2911d752cb941c00c72dbabfb786a7970817"
[[package]] [[package]]
name = "futures-task" name = "futures-task"
version = "0.3.28" version = "0.3.29"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" checksum = "efd193069b0ddadc69c46389b740bbccdd97203899b48d09c5f7969591d6bae2"
[[package]] [[package]]
name = "futures-util" name = "futures-util"
version = "0.3.28" version = "0.3.29"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" checksum = "a19526d624e703a3179b3d322efec918b6246ea0fa51d41124525f00f1cc8104"
dependencies = [ dependencies = [
"futures-channel", "futures-channel",
"futures-core", "futures-core",
@ -1378,8 +1378,6 @@ dependencies = [
"parking_lot", "parking_lot",
"rayon", "rayon",
"regex", "regex",
"serde",
"serde_json",
"swc_common", "swc_common",
"swc_ecma_ast", "swc_ecma_ast",
"swc_ecma_dep_graph", "swc_ecma_dep_graph",
@ -1560,9 +1558,9 @@ dependencies = [
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.56" version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da"
dependencies = [ dependencies = [
"unicode-ident", "unicode-ident",
] ]

View File

@ -1,8 +1,8 @@
[workspace] [workspace]
resolver = "2" resolver = '2'
members = [ members = [
'packages/nx' 'packages/nx',
] ]
[profile.release] [profile.release]

View File

@ -7,7 +7,7 @@ edition = '2021'
anyhow = "1.0.71" anyhow = "1.0.71"
colored = "2" colored = "2"
crossbeam-channel = '0.5' crossbeam-channel = '0.5'
dashmap = { version = "5.5.3", features= ["rayon"] } dashmap = { version = "5.5.3", features = ["rayon"] }
fs_extra = "1.3.0" fs_extra = "1.3.0"
globset = "0.4.10" globset = "0.4.10"
hashbrown = { version = "0.14.0", features = ["rayon"] } hashbrown = { version = "0.14.0", features = ["rayon"] }
@ -25,8 +25,6 @@ napi-derive = '2.9.3'
nom = '7.1.3' nom = '7.1.3'
regex = "1.9.1" regex = "1.9.1"
rayon = "1.7.0" rayon = "1.7.0"
serde = "1"
serde_json = "1"
thiserror = "1.0.40" thiserror = "1.0.40"
tokio = { version = "1.28.2", features = ["fs"] } tokio = { version = "1.28.2", features = ["fs"] }
tracing = "0.1.37" tracing = "0.1.37"

View File

@ -1,6 +1,7 @@
use crate::native::utils::Normalize; use std::fs::File;
use crate::native::walker::nx_walker; use std::io::{BufRead, BufReader};
use std::collections::HashMap; use std::path::Path;
use tracing::trace;
use xxhash_rust::xxh3; use xxhash_rust::xxh3;
pub fn hash(content: &[u8]) -> String { pub fn hash(content: &[u8]) -> String {
@ -16,25 +17,24 @@ pub fn hash_array(input: Vec<String>) -> String {
#[napi] #[napi]
pub fn hash_file(file: String) -> Option<String> { pub fn hash_file(file: String) -> Option<String> {
let Ok(content) = std::fs::read(file) else { hash_file_path(file)
}
#[inline]
pub fn hash_file_path<P: AsRef<Path>>(path: P) -> Option<String> {
let path = path.as_ref();
let Ok(file) = File::open(path) else {
trace!("could not open file: {path:?}");
return None; return None;
}; };
Some(hash(&content)) let mut buffer = BufReader::new(file);
} let Ok(content) = buffer.fill_buf() else {
trace!("could not read file: {path:?}");
return None;
};
#[napi] Some(hash(content))
pub fn hash_files(workspace_root: String) -> HashMap<String, String> {
nx_walker(workspace_root, |rec| {
let mut collection: HashMap<String, String> = HashMap::new();
for (path, content) in rec {
collection.insert(
path.to_normalized_string(),
xxh3::xxh3_64(&content).to_string(),
);
}
collection
})
} }
#[cfg(test)] #[cfg(test)]

View File

@ -23,7 +23,6 @@ export function remove(src: string): void
export function copy(src: string, dest: string): void export function copy(src: string, dest: string): void
export function hashArray(input: Array<string>): string export function hashArray(input: Array<string>): string
export function hashFile(file: string): string | null export function hashFile(file: string): string | null
export function hashFiles(workspaceRoot: string): Record<string, string>
export function findImports(projectFileMap: Record<string, Array<string>>): Array<ImportResult> export function findImports(projectFileMap: Record<string, Array<string>>): Array<ImportResult>
/** /**
* Transfer the project graph from the JS world to the Rust world, so that we can pass the project graph via memory quicker * Transfer the project graph from the JS world to the Rust world, so that we can pass the project graph via memory quicker

View File

@ -246,7 +246,7 @@ if (!nativeBinding) {
throw new Error(`Failed to load native binding`) throw new Error(`Failed to load native binding`)
} }
const { expandOutputs, getFilesForOutputs, remove, copy, hashArray, hashFile, hashFiles, ImportResult, findImports, transferProjectGraph, HashPlanner, TaskHasher, EventType, Watcher, WorkspaceContext, WorkspaceErrors } = nativeBinding const { expandOutputs, getFilesForOutputs, remove, copy, hashArray, hashFile, ImportResult, findImports, transferProjectGraph, HashPlanner, TaskHasher, EventType, Watcher, WorkspaceContext, WorkspaceErrors } = nativeBinding
module.exports.expandOutputs = expandOutputs module.exports.expandOutputs = expandOutputs
module.exports.getFilesForOutputs = getFilesForOutputs module.exports.getFilesForOutputs = getFilesForOutputs
@ -254,7 +254,6 @@ module.exports.remove = remove
module.exports.copy = copy module.exports.copy = copy
module.exports.hashArray = hashArray module.exports.hashArray = hashArray
module.exports.hashFile = hashFile module.exports.hashFile = hashFile
module.exports.hashFiles = hashFiles
module.exports.ImportResult = ImportResult module.exports.ImportResult = ImportResult
module.exports.findImports = findImports module.exports.findImports = findImports
module.exports.transferProjectGraph = transferProjectGraph module.exports.transferProjectGraph = transferProjectGraph

View File

@ -1361,16 +1361,11 @@ import('./dynamic-import.vue')
ancestors.next(); ancestors.next();
let root = PathBuf::from(ancestors.next().unwrap()); let root = PathBuf::from(ancestors.next().unwrap());
let files = nx_walker(root.clone(), move |receiver| {
let mut files = vec![];
let glob = build_glob_set(&["**/*.[jt]s"]).unwrap(); let glob = build_glob_set(&["**/*.[jt]s"]).unwrap();
for (path, _) in receiver { let files = nx_walker(root.clone())
if glob.is_match(&path) { .filter(|(full_path, _)| glob.is_match(full_path))
files.push(root.join(path).to_normalized_string()); .map(|(full_path, _)| full_path.to_normalized_string())
} .collect::<Vec<_>>();
}
files
});
let results: HashMap<_, _> = let results: HashMap<_, _> =
find_imports(HashMap::from([(String::from("nx"), files.clone())])) find_imports(HashMap::from([(String::from("nx"), files.clone())]))

View File

@ -2,8 +2,9 @@ use std::path::{Path, PathBuf};
use std::thread; use std::thread;
use std::thread::available_parallelism; use std::thread::available_parallelism;
use crossbeam_channel::{unbounded, Receiver}; use crossbeam_channel::unbounded;
use ignore::WalkBuilder; use ignore::WalkBuilder;
use tracing::trace;
use crate::native::glob::build_glob_set; use crate::native::glob::build_glob_set;
@ -35,11 +36,9 @@ where
} }
/// Walk the directory and ignore files from .gitignore and .nxignore /// Walk the directory and ignore files from .gitignore and .nxignore
pub fn nx_walker<P, Fn, Re>(directory: P, f: Fn) -> Re pub fn nx_walker<P>(directory: P) -> impl Iterator<Item = (PathBuf, PathBuf)>
where where
P: AsRef<Path>, P: AsRef<Path>,
Fn: FnOnce(Receiver<(PathBuf, Vec<u8>)>) -> Re + Send + 'static,
Re: Send + 'static,
{ {
let directory = directory.as_ref(); let directory = directory.as_ref();
let nx_ignore = directory.join(".nxignore"); let nx_ignore = directory.join(".nxignore");
@ -59,10 +58,11 @@ where
let cpus = available_parallelism().map_or(2, |n| n.get()) - 1; let cpus = available_parallelism().map_or(2, |n| n.get()) - 1;
let (sender, receiver) = unbounded::<(PathBuf, Vec<u8>)>(); let (sender, receiver) = unbounded();
let receiver_thread = thread::spawn(|| f(receiver)); trace!(?directory, "walking");
let now = std::time::Instant::now();
walker.threads(cpus).build_parallel().run(|| { walker.threads(cpus).build_parallel().run(|| {
let tx = sender.clone(); let tx = sender.clone();
Box::new(move |entry| { Box::new(move |entry| {
@ -72,27 +72,29 @@ where
return Continue; return Continue;
}; };
let Ok(content) = std::fs::read(dir_entry.path()) else { if dir_entry.file_type().is_some_and(|d| d.is_dir()) {
return Continue; return Continue;
}; }
let Ok(file_path) = dir_entry.path().strip_prefix(directory) else { let Ok(file_path) = dir_entry.path().strip_prefix(directory) else {
return Continue; return Continue;
}; };
tx.send((file_path.into(), content)).ok(); tx.send((dir_entry.path().to_owned(), file_path.to_owned()))
.ok();
Continue Continue
}) })
}); });
trace!("walked in {:?}", now.elapsed());
let receiver_thread = thread::spawn(move || receiver.into_iter());
drop(sender); drop(sender);
receiver_thread.join().unwrap() receiver_thread.join().unwrap()
} }
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use std::collections::HashMap;
use std::{assert_eq, vec}; use std::{assert_eq, vec};
use assert_fs::prelude::*; use assert_fs::prelude::*;
@ -124,32 +126,21 @@ mod test {
#[test] #[test]
fn it_walks_a_directory() { fn it_walks_a_directory() {
// handle empty workspaces // handle empty workspaces
let content = nx_walker("/does/not/exist", |rec| { let content = nx_walker("/does/not/exist").collect::<Vec<_>>();
let mut paths = vec![];
for (path, _) in rec {
paths.push(path);
}
paths
});
assert!(content.is_empty()); assert!(content.is_empty());
let temp_dir = setup_fs(); let temp_dir = setup_fs();
let content = nx_walker(temp_dir, |rec| { let mut content = nx_walker(&temp_dir).collect::<Vec<_>>();
let mut paths = HashMap::new(); content.sort();
for (path, content) in rec {
paths.insert(path, content);
}
paths
});
assert_eq!( assert_eq!(
content, content,
HashMap::from([ vec![
(PathBuf::from("baz/qux.txt"), "content@qux".into()), (temp_dir.join("bar.txt"), PathBuf::from("bar.txt")),
(PathBuf::from("foo.txt"), "content1".into()), (temp_dir.join("baz/qux.txt"), PathBuf::from("baz/qux.txt")),
(PathBuf::from("test.txt"), "content".into()), (temp_dir.join("foo.txt"), PathBuf::from("foo.txt")),
(PathBuf::from("bar.txt"), "content2".into()), (temp_dir.join("test.txt"), PathBuf::from("test.txt")),
]) ]
); );
} }
@ -180,13 +171,10 @@ nested/child-two/
) )
.unwrap(); .unwrap();
let mut file_names = nx_walker(temp_dir, |rec| { let mut file_names = nx_walker(temp_dir)
let mut file_names = vec![]; .into_iter()
for (path, _) in rec { .map(|(_, p)| p.to_normalized_string())
file_names.push(path.to_normalized_string()); .collect::<Vec<_>>();
}
file_names
});
file_names.sort(); file_names.sort();

View File

@ -1,14 +1,15 @@
use napi::bindgen_prelude::External; use napi::bindgen_prelude::External;
use std::collections::HashMap; use std::collections::HashMap;
use crate::native::hasher::hash; use crate::native::hasher::{hash, hash_file_path};
use crate::native::utils::Normalize; use crate::native::utils::Normalize;
use napi::bindgen_prelude::*; use napi::bindgen_prelude::*;
use rayon::prelude::*; use rayon::prelude::*;
use std::ops::Deref; use std::ops::Deref;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::Arc; use std::sync::Arc;
use std::thread; use std::thread::available_parallelism;
use std::{cmp, thread};
use crate::native::logger::enable_logger; use crate::native::logger::enable_logger;
use crate::native::project_graph::utils::{find_project_for_path, ProjectRootMappings}; use crate::native::project_graph::utils::{find_project_for_path, ProjectRootMappings};
@ -30,6 +31,7 @@ pub struct WorkspaceContext {
} }
type Files = Vec<(PathBuf, String)>; type Files = Vec<(PathBuf, String)>;
struct FilesWorker(Option<Arc<(Mutex<Files>, Condvar)>>); struct FilesWorker(Option<Arc<(Mutex<Files>, Condvar)>>);
impl FilesWorker { impl FilesWorker {
fn gather_files(workspace_root: &Path) -> Self { fn gather_files(workspace_root: &Path) -> Self {
@ -49,16 +51,34 @@ impl FilesWorker {
trace!("locking files"); trace!("locking files");
let (lock, cvar) = &*files_lock_clone; let (lock, cvar) = &*files_lock_clone;
let mut workspace_files = lock.lock(); let mut workspace_files = lock.lock();
let files = nx_walker(workspace_root, |rec| {
let mut file_hashes: Vec<(PathBuf, String)> = vec![];
for (path, content) in rec {
file_hashes.push((path, hash(&content)));
}
file_hashes
});
workspace_files.extend(files); let files = nx_walker(workspace_root).collect::<Vec<_>>();
workspace_files.par_sort(); let num_parallelism = cmp::max(available_parallelism().map_or(2, |n| n.get()) / 3, 2);
let chunks = files.len() / num_parallelism;
let now = std::time::Instant::now();
let mut files = if chunks < num_parallelism {
files
.iter()
.filter_map(|(full_path, path)| {
hash_file_path(full_path).map(|hash| (path.to_owned(), hash))
})
.collect::<Vec<_>>()
} else {
files
.par_chunks(chunks)
.flat_map_iter(|chunks| {
chunks.iter().filter_map(|(full_path, path)| {
hash_file_path(full_path).map(|hash| (path.to_owned(), hash))
})
})
.collect::<Vec<_>>()
};
files.par_sort();
trace!("hashed and sorted workspace files in {:?}", now.elapsed());
*workspace_files = files;
let files_len = workspace_files.len(); let files_len = workspace_files.len();
trace!(?files_len, "files retrieved"); trace!(?files_len, "files retrieved");