fix(core): dedupe project files in rust (#17618)

This commit is contained in:
Jason Jean 2023-06-15 16:36:33 -04:00 committed by GitHub
parent a546d5f737
commit 00d07b1109
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 197 additions and 54 deletions

View File

@ -3,6 +3,7 @@
use crate::native::parallel_walker::nx_walker;
use crate::native::types::FileData;
use crate::native::utils::glob::build_glob_set;
use crate::native::utils::path::Normalize;
use anyhow::anyhow;
use crossbeam_channel::unbounded;
use globset::{Glob, GlobSetBuilder};
@ -39,7 +40,10 @@ fn hash_files(workspace_root: String) -> HashMap<String, String> {
nx_walker(workspace_root, |rec| {
let mut collection: HashMap<String, String> = HashMap::new();
for (path, content) in rec {
collection.insert(path, xxh3::xxh3_64(&content).to_string());
collection.insert(
path.to_normalized_string(),
xxh3::xxh3_64(&content).to_string(),
);
}
collection
})
@ -57,7 +61,7 @@ fn hash_files_matching_globs(
for (path, content) in receiver {
if glob_set.is_match(&path) {
collection.push(FileData {
file: path,
file: path.to_normalized_string(),
hash: xxh3::xxh3_64(&content).to_string(),
});
}

View File

@ -1,4 +1,4 @@
use std::path::Path;
use std::path::{Path, PathBuf};
use std::thread;
use std::thread::available_parallelism;
@ -8,7 +8,7 @@ use ignore::WalkBuilder;
pub fn nx_walker<P, Fn, Re>(directory: P, f: Fn) -> Re
where
P: AsRef<Path>,
Fn: FnOnce(Receiver<(String, Vec<u8>)>) -> Re + Send + 'static,
Fn: FnOnce(Receiver<(PathBuf, Vec<u8>)>) -> Re + Send + 'static,
Re: Send + 'static,
{
let directory = directory.as_ref();
@ -27,7 +27,7 @@ where
let cpus = available_parallelism().map_or(2, |n| n.get()) - 1;
let (sender, receiver) = unbounded::<(String, Vec<u8>)>();
let (sender, receiver) = unbounded::<(PathBuf, Vec<u8>)>();
let receiver_thread = thread::spawn(|| f(receiver));
@ -49,15 +49,7 @@ where
return Continue;
};
let Some(file_path) = file_path.to_str() else {
return Continue;
};
// convert back-slashes in Windows paths, since the js expects only forward-slash path separators
#[cfg(target_os = "windows")]
let file_path = file_path.replace('\\', "/");
tx.send((file_path.to_string(), content)).ok();
tx.send((file_path.into(), content)).ok();
Continue
})
@ -70,6 +62,7 @@ where
#[cfg(test)]
mod test {
use super::*;
use crate::native::utils::path::Normalize;
use assert_fs::prelude::*;
use assert_fs::TempDir;
use std::collections::HashMap;
@ -117,10 +110,10 @@ mod test {
assert_eq!(
content,
HashMap::from([
("baz/qux.txt".into(), "content@qux".into()),
("foo.txt".into(), "content1".into()),
("test.txt".into(), "content".into()),
("bar.txt".into(), "content2".into()),
(PathBuf::from("baz/qux.txt"), "content@qux".into()),
(PathBuf::from("foo.txt"), "content1".into()),
(PathBuf::from("test.txt"), "content".into()),
(PathBuf::from("bar.txt"), "content2".into()),
])
);
}
@ -155,7 +148,7 @@ nested/child-two/
let mut file_names = nx_walker(temp_dir, |rec| {
let mut file_names = vec![];
for (path, _) in rec {
file_names.push(path);
file_names.push(path.to_normalized_string());
}
file_names
});

View File

@ -1,4 +1,8 @@
import { getWorkspaceFilesNative, WorkspaceErrors } from '../index';
import {
getConfigFiles,
getWorkspaceFilesNative,
WorkspaceErrors,
} from '../index';
import { TempFs } from '../../utils/testing/temp-fs';
import { NxJsonConfiguration } from '../../config/nx-json';
@ -124,6 +128,48 @@ describe('workspace files', () => {
]
`);
});
it('should dedupe configuration files', async () => {
const fs = new TempFs('workspace-files');
const nxJson: NxJsonConfiguration = {};
await fs.createFiles({
'./nx.json': JSON.stringify(nxJson),
'./package.json': JSON.stringify({
name: 'repo-name',
version: '0.0.0',
dependencies: {},
}),
'./project.json': JSON.stringify({
name: 'repo-name',
}),
'./libs/project1/project.json': JSON.stringify({
name: 'project1',
}),
'./libs/project1/package.json': JSON.stringify({
name: 'project1',
}),
'./libs/project1/index.js': '',
});
let globs = ['project.json', '**/project.json', '**/package.json'];
let { configFiles } = getWorkspaceFilesNative(fs.tempDir, globs);
configFiles = configFiles.sort();
expect(configFiles).toMatchInlineSnapshot(`
[
"libs/project1/project.json",
"project.json",
]
`);
let configFiles2 = getConfigFiles(fs.tempDir, globs).sort();
expect(configFiles2).toMatchInlineSnapshot(`
[
"libs/project1/project.json",
"project.json",
]
`);
});
describe('errors', () => {
it('it should infer names of configuration files without a name', async () => {

View File

@ -1 +1,2 @@
pub mod glob;
pub mod path;

View File

@ -0,0 +1,14 @@
pub trait Normalize {
fn to_normalized_string(&self) -> String;
}
impl Normalize for std::path::Path {
fn to_normalized_string(&self) -> String {
// convert back-slashes in Windows paths, since the js expects only forward-slash path separators
if cfg!(windows) {
self.display().to_string().replace('\\', "/")
} else {
self.display().to_string()
}
}
}

View File

@ -1,17 +1,103 @@
use crate::native::parallel_walker::nx_walker;
use crate::native::utils::glob::build_glob_set;
use crate::native::utils::path::Normalize;
use globset::GlobSet;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::path::{Path, PathBuf};
#[napi]
/// Get workspace config files based on provided globs
pub fn get_config_files(workspace_root: String, globs: Vec<String>) -> anyhow::Result<Vec<String>> {
let globs = build_glob_set(globs)?;
Ok(nx_walker(workspace_root, move |rec| {
let mut config_paths: Vec<String> = vec![];
for (path, _) in rec {
if globs.is_match(&path) {
config_paths.push(path.to_owned());
}
let mut config_paths: HashMap<PathBuf, (PathBuf, Vec<u8>)> = HashMap::new();
for (path, content) in rec {
insert_config_file_into_map((path, content), &mut config_paths, &globs);
}
config_paths
.into_iter()
.map(|(_, (val, _))| val.to_normalized_string())
.collect()
}))
}
pub fn insert_config_file_into_map(
(path, content): (PathBuf, Vec<u8>),
config_paths: &mut HashMap<PathBuf, (PathBuf, Vec<u8>)>,
globs: &GlobSet,
) {
if globs.is_match(&path) {
let parent = path.parent().unwrap_or_else(|| Path::new("")).to_path_buf();
let file_name = path
.file_name()
.expect("Config paths always have file names");
if file_name == "project.json" {
config_paths.insert(parent, (path, content));
} else if file_name == "package.json" {
match config_paths.entry(parent) {
Entry::Occupied(mut o) => {
if o.get()
.0
.file_name()
.expect("Config paths always have file names")
!= "project.json"
{
o.insert((path, content));
}
}
Entry::Vacant(v) => {
v.insert((path, content));
}
}
} else {
config_paths.entry(parent).or_insert((path, content));
}
}
}
#[cfg(test)]
mod test {
use super::*;
use std::collections::HashMap;
use std::path::PathBuf;
#[test]
fn should_insert_config_files_properly() {
let mut config_paths: HashMap<PathBuf, (PathBuf, Vec<u8>)> = HashMap::new();
let globs = build_glob_set(vec!["**/*".into()]).unwrap();
insert_config_file_into_map(
(PathBuf::from("project.json"), vec![]),
&mut config_paths,
&globs,
);
insert_config_file_into_map(
(PathBuf::from("package.json"), vec![]),
&mut config_paths,
&globs,
);
insert_config_file_into_map(
(PathBuf::from("lib1/project.json"), vec![]),
&mut config_paths,
&globs,
);
insert_config_file_into_map(
(PathBuf::from("lib2/package.json"), vec![]),
&mut config_paths,
&globs,
);
let config_files: Vec<PathBuf> = config_paths
.into_iter()
.map(|(_, (path, _))| path)
.collect();
assert!(config_files.contains(&PathBuf::from("project.json")));
assert!(config_files.contains(&PathBuf::from("lib1/project.json")));
assert!(config_files.contains(&PathBuf::from("lib2/package.json")));
assert!(!config_files.contains(&PathBuf::from("package.json")));
}
}

View File

@ -1,6 +1,5 @@
use jsonc_parser::ParseOptions;
use std::collections::HashMap;
use std::ffi::OsStr;
use std::path::{Path, PathBuf};
use rayon::prelude::*;
@ -11,7 +10,9 @@ use crate::native::logger::enable_logger;
use crate::native::parallel_walker::nx_walker;
use crate::native::types::FileData;
use crate::native::utils::glob::build_glob_set;
use crate::native::utils::path::Normalize;
use crate::native::workspace::errors::{InternalWorkspaceErrors, WorkspaceErrors};
use crate::native::workspace::get_config_files::insert_config_file_into_map;
use crate::native::workspace::types::{FileLocation, ProjectConfiguration};
#[napi(object)]
@ -89,17 +90,19 @@ pub fn get_workspace_files_native(
Ok(NxWorkspaceFiles {
project_file_map,
global_files,
config_files: projects.iter().map(|(path, _)| path.clone()).collect(),
config_files: projects
.keys()
.map(|path| path.to_normalized_string())
.collect(),
})
}
fn create_root_map(
projects: &Vec<(String, Vec<u8>)>,
projects: &HashMap<PathBuf, Vec<u8>>,
) -> Result<hashbrown::HashMap<&Path, String>, InternalWorkspaceErrors> {
projects
.par_iter()
.map(|(path, content)| {
let path = Path::new(path);
let file_name = path
.file_name()
.expect("path should always have a filename");
@ -128,31 +131,29 @@ fn create_root_map(
})
}?;
Ok((parent_path, name))
} else if let Some(parent_path) = path.parent() {
Ok((
parent_path,
parent_path
.file_name()
.unwrap_or_default()
.to_os_string()
.into_string()
.map_err(|os_string| InternalWorkspaceErrors::Generic {
msg: format!("Cannot turn {os_string:?} into String"),
})?,
))
} else {
if let Some(parent_path) = path.parent() {
Ok((
parent_path,
parent_path
.file_name()
.unwrap_or_default()
.to_os_string()
.into_string()
.map_err(|os_string| InternalWorkspaceErrors::Generic {
msg: format!("Cannot turn {os_string:?} into String"),
})?,
))
} else {
Err(InternalWorkspaceErrors::Generic {
msg: format!("{path:?} has no parent"),
})
}
Err(InternalWorkspaceErrors::Generic {
msg: format!("{path:?} has no parent"),
})
};
})
.collect()
}
fn read_project_configuration(
content: &Vec<u8>,
content: &[u8],
path: &Path,
) -> Result<ProjectConfiguration, InternalWorkspaceErrors> {
serde_json::from_slice(content).or_else(|_| {
@ -169,22 +170,20 @@ fn read_project_configuration(
})
}
type WorkspaceData = (Vec<(String, Vec<u8>)>, Vec<FileData>);
type WorkspaceData = (HashMap<PathBuf, Vec<u8>>, Vec<FileData>);
fn get_file_data(workspace_root: &str, globs: Vec<String>) -> anyhow::Result<WorkspaceData> {
let globs = build_glob_set(globs)?;
let (projects, file_data) = nx_walker(workspace_root, move |rec| {
let mut projects: Vec<(String, Vec<u8>)> = vec![];
let mut projects: HashMap<PathBuf, (PathBuf, Vec<u8>)> = HashMap::new();
let mut file_hashes: Vec<FileData> = vec![];
for (path, content) in rec {
file_hashes.push(FileData {
file: path.clone(),
file: path.to_normalized_string(),
hash: xxh3::xxh3_64(&content).to_string(),
});
if globs.is_match(&path) {
projects.push((path, content));
}
insert_config_file_into_map((path, content), &mut projects, &globs)
}
(projects, file_hashes)
});
Ok((projects, file_data))
Ok((projects.into_values().collect(), file_data))
}