From 4b88f7984ffe0e61b8a8f266af1f0b922509e43e Mon Sep 17 00:00:00 2001 From: Chris Olszewski Date: Fri, 17 May 2024 15:04:52 -0700 Subject: [PATCH 1/3] chore(glob): move doublestar addition to dedicated function --- crates/turborepo-globwalk/src/lib.rs | 35 +++++++++++++++------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/crates/turborepo-globwalk/src/lib.rs b/crates/turborepo-globwalk/src/lib.rs index b25a6574c2def..3a99e09b0a967 100644 --- a/crates/turborepo-globwalk/src/lib.rs +++ b/crates/turborepo-globwalk/src/lib.rs @@ -102,24 +102,9 @@ fn preprocess_paths_and_globs( .map(|s| join_unix_like_paths(&base_path_slash, &s)) .filter_map(|g| collapse_path(&g).map(|(s, _)| s.to_string())) { - let split = split.to_string(); // if the glob ends with a slash, then we need to add a double star, // unless it already ends with a double star - if split.ends_with('/') { - if split.ends_with("**/") { - exclude_paths.push(split[..split.len() - 1].to_string()); - } else { - exclude_paths.push(format!("{}**", split)); - } - } else if split.ends_with("/**") { - exclude_paths.push(split); - } else { - // Match Go globby behavior. If the glob doesn't already end in /**, add it - // TODO: The Go version uses system separator. Are we forcing all globs to unix - // paths? - exclude_paths.push(format!("{}/**", split)); - exclude_paths.push(split); - } + add_trailing_double_star(&mut exclude_paths, &split); } Ok((base_path, include_paths, exclude_paths)) @@ -215,6 +200,24 @@ fn collapse_path(path: &str) -> Option<(Cow, usize)> { } } +fn add_trailing_double_star(exclude_paths: &mut Vec, glob: &str) { + if let Some(stripped) = glob.strip_suffix('/') { + if stripped.ends_with("**") { + exclude_paths.push(stripped.to_string()); + } else { + exclude_paths.push(format!("{}**", glob)); + } + } else if glob.ends_with("/**") { + exclude_paths.push(glob.to_string()); + } else { + // Match Go globby behavior. If the glob doesn't already end in /**, add it + // TODO: The Go version uses system separator. Are we forcing all globs to unix + // paths? + exclude_paths.push(format!("{}/**", glob)); + exclude_paths.push(glob.to_string()); + } +} + #[tracing::instrument] fn glob_with_contextual_error + std::fmt::Debug>( raw: S, From fdc0026435ce7169cfe7c27ad7d2cb62b55eae83 Mon Sep 17 00:00:00 2001 From: Chris Olszewski Date: Mon, 20 May 2024 14:07:05 -0700 Subject: [PATCH 2/3] feat(inputs): add doublestar to dir globs --- crates/turborepo-globwalk/src/lib.rs | 86 +++++++++++++++++-- .../integration/fixtures/dir_globs/.gitignore | 3 + .../dir_globs/apps/my-app/package.json | 10 +++ .../fixtures/dir_globs/package.json | 10 +++ .../dir_globs/packages/util/package.json | 6 ++ .../dir_globs/packages/util/src/boo.txt | 1 + .../integration/fixtures/dir_globs/turbo.json | 9 ++ turborepo-tests/integration/tests/run/globs.t | 39 +++++++++ 8 files changed, 156 insertions(+), 8 deletions(-) create mode 100644 turborepo-tests/integration/fixtures/dir_globs/.gitignore create mode 100644 turborepo-tests/integration/fixtures/dir_globs/apps/my-app/package.json create mode 100644 turborepo-tests/integration/fixtures/dir_globs/package.json create mode 100644 turborepo-tests/integration/fixtures/dir_globs/packages/util/package.json create mode 100644 turborepo-tests/integration/fixtures/dir_globs/packages/util/src/boo.txt create mode 100644 turborepo-tests/integration/fixtures/dir_globs/turbo.json create mode 100644 turborepo-tests/integration/tests/run/globs.t diff --git a/crates/turborepo-globwalk/src/lib.rs b/crates/turborepo-globwalk/src/lib.rs index 3a99e09b0a967..abc31a0686a77 100644 --- a/crates/turborepo-globwalk/src/lib.rs +++ b/crates/turborepo-globwalk/src/lib.rs @@ -15,7 +15,8 @@ use path_clean::PathClean; use path_slash::PathExt; use rayon::prelude::*; use regex::Regex; -use turbopath::{AbsoluteSystemPath, AbsoluteSystemPathBuf, PathError}; +use tracing::debug; +use turbopath::{AbsoluteSystemPath, AbsoluteSystemPathBuf, PathError, RelativeUnixPath}; use wax::{walk::FileIterator, BuildError, Glob}; #[derive(Debug, PartialEq, Clone, Copy)] @@ -51,10 +52,13 @@ fn join_unix_like_paths(a: &str, b: &str) -> String { [a.trim_end_matches('/'), "/", b.trim_start_matches('/')].concat() } -fn escape_glob_literals(literal_glob: &str) -> Cow { +fn glob_literals() -> &'static Regex { static RE: OnceLock = OnceLock::new(); RE.get_or_init(|| Regex::new(r"(?[\?\*\$:<>\(\)\[\]{},])").unwrap()) - .replace_all(literal_glob, "\\$literal") +} + +fn escape_glob_literals(literal_glob: &str) -> Cow { + glob_literals().replace_all(literal_glob, "\\$literal") } #[tracing::instrument] @@ -63,6 +67,8 @@ fn preprocess_paths_and_globs( include: &[String], exclude: &[String], ) -> Result<(PathBuf, Vec, Vec), WalkError> { + debug!("processing includes: {include:?}"); + debug!("processing excludes: {exclude:?}"); let base_path_slash = base_path .as_std_path() .to_slash() @@ -76,6 +82,12 @@ fn preprocess_paths_and_globs( let (include_paths, lowest_segment) = include .iter() .map(|s| fix_glob_pattern(s)) + .map(|mut s| { + // We need to check inclusion globs before the join + // as to_slash doesn't preserve Windows drive names. + add_doublestar_to_dir(base_path, &mut s); + s + }) .map(|s| join_unix_like_paths(&base_path_slash, &s)) .filter_map(|s| collapse_path(&s).map(|(s, v)| (s.to_string(), v))) .fold( @@ -106,6 +118,8 @@ fn preprocess_paths_and_globs( // unless it already ends with a double star add_trailing_double_star(&mut exclude_paths, &split); } + debug!("processed includes: {include_paths:?}"); + debug!("processed excludes: {exclude_paths:?}"); Ok((base_path, include_paths, exclude_paths)) } @@ -211,13 +225,46 @@ fn add_trailing_double_star(exclude_paths: &mut Vec, glob: &str) { exclude_paths.push(glob.to_string()); } else { // Match Go globby behavior. If the glob doesn't already end in /**, add it - // TODO: The Go version uses system separator. Are we forcing all globs to unix - // paths? + // We use the unix style operator as wax expects unix style paths exclude_paths.push(format!("{}/**", glob)); exclude_paths.push(glob.to_string()); } } +fn add_doublestar_to_dir(base: &AbsoluteSystemPath, glob: &mut String) { + // If the glob has a glob literal in it e.g. * + // then skip trying to read it as a file path. + if glob_literals().is_match(&*glob) { + return; + } + + // Globs are given in unix style + let Ok(glob_path) = RelativeUnixPath::new(&*glob) else { + // Glob isn't valid relative unix path so can't check if dir + debug!("'{glob}' isn't valid path"); + return; + }; + + let path = base.join_unix_path(glob_path); + + let Ok(metadata) = path.symlink_metadata() else { + debug!("'{path}' doesn't have metadata"); + return; + }; + + if !metadata.is_dir() { + return; + } + + debug!("'{path}' is a directory"); + + // Glob points to a dir, must add ** + if !glob.ends_with('/') { + glob.push('/'); + } + glob.push_str("**"); +} + #[tracing::instrument] fn glob_with_contextual_error + std::fmt::Debug>( raw: S, @@ -373,12 +420,13 @@ mod test { use std::{collections::HashSet, str::FromStr}; use itertools::Itertools; + use tempdir::TempDir; use test_case::test_case; - use turbopath::AbsoluteSystemPathBuf; + use turbopath::{AbsoluteSystemPath, AbsoluteSystemPathBuf}; use crate::{ - collapse_path, escape_glob_literals, fix_glob_pattern, globwalk, ValidatedGlob, WalkError, - WalkType, + add_doublestar_to_dir, collapse_path, escape_glob_literals, fix_glob_pattern, globwalk, + ValidatedGlob, WalkError, WalkType, }; #[cfg(unix)] @@ -1426,4 +1474,26 @@ mod test { r"\?\*\$\:\<\>\(\)\[\]\{\}\," ); } + + #[test_case("foo", false, "foo" ; "file")] + #[test_case("foo", true, "foo/**" ; "dir")] + #[test_case("foo/", true, "foo/**" ; "dir slash")] + #[test_case("f[o0]o", true, "f[o0]o" ; "non-literal")] + fn test_add_double_star(glob: &str, is_dir: bool, expected: &str) { + let tmpdir = TempDir::new("doublestar").unwrap(); + let base = AbsoluteSystemPath::new(tmpdir.path().to_str().unwrap()).unwrap(); + + let foo = base.join_component("foo"); + + match is_dir { + true => foo.create_dir_all().unwrap(), + false => foo.create_with_contents(b"bar").unwrap(), + } + + let mut glob = glob.to_owned(); + + add_doublestar_to_dir(base, &mut glob); + + assert_eq!(glob, expected); + } } diff --git a/turborepo-tests/integration/fixtures/dir_globs/.gitignore b/turborepo-tests/integration/fixtures/dir_globs/.gitignore new file mode 100644 index 0000000000000..77af9fc60321d --- /dev/null +++ b/turborepo-tests/integration/fixtures/dir_globs/.gitignore @@ -0,0 +1,3 @@ +node_modules/ +.turbo +.npmrc diff --git a/turborepo-tests/integration/fixtures/dir_globs/apps/my-app/package.json b/turborepo-tests/integration/fixtures/dir_globs/apps/my-app/package.json new file mode 100644 index 0000000000000..1746e0db23610 --- /dev/null +++ b/turborepo-tests/integration/fixtures/dir_globs/apps/my-app/package.json @@ -0,0 +1,10 @@ +{ + "name": "my-app", + "scripts": { + "build": "echo building", + "maybefails": "exit 4" + }, + "dependencies": { + "util": "*" + } +} diff --git a/turborepo-tests/integration/fixtures/dir_globs/package.json b/turborepo-tests/integration/fixtures/dir_globs/package.json new file mode 100644 index 0000000000000..b3601d4b13a2c --- /dev/null +++ b/turborepo-tests/integration/fixtures/dir_globs/package.json @@ -0,0 +1,10 @@ +{ + "name": "monorepo", + "scripts": { + "something": "turbo run build" + }, + "workspaces": [ + "apps/**", + "packages/**" + ] +} diff --git a/turborepo-tests/integration/fixtures/dir_globs/packages/util/package.json b/turborepo-tests/integration/fixtures/dir_globs/packages/util/package.json new file mode 100644 index 0000000000000..e8a7130a1662d --- /dev/null +++ b/turborepo-tests/integration/fixtures/dir_globs/packages/util/package.json @@ -0,0 +1,6 @@ +{ + "name": "util", + "scripts": { + "build": "echo building; mkdir dist; echo 'world' > dist/hello.txt " + } +} diff --git a/turborepo-tests/integration/fixtures/dir_globs/packages/util/src/boo.txt b/turborepo-tests/integration/fixtures/dir_globs/packages/util/src/boo.txt new file mode 100644 index 0000000000000..5626abf0f72e5 --- /dev/null +++ b/turborepo-tests/integration/fixtures/dir_globs/packages/util/src/boo.txt @@ -0,0 +1 @@ +one diff --git a/turborepo-tests/integration/fixtures/dir_globs/turbo.json b/turborepo-tests/integration/fixtures/dir_globs/turbo.json new file mode 100644 index 0000000000000..49f0d49efdf22 --- /dev/null +++ b/turborepo-tests/integration/fixtures/dir_globs/turbo.json @@ -0,0 +1,9 @@ +{ + "$schema": "https://turbo.build/schema.json", + "pipeline": { + "build": { + "inputs": ["src/"], + "outputs": ["dist"] + } + } +} diff --git a/turborepo-tests/integration/tests/run/globs.t b/turborepo-tests/integration/tests/run/globs.t new file mode 100644 index 0000000000000..1f80696b62583 --- /dev/null +++ b/turborepo-tests/integration/tests/run/globs.t @@ -0,0 +1,39 @@ +Setup + $ . ${TESTDIR}/../../../helpers/setup_integration_test.sh dir_globs +Verify that input directory change causes cache miss + $ ${TURBO} build --filter=util --output-logs=hash-only + \xe2\x80\xa2 Packages in scope: util (esc) + \xe2\x80\xa2 Running build in 1 packages (esc) + \xe2\x80\xa2 Remote caching disabled (esc) + util:build: cache miss, executing 1b15024312fdecef + + Tasks: 1 successful, 1 total + Cached: 0 cached, 1 total + Time:\s*[\.0-9]+m?s (re) + + $ touch packages/util/src/oops.txt + $ ${TURBO} build --filter=util --output-logs=hash-only + \xe2\x80\xa2 Packages in scope: util (esc) + \xe2\x80\xa2 Running build in 1 packages (esc) + \xe2\x80\xa2 Remote caching disabled (esc) + util:build: cache miss, executing 31adf3c5481a64c7 + + Tasks: 1 successful, 1 total + Cached: 0 cached, 1 total + Time:\s*[\.0-9]+m?s (re) + + $ cat packages/util/dist/hello.txt + world + $ rm packages/util/dist/hello.txt + $ ${TURBO} build --filter=util --output-logs=hash-only + \xe2\x80\xa2 Packages in scope: util (esc) + \xe2\x80\xa2 Running build in 1 packages (esc) + \xe2\x80\xa2 Remote caching disabled (esc) + util:build: cache hit, suppressing logs 31adf3c5481a64c7 + + Tasks: 1 successful, 1 total + Cached: 1 cached, 1 total + Time:\s*[\.0-9]+m?s >>> FULL TURBO (re) + + $ cat packages/util/dist/hello.txt + world From 200688d6a333eec4b6ea039f001846fbbee086c8 Mon Sep 17 00:00:00 2001 From: Chris Olszewski Date: Mon, 20 May 2024 14:49:49 -0700 Subject: [PATCH 3/3] chore: update unit tests --- crates/turborepo-globwalk/src/lib.rs | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/crates/turborepo-globwalk/src/lib.rs b/crates/turborepo-globwalk/src/lib.rs index abc31a0686a77..f23cf43efdb8f 100644 --- a/crates/turborepo-globwalk/src/lib.rs +++ b/crates/turborepo-globwalk/src/lib.rs @@ -588,7 +588,7 @@ mod test { #[test_case("**/*f", 4, 4 => matches None ; "leading doublestar expansion")] #[test_case("**f", 4, 4 => matches None ; "transform leading doublestar")] #[test_case("a**", 22, 22 => matches None ; "transform trailing doublestar")] - #[test_case("abc", 1, 1 => matches None ; "exact match")] + #[test_case("abc", 3, 3 => matches None ; "exact match")] #[test_case("*", 19, 15 => matches None ; "single star match")] #[test_case("*c", 2, 2 => matches None ; "single star suffix match")] #[test_case("a*", 9, 9 => matches None ; "single star prefix match")] @@ -621,7 +621,7 @@ mod test { #[test_case("a/**/b", 2, 2 => matches None ; "a followed by double star and single subdirectory match")] #[test_case("a/**/c", 2, 2 => matches None ; "a followed by double star and multiple subdirectories match 2")] #[test_case("a/**/d", 1, 1 => matches None ; "a followed by double star and multiple subdirectories with target match")] - #[test_case("a/b/c", 1, 1 => matches None ; "a followed by subdirectories and double slash mismatch")] + #[test_case("a/b/c", 2, 2 => matches None ; "a followed by subdirectories and double slash mismatch")] #[test_case("ab{c,d}", 1, 1 => matches None ; "pattern with curly braces match")] #[test_case("ab{c,d,*}", 5, 5 => matches None ; "pattern with curly braces and wildcard match")] #[test_case("ab{c,d}[", 0, 0 => matches Some(WalkError::BadPattern(_, _)))] @@ -954,8 +954,21 @@ mod test { "/repos/some-app/", &["dist"], &[], - &["/repos/some-app/dist"], - &[] + &[ + "/repos/some-app/dist", + "/repos/some-app/dist/index.html", + "/repos/some-app/dist/js", + "/repos/some-app/dist/js/index.js", + "/repos/some-app/dist/js/lib.js", + "/repos/some-app/dist/js/node_modules", + "/repos/some-app/dist/js/node_modules/browserify.js", + ], + &[ + "/repos/some-app/dist/index.html", + "/repos/some-app/dist/js/index.js", + "/repos/some-app/dist/js/lib.js", + "/repos/some-app/dist/js/node_modules/browserify.js", + ] ; "passing just a directory captures no children")] #[test_case(&[ "/repos/some-app/dist/index.html",