Skip to content

Commit

Permalink
feat: add doublestar to exact dir paths (#8180)
Browse files Browse the repository at this point in the history
### Description

If a user provides a glob that points to a directory e.g. `dist` or
`dist/` then we will add a trailing double star so that the directory
contents get captured instead of just the directory entry itself.

### Testing Instructions

Added unit tests for adding doublestar
Added integration test for verifying that `src/` and `dist` for task
inputs/outputs get treated as `src/**` and `dist/**` respectively.
  • Loading branch information
chris-olszewski committed May 29, 2024
1 parent b8e21df commit 1404e12
Show file tree
Hide file tree
Showing 8 changed files with 190 additions and 26 deletions.
138 changes: 112 additions & 26 deletions crates/turborepo-globwalk/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ use path_clean::PathClean;
use path_slash::PathExt;
use rayon::prelude::*;
use regex::Regex;
use turbopath::{AbsoluteSystemPath, AbsoluteSystemPathBuf, PathError};
use tracing::debug;
use turbopath::{AbsoluteSystemPath, AbsoluteSystemPathBuf, PathError, RelativeUnixPath};
use wax::{walk::FileIterator, BuildError, Glob};

#[derive(Debug, PartialEq, Clone, Copy)]
Expand Down Expand Up @@ -51,10 +52,13 @@ fn join_unix_like_paths(a: &str, b: &str) -> String {
[a.trim_end_matches('/'), "/", b.trim_start_matches('/')].concat()
}

fn escape_glob_literals(literal_glob: &str) -> Cow<str> {
fn glob_literals() -> &'static Regex {
static RE: OnceLock<Regex> = OnceLock::new();
RE.get_or_init(|| Regex::new(r"(?<literal>[\?\*\$:<>\(\)\[\]{},])").unwrap())
.replace_all(literal_glob, "\\$literal")
}

fn escape_glob_literals(literal_glob: &str) -> Cow<str> {
glob_literals().replace_all(literal_glob, "\\$literal")
}

#[tracing::instrument]
Expand All @@ -63,6 +67,8 @@ fn preprocess_paths_and_globs(
include: &[String],
exclude: &[String],
) -> Result<(PathBuf, Vec<String>, Vec<String>), WalkError> {
debug!("processing includes: {include:?}");
debug!("processing excludes: {exclude:?}");
let base_path_slash = base_path
.as_std_path()
.to_slash()
Expand All @@ -76,6 +82,12 @@ fn preprocess_paths_and_globs(
let (include_paths, lowest_segment) = include
.iter()
.map(|s| fix_glob_pattern(s))
.map(|mut s| {
// We need to check inclusion globs before the join
// as to_slash doesn't preserve Windows drive names.
add_doublestar_to_dir(base_path, &mut s);
s
})
.map(|s| join_unix_like_paths(&base_path_slash, &s))
.filter_map(|s| collapse_path(&s).map(|(s, v)| (s.to_string(), v)))
.fold(
Expand All @@ -102,25 +114,12 @@ fn preprocess_paths_and_globs(
.map(|s| join_unix_like_paths(&base_path_slash, &s))
.filter_map(|g| collapse_path(&g).map(|(s, _)| s.to_string()))
{
let split = split.to_string();
// if the glob ends with a slash, then we need to add a double star,
// unless it already ends with a double star
if split.ends_with('/') {
if split.ends_with("**/") {
exclude_paths.push(split[..split.len() - 1].to_string());
} else {
exclude_paths.push(format!("{}**", split));
}
} else if split.ends_with("/**") {
exclude_paths.push(split);
} else {
// Match Go globby behavior. If the glob doesn't already end in /**, add it
// TODO: The Go version uses system separator. Are we forcing all globs to unix
// paths?
exclude_paths.push(format!("{}/**", split));
exclude_paths.push(split);
}
add_trailing_double_star(&mut exclude_paths, &split);
}
debug!("processed includes: {include_paths:?}");
debug!("processed excludes: {exclude_paths:?}");

Ok((base_path, include_paths, exclude_paths))
}
Expand Down Expand Up @@ -215,6 +214,57 @@ fn collapse_path(path: &str) -> Option<(Cow<str>, usize)> {
}
}

fn add_trailing_double_star(exclude_paths: &mut Vec<String>, glob: &str) {
if let Some(stripped) = glob.strip_suffix('/') {
if stripped.ends_with("**") {
exclude_paths.push(stripped.to_string());
} else {
exclude_paths.push(format!("{}**", glob));
}
} else if glob.ends_with("/**") {
exclude_paths.push(glob.to_string());
} else {
// Match Go globby behavior. If the glob doesn't already end in /**, add it
// We use the unix style operator as wax expects unix style paths
exclude_paths.push(format!("{}/**", glob));
exclude_paths.push(glob.to_string());
}
}

fn add_doublestar_to_dir(base: &AbsoluteSystemPath, glob: &mut String) {
// If the glob has a glob literal in it e.g. *
// then skip trying to read it as a file path.
if glob_literals().is_match(&*glob) {
return;
}

// Globs are given in unix style
let Ok(glob_path) = RelativeUnixPath::new(&*glob) else {
// Glob isn't valid relative unix path so can't check if dir
debug!("'{glob}' isn't valid path");
return;
};

let path = base.join_unix_path(glob_path);

let Ok(metadata) = path.symlink_metadata() else {
debug!("'{path}' doesn't have metadata");
return;
};

if !metadata.is_dir() {
return;
}

debug!("'{path}' is a directory");

// Glob points to a dir, must add **
if !glob.ends_with('/') {
glob.push('/');
}
glob.push_str("**");
}

#[tracing::instrument]
fn glob_with_contextual_error<S: AsRef<str> + std::fmt::Debug>(
raw: S,
Expand Down Expand Up @@ -370,12 +420,13 @@ mod test {
use std::{collections::HashSet, str::FromStr};

use itertools::Itertools;
use tempdir::TempDir;
use test_case::test_case;
use turbopath::AbsoluteSystemPathBuf;
use turbopath::{AbsoluteSystemPath, AbsoluteSystemPathBuf};

use crate::{
collapse_path, escape_glob_literals, fix_glob_pattern, globwalk, ValidatedGlob, WalkError,
WalkType,
add_doublestar_to_dir, collapse_path, escape_glob_literals, fix_glob_pattern, globwalk,
ValidatedGlob, WalkError, WalkType,
};

#[cfg(unix)]
Expand Down Expand Up @@ -537,7 +588,7 @@ mod test {
#[test_case("**/*f", 4, 4 => matches None ; "leading doublestar expansion")]
#[test_case("**f", 4, 4 => matches None ; "transform leading doublestar")]
#[test_case("a**", 22, 22 => matches None ; "transform trailing doublestar")]
#[test_case("abc", 1, 1 => matches None ; "exact match")]
#[test_case("abc", 3, 3 => matches None ; "exact match")]
#[test_case("*", 19, 15 => matches None ; "single star match")]
#[test_case("*c", 2, 2 => matches None ; "single star suffix match")]
#[test_case("a*", 9, 9 => matches None ; "single star prefix match")]
Expand Down Expand Up @@ -570,7 +621,7 @@ mod test {
#[test_case("a/**/b", 2, 2 => matches None ; "a followed by double star and single subdirectory match")]
#[test_case("a/**/c", 2, 2 => matches None ; "a followed by double star and multiple subdirectories match 2")]
#[test_case("a/**/d", 1, 1 => matches None ; "a followed by double star and multiple subdirectories with target match")]
#[test_case("a/b/c", 1, 1 => matches None ; "a followed by subdirectories and double slash mismatch")]
#[test_case("a/b/c", 2, 2 => matches None ; "a followed by subdirectories and double slash mismatch")]
#[test_case("ab{c,d}", 1, 1 => matches None ; "pattern with curly braces match")]
#[test_case("ab{c,d,*}", 5, 5 => matches None ; "pattern with curly braces and wildcard match")]
#[test_case("ab{c,d}[", 0, 0 => matches Some(WalkError::BadPattern(_, _)))]
Expand Down Expand Up @@ -903,8 +954,21 @@ mod test {
"/repos/some-app/",
&["dist"],
&[],
&["/repos/some-app/dist"],
&[]
&[
"/repos/some-app/dist",
"/repos/some-app/dist/index.html",
"/repos/some-app/dist/js",
"/repos/some-app/dist/js/index.js",
"/repos/some-app/dist/js/lib.js",
"/repos/some-app/dist/js/node_modules",
"/repos/some-app/dist/js/node_modules/browserify.js",
],
&[
"/repos/some-app/dist/index.html",
"/repos/some-app/dist/js/index.js",
"/repos/some-app/dist/js/lib.js",
"/repos/some-app/dist/js/node_modules/browserify.js",
]
; "passing just a directory captures no children")]
#[test_case(&[
"/repos/some-app/dist/index.html",
Expand Down Expand Up @@ -1423,4 +1487,26 @@ mod test {
r"\?\*\$\:\<\>\(\)\[\]\{\}\,"
);
}

#[test_case("foo", false, "foo" ; "file")]
#[test_case("foo", true, "foo/**" ; "dir")]
#[test_case("foo/", true, "foo/**" ; "dir slash")]
#[test_case("f[o0]o", true, "f[o0]o" ; "non-literal")]
fn test_add_double_star(glob: &str, is_dir: bool, expected: &str) {
let tmpdir = TempDir::new("doublestar").unwrap();
let base = AbsoluteSystemPath::new(tmpdir.path().to_str().unwrap()).unwrap();

let foo = base.join_component("foo");

match is_dir {
true => foo.create_dir_all().unwrap(),
false => foo.create_with_contents(b"bar").unwrap(),
}

let mut glob = glob.to_owned();

add_doublestar_to_dir(base, &mut glob);

assert_eq!(glob, expected);
}
}
3 changes: 3 additions & 0 deletions turborepo-tests/integration/fixtures/dir_globs/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
node_modules/
.turbo
.npmrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"name": "my-app",
"scripts": {
"build": "echo building",
"maybefails": "exit 4"
},
"dependencies": {
"util": "*"
}
}
10 changes: 10 additions & 0 deletions turborepo-tests/integration/fixtures/dir_globs/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"name": "monorepo",
"scripts": {
"something": "turbo run build"
},
"workspaces": [
"apps/**",
"packages/**"
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"name": "util",
"scripts": {
"build": "echo building; mkdir dist; echo 'world' > dist/hello.txt "
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
one
9 changes: 9 additions & 0 deletions turborepo-tests/integration/fixtures/dir_globs/turbo.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"$schema": "https://turbo.build/schema.json",
"pipeline": {
"build": {
"inputs": ["src/"],
"outputs": ["dist"]
}
}
}
39 changes: 39 additions & 0 deletions turborepo-tests/integration/tests/run/globs.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
Setup
$ . ${TESTDIR}/../../../helpers/setup_integration_test.sh dir_globs
Verify that input directory change causes cache miss
$ ${TURBO} build --filter=util --output-logs=hash-only
\xe2\x80\xa2 Packages in scope: util (esc)
\xe2\x80\xa2 Running build in 1 packages (esc)
\xe2\x80\xa2 Remote caching disabled (esc)
util:build: cache miss, executing 1b15024312fdecef

Tasks: 1 successful, 1 total
Cached: 0 cached, 1 total
Time:\s*[\.0-9]+m?s (re)

$ touch packages/util/src/oops.txt
$ ${TURBO} build --filter=util --output-logs=hash-only
\xe2\x80\xa2 Packages in scope: util (esc)
\xe2\x80\xa2 Running build in 1 packages (esc)
\xe2\x80\xa2 Remote caching disabled (esc)
util:build: cache miss, executing 31adf3c5481a64c7

Tasks: 1 successful, 1 total
Cached: 0 cached, 1 total
Time:\s*[\.0-9]+m?s (re)

$ cat packages/util/dist/hello.txt
world
$ rm packages/util/dist/hello.txt
$ ${TURBO} build --filter=util --output-logs=hash-only
\xe2\x80\xa2 Packages in scope: util (esc)
\xe2\x80\xa2 Running build in 1 packages (esc)
\xe2\x80\xa2 Remote caching disabled (esc)
util:build: cache hit, suppressing logs 31adf3c5481a64c7

Tasks: 1 successful, 1 total
Cached: 1 cached, 1 total
Time:\s*[\.0-9]+m?s >>> FULL TURBO (re)

$ cat packages/util/dist/hello.txt
world

0 comments on commit 1404e12

Please sign in to comment.