Skip to content

Commit

Permalink
fix glob matching of alternatives (vercel/turborepo#6839)
Browse files Browse the repository at this point in the history
### Description

The unicode refactoring introduced some bugs in matching of alternatives


Closes PACK-2172
  • Loading branch information
sokra authored Jan 4, 2024
1 parent a0e5ee8 commit 7ef127a
Showing 1 changed file with 56 additions and 47 deletions.
103 changes: 56 additions & 47 deletions crates/turbo-tasks-fs/src/glob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ impl GlobPart {
match_partial,
previous_part_is_path_separator_equivalent,
cursor: GraphemeCursor::new(0, path.len(), true),
index: 0,
glob_iterator: None,
}
}
Expand Down Expand Up @@ -257,6 +258,7 @@ struct GlobPartMatchesIterator<'a> {
match_partial: bool,
previous_part_is_path_separator_equivalent: bool,
cursor: GraphemeCursor,
index: usize,
glob_iterator: Option<Box<GlobMatchesIterator<'a>>>,
}

Expand All @@ -267,12 +269,9 @@ impl<'a> Iterator for GlobPartMatchesIterator<'a> {
match self.part {
GlobPart::AnyDirectories => {
if self.cursor.cur_cursor() == 0 {
let end = self.cursor.next_boundary(self.path, 0);
match end {
Ok(Some(_)) => {}
Ok(None) => return None,
Err(..) => return None,
}
let Ok(Some(_)) = self.cursor.next_boundary(self.path, 0) else {
return None;
};
return Some((self.path, true));
}

Expand Down Expand Up @@ -304,22 +303,19 @@ impl<'a> Iterator for GlobPartMatchesIterator<'a> {
}
}
GlobPart::AnyFile => {
let end = self.cursor.next_boundary(self.path, 0);
match end {
Ok(Some(_)) => {}
Ok(None) => return None,
Err(..) => return None,
}
let Ok(Some(c)) = self.cursor.next_boundary(self.path, 0) else {
return None;
};

let idx = self.path[0..self.cursor.cur_cursor()].len();
let idx = self.path[0..c].len();

// TODO verify if `*` does match zero chars?
if let Some(slice) = self.path.get(0..self.cursor.cur_cursor()) {
if let Some(slice) = self.path.get(0..c) {
if slice.ends_with('/') {
None
} else {
Some((
&self.path[self.cursor.cur_cursor()..],
&self.path[c..],
self.previous_part_is_path_separator_equivalent && idx == 1,
))
}
Expand All @@ -330,14 +326,11 @@ impl<'a> Iterator for GlobPartMatchesIterator<'a> {
GlobPart::AnyFileChar => todo!(),
GlobPart::PathSeparator => {
if self.cursor.cur_cursor() == 0 {
let end = self.cursor.next_boundary(self.path, 0);
match end {
Ok(Some(_)) => {}
Ok(None) => return None,
Err(..) => return None,
}
let Ok(Some(b)) = self.cursor.next_boundary(self.path, 0) else {
return None;
};
if self.path.starts_with('/') {
Some((&self.path[1..], true))
Some((&self.path[b..], true))
} else if self.previous_part_is_path_separator_equivalent {
Some((self.path, true))
} else {
Expand All @@ -347,28 +340,25 @@ impl<'a> Iterator for GlobPartMatchesIterator<'a> {
None
}
}
GlobPart::FileChar(chars) => loop {
let end = match self.cursor.next_boundary(self.path, 0) {
Ok(Some(end)) => end,
_ => return None,
GlobPart::FileChar(chars) => {
let start = self.cursor.cur_cursor();
let Ok(Some(end)) = self.cursor.next_boundary(self.path, 0) else {
return None;
};

let c = &chars[self.cursor.cur_cursor()..end]
.iter()
.cloned()
.collect::<String>();
if self.path.starts_with(c) {
return Some((&self.path[c.len()..], false));
let mut chars_in_path = self.path[start..end].chars();
let Some(c) = chars_in_path.next() else {
return None;
};
if chars_in_path.next().is_some() {
return None;
}
},
chars.contains(&c).then(|| (&self.path[end..], false))
}
GlobPart::File(name) => {
if self.cursor.cur_cursor() == 0 && self.path.starts_with(name) {
let end = self.cursor.next_boundary(self.path, 0);
match end {
Ok(Some(_)) => {}
Ok(None) => return None,
Err(..) => return None,
}
let Ok(Some(_)) = self.cursor.next_boundary(self.path, 0) else {
return None;
};
Some((&self.path[name.len()..], false))
} else {
None
Expand All @@ -379,15 +369,10 @@ impl<'a> Iterator for GlobPartMatchesIterator<'a> {
if let Some((path, is_path_separator_equivalent)) = glob_iterator.next() {
return Some((path, is_path_separator_equivalent));
} else {
let end = self.cursor.next_boundary(self.path, 0);
self.index += 1;
self.glob_iterator = None;
match end {
Ok(Some(_)) => {}
Ok(None) => return None,
Err(..) => return None,
}
}
} else if let Some(alternative) = alternatives.get(self.cursor.cur_cursor()) {
} else if let Some(alternative) = alternatives.get(self.index) {
self.glob_iterator = Some(Box::new(alternative.iter_matches(
self.path,
self.previous_part_is_path_separator_equivalent,
Expand Down Expand Up @@ -457,19 +442,43 @@ mod tests {
"node_modules/next/dist/server/next.js"
)]
#[case::node_modules_root("**/node_modules/**", "node_modules/next/dist/server/next.js")]
#[case::node_modules_root_package(
"**/node_modules/next/**",
"node_modules/next/dist/server/next.js"
)]
#[case::node_modules_nested(
"**/node_modules/**",
"apps/some-app/node_modules/regenerate-unicode-properties/Script_Extensions/Osage.js"
)]
#[case::node_modules_nested_package(
"**/node_modules/regenerate-unicode-properties/**",
"apps/some-app/node_modules/regenerate-unicode-properties/Script_Extensions/Osage.js"
)]
#[case::node_modules_pnpm(
"**/node_modules/**",
"node_modules/.pnpm/[email protected]/node_modules/\
regenerate-unicode-properties/Script_Extensions/Osage.js"
)]
#[case::node_modules_pnpm_package(
"**/node_modules/{regenerate,regenerate-unicode-properties}/**",
"node_modules/.pnpm/[email protected]/node_modules/\
regenerate-unicode-properties/Script_Extensions/Osage.js"
)]
#[case::node_modules_pnpm_prefixed_package(
"**/node_modules/{@blockfrost/blockfrost-js,@highlight-run/node,@libsql/client,@jpg-store/\
lucid-cardano,@mikro-orm/core,@mikro-orm/knex,@prisma/client,@sentry/nextjs,@sentry/node,\
@swc/core,argon2,autoprefixer,bcrypt,better-sqlite3,canvas,cpu-features,cypress,eslint,\
express,next-seo,node-pty,payload,pg,playwright,postcss,prettier,prisma,puppeteer,rimraf,\
sharp,shiki,sqlite3,tailwindcss,ts-node,typescript,vscode-oniguruma,webpack,websocket,@\
aws-sdk/client-dynamodb,@aws-sdk/lib-dynamodb}/**",
"node_modules/.pnpm/@[email protected]_@[email protected]/\
node_modules/@aws-sdk/lib-dynamodb/dist-es/index.js"
)]
#[case::alternatives_nested1("{a,b/c,d/e/{f,g/h}}", "a")]
#[case::alternatives_nested2("{a,b/c,d/e/{f,g/h}}", "b/c")]
#[case::alternatives_nested3("{a,b/c,d/e/{f,g/h}}", "d/e/f")]
#[case::alternatives_nested4("{a,b/c,d/e/{f,g/h}}", "d/e/g/h")]
// #[case::alternatives_chars("[abc]", "b")]
fn glob_match(#[case] glob: &str, #[case] path: &str) {
let glob = Glob::parse(glob).unwrap();

Expand Down

0 comments on commit 7ef127a

Please sign in to comment.