From 061778236b742436ee2322703d049baf1be5c7c5 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Tue, 5 Sep 2023 11:59:25 +0200 Subject: [PATCH 1/3] Remove github restriction --- Cargo.lock | 1 - Cargo.toml | 14 +++++++-- src/buckify.rs | 83 +++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 76 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 87e284e4..db427a8b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1108,7 +1108,6 @@ dependencies = [ "once_cell", "proc-macro2", "rayon", - "regex", "rustsec", "semver", "serde", diff --git a/Cargo.toml b/Cargo.toml index ee5347fe..3dfb9a94 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,15 +22,23 @@ nom = "7.1" once_cell = "1.12" proc-macro2 = { version = "1.0.64", features = ["span-locations"] } rayon = "1.2" -regex = "1.9.2" rustsec = { version = "0.26", features = ["fix"] } semver = { version = "1.0.17", features = ["serde"] } serde = { version = "1.0.185", features = ["derive", "rc"] } -serde_json = { version = "1.0.100", features = ["float_roundtrip", "unbounded_depth"] } +serde_json = { version = "1.0.100", features = [ + "float_roundtrip", + "unbounded_depth", +] } serde_starlark = "0.1.13" structopt = "0.3.23" strum = { version = "0.24", features = ["derive"] } -syn = { version = "2.0.23", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } +syn = { version = "2.0.23", features = [ + "extra-traits", + "fold", + "full", + "visit", + "visit-mut", +] } termcolor = "1.0" toml = "0.7.3" unicode-ident = "1.0.10" diff --git a/src/buckify.rs b/src/buckify.rs index 2f52d818..2505c3b9 100644 --- a/src/buckify.rs +++ b/src/buckify.rs @@ -23,8 +23,6 @@ use std::sync::Mutex; use anyhow::bail; use anyhow::Context; use anyhow::Result; -use once_cell::sync::OnceCell; -use regex::Regex; use crate::buck; use crate::buck::Alias; @@ -237,7 +235,7 @@ fn generate_nonvendored_sources_archive<'scope>( } => generate_git_fetch(repo, commit_hash).map(Some), Source::Unrecognized(_) => { bail!( - "`vendor = false` mode is supported only with exclusively crates.io and GitHub dependencies. \"{}\" {} is coming from some source other than crates.io or GitHub", + "`vendor = false` mode is supported only with exclusively crates.io and https git dependencies. \"{}\" {} is coming from some other source", pkg.name, pkg.version, ); @@ -289,23 +287,46 @@ fn generate_git_fetch(repo: &str, commit_hash: &str) -> Result { })) } -/// Extract the "serde-rs/serde" part of "https://github.com/serde-rs/serde.git". -pub fn short_name_for_git_repo(repo: &str) -> Result<&str> { - static GITHUB_URL_REGEX: OnceCell = OnceCell::new(); - let github_url_regex = GITHUB_URL_REGEX - .get_or_try_init(|| Regex::new(r"^https://github.com/([[:alnum:].-]+/[[:alnum:].-]+)$"))?; +/// Create a uniquely hashed directory name for the arbitrary source url +pub fn short_name_for_git_repo(repo: &str) -> Result { + // The strategy here is similar to what cargo does to generate a unique directory name + // for git sources + let mut sanitized = repo.to_lowercase(); - let repo_without_extension = repo.strip_suffix(".git").unwrap_or(repo); - if let Some(captures) = github_url_regex.captures(repo_without_extension) { - Ok(captures.get(1).unwrap().as_str()) - } else { - // TODO: come up with some mangling scheme for arbitrary repo URLs. Buck - // does not permit ':' in target names. - bail!( - "unsupported git URL: {:?}, currently vendor=false mode only supports \"https://github.com/$OWNER/$REPO\" repositories", - repo, - ); + if let Some(query) = sanitized.rfind('?') { + sanitized.truncate(query); + } + + if let Some(hash) = sanitized.rfind('#') { + sanitized.truncate(hash); + } + + if sanitized.ends_with(".git") { + sanitized.truncate(sanitized.len() - 4); + } + + let mut dir_name = sanitized + .split('/') + .next_back() + .unwrap_or("_empty") + .to_owned(); + + #[allow(deprecated)] + let hash = { + use std::hash::{Hash, Hasher, SipHasher}; + let mut hasher = SipHasher::new_with_keys(0, 0); + sanitized.hash(&mut hasher); + hasher.finish() + }; + + dir_name.push('-'); + + for byte in hash.to_le_bytes() { + use std::fmt::Write; + write!(&mut dir_name, "{byte:0x}").unwrap(); } + + Ok(dir_name) } /// Find the git repository containing the given manifest directory. @@ -914,3 +935,29 @@ pub(crate) fn buckify(config: &Config, args: &Args, paths: &Paths, stdout: bool) Ok(()) } + +#[cfg(test)] +mod test { + use super::short_name_for_git_repo; + + #[test] + fn hashes_with_same_repo_variations() { + let same = [ + short_name_for_git_repo("https://github.com/facebookincubator/reindeer.git").unwrap(), + short_name_for_git_repo("https://github.com/facebookincubator/reindeer").unwrap(), + short_name_for_git_repo("https://github.com/FacebookIncubator/reindeer.git").unwrap(), + ]; + + assert!(!same + .iter() + .any(|dir_name| dir_name != "reindeer-c03e7d4c9f50fdff")); + } + + #[test] + fn hashes_non_github() { + assert_eq!( + short_name_for_git_repo("https://gitlab.com/gilrs-project/gilrs.git").unwrap(), + "gilrs-784d1d6a17891c9" + ); + } +} From 7c41edd7afd046a20cac96b701c9c96d302cde89 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Tue, 5 Sep 2023 12:05:19 +0200 Subject: [PATCH 2/3] Keep https restriction --- src/buckify.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/buckify.rs b/src/buckify.rs index 2505c3b9..86f03c9f 100644 --- a/src/buckify.rs +++ b/src/buckify.rs @@ -289,6 +289,11 @@ fn generate_git_fetch(repo: &str, commit_hash: &str) -> Result { /// Create a uniquely hashed directory name for the arbitrary source url pub fn short_name_for_git_repo(repo: &str) -> Result { + anyhow::ensure!( + repo.starts_with("https://"), + "only https git urls are supported" + ); + // The strategy here is similar to what cargo does to generate a unique directory name // for git sources let mut sanitized = repo.to_lowercase(); From e496806176407eae3161b5b5c7395a5d4bf137c7 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Wed, 13 Sep 2023 09:53:02 +0200 Subject: [PATCH 3/3] Address feedback --- Cargo.lock | 35 ++++++++++++++--------------------- Cargo.toml | 2 ++ src/buckify.rs | 50 ++++++++++++++++++++++++++------------------------ 3 files changed, 42 insertions(+), 45 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index db427a8b..90d5eb1d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -467,9 +467,9 @@ checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" [[package]] name = "form_urlencoded" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8" +checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" dependencies = [ "percent-encoding", ] @@ -598,9 +598,9 @@ dependencies = [ [[package]] name = "idna" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" dependencies = [ "unicode-bidi", "unicode-normalization", @@ -780,12 +780,6 @@ dependencies = [ "value-bag", ] -[[package]] -name = "matches" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" - [[package]] name = "measure_time" version = "0.8.2" @@ -950,9 +944,9 @@ checksum = "877630b3de15c0b64cc52f659345724fbf6bdad9bd9566699fc53688f3c34a34" [[package]] name = "percent-encoding" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" +checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" [[package]] name = "pkg-config" @@ -1097,6 +1091,7 @@ dependencies = [ "anyhow", "dunce", "env_logger", + "fnv", "globset", "ignore", "indexmap", @@ -1120,6 +1115,7 @@ dependencies = [ "termcolor", "toml 0.7.3", "unicode-ident", + "url", "walkdir", ] @@ -1675,12 +1671,9 @@ dependencies = [ [[package]] name = "unicode-bidi" -version = "0.3.4" +version = "0.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" -dependencies = [ - "matches", -] +checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" [[package]] name = "unicode-ident" @@ -1690,9 +1683,9 @@ checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73" [[package]] name = "unicode-normalization" -version = "0.1.21" +version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "854cbdc4f7bc6ae19c820d44abdc3277ac3e1b2b93db20a636825d9322fb60e6" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" dependencies = [ "tinyvec", ] @@ -1736,9 +1729,9 @@ dependencies = [ [[package]] name = "url" -version = "2.3.1" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643" +checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5" dependencies = [ "form_urlencoded", "idna", diff --git a/Cargo.toml b/Cargo.toml index 3dfb9a94..19fddb8a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ license = "MIT" anyhow = "1.0.71" dunce = "1.0.2" env_logger = "0.10" +fnv = "1.0" globset = { version = "0.4.13", features = ["serde1"] } ignore = "0.4" indexmap = { version = "1.9.2", features = ["arbitrary", "rayon", "serde-1"] } @@ -42,6 +43,7 @@ syn = { version = "2.0.23", features = [ termcolor = "1.0" toml = "0.7.3" unicode-ident = "1.0.10" +url = "2.4" walkdir = "2.3" [dev-dependencies] diff --git a/src/buckify.rs b/src/buckify.rs index 86f03c9f..4fffdbdb 100644 --- a/src/buckify.rs +++ b/src/buckify.rs @@ -289,38 +289,40 @@ fn generate_git_fetch(repo: &str, commit_hash: &str) -> Result { /// Create a uniquely hashed directory name for the arbitrary source url pub fn short_name_for_git_repo(repo: &str) -> Result { + let mut canonical = url::Url::parse(&repo.to_lowercase()).context("invalid url")?; + anyhow::ensure!( - repo.starts_with("https://"), + canonical.scheme() == "https", "only https git urls are supported" ); - // The strategy here is similar to what cargo does to generate a unique directory name - // for git sources - let mut sanitized = repo.to_lowercase(); - - if let Some(query) = sanitized.rfind('?') { - sanitized.truncate(query); - } - - if let Some(hash) = sanitized.rfind('#') { - sanitized.truncate(hash); - } - - if sanitized.ends_with(".git") { - sanitized.truncate(sanitized.len() - 4); + canonical.set_query(None); + canonical.set_fragment(None); + + // It would be nice to just say "you're using a .git extension, please remove it", + // but unfortunately some git providers (notably gitlab) require the .git extension + // in the url, but other providers, notably github, treat urls with or without + // the extension exactly the same. If we don't take the .git extension into + // account at all we could run into a situation where 2 or more crates are + // sourced from the same git repo but with and without the .git extension, + // causing them to be hashed and placed differently + if canonical.path().ends_with(".git") { + // This is less efficient but far simpler than using the really unfriendly + // path_segments_mut API + let stripped = canonical.path().trim_end_matches(".git").to_owned(); + canonical.set_path(&stripped); } - let mut dir_name = sanitized - .split('/') - .next_back() + let mut dir_name = canonical + .path_segments() + .and_then(|mut it| it.next_back()) .unwrap_or("_empty") .to_owned(); - #[allow(deprecated)] let hash = { - use std::hash::{Hash, Hasher, SipHasher}; - let mut hasher = SipHasher::new_with_keys(0, 0); - sanitized.hash(&mut hasher); + use std::hash::{Hash, Hasher}; + let mut hasher = fnv::FnvHasher::default(); + canonical.hash(&mut hasher); hasher.finish() }; @@ -955,14 +957,14 @@ mod test { assert!(!same .iter() - .any(|dir_name| dir_name != "reindeer-c03e7d4c9f50fdff")); + .any(|dir_name| dir_name != "reindeer-3e497668718a129")); } #[test] fn hashes_non_github() { assert_eq!( short_name_for_git_repo("https://gitlab.com/gilrs-project/gilrs.git").unwrap(), - "gilrs-784d1d6a17891c9" + "gilrs-1b413f0b5e8e0bb" ); } }