Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support for shallow clones and fetches with gitoxide #11840

Merged
merged 17 commits into from
May 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
312 changes: 160 additions & 152 deletions Cargo.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ filetime = "0.2.9"
flate2 = { version = "1.0.3", default-features = false, features = ["zlib"] }
git2 = "0.17.0"
git2-curl = "0.18.0"
gix = { version = "0.39.0", default-features = false, features = ["blocking-http-transport-curl", "progress-tree"] }
gix-features-for-configuration-only = { version = "0.28.0", package = "gix-features", features = [ "parallel" ] }
gix = { version = "0.44.1", default-features = false, features = ["blocking-http-transport-curl", "progress-tree"] }
gix-features-for-configuration-only = { version = "0.29.0", package = "gix-features", features = [ "parallel" ] }
glob = "0.3.0"
hex = "0.4"
hmac = "0.12.1"
Expand Down
41 changes: 41 additions & 0 deletions src/cargo/sources/git/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,46 @@ mod source;
mod utils;

pub mod fetch {
use crate::core::features::GitoxideFeatures;
use crate::Config;

/// The kind of remote repository to fetch.
#[derive(Debug, Copy, Clone)]
pub enum RemoteKind {
/// A repository that belongs to a git dependency.
GitDependency,
/// A repository that belongs to a Cargo registry.
Registry,
}

impl RemoteKind {
    /// Decide which kind of history to request when fetching from a remote of this kind.
    ///
    /// `repo_is_shallow` tells whether the target repository is already shallow, and `config`
    /// provides the gitoxide-specific unstable feature flags (`shallow_deps` and `shallow_index`).
    ///
    /// Returns `Shallow::NoChange` if the repository is not shallow and the feature matching
    /// this kind is not enabled; otherwise a depth of one commit at the remote is requested.
    pub(crate) fn to_shallow_setting(
        &self,
        repo_is_shallow: bool,
        config: &Config,
    ) -> gix::remote::fetch::Shallow {
        // A repository that is already shallow stays shallow and keeps downloading single
        // commits. Only a non-shallow one needs the matching feature to be switched on.
        if !repo_is_shallow {
            let features: Option<GitoxideFeatures> = config.cli_unstable().gitoxide;
            let shallow_enabled = match self {
                RemoteKind::GitDependency => features.map_or(false, |git| git.shallow_deps),
                RemoteKind::Registry => features.map_or(false, |git| git.shallow_index),
            };
            if !shallow_enabled {
                return gix::remote::fetch::Shallow::NoChange;
            }
        }

        gix::remote::fetch::Shallow::DepthAtRemote(1.try_into().expect("non-zero"))
    }
}

pub type Error = gix::env::collate::fetch::Error<gix::refspec::parse::Error>;
}
57 changes: 44 additions & 13 deletions src/cargo/sources/git/oxide.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ pub fn with_retry_and_progress(
) -> CargoResult<()> {
std::thread::scope(|s| {
let mut progress_bar = Progress::new("Fetch", config);
let is_shallow = config
.cli_unstable()
.gitoxide
.map_or(false, |gix| gix.shallow_deps || gix.shallow_index);
network::retry::with_retry(config, || {
let progress_root: Arc<gix::progress::tree::Root> =
gix::progress::tree::root::Options {
Expand All @@ -50,7 +54,7 @@ pub fn with_retry_and_progress(
);
amend_authentication_hints(res, urls.get_mut().take())
});
translate_progress_to_bar(&mut progress_bar, root)?;
translate_progress_to_bar(&mut progress_bar, root, is_shallow)?;
thread.join().expect("no panic in scoped thread")
})
})
Expand All @@ -59,7 +63,9 @@ pub fn with_retry_and_progress(
fn translate_progress_to_bar(
progress_bar: &mut Progress<'_>,
root: Weak<gix::progress::tree::Root>,
is_shallow: bool,
) -> CargoResult<()> {
let remote_progress: gix::progress::Id = gix::remote::fetch::ProgressId::RemoteProgress.into();
let read_pack_bytes: gix::progress::Id =
gix::odb::pack::bundle::write::ProgressId::ReadPackBytes.into();
let delta_index_objects: gix::progress::Id =
Expand Down Expand Up @@ -88,6 +94,7 @@ fn translate_progress_to_bar(
"progress should be smoother by keeping these as multiples of each other"
);

let num_phases = if is_shallow { 3 } else { 2 }; // indexing + delta-resolution, both with same amount of objects to handle
while let Some(root) = root.upgrade() {
std::thread::sleep(sleep_interval);
let needs_update = last_fast_update.elapsed() >= fast_check_interval;
Expand All @@ -102,31 +109,37 @@ fn translate_progress_to_bar(
fn progress_by_id(
id: gix::progress::Id,
task: &gix::progress::Task,
) -> Option<&gix::progress::Value> {
(task.id == id).then(|| task.progress.as_ref()).flatten()
) -> Option<(&str, &gix::progress::Value)> {
(task.id == id)
.then(|| task.progress.as_ref())
.flatten()
.map(|value| (task.name.as_str(), value))
}
fn find_in<K>(
tasks: &[(K, gix::progress::Task)],
cb: impl Fn(&gix::progress::Task) -> Option<&gix::progress::Value>,
) -> Option<&gix::progress::Value> {
cb: impl Fn(&gix::progress::Task) -> Option<(&str, &gix::progress::Value)>,
) -> Option<(&str, &gix::progress::Value)> {
tasks.iter().find_map(|(_, t)| cb(t))
}

const NUM_PHASES: usize = 2; // indexing + delta-resolution, both with same amount of objects to handle
if let Some(objs) = find_in(&tasks, |t| progress_by_id(resolve_objects, t)) {
// Resolving deltas.
if let Some((_, objs)) = find_in(&tasks, |t| progress_by_id(resolve_objects, t)) {
// Phase 3: Resolving deltas.
let objects = objs.step.load(Ordering::Relaxed);
let total_objects = objs.done_at.expect("known amount of objects");
let msg = format!(", ({objects}/{total_objects}) resolving deltas");

progress_bar.tick(total_objects + objects, total_objects * NUM_PHASES, &msg)?;
progress_bar.tick(
(total_objects * (num_phases - 1)) + objects,
total_objects * num_phases,
&msg,
)?;
} else if let Some((objs, read_pack)) =
find_in(&tasks, |t| progress_by_id(read_pack_bytes, t)).and_then(|read| {
find_in(&tasks, |t| progress_by_id(delta_index_objects, t))
.map(|delta| (delta, read))
.map(|delta| (delta.1, read.1))
})
{
// Receiving objects.
// Phase 2: Receiving objects.
let objects = objs.step.load(Ordering::Relaxed);
let total_objects = objs.done_at.expect("known amount of objects");
let received_bytes = read_pack.step.load(Ordering::Relaxed);
Expand All @@ -139,7 +152,25 @@ fn translate_progress_to_bar(
let (rate, unit) = human_readable_bytes(counter.rate() as u64);
let msg = format!(", {rate:.2}{unit}/s");

progress_bar.tick(objects, total_objects * NUM_PHASES, &msg)?;
progress_bar.tick(
(total_objects * (num_phases - 2)) + objects,
total_objects * num_phases,
&msg,
)?;
} else if let Some((action, remote)) =
find_in(&tasks, |t| progress_by_id(remote_progress, t))
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is specifically for avoiding pauses when shallow cloning the index, which is when the remote does far more work.

{
if !is_shallow {
continue;
}
// phase 1: work on the remote side

// Resolving deltas.
let objects = remote.step.load(Ordering::Relaxed);
if let Some(total_objects) = remote.done_at {
weihanglo marked this conversation as resolved.
Show resolved Hide resolved
let msg = format!(", ({objects}/{total_objects}) {action}");
progress_bar.tick(objects, total_objects * num_phases, &msg)?;
}
}
}
Ok(())
Expand Down Expand Up @@ -232,7 +263,7 @@ pub fn open_repo(
) -> Result<gix::Repository, gix::open::Error> {
gix::open_opts(repo_path, {
let mut opts = gix::open::Options::default();
opts.permissions.config = gix::permissions::Config::all();
opts.permissions.config = gix::open::permissions::Config::all();
opts.permissions.config.git_binary = purpose.needs_git_binary_config();
opts.with(gix::sec::Trust::Full)
.config_overrides(config_overrides)
Expand Down
36 changes: 29 additions & 7 deletions src/cargo/sources/git/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,26 @@ impl<'cfg> GitSource<'cfg> {
assert!(source_id.is_git(), "id is not git, id={}", source_id);

let remote = GitRemote::new(source_id.url());
let ident = ident(&source_id);

let source = GitSource {
remote,
manifest_reference: source_id.git_reference().unwrap().clone(),
locked_rev: match source_id.precise() {
let manifest_reference = source_id.git_reference().unwrap().clone();
let locked_rev =
match source_id.precise() {
Some(s) => Some(git2::Oid::from_str(s).with_context(|| {
format!("precise value for git is not a git revision: {}", s)
})?),
None => None,
},
};
let ident = ident_shallow(
&source_id,
config
.cli_unstable()
.gitoxide
.map_or(false, |gix| gix.fetch && gix.shallow_deps),
);

let source = GitSource {
remote,
manifest_reference,
locked_rev,
source_id,
path_source: None,
ident,
Expand All @@ -63,6 +72,7 @@ impl<'cfg> GitSource<'cfg> {
}
}

/// Create an identifier from a URL, essentially turning `proto://host/path/repo` into `repo-<hash-of-url>`.
fn ident(id: &SourceId) -> String {
let ident = id
.canonical_url()
Expand All @@ -76,6 +86,18 @@ fn ident(id: &SourceId) -> String {
format!("{}-{}", ident, short_hash(id.canonical_url()))
}

/// Like `ident()`, but appends `-shallow` to the result when `is_shallow` is `true`, turning
/// `proto://host/path/repo` into `repo-<hash-of-url>-shallow`.
///
/// It's important to keep shallow and non-shallow clones separate for reasons of backwards
/// compatibility: older versions of cargo don't necessarily handle shallow clones correctly.
fn ident_shallow(id: &SourceId, is_shallow: bool) -> String {
weihanglo marked this conversation as resolved.
Show resolved Hide resolved
let mut ident = ident(id);
if is_shallow {
ident.push_str("-shallow");
}
ident
}

impl<'cfg> Debug for GitSource<'cfg> {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(f, "git repo at {}", self.remote.url())?;
Expand Down
Loading