From 344f0aeca0f9669ad071ad7fba2143506572cd7d Mon Sep 17 00:00:00 2001 From: Essien Ita Essien <34972+essiene@users.noreply.github.com> Date: Sat, 2 Nov 2024 11:04:02 +0000 Subject: [PATCH] cli: git sync - Proof of concept This is not meant to be merged in this form. I am bashing everything on the head with a large bat, until it works. Once everything works nicely in this draft, I will break it up into smaller tested pieces that are intended to be merged. * Grab current heads and build a set of [(Parent, Child)...] * Start a transaction. * Fetch in the transaction: It's extremely important to fetch *all* branches from *all* remotes. This is because if we don't fetch all branches, when we start rebasing, a root branch may be merged upstream with contents of a branch we didn't fetch, so we end up with conflicts. It's safest to always fetch all branches and then only rebase what we need. * Grab current heads on transaction: * Transaction MutableRepo should be updated after fetch. * Build a Map where old heads are the keys and the new heads are values. * If old == new, skip it. * This way if nothing changed, we end up with an empty map and avoid bugs downstream (index.is_ancestor returns true if old == new). * Relationship is figured out by index.is_ancestor(old_head_id, new_head_id) in a loop. * Check if rebase is needed. * old_heads.difference(new_heads) gives old heads that are no longer heads, which means their descendants need to be rebased. * Find children needing rebase: * Build list of rebase specs with: (commit, new_parent) * commit is the immediate child of old_heads needing rebase. * call repo_mut.transform_descendants with a rewriter closure. * simplify_ancestor_merge * rewrite commits matching the rebase_specs * for all other commits, if any new_parents match old_heads in our old -> new mapping, update those to point at the updated heads. * rebase with emptying newly emptied commits. 
Issue: #1039 --- cli/src/commands/git/mod.rs | 5 + cli/src/commands/git/sync.rs | 306 +++++++++++++++++++++++++++++++ cli/tests/cli-reference@.md.snap | 24 +++ lib/src/rewrite.rs | 5 + 4 files changed, 340 insertions(+) create mode 100644 cli/src/commands/git/sync.rs diff --git a/cli/src/commands/git/mod.rs b/cli/src/commands/git/mod.rs index 3aa6105ec1..da2c783600 100644 --- a/cli/src/commands/git/mod.rs +++ b/cli/src/commands/git/mod.rs @@ -20,6 +20,7 @@ pub mod init; pub mod push; pub mod remote; pub mod submodule; +pub mod sync; use clap::Subcommand; @@ -39,6 +40,8 @@ use self::remote::cmd_git_remote; use self::remote::RemoteCommand; use self::submodule::cmd_git_submodule; use self::submodule::GitSubmoduleCommand; +use self::sync::cmd_git_sync; +use self::sync::GitSyncArgs; use crate::cli_util::CommandHelper; use crate::cli_util::WorkspaceCommandHelper; use crate::command_error::user_error_with_message; @@ -61,6 +64,7 @@ pub enum GitCommand { Remote(RemoteCommand), #[command(subcommand, hide = true)] Submodule(GitSubmoduleCommand), + Sync(GitSyncArgs), } pub fn cmd_git( @@ -77,6 +81,7 @@ pub fn cmd_git( GitCommand::Push(args) => cmd_git_push(ui, command, args), GitCommand::Remote(args) => cmd_git_remote(ui, command, args), GitCommand::Submodule(args) => cmd_git_submodule(ui, command, args), + GitCommand::Sync(args) => cmd_git_sync(ui, command, args), } } diff --git a/cli/src/commands/git/sync.rs b/cli/src/commands/git/sync.rs new file mode 100644 index 0000000000..5767d5560a --- /dev/null +++ b/cli/src/commands/git/sync.rs @@ -0,0 +1,306 @@ +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use std::fmt; + +use itertools::Itertools; +use jj_lib::backend::CommitId; +use jj_lib::commit::Commit; +use jj_lib::repo::Repo; +use jj_lib::revset::FailingSymbolResolver; +use jj_lib::revset::RevsetExpression; +use jj_lib::revset::RevsetIteratorExt; +use jj_lib::rewrite::EmptyBehaviour; +use jj_lib::str_util::StringPattern; + +use 
crate::cli_util::short_change_hash; +use crate::cli_util::short_commit_hash; +use crate::cli_util::CommandHelper; +use crate::cli_util::WorkspaceCommandTransaction; +use crate::commands::CommandError; +use crate::git_util::get_fetch_remotes; +use crate::git_util::get_git_repo; +use crate::git_util::git_fetch; +use crate::git_util::FetchArgs; +use crate::ui::Ui; + +/// Sync the local JJ repo to specified Git remote branch(es). +/// +/// The sync command will first fetch from the Git remote, then +/// rebase all local changes onto the appropriate updated +/// heads that were fetched. +/// +/// Changes that are made empty by the rebase are dropped. +#[derive(clap::Args, Clone, Debug)] +pub struct GitSyncArgs { + #[command(flatten)] + fetch: FetchArgs, +} + +pub fn cmd_git_sync( + ui: &mut Ui, + command: &CommandHelper, + args: &GitSyncArgs, +) -> Result<(), CommandError> { + let mut workspace_command = command.workspace_helper(ui)?; + + let git_repo = get_git_repo(workspace_command.repo().store())?; + let remotes = get_fetch_remotes( + ui, + command.settings(), + &git_repo, + &FetchArgs { + branch: vec![StringPattern::everything()], + remotes: vec![], + all_remotes: true, + }, + )?; + + let remote_patterns = remotes + .iter() + .map(|p| StringPattern::Exact(p.to_string())) + .collect_vec(); + + let mut tx = workspace_command.start_transaction(); + + let old_heads = get_branch_heads(tx.repo(), &args.fetch.branch)?; + let parent_child_pairs = get_parent_child_commit_pairs( + tx.repo(), + &old_heads.iter().map(|c| c.clone()).collect_vec(), + &args.fetch.branch, + &remote_patterns, + )?; + + // prep to git fetch + git_fetch( + ui, + &mut tx, + &git_repo, + &FetchArgs { + branch: vec![StringPattern::everything()], + remotes: remotes.clone(), + all_remotes: true, + }, + )?; + + // is rebase needed? 
+ let updated_heads = get_branch_heads(tx.repo(), &args.fetch.branch)?; + let new_heads = updated_heads + .difference(&old_heads) + .map(|c| c.clone()) + .collect_vec(); + if new_heads.is_empty() { + // rebase not needed + tx.finish(ui, format!("sync completed; no rebase"))?; + return Ok(()); + } + + // find rebase targets + let needs_rebase = old_heads + .difference(&updated_heads) + .map(|c| c.clone()) + .collect_vec(); + + let old_to_new = map_old_to_new_heads(&tx, &needs_rebase[..], &new_heads[..])?; + + let rebase_specs = parent_child_pairs + .iter() + .filter_map(|pair| { + if needs_rebase.contains(&pair.parent) { + if let Some(new) = old_to_new.get(&pair.parent.id()) { + Some(RebaseSpec { + commit: pair.child.id().clone(), + new_parent: new.clone(), + }) + } else { + None + } + } else { + None + } + }) + .collect_vec(); + + let settings = tx.settings().clone(); + + tx.repo_mut().transform_descendants( + &settings, + rebase_specs + .iter() + .map(|spec| spec.commit.clone()) + .collect::>(), + |mut rewriter| { + rewriter.simplify_ancestor_merge(); + for spec in &rebase_specs { + // move children of updated branches to the latest branch head + if rewriter.old_commit().id() == &spec.commit { + rewriter.set_new_parents(vec![spec.new_parent.clone()]); + } else { + // if new parents point at old heads, update them to point + // at the updated heads. + let mut updated_parents: Vec = vec![]; + + let old_parents = rewriter.new_parents().to_vec(); + + for parent in &old_parents { + if let Some(updated) = maybe_update_commit( + rewriter.repo(), + &parent, + &new_heads.iter().map(|c| c.id().clone()).collect_vec(), + ) { + updated_parents.push(updated.clone()); + } else { + updated_parents.push(parent.clone()); + } + } + + rewriter.set_new_parents(updated_parents); + } + } + + if let Some(builder) = + rewriter.rebase_with_empty_behavior(&settings, EmptyBehaviour::AbandonNewlyEmpty)? 
+ { + builder.write()?; + } + + Ok(()) + }, + )?; + + tx.finish(ui, format!("sync completed; commits rebased to new heads"))?; + + Ok(()) +} + +fn get_branch_heads( + repo: &dyn Repo, + branches: &[StringPattern], +) -> Result, CommandError> { + let mut commits: BTreeSet = BTreeSet::from([]); + + for branch in branches { + let mut branch_commits: BTreeSet = RevsetExpression::bookmarks(branch.clone()) + .heads() + .resolve_user_expression(repo, &FailingSymbolResolver)? + .evaluate(repo)? + .iter() + .commits(repo.store()) + .try_collect()?; + + commits.append(&mut branch_commits); + } + + Ok(commits) +} + +fn maybe_update_commit( + repo: &dyn Repo, + commit: &CommitId, + new_heads: &[CommitId], +) -> Option { + new_heads + .iter() + .filter_map(|new| { + if new != commit && repo.index().is_ancestor(commit, new) { + Some(new.clone()) + } else { + None + } + }) + .next() +} + +fn map_old_to_new_heads( + tx: &WorkspaceCommandTransaction, + old_heads: &[Commit], + new_heads: &[Commit], +) -> Result, CommandError> { + let mut out: BTreeMap = BTreeMap::from([]); + let index = tx.repo().index(); + new_heads.iter().for_each(|new| { + old_heads.iter().for_each(|old| { + if old != new && index.is_ancestor(old.id(), new.id()) { + out.insert(old.id().clone(), new.id().clone()); + } + }); + }); + + Ok(out) +} + +fn format_commit(commit: &Commit) -> String { + let change_hash = short_change_hash(commit.change_id()); + let commit_hash = short_commit_hash(commit.id()); + + format!("{commit_hash}:{change_hash}") +} + +#[derive(Eq, Ord, PartialEq, PartialOrd)] +pub struct CommitPair { + parent: Commit, + child: Commit, +} + +impl fmt::Display for CommitPair { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let parent = format_commit(&self.parent); + let child = format_commit(&self.child); + write!(f, "=> {parent} --> {child}") + } +} + +pub struct RebaseSpec { + commit: CommitId, + new_parent: CommitId, +} + +fn get_parent_child_commit_pairs( + repo: &dyn Repo, + start: 
&[Commit], + branches: &[StringPattern], + remotes: &[StringPattern], +) -> Result, CommandError> { + let store = repo.store(); + let mut pairs: Vec = vec![]; + + for commit in start { + for parent_id in commit.parent_ids() { + pairs.push(CommitPair { + parent: store.get_commit(parent_id)?, + child: commit.clone(), + }); + } + } + + let start = start.iter().map(|c| c.id().clone()).collect_vec(); + + for remote in remotes { + for branch in branches { + let commits: Vec = RevsetExpression::commits(start.to_vec()) + .descendants() + .minus(&RevsetExpression::commits(start.to_vec())) + .minus(&RevsetExpression::remote_bookmarks( + branch.clone(), + remote.clone(), + None, + )) + .resolve_user_expression(repo, &FailingSymbolResolver)? + .evaluate(repo)? + .iter() + .commits(repo.store()) + .try_collect()?; + + for commit in commits { + for parent_id in commit.parent_ids() { + pairs.push(CommitPair { + parent: store.get_commit(parent_id)?, + child: commit.clone(), + }); + } + } + } + } + + pairs.sort(); + Ok(pairs) +} diff --git a/cli/tests/cli-reference@.md.snap b/cli/tests/cli-reference@.md.snap index c8850015a2..dbedf78450 100644 --- a/cli/tests/cli-reference@.md.snap +++ b/cli/tests/cli-reference@.md.snap @@ -60,6 +60,7 @@ This document contains the help content for the `jj` command-line program. * [`jj git remote remove`↴](#jj-git-remote-remove) * [`jj git remote rename`↴](#jj-git-remote-rename) * [`jj git remote set-url`↴](#jj-git-remote-set-url) +* [`jj git sync`↴](#jj-git-sync) * [`jj help`↴](#jj-help) * [`jj init`↴](#jj-init) * [`jj interdiff`↴](#jj-interdiff) @@ -1040,6 +1041,7 @@ For a comparison with Git, including a table of commands, see https://martinvonz * `init` — Create a new Git backed repo * `push` — Push to a Git remote * `remote` — Manage Git remotes +* `sync` — Sync the local JJ repo to specified Git remote branch(es) @@ -1247,6 +1249,28 @@ Set the URL of a Git remote +## `jj git sync` + +Sync the local JJ repo to specified Git remote branch(es). 
+ +The sync command will first fetch from the Git remote, then rebase all local changes onto the appropriate updated heads that were fetched. + +Changes that are made empty by the rebase are dropped. + +**Usage:** `jj git sync [OPTIONS]` + +###### **Options:** + +* `-b`, `--branch ` — Fetch only some of the branches + + By default, the specified name matches exactly. Use `glob:` prefix to expand `*` as a glob. The other wildcard characters aren't supported. + + Default value: `glob:*` +* `--remote ` — The remote to fetch from (only named remotes are supported, can be repeated) +* `--all-remotes` — Fetch from all remotes + + + ## `jj help` Print this message or the help of the given subcommand(s) diff --git a/lib/src/rewrite.rs b/lib/src/rewrite.rs index 75932c4491..8faa4c7161 100644 --- a/lib/src/rewrite.rs +++ b/lib/src/rewrite.rs @@ -155,6 +155,11 @@ impl<'repo> CommitRewriter<'repo> { self.mut_repo } + /// Returns immutable reference to the `MutableRepo`. + pub fn repo(&mut self) -> &MutableRepo { + &self.mut_repo + } + /// The commit we're rewriting. pub fn old_commit(&self) -> &Commit { &self.old_commit