From d4bfee2225f824b429bb9feda799ce4ed4cf3cbb Mon Sep 17 00:00:00 2001
From: Yuya Nishihara
Date: Fri, 29 Dec 2023 16:02:56 +0900
Subject: [PATCH] operation: add "op prune --ancestors" command that "rebases"
 operations (PoC)

In order to implement GC (#12), we'll need to somehow prune old
operations. Perhaps the easiest implementation is to just remove
unwanted operation files and put a tombstone file instead (like git
shallow.) However, the removed operations might be referenced by
another jj process running in parallel. Since the parallel operation
thinks all the historical head commits are reachable, the removed
operations would have to be resurrected (or fix up index data, etc.)
when the op heads get merged.

The idea behind this patch is to split the "op log" GC into two steps:

 1. recreate operations to be retained and make the old history
    unreachable,
 2. delete unreachable operations if the head was created e.g. 3 days
    ago.

The latter will be run by "jj util gc". I don't think GC can be
implemented 100% safely against lock-less append-only storage, and
we'll probably need some timestamp-based mechanism to not remove
objects that might be referenced by an uncommitted operation.

FWIW, another nice thing about this implementation is that the index is
automatically invalidated as the op id changes. The bad thing is that
the "undo" description would contain an old op id. It seems the
performance is pretty okay.
---
 cli/src/commands/operation.rs | 80 ++++++++++++++++++++++++++++++++++-
 1 file changed, 79 insertions(+), 1 deletion(-)

diff --git a/cli/src/commands/operation.rs b/cli/src/commands/operation.rs
index 03166adf37..d6a9e3ee66 100644
--- a/cli/src/commands/operation.rs
+++ b/cli/src/commands/operation.rs
@@ -1,6 +1,11 @@
+use std::collections::{HashMap, HashSet};
+use std::slice;
+
 use clap::Subcommand;
+use itertools::Itertools as _;
 use jj_lib::backend::ObjectId;
-use jj_lib::operation;
+use jj_lib::op_store::OperationId;
+use jj_lib::operation::{self, Operation};
 use jj_lib::repo::Repo;
 
 use crate::cli_util::{user_error, CommandError, CommandHelper, LogContentFormat};
@@ -18,6 +23,7 @@ pub enum OperationCommand {
     Log(OperationLogArgs),
     Undo(OperationUndoArgs),
     Restore(OperationRestoreArgs),
+    Prune(OperationPruneArgs),
 }
 
 /// Show the operation log
@@ -75,6 +81,19 @@ pub struct OperationUndoArgs {
     what: Vec<UndoWhatToRestore>,
 }
 
+/// Prune unneeded operations (EXPERIMENTAL)
+#[derive(clap::Args, Clone, Debug)]
+pub struct OperationPruneArgs {
+    /// The latest operation to prune
+    ///
+    /// The specified operation and all of its ancestors will be removed. The
+    /// descendant operations will have new ids.
+    #[arg(long, required = true)]
+    ancestors: Option<String>,
+    // TODO: There may be an option to move op heads backwards to discard recent
+    // commit objects?
+}
+
 #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, clap::ValueEnum)]
 enum UndoWhatToRestore {
     /// The jj repo state and local branches
@@ -235,6 +254,64 @@ fn cmd_op_restore(
     Ok(())
 }
 
+fn cmd_op_prune(
+    ui: &mut Ui,
+    command: &CommandHelper,
+    args: &OperationPruneArgs,
+) -> Result<(), CommandError> {
+    let workspace_command = command.workspace_helper(ui)?;
+    let repo = workspace_command.repo();
+    let op_store = repo.op_store();
+    let op_heads_store = repo.op_heads_store();
+    let old_head_op = repo.operation().clone();
+    if let Some(unwanted_op_str) = &args.ancestors {
+        let unwanted_head_op = workspace_command.resolve_single_op(unwanted_op_str)?;
+        if old_head_op == unwanted_head_op {
+            return Err(user_error("Cannot prune the current operation"));
+        }
+        // TODO: Extract lib function that walks/rewrites operations.
+        // TODO: Stop walk at unwanted_head_op if possible.
+        let unwanted_op_ids: HashSet<OperationId> = operation::walk_ancestors(&unwanted_head_op)
+            .map_ok(|op| op.id().clone())
+            .try_collect()?;
+        let wanted_ops: Vec<Operation> = operation::walk_ancestors(&old_head_op)
+            .filter_ok(|op| !unwanted_op_ids.contains(op.id()))
+            .try_collect()?;
+        // TODO: Transaction::merge_operation() doesn't support multiple roots
+        // atm. If root op id is introduced, maybe unwanted_op_ids can be mapped
+        // to the root id?
+        let mut rewritten_op_ids: HashMap<OperationId, OperationId> = HashMap::new();
+        for old_op in wanted_ops.into_iter().rev() {
+            let mut data = old_op.store_operation().clone();
+            data.parents = data
+                .parents
+                .iter()
+                .filter(|id| !unwanted_op_ids.contains(id))
+                .map(|id| {
+                    rewritten_op_ids
+                        .get(id)
+                        .expect("parent should have been rewritten")
+                })
+                .cloned()
+                .collect();
+            let new_id = op_store.write_operation(&data)?;
+            rewritten_op_ids.insert(old_op.id().to_owned(), new_id);
+        }
+        // TODO: somehow integrate with UnpublishedOperation API?
+        let new_head_op_id = rewritten_op_ids
+            .get(old_head_op.id())
+            .expect("current op should have been rewritten");
+        op_heads_store.update_op_heads(slice::from_ref(old_head_op.id()), new_head_op_id);
+        let new_head_op = {
+            let data = op_store.read_operation(new_head_op_id)?;
+            Operation::new(op_store.clone(), new_head_op_id.to_owned(), data)
+        };
+        let repo = repo.reload_at(&new_head_op)?;
+        repo.readonly_index(); // build index
+    }
+    Ok(())
+}
+
 pub fn cmd_operation(
     ui: &mut Ui,
     command: &CommandHelper,
@@ -244,5 +321,6 @@ pub fn cmd_operation(
         OperationCommand::Log(args) => cmd_op_log(ui, command, args),
         OperationCommand::Restore(args) => cmd_op_restore(ui, command, args),
         OperationCommand::Undo(args) => cmd_op_undo(ui, command, args),
+        OperationCommand::Prune(args) => cmd_op_prune(ui, command, args),
     }
 }