diff --git a/Documentation/technical/api-path-walk.txt b/Documentation/technical/api-path-walk.txt index 83bfe3d665e9fb..85d8e262401486 100644 --- a/Documentation/technical/api-path-walk.txt +++ b/Documentation/technical/api-path-walk.txt @@ -65,6 +65,14 @@ better off using the revision walk API instead. the revision walk so that the walk emits commits marked with the `UNINTERESTING` flag. +`edge_aggressive`:: + For performance reasons, usually only the boundary commits are + explored to find UNINTERESTING objects. However, in the case of + shallow clones it can be helpful to mark all trees and blobs + reachable from UNINTERESTING tip commits as UNINTERESTING. This + matches the behavior of `--objects-edge-aggressive` in the + revision API. + `pl`:: This pattern list pointer allows focusing the path-walk search to a set of patterns, only emitting paths that match the given diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index b49f5fe797a28d..aa694204d8b313 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -201,6 +201,7 @@ static int keep_unreachable, unpack_unreachable, include_tag; static timestamp_t unpack_unreachable_expiration; static int pack_loose_unreachable; static int cruft; +static int shallow = 0; static timestamp_t cruft_expiration; static int local; static int have_non_local_packs; @@ -4438,6 +4439,7 @@ static void get_object_list_path_walk(struct rev_info *revs) * base objects. */ info.prune_all_uninteresting = sparse; + info.edge_aggressive = shallow; if (walk_objects_by_path(&info)) die(_("failed to pack objects via path-walk")); @@ -4627,7 +4629,6 @@ int cmd_pack_objects(int argc, struct repository *repo UNUSED) { int use_internal_rev_list = 0; - int shallow = 0; int all_progress_implied = 0; struct strvec rp = STRVEC_INIT; int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0; @@ -4812,10 +4813,6 @@ int cmd_pack_objects(int argc, warning(_("cannot use delta islands with --path-walk")); path_walk = 0; } - if (path_walk && shallow) { - warning(_("cannot use --shallow with --path-walk")); - path_walk = 0; - } if (path_walk) { strvec_push(&rp, "--boundary"); /* diff --git a/path-walk.c b/path-walk.c index c0be95922c1615..6451e16562971f 100644 --- a/path-walk.c +++ b/path-walk.c @@ -18,6 +18,7 @@ #include "trace2.h" #include "tree.h" #include "tree-walk.h" +#include "list-objects.h" struct type_and_oid_list { @@ -233,6 +234,26 @@ static void clear_strmap(struct strmap *map) strmap_init(map); } +static struct repository *edge_repo; +static struct type_and_oid_list *edge_tree_list; + +static void show_edge(struct commit *commit) +{ + struct tree *t = repo_get_commit_tree(edge_repo, commit); + + if (!t) + return; + + if (commit->object.flags & UNINTERESTING) + t->object.flags |= UNINTERESTING; + + if (t->object.flags & SEEN) + return; + t->object.flags |= SEEN; + + oid_array_append(&edge_tree_list->oids, &t->object.oid); +} + /** * Given the configuration of 'info', walk the commits based on 'info->revs' and * call 'info->path_fn' on each discovered path. @@ -242,7 +263,7 @@ static void clear_strmap(struct strmap *map) int walk_objects_by_path(struct path_walk_info *info) { const char *root_path = ""; - int ret = 0, has_uninteresting = 0; + int ret = 0; size_t commits_nr = 0, paths_nr = 0; struct commit *c; struct type_and_oid_list *root_tree_list; @@ -254,7 +275,6 @@ int walk_objects_by_path(struct path_walk_info *info) .path_stack = STRING_LIST_INIT_DUP, .paths_to_lists = STRMAP_INIT }; - struct oidset root_tree_set = OIDSET_INIT; trace2_region_enter("path-walk", "commit-walk", info->revs->repo); @@ -280,6 +300,18 @@ int walk_objects_by_path(struct path_walk_info *info) if (prepare_revision_walk(info->revs)) die(_("failed to setup revision walk")); + /* + * Do an initial walk of tip commits in info->revs->commits and + * info->revs->cmdline.rev to match the standard edge-walk behavior. + * + * This is particularly important when 'edge_aggressive' is set. + */ + info->revs->edge_hint_aggressive = info->edge_aggressive; + + edge_repo = info->revs->repo; + edge_tree_list = root_tree_list; + mark_edges_uninteresting(info->revs, show_edge, info->prune_all_uninteresting); + info->revs->blob_objects = info->revs->tree_objects = 0; if (info->tags) { @@ -366,17 +398,10 @@ int walk_objects_by_path(struct path_walk_info *info) if (t->object.flags & SEEN) continue; t->object.flags |= SEEN; - - if (!oidset_insert(&root_tree_set, oid)) - oid_array_append(&root_tree_list->oids, oid); + oid_array_append(&root_tree_list->oids, oid); } else { warning("could not find tree %s", oid_to_hex(oid)); } - - if (t && (c->object.flags & UNINTERESTING)) { - t->object.flags |= UNINTERESTING; - has_uninteresting = 1; - } } trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr); @@ -389,21 +414,6 @@ int walk_objects_by_path(struct path_walk_info *info) oid_array_clear(&commit_list->oids); free(commit_list); - /* - * Before performing a DFS of our paths and emitting them as interesting, - * do a full walk of the trees to distribute the UNINTERESTING bit. Use - * the sparse algorithm if prune_all_uninteresting was set. - */ - if (has_uninteresting) { - trace2_region_enter("path-walk", "uninteresting-walk", info->revs->repo); - if (info->prune_all_uninteresting) - mark_trees_uninteresting_sparse(ctx.repo, &root_tree_set); - else - mark_trees_uninteresting_dense(ctx.repo, &root_tree_set); - trace2_region_leave("path-walk", "uninteresting-walk", info->revs->repo); - } - oidset_clear(&root_tree_set); - string_list_append(&ctx.path_stack, root_path); trace2_region_enter("path-walk", "path-walk", info->revs->repo); diff --git a/path-walk.h b/path-walk.h index 090cda3b5cf8f4..d19048d0d312e5 100644 --- a/path-walk.h +++ b/path-walk.h @@ -48,6 +48,13 @@ struct path_walk_info { */ int prune_all_uninteresting; + /** + * When 'edge_aggressive' is set, then the revision walk will use + * the '--object-edge-aggressive' option to mark even more objects + * as uninteresting. + */ + int edge_aggressive; + /** * Specify a sparse-checkout definition to match our paths to. Do not * walk outside of this sparse definition. If the patterns are in diff --git a/t/t5538-push-shallow.sh b/t/t5538-push-shallow.sh index 6adc3a20a45b77..7b3efd9b313f15 100755 --- a/t/t5538-push-shallow.sh +++ b/t/t5538-push-shallow.sh @@ -124,4 +124,38 @@ EOF git cat-file blob $(echo 1|git hash-object --stdin) >/dev/null ) ' + +test_expect_success 'push new commit from shallow clone has correct object count' ' + git init origin && + test_commit -C origin a && + test_commit -C origin b && + + git clone --depth=1 "file://$(pwd)/origin" client && + git -C client checkout -b topic && + git -C client commit --allow-empty -m "empty" && + GIT_PROGRESS_DELAY=0 git -C client push --progress origin topic 2>err && + test_grep "Enumerating objects: 1, done." err +' + +test_expect_success 'push new commit from shallow clone has good deltas' ' + git init base && + test_seq 1 999 >base/a && + test_commit -C base initial && + git -C base add a && + git -C base commit -m "big a" && + + git clone --depth=1 "file://$(pwd)/base" deltas && + git -C deltas checkout -b deltas && + test_seq 1 1000 >deltas/a && + git -C deltas commit -a -m "bigger a" && + GIT_TRACE2_PERF="$(pwd)/trace.txt" \ + GIT_PROGRESS_DELAY=0 git -C deltas push --progress origin deltas 2>err && + + test_grep "Enumerating objects: 5, done" err && + + # If the delta base is found, then this message uses "bytes". + # If the delta base is not found, then this message uses "KiB". + test_grep "Writing objects: .* bytes" err +' + test_done diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 425aa8d8789ed1..7923dfbf3cb5a7 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -527,6 +527,7 @@ test_expect_success 'fetch lazy-fetches only to resolve deltas' ' # used as delta bases! GIT_TRACE_PACKET="$(pwd)/trace" \ GIT_TEST_FULL_NAME_HASH=0 \ + GIT_TEST_PACK_PATH_WALK=0 \ git -C client \ fetch "file://$(pwd)/server" main && @@ -557,6 +558,7 @@ test_expect_success 'fetch lazy-fetches only to resolve deltas, protocol v2' ' # used as delta bases! GIT_TRACE_PACKET="$(pwd)/trace" \ GIT_TEST_FULL_NAME_HASH=0 \ + GIT_TEST_PACK_PATH_WALK=0 \ git -C client \ fetch "file://$(pwd)/server" main && diff --git a/t/t6601-path-walk.sh b/t/t6601-path-walk.sh index 312bf3c19c176a..d67a077d37b99d 100755 --- a/t/t6601-path-walk.sh +++ b/t/t6601-path-walk.sh @@ -181,13 +181,13 @@ test_expect_success 'topic, not base' ' COMMIT::$(git rev-parse topic) commits:1 TREE::$(git rev-parse topic^{tree}) - TREE:left/:$(git rev-parse topic:left) + TREE:left/:$(git rev-parse base~1:left):UNINTERESTING TREE:right/:$(git rev-parse topic:right) trees:3 - BLOB:a:$(git rev-parse topic:a) - BLOB:left/b:$(git rev-parse topic:left/b) + BLOB:a:$(git rev-parse base~1:a):UNINTERESTING + BLOB:left/b:$(git rev-parse base~1:left/b):UNINTERESTING BLOB:right/c:$(git rev-parse topic:right/c) - BLOB:right/d:$(git rev-parse topic:right/d) + BLOB:right/d:$(git rev-parse base~1:right/d):UNINTERESTING blobs:4 tags:0 EOF @@ -205,10 +205,10 @@ test_expect_success 'topic, not base, only blobs' ' cat >expect <<-EOF && commits:0 trees:0 - BLOB:a:$(git rev-parse topic:a) - BLOB:left/b:$(git rev-parse topic:left/b) + BLOB:a:$(git rev-parse base~1:a):UNINTERESTING + BLOB:left/b:$(git rev-parse base~1:left/b):UNINTERESTING BLOB:right/c:$(git rev-parse topic:right/c) - BLOB:right/d:$(git rev-parse topic:right/d) + BLOB:right/d:$(git rev-parse base~1:right/d):UNINTERESTING blobs:4 tags:0 EOF @@ -246,7 +246,7 @@ test_expect_success 'topic, not base, only trees' ' cat >expect <<-EOF && commits:0 TREE::$(git rev-parse topic^{tree}) - TREE:left/:$(git rev-parse topic:left) + TREE:left/:$(git rev-parse base~1:left):UNINTERESTING TREE:right/:$(git rev-parse topic:right) trees:3 blobs:0