Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize submodules fetching #5399

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 21 additions & 13 deletions src/libfetchers/git.cc
Original file line number Diff line number Diff line change
Expand Up @@ -439,19 +439,27 @@ struct GitInputScheme : InputScheme
}

if (submodules) {
Path tmpGitDir = createTempDir();
AutoDelete delTmpGitDir(tmpGitDir, true);

runProgram("git", true, { "-c", "init.defaultBranch=" + gitInitialBranch, "init", tmpDir, "--separate-git-dir", tmpGitDir });
// TODO: repoDir might lack the ref (it only checks if rev
// exists, see FIXME above) so use a big hammer and fetch
// everything to ensure we get the rev.
runProgram("git", true, { "-C", tmpDir, "fetch", "--quiet", "--force",
"--update-head-ok", "--", repoDir, "refs/*:refs/*" });

runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", input.getRev()->gitRev() });
runProgram("git", true, { "-C", tmpDir, "remote", "add", "origin", actualUrl });
runProgram("git", true, { "-C", tmpDir, "submodule", "--quiet", "update", "--init", "--recursive" });
if (isLocal) {
// `git submodule update` modifies the source repo, so do a temporary, cheap clone with its own HEADs
runProgram("git", true, { "clone", "--quiet", "--no-checkout", "--shared", "-c", "submodule.alternateLocation=superproject", repoDir, tmpDir });
runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", input.getRev()->gitRev() });
// try checking out submodules without fetching first since otherwise git seems to unnecessarily do a fetch when the
// submodule commit is present but not reachable
try {
runProgram2({
.program ="git",
.args = { "-C", tmpDir, "submodule", "update", "--init", "--recursive", "--quiet", "--no-fetch" }
});
} catch (ExecError &) {
// TODO: cache these fetches
runProgram("git", true, { "-C", tmpDir, "submodule", "update", "--init", "--recursive", "--quiet" });
}
} else {
// it's fine to modify `repoDir` HEADs if it is in the Nix cache; otherwise *no* submodule fetches would be cached
runProgram("git", true, { "--git-dir", repoDir, "--work-tree", tmpDir, "checkout", "--quiet", input.getRev()->gitRev(), "." });
// `-C` should be redundant, probably a bug in `git submodule`
runProgram("git", true, { "--git-dir", repoDir, "--work-tree", tmpDir, "-C", tmpDir, "submodule", "--quiet", "update", "--init", "--recursive" });
Comment on lines +459 to +461
Copy link
Contributor Author

@Kha Kha Oct 17, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This sequence (which existed before) should probably be covered by a repo lock to prevent races

}

filter = isNotDotGitDirectory;
} else {
Expand Down
92 changes: 50 additions & 42 deletions tests/fetchGitSubmodules.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,60 +38,68 @@ git -C $rootRepo commit -m "Add submodule"

rev=$(git -C $rootRepo rev-parse HEAD)

r1=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; }).outPath")
r2=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; submodules = false; }).outPath")
r3=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; submodules = true; }).outPath")
doTests() {
r1=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; }).outPath")
r2=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; submodules = false; }).outPath")
r3=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; submodules = true; }).outPath")

[[ $r1 == $r2 ]]
[[ $r2 != $r3 ]]
[[ $r1 == $r2 ]]
[[ $r2 != $r3 ]]

r4=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$rootRepo; ref = \"master\"; rev = \"$rev\"; }).outPath")
r5=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$rootRepo; ref = \"master\"; rev = \"$rev\"; submodules = false; }).outPath")
r6=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$rootRepo; ref = \"master\"; rev = \"$rev\"; submodules = true; }).outPath")
r7=$(nix eval --raw --expr "(builtins.fetchGit { url = $rootRepo; ref = \"master\"; rev = \"$rev\"; submodules = true; }).outPath")
r8=$(nix eval --raw --expr "(builtins.fetchGit { url = $rootRepo; rev = \"$rev\"; submodules = true; }).outPath")
r4=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$rootRepo; ref = \"master\"; rev = \"$rev\"; }).outPath")
r5=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$rootRepo; ref = \"master\"; rev = \"$rev\"; submodules = false; }).outPath")
r6=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$rootRepo; ref = \"master\"; rev = \"$rev\"; submodules = true; }).outPath")
r7=$(nix eval --raw --expr "(builtins.fetchGit { url = $rootRepo; ref = \"master\"; rev = \"$rev\"; submodules = true; }).outPath")
r8=$(nix eval --raw --expr "(builtins.fetchGit { url = $rootRepo; rev = \"$rev\"; submodules = true; }).outPath")

[[ $r1 == $r4 ]]
[[ $r4 == $r5 ]]
[[ $r3 == $r6 ]]
[[ $r6 == $r7 ]]
[[ $r7 == $r8 ]]
[[ $r1 == $r4 ]]
[[ $r4 == $r5 ]]
[[ $r3 == $r6 ]]
[[ $r6 == $r7 ]]
[[ $r7 == $r8 ]]

have_submodules=$(nix eval --expr "(builtins.fetchGit { url = $rootRepo; rev = \"$rev\"; }).submodules")
[[ $have_submodules == false ]]
have_submodules=$(nix eval --expr "(builtins.fetchGit { url = $rootRepo; rev = \"$rev\"; }).submodules")
[[ $have_submodules == false ]]

have_submodules=$(nix eval --expr "(builtins.fetchGit { url = $rootRepo; rev = \"$rev\"; submodules = false; }).submodules")
[[ $have_submodules == false ]]
have_submodules=$(nix eval --expr "(builtins.fetchGit { url = $rootRepo; rev = \"$rev\"; submodules = false; }).submodules")
[[ $have_submodules == false ]]

have_submodules=$(nix eval --expr "(builtins.fetchGit { url = $rootRepo; rev = \"$rev\"; submodules = true; }).submodules")
[[ $have_submodules == true ]]
have_submodules=$(nix eval --expr "(builtins.fetchGit { url = $rootRepo; rev = \"$rev\"; submodules = true; }).submodules")
[[ $have_submodules == true ]]

pathWithoutSubmodules=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; }).outPath")
pathWithSubmodules=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; submodules = true; }).outPath")
pathWithSubmodulesAgain=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; submodules = true; }).outPath")
pathWithSubmodulesAgainWithRef=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$rootRepo; ref = \"master\"; rev = \"$rev\"; submodules = true; }).outPath")
pathWithoutSubmodules=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; }).outPath")
pathWithSubmodules=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; submodules = true; }).outPath")
pathWithSubmodulesAgain=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; submodules = true; }).outPath")
pathWithSubmodulesAgainWithRef=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$rootRepo; ref = \"master\"; rev = \"$rev\"; submodules = true; }).outPath")

# The resulting store path cannot be the same.
[[ $pathWithoutSubmodules != $pathWithSubmodules ]]
# The resulting store path cannot be the same.
[[ $pathWithoutSubmodules != $pathWithSubmodules ]]

# Checking out the same repo with submodules returns in the same store path.
[[ $pathWithSubmodules == $pathWithSubmodulesAgain ]]
# Checking out the same repo with submodules returns in the same store path.
[[ $pathWithSubmodules == $pathWithSubmodulesAgain ]]

# Checking out the same repo with submodules returns in the same store path.
[[ $pathWithSubmodulesAgain == $pathWithSubmodulesAgainWithRef ]]
# Checking out the same repo with submodules returns in the same store path.
[[ $pathWithSubmodulesAgain == $pathWithSubmodulesAgainWithRef ]]

# The submodules flag is actually honored.
[[ ! -e $pathWithoutSubmodules/sub/content ]]
[[ -e $pathWithSubmodules/sub/content ]]
# The submodules flag is actually honored.
[[ ! -e $pathWithoutSubmodules/sub/content ]]
[[ -e $pathWithSubmodules/sub/content ]]

[[ -e $pathWithSubmodulesAgainWithRef/sub/content ]]
[[ -e $pathWithSubmodulesAgainWithRef/sub/content ]]

# No .git directory or submodule reference files must be left
test "$(find "$pathWithSubmodules" -name .git)" = ""
# No .git directory or submodule reference files must be left
test "$(find "$pathWithSubmodules" -name .git)" = ""

# Git repos without submodules can be fetched with submodules = true.
subRev=$(git -C $subRepo rev-parse HEAD)
noSubmoduleRepoBaseline=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$subRepo; rev = \"$subRev\"; }).outPath")
noSubmoduleRepo=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$subRepo; rev = \"$subRev\"; submodules = true; }).outPath")
# Git repos without submodules can be fetched with submodules = true.
subRev=$(git -C $subRepo rev-parse HEAD)
noSubmoduleRepoBaseline=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$subRepo; rev = \"$subRev\"; }).outPath")
noSubmoduleRepo=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$subRepo; rev = \"$subRev\"; submodules = true; }).outPath")

[[ $noSubmoduleRepoBaseline == $noSubmoduleRepo ]]
[[ $noSubmoduleRepoBaseline == $noSubmoduleRepo ]]
}

doTests

# again, with forced clones
export _NIX_FORCE_HTTP=1
doTests