Skip to content

Commit

Permalink
Merge pull request #9086 from nipunn1313/partial_clone
Browse files Browse the repository at this point in the history
Use git's partial clone feature to speed up pip
  • Loading branch information
sbidoul authored Aug 15, 2021
2 parents d8fba8d + 2d35b80 commit 01308b8
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 1 deletion.
1 change: 1 addition & 0 deletions news/9086.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
When a revision is specified in a Git URL, use git's partial clone feature to speed up source retrieval.
17 changes: 16 additions & 1 deletion src/pip/_internal/vcs/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,12 +262,27 @@ def fetch_new(self, dest, url, rev_options):
# type: (str, HiddenText, RevOptions) -> None
rev_display = rev_options.to_display()
logger.info("Cloning %s%s to %s", url, rev_display, display_path(dest))
self.run_command(make_command("clone", "-q", url, dest))
if self.get_git_version() >= (2, 17):
# Git added support for partial clone in 2.17
# https://git-scm.com/docs/partial-clone
# Speeds up cloning by functioning without a complete copy of repository
self.run_command(
make_command(
"clone",
"--filter=blob:none",
"-q",
url,
dest,
)
)
else:
self.run_command(make_command("clone", "-q", url, dest))

if rev_options.rev:
# Then a specific revision was requested.
rev_options = self.resolve_revision(dest, url, rev_options)
branch_name = getattr(rev_options, "branch_name", None)
logger.debug("Rev options %s, branch_name %s", rev_options, branch_name)
if branch_name is None:
# Only do a checkout if the current commit id doesn't match
# the requested revision.
Expand Down
105 changes: 105 additions & 0 deletions tests/functional/test_vcs_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import os
from unittest.mock import patch

import pytest

Expand Down Expand Up @@ -282,3 +283,107 @@ def test_resolve_commit_not_on_branch(script, tmp_path):
# check we can fetch our commit
rev_options = Git.make_rev_options(commit)
Git().fetch_new(str(clone_path), repo_path.as_uri(), rev_options)


def _initialize_clonetest_server(repo_path, script, enable_partial_clone):
repo_path.mkdir()
script.run("git", "init", cwd=str(repo_path))
repo_file = repo_path / "file.txt"
repo_file.write_text(u".")
script.run("git", "add", "file.txt", cwd=str(repo_path))
script.run("git", "commit", "-m", "initial commit", cwd=str(repo_path))

# Enable filtering support on server
if enable_partial_clone:
script.run("git", "config", "uploadpack.allowFilter", "true", cwd=repo_path)
script.run(
"git", "config", "uploadpack.allowanysha1inwant", "true", cwd=repo_path
)

return repo_file


@pytest.mark.skipif(Git().get_git_version() < (2, 17), reason="git too old")
def test_partial_clone(script, tmp_path):
"""Test partial clone w/ a git-server that supports it"""
repo_path = tmp_path / "repo"
repo_file = _initialize_clonetest_server(
repo_path, script, enable_partial_clone=True
)
clone_path1 = repo_path / "clone1"
clone_path2 = repo_path / "clone2"

commit = script.run("git", "rev-parse", "HEAD", cwd=str(repo_path)).stdout.strip()

# Check that we can clone at HEAD
Git().fetch_new(str(clone_path1), repo_path.as_uri(), Git.make_rev_options())
# Check that we can clone to commit
Git().fetch_new(str(clone_path2), repo_path.as_uri(), Git.make_rev_options(commit))

# Write some additional stuff to git pull
repo_file.write_text(u"..")
script.run("git", "commit", "-am", "second commit", cwd=str(repo_path))

# Make sure git pull works - with server supporting filtering
assert (
"warning: filtering not recognized by server, ignoring"
not in script.run("git", "pull", cwd=clone_path1).stderr
)
assert (
"warning: filtering not recognized by server, ignoring"
not in script.run("git", "pull", cwd=clone_path2).stderr
)


@pytest.mark.skipif(Git().get_git_version() < (2, 17), reason="git too old")
def test_partial_clone_without_server_support(script, tmp_path):
"""Test partial clone w/ a git-server that does not support it"""
repo_path = tmp_path / "repo"
repo_file = _initialize_clonetest_server(
repo_path, script, enable_partial_clone=False
)
clone_path1 = repo_path / "clone1"
clone_path2 = repo_path / "clone2"

commit = script.run("git", "rev-parse", "HEAD", cwd=str(repo_path)).stdout.strip()

# Check that we can clone at HEAD
Git().fetch_new(str(clone_path1), repo_path.as_uri(), Git.make_rev_options())
# Check that we can clone to commit
Git().fetch_new(str(clone_path2), repo_path.as_uri(), Git.make_rev_options(commit))

# Write some additional stuff to git pull
repo_file.write_text(u"..")
script.run("git", "commit", "-am", "second commit", cwd=str(repo_path))

# Make sure git pull works - even though server doesn't support filtering
assert (
"warning: filtering not recognized by server, ignoring"
in script.run("git", "pull", cwd=clone_path1).stderr
)
assert (
"warning: filtering not recognized by server, ignoring"
in script.run("git", "pull", cwd=clone_path2).stderr
)


def test_clone_without_partial_clone_support(script, tmp_path):
"""Older git clients don't support partial clone. Test the fallback path"""
repo_path = tmp_path / "repo"
repo_file = _initialize_clonetest_server(
repo_path, script, enable_partial_clone=True
)
clone_path = repo_path / "clone1"

# Check that we can clone w/ old version of git w/o --filter
with patch("pip._internal.vcs.git.Git.get_git_version", return_value=(2, 16)):
Git().fetch_new(str(clone_path), repo_path.as_uri(), Git.make_rev_options())

repo_file.write_text(u"...")
script.run("git", "commit", "-am", "third commit", cwd=str(repo_path))

# Should work fine w/o attempting to use `--filter` args
assert (
"warning: filtering not recognized by server, ignoring"
not in script.run("git", "pull", cwd=clone_path).stderr
)

0 comments on commit 01308b8

Please sign in to comment.