From f7ac0d5716b65d7c5a0f47e58707c19fc3d26977 Mon Sep 17 00:00:00 2001 From: Nipunn Koorapati Date: Sun, 1 Nov 2020 23:36:25 +0000 Subject: [PATCH] Use git's partial clone feature to speed up pip Clone with --filter=blob:none, which fetches all metadata up front but fetches blobs lazily, only as they are needed by checkout. Since pip typically needs only the blobs for a single revision, this can be a big improvement, especially when fetching from repositories with a lot of history or over slower network connections. Added a unit test for the rev-less path. Confirmed that both branches of the if/else are exercised by the unit tests. --- news/9086.feature.rst | 2 ++ src/pip/_internal/vcs/git.py | 5 ++++- tests/functional/test_vcs_git.py | 21 +++++++++++++++++++++ tests/lib/__init__.py | 2 ++ tests/lib/local_repos.py | 3 +++ 5 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 news/9086.feature.rst diff --git a/news/9086.feature.rst b/news/9086.feature.rst new file mode 100644 index 00000000000..6c979b904e3 --- /dev/null +++ b/news/9086.feature.rst @@ -0,0 +1,2 @@ +Use git's partial clone feature to speed up pip +when a git-rev is specified. diff --git a/src/pip/_internal/vcs/git.py b/src/pip/_internal/vcs/git.py index 1831aede58a..c835d1a3bcd 100644 --- a/src/pip/_internal/vcs/git.py +++ b/src/pip/_internal/vcs/git.py @@ -251,12 +251,15 @@ def fetch_new(self, dest, url, rev_options): # type: (str, HiddenText, RevOptions) -> None rev_display = rev_options.to_display() logger.info('Cloning %s%s to %s', url, rev_display, display_path(dest)) - self.run_command(make_command('clone', '-q', url, dest)) + self.run_command(make_command( + 'clone', '--filter=blob:none', '-q', url, dest, + )) if rev_options.rev: # Then a specific revision was requested.
rev_options = self.resolve_revision(dest, url, rev_options) branch_name = getattr(rev_options, 'branch_name', None) + logger.info('Rev options %s, branch_name %s', rev_options, branch_name) if branch_name is None: # Only do a checkout if the current commit id doesn't match # the requested revision. diff --git a/tests/functional/test_vcs_git.py b/tests/functional/test_vcs_git.py index 8b07ae6673b..d2bdf73a21b 100644 --- a/tests/functional/test_vcs_git.py +++ b/tests/functional/test_vcs_git.py @@ -282,3 +282,24 @@ def test_resolve_commit_not_on_branch(script, tmp_path): # check we can fetch our commit rev_options = Git.make_rev_options(commit) Git().fetch_new(str(clone_path), repo_path.as_uri(), rev_options) + + +def test_fetch_new(script, tmp_path): + repo_path = tmp_path / "repo" + repo_file = repo_path / "file.txt" + clone_path1 = repo_path / "clone1" + clone_path2 = repo_path / "clone2" + + repo_path.mkdir() + script.run("git", "init", cwd=str(repo_path)) + repo_file.write_text(u".") + script.run("git", "add", "file.txt", cwd=str(repo_path)) + script.run("git", "commit", "-m", "initial commit", cwd=str(repo_path)) + commit = script.run( + "git", "rev-parse", "HEAD", cwd=str(repo_path) + ).stdout.strip() + + # Check that we can clone at HEAD + Git().fetch_new(str(clone_path1), repo_path.as_uri(), Git.make_rev_options()) + # Check that we can clone to commit + Git().fetch_new(str(clone_path2), repo_path.as_uri(), Git.make_rev_options(commit)) diff --git a/tests/lib/__init__.py b/tests/lib/__init__.py index 07569d814f4..4a57b69c05a 100644 --- a/tests/lib/__init__.py +++ b/tests/lib/__init__.py @@ -791,6 +791,8 @@ def _git_commit( def _vcs_add(script, version_pkg_path, vcs='git'): if vcs == 'git': script.run('git', 'init', cwd=version_pkg_path) + script.run('git', 'config', 'uploadpack.allowFilter', 'true', cwd=version_pkg_path) + script.run('git', 'config', 'uploadpack.allowanysha1inwant', 'true', cwd=version_pkg_path) script.run('git', 'add', '.', 
cwd=version_pkg_path) _git_commit(script, version_pkg_path, message='initial version') elif vcs == 'hg': diff --git a/tests/lib/local_repos.py b/tests/lib/local_repos.py index 2a41595f9f2..b77f49bc064 100644 --- a/tests/lib/local_repos.py +++ b/tests/lib/local_repos.py @@ -65,6 +65,9 @@ def local_checkout( else: vcs_backend = vcs.get_backend(vcs_name) vcs_backend.obtain(repo_url_path, url=hide_url(remote_repo)) + if vcs_name == 'git': + subprocess.check_call(['git', 'config', 'uploadpack.allowFilter', 'true'], cwd=repo_url_path) + subprocess.check_call(['git', 'config', 'uploadpack.allowanysha1inwant', 'true'], cwd=repo_url_path) return '{}+{}'.format(vcs_name, path_to_url(repo_url_path))