From 0a3b1197fa094547bbef521bddadbe2fb5af9380 Mon Sep 17 00:00:00 2001 From: rmorotti Date: Fri, 21 Jun 2024 13:34:50 +0100 Subject: [PATCH] PERF: do not recreate the parent dir before extracting each file from a wheel --- news/12782.bugfix.rst | 2 ++ src/pip/_internal/operations/install/wheel.py | 14 ++++++++------ 2 files changed, 10 insertions(+), 6 deletions(-) create mode 100644 news/12782.bugfix.rst diff --git a/news/12782.bugfix.rst b/news/12782.bugfix.rst new file mode 100644 index 00000000000..b84b2bdacfa --- /dev/null +++ b/news/12782.bugfix.rst @@ -0,0 +1,2 @@ +Improve performance of pip install. When extracting a wheel, +do not recheck/recreate the parent directory before extracting each file. \ No newline at end of file diff --git a/src/pip/_internal/operations/install/wheel.py b/src/pip/_internal/operations/install/wheel.py index a02a193d226..9a5e39b5ee0 100644 --- a/src/pip/_internal/operations/install/wheel.py +++ b/src/pip/_internal/operations/install/wheel.py @@ -358,12 +358,6 @@ def _getinfo(self) -> ZipInfo: return self._zip_file.getinfo(self.src_record_path) def save(self) -> None: - # directory creation is lazy and after file filtering - # to ensure we don't install empty dirs; empty dirs can't be - # uninstalled. - parent_dir = os.path.dirname(self.dest_path) - ensure_dir(parent_dir) - # When we open the output file below, any existing file is truncated # before we start writing the new contents. This is fine in most # cases, but can cause a segfault if pip has loaded a shared @@ -580,7 +574,15 @@ def is_entrypoint_wrapper(file: "File") -> bool: script_scheme_files = map(ScriptFile, script_scheme_files) files = chain(files, script_scheme_files) + existing_parents = set() for file in files: + # directory creation is lazy and after file filtering + # to ensure we don't install empty dirs; empty dirs can't be + # uninstalled. + parent_dir = os.path.dirname(file.dest_path) + if parent_dir not in existing_parents: + ensure_dir(parent_dir) + existing_parents.add(parent_dir) file.save() record_installed(file.src_record_path, file.dest_path, file.changed)