From f895bf5849d771d744e7e1a1fd410d40dba62a62 Mon Sep 17 00:00:00 2001 From: Josh Snyder Date: Mon, 25 Apr 2022 12:33:34 -0700 Subject: [PATCH] reproducible directories for pip builds Currently, pip randomly assigns directory names when it builds Python sdists into bdists. This can result in randomized file paths being embedded into the build output (usually in debug symbols, but potentially in other places). The ideal solution would be to trim the front (random part) of the file path off, leaving the remaining (deterministic) part to embed in the binary. Doing so would require reaching deep into the configuration of whatever compiler/linker pip happens to be using (e.g. gcc, clang, or rustc). The approach taken in this patch, on the other hand, doesn't require modifying the internals of Python packages. In this patch we make it so that pip's randomly assigned directory paths are instead generated from a deterministic counter. Doing so requires exclusive access to TMPDIR, because otherwise other programs (likely other executions of `pip`) will attempt to create directories of the same name. For that reason, the feature only activates when SOURCE_DATE_EPOCH is set, i.e. when the caller has explicitly asked for a reproducible build and can be expected to give pip a TMPDIR of its own.
For more discussion (and prior art) in this area, see: * https://github.com/NixOS/nixpkgs/pull/102222/files * https://github.com/pypa/pip/issues/6505 --- src/pip/_internal/build_env.py | 12 ++++++++++- .../operations/build/metadata_legacy.py | 11 +++++++++- .../operations/build/wheel_legacy.py | 13 +++++++++++- src/pip/_internal/utils/temp_dir.py | 21 +++++++++++++++++++ src/pip/_internal/wheel_builder.py | 2 +- 5 files changed, 55 insertions(+), 4 deletions(-) diff --git a/src/pip/_internal/build_env.py b/src/pip/_internal/build_env.py index cc2b38bab79..e92ed5d6ce4 100644 --- a/src/pip/_internal/build_env.py +++ b/src/pip/_internal/build_env.py @@ -81,6 +81,7 @@ class BuildEnvironment: def __init__(self) -> None: temp_dir = TempDirectory(kind=tempdir_kinds.BUILD_ENV, globally_managed=True) + self._sub_temp_dir = temp_dir.make_sub_temp_dir() self._prefixes = OrderedDict( (name, _Prefix(os.path.join(temp_dir.path, name))) @@ -136,7 +137,7 @@ def __init__(self) -> None: def __enter__(self) -> None: self._save_env = { name: os.environ.get(name, None) - for name in ("PATH", "PYTHONNOUSERSITE", "PYTHONPATH") + for name in ('PATH', 'PYTHONNOUSERSITE', 'PYTHONPATH', 'TMPDIR') } path = self._bin_dirs[:] @@ -153,6 +154,8 @@ def __enter__(self) -> None: "PYTHONPATH": os.pathsep.join(pythonpath), } ) + if self._sub_temp_dir is not None: + os.environ['TMPDIR'] = self._sub_temp_dir def __exit__( self, @@ -213,12 +216,14 @@ def install_requirements( prefix.setup = True if not requirements: return + self._install_requirements( get_runnable_pip(), finder, requirements, prefix, kind=kind, + sub_temp_dir=self._sub_temp_dir, ) @staticmethod @@ -229,6 +234,7 @@ def _install_requirements( prefix: _Prefix, *, kind: str, + sub_temp_dir: str, ) -> None: args: List[str] = [ sys.executable, @@ -270,6 +276,10 @@ def _install_requirements( args.append("--") args.extend(requirements) extra_environ = {"_PIP_STANDALONE_CERT": where()} + + if sub_temp_dir is not None: + extra_environ["TMPDIR"] = 
sub_temp_dir + with open_spinner(f"Installing {kind}") as spinner: call_subprocess( args, diff --git a/src/pip/_internal/operations/build/metadata_legacy.py b/src/pip/_internal/operations/build/metadata_legacy.py index e60988d643e..424a44a323e 100644 --- a/src/pip/_internal/operations/build/metadata_legacy.py +++ b/src/pip/_internal/operations/build/metadata_legacy.py @@ -50,7 +50,10 @@ def generate_metadata( details, ) - egg_info_dir = TempDirectory(kind="pip-egg-info", globally_managed=True).path + tmp_dir = TempDirectory( + kind="pip-egg-info", globally_managed=True + ) + egg_info_dir = tmp_dir.path args = make_setuptools_egg_info_args( setup_py_path, @@ -58,6 +61,11 @@ def generate_metadata( no_user_config=isolated, ) + extra_environ = dict() + sub_temp_dir = tmp_dir.make_sub_temp_dir() + if sub_temp_dir is not None: + extra_environ["TMPDIR"] = sub_temp_dir + with build_env: with open_spinner("Preparing metadata (setup.py)") as spinner: try: @@ -65,6 +73,7 @@ def generate_metadata( args, cwd=source_dir, command_desc="python setup.py egg_info", + extra_environ=extra_environ, spinner=spinner, ) except InstallationSubprocessError as error: diff --git a/src/pip/_internal/operations/build/wheel_legacy.py b/src/pip/_internal/operations/build/wheel_legacy.py index c5f0492ccbe..8a4056ba74b 100644 --- a/src/pip/_internal/operations/build/wheel_legacy.py +++ b/src/pip/_internal/operations/build/wheel_legacy.py @@ -1,5 +1,6 @@ import logging import os.path +import shutil from typing import List, Optional from pip._internal.cli.spinners import open_spinner @@ -62,12 +63,13 @@ def build_wheel_legacy( source_dir: str, global_options: List[str], build_options: List[str], - tempd: str, + temp_dir, # type: TempDirectory ) -> Optional[str]: """Build one unpacked package using the "legacy" build process. Returns path to wheel if successfully built. Otherwise, returns None. 
""" + tempd = temp_dir.path wheel_args = make_setuptools_bdist_wheel_args( setup_py_path, global_options=global_options, @@ -79,11 +81,17 @@ def build_wheel_legacy( with open_spinner(spin_message) as spinner: logger.debug("Destination directory: %s", tempd) + sub_temp_dir = temp_dir.make_sub_temp_dir() + extra_environ = dict() + if sub_temp_dir is not None: + extra_environ["TMPDIR"] = sub_temp_dir + try: output = call_subprocess( wheel_args, command_desc="python setup.py bdist_wheel", cwd=source_dir, + extra_environ=extra_environ, spinner=spinner, ) except Exception: @@ -91,6 +99,9 @@ def build_wheel_legacy( logger.error("Failed building wheel for %s", name) return None + if sub_temp_dir is not None: + shutil.rmtree(sub_temp_dir) + names = os.listdir(tempd) wheel_path = get_legacy_build_wheel_path( names=names, diff --git a/src/pip/_internal/utils/temp_dir.py b/src/pip/_internal/utils/temp_dir.py index 8ee8a1cb180..9f9aaca2920 100644 --- a/src/pip/_internal/utils/temp_dir.py +++ b/src/pip/_internal/utils/temp_dir.py @@ -1,6 +1,7 @@ import errno import itertools import logging +import os import os.path import tempfile from contextlib import ExitStack, contextmanager @@ -12,6 +13,7 @@ _T = TypeVar("_T", bound="TempDirectory") +tmpdir_serial = 0 # Kinds of temporary directories. Only needed for ones that are # globally-managed. @@ -161,6 +163,17 @@ def _create(self, kind: str) -> str: # symlinked to another directory. This tends to confuse build # scripts, so we canonicalize the path by traversing potential # symlinks here. 
+ + if "SOURCE_DATE_EPOCH" in os.environ: + global tmpdir_serial + path = os.path.join( + tempfile.gettempdir(), + "pip-{}-{}".format(kind, tmpdir_serial) + ) + tmpdir_serial += 1 + os.mkdir(path) + return path + path = os.path.realpath(tempfile.mkdtemp(prefix=f"pip-{kind}-")) logger.debug("Created temporary directory: %s", path) return path @@ -172,6 +185,14 @@ def cleanup(self) -> None: return rmtree(self._path) + def make_sub_temp_dir(self): + if "SOURCE_DATE_EPOCH" not in os.environ: + return None + + ret = os.path.join(self._path, 'tmp') + os.mkdir(ret) + return ret + class AdjacentTempDirectory(TempDirectory): """Helper class that creates a temporary directory adjacent to a real one. diff --git a/src/pip/_internal/wheel_builder.py b/src/pip/_internal/wheel_builder.py index 15b30af58e4..bfc75d40212 100644 --- a/src/pip/_internal/wheel_builder.py +++ b/src/pip/_internal/wheel_builder.py @@ -271,7 +271,7 @@ def _build_one_inside_env( source_dir=req.unpacked_source_directory, global_options=global_options, build_options=build_options, - tempd=temp_dir.path, + temp_dir=temp_dir, ) if wheel_path is not None: