From e04f7bff42d15d669462ab549168c4d67b08b057 Mon Sep 17 00:00:00 2001 From: Josh Snyder Date: Mon, 25 Apr 2022 12:33:34 -0700 Subject: [PATCH] reproducible directories for pip builds Currently, pip randomly assigns directory names when it builds Python sdists into bdists. This can result in randomized file paths being embedded into the build output (usually in debug symbols, but potentially in other places). The ideal solution would be to trim the front (random part) of the file path off, leaving the remaining (deterministic) part to embed in the binary. Doing so would require reaching deep into the configuration of whatever compiler/linker pip happens to be using (e.g. gcc, clang, rustc). The approach taken in this patch, on the other hand, doesn't require modifying the internals of Python packages. In this patch we make it so that pip's randomly assigned directory paths are instead generated from a deterministic counter. Doing so requires exclusive access to TMPDIR, because otherwise other programs (likely other executions of `pip`) will attempt to create directories of the same name. For that reason, the feature is opt-in: it only activates when the SOURCE_DATE_EPOCH environment variable is set. 
For more discussion (and prior art) in this area, see: * https://github.com/NixOS/nixpkgs/pull/102222/files * https://github.com/pypa/pip/issues/6505 --- src/pip/_internal/build_env.py | 20 +++++++++++++++++++- src/pip/_internal/utils/temp_dir.py | 13 +++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/pip/_internal/build_env.py b/src/pip/_internal/build_env.py index cdf04324107..e2e80ba3d13 100644 --- a/src/pip/_internal/build_env.py +++ b/src/pip/_internal/build_env.py @@ -64,15 +64,26 @@ def _create_standalone_pip() -> Iterator[str]: yield os.path.join(pip_zip, "pip") +def _make_sub_temp_dir(within): + if "SOURCE_DATE_EPOCH" not in os.environ: + return None + + ret = os.path.join(within, 'tmp') + os.mkdir(ret) + return ret + + class BuildEnvironment: """Creates and manages an isolated environment to install build deps """ def __init__(self): # type: () -> None + temp_dir = TempDirectory( kind=tempdir_kinds.BUILD_ENV, globally_managed=True ) + self._sub_temp_dir = _make_sub_temp_dir(temp_dir.path) self._prefixes = OrderedDict( (name, _Prefix(os.path.join(temp_dir.path, name))) @@ -202,6 +213,7 @@ def install_requirements( requirements, prefix, message, + self._sub_temp_dir, ) @staticmethod @@ -211,6 +223,7 @@ def _install_requirements( requirements: Iterable[str], prefix: _Prefix, message: str, + sub_temp_dir: Optional[str], ) -> None: args = [ sys.executable, pip_runnable, 'install', @@ -242,7 +255,12 @@ def _install_requirements( args.append('--prefer-binary') args.append('--') args.extend(requirements) - extra_environ = {"_PIP_STANDALONE_CERT": where()} + extra_environ = { + "_PIP_STANDALONE_CERT": where(), + } + if sub_temp_dir: + extra_environ["TMPDIR"] = sub_temp_dir + with open_spinner(message) as spinner: call_subprocess(args, spinner=spinner, extra_environ=extra_environ) diff --git a/src/pip/_internal/utils/temp_dir.py b/src/pip/_internal/utils/temp_dir.py index 477cbe6b1aa..cab06407867 100644 --- 
a/src/pip/_internal/utils/temp_dir.py +++ b/src/pip/_internal/utils/temp_dir.py @@ -1,6 +1,7 @@ import errno import itertools import logging +import os import os.path import tempfile from contextlib import ExitStack, contextmanager @@ -12,6 +13,7 @@ _T = TypeVar("_T", bound="TempDirectory") +tmpdir_serial = 0 # Kinds of temporary directories. Only needed for ones that are # globally-managed. @@ -171,6 +173,17 @@ def _create(self, kind): # symlinked to another directory. This tends to confuse build # scripts, so we canonicalize the path by traversing potential # symlinks here. + + if "SOURCE_DATE_EPOCH" in os.environ: + global tmpdir_serial + path = os.path.join( + tempfile.gettempdir(), + "pip-{}-{}".format(kind, tmpdir_serial) + ) + tmpdir_serial += 1 + os.mkdir(path) + return path + path = os.path.realpath(tempfile.mkdtemp(prefix=f"pip-{kind}-")) logger.debug("Created temporary directory: %s", path) return path