From c726b43ba21001725adfdb41d95c169f8f81f19a Mon Sep 17 00:00:00 2001 From: John Sirois Date: Fri, 11 Dec 2020 10:33:54 -0800 Subject: [PATCH] Improve `PythonInterpreter` venv support. (#1129) This fixes binary canonicalization to handle virtual environments created with virtualenv instead of pyvenv. It also adds support for resolving the base interpreter used to build a virtual environment. The ability to resolve a virtual environment interpreter will be used to fix #1031 where virtual environments created with `--system-site-packages` leak those packages through as regular sys.path entries otherwise undetectable by PEX. Work towards #962 and #1115. --- pex/interpreter.py | 158 ++++++++++++++++++++++++++++++--- tests/test_integration.py | 8 +- tests/test_interpreter.py | 52 ++++++++++- tests/test_pex_bootstrapper.py | 7 +- 4 files changed, 204 insertions(+), 21 deletions(-) diff --git a/pex/interpreter.py b/pex/interpreter.py index 007c30835..e2d19082a 100644 --- a/pex/interpreter.py +++ b/pex/interpreter.py @@ -16,7 +16,7 @@ from textwrap import dedent from pex import third_party -from pex.common import safe_rmtree +from pex.common import is_exe, safe_rmtree from pex.compatibility import string from pex.executor import Executor from pex.jobs import ErrorHandler, Job, Retain, SpawnedJob, execute_parallel @@ -34,6 +34,7 @@ Dict, Iterable, Iterator, + List, MutableMapping, Optional, Sequence, @@ -95,6 +96,14 @@ def get(cls, binary=None): preferred_tag = supported_tags[0] return cls( binary=binary or sys.executable, + prefix=sys.prefix, + base_prefix=( + # Old virtualenv (16 series and lower) sets `sys.real_prefix` in all cases. + getattr(sys, "real_prefix", None) + # Both pyvenv and virtualenv 20+ set `sys.base_prefix` as per + # https://www.python.org/dev/peps/pep-0405/.
+ or getattr(sys, "base_prefix", sys.prefix) + ), python_tag=preferred_tag.interpreter, abi_tag=preferred_tag.abi, platform_tag=preferred_tag.platform, @@ -107,7 +116,7 @@ def get(cls, binary=None): def decode(cls, encoded): TRACER.log("creating PythonIdentity from encoded: %s" % encoded, V=9) values = json.loads(encoded) - if len(values) != 7: + if len(values) != 9: raise cls.InvalidError("Invalid interpreter identity: %s" % encoded) supported_tags = values.pop("supported_tags") @@ -126,13 +135,25 @@ def _find_interpreter_name(cls, python_tag): raise ValueError("Unknown interpreter: {}".format(python_tag)) def __init__( - self, binary, python_tag, abi_tag, platform_tag, version, supported_tags, env_markers + self, + binary, # type: str + prefix, # type: str + base_prefix, # type: str + python_tag, # type: str + abi_tag, # type: str + platform_tag, # type: str + version, # type: Iterable[int] + supported_tags, # type: Iterable[tags.Tag] + env_markers, # type: Dict[str, str] ): + # type: (...) -> None # N.B.: We keep this mapping to support historical values for `distribution` and `requirement` # properties. 
self._interpreter_name = self._find_interpreter_name(python_tag) self._binary = binary + self._prefix = prefix + self._base_prefix = base_prefix self._python_tag = python_tag self._abi_tag = abi_tag self._platform_tag = platform_tag @@ -143,6 +164,8 @@ def __init__( def encode(self): values = dict( binary=self._binary, + prefix=self._prefix, + base_prefix=self._base_prefix, python_tag=self._python_tag, abi_tag=self._abi_tag, platform_tag=self._platform_tag, @@ -158,6 +181,16 @@ def encode(self): def binary(self): return self._binary + @property + def prefix(self): + # type: () -> str + return self._prefix + + @property + def base_prefix(self): + # type: () -> str + return self._base_prefix + @property def python_tag(self): return self._python_tag @@ -308,7 +341,7 @@ class PythonInterpreter(object): _PYTHON_INTERPRETER_BY_NORMALIZED_PATH = {} # type: Dict @staticmethod - def _read_pyvenv_home(path): + def _get_pyvenv_cfg(path): # type: (str) -> Optional[str] # See: https://www.python.org/dev/peps/pep-0405/#specification pyvenv_cfg_path = os.path.join(path, "pyvenv.cfg") @@ -317,11 +350,11 @@ def _read_pyvenv_home(path): for line in fp: name, _, value = line.partition("=") if name.strip() == "home": - return value.strip() + return pyvenv_cfg_path return None @classmethod - def _find_pyvenv_home(cls, maybe_venv_python_binary): + def _find_pyvenv_cfg(cls, maybe_venv_python_binary): # type: (str) -> Optional[str] # A pyvenv is identified by a pyvenv.cfg file with a home key in one of the two following # directory layouts: @@ -340,11 +373,11 @@ def _find_pyvenv_home(cls, maybe_venv_python_binary): # # See: # See: https://www.python.org/dev/peps/pep-0405/#specification maybe_venv_bin_dir = os.path.dirname(maybe_venv_python_binary) - home_dir = cls._read_pyvenv_home(maybe_venv_bin_dir) - if not home_dir: + pyvenv_cfg = cls._get_pyvenv_cfg(maybe_venv_bin_dir) + if not pyvenv_cfg: maybe_venv_dir = os.path.dirname(maybe_venv_bin_dir) - home_dir = 
cls._read_pyvenv_home(maybe_venv_dir) - return home_dir + pyvenv_cfg = cls._get_pyvenv_cfg(maybe_venv_dir) + return pyvenv_cfg @classmethod def _resolve_pyvenv_canonical_python_binary( @@ -357,8 +390,8 @@ def _resolve_pyvenv_canonical_python_binary( if not os.path.islink(maybe_venv_python_binary): return None - home_dir = cls._find_pyvenv_home(maybe_venv_python_binary) - if os.path.dirname(real_binary) != home_dir: + pyvenv_cfg = cls._find_pyvenv_cfg(maybe_venv_python_binary) + if pyvenv_cfg is None: return None while os.path.islink(maybe_venv_python_binary): @@ -798,8 +831,109 @@ def __init__(self, identity): @property def binary(self): + # type: () -> str return self._binary + @property + def is_venv(self): + # type: () -> bool + """Return `True` if this interpreter is homed in a virtual environment.""" + return self._identity.prefix != self._identity.base_prefix + + @property + def prefix(self): + # type: () -> str + """Return the `sys.prefix` of this interpreter. + + For virtual environments, this will be the virtual environment directory itself. + """ + return self._identity.prefix + + class BaseInterpreterResolutionError(Exception): + """Indicates the base interpreter for a virtual environment could not be resolved.""" + + def resolve_base_interpreter(self): + # type: () -> PythonInterpreter + """Finds the base system interpreter used to create a virtual environment. + + If this interpreter is not homed in a virtual environment, returns itself. + """ + if not self.is_venv: + return self + + # In the case of PyPy, the dir might contain one of the following: + # + # 1. On a system with PyPy 2.7 series and one PyPy 3.x series + # bin/ + # pypy + # pypy3 + # + # 2. On a system with PyPy 2.7 series and more than one PyPy 3.x series + # bin/ + # pypy + # pypy3 + # pypy3.6 + # pypy3.7 + # + # In both cases, bin/pypy is a 2.7 series interpreter. In case 2 bin/pypy3 could be either + # PyPy 3.6 series or PyPy 3.7 series. 
In order to ensure we pick the correct base executable + # of a PyPy virtual environment, we always try to resolve the most specific basename first + # to the least specific basename last and we also verify that, if the basename resolves, it + # resolves to an equivalent interpreter. We employ the same strategy for CPython, but only + # for uniformity in the algorithm. It appears to always be the case for CPython that + # python<major>.<minor> is present in any given <prefix>/bin/ directory; so the algorithm + # gets a hit on 1st try for CPython binaries incurring ~no extra overhead. + + version = self._identity.version + abi_tag = self._identity.abi_tag + + prefix = "pypy" if self._identity.interpreter == "PyPy" else "python" + suffixes = ("{}.{}".format(version[0], version[1]), str(version[0]), "") + candidate_binaries = tuple("{}{}".format(prefix, suffix) for suffix in suffixes) + + def iter_base_candidate_binary_paths(interpreter): + # type: (PythonInterpreter) -> Iterator[str] + bin_dir = os.path.join(interpreter._identity.base_prefix, "bin") + for candidate_binary in candidate_binaries: + candidate_binary_path = os.path.join(bin_dir, candidate_binary) + if is_exe(candidate_binary_path): + yield candidate_binary_path + + def is_same_interpreter(interpreter): + # type: (PythonInterpreter) -> bool + identity = interpreter._identity + return identity.version == version and identity.abi_tag == abi_tag + + resolution_path = [] # type: List[str] + base_interpreter = self + while base_interpreter.is_venv: + resolved = None # type: Optional[PythonInterpreter] + for candidate_path in iter_base_candidate_binary_paths(base_interpreter): + resolved_interpreter = self.from_binary(candidate_path) + if is_same_interpreter(resolved_interpreter): + resolved = resolved_interpreter + break + if resolved is None: + message = [ + "Failed to resolve the base interpreter for the virtual environment at " + "{venv_dir}.".format(venv_dir=self._identity.prefix) + ] + if resolution_path: + message.append( +
"Resolved through {path}".format( + path=" -> ".join(binary for binary in resolution_path) + ) + ) + message.append( + "Search of base_prefix {} found no equivalent interpreter for {}".format( + base_interpreter._identity.base_prefix, base_interpreter._binary + ) + ) + raise self.BaseInterpreterResolutionError("\n".join(message)) + base_interpreter = resolved_interpreter + resolution_path.append(base_interpreter.binary) + return base_interpreter + @property def identity(self): # type: () -> PythonIdentity diff --git a/tests/test_integration.py b/tests/test_integration.py index 796f8fd07..81cb189e2 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1922,7 +1922,9 @@ def add_to_path(entry): assert "PEX_PYTHON_PATH" not in final_env assert "_PEX_SHOULD_EXIT_BOOTSTRAP_REEXEC" not in final_env - expected_exec_chain = [os.path.realpath(i) for i in [sys.executable] + (exec_chain or [])] + expected_exec_chain = [ + PythonInterpreter.from_binary(i).binary for i in [sys.executable] + (exec_chain or []) + ] assert expected_exec_chain == final_env["_PEX_EXEC_CHAIN"].split(os.pathsep) @@ -1933,7 +1935,7 @@ def test_pex_no_reexec_no_constraints(): def test_pex_reexec_no_constraints_pythonpath_present(): # type: () -> None - _assert_exec_chain(exec_chain=[os.path.realpath(sys.executable)], pythonpath=["."]) + _assert_exec_chain(exec_chain=[sys.executable], pythonpath=["."]) def test_pex_no_reexec_constraints_match_current(): @@ -1946,7 +1948,7 @@ def test_pex_reexec_constraints_match_current_pythonpath_present(): # type: () -> None current_version = ".".join(str(component) for component in sys.version_info[0:3]) _assert_exec_chain( - exec_chain=[os.path.realpath(sys.executable)], + exec_chain=[sys.executable], pythonpath=["."], interpreter_constraints=["=={}".format(current_version)], ) diff --git a/tests/test_interpreter.py b/tests/test_interpreter.py index a3de2913e..324f9a41d 100644 --- a/tests/test_interpreter.py +++ b/tests/test_interpreter.py @@ -9,7 
+9,7 @@ import pytest from pex import interpreter -from pex.common import temporary_dir, touch +from pex.common import safe_mkdtemp, temporary_dir, touch from pex.compatibility import PY3 from pex.executor import Executor from pex.interpreter import PythonInterpreter @@ -17,6 +17,7 @@ PY27, PY35, PY36, + PY_VER, ensure_python_distribution, ensure_python_interpreter, environment_as, @@ -30,7 +31,7 @@ from unittest.mock import Mock, patch # type: ignore[misc,no-redef,import] if TYPE_CHECKING: - from typing import Iterator, Tuple, Union, Any + from typing import Iterator, Tuple, Union, Any, List InterpreterIdentificationError = Tuple[str, str] InterpreterOrError = Union[PythonInterpreter, InterpreterIdentificationError] @@ -255,7 +256,7 @@ def assert_chosen(expected_version, other_version): assert_chosen(expected_version="3.6.1", other_version="3.6.0") -def test_pyvenv(tmpdir): +def test_detect_pyvenv(tmpdir): # type: (Any) -> None venv = str(tmpdir) py35 = ensure_python_interpreter(PY35) @@ -285,3 +286,48 @@ def test_pyvenv(tmpdir): assert len(pythons) >= 2, "Expected at least two virtualenv python binaries, found: {}".format( pythons ) + + +def check_resolve_venv(real_interpreter): + # type: (PythonInterpreter) -> None + tmpdir = safe_mkdtemp() + + def create_venv( + interpreter, # type: PythonInterpreter + rel_path, # type: str + ): + # type: (...) 
-> List[str] + venv_dir = os.path.join(tmpdir, rel_path) + interpreter.execute(["-m", "venv", venv_dir]) + return glob.glob(os.path.join(venv_dir, "bin", "python*")) + + assert not real_interpreter.is_venv + assert real_interpreter is real_interpreter.resolve_base_interpreter() + + for index, python in enumerate(create_venv(real_interpreter, "first-level")): + venv_interpreter = PythonInterpreter.from_binary(python) + assert venv_interpreter.is_venv + assert venv_interpreter != real_interpreter.binary + assert real_interpreter == venv_interpreter.resolve_base_interpreter() + + for nested_python in create_venv(venv_interpreter, "second-level{}".format(index)): + nested_venv_interpreter = PythonInterpreter.from_binary(nested_python) + assert nested_venv_interpreter.is_venv + assert nested_venv_interpreter != venv_interpreter + assert nested_venv_interpreter != real_interpreter + assert real_interpreter == nested_venv_interpreter.resolve_base_interpreter() + + +def test_resolve_venv(): + # type: () -> None + real_interpreter = PythonInterpreter.from_binary(ensure_python_interpreter(PY35)) + check_resolve_venv(real_interpreter) + + +@pytest.mark.skipif( + PY_VER < (3, 0), reason="Test requires the venv module which is not present in Python 2." +) +def test_resolve_venv_ambient(): + # type: () -> None + ambient_real_interpreter = PythonInterpreter.get().resolve_base_interpreter() + check_resolve_venv(ambient_real_interpreter) diff --git a/tests/test_pex_bootstrapper.py b/tests/test_pex_bootstrapper.py index e6a2f7621..7a59f488a 100644 --- a/tests/test_pex_bootstrapper.py +++ b/tests/test_pex_bootstrapper.py @@ -30,7 +30,7 @@ def find_interpreters( constraints=None, # type: Optional[Iterable[str]] preferred_interpreter=None, # type: Optional[PythonInterpreter] ): - # type: (...) -> List[AnyStr] + # type: (...) 
-> List[str] return [ interp.binary for interp in iter_compatible_interpreters( @@ -163,8 +163,9 @@ def test_find_compatible_interpreters_with_valid_basenames_and_constraints(): def test_find_compatible_interpreters_bias_current(): # type: () -> None py36 = ensure_python_interpreter(PY36) - assert [os.path.realpath(sys.executable), py36] == find_interpreters([py36, sys.executable]) - assert [os.path.realpath(sys.executable), py36] == find_interpreters([sys.executable, py36]) + current_interpreter = PythonInterpreter.get() + assert [current_interpreter.binary, py36] == find_interpreters([py36, sys.executable]) + assert [current_interpreter.binary, py36] == find_interpreters([sys.executable, py36]) def test_find_compatible_interpreters_siblings_of_current_issues_1109():