Skip to content

Commit

Permalink
Support --pre-resolved-dists resolver.
Browse files Browse the repository at this point in the history
When building a PEX or creating a venv via `pex3 venv create` you can
now tell Pex to use a set of pre-resolved distributions. This is similar
to using `--no-pypi --find-links ...` except that:
1. It's roughly 3x faster since Pip is not asked to do any resolution.
2. It requires that all the distributions specified form an already
   complete resolve.

One way to obtain distributions that meet criterion 2 is to use `pip
download -d ...` or `pip wheel -w ...` to pre-resolve the distributions
you need.

Closes pex-tool#1907
  • Loading branch information
jsirois committed Sep 13, 2024
1 parent 1c9ac9e commit e063da3
Show file tree
Hide file tree
Showing 31 changed files with 3,003 additions and 223 deletions.
34 changes: 19 additions & 15 deletions pex/bin/pex.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
from pex.resolve.resolver_configuration import (
LockRepositoryConfiguration,
PexRepositoryConfiguration,
PreResolvedConfiguration,
)
from pex.resolve.resolver_options import create_pip_configuration
from pex.resolve.resolvers import Unsatisfiable, sorted_requirements
Expand Down Expand Up @@ -136,7 +137,9 @@ def configure_clp_pex_resolution(parser):
),
)

resolver_options.register(group, include_pex_repository=True, include_lock=True)
resolver_options.register(
group, include_pex_repository=True, include_lock=True, include_pre_resolved=True
)

group.add_argument(
"--pex-path",
Expand Down Expand Up @@ -1011,25 +1014,26 @@ def build_pex(
DependencyConfiguration.from_pex_info(requirements_pex_info)
)

if isinstance(resolver_configuration, (LockRepositoryConfiguration, PreResolvedConfiguration)):
pip_configuration = resolver_configuration.pip_configuration
elif isinstance(resolver_configuration, PexRepositoryConfiguration):
# TODO(John Sirois): Consider finding a way to support custom --index and --find-links in
# this case. I.E.: I use a corporate index to build a PEX repository and now I want to
# build a --project PEX whose pyproject.toml build-system.requires should be resolved from
# that corporate index.
pip_configuration = try_(
finalize_resolve_config(
create_pip_configuration(options), targets=targets, context="--project building"
)
)
else:
pip_configuration = resolver_configuration

project_dependencies = OrderedSet() # type: OrderedSet[Requirement]
with TRACER.timed(
"Adding distributions built from local projects and collecting their requirements: "
"{projects}".format(projects=" ".join(options.projects))
):
if isinstance(resolver_configuration, LockRepositoryConfiguration):
pip_configuration = resolver_configuration.pip_configuration
elif isinstance(resolver_configuration, PexRepositoryConfiguration):
# TODO(John Sirois): Consider finding a way to support custom --index and --find-links in this case.
# I.E.: I use a corporate index to build a PEX repository and now I want to build a --project PEX
# whose pyproject.toml build-system.requires should be resolved from that corporate index.
pip_configuration = try_(
finalize_resolve_config(
create_pip_configuration(options), targets=targets, context="--project building"
)
)
else:
pip_configuration = resolver_configuration

projects = project.get_projects(options)
built_projects = projects.build(
targets=targets,
Expand Down
1 change: 1 addition & 0 deletions pex/cli/commands/lock.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,7 @@ def _add_resolve_options(cls, parser):
cls._create_resolver_options_group(parser),
include_pex_repository=False,
include_lock=False,
include_pre_resolved=False,
)

@classmethod
Expand Down
4 changes: 3 additions & 1 deletion pex/cli/commands/venv.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,9 @@ def _add_create_arguments(cls, parser):
)
installer_options.register(parser)
target_options.register(parser, include_platforms=True)
resolver_options.register(parser, include_pex_repository=True, include_lock=True)
resolver_options.register(
parser, include_pex_repository=True, include_lock=True, include_pre_resolved=True
)
requirement_options.register(parser)

@classmethod
Expand Down
3 changes: 2 additions & 1 deletion pex/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,10 +450,11 @@ def can_write_dir(path):


def touch(file):
    # type: (_Text) -> _Text
    """Equivalent of unix `touch path`.

    The file is opened in append mode via `safe_open` and its access / modification times are
    then set to the current time.

    :param file: The path of the file to touch.
    :return: The same `file` path that was passed in, so calls can be used inline.
    """
    with safe_open(file, "a"):
        # Passing `None` for the times asks `os.utime` to use the current time.
        os.utime(file, None)
    return file


class Chroot(object):
Expand Down
83 changes: 69 additions & 14 deletions pex/dist_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,65 @@ class InvalidMetadataError(MetadataError):
"""Indicates a metadata value that is invalid."""


def is_tar_sdist(path):
    # type: (Text) -> bool
    """Return `True` if `path` is named like a tar-based sdist.

    Only the name is examined (case-insensitively); the file itself is never opened. The
    recognized extensions are `.tar.gz`, `.tgz` and `.tar.bz2`.

    :param path: A file name or path to check.
    :return: `True` if the lower-cased path ends with one of the recognized tar extensions.
    """
    # N.B.: PEP-625 (https://peps.python.org/pep-0625/) says sdists must use .tar.gz, but we
    # have a known example of tar.bz2 in the wild in python-constraint 1.4.0 on PyPI:
    # https://pypi.org/project/python-constraint/1.4.0/#files
    # This probably all stems from the legacy `python setup.py sdist` as last described here:
    # https://docs.python.org/3.11/distutils/sourcedist.html
    # There was a move to reject exotic formats in PEP-527 in 2016 and the historical sdist
    # formats appear to be listed here: https://peps.python.org/pep-0527/#file-extensions
    # A query on the PyPI dataset shows:
    #
    # SELECT
    #   REGEXP_EXTRACT(path, r'\.([^.]+|tar\.[^.]+|tar)$') as extension,
    #   count(*) as count
    # FROM `bigquery-public-data.pypi.distribution_metadata`
    # group by extension
    # order by count desc
    #
    #   | extension | count   |
    #   |-----------|---------|
    #   | whl       | 6332494 |
    # * | tar.gz    | 5283102 |
    #   | egg       |  135940 |
    # * | zip       |  108532 |
    #   | exe       |   18452 |
    # * | tar.bz2   |    3857 |
    #   | msi       |     625 |
    #   | rpm       |     603 |
    # * | tgz       |     226 |
    #   | dmg       |      47 |
    #   | deb       |      36 |
    # * | tar.zip   |       2 |
    # * | ZIP       |       1 |
    return path.lower().endswith((".tar.gz", ".tgz", ".tar.bz2"))


def is_zip_sdist(path):
    # type: (Text) -> bool
    """Return `True` if `path` is named like a zip-based sdist.

    Only the name is examined (case-insensitively); the file itself is never opened.

    :param path: A file name or path to check.
    :return: `True` if the lower-cased path ends with `.zip`.
    """
    return path.lower().endswith(".zip")


def is_sdist(path):
    # type: (Text) -> bool
    """Return `True` if `path` is named like a source distribution archive.

    A path qualifies when either `is_tar_sdist` or `is_zip_sdist` accepts it.

    :param path: A file name or path to check.
    :return: `True` if the path has a recognized tar or zip sdist extension.
    """
    return is_tar_sdist(path) or is_zip_sdist(path)


def is_wheel(path):
    # type: (Text) -> bool
    """Return `True` if `path` is named like a wheel.

    Only the name is examined (case-insensitively); the file itself is never opened.

    :param path: A file name or path to check.
    :return: `True` if the lower-cased path ends with `.whl`.
    """
    return path.lower().endswith(".whl")


def _strip_sdist_path(sdist_path):
    # type: (Text) -> Optional[Text]
    """Return the base name of an sdist path with its archive extension(s) removed.

    :param sdist_path: The path of a candidate sdist archive.
    :return: The stripped base name, or `None` if `is_sdist` does not recognize the path.
    """
    if not is_sdist(sdist_path):
        return None

    sdist_basename = os.path.basename(sdist_path)
    filename, _ = os.path.splitext(sdist_basename)
    # Two-part extensions like `.tar.gz` leave a trailing `.tar` after the first split; check
    # case-insensitively since `is_sdist` also accepts upper-cased extensions.
    if filename.lower().endswith(".tar"):
        filename, _ = os.path.splitext(filename)
    return filename

Expand Down Expand Up @@ -194,8 +245,19 @@ def read_function(rel_path):
)


def _read_from_zip(
    zip_location,  # type: str
    rel_path,  # type: Text
):
    # type: (...) -> bytes
    """Read a single entry out of a zip file.

    The zip is opened with `open_zip` for just this read, so no open handle is retained
    between calls.

    :param zip_location: The path of the zip file to read from.
    :param rel_path: The name of the entry within the zip to read.
    :return: The raw bytes of the entry.
    """
    with open_zip(zip_location) as zf:
        return zf.read(rel_path)


def find_wheel_metadata(location):
# type: (Text) -> Optional[MetadataFiles]

read_function = functools.partial(_read_from_zip, location)
with open_zip(location) as zf:
for name in zf.namelist():
if name.endswith("/"):
Expand All @@ -218,11 +280,6 @@ def find_wheel_metadata(location):
if dist_info_dir == head and tail != metadata_file_name:
files.append(rel_path)

def read_function(rel_path):
# type: (Text) -> bytes
with open_zip(location) as zf:
return zf.read(rel_path)

return MetadataFiles(
metadata=DistMetadataFile(
type=MetadataType.DIST_INFO,
Expand Down Expand Up @@ -330,7 +387,7 @@ def iter_metadata_files(
location, MetadataType.DIST_INFO, "*.dist-info", "METADATA"
)
)
elif location.endswith(".whl") and zipfile.is_zipfile(location):
elif is_wheel(location) and zipfile.is_zipfile(location):
metadata_files = find_wheel_metadata(location)
if metadata_files:
listing.append(metadata_files)
Expand All @@ -341,13 +398,11 @@ def iter_metadata_files(
)
)
elif MetadataType.PKG_INFO is metadata_type:
if location.endswith(".zip") and zipfile.is_zipfile(location):
if is_zip_sdist(location) and zipfile.is_zipfile(location):
metadata_file = find_zip_sdist_metadata(location)
if metadata_file:
listing.append(MetadataFiles(metadata=metadata_file))
elif location.endswith(
(".tar.gz", ".tgz", ".tar.bz2", ".tbz2", ".tar.xz", ".txz")
) and tarfile.is_tarfile(location):
elif is_tar_sdist(location) and tarfile.is_tarfile(location):
metadata_file = find_tar_sdist_metadata(location)
if metadata_file:
listing.append(MetadataFiles(metadata=metadata_file))
Expand Down Expand Up @@ -408,7 +463,7 @@ def from_filename(cls, path):
#
# The wheel filename convention is specified here:
# https://www.python.org/dev/peps/pep-0427/#file-name-convention.
if path.endswith(".whl"):
if is_wheel(path):
project_name, version, _ = os.path.basename(path).split("-", 2)
return cls(project_name=project_name, version=version)

Expand Down Expand Up @@ -903,7 +958,7 @@ def of(cls, location):
# type: (Text) -> DistributionType.Value
if os.path.isdir(location):
return cls.INSTALLED
if location.endswith(".whl") and zipfile.is_zipfile(location):
if is_wheel(location) and zipfile.is_zipfile(location):
return cls.WHEEL
return cls.SDIST

Expand Down
31 changes: 13 additions & 18 deletions pex/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,18 @@
from pex import dist_metadata, pex_warnings, targets
from pex.common import pluralize
from pex.dependency_configuration import DependencyConfiguration
from pex.dist_metadata import Distribution, Requirement
from pex.dist_metadata import Distribution, Requirement, is_wheel
from pex.fingerprinted_distribution import FingerprintedDistribution
from pex.inherit_path import InheritPath
from pex.interpreter import PythonInterpreter
from pex.layout import ensure_installed, identify_layout
from pex.orderedset import OrderedSet
from pex.pep_425 import CompatibilityTags, TagRank
from pex.pep_425 import TagRank
from pex.pep_503 import ProjectName
from pex.pex_info import PexInfo
from pex.targets import Target
from pex.third_party.packaging import specifiers
from pex.third_party.packaging.tags import Tag
from pex.tracer import TRACER
from pex.typing import TYPE_CHECKING

Expand Down Expand Up @@ -139,7 +140,7 @@ def render_message(self, _target):

@attr.s(frozen=True)
class _TagMismatch(_UnrankedDistribution):
wheel_tags = attr.ib() # type: CompatibilityTags
wheel_tags = attr.ib() # type: Iterable[Tag]

def render_message(self, target):
# type: (Target) -> str
Expand Down Expand Up @@ -332,32 +333,26 @@ def _update_candidate_distributions(self, distribution_iter):

def _can_add(self, fingerprinted_dist):
# type: (FingerprintedDistribution) -> Union[_RankedDistribution, _UnrankedDistribution]
filename, ext = os.path.splitext(os.path.basename(fingerprinted_dist.location))
if ext.lower() != ".whl":
filename = os.path.basename(fingerprinted_dist.location)
if not is_wheel(filename):
# This supports resolving pex's own vendored distributions which are vendored in a
# directory with the project name (`pip/` for pip) and not the corresponding wheel name
# (`pip-19.3.1-py2.py3-none-any.whl/` for pip). Pex only vendors universal wheels for
# all platforms it supports at buildtime and runtime so this is always safe.
return _RankedDistribution.highest_rank(fingerprinted_dist)

try:
wheel_tags = CompatibilityTags.from_wheel(fingerprinted_dist.location)
wheel_eval = self._target.wheel_applies(fingerprinted_dist.distribution)
except ValueError:
return _InvalidWheelName(fingerprinted_dist, filename)

# There will be multiple parsed tags for compressed tag sets. Ensure we grab the parsed tag
# with highest rank from that expanded set.
best_match = self._target.supported_tags.best_match(wheel_tags)
if best_match is None:
return _TagMismatch(fingerprinted_dist, wheel_tags)
if not wheel_eval.best_match:
return _TagMismatch(fingerprinted_dist, wheel_eval.tags)
if not wheel_eval.applies:
assert wheel_eval.requires_python
return _PythonRequiresMismatch(fingerprinted_dist, wheel_eval.requires_python)

python_requires = dist_metadata.requires_python(fingerprinted_dist.distribution)
if python_requires and not self._target.requires_python_applies(
python_requires, source=fingerprinted_dist.distribution.as_requirement()
):
return _PythonRequiresMismatch(fingerprinted_dist, python_requires)

return _RankedDistribution(best_match.rank, fingerprinted_dist)
return _RankedDistribution(wheel_eval.best_match.rank, fingerprinted_dist)

def activate(self):
# type: () -> Iterable[Distribution]
Expand Down
8 changes: 4 additions & 4 deletions pex/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,10 +744,10 @@ def iter_map_parallel(
#
input_items.sort(key=costing_function, reverse=True)

# We want each of the job slots above to process MULTIPROCESSING_MIN_AVERAGE_LOAD on average in
# order to overcome multiprocessing overheads. Of course, if there are fewer available cores
# than that or the user has pinned max jobs lower, we clamp to that. Finally, we always want at
# least two slots to ensure we process input items in parallel.
# We want each of the job slots above to process MULTIPROCESSING_DEFAULT_MIN_AVERAGE_LOAD on
# average in order to overcome multiprocessing overheads. Of course, if there are fewer
# available cores than that or the user has pinned max jobs lower, we clamp to that. Finally, we
# always want at least two slots to ensure we process input items in parallel.
pool_size = max(2, min(len(input_items) // min_average_load, _sanitize_max_jobs(max_jobs)))

apply_function = functools.partial(_apply_function, function)
Expand Down
5 changes: 3 additions & 2 deletions pex/pep_425.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import itertools
import os.path

from pex.dist_metadata import is_wheel
from pex.orderedset import OrderedSet
from pex.rank import Rank
from pex.third_party.packaging.tags import Tag, parse_tag
Expand Down Expand Up @@ -56,14 +57,14 @@ class CompatibilityTags(object):
@classmethod
def from_wheel(cls, wheel):
# type: (str) -> CompatibilityTags
wheel_stem, ext = os.path.splitext(os.path.basename(wheel))
if ".whl" != ext:
if not is_wheel(wheel):
raise ValueError(
"Can only calculate wheel tags from a filename that ends in .whl per "
"https://peps.python.org/pep-0427/#file-name-convention, given: {wheel!r}".format(
wheel=wheel
)
)
wheel_stem, _ = os.path.splitext(os.path.basename(wheel))
# Wheel filename format: https://www.python.org/dev/peps/pep-0427/#file-name-convention
# `{distribution}-{version}(-{build tag})?-{python tag}-{abi tag}-{platform tag}.whl`
wheel_components = wheel_stem.rsplit("-", 3)
Expand Down
2 changes: 1 addition & 1 deletion pex/requirements.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ class VCSScheme(object):


def parse_scheme(scheme):
# type: (str) -> Optional[Union[str, ArchiveScheme.Value, VCSScheme]]
# type: (str) -> Union[str, ArchiveScheme.Value, VCSScheme]
match = re.match(
r"""
^
Expand Down
Loading

0 comments on commit e063da3

Please sign in to comment.