Skip to content

Commit

Permalink
Add support for --no-pre-install-wheels and --max-install-jobs. (#2298)

Browse files Browse the repository at this point in the history

The `--no-pre-install-wheels` option causes built PEXes to use raw
`.whl` files. For `--layout zipapp` this means a single `.whl` file is
`STORED` per dep, and for `--layout {packed,loose}` this means the loose
`.deps/` dir contains raw `.whl` files. This speeds up all PEX builds by
avoiding pre-installing wheel deps (~unzipping into the `PEX_ROOT`) and
then, in the case of zipapp and packed layout, re-zipping. For large
dependencies the time savings can be dramatic.

Not pre-installing wheels comes with a PEX boot cold-start performance
tradeoff since installation now needs to be done at runtime. This is
generally a penalty of O(100ms), but that penalty can be erased for some
deployment scenarios with the new `--max-install-jobs` build option / 
`PEX_MAX_INSTALL_JOBS` runtime env var. By default, runtime installs are
performed serially, but this new option can be set to use multiple
parallel install processes, which can speed up cold boots for large
dependencies.

Fixes #2292
  • Loading branch information
jsirois authored Dec 14, 2023
1 parent 25ce9da commit b3a7767
Show file tree
Hide file tree
Showing 42 changed files with 1,858 additions and 639 deletions.
20 changes: 14 additions & 6 deletions docs/_ext/vars.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,24 @@
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from docutils import nodes
from docutils import nodes, statemachine
from docutils.parsers.rst import Directive
from sphinx import addnodes
from sphinx.util.nodes import nested_parse_with_titles

from pex.variables import DefaultedProperty, Variables


class Vars(Directive):
def convert_rst_to_nodes(self, rst_source):
    """Parse an RST string into document nodes usable in the current document."""
    container = nodes.Element()
    # The parser needs a document context to resolve references against.
    container.document = self.state.document
    source_lines = statemachine.ViewList(rst_source.split("\n"))
    nested_parse_with_titles(state=self.state, content=source_lines, node=container)
    return container.children

def run(self):
def make_nodes(var_name):
var_obj = Variables.__dict__[var_name]
Expand All @@ -19,13 +29,11 @@ def make_nodes(var_name):
desc_str = desc_str or "NO DESC"

sig = addnodes.desc()
sig["objtype"] = sig["desctype"] = "var"
sig.append(nodes.target("", "", ids=[var_name]))
sig.append(addnodes.desc_signature(var_name, var_name))
desc = nodes.paragraph()
for line in desc_str.split("\n"):
desc += nodes.line(line, line)
sig["objtype"] = sig["desctype"] = "var"
return sig, desc

return [sig] + self.convert_rst_to_nodes(desc_str)

return [
node for var in dir(Variables) if var.startswith("PEX_") for node in make_nodes(var)
Expand Down
51 changes: 46 additions & 5 deletions pex/bin/pex.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@
from pex.enum import Enum
from pex.inherit_path import InheritPath
from pex.interpreter_constraints import InterpreterConstraints
from pex.layout import Layout, maybe_install
from pex.layout import Layout, ensure_installed
from pex.orderedset import OrderedSet
from pex.pep_427 import InstallableType
from pex.pex import PEX
from pex.pex_bootstrapper import ensure_venv
from pex.pex_builder import Check, CopyMode, PEXBuilder
Expand Down Expand Up @@ -164,6 +165,41 @@ def configure_clp_pex_options(parser):
"mode as well and support `--seed`ing."
),
)
group.add_argument(
"--pre-install-wheels",
"--no-pre-install-wheels",
dest="pre_install_wheels",
default=True,
action=HandleBoolAction,
help=(
"Whether to pre-install third party dependency wheels. Pre-installed wheels will "
"always yield slightly faster PEX cold boot times; so they are used by default, but "
"they also slow down PEX build time. As the size of dependencies grows you may find a "
"tipping point where it makes sense to not pre-install wheels; either because the "
"increased cold boot time is irrelevant to your use case or marginal compared to "
"other costs. Note that you may be able to use --max-install-jobs to decrease cold "
"boot times for some PEX deployment scenarios."
),
)
group.add_argument(
"--max-install-jobs",
dest="max_install_jobs",
default=1,
type=int,
help=(
"The maximum number of parallel jobs to use when installing third party dependencies "
"contained in a PEX during its first boot. By default, this is set to 1 which "
"indicates dependencies should be installed in serial. A value of 2 or more indicates "
"dependencies should be installed in parallel using exactly this maximum number of "
"jobs. A value of 0 indicates the maximum number of parallel jobs should be "
"auto-selected taking the number of cores into account. Finally, a value of -1 "
"indicates the maximum number of parallel jobs should be auto-selected taking both the "
"characteristics of the third party dependencies contained in the PEX and the number "
"of cores into account. The third party dependency heuristics are intended to yield "
"good install performance, but are opaque and may change across PEX releases if better "
"heuristics are discovered. Any other value is illegal."
),
)
group.add_argument(
"--check",
dest="check",
Expand Down Expand Up @@ -824,6 +860,8 @@ def build_pex(
pex_info.pex_root = options.runtime_pex_root
pex_info.strip_pex_env = options.strip_pex_env
pex_info.interpreter_constraints = interpreter_constraints
pex_info.deps_are_wheel_files = not options.pre_install_wheels
pex_info.max_install_jobs = options.max_install_jobs

dependency_manager = DependencyManager()
excluded = list(options.excluded) # type: List[str]
Expand All @@ -848,13 +886,18 @@ def build_pex(
)
):
try:
dependency_manager.add_from_installed(
dependency_manager.add_from_resolved(
resolve(
targets=targets,
requirement_configuration=requirement_configuration,
resolver_configuration=resolver_configuration,
compile_pyc=options.compile,
ignore_errors=options.ignore_errors,
result_type=(
InstallableType.INSTALLED_WHEEL_CHROOT
if options.pre_install_wheels
else InstallableType.WHEEL_FILE
),
)
)
except Unsatisfiable as e:
Expand Down Expand Up @@ -1063,9 +1106,7 @@ def create_verbose_info(final_pex_path):

with TRACER.timed("Seeding caches for {}".format(pex_path)):
final_pex_path = os.path.join(
maybe_install(pex=pex_path, pex_root=pex_root, pex_hash=pex_hash)
or os.path.abspath(pex_path),
"__main__.py",
ensure_installed(pex=pex_path, pex_root=pex_root, pex_hash=pex_hash), "__main__.py"
)
if verbose:
return json.dumps(create_verbose_info(final_pex_path=final_pex_path))
Expand Down
6 changes: 3 additions & 3 deletions pex/build_system/pep_517.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,11 @@ def _default_build_system(
selected_pip_version.wheel_requirement,
]
resolved = tuple(
installed_distribution.fingerprinted_distribution.distribution
for installed_distribution in resolver.resolve_requirements(
resolved_distribution.fingerprinted_distribution.distribution
for resolved_distribution in resolver.resolve_requirements(
requirements=requires,
targets=Targets.from_target(target),
).installed_distributions
).distributions
)
build_system = try_(
BuildSystem.create(
Expand Down
3 changes: 1 addition & 2 deletions pex/build_system/pep_518.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,7 @@ def load_build_system(
interpreter=target.get_interpreter(),
requires=build_system_table.requires,
resolved=tuple(
installed_distribution.distribution
for installed_distribution in result.installed_distributions
resolved_distribution.distribution for resolved_distribution in result.distributions
),
build_backend=build_system_table.build_backend,
backend_path=build_system_table.backend_path,
Expand Down
10 changes: 5 additions & 5 deletions pex/cli/commands/venv.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ def _create(self):
requirement_configuration = requirement_options.configure(self.options)
resolver_configuration = resolver_options.configure(self.options)
with TRACER.timed("Resolving distributions"):
installed = configured_resolve.resolve(
resolved = configured_resolve.resolve(
targets=targets,
requirement_configuration=requirement_configuration,
resolver_configuration=resolver_configuration,
Expand All @@ -280,16 +280,16 @@ def _create(self):

with TRACER.timed(
"Installing {count} {wheels} in {subject} at {dest_dir}".format(
count=len(installed.installed_distributions),
wheels=pluralize(installed.installed_distributions, "wheel"),
count=len(resolved.distributions),
wheels=pluralize(resolved.distributions, "wheel"),
subject=subject,
dest_dir=dest_dir,
)
):
hermetic_scripts = not update and installer_configuration.hermetic_scripts
distributions = tuple(
installed_distribution.distribution
for installed_distribution in installed.installed_distributions
resolved_distribution.distribution
for resolved_distribution in resolved.distributions
)
provenance = (
Provenance.create(venv=venv)
Expand Down
86 changes: 63 additions & 23 deletions pex/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,11 +461,7 @@ class Error(Exception):
pass

class ChrootTaggingException(Error):
def __init__(self, filename, orig_tag, new_tag):
super(Chroot.ChrootTaggingException, self).__init__( # noqa: T800
"Trying to add %s to fileset(%s) but already in fileset(%s)!"
% (filename, new_tag, orig_tag)
)
pass

def __init__(self, chroot_base):
# type: (str) -> None
Expand All @@ -479,6 +475,7 @@ def __init__(self, chroot_base):
raise self.Error("Unable to create chroot in %s: %s" % (chroot_base, e))
self.chroot = chroot_base # type: str
self.filesets = defaultdict(set) # type: DefaultDict[Optional[str], Set[str]]
self._compress_by_file = {} # type: Dict[str, bool]
self._file_index = {} # type: Dict[str, Optional[str]]

def clone(self, into=None):
Expand Down Expand Up @@ -511,23 +508,51 @@ def _normalize(self, dst):
raise self.Error("Destination path is not a relative path!")
return dst

def _check_tag(self, fn, label):
def _check_tag(
    self,
    fn,  # type: str
    label,  # type: Optional[str]
    compress=True,  # type: bool
):
    # type: (...) -> None
    """Raises ChrootTaggingException if a file was added under more than one label."""
    # setdefault records the first-seen label; a later mismatch is a conflict.
    prior_label = self._file_index.setdefault(fn, label)
    if prior_label != label:
        message = "Trying to add {file} to fileset({new_tag}) but already in " "fileset({orig_tag})!".format(
            file=fn, new_tag=label, orig_tag=prior_label
        )
        raise self.ChrootTaggingException(message)
    # Likewise, a file's compression choice must be consistent across additions.
    prior_compress = self._compress_by_file.setdefault(fn, compress)
    if prior_compress != compress:
        message = (
            "Trying to add {file} to fileset({tag}) with compress {new_compress} but already "
            "added with compress {orig_compress}!".format(
                file=fn, tag=label, new_compress=compress, orig_compress=prior_compress
            )
        )
        raise self.ChrootTaggingException(message)

def _tag(self, fn, label):
# type: (str, Optional[str]) -> None
self._check_tag(fn, label)
def _tag(
    self,
    fn,  # type: str
    label,  # type: Optional[str]
    compress,  # type: bool
):
    # type: (...) -> None
    """Record ``fn`` under ``label``, enforcing consistent tagging and compression."""
    # Validate first: _check_tag raises if fn was previously added with a
    # different label or a different compress setting.
    self._check_tag(fn, label, compress)
    members = self.filesets[label]
    members.add(fn)

def _ensure_parent(self, path):
    # type: (str) -> None
    """Create the parent directory of ``path`` under the chroot if it is missing."""
    parent_dir = os.path.dirname(os.path.join(self.chroot, path))
    safe_mkdir(parent_dir)

def copy(self, src, dst, label=None):
# type: (str, str, Optional[str]) -> None
def copy(
self,
src, # type: str
dst, # type: str
label=None, # type: Optional[str]
compress=True, # type: bool
):
# type: (...) -> None
"""Copy file ``src`` to ``chroot/dst`` with optional label.
May raise anything shutil.copy can raise, e.g.
Expand All @@ -537,12 +562,18 @@ def copy(self, src, dst, label=None):
but with a different label.
"""
dst = self._normalize(dst)
self._tag(dst, label)
self._tag(dst, label, compress)
self._ensure_parent(dst)
shutil.copy(src, os.path.join(self.chroot, dst))

def link(self, src, dst, label=None):
# type: (str, str, Optional[str]) -> None
def link(
self,
src, # type: str
dst, # type: str
label=None, # type: Optional[str]
compress=True, # type: bool
):
# type: (...) -> None
"""Hard link file from ``src`` to ``chroot/dst`` with optional label.
May raise anything os.link can raise, e.g.
Expand All @@ -552,7 +583,7 @@ def link(self, src, dst, label=None):
but with a different label.
"""
dst = self._normalize(dst)
self._tag(dst, label)
self._tag(dst, label, compress)
self._ensure_parent(dst)
abs_src = src
abs_dst = os.path.join(self.chroot, dst)
Expand All @@ -564,10 +595,11 @@ def symlink(
src, # type: str
dst, # type: str
label=None, # type: Optional[str]
compress=True, # type: bool
):
# type: (...) -> None
dst = self._normalize(dst)
self._tag(dst, label)
self._tag(dst, label, compress)
self._ensure_parent(dst)
abs_src = os.path.abspath(src)
abs_dst = os.path.join(self.chroot, dst)
Expand All @@ -580,28 +612,33 @@ def write(
label=None, # type: Optional[str]
mode="wb", # type: str
executable=False, # type: bool
compress=True, # type: bool
):
# type: (...) -> None
"""Write data to ``chroot/dst`` with optional label.
Has similar exceptional cases as ``Chroot.copy``
"""
dst = self._normalize(dst)
self._tag(dst, label)
self._tag(dst, label, compress)
self._ensure_parent(dst)
with open(os.path.join(self.chroot, dst), mode) as wp:
wp.write(data)
if executable:
chmod_plus_x(wp.name)

def touch(self, dst, label=None):
# type: (str, Optional[str]) -> None
def touch(
    self,
    dst,  # type: str
    label=None,  # type: Optional[str]
):
    # type: (...) -> None
    """Perform 'touch' on ``chroot/dst`` with optional label.

    Has similar exceptional cases as Chroot.copy
    """
    normalized = self._normalize(dst)
    # A touched file is empty, so mark it to be STORED rather than deflated.
    self._tag(normalized, label, compress=False)
    touch(os.path.join(self.chroot, normalized))

def get(self, label):
Expand Down Expand Up @@ -651,8 +688,9 @@ def zip(
else:
selected_files = self.files()

compression = zipfile.ZIP_DEFLATED if compress else zipfile.ZIP_STORED
with open_zip(filename, mode, compression) as zf:
with open_zip(
filename, mode, zipfile.ZIP_DEFLATED if compress else zipfile.ZIP_STORED
) as zf:

def write_entry(
filename, # type: str
Expand All @@ -666,6 +704,8 @@ def write_entry(
if deterministic_timestamp
else None,
)
compress_file = compress and self._compress_by_file.get(arcname, True)
compression = zipfile.ZIP_DEFLATED if compress_file else zipfile.ZIP_STORED
zf.writestr(zip_entry.info, zip_entry.data, compression)

def get_parent_dir(path):
Expand Down
3 changes: 2 additions & 1 deletion pex/compatibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,8 @@ def exec_function(ast, globals_map):
def cpu_count():
# type: () -> Optional[int]
# The set of CPUs accessible to the current process (pid 0).
cpu_set = os.sched_getaffinity(0)
# N.B.: MyPy does not track the hasattr guard above under interpreters without the attr.
cpu_set = os.sched_getaffinity(0) # type: ignore[attr-defined]
return len(cpu_set)

else:
Expand Down
Loading

0 comments on commit b3a7767

Please sign in to comment.