Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cache python tools in ~/.cache/pants #7236

Merged
merged 10 commits into from
Feb 22, 2019
1 change: 1 addition & 0 deletions src/python/pants/backend/native/subsystems/conan.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class Conan(PythonToolBase):
'pylint==1.9.3',
]
default_entry_point = 'conans.conan'
default_interpreter_constraints = ['CPython>=2.7,<4']

@classmethod
def register_options(cls, register):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,26 @@ class PythonToolBase(Subsystem):
# Subclasses must set.
default_requirements = None
default_entry_point = None
# Subclasses need not override.
default_interpreter_constraints = []

@classmethod
def register_options(cls, register):
super(PythonToolBase, cls).register_options(register)
register('--interpreter-constraints', type=list, advanced=True, fingerprint=True,
default=cls.default_interpreter_constraints,
help='Python interpreter constraints for this tool. An empty list uses the default '
'interpreter constraints for the repo.')
register('--requirements', type=list, advanced=True, fingerprint=True,
default=cls.default_requirements,
help='Python requirement strings for the tool.')
register('--entry-point', type=str, advanced=True, fingerprint=True,
default=cls.default_entry_point,
help='The main module for the tool.')

def get_interpreter_constraints(self):
return self.get_options().interpreter_constraints

def get_requirement_specs(self):
return self.get_options().requirements

Expand Down
36 changes: 32 additions & 4 deletions src/python/pants/backend/python/tasks/python_tool_prep_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import os
from builtins import str
from contextlib import contextmanager

from pex.pex import PEX
Expand All @@ -13,6 +14,8 @@
from pants.backend.python.interpreter_cache import PythonInterpreterCache
from pants.backend.python.python_requirement import PythonRequirement
from pants.backend.python.subsystems.pex_build_util import PexBuilderWrapper
from pants.base.build_environment import get_pants_cachedir
from pants.base.hash_utils import stable_json_sha1
from pants.base.workunit import WorkUnitLabel
from pants.task.task import Task
from pants.util.dirutil import safe_concurrent_creation
Expand All @@ -23,11 +26,16 @@
class PythonToolInstance(object):
def __init__(self, pex_path, interpreter):
self._pex = PEX(pex_path, interpreter=interpreter)
self._interpreter = interpreter

@property
def pex(self):
return self._pex

@property
def interpreter(self):
return self._interpreter

def _pretty_cmdline(self, args):
return safe_shlex_join(self._pex.cmdline(args))

Expand Down Expand Up @@ -63,6 +71,12 @@ def run(self, *args, **kwargs):
return cmdline, exit_code


# TODO: This python tool setup ends up eagerly generating each pex for each task in every goal which
# is transitively required by the command-line goals, even for tasks which no-op. This requires each
# pex for each relevant python tool to be buildable on the current host, even if it may never be
# intended to be invoked. Especially given the existing clear separation of concerns into
# PythonToolBase/PythonToolInstance/PythonToolPrepBase, this seems like an extremely ripe use case
# for some v2 rules for free caching and no-op when not required for the command-line goals.
class PythonToolPrepBase(Task):
"""Base class for tasks that resolve a python tool to be invoked out-of-process."""

Expand Down Expand Up @@ -97,16 +111,30 @@ def _build_tool_pex(self, tool_subsystem, interpreter, pex_path):
pex_builder.set_entry_point(tool_subsystem.get_entry_point())
pex_builder.freeze()

def _generate_fingerprinted_pex_path(self, tool_subsystem, interpreter):
# `tool_subsystem.get_requirement_specs()` is a list, but order shouldn't actually matter. This
# should probably be sorted, but it's possible a user could intentionally tweak order to work
# around a particular requirement resolution resolve-order issue. In practice the lists are
# expected to be mostly static, so we accept the risk of too-fine-grained caching creating lots
# of pexes in the cache dir.
specs_fingerprint = stable_json_sha1(tool_subsystem.get_requirement_specs())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The tool_subsystem.get_requirement_specs() is a list, but order shouldn't actually matter. This should probably be sorted, but I guess someone could intentionally tweak order to work around a particular requirement resolution resolve-order issue :/

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wasn't sure how to effectively fingerprint this and should have made a note of that. Under the assumption that the requirements are going to be static enough to not blow up the user's disk space with the pants cache, it might be reasonable to assume the ordering will remain static as well?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think its reasonable to risk too-fine-grained-caching. In practice the lists are static as you suggest, and when mutated almost certainly to change deps and not just permute their order.

return os.path.join(
get_pants_cachedir(),
'python',
str(interpreter.identity),
self.fingerprint,
'{}-{}.pex'.format(tool_subsystem.options_scope, specs_fingerprint),
)

def execute(self):
tool_subsystem = self.tool_subsystem_cls.scoped_instance(self)
pex_name = tool_subsystem.options_scope
pex_path = os.path.join(self.workdir, self.fingerprint, '{}.pex'.format(pex_name))

interpreter_cache = PythonInterpreterCache.global_instance()
interpreter = interpreter_cache.select_interpreter_for_targets([])
interpreter = min(interpreter_cache.setup(filters=tool_subsystem.get_interpreter_constraints()))

pex_path = self._generate_fingerprinted_pex_path(tool_subsystem, interpreter)
if not os.path.exists(pex_path):
with self.context.new_workunit(name='create-{}-pex'.format(pex_name),
with self.context.new_workunit(name='create-{}-pex'.format(tool_subsystem.options_scope),
labels=[WorkUnitLabel.PREP]):
self._build_tool_pex(tool_subsystem=tool_subsystem,
interpreter=interpreter,
cosmicexplorer marked this conversation as resolved.
Show resolved Hide resolved
Expand Down
91 changes: 91 additions & 0 deletions tests/python/pants_test/backend/python/tasks/test_python_tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# coding=utf-8
# Copyright 2019 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import absolute_import, division, print_function, unicode_literals

import glob
import os
import re

from pants.backend.python.subsystems.python_tool_base import PythonToolBase
from pants.backend.python.tasks.python_tool_prep_base import PythonToolInstance, PythonToolPrepBase
from pants.task.task import Task
from pants.util.collections import assert_single_element
from pants.util.contextutil import environment_as, temporary_dir
from pants_test.backend.python.tasks.python_task_test_base import PythonTaskTestBase


class Tool(PythonToolBase):
options_scope = 'test-tool'
default_requirements = [
'pex==1.5.3',
]
default_entry_point = 'pex.bin.pex:main'


class ToolInstance(PythonToolInstance):
pass


class ToolPrep(PythonToolPrepBase):
options_scope = 'tool-prep-task'
tool_subsystem_cls = Tool
tool_instance_cls = ToolInstance


class ToolTask(Task):
options_scope = 'tool-task'

@classmethod
def prepare(cls, options, round_manager):
super(ToolTask, cls).prepare(options, round_manager)
round_manager.require_data(ToolPrep.tool_instance_cls)

def execute(self):
tool_for_pex = self.context.products.get_data(ToolPrep.tool_instance_cls)
stdout, stderr, exit_code, _ = tool_for_pex.output(['--version'])
assert '' == stderr
cosmicexplorer marked this conversation as resolved.
Show resolved Hide resolved
assert re.match(r'.*\.pex 1.5.3', stdout)
assert 0 == exit_code


class PythonToolPrepTest(PythonTaskTestBase):

@classmethod
def task_type(cls):
return ToolTask

def _assert_tool_execution_for_python_version(self, use_py3=True):
scope_string = '3' if use_py3 else '2'
constraint_string = 'CPython>=3' if use_py3 else 'CPython<3'
tool_prep_type = self.synthesize_task_subtype(ToolPrep, 'tp_scope_py{}'.format(scope_string))
context = self.context(for_task_types=[tool_prep_type], for_subsystems=[Tool], options={
'test-tool': {
'interpreter_constraints': [constraint_string],
},
})
# XDG_CACHE_HOME overrides the location of the cache dir.
cosmicexplorer marked this conversation as resolved.
Show resolved Hide resolved
with temporary_dir() as tmp_dir, environment_as(XDG_CACHE_HOME=tmp_dir):
tool_prep_task = tool_prep_type(context, os.path.join(
self.pants_workdir, 'tp_py{}'.format(scope_string)))
tool_prep_task.execute()
# Check that the tool is in an interpreter-specific subdir of the cache dir.
cosmicexplorer marked this conversation as resolved.
Show resolved Hide resolved
constructed_tool_location_glob = os.path.join(
tmp_dir, 'pants', 'python',
'CPython-{}*'.format(scope_string),
tool_prep_task.fingerprint,
'test-tool-*.pex',
)
tool_location = assert_single_element(glob.glob(constructed_tool_location_glob))
self.assertTrue(os.path.isdir(tool_location))

# Check that the tool can be executed successfully.
self.create_task(context).execute()
pex_tool = context.products.get_data(ToolPrep.tool_instance_cls)
self.assertTrue(pex_tool.interpreter.identity.matches(constraint_string))

def test_tool_execution(self):
"""Test that python tools are fingerprinted by python interpreter."""
self._assert_tool_execution_for_python_version(use_py3=True)
self._assert_tool_execution_for_python_version(use_py3=False)