New experimental_shell_command (#12878)

The new `experimental_shell_command` target added to the shell backend allows running arbitrary commands during Pants execution. This target is for introducing side effects in the build, whether as new or modified files, calls out to external services, or management of some other state. It remains important to ensure idempotency, however, as the command may be cancelled or retried at the sole discretion of Pants.

For those familiar with Bazel, `experimental_shell_command` has similarities with the Bazel `genrule`.

Fixes #3734

Example BUILD file usage:

```python
shell_library(name="build-tools")

experimental_shell_command(
    command="./build-util.sh -o output do-things",
    tools=["bash", "env", "cat", "curl", "tar"],
    outputs=["output/"],
    dependencies=[":build-tools"],
)
```

The `dependencies` will pull in scripts from `shell_library`, arbitrary files from `files`, and other `experimental_shell_command` targets; `outputs` lists the directories and files to capture, which may be included by consuming targets; and `tools` lists all required executables that `command` may be using. The `[shell-setup].executable-search-paths` option is used when finding the specified tools.

Signed-off-by: Andreas Stenius <[email protected]>
Co-authored-by: Eric Arellano <[email protected]>
Co-authored-by: Stu Hood <[email protected]>
pantsbuild · Sep 23, 2021 · 2da07b6 · 2da07b6
1 parent 970031f
commit 2da07b6
Show file tree

Hide file tree

Showing 7 changed files with 606 additions and 11 deletions.
diff --git a/src/python/pants/backend/shell/builtin.py b/src/python/pants/backend/shell/builtin.py
@@ -0,0 +1,105 @@
+# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
+# Licensed under the Apache License, Version 2.0 (see LICENSE).
+
+# Command names that `experimental_shell_command` does not resolve to a binary on the search
+# path: any entry of `tools` found here is skipped when looking up tool paths.
+# NOTE(review): besides Bash builtins and keywords, the list holds names such as `breaksw`,
+# `foreach` and `setenv` that look like csh/tcsh builtins -- confirm the intended scope.
+BASH_BUILTIN_COMMANDS = (
+    "alias",
+    "alloc",
+    "bg",
+    "bind",
+    "bindkey",
+    "break",
+    "breaksw",
+    "builtin",
+    "builtins",
+    "case",
+    "cd",
+    "chdir",
+    "command",
+    "complete",
+    "continue",
+    "default",
+    "dirs",
+    "do",
+    "done",
+    "echo",
+    "echotc",
+    "elif",
+    "else",
+    "end",
+    "endif",
+    "endsw",
+    "esac",
+    "eval",
+    "exec",
+    "exit",
+    "export",
+    "false",
+    "fc",
+    "fg",
+    "fi",
+    "filetest",
+    "for",
+    "foreach",
+    "getopts",
+    "glob",
+    "goto",
+    "hash",
+    "hashstat",
+    "history",
+    "hup",
+    "if",
+    "jobid",
+    "jobs",
+    "kill",
+    "limit",
+    "local",
+    "log",
+    "login",
+    "logout",
+    "ls-F",
+    "nice",
+    "nohup",
+    "notify",
+    "onintr",
+    "popd",
+    "printenv",
+    "pushd",
+    "pwd",
+    "read",
+    "readonly",
+    "rehash",
+    "repeat",
+    "return",
+    "sched",
+    "set",
+    "setenv",
+    "settc",
+    "setty",
+    "setvar",
+    "shift",
+    "source",
+    "stop",
+    "suspend",
+    "switch",
+    "telltc",
+    "test",
+    "then",
+    "time",
+    "times",
+    "trap",
+    "true",
+    "type",
+    "ulimit",
+    "umask",
+    "unalias",
+    "uncomplete",
+    "unhash",
+    "unlimit",
+    "unset",
+    "unsetenv",
+    "until",
+    "wait",
+    "where",
+    "which",
+    "while",
+)
diff --git a/src/python/pants/backend/shell/register.py b/src/python/pants/backend/shell/register.py
@@ -1,19 +1,20 @@
 # Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
 # Licensed under the Apache License, Version 2.0 (see LICENSE).
 
-from pants.backend.shell import dependency_inference, shunit2_test_runner, tailor
-from pants.backend.shell.target_types import ShellLibrary, Shunit2Tests
+from pants.backend.shell import dependency_inference, shell_command, shunit2_test_runner, tailor
+from pants.backend.shell.target_types import ShellCommand, ShellLibrary, Shunit2Tests
 from pants.backend.shell.target_types import rules as target_types_rules
 
 
 def target_types():
+    """Return the target types that this shell backend module registers."""
-    return [ShellLibrary, Shunit2Tests]
+    return [ShellCommand, ShellLibrary, Shunit2Tests]
 
 
 def rules():
+    """Return the rules of the shell backend, gathered from each of its submodules."""
     return [
         *dependency_inference.rules(),
-        *tailor.rules(),
+        *shell_command.rules(),
         *shunit2_test_runner.rules(),
+        *tailor.rules(),
         *target_types_rules(),
     ]
diff --git a/src/python/pants/backend/shell/shell_command.py b/src/python/pants/backend/shell/shell_command.py
@@ -0,0 +1,164 @@
+# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
+# Licensed under the Apache License, Version 2.0 (see LICENSE).
+
+from __future__ import annotations
+
+import logging
+import shlex
+from textwrap import dedent
+
+from pants.backend.shell.builtin import BASH_BUILTIN_COMMANDS
+from pants.backend.shell.shell_setup import ShellSetup
+from pants.backend.shell.target_types import (
+    ShellCommandCommandField,
+    ShellCommandLogOutputField,
+    ShellCommandOutputsField,
+    ShellCommandSources,
+    ShellCommandToolsField,
+)
+from pants.core.target_types import FilesSources
+from pants.core.util_rules.source_files import SourceFiles, SourceFilesRequest
+from pants.engine.environment import Environment, EnvironmentRequest
+from pants.engine.fs import AddPrefix, CreateDigest, Digest, Directory, MergeDigests, Snapshot
+from pants.engine.process import (
+    BashBinary,
+    BinaryNotFoundError,
+    BinaryPathRequest,
+    BinaryPaths,
+    Process,
+    ProcessResult,
+)
+from pants.engine.rules import Get, MultiGet, collect_rules, rule
+from pants.engine.target import (
+    GeneratedSources,
+    GenerateSourcesRequest,
+    Sources,
+    TransitiveTargets,
+    TransitiveTargetsRequest,
+)
+from pants.engine.unions import UnionRule
+from pants.util.logging import LogLevel
+
+logger = logging.getLogger(__name__)
+
+
+class GenerateFilesFromShellCommandRequest(GenerateSourcesRequest):
+    """Request to generate `FilesSources` from the `ShellCommandSources` of a target."""
+
+    input = ShellCommandSources
+    output = FilesSources
+
+
+def _shell_tool_safe_env_name(tool_name: str) -> str:
+    """Map a tool name to a name that is valid as a Bash / environment variable name.
+
+    Every character outside `[A-Za-z0-9_]` becomes `_` and a leading digit gets a `_` prefix, so
+    that tools such as `apt-get` or `python3.9` can still be dereferenced with `${!tool}`.
+    """
+    safe_name = "".join(
+        char if char.isascii() and (char.isalnum() or char == "_") else "_" for char in tool_name
+    )
+    return f"_{safe_name}" if safe_name[:1].isdigit() else safe_name
+
+
+@rule(desc="Running experimental_shell_command", level=LogLevel.DEBUG)
+async def run_shell_command(
+    request: GenerateFilesFromShellCommandRequest,
+    shell_setup: ShellSetup,
+    bash: BashBinary,
+) -> GeneratedSources:
+    """Run the target's `command` with Bash and return the captured `outputs`.
+
+    The command runs in the target's directory, with the sources of its transitive dependencies
+    as input and with `PATH` pointing at a directory of symlinks to the requested `tools`.
+
+    Raises:
+        ValueError: if the target has no `command` or no `tools`.
+        BinaryNotFoundError: if a requested tool is not found on the search path.
+    """
+    shell_command = request.protocol_target
+    working_directory = shell_command.address.spec_path
+    command = shell_command[ShellCommandCommandField].value
+    tools = shell_command[ShellCommandToolsField].value
+    outputs = shell_command[ShellCommandOutputsField].value or ()
+
+    if not command:
+        raise ValueError(
+            f"Missing `command` line in `shell_command` target {shell_command.address}."
+        )
+
+    if not tools:
+        raise ValueError(
+            f"Must provide any `tools` used by the `shell_command` {shell_command.address}."
+        )
+
+    env = await Get(Environment, EnvironmentRequest(["PATH"]))
+    search_path = shell_setup.executable_search_path(env)
+    # `mkdir` and `ln` are always needed by the symlink setup script below. The names are sorted
+    # because set iteration order differs between runs, which would otherwise make the process
+    # environment (and with it the `Process` itself) nondeterministic.
+    tool_requests = [
+        BinaryPathRequest(
+            binary_name=tool,
+            search_path=search_path,
+        )
+        for tool in sorted({*tools, *["mkdir", "ln"]})
+        if tool not in BASH_BUILTIN_COMMANDS
+    ]
+    tool_paths = await MultiGet(
+        Get(BinaryPaths, BinaryPathRequest, request) for request in tool_requests
+    )
+
+    # Each tool's path is exported in a variable named after the tool; `TOOLS` lists those
+    # variable names so the setup script can dereference them with `${!tool}`.
+    tool_env_names = [
+        _shell_tool_safe_env_name(tool_request.binary_name) for tool_request in tool_requests
+    ]
+    command_env = {
+        "TOOLS": " ".join(shlex.quote(env_name) for env_name in tool_env_names),
+    }
+
+    for env_name, binary, tool_request in zip(tool_env_names, tool_paths, tool_requests):
+        if binary.first_path:
+            command_env[env_name] = binary.first_path.path
+        else:
+            raise BinaryNotFoundError(
+                tool_request,
+                rationale=f"execute experimental_shell_command {shell_command.address}",
+            )
+
+    transitive_targets = await Get(
+        TransitiveTargets,
+        TransitiveTargetsRequest([shell_command.address]),
+    )
+
+    sources = await Get(
+        SourceFiles,
+        SourceFilesRequest(
+            sources_fields=[tgt.get(Sources) for tgt in transitive_targets.dependencies],
+            for_sources_types=(
+                Sources,
+                FilesSources,
+            ),
+            enable_codegen=True,
+        ),
+    )
+
+    # A trailing slash marks an output as a directory; anything else is captured as a file.
+    output_files = [f for f in outputs if not f.endswith("/")]
+    output_directories = [d for d in outputs if d.endswith("/")]
+
+    # Make sure the working directory exists in the input even if no source file lives there.
+    if working_directory in sources.snapshot.dirs:
+        input_digest = sources.snapshot.digest
+    else:
+        work_dir = await Get(Digest, CreateDigest([Directory(working_directory)]))
+        input_digest = await Get(Digest, MergeDigests([sources.snapshot.digest, work_dir]))
+
+    # Setup bin_relpath dir with symlinks to all requested tools, so that we can use PATH.
+    bin_relpath = ".bin"
+    setup_tool_symlinks_script = ";".join(
+        dedent(
+            f"""\
+            $mkdir -p {bin_relpath}
+            for tool in $TOOLS; do $ln -s ${{!tool}} {bin_relpath}/; done
+            export PATH="$PWD/{bin_relpath}"
+            """
+        ).split("\n")
+    )
+
+    result = await Get(
+        ProcessResult,
+        Process(
+            argv=(bash.path, "-c", setup_tool_symlinks_script + command),
+            description=f"Running experimental_shell_command {shell_command.address}",
+            env=command_env,
+            input_digest=input_digest,
+            output_directories=output_directories,
+            output_files=output_files,
+            working_directory=working_directory,
+        ),
+    )
+
+    if shell_command[ShellCommandLogOutputField].value:
+        if result.stdout:
+            logger.info(result.stdout.decode())
+        if result.stderr:
+            logger.warning(result.stderr.decode())
+
+    # Outputs are captured relative to the working directory; re-root them at the build root.
+    output = await Get(Snapshot, AddPrefix(result.output_digest, working_directory))
+    return GeneratedSources(output)
+
+
+def rules():
+    """Return this module's rules together with its `GenerateSourcesRequest` union member."""
+    union_rule = UnionRule(GenerateSourcesRequest, GenerateFilesFromShellCommandRequest)
+    return [*collect_rules(), union_rule]