From aae9f01020af8dc7ae707a6603610f425ff1d83e Mon Sep 17 00:00:00 2001 From: Lachlan Donald Date: Mon, 15 Jul 2019 10:47:37 +1000 Subject: [PATCH 1/3] Add support for custom process interrupt signal --- process/process.go | 66 +++++++++++++++++++++++++++++++-------- process/process_test.go | 49 ++++++++++++++++++++++++++++- process/signal.go | 32 ++++++++++--------- process/signal_windows.go | 6 ++-- 4 files changed, 122 insertions(+), 31 deletions(-) diff --git a/process/process.go b/process/process.go index 1f2e8ded76..87b7c8c64d 100644 --- a/process/process.go +++ b/process/process.go @@ -6,6 +6,8 @@ import ( "io" "os" "os/exec" + "strconv" + "strings" "sync" "syscall" "time" @@ -18,17 +20,55 @@ const ( termType = `xterm-256color` ) +type Signal int + +const ( + SIGHUP Signal = 1 + SIGINT Signal = 2 + SIGQUIT Signal = 3 + SIGUSR1 Signal = 10 + SIGUSR2 Signal = 12 + SIGTERM Signal = 15 +) + +var signalMap = map[string]Signal{ + `SIGHUP`: SIGHUP, + `SIGINT`: SIGINT, + `SIGQUIT`: SIGQUIT, + `SIGUSR1`: SIGUSR1, + `SIGUSR2`: SIGUSR2, + `SIGTERM`: SIGTERM, +} + +func (s Signal) String() string { + for k, sig := range signalMap { + if sig == s { + return k + } + } + return strconv.FormatInt(int64(s), 10) +} + +func ParseSignal(sig string) (Signal, error) { + s, ok := signalMap[strings.ToUpper(sig)] + if !ok { + return Signal(0), fmt.Errorf("Unknown signal %q", sig) + } + return s, nil +} + // Configuration for a Process type Config struct { - PTY bool - Timestamp bool - Path string - Args []string - Env []string - Stdout io.Writer - Stderr io.Writer - Dir string - Context context.Context + PTY bool + Timestamp bool + Path string + Args []string + Env []string + Stdout io.Writer + Stderr io.Writer + Dir string + Context context.Context + InterruptSignal Signal } // Process is an operating system level process @@ -78,7 +118,7 @@ func (p *Process) Run() error { // Setup the process to create a process group if supported // See https://github.com/kr/pty/issues/35 for context if !p.conf.PTY { - SetupProcessGroup(p.command) + p.setupProcessGroup() } // Configure working dir and fail if it doesn't exist, otherwise @@ -258,11 +298,11 @@ func (p *Process) Interrupt() error { } // interrupt the process (ctrl-c or SIGINT) - if err := InterruptProcessGroup(p.command.Process, p.logger); err != nil { + if err := p.interruptProcessGroup(); err != nil { p.logger.Error("[Process] Failed to interrupt process %d: %v", p.pid, err) // Fallback to terminating if we get an error - if termErr := TerminateProcessGroup(p.command.Process, p.logger); termErr != nil { + if termErr := p.terminateProcessGroup(); termErr != nil { return termErr } } @@ -280,7 +320,7 @@ func (p *Process) Terminate() error { return nil } - return TerminateProcessGroup(p.command.Process, p.logger) + return p.terminateProcessGroup() } func timeoutWait(waitGroup *sync.WaitGroup) error { diff --git a/process/process_test.go b/process/process_test.go index fb7f275741..c6321fe524 100644 --- a/process/process_test.go +++ b/process/process_test.go @@ -166,7 +166,10 @@ func TestProcessInterrupts(t *testing.T) { // give the signal handler some time to install time.Sleep(time.Millisecond * 50) - p.Interrupt() + err := p.Interrupt() + if err != nil { + t.Error(err) + } }() if err := p.Run(); err != nil { @@ -183,6 +186,50 @@ func TestProcessInterrupts(t *testing.T) { assertProcessDoesntExist(t, p) } +func TestProcessInterruptsWithCustomSignal(t *testing.T) { + if runtime.GOOS == `windows` { + t.Skip("Works in windows, but not in docker") + } + + b := &bytes.Buffer{} + + p := process.New(logger.Discard, process.Config{ + Path: os.Args[0], + Env: []string{"TEST_MAIN=tester-signal"}, + Stdout: b, + InterruptSignal: process.SIGINT, + }) + + var wg sync.WaitGroup + wg.Add(1) + + go func() { + defer wg.Done() + <-p.Started() + + // give the signal handler some time to install + time.Sleep(time.Millisecond * 50) + + err := p.Interrupt() + if err != nil { + t.Error(err) + } + }() + + if err := p.Run(); err != nil { + t.Fatal(err) + } + + wg.Wait() + + output := b.String() + if output != `SIG interrupt` { + t.Fatalf("Bad output: %q", output) + } + + assertProcessDoesntExist(t, p) +} + func TestProcessSetsProcessGroupID(t *testing.T) { if runtime.GOOS == `windows` { t.Skip("Process groups not supported on windows") diff --git a/process/signal.go b/process/signal.go index 159ae10506..116d60045a 100644 --- a/process/signal.go +++ b/process/signal.go @@ -3,30 +3,34 @@ package process import ( - "os" - "os/exec" "syscall" - - "github.com/buildkite/agent/logger" ) -func SetupProcessGroup(cmd *exec.Cmd) { - cmd.SysProcAttr = &syscall.SysProcAttr{ - Setpgid: true, - Pgid: 0, +func (p *Process) setupProcessGroup() { + // See https://github.com/kr/pty/issues/35 for context + if !p.conf.PTY { + p.command.SysProcAttr = &syscall.SysProcAttr{ + Setpgid: true, + Pgid: 0, + } } } -func TerminateProcessGroup(p *os.Process, l logger.Logger) error { - l.Debug("[Process] Sending signal SIGKILL to PGID: %d", p.Pid) - return syscall.Kill(-p.Pid, syscall.SIGKILL) +func (p *Process) terminateProcessGroup() error { + p.logger.Debug("[Process] Sending signal SIGKILL to PGID: %d", p.Pid) + return syscall.Kill(-p.pid, syscall.SIGKILL) } -func InterruptProcessGroup(p *os.Process, l logger.Logger) error { - l.Debug("[Process] Sending signal SIGTERM to PGID: %d", p.Pid) +func (p *Process) interruptProcessGroup() error { + intSignal := p.conf.InterruptSignal // TODO: this should be SIGINT, but will be a breaking change - return syscall.Kill(-p.Pid, syscall.SIGTERM) + if intSignal == Signal(0) { + intSignal = SIGTERM + } + + p.logger.Debug("[Process] Sending signal %s to PGID: %d", intSignal, p.Pid) + return syscall.Kill(-p.pid, syscall.Signal(intSignal)) } func GetPgid(pid int) (int, error) { diff --git a/process/signal_windows.go b/process/signal_windows.go index 1521459aef..09f6ccf02c 100644 --- a/process/signal_windows.go +++ b/process/signal_windows.go @@ -26,13 +26,13 @@ const ( createNewProcessGroupFlag = 0x00000200 ) -func SetupProcessGroup(cmd *exec.Cmd) { +func setupProcessGroup(cmd *exec.Cmd) { cmd.SysProcAttr = &syscall.SysProcAttr{ CreationFlags: syscall.CREATE_UNICODE_ENVIRONMENT | createNewProcessGroupFlag, } } -func TerminateProcessGroup(p *os.Process, l logger.Logger) error { +func terminateProcessGroup(p *os.Process, l logger.Logger) error { l.Debug("[Process] Terminating process tree with TASKKILL.EXE PID: %d", p.Pid) // taskkill.exe with /F will call TerminateProcess and hard-kill the process and @@ -40,7 +40,7 @@ func TerminateProcessGroup(p *os.Process, l logger.Logger) error { return exec.Command("CMD", "/C", "TASKKILL.EXE", "/F", "/T", "/PID", strconv.Itoa(p.Pid)).Run() } -func InterruptProcessGroup(p *os.Process, l logger.Logger) error { +func interruptProcessGroup(p *os.Process, l logger.Logger) error { // Sends a CTRL-BREAK signal to the process group id, which is the same as the process PID // For some reason I cannot fathom, this returns "Incorrect function" in docker for windows r1, _, err := procGenerateConsoleCtrlEvent.Call(syscall.CTRL_BREAK_EVENT, uintptr(p.Pid)) From 4e2a07df5aa5f211bacf51367cefd25e7448e082 Mon Sep 17 00:00:00 2001 From: Lachlan Donald Date: Mon, 15 Jul 2019 10:48:03 +1000 Subject: [PATCH 2/3] Support custom signals with cancel-signal --- agent/agent_worker.go | 9 +++++++++ agent/job_runner.go | 16 ++++++++++------ clicommand/agent_start.go | 14 ++++++++++++++ 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/agent/agent_worker.go b/agent/agent_worker.go index 6f1fdcf4f7..37ae033201 100644 --- a/agent/agent_worker.go +++ b/agent/agent_worker.go @@ -11,6 +11,7 @@ import ( "github.com/buildkite/agent/api" "github.com/buildkite/agent/logger" "github.com/buildkite/agent/metrics" + "github.com/buildkite/agent/process" "github.com/buildkite/agent/proctitle" "github.com/buildkite/agent/retry" ) @@ -19,6 +20,9 @@ type AgentWorkerConfig struct { // Whether to set debug in the job Debug bool + // What signal to use for worker cancellation + CancelSignal process.Signal + // The configuration of the agent from the CLI AgentConfiguration AgentConfiguration } @@ -51,6 +55,9 @@ type AgentWorker struct { // Whether to enable debug debug bool + // The signal to use for cancellation + cancelSig process.Signal + // Stop controls stop chan struct{} stopping bool @@ -71,6 +78,7 @@ func NewAgentWorker(l logger.Logger, a *api.AgentRegisterResponse, m *metrics.Co debug: c.Debug, agentConfiguration: c.AgentConfiguration, stop: make(chan struct{}), + cancelSig: c.CancelSignal, } } @@ -373,6 +381,7 @@ func (a *AgentWorker) AcceptAndRun(job *api.Job) error { // Now that the job has been accepted, we can start it. a.jobRunner, err = NewJobRunner(a.logger, jobMetricsScope, a.agent, accepted, a.apiClient, JobRunnerConfig{ Debug: a.debug, + CancelSignal: a.cancelSig, AgentConfiguration: a.agentConfiguration, }) diff --git a/agent/job_runner.go b/agent/job_runner.go index 9cb1c5edce..14bed6d255 100644 --- a/agent/job_runner.go +++ b/agent/job_runner.go @@ -24,6 +24,9 @@ type JobRunnerConfig struct { // The configuration of the agent from the CLI AgentConfiguration AgentConfiguration + // What signal to use for worker cancellation + CancelSignal process.Signal + // Whether to set debug in the job Debug bool } @@ -181,12 +184,13 @@ func NewJobRunner(l logger.Logger, scope *metrics.Scope, ag *api.AgentRegisterRe // The process that will run the bootstrap script runner.process = process.New(l, process.Config{ - Path: cmd[0], - Args: cmd[1:], - Env: processEnv, - PTY: conf.AgentConfiguration.RunInPty, - Stdout: processWriter, - Stderr: processWriter, + Path: cmd[0], + Args: cmd[1:], + Env: processEnv, + PTY: conf.AgentConfiguration.RunInPty, + Stdout: processWriter, + Stderr: processWriter, + InterruptSignal: conf.CancelSignal, }) // Kick off our callback when the process starts diff --git a/clicommand/agent_start.go b/clicommand/agent_start.go index 23e26845cf..ed7c1d3e32 100644 --- a/clicommand/agent_start.go +++ b/clicommand/agent_start.go @@ -15,6 +15,7 @@ import ( "github.com/buildkite/agent/experiments" "github.com/buildkite/agent/logger" "github.com/buildkite/agent/metrics" + "github.com/buildkite/agent/process" "github.com/buildkite/shellwords" "github.com/urfave/cli" ) @@ -80,6 +81,7 @@ type AgentStartConfig struct { MetricsDatadogHost string `cli:"metrics-datadog-host"` Spawn int `cli:"spawn"` LogFormat string `cli:"log-format"` + CancelSignal string `cli:"cancel-signal"` // Global flags Debug bool `cli:"debug"` @@ -353,6 +355,12 @@ var AgentStartCommand = cli.Command{ Value: 1, EnvVar: "BUILDKITE_AGENT_SPAWN", }, + cli.StringFlag{ + Name: "cancel-signal", + Usage: "The signal to use for cancellation", + EnvVar: "BUILDKITE_CANCEL_SIGNAL", + Value: "SIGTERM", + }, // API Flags AgentRegisterTokenFlag, @@ -590,6 +598,11 @@ var AgentStartCommand = cli.Command{ l.Info("Agents will disconnect after %d seconds of inactivity", agentConf.DisconnectAfterIdleTimeout) } + cancelSig, err := process.ParseSignal(cfg.CancelSignal) + if err != nil { + l.Fatal("Failed to parse cancel-signal: %v", err) + } + // Create the API client client := api.NewClient(l, loadAPIClientConfig(cfg, `Token`)) @@ -630,6 +643,7 @@ var AgentStartCommand = cli.Command{ agent.NewAgentWorker( l.WithFields(logger.StringField(`agent`, ag.Name)), ag, mc, client, agent.AgentWorkerConfig{ AgentConfiguration: agentConf, + CancelSignal: cancelSig, Debug: cfg.Debug, })) } From 039959b22b4af4b7df330879e5247ae130e6d0ef Mon Sep 17 00:00:00 2001 From: Lachlan Donald Date: Mon, 15 Jul 2019 11:00:55 +1000 Subject: [PATCH 3/3] Fix windows process control --- process/signal_windows.go | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/process/signal_windows.go b/process/signal_windows.go index 09f6ccf02c..cdd60cb205 100644 --- a/process/signal_windows.go +++ b/process/signal_windows.go @@ -2,12 +2,9 @@ package process import ( "errors" - "os" "os/exec" "strconv" "syscall" - - "github.com/buildkite/agent/logger" ) // Windows has no concept of parent/child processes or signals. The best we can do @@ -26,24 +23,24 @@ const ( createNewProcessGroupFlag = 0x00000200 ) -func setupProcessGroup(cmd *exec.Cmd) { - cmd.SysProcAttr = &syscall.SysProcAttr{ +func (p *Process) setupProcessGroup() { + p.command.SysProcAttr = &syscall.SysProcAttr{ CreationFlags: syscall.CREATE_UNICODE_ENVIRONMENT | createNewProcessGroupFlag, } } -func terminateProcessGroup(p *os.Process, l logger.Logger) error { - l.Debug("[Process] Terminating process tree with TASKKILL.EXE PID: %d", p.Pid) +func (p *Process) terminateProcessGroup() error { + p.logger.Debug("[Process] Terminating process tree with TASKKILL.EXE PID: %d", p.Pid) // taskkill.exe with /F will call TerminateProcess and hard-kill the process and // anything left in it's process tree. - return exec.Command("CMD", "/C", "TASKKILL.EXE", "/F", "/T", "/PID", strconv.Itoa(p.Pid)).Run() + return exec.Command("CMD", "/C", "TASKKILL.EXE", "/F", "/T", "/PID", strconv.Itoa(p.pid)).Run() } -func interruptProcessGroup(p *os.Process, l logger.Logger) error { +func (p *Process) interruptProcessGroup() error { // Sends a CTRL-BREAK signal to the process group id, which is the same as the process PID // For some reason I cannot fathom, this returns "Incorrect function" in docker for windows - r1, _, err := procGenerateConsoleCtrlEvent.Call(syscall.CTRL_BREAK_EVENT, uintptr(p.Pid)) + r1, _, err := procGenerateConsoleCtrlEvent.Call(syscall.CTRL_BREAK_EVENT, uintptr(p.pid)) if r1 == 0 { return err }