Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

runc kill: add support for cgroup.kill #3825

Merged
merged 7 commits into from
Jun 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ import (
)

func killContainer(container *libcontainer.Container) error {
_ = container.Signal(unix.SIGKILL, false)
_ = container.Signal(unix.SIGKILL)
for i := 0; i < 100; i++ {
time.Sleep(100 * time.Millisecond)
cyphar marked this conversation as resolved.
Show resolved Hide resolved
if err := container.Signal(unix.Signal(0), false); err != nil {
if err := container.Signal(unix.Signal(0)); err != nil {
destroy(container)
return nil
}
Expand Down
13 changes: 10 additions & 3 deletions kill.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package main

import (
"errors"
"fmt"
"strconv"
"strings"

"github.com/opencontainers/runc/libcontainer"
"github.com/urfave/cli"
"golang.org/x/sys/unix"
)
Expand All @@ -24,8 +26,9 @@ signal to the init process of the "ubuntu01" container:
# runc kill ubuntu01 KILL`,
Flags: []cli.Flag{
cli.BoolFlag{
Name: "all, a",
Usage: "send the specified signal to all processes inside the container",
Name: "all, a",
Usage: "(obsoleted, do not use)",
Hidden: true,
},
},
Action: func(context *cli.Context) error {
Expand All @@ -49,7 +52,11 @@ signal to the init process of the "ubuntu01" container:
if err != nil {
return err
}
return container.Signal(signal, context.Bool("all"))
err = container.Signal(signal)
if errors.Is(err, libcontainer.ErrNotRunning) && context.Bool("all") {
err = nil
}
return err
},
}

Expand Down
12 changes: 12 additions & 0 deletions libcontainer/configs/namespaces_syscall.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,15 @@ func (n *Namespaces) CloneFlags() uintptr {
}
return uintptr(flag)
}

// IsPrivate tells whether the namespace of type t is configured as private
// (i.e. it exists and is not shared).
func (n Namespaces) IsPrivate(t NamespaceType) bool {
for _, v := range n {
if v.Type == t {
return v.Path == ""
}
}
// Not found, so implicitly sharing a parent namespace.
return false
}
63 changes: 35 additions & 28 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -357,37 +357,47 @@ func (c *Container) start(process *Process) (retErr error) {
return nil
}

func (c *Container) Signal(s os.Signal, all bool) error {
// Signal sends a specified signal to container's init.
//
// When s is SIGKILL and the container does not have its own PID namespace, all
// the container's processes are killed. In this scenario, the libcontainer
// user may be required to implement a proper child reaper.
func (c *Container) Signal(s os.Signal) error {
c.m.Lock()
defer c.m.Unlock()
status, err := c.currentStatus()
if err != nil {
return err
}
if all {
if status == Stopped && !c.cgroupManager.Exists() {
// Avoid calling signalAllProcesses which may print
// a warning trying to freeze a non-existing cgroup.
return nil
}
return c.ignoreCgroupError(signalAllProcesses(c.cgroupManager, s))
}
// to avoid a PID reuse attack
if status == Running || status == Created || status == Paused {
if err := c.initProcess.signal(s); err != nil {
return fmt.Errorf("unable to signal init: %w", err)
}
if status == Paused {
// For cgroup v1, killing a process in a frozen cgroup
// does nothing until it's thawed. Only thaw the cgroup
// for SIGKILL.
if s, ok := s.(unix.Signal); ok && s == unix.SIGKILL {
_ = c.cgroupManager.Freeze(configs.Thawed)
}
}
return nil
// To avoid a PID reuse attack, don't kill non-running container.
switch status {
case Running, Created, Paused:
default:
return ErrNotRunning
}
return ErrNotRunning

// When a container has its own PID namespace, inside it the init PID
// is 1, and thus it is handled specially by the kernel. In particular,
// killing init with SIGKILL from an ancestor namespace will also kill
// all other processes in that PID namespace (see pid_namespaces(7)).
//
// OTOH, if PID namespace is shared, we should kill all pids to avoid
// leftover processes.
if s == unix.SIGKILL && !c.config.Namespaces.IsPrivate(configs.NEWPID) {
err = signalAllProcesses(c.cgroupManager, unix.SIGKILL)
} else {
err = c.initProcess.signal(s)
}
if err != nil {
return fmt.Errorf("unable to signal init: %w", err)
}
if status == Paused && s == unix.SIGKILL {
// For cgroup v1, killing a process in a frozen cgroup
// does nothing until it's thawed. Only thaw the cgroup
// for SIGKILL.
_ = c.cgroupManager.Freeze(configs.Thawed)
}
return nil
}

func (c *Container) createExecFifo() error {
Expand Down Expand Up @@ -539,7 +549,6 @@ func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, l
nsMaps[ns.Type] = ns.Path
}
}
_, sharePidns := nsMaps[configs.NEWPID]
data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps, initStandard)
if err != nil {
return nil, err
Expand Down Expand Up @@ -584,7 +593,6 @@ func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, l
container: c,
process: p,
bootstrapData: data,
sharePidns: sharePidns,
}
c.initProcess = init
return init, nil
Expand Down Expand Up @@ -687,8 +695,7 @@ func (c *Container) newInitConfig(process *Process) *initConfig {
return cfg
}

// Destroy destroys the container, if its in a valid state, after killing any
// remaining running processes.
// Destroy destroys the container, if its in a valid state.
//
// Any event registrations are removed before the container is destroyed.
// No error is returned if the container is already destroyed.
Expand Down
91 changes: 15 additions & 76 deletions libcontainer/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (
"runtime/debug"
"strconv"
"strings"
"unsafe"

"github.com/containerd/console"
"github.com/opencontainers/runtime-spec/specs-go"
Expand Down Expand Up @@ -575,39 +574,20 @@ func setupRlimits(limits []configs.Rlimit, pid int) error {
return nil
}

const _P_PID = 1

//nolint:structcheck,unused
type siginfo struct {
si_signo int32
si_errno int32
si_code int32
// below here is a union; si_pid is the only field we use
si_pid int32
// Pad to 128 bytes as detailed in blockUntilWaitable
pad [96]byte
}

// isWaitable returns true if the process has exited false otherwise.
// Its based off blockUntilWaitable in src/os/wait_waitid.go
func isWaitable(pid int) (bool, error) {
si := &siginfo{}
_, _, e := unix.Syscall6(unix.SYS_WAITID, _P_PID, uintptr(pid), uintptr(unsafe.Pointer(si)), unix.WEXITED|unix.WNOWAIT|unix.WNOHANG, 0, 0)
if e != 0 {
return false, &os.SyscallError{Syscall: "waitid", Err: e}
}

return si.si_pid != 0, nil
}

// signalAllProcesses freezes then iterates over all the processes inside the
// manager's cgroups sending the signal s to them.
// If s is SIGKILL and subreaper is not enabled then it will wait for each
// process to exit.
// For all other signals it will check if the process is ready to report its
// exit status and only if it is will a wait be performed.
func signalAllProcesses(m cgroups.Manager, s os.Signal) error {
var procs []*os.Process
func signalAllProcesses(m cgroups.Manager, s unix.Signal) error {
// Use cgroup.kill, if available.
if s == unix.SIGKILL {
if p := m.Path(""); p != "" { // Either cgroup v2 or hybrid.
err := cgroups.WriteFile(p, "cgroup.kill", "1")
if err == nil || !errors.Is(err, os.ErrNotExist) {
return err
}
// Fallback to old implementation.
}
}

if err := m.Freeze(configs.Frozen); err != nil {
logrus.Warn(err)
}
Expand All @@ -619,55 +599,14 @@ func signalAllProcesses(m cgroups.Manager, s os.Signal) error {
return err
}
for _, pid := range pids {
p, err := os.FindProcess(pid)
if err != nil {
logrus.Warn(err)
continue
}
procs = append(procs, p)
if err := p.Signal(s); err != nil {
logrus.Warn(err)
err := unix.Kill(pid, s)
if err != nil && err != unix.ESRCH { //nolint:errorlint // unix errors are bare
logrus.Warnf("kill %d: %v", pid, err)
}
}
if err := m.Freeze(configs.Thawed); err != nil {
logrus.Warn(err)
}

subreaper, err := system.GetSubreaper()
if err != nil {
// The error here means that PR_GET_CHILD_SUBREAPER is not
// supported because this code might run on a kernel older
// than 3.4. We don't want to throw an error in that case,
// and we simplify things, considering there is no subreaper
// set.
subreaper = 0
}

for _, p := range procs {
if s != unix.SIGKILL {
if ok, err := isWaitable(p.Pid); err != nil {
if !errors.Is(err, unix.ECHILD) {
logrus.Warn("signalAllProcesses: ", p.Pid, err)
}
continue
} else if !ok {
// Not ready to report so don't wait
continue
}
}

// In case a subreaper has been setup, this code must not
// wait for the process. Otherwise, we cannot be sure the
// current process will be reaped by the subreaper, while
// the subreaper might be waiting for this process in order
// to retrieve its exit code.
if subreaper == 0 {
if _, err := p.Wait(); err != nil {
if !errors.Is(err, unix.ECHILD) {
logrus.Warn("wait: ", err)
}
}
}
}
cyphar marked this conversation as resolved.
Show resolved Hide resolved
return nil
}
37 changes: 24 additions & 13 deletions libcontainer/integration/exec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1355,16 +1355,26 @@ func TestPIDHost(t *testing.T) {
}
}

func TestPIDHostInitProcessWait(t *testing.T) {
func TestHostPidnsInitKill(t *testing.T) {
config := newTemplateConfig(t, nil)
// Implicitly use host pid ns.
config.Namespaces.Remove(configs.NEWPID)
testPidnsInitKill(t, config)
}

func TestSharedPidnsInitKill(t *testing.T) {
config := newTemplateConfig(t, nil)
// Explicitly use host pid ns.
config.Namespaces.Add(configs.NEWPID, "/proc/1/ns/pid")
testPidnsInitKill(t, config)
}

func testPidnsInitKill(t *testing.T, config *configs.Config) {
if testing.Short() {
return
}

pidns := "/proc/1/ns/pid"

// Run a container with two long-running processes.
config := newTemplateConfig(t, nil)
config.Namespaces.Add(configs.NEWPID, pidns)
container, err := newContainer(t, config)
ok(t, err)
defer func() {
Expand All @@ -1373,7 +1383,7 @@ func TestPIDHostInitProcessWait(t *testing.T) {

process1 := &libcontainer.Process{
Cwd: "/",
Args: []string{"sleep", "100"},
Args: []string{"sleep", "1h"},
Env: standardEnvironment,
Init: true,
}
Expand All @@ -1382,25 +1392,26 @@ func TestPIDHostInitProcessWait(t *testing.T) {

process2 := &libcontainer.Process{
Cwd: "/",
Args: []string{"sleep", "100"},
Args: []string{"sleep", "1h"},
Env: standardEnvironment,
Init: false,
}
err = container.Run(process2)
ok(t, err)

// Kill the init process and Wait for it.
err = process1.Signal(syscall.SIGKILL)
// Kill the container.
err = container.Signal(syscall.SIGKILL)
ok(t, err)
_, err = process1.Wait()
if err == nil {
t.Fatal("expected Wait to indicate failure")
}

// The non-init process must've been killed.
err = process2.Signal(syscall.Signal(0))
if err == nil || err.Error() != "no such process" {
t.Fatalf("expected process to have been killed: %v", err)
// The non-init process must've also been killed. If not,
// the test will time out.
_, err = process2.Wait()
if err == nil {
t.Fatal("expected Wait to indicate failure")
}
}

Expand Down
5 changes: 0 additions & 5 deletions libcontainer/process_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,6 @@ type initProcess struct {
fds []string
process *Process
bootstrapData io.Reader
sharePidns bool
}

func (p *initProcess) pid() int {
Expand Down Expand Up @@ -616,10 +615,6 @@ func (p *initProcess) start() (retErr error) {

func (p *initProcess) wait() (*os.ProcessState, error) {
err := p.cmd.Wait()
// we should kill all processes in cgroup when init is died if we use host PID namespace
if p.sharePidns {
_ = signalAllProcesses(p.manager, unix.SIGKILL)
}
return p.cmd.ProcessState, err
}

Expand Down
7 changes: 0 additions & 7 deletions libcontainer/state_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import (

"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)

Expand Down Expand Up @@ -36,12 +35,6 @@ type containerState interface {
}

func destroy(c *Container) error {
if !c.config.Namespaces.Contains(configs.NEWPID) ||
c.config.Namespaces.PathOf(configs.NEWPID) != "" {
if err := signalAllProcesses(c.cgroupManager, unix.SIGKILL); err != nil {
logrus.Warn(err)
}
}
cyphar marked this conversation as resolved.
Show resolved Hide resolved
err := c.cgroupManager.Destroy()
if c.intelRdtManager != nil {
if ierr := c.intelRdtManager.Destroy(); err == nil {
Expand Down
Loading