From d3fd53ce4a66b798f61b93182085f86fbbd303e1 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 11 Apr 2023 17:42:25 -0700 Subject: [PATCH] runc kill: add support for cgroup.kill cgroup.kill interface was added to Linux kernel 5.14 (see [1], [2]). Use it if we can. [1] https://lwn.net/Articles/855049/ [2] https://lwn.net/Articles/855924/ Signed-off-by: Kir Kolyshkin --- libcontainer/signal_all_linux.go | 108 ++++++++++++++++++++++++++----- 1 file changed, 93 insertions(+), 15 deletions(-) diff --git a/libcontainer/signal_all_linux.go b/libcontainer/signal_all_linux.go index 204fb13f1f1..171f433c758 100644 --- a/libcontainer/signal_all_linux.go +++ b/libcontainer/signal_all_linux.go @@ -2,6 +2,7 @@ package libcontainer import ( "errors" + "fmt" "os" "unsafe" @@ -34,36 +35,57 @@ func isWaitable(pid int) (bool, error) { return si.pid != 0, nil } -// signalAllProcesses freezes then iterates over all the processes inside the -// manager's cgroups sending the signal s to them. -// If s is SIGKILL and subreaper is not enabled then it will wait for each -// process to exit. -// For all other signals it will check if the process is ready to report its -// exit status and only if it is will a wait be performed. +// signalAllProcesses sends signal to all the processes inside the manager's cgroup. +// In case the signal is SIGKILL, and cgroup.kill is available, it is used. Otherwise, +// the cgroup is frozen, then the signal is sent to all the processes one by one. +// +// If s is SIGKILL and subreaper is not enabled, this function waits for each +// process to exit. For all other signals it will check if the process is ready +// to report its exit status and only if it is will a wait be performed. func signalAllProcesses(m cgroups.Manager, s os.Signal) error { sig, ok := s.(unix.Signal) if !ok { return errors.New("unsupported signal type") } - if err := m.Freeze(configs.Frozen); err != nil { - logrus.Warn(err) + haveCgroupKill := false + + // Use cgroup.kill, if available. 
+ if s == unix.SIGKILL { + if p := m.Path(""); p != "" { // Either cgroup v2 or hybrid. + if err := cgroupKillAll(p); err == nil { + haveCgroupKill = true + } else if !errors.Is(err, unix.ENOENT) { + logrus.Warnf("cgroupKillAll: %v", err) + } + } + } + + if !haveCgroupKill { + if err := m.Freeze(configs.Frozen); err != nil { + logrus.Warn(err) + } } + pids, err := m.GetAllPids() if err != nil { - if err := m.Freeze(configs.Thawed); err != nil { - logrus.Warn(err) + if !haveCgroupKill { + if err := m.Freeze(configs.Thawed); err != nil { + logrus.Warn(err) + } } return err } - for _, pid := range pids { - if err := unix.Kill(pid, sig); err != nil && err != unix.ESRCH { //nolint:errorlint // unix errors are bare + if !haveCgroupKill { + for _, pid := range pids { + if err := unix.Kill(pid, sig); err != nil && err != unix.ESRCH { //nolint:errorlint // unix errors are bare + logrus.Warn(err) + } + } + if err := m.Freeze(configs.Thawed); err != nil { logrus.Warn(err) } } - if err := m.Freeze(configs.Thawed); err != nil { - logrus.Warn(err) - } subreaper, err := system.GetSubreaper() if err != nil { @@ -103,3 +125,59 @@ func signalAllProcesses(m cgroups.Manager, s os.Signal) error { } return nil } + +func prepareCgWait(dir string) (int, error) { + fd, err := unix.InotifyInit() + if err != nil { + return -1, fmt.Errorf("unable to init inotify: %w", err) + } + _, err = unix.InotifyAddWatch(fd, dir+"/cgroup.events", unix.IN_MODIFY) + if err != nil { + unix.Close(fd) + return -1, fmt.Errorf("unable to add inotify watch: %w", err) + } + return fd, nil +} + +func cgWait(fd int) error { + fds := []unix.PollFd{{ + Fd: int32(fd), + Events: unix.POLLIN, + }} + for { + res, err := unix.Poll(fds, 10000) + if err == unix.EINTR { //nolint:errorlint // unix errors are bare + + continue + } + if err != nil { + return &os.SyscallError{Syscall: "poll", Err: err} + } + if res == 0 { // Timeout. 
+ return &os.SyscallError{Syscall: "poll", Err: unix.ETIMEDOUT} + } + if res > 0 && fds[0].Revents&unix.POLLIN != 0 { + return nil + } + } +} + +func cgroupKillAll(path string) error { + const file = "cgroup.kill" + if err := unix.Access(path+"/"+file, unix.F_OK); err != nil { + return &os.PathError{Op: "access", Path: path + "/" + file, Err: err} + } + + fd, err := prepareCgWait(path) + if err != nil { + return err + } + + err = cgroups.WriteFile(path, file, "1") + if err == nil { + err = cgWait(fd) + } + _ = unix.Close(fd) + + return err +}