diff --git a/cmd/containerd-shim-runhcs-v1/exec_hcs.go b/cmd/containerd-shim-runhcs-v1/exec_hcs.go
index ca444d60c8..ff65ccf27a 100644
--- a/cmd/containerd-shim-runhcs-v1/exec_hcs.go
+++ b/cmd/containerd-shim-runhcs-v1/exec_hcs.go
@@ -290,7 +290,32 @@ func (he *hcsExec) Kill(ctx context.Context, signal uint32) error {
 	}
 	var delivered bool
 	if supported && options != nil {
-		delivered, err = he.p.Process.Signal(ctx, options)
+		if he.isWCOW {
+			// Servercore images block on signaling and wait until the target process
+			// is terminated before returning to the caller. This causes issues when
+			// graceful termination of containers is requested (Bug36689012).
+			// To work around this, deliver the signal to the target process in a
+			// separate background goroutine so that the caller can wait for the
+			// desired timeout before sending a SIGKILL to the process.
+			// TODO: Remove these changes once the fix to support graceful
+			// termination is made in Windows.
+			go func() {
+				signalDelivered, deliveryErr := he.p.Process.Signal(ctx, options)
+
+				if deliveryErr != nil {
+					if !hcs.IsAlreadyStopped(deliveryErr) {
+						// The process is not already stopped and the signal could not be delivered.
+						log.G(ctx).WithField("err", deliveryErr).Errorf("error delivering signal %d to pid %d", signal, he.pid)
+					}
+				}
+				if !signalDelivered {
+					log.G(ctx).Errorf("Error: NotFound; exec: '%s' in task: '%s' not found", he.id, he.tid)
+				}
+			}()
+			delivered, err = true, nil
+		} else {
+			delivered, err = he.p.Process.Signal(ctx, options)
+		}
 	} else {
 		// legacy path before signals support OR if WCOW with signals
 		// support needs to issue a terminate.
diff --git a/internal/hcs/process.go b/internal/hcs/process.go
index 873b829b43..e437e297c0 100644
--- a/internal/hcs/process.go
+++ b/internal/hcs/process.go
@@ -167,7 +167,39 @@ func (process *Process) Kill(ctx context.Context) (bool, error) {
 		return true, nil
 	}
 
-	resultJSON, err := vmcompute.HcsTerminateProcess(ctx, process.handle)
+	// HCS serializes the signals sent to a target pid per compute system handle.
+	// To avoid SIGKILL being serialized behind other signals, open a new compute
+	// system handle and deliver the kill signal through it.
+	// If opening a new compute system handle fails, forcefully terminate the
+	// container itself so that no container is left behind.
+	hcsSystem, err := OpenComputeSystem(ctx, process.system.id)
+	if err != nil {
+		// Log the error and force termination of the container.
+		log.G(ctx).WithField("err", err).Error("OpenComputeSystem() call failed")
+		err = process.system.Terminate(ctx)
+		// If the Terminate() call itself fails, log and return the error.
+		if err != nil {
+			log.G(ctx).WithField("err", err).Error("Terminate() call failed")
+			return false, err
+		}
+		process.system.Close()
+		return true, nil
+	}
+	defer hcsSystem.Close()
+
+	newProcessHandle, err := hcsSystem.OpenProcess(ctx, process.Pid())
+	if err != nil {
+		// Return true only if the target process has either already
+		// exited or does not exist.
+		if IsAlreadyStopped(err) {
+			return true, nil
+		} else {
+			return false, err
+		}
+	}
+	defer newProcessHandle.Close()
+
+	resultJSON, err := vmcompute.HcsTerminateProcess(ctx, newProcessHandle.handle)
 	if err != nil {
 		// We still need to check these two cases, as processes may still be killed by an
 		// external actor (human operator, OOM, random script etc).
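For context between the two hunks: the point of the goroutine above is that signal delivery may block, so the shim fires it asynchronously and enforces the caller's grace period itself. The following is a minimal standalone sketch of that caller-side pattern, not the shim's actual API; `sendSignal`, `forceKill`, and `exited` are hypothetical stand-ins.

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// stopGracefully fires the (possibly blocking) signal delivery in the
// background, waits up to the caller's grace period for the process to
// exit, and escalates to a forced kill once the period elapses.
func stopGracefully(ctx context.Context, grace time.Duration,
	sendSignal, forceKill func(context.Context) error,
	exited <-chan struct{}) error {
	// Deliver asynchronously so a blocking delivery (as observed on
	// servercore) cannot stall the timeout handling below.
	go func() { _ = sendSignal(ctx) }()

	select {
	case <-exited:
		return nil // exited within the grace period
	case <-time.After(grace):
		return forceKill(ctx) // grace period elapsed; escalate
	case <-ctx.Done():
		return ctx.Err()
	}
}

func main() {
	exited := make(chan struct{})
	blockingSignal := func(context.Context) error {
		time.Sleep(10 * time.Second) // simulate a delivery that blocks
		return nil
	}
	kill := func(context.Context) error { close(exited); return nil }

	err := stopGracefully(context.Background(), 2*time.Second, blockingSignal, kill, exited)
	fmt.Println("stopped:", err)
}
```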
@@ -191,9 +223,9 @@ func (process *Process) Kill(ctx context.Context) (bool, error) {
 		}
 	}
 	events := processHcsResult(ctx, resultJSON)
-	delivered, err := process.processSignalResult(ctx, err)
+	delivered, err := newProcessHandle.processSignalResult(ctx, err)
 	if err != nil {
-		err = makeProcessError(process, operation, err, events)
+		err = makeProcessError(newProcessHandle, operation, err, events)
 	}
 
 	process.killSignalDelivered = delivered
diff --git a/internal/hcsoci/hcsdoc_wcow.go b/internal/hcsoci/hcsdoc_wcow.go
index 1d25b44438..5e34cd9c05 100644
--- a/internal/hcsoci/hcsdoc_wcow.go
+++ b/internal/hcsoci/hcsdoc_wcow.go
@@ -7,8 +7,10 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"math"
 	"path/filepath"
 	"regexp"
+	"strconv"
 	"strings"
 
 	specs "github.com/opencontainers/runtime-spec/specs-go"
@@ -402,6 +404,31 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter
 		dumpPath = specDumpPath
 	}
 
+	// Servercore images block on signaling and wait until the target process
+	// is terminated before returning to the caller. By default, servercore
+	// waits for 5 seconds (the default value of 'WaitToKillServiceTimeout')
+	// before sending a SIGKILL to terminate the process. This causes issues
+	// when graceful termination of containers is requested (Bug36689012).
+	// The 'WaitToKillServiceTimeout' registry value is overridden here so that
+	// graceful termination of containers is honored by waiting for the
+	// requested amount of time before stopping the container.
+	// More details on the implementation of this fix can be found in the
+	// Kill() function of exec_hcs.go.
+
+	// The 'WaitToKillServiceTimeout' value is arbitrarily chosen and set to a
+	// value that is long enough that no one will want to wait longer.
+	registryAdd := []hcsschema.RegistryValue{
+		{
+			Key: &hcsschema.RegistryKey{
+				Hive: "System",
+				Name: "ControlSet001\\Control",
+			},
+			Name:        "WaitToKillServiceTimeout",
+			StringValue: strconv.Itoa(math.MaxInt32),
+			Type_:       "String",
+		},
+	}
+
 	if dumpPath != "" {
 		dumpType, err := parseDumpType(coi.Spec.Annotations)
 		if err != nil {
@@ -410,30 +437,31 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter
 		// Setup WER registry keys for local process dump creation if specified.
 		// https://docs.microsoft.com/en-us/windows/win32/wer/collecting-user-mode-dumps
-		v2Container.RegistryChanges = &hcsschema.RegistryChanges{
-			AddValues: []hcsschema.RegistryValue{
-				{
-					Key: &hcsschema.RegistryKey{
-						Hive: "Software",
-						Name: "Microsoft\\Windows\\Windows Error Reporting\\LocalDumps",
-					},
-					Name:        "DumpFolder",
-					StringValue: dumpPath,
-					Type_:       "String",
-				},
-				{
-					Key: &hcsschema.RegistryKey{
-						Hive: "Software",
-						Name: "Microsoft\\Windows\\Windows Error Reporting\\LocalDumps",
-					},
-					Name:       "DumpType",
-					DWordValue: dumpType,
-					Type_:      "DWord",
-				},
-			},
-		}
+		registryAdd = append(registryAdd, []hcsschema.RegistryValue{
+			{
+				Key: &hcsschema.RegistryKey{
+					Hive: "Software",
+					Name: "Microsoft\\Windows\\Windows Error Reporting\\LocalDumps",
+				},
+				Name:        "DumpFolder",
+				StringValue: dumpPath,
+				Type_:       "String",
+			},
+			{
+				Key: &hcsschema.RegistryKey{
+					Hive: "Software",
+					Name: "Microsoft\\Windows\\Windows Error Reporting\\LocalDumps",
+				},
+				Name:       "DumpType",
+				DWordValue: dumpType,
+				Type_:      "DWord",
+			},
+		}...)
 	}
+	v2Container.RegistryChanges = &hcsschema.RegistryChanges{
+		AddValues: registryAdd,
+	}
 
 	return v1, v2Container, nil
 }
diff --git a/test/cri-containerd/main_test.go b/test/cri-containerd/main_test.go
index 6edfb1ebcd..333a719dd1 100644
--- a/test/cri-containerd/main_test.go
+++ b/test/cri-containerd/main_test.go
@@ -48,22 +48,24 @@ const (
 	testVMServiceAddress = "C:\\ContainerPlat\\vmservice.sock"
 	testVMServiceBinary  = "C:\\Containerplat\\vmservice.exe"
 
-	lcowRuntimeHandler       = "runhcs-lcow"
-	imageLcowK8sPause        = "mcr.microsoft.com/oss/kubernetes/pause:3.1"
-	imageLcowAlpine          = "mcr.microsoft.com/mirror/docker/library/alpine:3.16"
-	imageLcowAlpineCoreDump  = "cplatpublic.azurecr.io/stackoverflow-alpine:latest"
-	imageLcowCosmos          = "cosmosarno/spark-master:2.4.1_2019-04-18_8e864ce"
-	imageLcowCustomUser      = "cplatpublic.azurecr.io/linux_custom_user:latest"
-	imageWindowsProcessDump  = "cplatpublic.azurecr.io/crashdump:latest"
-	imageWindowsArgsEscaped  = "cplatpublic.azurecr.io/argsescaped:latest"
-	imageWindowsTimezone     = "cplatpublic.azurecr.io/timezone:latest"
-	imageJobContainerHNS     = "cplatpublic.azurecr.io/jobcontainer_hns:latest"
-	imageJobContainerETW     = "cplatpublic.azurecr.io/jobcontainer_etw:latest"
-	imageJobContainerVHD     = "cplatpublic.azurecr.io/jobcontainer_vhd:latest"
-	imageJobContainerCmdline = "cplatpublic.azurecr.io/jobcontainer_cmdline:latest"
-	imageJobContainerWorkDir = "cplatpublic.azurecr.io/jobcontainer_workdir:latest"
-	alpineAspNet             = "mcr.microsoft.com/dotnet/core/aspnet:3.1-alpine3.11"
-	alpineAspnetUpgrade      = "mcr.microsoft.com/dotnet/core/aspnet:3.1.2-alpine3.11"
+	lcowRuntimeHandler            = "runhcs-lcow"
+	imageLcowK8sPause             = "mcr.microsoft.com/oss/kubernetes/pause:3.1"
+	imageLcowAlpine               = "mcr.microsoft.com/mirror/docker/library/alpine:3.16"
+	imageLcowAlpineCoreDump       = "cplatpublic.azurecr.io/stackoverflow-alpine:latest"
+	imageLcowCosmos               = "cosmosarno/spark-master:2.4.1_2019-04-18_8e864ce"
+	imageLcowCustomUser           = "cplatpublic.azurecr.io/linux_custom_user:latest"
+	imageWindowsProcessDump       = "cplatpublic.azurecr.io/crashdump:latest"
+	imageWindowsArgsEscaped       = "cplatpublic.azurecr.io/argsescaped:latest"
+	imageWindowsTimezone          = "cplatpublic.azurecr.io/timezone:latest"
+	imageJobContainerHNS          = "cplatpublic.azurecr.io/jobcontainer_hns:latest"
+	imageJobContainerETW          = "cplatpublic.azurecr.io/jobcontainer_etw:latest"
+	imageJobContainerVHD          = "cplatpublic.azurecr.io/jobcontainer_vhd:latest"
+	imageJobContainerCmdline      = "cplatpublic.azurecr.io/jobcontainer_cmdline:latest"
+	imageJobContainerWorkDir      = "cplatpublic.azurecr.io/jobcontainer_workdir:latest"
+	alpineAspNet                  = "mcr.microsoft.com/dotnet/core/aspnet:3.1-alpine3.11"
+	alpineAspnetUpgrade           = "mcr.microsoft.com/dotnet/core/aspnet:3.1.2-alpine3.11"
+	gracefulTerminationServercore = "cplatpublic.azurecr.io/servercore-gracefultermination-repro:latest"
+	gracefulTerminationNanoserver = "cplatpublic.azurecr.io/nanoserver-gracefultermination-repro:latest"
 
 	// Default account name for use with GMSA related tests. This will not be
 	// present/you will not have access to the account on your machine unless
 	// your environment is configured properly.
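As a sanity check on the "long enough" claim in the hcsdoc_wcow.go comment above: the registry value is written as a string, and given the 5-second (5000) default the comment cites, it is evidently interpreted in milliseconds, so `math.MaxInt32` works out to roughly 25 days. A small standalone sketch, assuming that milliseconds interpretation:

```go
package main

import (
	"fmt"
	"math"
	"strconv"
	"time"
)

func main() {
	// The patch stores strconv.Itoa(math.MaxInt32) in
	// System\ControlSet001\Control\WaitToKillServiceTimeout.
	value := strconv.Itoa(math.MaxInt32) // "2147483647"

	// Interpreted as milliseconds (the default is 5000, i.e. 5 seconds),
	// this pushes the service-kill timeout out to ~24.9 days, far beyond
	// any realistic stop-container grace period.
	d := time.Duration(math.MaxInt32) * time.Millisecond
	fmt.Printf("WaitToKillServiceTimeout=%s (~%.1f days)\n", value, d.Hours()/24)
}
```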
diff --git a/test/cri-containerd/stopcontainer_test.go b/test/cri-containerd/stopcontainer_test.go
index 3a582877a6..c5cd692da4 100644
--- a/test/cri-containerd/stopcontainer_test.go
+++ b/test/cri-containerd/stopcontainer_test.go
@@ -6,6 +6,7 @@ package cri_containerd
 import (
 	"context"
 	"testing"
+	"time"
 
 	runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
 )
@@ -170,3 +171,78 @@ func Test_StopContainer_ReusePod_LCOW(t *testing.T) {
 	containerID = createContainer(t, client, ctx, request)
 	runContainerLifetime(t, client, ctx, containerID)
 }
+
+// This test runs a container with an image that waits for SIGTERM and then
+// counts down from 60, printing once per second, until the container is
+// stopped with a timeout of 15 seconds. This mimics graceful termination
+// behavior and ensures that the container is killed only after the 15 second
+// timeout specified via the stop container command has elapsed.
+func Test_GracefulTermination(t *testing.T) {
+	for name, tc := range map[string]struct {
+		features       []string
+		runtimeHandler string
+		image          string
+	}{
+		"WCOWProcessNanoserver": {
+			features:       []string{featureWCOWProcess},
+			runtimeHandler: wcowProcessRuntimeHandler,
+			image:          gracefulTerminationNanoserver,
+		},
+		"WCOWProcessServercore": {
+			features:       []string{featureWCOWProcess},
+			runtimeHandler: wcowProcessRuntimeHandler,
+			image:          gracefulTerminationServercore,
+		},
+		"WCOWHypervisorNanoserver": {
+			features:       []string{featureWCOWHypervisor},
+			runtimeHandler: wcowHypervisorRuntimeHandler,
+			image:          gracefulTerminationNanoserver,
+		},
+		"WCOWHypervisorServercore": {
+			features:       []string{featureWCOWHypervisor},
+			runtimeHandler: wcowHypervisorRuntimeHandler,
+			image:          gracefulTerminationServercore,
+		},
+	} {
+		t.Run(name, func(t *testing.T) {
+			requireFeatures(t, tc.features...)
+			pullRequiredImages(t, []string{tc.image})
+			client := newTestRuntimeClient(t)
+			ctx, cancel := context.WithCancel(context.Background())
+			defer cancel()
+			sandboxRequest := getRunPodSandboxRequest(t, tc.runtimeHandler)
+			podID := runPodSandbox(t, client, ctx, sandboxRequest)
+			defer removePodSandbox(t, client, ctx, podID)
+			defer stopPodSandbox(t, client, ctx, podID)
+			request := &runtime.CreateContainerRequest{
+				PodSandboxId: podID,
+				Config: &runtime.ContainerConfig{
+					Metadata: &runtime.ContainerMetadata{},
+					Image: &runtime.ImageSpec{
+						Image: tc.image,
+					},
+				},
+				SandboxConfig: sandboxRequest.Config,
+			}
+			containerID := createContainer(t, client, ctx, request)
+			defer removeContainer(t, client, ctx, containerID)
+
+			startContainer(t, client, ctx, containerID)
+			// Wait a few seconds for the container to be completely initialized.
+			time.Sleep(5 * time.Second)
+			assertContainerState(t, client, ctx, containerID, runtime.ContainerState_CONTAINER_RUNNING)
+
+			startTimeOfContainer := time.Now()
+			// Stop the container with a timeout of 15 seconds.
+			stopContainerWithTimeout(t, client, ctx, containerID, 15)
+			assertContainerState(t, client, ctx, containerID, runtime.ContainerState_CONTAINER_EXITED)
+			// Measure the time elapsed between issuing the stop and the container exiting.
+			elapsedTime := time.Since(startTimeOfContainer)
+			// Ensure that the container stopped after approximately 15 seconds.
+			// We give it a buffer of +/- 1 second.
+			if elapsedTime < 14*time.Second || elapsedTime > 16*time.Second {
+				t.Fatalf("container did not shut down gracefully")
+			}
+		})
+	}
+}
diff --git a/test/cri-containerd/test-images/nanoserver-gracefultermination-repro/latest/Dockerfile b/test/cri-containerd/test-images/nanoserver-gracefultermination-repro/latest/Dockerfile
new file mode 100644
index 0000000000..336e9c65e1
--- /dev/null
+++ b/test/cri-containerd/test-images/nanoserver-gracefultermination-repro/latest/Dockerfile
@@ -0,0 +1,12 @@
+FROM golang:latest as build
+ENV GOOS=windows
+ENV GOARCH=amd64
+ENV GO111MODULE=off
+WORKDIR /app
+COPY ./delayed-shutdown.go ./
+RUN go build -o delayed-shutdown.exe
+
+FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
+WORKDIR /app
+COPY --from=build /app/delayed-shutdown.exe .
+ENTRYPOINT ["delayed-shutdown.exe"]
diff --git a/test/cri-containerd/test-images/nanoserver-gracefultermination-repro/latest/delayed-shutdown.go b/test/cri-containerd/test-images/nanoserver-gracefultermination-repro/latest/delayed-shutdown.go
new file mode 100644
index 0000000000..0c5719c19c
--- /dev/null
+++ b/test/cri-containerd/test-images/nanoserver-gracefultermination-repro/latest/delayed-shutdown.go
@@ -0,0 +1,45 @@
+package main
+
+import (
+	"fmt"
+	"os"
+	"os/signal"
+	"syscall"
+	"time"
+)
+
+func main() {
+	fmt.Println("Waiting for OS signal...")
+
+	signalChannel := make(chan os.Signal, 1)
+	wait := make(chan int, 1)
+
+	signal.Notify(signalChannel, syscall.SIGHUP)
+	signal.Notify(signalChannel, syscall.SIGINT)
+	signal.Notify(signalChannel, syscall.SIGTERM)
+
+	go func() {
+		sig := <-signalChannel
+		switch sig {
+		case syscall.SIGHUP:
+			fmt.Println("SIGHUP")
+			wait <- 1
+		case syscall.SIGTERM:
+			fmt.Println("SIGTERM")
+			wait <- 1
+		case syscall.SIGINT:
+			fmt.Println("SIGINT")
+			wait <- 1
+		}
+	}()
+
+	<-wait
+
+	fmt.Println("Exiting in 60 seconds...")
+	for i := 60; i > 0; i-- {
+		fmt.Printf("%d\n", i)
+		time.Sleep(1 * time.Second)
+	}
+
+	fmt.Println("Goodbye")
+}
\ No newline at end of file
diff --git a/test/cri-containerd/test-images/servercore-gracefultermination-repro/latest/Dockerfile b/test/cri-containerd/test-images/servercore-gracefultermination-repro/latest/Dockerfile
new file mode 100644
index 0000000000..7fc6cd03d5
--- /dev/null
+++ b/test/cri-containerd/test-images/servercore-gracefultermination-repro/latest/Dockerfile
@@ -0,0 +1,12 @@
+FROM golang:latest as build
+ENV GOOS=windows
+ENV GOARCH=amd64
+ENV GO111MODULE=off
+WORKDIR /app
+COPY ./delayed-shutdown.go ./
+RUN go build -o delayed-shutdown.exe
+
+FROM mcr.microsoft.com/windows/servercore:ltsc2022
+WORKDIR /app
+COPY --from=build /app/delayed-shutdown.exe .
+ENTRYPOINT ["delayed-shutdown.exe"]
diff --git a/test/cri-containerd/test-images/servercore-gracefultermination-repro/latest/delayed-shutdown.go b/test/cri-containerd/test-images/servercore-gracefultermination-repro/latest/delayed-shutdown.go
new file mode 100644
index 0000000000..0c5719c19c
--- /dev/null
+++ b/test/cri-containerd/test-images/servercore-gracefultermination-repro/latest/delayed-shutdown.go
@@ -0,0 +1,45 @@
+package main
+
+import (
+	"fmt"
+	"os"
+	"os/signal"
+	"syscall"
+	"time"
+)
+
+func main() {
+	fmt.Println("Waiting for OS signal...")
+
+	signalChannel := make(chan os.Signal, 1)
+	wait := make(chan int, 1)
+
+	signal.Notify(signalChannel, syscall.SIGHUP)
+	signal.Notify(signalChannel, syscall.SIGINT)
+	signal.Notify(signalChannel, syscall.SIGTERM)
+
+	go func() {
+		sig := <-signalChannel
+		switch sig {
+		case syscall.SIGHUP:
+			fmt.Println("SIGHUP")
+			wait <- 1
+		case syscall.SIGTERM:
+			fmt.Println("SIGTERM")
+			wait <- 1
+		case syscall.SIGINT:
+			fmt.Println("SIGINT")
+			wait <- 1
+		}
+	}()
+
+	<-wait
+
+	fmt.Println("Exiting in 60 seconds...")
+	for i := 60; i > 0; i-- {
+		fmt.Printf("%d\n", i)
+		time.Sleep(1 * time.Second)
+	}
+
+	fmt.Println("Goodbye")
+}
\ No newline at end of file
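A side note on the repro program above (identical in both images): the channel passed to `signal.Notify` must be buffered, because `Notify` performs non-blocking sends (the diff uses a one-element buffer accordingly), and a single `Notify` call can register several signals at once, which makes the relay goroutine and the per-signal switch unnecessary when only the signal name is needed. A condensed, behaviorally equivalent sketch, except that it prints the signal's `String()` form rather than the explicit names:

```go
package main

import (
	"fmt"
	"os"
	"os/signal"
	"syscall"
	"time"
)

func main() {
	fmt.Println("Waiting for OS signal...")

	// signal.Notify performs non-blocking sends, so the channel must be
	// buffered; one call can register several signals at once.
	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM)

	// Block until a signal arrives, then report it. This prints the
	// signal's String() (e.g. "terminated" for SIGTERM) rather than the
	// uppercase names used in the repro.
	fmt.Println(<-sigs)

	fmt.Println("Exiting in 60 seconds...")
	for i := 60; i > 0; i-- {
		fmt.Println(i)
		time.Sleep(1 * time.Second)
	}
	fmt.Println("Goodbye")
}
```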