Skip to content

Commit

Permalink
wcow: support graceful termination of servercore containers (#1416) (#…
Browse files Browse the repository at this point in the history
…1640)

* This commit includes the changes to enable graceful termination of WCOW containers

(cherry picked from commit 5cfbc2a)

Signed-off-by: Kirtana Ashok <[email protected]>
Co-authored-by: Kirtana Ashok <[email protected]>
  • Loading branch information
kiashok and Kirtana Ashok authored Feb 21, 2023
1 parent f985798 commit 27bb92f
Show file tree
Hide file tree
Showing 11 changed files with 394 additions and 47 deletions.
30 changes: 28 additions & 2 deletions cmd/containerd-shim-runhcs-v1/exec_hcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"github.com/Microsoft/hcsshim/internal/cmd"
"github.com/Microsoft/hcsshim/internal/cow"
"github.com/Microsoft/hcsshim/internal/guestrequest"
"github.com/Microsoft/hcsshim/internal/hcs"
"github.com/Microsoft/hcsshim/internal/log"
"github.com/Microsoft/hcsshim/internal/signals"
"github.com/Microsoft/hcsshim/internal/uvm"
Expand Down Expand Up @@ -285,7 +286,32 @@ func (he *hcsExec) Kill(ctx context.Context, signal uint32) error {
}
var delivered bool
if supported && options != nil {
delivered, err = he.p.Process.Signal(ctx, options)
if he.isWCOW {
// Servercore images block on signaling and wait until the target process
// is terminated to return to the caller. This causes issues when graceful
// termination of containers is requested (Bug36689012).
// To fix this, we deliver the signal to the target process in a separate background
// thread so that the caller can wait for the desired timeout before sending
// a SIGKILL to the process.
// TODO: We can get rid of these changes once the fix to support graceful termination is
// made in windows.
go func() {
signalDelivered, deliveryErr := he.p.Process.Signal(ctx, options)

if deliveryErr != nil {
if !hcs.IsAlreadyStopped(deliveryErr) {
// Process is not already stopped and there was a signal delivery error to this process
log.G(ctx).WithField("err", deliveryErr).Errorf("Error in delivering signal %d, to pid: %d", signal, he.pid)
}
}
if !signalDelivered {
log.G(ctx).Errorf("Error: NotFound; exec: '%s' in task: '%s' not found", he.id, he.tid)
}
}()
delivered, err = true, nil
} else {
delivered, err = he.p.Process.Signal(ctx, options)
}
} else {
// legacy path before signals support OR if WCOW with signals
// support needs to issue a terminate.
Expand Down Expand Up @@ -356,7 +382,7 @@ func (he *hcsExec) ForceExit(ctx context.Context, status int) {
// To transition for a created state the following must be done:
//
// 1. Issue `he.processDoneCancel` to unblock the goroutine
// `he.waitForContainerExit()``.
// `he.waitForContainerExit().
//
// 2. Set `he.state`, `he.exitStatus` and `he.exitedAt` to the exited values.
//
Expand Down
38 changes: 35 additions & 3 deletions internal/hcs/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,39 @@ func (process *Process) Kill(ctx context.Context) (bool, error) {
return true, nil
}

resultJSON, err := vmcompute.HcsTerminateProcess(ctx, process.handle)
// HCS serializes the signals sent to a target pid per compute system handle.
// To avoid SIGKILL being serialized behind other signals, we open a new compute
// system handle to deliver the kill signal.
// If the calls to opening a new compute system handle fail, we forcefully
// terminate the container itself so that no container is left behind
hcsSystem, err := OpenComputeSystem(ctx, process.system.id)
if err != nil {
// log error and force termination of container
log.G(ctx).WithField("err", err).Error("OpenComputeSystem() call failed")
err = process.system.Terminate(ctx)
// if the Terminate() call itself ever failed, log and return error
if err != nil {
log.G(ctx).WithField("err", err).Error("Terminate() call failed")
return false, err
}
process.system.Close()
return true, nil
}
defer hcsSystem.Close()

newProcessHandle, err := hcsSystem.OpenProcess(ctx, process.Pid())
if err != nil {
// Return true only if the target process has either already
// exited, or does not exist.
if IsAlreadyStopped(err) {
return true, nil
} else {
return false, err
}
}
defer newProcessHandle.Close()

resultJSON, err := vmcompute.HcsTerminateProcess(ctx, newProcessHandle.handle)
if err != nil {
// We still need to check these two cases, as processes may still be killed by an
// external actor (human operator, OOM, random script etc).
Expand All @@ -185,9 +217,9 @@ func (process *Process) Kill(ctx context.Context) (bool, error) {
}
}
events := processHcsResult(ctx, resultJSON)
delivered, err := process.processSignalResult(ctx, err)
delivered, err := newProcessHandle.processSignalResult(ctx, err)
if err != nil {
err = makeProcessError(process, operation, err, events)
err = makeProcessError(newProcessHandle, operation, err, events)
}

process.killSignalDelivered = delivered
Expand Down
66 changes: 47 additions & 19 deletions internal/hcsoci/hcsdoc_wcow.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@ import (
"context"
"errors"
"fmt"
"math"
"path/filepath"
"regexp"
"strconv"
"strings"

"github.com/Microsoft/hcsshim/internal/hcs/schema1"
Expand Down Expand Up @@ -399,6 +401,31 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter
dumpPath = specDumpPath
}

// Servercore images block on signaling and wait until the target process
// is terminated to return to its caller. By default, servercore waits for
// 5 seconds (default value of 'WaitToKillServiceTimeout') before sending
// a SIGKILL to terminate the process. This causes issues when graceful
// termination of containers is requested (Bug36689012).
// The regkey 'WaitToKillServiceTimeout' value is overridden here to help
// honor graceful termination of containers by waiting for the requested
// amount of time before stopping the container.
// More details on the implementation of this fix can be found in the Kill()
// function of exec_hcs.go

// 'WaitToKillServiceTimeout' reg key value is arbitrarily chosen and set to a
// value that is long enough that no one will want to wait longer
registryAdd := []hcsschema.RegistryValue{
{
Key: &hcsschema.RegistryKey{
Hive: "System",
Name: "ControlSet001\\Control",
},
Name: "WaitToKillServiceTimeout",
StringValue: strconv.Itoa(math.MaxInt32),
Type_: "String",
},
}

if dumpPath != "" {
dumpType, err := parseDumpType(coi.Spec.Annotations)
if err != nil {
Expand All @@ -407,30 +434,31 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter

// Setup WER registry keys for local process dump creation if specified.
// https://docs.microsoft.com/en-us/windows/win32/wer/collecting-user-mode-dumps
v2Container.RegistryChanges = &hcsschema.RegistryChanges{
AddValues: []hcsschema.RegistryValue{
{
Key: &hcsschema.RegistryKey{
Hive: "Software",
Name: "Microsoft\\Windows\\Windows Error Reporting\\LocalDumps",
},
Name: "DumpFolder",
StringValue: dumpPath,
Type_: "String",
registryAdd = append(registryAdd, []hcsschema.RegistryValue{
{
Key: &hcsschema.RegistryKey{
Hive: "Software",
Name: "Microsoft\\Windows\\Windows Error Reporting\\LocalDumps",
},
{
Key: &hcsschema.RegistryKey{
Hive: "Software",
Name: "Microsoft\\Windows\\Windows Error Reporting\\LocalDumps",
},
Name: "DumpType",
DWordValue: dumpType,
Type_: "DWord",
Name: "DumpFolder",
StringValue: dumpPath,
Type_: "String",
},
{
Key: &hcsschema.RegistryKey{
Hive: "Software",
Name: "Microsoft\\Windows\\Windows Error Reporting\\LocalDumps",
},
Name: "DumpType",
DWordValue: dumpType,
Type_: "DWord",
},
}
}...)
}

v2Container.RegistryChanges = &hcsschema.RegistryChanges{
AddValues: registryAdd,
}
return v1, v2Container, nil
}

Expand Down
4 changes: 3 additions & 1 deletion test/cri-containerd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,9 @@ const (
// Default account name for use with GMSA related tests. This will not be
// present/you will not have access to the account on your machine unless
// your environment is configured properly.
gmsaAccount = "cplat"
gmsaAccount = "cplat"
gracefulTerminationServercore = "cplatpublic.azurecr.io/servercore-gracefultermination-repro:latest"
gracefulTerminationNanoserver = "cplatpublic.azurecr.io/nanoserver-gracefultermination-repro:latest"
)

// Image definitions
Expand Down
85 changes: 85 additions & 0 deletions test/cri-containerd/stopcontainer_test.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
//go:build functional
// +build functional

package cri_containerd

import (
"context"
"testing"
"time"

runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
)
Expand Down Expand Up @@ -169,3 +171,86 @@ func Test_StopContainer_ReusePod_LCOW(t *testing.T) {
containerID = createContainer(t, client, ctx, request)
runContainerLifetime(t, client, ctx, containerID)
}

// Helper function to check container state
func assertContainerState(t *testing.T, client runtime.RuntimeServiceClient, ctx context.Context, containerID string, state runtime.ContainerState) {
t.Helper()
if st := getContainerStatus(t, client, ctx, containerID); st != state {
t.Fatalf("got container %q state %q; wanted %v", containerID, st.String(), state.String())
}
}

// This test runs a container with an image that waits for sigterm and then
// prints for loop counter down from 60 till the container is stopped with
// a timeout of 15 seconds. This is done to mimic graceful termination
// behavior and to ensure that the containers are killed only after 15 second
// timeout specified via the stop container command.
func Test_GracefulTermination(t *testing.T) {
for name, tc := range map[string]struct {
features []string
runtimeHandler string
image string
}{
"WCOWProcessNanoserver": {
features: []string{featureWCOWProcess},
runtimeHandler: wcowProcessRuntimeHandler,
image: gracefulTerminationNanoserver,
},
"WCOWProcessServercore": {
features: []string{featureWCOWProcess},
runtimeHandler: wcowProcessRuntimeHandler,
image: gracefulTerminationServercore,
},
"WCOWHypervisorNanoserver": {
features: []string{featureWCOWHypervisor},
runtimeHandler: wcowHypervisorRuntimeHandler,
image: gracefulTerminationNanoserver,
},
"WCOWHypervisorServercore": {
features: []string{featureWCOWHypervisor},
runtimeHandler: wcowHypervisorRuntimeHandler,
image: gracefulTerminationServercore,
},
} {
t.Run(name, func(t *testing.T) {
requireFeatures(t, tc.features...)
pullRequiredImages(t, []string{tc.image})
client := newTestRuntimeClient(t)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
sandboxRequest := getRunPodSandboxRequest(t, tc.runtimeHandler)
podID := runPodSandbox(t, client, ctx, sandboxRequest)
defer removePodSandbox(t, client, ctx, podID)
defer stopPodSandbox(t, client, ctx, podID)
request := &runtime.CreateContainerRequest{
PodSandboxId: podID,
Config: &runtime.ContainerConfig{
Metadata: &runtime.ContainerMetadata{},
Image: &runtime.ImageSpec{
Image: tc.image,
},
},
SandboxConfig: sandboxRequest.Config,
}
containerID := createContainer(t, client, ctx, request)
defer removeContainer(t, client, ctx, containerID)

startContainer(t, client, ctx, containerID)
// Wait few seconds for the container to be completely initialized
time.Sleep(5 * time.Second)
assertContainerState(t, client, ctx, containerID, runtime.ContainerState_CONTAINER_RUNNING)

startTimeOfContainer := time.Now()
// stop container with timeout of 15 seconds
stopContainerWithTimeout(t, client, ctx, containerID, 15)
assertContainerState(t, client, ctx, containerID, runtime.ContainerState_CONTAINER_EXITED)
// get time elapsed before and after container stop command was issued
elapsedTime := time.Since(startTimeOfContainer)
// Ensure that the container has stopped after approx 15 seconds.
// We are giving it a buffer of +/- 1 second
if elapsedTime < 14*time.Second || elapsedTime > 16*time.Second {
t.Fatalf("Container did not shutdown gracefully \n")
}
})
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
FROM golang:latest as build
ENV GOOS=windows
ENV GOARCH=amd64
ENV GO111MODULE=off
WORKDIR /app
COPY ./delayed-shutdown.go ./
RUN go build -o delayed-shutdown.exe

FROM mcr.microsoft.com/windows/nanoserver:ltsc2022
WORKDIR /app
COPY --from=build /app/delayed-shutdown.exe .
ENTRYPOINT ["delayed-shutdown.exe"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package main

import (
"fmt"
"os"
"os/signal"
"syscall"
"time"
)

func main() {
fmt.Println("Waiting for OS signal...")

signalChannel := make(chan os.Signal)
wait := make(chan int, 1)

signal.Notify(signalChannel, syscall.SIGHUP)
signal.Notify(signalChannel, syscall.SIGINT)
signal.Notify(signalChannel, syscall.SIGTERM)

go func() {
sig := <-signalChannel
switch sig {
case syscall.SIGHUP:
fmt.Println("SIGHUP")
wait <- 1
case syscall.SIGTERM:
fmt.Println("SIGTERM")
wait <- 1
case syscall.SIGINT:
fmt.Println("SIGINT")
wait <- 1
}
}()

<-wait

fmt.Println("Exiting in 60 seconds...")
for i := 60; i > 0; i-- {
fmt.Printf("%d\n", i)
time.Sleep(1 * time.Second)
}

fmt.Println("Goodbye")
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
FROM golang:latest as build
ENV GOOS=windows
ENV GOARCH=amd64
ENV GO111MODULE=off
WORKDIR /app
COPY ./delayed-shutdown.go ./
RUN go build -o delayed-shutdown.exe

FROM mcr.microsoft.com/windows/servercore:ltsc2022
WORKDIR /app
COPY --from=build /app/delayed-shutdown.exe .
ENTRYPOINT ["delayed-shutdown.exe"]
Loading

0 comments on commit 27bb92f

Please sign in to comment.