fix: stop containers more thoroughly
Don't skip pods which are not ready; still try to stop containers inside
not-ready pod sandboxes.

Re-enable the test with Canal CNI (upstream Calico got fixed).

Signed-off-by: Andrey Smirnov <[email protected]>
smira committed Sep 12, 2022
1 parent 12827b8 commit f424e53
Showing 2 changed files with 29 additions and 22 deletions.
.drone.jsonnet (8 additions, 3 deletions)
@@ -370,7 +370,12 @@ local integration_cilium = Step("e2e-cilium-1.9.10", target="e2e-qemu", privileg
   "WITH_CONFIG_PATCH": '[{"op": "replace", "path": "/cluster/network/podSubnets", "value": ["10.0.0.0/8"]}]', # use Pod CIDRs as hardcoded in Cilium's quick-install
   "IMAGE_REGISTRY": local_registry,
 });
-local integration_bios = Step("e2e-bios", target="e2e-qemu", privileged=true, depends_on=[integration_cilium], environment={
+local integration_canal_reset = Step("e2e-canal-reset", target="e2e-qemu", privileged=true, depends_on=[integration_cilium], environment={
+  "INTEGRATION_TEST_RUN": "TestIntegration/api.ResetSuite/TestResetWithSpec",
+  "CUSTOM_CNI_URL": "https://docs.projectcalico.org/manifests/canal.yaml",
+  "REGISTRY": local_registry,
+});
+local integration_bios = Step("e2e-bios", target="e2e-qemu", privileged=true, depends_on=[integration_canal_reset], environment={
   "SHORT_INTEGRATION_TEST": "yes",
   "WITH_UEFI": "false",
   "IMAGE_REGISTRY": local_registry,
@@ -460,7 +465,7 @@ local integration_pipelines = [
   Pipeline('integration-provision-1', default_pipeline_steps + [integration_provision_tests_prepare, integration_provision_tests_track_1]) + integration_trigger(['integration-provision', 'integration-provision-1']),
   Pipeline('integration-provision-2', default_pipeline_steps + [integration_provision_tests_prepare, integration_provision_tests_track_2]) + integration_trigger(['integration-provision', 'integration-provision-2']),
   Pipeline('integration-misc', default_pipeline_steps + [integration_extensions
-  , integration_cilium, integration_bios, integration_disk_image, integration_control_plane_port, integration_no_cluster_discovery, integration_kubespan, integration_default_hostname]) + integration_trigger(['integration-misc']),
+  , integration_cilium, integration_canal_reset, integration_bios, integration_disk_image, integration_control_plane_port, integration_no_cluster_discovery, integration_kubespan, integration_default_hostname]) + integration_trigger(['integration-misc']),
   Pipeline('integration-qemu-encrypted-vip', default_pipeline_steps + [integration_qemu_encrypted_vip]) + integration_trigger(['integration-qemu-encrypted-vip']),
   Pipeline('integration-qemu-race', default_pipeline_steps + [build_race, integration_qemu_race]) + integration_trigger(['integration-qemu-race']),
   Pipeline('integration-qemu-csi', default_pipeline_steps + [integration_qemu_csi]) + integration_trigger(['integration-qemu-csi']),
@@ -472,7 +477,7 @@ local integration_pipelines = [
   Pipeline('cron-integration-provision-1', default_pipeline_steps + [integration_provision_tests_prepare, integration_provision_tests_track_1], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
   Pipeline('cron-integration-provision-2', default_pipeline_steps + [integration_provision_tests_prepare, integration_provision_tests_track_2], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
   Pipeline('cron-integration-misc', default_pipeline_steps + [integration_extensions
-  , integration_cilium, integration_bios, integration_disk_image, integration_control_plane_port, integration_no_cluster_discovery, integration_kubespan, integration_default_hostname], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
+  , integration_cilium, integration_canal_reset, integration_bios, integration_disk_image, integration_control_plane_port, integration_no_cluster_discovery, integration_kubespan, integration_default_hostname], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
   Pipeline('cron-integration-qemu-encrypted-vip', default_pipeline_steps + [integration_qemu_encrypted_vip], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
   Pipeline('cron-integration-qemu-race', default_pipeline_steps + [build_race, integration_qemu_race], [default_cron_pipeline]) + cron_trigger(['nightly']),
   Pipeline('cron-integration-qemu-csi', default_pipeline_steps + [integration_qemu_csi], [default_cron_pipeline]) + cron_trigger(['nightly']),
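For reference, INTEGRATION_TEST_RUN is a standard `go test -run` filter. A hypothetical sketch of how a testify-style suite could yield the TestIntegration/api.ResetSuite/TestResetWithSpec path; the real suite lives in Talos' integration test packages, so the names and layout here are illustrative only:

```go
package api_test

import (
	"testing"

	"github.com/stretchr/testify/suite"
)

// ResetSuite stands in for the real integration suite; only the naming matters here.
type ResetSuite struct{ suite.Suite }

// TestResetWithSpec is the test this commit re-enables with Canal CNI.
func (s *ResetSuite) TestResetWithSpec() {
	// reset a node with an explicit reset spec and expect it to recover;
	// with a CNI like Canal this exercises stopping containers inside
	// not-ready pod sandboxes
}

// TestIntegration nests the suite so that
// `go test -run 'TestIntegration/api.ResetSuite/TestResetWithSpec'` selects it.
func TestIntegration(t *testing.T) {
	t.Run("api.ResetSuite", func(t *testing.T) {
		suite.Run(t, new(ResetSuite))
	})
}
```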
internal/pkg/cri/pods.go (21 additions, 19 deletions)
@@ -13,6 +13,8 @@ import (
 	"google.golang.org/grpc/codes"
 	grpcstatus "google.golang.org/grpc/status"
 	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+	"github.com/talos-systems/talos/pkg/machinery/constants"
 )

 // RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure
@@ -124,12 +126,6 @@ func (c *Client) StopAndRemovePodSandboxes(ctx context.Context, stopAction StopA
 				return nil
 			}

-			if pod.GetState() != runtimeapi.PodSandboxState_SANDBOX_READY {
-				log.Printf("skipping pod %s/%s, state %s", pod.Metadata.Namespace, pod.Metadata.Name, pod.GetState())
-
-				return nil
-			}
-
 			if e = stopAndRemove(ctx, stopAction, c, pod, networkMode.String()); e != nil {
 				return fmt.Errorf("failed stopping pod %s/%s: %w", pod.Metadata.Namespace, pod.Metadata.Name, e)
 			}
@@ -182,18 +178,22 @@ func stopAndRemove(ctx context.Context, stopAction StopAction, client *Client, p
 		container := container // https://golang.org/doc/faq#closures_and_goroutines

 		g.Go(func() error {
-			log.Printf("%s container %s/%s:%s", action, pod.Metadata.Namespace, pod.Metadata.Name, container.Metadata.Name)
-
-			// TODO(andrewrynhard): Can we set the timeout dynamically?
-			if criErr := client.StopContainer(ctx, container.Id, 30); criErr != nil {
-				if grpcstatus.Code(criErr) == codes.NotFound {
-					return nil
-				}
-
-				return criErr
-			}
+			if container.State == runtimeapi.ContainerState_CONTAINER_RUNNING || container.State == runtimeapi.ContainerState_CONTAINER_UNKNOWN {
+				log.Printf("stopping container %s/%s:%s", pod.Metadata.Namespace, pod.Metadata.Name, container.Metadata.Name)
+
+				if criErr := client.StopContainer(ctx, container.Id, int64(constants.KubeletShutdownGracePeriod.Seconds())); criErr != nil {
+					if grpcstatus.Code(criErr) == codes.NotFound {
+						return nil
+					}
+
+					return criErr
+				}
+			}

 			if stopAction == StopAndRemove {
+				log.Printf("removing container %s/%s:%s", pod.Metadata.Namespace, pod.Metadata.Name, container.Metadata.Name)
+
 				if criErr := client.RemoveContainer(ctx, container.Id); criErr != nil {
 					if grpcstatus.Code(criErr) == codes.NotFound {
 						return nil
@@ -213,14 +213,16 @@ func stopAndRemove(ctx context.Context, stopAction StopAction, client *Client, p
 		return err
 	}

-	if err = client.StopPodSandbox(ctx, pod.Id); err != nil {
-		if grpcstatus.Code(err) == codes.NotFound {
-			return nil
-		}
-
-		log.Printf("error stopping pod %s/%s, ignored: %s", pod.Metadata.Namespace, pod.Metadata.Name, err)
-
-		return nil
-	}
+	if pod.State == runtimeapi.PodSandboxState_SANDBOX_READY {
+		if err = client.StopPodSandbox(ctx, pod.Id); err != nil {
+			if grpcstatus.Code(err) == codes.NotFound {
+				return nil
+			}
+
+			log.Printf("error stopping pod %s/%s, ignored: %s", pod.Metadata.Namespace, pod.Metadata.Name, err)
+
+			return nil
+		}
+	}

 	if stopAction == StopAndRemove {
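Net effect of the pods.go change: stopping containers no longer depends on sandbox readiness, and StopPodSandbox is only issued for sandboxes that are actually READY. A condensed sketch of the resulting flow; this deliberately collapses the real code, which stops containers concurrently via an errgroup and treats gRPC NotFound as already-gone:

```go
// stopAndRemoveSketch condenses the post-commit logic of stopAndRemove.
// It is a sketch, not the actual function signature in pods.go.
func stopAndRemoveSketch(ctx context.Context, client *Client, pod *runtimeapi.PodSandbox, containers []*runtimeapi.Container) error {
	for _, container := range containers {
		// Stop containers that may still be running, even inside a
		// not-ready sandbox; exited containers are left alone.
		if container.State == runtimeapi.ContainerState_CONTAINER_RUNNING ||
			container.State == runtimeapi.ContainerState_CONTAINER_UNKNOWN {
			timeout := int64(constants.KubeletShutdownGracePeriod.Seconds())
			if err := client.StopContainer(ctx, container.Id, timeout); err != nil {
				return err
			}
		}
	}

	// Only a READY sandbox gets an explicit stop; previously the whole pod
	// was skipped when not ready, leaving its containers running.
	if pod.State == runtimeapi.PodSandboxState_SANDBOX_READY {
		return client.StopPodSandbox(ctx, pod.Id)
	}

	return nil
}
```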
