From 36cd07a86eba62019947e3b3017596d338ce2cda Mon Sep 17 00:00:00 2001 From: oraz Date: Sun, 5 Nov 2023 10:29:58 +0200 Subject: [PATCH 1/2] Remove volumeAttachment deletion VA should not be deleted by FAR. The kube-control-manager (KCM) detects the pod deletion and automatically detaches the Volume Attachments (VAs) that are not connected to any pod, forcefully deleting the VA has the risk of interfering with the process of detaching the VAs. --- .../fenceagentsremediation_controller.go | 3 +- .../fenceagentsremediation_controller_test.go | 36 +----------- test/e2e/far_e2e_test.go | 56 ++----------------- 3 files changed, 9 insertions(+), 86 deletions(-) diff --git a/controllers/fenceagentsremediation_controller.go b/controllers/fenceagentsremediation_controller.go index a550852b..73369488 100644 --- a/controllers/fenceagentsremediation_controller.go +++ b/controllers/fenceagentsremediation_controller.go @@ -25,6 +25,7 @@ import ( "github.com/go-logr/logr" commonAnnotations "github.com/medik8s/common/pkg/annotations" commonConditions "github.com/medik8s/common/pkg/conditions" + commonResources "github.com/medik8s/common/pkg/resources" apiErrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" @@ -224,7 +225,7 @@ func (r *FenceAgentsRemediationReconciler) Reconcile(ctx context.Context, req ct !meta.IsStatusConditionTrue(far.Status.Conditions, commonConditions.SucceededType) { // Fence agent action succeeded, and now we try to remove workloads (pods and their volume attachments) r.Log.Info("Manual workload deletion", "Fence Agent", far.Spec.Agent, "Node Name", req.Name) - if err := utils.DeleteResources(ctx, r.Client, req.Name); err != nil { + if err := commonResources.DeletePods(ctx, r.Client, req.Name); err != nil { r.Log.Error(err, "Manual workload deletion has failed", "CR's Name", req.Name) return emptyResult, err } diff --git a/controllers/fenceagentsremediation_controller_test.go b/controllers/fenceagentsremediation_controller_test.go index 84bc9e9e..ad9b842f 100644 --- a/controllers/fenceagentsremediation_controller_test.go +++ b/controllers/fenceagentsremediation_controller_test.go @@ -162,9 +162,7 @@ var _ = Describe("FAR Controller", func() { By("Having a finalizer if we have a remediation taint") Expect(controllerutil.ContainsFinalizer(underTestFAR, v1alpha1.FARFinalizer)).To(BeTrue()) - By("Not having any VAs nor the test pod") - testVADeletion(vaName1, resourceDeletionWasTriggered) - testVADeletion(vaName2, resourceDeletionWasTriggered) + By("Not having any test pod") testPodDeletion(testPodName, resourceDeletionWasTriggered) By("Verifying correct conditions for successfull remediation") @@ -195,10 +193,8 @@ var _ = Describe("FAR Controller", func() { By("Not having remediation taint") Expect(utils.TaintExists(node.Spec.Taints, &farNoExecuteTaint)).To(BeFalse()) - By("Still having all the VAs and one test pod") + By("Still having one test pod") resourceDeletionWasTriggered = false - testVADeletion(vaName1, resourceDeletionWasTriggered) - testVADeletion(vaName2, resourceDeletionWasTriggered) testPodDeletion(testPodName, resourceDeletionWasTriggered) By("Verifying correct conditions for unsuccessfull remediation") @@ -314,34 +310,6 @@ func cliCommandsEquality(far *v1alpha1.FenceAgentsRemediation) (bool, error) { return isEqualStringLists(mocksExecuter.command, expectedCommand), nil } -// TODO: Think about using Generics for the next two functions - -// testVADeletion tests whether the volume attachment no longer exist for successful FAR CR -// and consistently check if the volume attachment exist and was not deleted -func testVADeletion(vaName string, resourceDeletionWasTriggered bool) { - vaKey := client.ObjectKey{ - Namespace: defaultNamespace, - Name: vaName, - } - if resourceDeletionWasTriggered { - EventuallyWithOffset(1, func() bool { - va := &storagev1.VolumeAttachment{} - err := k8sClient.Get(context.Background(), vaKey, va) - return apierrors.IsNotFound(err) - - }, timeoutDeletion, pollInterval).Should(BeTrue()) - log.Info("Volume attachment is no longer exist", "va", vaName) - } else { - ConsistentlyWithOffset(1, func() bool { - va := &storagev1.VolumeAttachment{} - err := k8sClient.Get(context.Background(), vaKey, va) - return apierrors.IsNotFound(err) - - }, timeoutDeletion, pollInterval).Should(BeFalse()) - log.Info("Volume attachment exist", "va", vaName) - } -} - // testPodDeletion tests whether the pod no longer exist for successful FAR CR // and consistently check if the pod exist and was not deleted func testPodDeletion(podName string, resourceDeletionWasTriggered bool) { diff --git a/test/e2e/far_e2e_test.go b/test/e2e/far_e2e_test.go index 6dce08b4..c46432f0 100644 --- a/test/e2e/far_e2e_test.go +++ b/test/e2e/far_e2e_test.go @@ -12,7 +12,6 @@ import ( . "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" - storagev1 "k8s.io/api/storage/v1" apiErrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -35,7 +34,6 @@ const ( nodeIdentifierPrefixAWS = "--plug" nodeIdentifierPrefixIPMI = "--ipport" containerName = "manager" - testVolumeAttachment = "test-va" testContainerName = "test-container" testPodName = "test-pod" @@ -83,7 +81,6 @@ var _ = Describe("FAR E2e", func() { var ( nodes, filteredNodes *corev1.NodeList nodeName string - va *storagev1.VolumeAttachment pod *corev1.Pod creationTimePod, nodeBootTimeBefore time.Time err error @@ -127,8 +124,7 @@ var _ = Describe("FAR E2e", func() { Expect(k8sClient.Create(context.Background(), pod)).To(Succeed()) log.Info("Tested pod has been created", "pod", testPodName) creationTimePod = metav1.Now().Time - va = createVA(nodeName) - DeferCleanup(cleanupTestedResources, va, pod) + DeferCleanup(cleanupTestedResources, pod) // set the node as "unhealthy" by disabling kubelet makeNodeUnready(selectedNode) @@ -138,10 +134,10 @@ var _ = Describe("FAR E2e", func() { }) When("running FAR to reboot two nodes", func() { It("should successfully remediate the first node", func() { - checkRemediation(nodeName, nodeBootTimeBefore, creationTimePod, va, pod) + checkRemediation(nodeName, nodeBootTimeBefore, creationTimePod, pod) }) It("should successfully remediate the second node", func() { - checkRemediation(nodeName, nodeBootTimeBefore, creationTimePod, va, pod) + checkRemediation(nodeName, nodeBootTimeBefore, creationTimePod, pod) }) }) }) @@ -243,28 +239,6 @@ func randomizeWorkerNode(nodes *corev1.NodeList) *corev1.Node { return &nodes.Items[r.Intn(len(nodes.Items))] } -// createVA creates dummy volume attachment for testing the resource deletion -func createVA(nodeName string) *storagev1.VolumeAttachment { - pv := "test-pv" - va := &storagev1.VolumeAttachment{ - ObjectMeta: metav1.ObjectMeta{ - Name: testVolumeAttachment, - Namespace: testNsName, - }, - Spec: storagev1.VolumeAttachmentSpec{ - Attacher: pv, - Source: storagev1.VolumeAttachmentSource{ - PersistentVolumeName: &pv, - }, - NodeName: nodeName, - }, - } - - ExpectWithOffset(1, k8sClient.Create(context.Background(), va)).To(Succeed()) - log.Info("Volume attachment has been created", "va", va.Name) - return va -} - // createFAR assigns the input to FenceAgentsRemediation object, creates CR, and returns the CR object func createFAR(nodeName string, agent string, sharedParameters map[v1alpha1.ParameterName]string, nodeParameters map[v1alpha1.ParameterName]map[v1alpha1.NodeName]string) *v1alpha1.FenceAgentsRemediation { far := &v1alpha1.FenceAgentsRemediation{ @@ -291,13 +265,7 @@ func deleteFAR(far *v1alpha1.FenceAgentsRemediation) { } // cleanupTestedResources deletes an old pod and old va if it was not deleted from FAR CR -func cleanupTestedResources(va *storagev1.VolumeAttachment, pod *corev1.Pod) { - newVa := &storagev1.VolumeAttachment{} - if err := k8sClient.Get(context.Background(), client.ObjectKeyFromObject(va), newVa); err == nil { - Expect(k8sClient.Delete(context.Background(), newVa)).To(Succeed()) - log.Info("cleanup: Volume attachment has not been deleted by remediation", "va name", va.Name) - } - +func cleanupTestedResources(pod *corev1.Pod) { newPod := &corev1.Pod{} if err := k8sClient.Get(context.Background(), client.ObjectKeyFromObject(pod), newPod); err == nil { Expect(k8sClient.Delete(context.Background(), newPod)).To(Succeed()) @@ -403,17 +371,6 @@ func wasNodeRebooted(nodeName string, nodeBootTimeBefore time.Time) { log.Info("successful reboot", "node", nodeName, "offset between last boot", nodeBootTimeAfter.Sub(nodeBootTimeBefore), "new boot time", nodeBootTimeAfter) } -// checkVaDeleted verifies if the va has already been deleted due to resource deletion -func checkVaDeleted(va *storagev1.VolumeAttachment) { - EventuallyWithOffset(1, func() bool { - newVa := &storagev1.VolumeAttachment{} - err := k8sClient.Get(context.Background(), client.ObjectKeyFromObject(va), newVa) - return apiErrors.IsNotFound(err) - - }, timeoutDeletion, pollDeletion).Should(BeTrue()) - log.Info("Volume Attachment has already been deleted", "va name", va.Name) -} - // checkPodDeleted vefifies if the pod has already been deleted due to resource deletion func checkPodDeleted(pod *corev1.Pod) { ConsistentlyWithOffset(1, func() bool { @@ -441,7 +398,7 @@ func verifyStatusCondition(nodeName, conditionType string, conditionStatus *meta } // checkRemediation verify whether the node was remediated -func checkRemediation(nodeName string, nodeBootTimeBefore time.Time, oldPodCreationTime time.Time, va *storagev1.VolumeAttachment, pod *corev1.Pod) { +func checkRemediation(nodeName string, nodeBootTimeBefore time.Time, oldPodCreationTime time.Time, pod *corev1.Pod) { By("Check if FAR NoExecute taint was added") wasFarTaintAdded(nodeName) @@ -452,9 +409,6 @@ func checkRemediation(nodeName string, nodeBootTimeBefore time.Time, oldPodCreat By("Getting new node's boot time") wasNodeRebooted(nodeName, nodeBootTimeBefore) - By("checking if old VA has been deleted") - checkVaDeleted(va) - By("checking if old pod has been deleted") checkPodDeleted(pod) From 513cb409b9da492d28e0fb16c39a887849b7f33d Mon Sep 17 00:00:00 2001 From: oraz Date: Sun, 5 Nov 2023 10:31:05 +0200 Subject: [PATCH 2/2] Update common with PodDeletion PodDeletion has moved to common so SNR and FAR used the same resource deletion --- go.mod | 2 +- go.sum | 4 +- pkg/utils/resources.go | 74 ------------------- .../medik8s/common/pkg/resources/resources.go | 52 +++++++++++++ vendor/modules.txt | 3 +- 5 files changed, 57 insertions(+), 78 deletions(-) delete mode 100644 pkg/utils/resources.go create mode 100644 vendor/github.com/medik8s/common/pkg/resources/resources.go diff --git a/go.mod b/go.mod index 8769c5b6..864e84b8 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.20 require ( github.com/go-logr/logr v1.2.4 - github.com/medik8s/common v1.7.0 + github.com/medik8s/common v1.9.0 github.com/onsi/ginkgo/v2 v2.11.0 github.com/onsi/gomega v1.27.10 github.com/openshift/api v0.0.0-20230621174358-ea40115b9fa6 diff --git a/go.sum b/go.sum index a4f1b050..0c39ee7e 100644 --- a/go.sum +++ b/go.sum @@ -76,8 +76,8 @@ github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0 github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 h1:jWpvCLoY8Z/e3VKvlsiIGKtc+UG6U5vzxaoagmhXfyg= github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0/go.mod h1:QUyp042oQthUoa9bqDv0ER0wrtXnBruoNd7aNjkbP+k= -github.com/medik8s/common v1.7.0 h1:JwimhWigPTAszGG2jYJmh+uVHq2nFUPRRljw7ZhlQhg= -github.com/medik8s/common v1.7.0/go.mod h1:ZT/3hfMXJLmZEcqmxRWB5LGC8Wl+qKGGQ4zM8hOE7PY= +github.com/medik8s/common v1.9.0 h1:nMMffmj+e0l0xRB0RfpsbdDpW9upXU2MFeGGADJlwyE= +github.com/medik8s/common v1.9.0/go.mod h1:ZT/3hfMXJLmZEcqmxRWB5LGC8Wl+qKGGQ4zM8hOE7PY= github.com/moby/spdystream v0.2.0 h1:cjW1zVyyoiM0T7b6UoySUFqzXMoqRckQtXwGPiBhOM8= github.com/moby/spdystream v0.2.0/go.mod h1:f7i0iNDQJ059oMTcWxx8MA/zKFIuD/lY+0GqbN2Wy8c= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= diff --git a/pkg/utils/resources.go b/pkg/utils/resources.go deleted file mode 100644 index 2ac8f8a6..00000000 --- a/pkg/utils/resources.go +++ /dev/null @@ -1,74 +0,0 @@ -package utils - -// Inspired from SNR - https://github.com/medik8s/self-node-remediation/blob/main/controllers/selfnoderemediation_controller.go#L283-L346 -import ( - "context" - - corev1 "k8s.io/api/core/v1" - storagev1 "k8s.io/api/storage/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/fields" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -var ( - log = ctrl.Log.WithName("utils-resource") -) - -func DeleteResources(ctx context.Context, r client.Client, nodeName string) error { - zero := int64(0) - backgroundDeletePolicy := metav1.DeletePropagationBackground - - deleteOptions := &client.DeleteAllOfOptions{ - ListOptions: client.ListOptions{ - FieldSelector: fields.SelectorFromSet(fields.Set{"spec.nodeName": nodeName}), - Namespace: "", - Limit: 0, - }, - DeleteOptions: client.DeleteOptions{ - GracePeriodSeconds: &zero, - PropagationPolicy: &backgroundDeletePolicy, - }, - } - - namespaces := corev1.NamespaceList{} - if err := r.List(ctx, &namespaces); err != nil { - log.Error(err, "failed to list namespaces", err) - return err - } - - log.Info("starting to delete node resources", "node name", nodeName) - - pod := &corev1.Pod{} - for _, ns := range namespaces.Items { - deleteOptions.Namespace = ns.Name - err := r.DeleteAllOf(ctx, pod, deleteOptions) - if err != nil { - log.Error(err, "failed to delete pods of unhealthy node", "namespace", ns.Name) - return err - } - } - - volumeAttachments := &storagev1.VolumeAttachmentList{} - if err := r.List(ctx, volumeAttachments); err != nil { - log.Error(err, "failed to get volumeAttachments list") - return err - } - forceDeleteOption := &client.DeleteOptions{ - GracePeriodSeconds: &zero, - } - for _, va := range volumeAttachments.Items { - if va.Spec.NodeName == nodeName { - err := r.Delete(ctx, &va, forceDeleteOption) - if err != nil { - log.Error(err, "failed to delete volumeAttachment", "name", va.Name) - return err - } - } - } - - log.Info("done deleting node resources", "node name", nodeName) - - return nil -} diff --git a/vendor/github.com/medik8s/common/pkg/resources/resources.go b/vendor/github.com/medik8s/common/pkg/resources/resources.go new file mode 100644 index 00000000..33b9d70b --- /dev/null +++ b/vendor/github.com/medik8s/common/pkg/resources/resources.go @@ -0,0 +1,52 @@ +package resources + +import ( + "context" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// DeletePods deletes all the pods from the node +func DeletePods(ctx context.Context, r client.Client, nodeName string) error { + log := ctrl.Log.WithName("commons-resource") + zero := int64(0) + backgroundDeletePolicy := metav1.DeletePropagationBackground + + deleteOptions := &client.DeleteAllOfOptions{ + ListOptions: client.ListOptions{ + FieldSelector: fields.SelectorFromSet(fields.Set{"spec.nodeName": nodeName}), + Namespace: "", + Limit: 0, + }, + DeleteOptions: client.DeleteOptions{ + GracePeriodSeconds: &zero, + PropagationPolicy: &backgroundDeletePolicy, + }, + } + + namespaces := corev1.NamespaceList{} + if err := r.List(ctx, &namespaces); err != nil { + log.Error(err, "failed to list namespaces") + return err + } + + log.Info("starting to delete pods", "node name", nodeName) + + pod := &corev1.Pod{} + for _, ns := range namespaces.Items { + deleteOptions.Namespace = ns.Name + err := r.DeleteAllOf(ctx, pod, deleteOptions) + if err != nil { + log.Error(err, "failed to delete pods of node", "namespace", ns.Name) + return err + } + } + + log.Info("done deleting pods", "node name", nodeName) + + return nil +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 62faa929..177f26f0 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -92,11 +92,12 @@ github.com/mailru/easyjson/jwriter # github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 ## explicit; go 1.19 github.com/matttproud/golang_protobuf_extensions/v2/pbutil -# github.com/medik8s/common v1.7.0 +# github.com/medik8s/common v1.9.0 ## explicit; go 1.20 github.com/medik8s/common/pkg/annotations github.com/medik8s/common/pkg/conditions github.com/medik8s/common/pkg/labels +github.com/medik8s/common/pkg/resources # github.com/moby/spdystream v0.2.0 ## explicit; go 1.13 github.com/moby/spdystream