diff --git a/pkg/controller/controller_utils.go b/pkg/controller/controller_utils.go index 6432729f9..ef5913e58 100644 --- a/pkg/controller/controller_utils.go +++ b/pkg/controller/controller_utils.go @@ -37,6 +37,7 @@ import ( "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" machineapi "github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1" + annotationutils "github.com/gardener/machine-controller-manager/pkg/util/annotations" hashutil "github.com/gardener/machine-controller-manager/pkg/util/hash" taintutils "github.com/gardener/machine-controller-manager/pkg/util/taints" v1 "k8s.io/api/core/v1" @@ -88,8 +89,8 @@ const ( SlowStartInitialBatchSize = 1 ) -// UpdateTaintBackoff is the backoff period used while updating taint -var UpdateTaintBackoff = wait.Backoff{ +// UpdateNodeBackoff is the backoff period used while updating nodes +var UpdateNodeBackoff = wait.Backoff{ Steps: 5, Duration: 100 * time.Millisecond, Jitter: 1.0, @@ -901,7 +902,7 @@ func AddOrUpdateTaintOnNode(c clientset.Interface, nodeName string, taints ...*v return nil } firstTry := true - return clientretry.RetryOnConflict(UpdateTaintBackoff, func() error { + return clientretry.RetryOnConflict(UpdateNodeBackoff, func() error { var err error var oldNode *v1.Node // First we try getting node from the API server cache, as it's cheaper. If it fails @@ -958,7 +959,7 @@ func RemoveTaintOffNode(c clientset.Interface, nodeName string, node *v1.Node, t } firstTry := true - return clientretry.RetryOnConflict(UpdateTaintBackoff, func() error { + return clientretry.RetryOnConflict(UpdateNodeBackoff, func() error { var err error var oldNode *v1.Node // First we try getting node from the API server cache, as it's cheaper. If it fails @@ -1032,6 +1033,54 @@ func UpdateNodeTaints(c clientset.Interface, nodeName string, oldNode *v1.Node, return nil } +// AddOrUpdateAnnotationsOnNode adds or updates the given annotations on the node. 
It is a no-op for a nil or empty map; otherwise it gets the node and issues an +// Update call for it inside clientretry.RetryOnConflict. Returns an error if any. +func AddOrUpdateAnnotationsOnNode(c clientset.Interface, nodeName string, annotations map[string]string) error { + if len(annotations) == 0 { + return nil + } + firstTry := true + return clientretry.RetryOnConflict(UpdateNodeBackoff, func() error { + var err error + var oldNode *v1.Node + // First we try getting node from the API server cache, as it's cheaper. On a retry + // we issue a plain Get instead. + if firstTry { + oldNode, err = c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{ResourceVersion: "0"}) + firstTry = false + } else { + oldNode, err = c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{}) + } + + if err != nil { + return err + } + + var newNode *v1.Node + oldNodeCopy := oldNode + for key, value := range annotations { + curNewNode := annotationutils.AddOrUpdateAnnotation(oldNodeCopy, key, value) + newNode = curNewNode + oldNodeCopy = curNewNode + } + return UpdateNodeAnnotations(c, nodeName, oldNode, newNode) + }) +} + +// UpdateNodeAnnotations is for updating the node annotations from oldNode to the newNode +// using the nodes Update() method +func UpdateNodeAnnotations(c clientset.Interface, nodeName string, oldNode *v1.Node, newNode *v1.Node) error { + newNodeClone := oldNode.DeepCopy() + newNodeClone.ObjectMeta.Annotations = newNode.ObjectMeta.Annotations + + _, err := c.CoreV1().Nodes().Update(newNodeClone) + if err != nil { + return fmt.Errorf("failed to create update annotations for node %q: %v", nodeName, err) + } + + return nil +} + // WaitForCacheSync is a wrapper around cache.WaitForCacheSync that generates log messages // indicating that the controller identified by controllerName is waiting for syncs, followed by // either a successful or failed sync. 
diff --git a/pkg/controller/machine.go b/pkg/controller/machine.go index f6e54fd3b..57bae8363 100644 --- a/pkg/controller/machine.go +++ b/pkg/controller/machine.go @@ -565,10 +565,50 @@ func (c *controller) machineDelete(machine *v1alpha1.Machine, driver driver.Driv if finalizers := sets.NewString(machine.Finalizers...); finalizers.Has(DeleteFinalizerName) { klog.V(2).Infof("Deleting Machine %q", machine.Name) + var ( + forceDeletePods = false + forceDeleteMachine = false + timeOutOccurred = false + maxEvictRetries = int32(math.Min(float64(*c.getEffectiveMaxEvictRetries(machine)), c.getEffectiveDrainTimeout(machine).Seconds()/PodEvictionRetryInterval.Seconds())) + pvDetachTimeOut = c.safetyOptions.PvDetachTimeout.Duration + timeOutDuration = c.getEffectiveDrainTimeout(machine).Duration + forceDeleteLabelPresent = machine.Labels["force-deletion"] == "True" + ) + + // Timeout value obtained by subtracting last operation with expected time out period + timeOut := metav1.Now().Add(-timeOutDuration).Sub(machine.Status.CurrentStatus.LastUpdateTime.Time) + timeOutOccurred = timeOut > 0 + + if forceDeleteLabelPresent || timeOutOccurred { + // To perform forceful machine drain/delete either one of the below conditions must be satified + // 1. force-deletion: "True" label must be present + // 2. Deletion operation is more than drain-timeout minutes old + forceDeleteMachine = true + forceDeletePods = true + timeOutDuration = 1 * time.Minute + maxEvictRetries = 1 + + klog.V(2).Infof( + "Force deletion has been triggerred for machine %q due to ForceDeletionLabel:%t, Timeout:%t", + machine.Name, + forceDeleteLabelPresent, + timeOutOccurred, + ) + } // If machine was created on the cloud provider machineID, _ := driver.GetExisting() + // update node with the machine's state prior to termination + if err = c.AnnotateTerminatingMachineNode(machine); err != nil { + if forceDeleteMachine { + klog.Warningf("Annotation of node failed: %v. 
However, since it's a force deletion shall continue deletion of VM.", err) + } else { + klog.Error(err) + return err + } + } + if machine.Status.CurrentStatus.Phase != v1alpha1.MachineTerminating { lastOperation := v1alpha1.LastOperation{ Description: "Deleting machine from cloud provider", @@ -601,38 +641,6 @@ func (c *controller) machineDelete(machine *v1alpha1.Machine, driver driver.Driv if machineID != "" && nodeName != "" { // Begin drain logic only when the nodeName & providerID exist's for the machine - - var ( - forceDeletePods = false - forceDeleteMachine = false - timeOutOccurred = false - maxEvictRetries = int32(math.Min(float64(*c.getEffectiveMaxEvictRetries(machine)), c.getEffectiveDrainTimeout(machine).Seconds()/PodEvictionRetryInterval.Seconds())) - pvDetachTimeOut = c.safetyOptions.PvDetachTimeout.Duration - timeOutDuration = c.getEffectiveDrainTimeout(machine).Duration - forceDeleteLabelPresent = machine.Labels["force-deletion"] == "True" - ) - - // Timeout value obtained by subtracting last operation with expected time out period - timeOut := metav1.Now().Add(-timeOutDuration).Sub(machine.Status.CurrentStatus.LastUpdateTime.Time) - timeOutOccurred = timeOut > 0 - - if forceDeleteLabelPresent || timeOutOccurred { - // To perform forceful machine drain/delete either one of the below conditions must be satified - // 1. force-deletion: "True" label must be present - // 2. 
Deletion operation is more than drain-timeout minutes old - forceDeleteMachine = true - forceDeletePods = true - timeOutDuration = 1 * time.Minute - maxEvictRetries = 1 - - klog.V(2).Infof( - "Force deletion has been triggerred for machine %q due to ForceDeletionLabel:%t, Timeout:%t", - machine.Name, - forceDeleteLabelPresent, - timeOutOccurred, - ) - } - buf := bytes.NewBuffer([]byte{}) errBuf := bytes.NewBuffer([]byte{}) diff --git a/pkg/controller/machine_util.go b/pkg/controller/machine_util.go index 388622342..3fa8b8a60 100644 --- a/pkg/controller/machine_util.go +++ b/pkg/controller/machine_util.go @@ -24,8 +24,10 @@ package controller import ( "encoding/json" + "fmt" "github.com/gardener/machine-controller-manager/pkg/apis/machine/validation" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/klog" machineapi "github.com/gardener/machine-controller-manager/pkg/apis/machine" @@ -40,6 +42,8 @@ import ( const ( // LastAppliedALTAnnotation contains the last configuration of annotations, labels & taints applied on the node object LastAppliedALTAnnotation = "node.machine.sapcloud.io/last-applied-anno-labels-taints" + // NodeTerminationAnnotation contains the machine's phase before it was terminated + NodeTerminationAnnotation = "node.machine.sapcloud.io/termination-phase" ) var ( @@ -496,3 +500,24 @@ func SyncMachineTaints( return toBeUpdated } + +// AnnotateTerminatingMachineNode records the machine's current phase on its node under the +// NodeTerminationAnnotation key. It returns nil without updating anything when the phase is empty or +// already Terminating, when the machine has no node name, or when the node object is not found. 
+func (c *controller) AnnotateTerminatingMachineNode(machine *v1alpha1.Machine) error { + if machine.Status.CurrentStatus.Phase == "" || machine.Status.CurrentStatus.Phase == v1alpha1.MachineTerminating { + return nil + } + + nodeName := machine.Status.Node + if nodeName == "" { + return nil + } + + annotations := map[string]string{NodeTerminationAnnotation: fmt.Sprintf("%v", machine.Status.CurrentStatus.Phase)} + err := AddOrUpdateAnnotationsOnNode(c.targetCoreClient, nodeName, annotations) + if apierrors.IsNotFound(err) { + return nil + } + return err +} diff --git 
a/pkg/controller/machine_util_test.go b/pkg/controller/machine_util_test.go index 0bc35e5d8..a53516984 100644 --- a/pkg/controller/machine_util_test.go +++ b/pkg/controller/machine_util_test.go @@ -1850,4 +1850,207 @@ var _ = Describe("machine_util", func() { ) }) + + Describe("#AnnotateTerminatingMachineNode", func() { + + type setup struct { + machine *machinev1.Machine + } + type action struct { + node *corev1.Node + } + type expect struct { + node *corev1.Node + err bool + } + type data struct { + setup setup + action action + expect expect + } + + DescribeTable("##table", + func(data *data) { + stop := make(chan struct{}) + defer close(stop) + + controlObjects := []runtime.Object{} + coreObjects := []runtime.Object{} + + machineObject := data.setup.machine + + nodeObject := data.action.node + coreObjects = append(coreObjects, nodeObject) + controlObjects = append(controlObjects, machineObject) + + c, trackers := createController(stop, testNamespace, controlObjects, nil, coreObjects) + defer trackers.Stop() + waitForCacheSync(stop, c) + + err := c.AnnotateTerminatingMachineNode(machineObject) + + waitForCacheSync(stop, c) + + if !data.expect.err { + Expect(err).To(BeNil()) + } else { + Expect(err).To(HaveOccurred()) + } + + updatedNodeObject, _ := c.targetCoreClient.CoreV1().Nodes().Get(nodeObject.Name, metav1.GetOptions{}) + + if data.expect.node != nil { + Expect(updatedNodeObject.Annotations).Should(Equal(data.expect.node.Annotations)) + } + }, + + Entry("when machine phase is failed", &data{ + setup: setup{ + machine: newMachine( + &machinev1.MachineTemplateSpec{}, + &machinev1.MachineStatus{ + Node: "test-node", + CurrentStatus: machinev1.CurrentStatus{Phase: MachineFailed}, + }, + nil, nil, nil), + }, + action: action{ + node: &corev1.Node{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "v1", + Kind: "Node", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-0", + Annotations: map[string]string{ + "anno1": "anno1", + }, + }, + Spec: corev1.NodeSpec{}, + 
}, + }, + expect: expect{ + node: &corev1.Node{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "v1", + Kind: "Node", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-0", + Annotations: map[string]string{ + "anno1": "anno1", + NodeTerminationAnnotation: "Failed", + }, + }, + }, + err: false, + }, + }), + + Entry("when machine phase is terminating", &data{ + setup: setup{ + machine: newMachine( + &machinev1.MachineTemplateSpec{}, + &machinev1.MachineStatus{ + Node: "test-node", + CurrentStatus: machinev1.CurrentStatus{Phase: MachineTerminating}, + }, + nil, nil, nil), + }, + action: action{ + node: &corev1.Node{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "v1", + Kind: "Node", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-0", + Annotations: map[string]string{ + "anno1": "anno1", + }, + }, + Spec: corev1.NodeSpec{}, + }, + }, + expect: expect{ + node: &corev1.Node{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "v1", + Kind: "Node", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-0", + Annotations: map[string]string{ + "anno1": "anno1", + }, + }, + }, + err: false, + }, + }), + + Entry("when annotation already exists", &data{ + setup: setup{ + machine: newMachine( + &machinev1.MachineTemplateSpec{}, + &machinev1.MachineStatus{ + Node: "test-node", + CurrentStatus: machinev1.CurrentStatus{Phase: MachineRunning}, + }, + nil, nil, nil), + }, + action: action{ + node: &corev1.Node{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "v1", + Kind: "Node", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-0", + Annotations: map[string]string{ + NodeTerminationAnnotation: "Failed", + }, + }, + Spec: corev1.NodeSpec{}, + }, + }, + expect: expect{ + node: &corev1.Node{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "v1", + Kind: "Node", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-0", + Annotations: map[string]string{ + NodeTerminationAnnotation: "Running", + }, + }, + }, + err: false, + }, + }), + + Entry("when node object does not exist", &data{ + 
setup: setup{ + machine: newMachine( + &machinev1.MachineTemplateSpec{}, + &machinev1.MachineStatus{ + Node: "test-node", + CurrentStatus: machinev1.CurrentStatus{Phase: MachineTerminating}, + }, + nil, nil, nil), + }, + action: action{ + node: &corev1.Node{}, + }, + expect: expect{ + node: &corev1.Node{}, + err: false, // we should not return error if node-object does not exist to ensure rest of the steps are then executed. + }, + }), + ) + + }) + }) diff --git a/pkg/util/annotations/annotations.go b/pkg/util/annotations/annotations.go new file mode 100644 index 000000000..18a0e8f01 --- /dev/null +++ b/pkg/util/annotations/annotations.go @@ -0,0 +1,30 @@ +package annotations + +import ( + v1 "k8s.io/api/core/v1" +) + +// CloneAndAddAnnotation clones the given map and returns a new map with the given key and value added. +// Returns the given map itself (not a copy), if annotationKey is empty. +func CloneAndAddAnnotation(annotations map[string]string, annotationKey, annotationValue string) map[string]string { + if annotationKey == "" { + // Don't need to add an annotation. + return annotations + } + // Clone. + newAnnotations := map[string]string{} + for key, value := range annotations { + newAnnotations[key] = value + } + newAnnotations[annotationKey] = annotationValue + return newAnnotations +} + +// AddOrUpdateAnnotation sets the given annotation on a deep copy of the node and returns that copy; +// the node passed in is not modified. +func AddOrUpdateAnnotation(node *v1.Node, annotationKey, annotationValue string) *v1.Node { + newNode := node.DeepCopy() + nodeAnnotations := newNode.ObjectMeta.Annotations + newNode.ObjectMeta.Annotations = CloneAndAddAnnotation(nodeAnnotations, annotationKey, annotationValue) + return newNode +}