diff --git a/kubernetes/deployment/in-tree/control-cluster-role.yaml b/kubernetes/deployment/in-tree/control-cluster-role.yaml index b03bb7f0a..63849b6e5 100644 --- a/kubernetes/deployment/in-tree/control-cluster-role.yaml +++ b/kubernetes/deployment/in-tree/control-cluster-role.yaml @@ -28,6 +28,7 @@ rules: - "" resources: - nodes + - nodes/status - configmaps - secrets - endpoints diff --git a/kubernetes/deployment/out-of-tree/control-cluster-role.yaml b/kubernetes/deployment/out-of-tree/control-cluster-role.yaml index b03bb7f0a..63849b6e5 100644 --- a/kubernetes/deployment/out-of-tree/control-cluster-role.yaml +++ b/kubernetes/deployment/out-of-tree/control-cluster-role.yaml @@ -28,6 +28,7 @@ rules: - "" resources: - nodes + - nodes/status - configmaps - secrets - endpoints diff --git a/pkg/controller/controller_utils.go b/pkg/controller/controller_utils.go index 6432729f9..2932f01b8 100644 --- a/pkg/controller/controller_utils.go +++ b/pkg/controller/controller_utils.go @@ -37,6 +37,7 @@ import ( "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" machineapi "github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1" + conditionutils "github.com/gardener/machine-controller-manager/pkg/util/conditions" hashutil "github.com/gardener/machine-controller-manager/pkg/util/hash" taintutils "github.com/gardener/machine-controller-manager/pkg/util/taints" v1 "k8s.io/api/core/v1" @@ -88,8 +89,8 @@ const ( SlowStartInitialBatchSize = 1 ) -// UpdateTaintBackoff is the backoff period used while updating taint -var UpdateTaintBackoff = wait.Backoff{ +// Backoff is the backoff period used while updating nodes +var Backoff = wait.Backoff{ Steps: 5, Duration: 100 * time.Millisecond, Jitter: 1.0, @@ -901,7 +902,7 @@ func AddOrUpdateTaintOnNode(c clientset.Interface, nodeName string, taints ...*v return nil } firstTry := true - return clientretry.RetryOnConflict(UpdateTaintBackoff, func() error { + return clientretry.RetryOnConflict(Backoff, func() error { var err error var oldNode *v1.Node // First we try getting node from the API server cache, as it's cheaper. If it fails @@ -958,7 +959,7 @@ func RemoveTaintOffNode(c clientset.Interface, nodeName string, node *v1.Node, t } firstTry := true - return clientretry.RetryOnConflict(UpdateTaintBackoff, func() error { + return clientretry.RetryOnConflict(Backoff, func() error { var err error var oldNode *v1.Node // First we try getting node from the API server cache, as it's cheaper. If it fails @@ -1032,6 +1033,55 @@ func UpdateNodeTaints(c clientset.Interface, nodeName string, oldNode *v1.Node, return nil } +// GetNodeCondition gets the node's condition matching the specified type +func GetNodeCondition(c clientset.Interface, nodeName string, conditionType v1.NodeConditionType) (*v1.NodeCondition, error) { + node, err := c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{}) + if err != nil { + return nil, err + } + return conditionutils.GetNodeCondition(node, conditionType), nil +} + +// AddOrUpdateConditionsOnNode adds a condition to the node's status +func AddOrUpdateConditionsOnNode(c clientset.Interface, nodeName string, condition v1.NodeCondition) error { + firstTry := true + return clientretry.RetryOnConflict(Backoff, func() error { + var err error + var oldNode *v1.Node + // First we try getting node from the API server cache, as it's cheaper. If it fails + // we get it from etcd to be sure to have fresh data.
+ if firstTry { + oldNode, err = c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{ResourceVersion: "0"}) + firstTry = false + } else { + oldNode, err = c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{}) + } + + if err != nil { + return err + } + + var newNode *v1.Node + oldNodeCopy := oldNode + newNode = conditionutils.AddOrUpdateCondition(oldNodeCopy, condition) + return UpdateNodeConditions(c, nodeName, oldNode, newNode) + }) +} + +// UpdateNodeConditions updates the node conditions from oldNode to the newNode +// using the node's UpdateStatus() method +func UpdateNodeConditions(c clientset.Interface, nodeName string, oldNode *v1.Node, newNode *v1.Node) error { + newNodeClone := oldNode.DeepCopy() + newNodeClone.Status.Conditions = newNode.Status.Conditions + + _, err := c.CoreV1().Nodes().UpdateStatus(newNodeClone) + if err != nil { + return fmt.Errorf("failed to update conditions for node %q: %v", nodeName, err) + } + + return nil +} + // WaitForCacheSync is a wrapper around cache.WaitForCacheSync that generates log messages // indicating that the controller identified by controllerName is waiting for syncs, followed by // either a successful or failed sync. diff --git a/pkg/controller/machine.go b/pkg/controller/machine.go index f6e54fd3b..36b2f7202 100644 --- a/pkg/controller/machine.go +++ b/pkg/controller/machine.go @@ -565,10 +565,52 @@ func (c *controller) machineDelete(machine *v1alpha1.Machine, driver driver.Driv if finalizers := sets.NewString(machine.Finalizers...); finalizers.Has(DeleteFinalizerName) { klog.V(2).Infof("Deleting Machine %q", machine.Name) + var ( + forceDeletePods = false + forceDeleteMachine = false + timeOutOccurred = false + maxEvictRetries = int32(math.Min(float64(*c.getEffectiveMaxEvictRetries(machine)), c.getEffectiveDrainTimeout(machine).Seconds()/PodEvictionRetryInterval.Seconds())) + pvDetachTimeOut = c.safetyOptions.PvDetachTimeout.Duration + timeOutDuration = c.getEffectiveDrainTimeout(machine).Duration + forceDeleteLabelPresent = machine.Labels["force-deletion"] == "True" + ) + + // Timeout value is positive once the time elapsed since the last operation exceeds the expected timeout period + timeOut := metav1.Now().Add(-timeOutDuration).Sub(machine.Status.CurrentStatus.LastUpdateTime.Time) + timeOutOccurred = timeOut > 0 + + if forceDeleteLabelPresent || timeOutOccurred { + // To perform a forceful machine drain/delete, either of the below conditions must be satisfied + // 1. force-deletion: "True" label must be present + // 2. Deletion operation is more than drain-timeout minutes old + forceDeleteMachine = true + forceDeletePods = true + timeOutDuration = 1 * time.Minute + maxEvictRetries = 1 + + klog.V(2).Infof( + "Force deletion has been triggered for machine %q due to ForceDeletionLabel:%t, Timeout:%t", + machine.Name, + forceDeleteLabelPresent, + timeOutOccurred, + ) + } // If machine was created on the cloud provider machineID, _ := driver.GetExisting() + // update node with the machine's state prior to termination + if nodeName != "" && machineID != "" { + if err = c.UpdateNodeTerminationCondition(machine); err != nil { + if forceDeleteMachine { + klog.Warningf("failed to update node conditions: %v. 
However, since it's a force deletion, the deletion of the VM will continue.", err) + } else { + klog.Error(err) + return err + } + } + } + if machine.Status.CurrentStatus.Phase != v1alpha1.MachineTerminating { lastOperation := v1alpha1.LastOperation{ Description: "Deleting machine from cloud provider", @@ -601,38 +643,6 @@ func (c *controller) machineDelete(machine *v1alpha1.Machine, driver driver.Driv if machineID != "" && nodeName != "" { // Begin drain logic only when the nodeName & providerID exist's for the machine - - var ( - forceDeletePods = false - forceDeleteMachine = false - timeOutOccurred = false - maxEvictRetries = int32(math.Min(float64(*c.getEffectiveMaxEvictRetries(machine)), c.getEffectiveDrainTimeout(machine).Seconds()/PodEvictionRetryInterval.Seconds())) - pvDetachTimeOut = c.safetyOptions.PvDetachTimeout.Duration - timeOutDuration = c.getEffectiveDrainTimeout(machine).Duration - forceDeleteLabelPresent = machine.Labels["force-deletion"] == "True" - ) - - // Timeout value obtained by subtracting last operation with expected time out period - timeOut := metav1.Now().Add(-timeOutDuration).Sub(machine.Status.CurrentStatus.LastUpdateTime.Time) - timeOutOccurred = timeOut > 0 - - if forceDeleteLabelPresent || timeOutOccurred { - // To perform forceful machine drain/delete either one of the below conditions must be satified - // 1. force-deletion: "True" label must be present - // 2. Deletion operation is more than drain-timeout minutes old - forceDeleteMachine = true - forceDeletePods = true - timeOutDuration = 1 * time.Minute - maxEvictRetries = 1 - - klog.V(2).Infof( - "Force deletion has been triggerred for machine %q due to ForceDeletionLabel:%t, Timeout:%t", - machine.Name, - forceDeleteLabelPresent, - timeOutOccurred, - ) - } - buf := bytes.NewBuffer([]byte{}) errBuf := bytes.NewBuffer([]byte{}) diff --git a/pkg/controller/machine_util.go b/pkg/controller/machine_util.go index 388622342..c485bdd12 100644 --- a/pkg/controller/machine_util.go +++ b/pkg/controller/machine_util.go @@ -26,6 +26,8 @@ import ( "encoding/json" "github.com/gardener/machine-controller-manager/pkg/apis/machine/validation" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/klog" machineapi "github.com/gardener/machine-controller-manager/pkg/apis/machine" @@ -40,6 +42,12 @@ import ( const ( // LastAppliedALTAnnotation contains the last configuration of annotations, labels & taints applied on the node object LastAppliedALTAnnotation = "node.machine.sapcloud.io/last-applied-anno-labels-taints" + // NodeTerminationCondition describes nodes that are terminating + NodeTerminationCondition = "Termination" + // NodeUnhealthy is a node termination reason for failed machines + NodeUnhealthy = "Unhealthy" + // NodeScaledDown is a node termination reason for healthy deleted machines + NodeScaledDown = "ScaleDown" ) var ( @@ -496,3 +504,51 @@ func SyncMachineTaints( return toBeUpdated } + +func (c *controller) UpdateNodeTerminationCondition(machine *v1alpha1.Machine) error { + if machine.Status.CurrentStatus.Phase == "" { + return nil + } + + nodeName := machine.Status.Node + + terminationCondition := v1.NodeCondition{ + Type: NodeTerminationCondition, + Status: v1.ConditionTrue, + LastHeartbeatTime: metav1.Now(), + LastTransitionTime: metav1.Now(), + } + + // check if condition already exists + cond, err := GetNodeCondition(c.targetCoreClient, nodeName, NodeTerminationCondition) + if err != nil { + if apierrors.IsNotFound(err) { + return nil + } + return err + } + 
+ if cond != nil && machine.Status.CurrentStatus.Phase == v1alpha1.MachineTerminating { + // do not consider machine terminating phase if node already terminating + terminationCondition.Reason = cond.Reason + terminationCondition.Message = cond.Message + } else { + setTerminationReasonByPhase(machine.Status.CurrentStatus.Phase, &terminationCondition) + } + + err = AddOrUpdateConditionsOnNode(c.targetCoreClient, nodeName, terminationCondition) + if apierrors.IsNotFound(err) { + return nil + } + return err +} + +func setTerminationReasonByPhase(phase v1alpha1.MachinePhase, terminationCondition *v1.NodeCondition) { + if phase == v1alpha1.MachineFailed { // if failed, terminated due to health + terminationCondition.Reason = NodeUnhealthy + terminationCondition.Message = "Machine Controller is terminating failed machine" + } else { // in all other cases (except for already terminating): assume scale down + terminationCondition.Reason = NodeScaledDown + terminationCondition.Message = "Machine Controller is scaling down machine" + } +} diff --git a/pkg/controller/machine_util_test.go b/pkg/controller/machine_util_test.go index 0bc35e5d8..4480f5c7a 100644 --- a/pkg/controller/machine_util_test.go +++ b/pkg/controller/machine_util_test.go @@ -1850,4 +1850,318 @@ var _ = Describe("machine_util", func() { ) }) + + Describe("#UpdateNodeTerminationCondition", func() { + + type setup struct { + machine *machinev1.Machine + } + type action struct { + node *corev1.Node + } + type expect struct { + node *corev1.Node + err bool + } + type data struct { + setup setup + action action + expect expect + } + + DescribeTable("##table", + func(data *data) { + stop := make(chan struct{}) + defer close(stop) + + controlObjects := []runtime.Object{} + coreObjects := []runtime.Object{} + + machineObject := data.setup.machine + + nodeObject := data.action.node + coreObjects = append(coreObjects, nodeObject) + controlObjects = append(controlObjects, machineObject) + + c, trackers := createController(stop, testNamespace, controlObjects, nil, coreObjects) + defer trackers.Stop() + waitForCacheSync(stop, c) + + err := c.UpdateNodeTerminationCondition(machineObject) + + waitForCacheSync(stop, c) + + if !data.expect.err { + Expect(err).To(BeNil()) + } else { + Expect(err).To(HaveOccurred()) + } + + updatedNodeObject, _ := c.targetCoreClient.CoreV1().Nodes().Get(nodeObject.Name, metav1.GetOptions{}) + + if data.expect.node != nil { + conditions := data.expect.node.Status.Conditions + if len(conditions) > 0 { + Expect(updatedNodeObject.Status.Conditions[0].Type).Should(Equal(data.expect.node.Status.Conditions[0].Type)) + Expect(updatedNodeObject.Status.Conditions[0].Status).Should(Equal(data.expect.node.Status.Conditions[0].Status)) + Expect(updatedNodeObject.Status.Conditions[0].Reason).Should(Equal(data.expect.node.Status.Conditions[0].Reason)) + } + } + }, + + Entry("when machine phase is failed", &data{ + setup: setup{ + machine: newMachine( + &machinev1.MachineTemplateSpec{}, + &machinev1.MachineStatus{ + Node: "test-node", + CurrentStatus: machinev1.CurrentStatus{Phase: MachineFailed}, + }, + nil, nil, nil), + }, + action: action{ + node: &corev1.Node{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "v1", + Kind: "Node", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-0", + Annotations: map[string]string{ + "anno1": "anno1", + }, + }, + Spec: corev1.NodeSpec{}, + }, + }, + expect: expect{ + node: &corev1.Node{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "v1", + Kind: "Node", + }, + ObjectMeta: metav1.ObjectMeta{ + 
Name: "test-node-0", + Annotations: map[string]string{ + "anno1": "anno1", + }, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + {Type: NodeTerminationCondition, Status: corev1.ConditionTrue, Reason: NodeUnhealthy}, + }}, + }, + err: false, + }, + }), + + Entry("when machine phase is running", &data{ + setup: setup{ + machine: newMachine( + &machinev1.MachineTemplateSpec{}, + &machinev1.MachineStatus{ + Node: "test-node", + CurrentStatus: machinev1.CurrentStatus{Phase: MachineRunning}, + }, + nil, nil, nil), + }, + action: action{ + node: &corev1.Node{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "v1", + Kind: "Node", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-0", + Annotations: map[string]string{ + "anno1": "anno1", + }, + }, + Spec: corev1.NodeSpec{}, + }, + }, + expect: expect{ + node: &corev1.Node{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "v1", + Kind: "Node", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-0", + Annotations: map[string]string{ + "anno1": "anno1", + }, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + {Type: NodeTerminationCondition, Status: corev1.ConditionTrue, Reason: NodeScaledDown}, + }}, + }, + err: false, + }, + }), + + Entry("when machine phase is terminating", &data{ + setup: setup{ + machine: newMachine( + &machinev1.MachineTemplateSpec{}, + &machinev1.MachineStatus{ + Node: "test-node", + CurrentStatus: machinev1.CurrentStatus{Phase: MachineTerminating}, + }, + nil, nil, nil), + }, + action: action{ + node: &corev1.Node{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "v1", + Kind: "Node", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-0", + Annotations: map[string]string{ + "anno1": "anno1", + }, + }, + Spec: corev1.NodeSpec{}, + }, + }, + expect: expect{ + node: &corev1.Node{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "v1", + Kind: "Node", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-0", + Annotations: map[string]string{ + "anno1": "anno1", + }, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + {Type: NodeTerminationCondition, Status: corev1.ConditionTrue, Reason: NodeScaledDown}, + }}, + }, + err: false, + }, + }), + + Entry("when machine phase is terminating and condition already exists", &data{ + setup: setup{ + machine: newMachine( + &machinev1.MachineTemplateSpec{}, + &machinev1.MachineStatus{ + Node: "test-node", + CurrentStatus: machinev1.CurrentStatus{Phase: MachineTerminating}, + }, + nil, nil, nil), + }, + action: action{ + node: &corev1.Node{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "v1", + Kind: "Node", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-0", + Annotations: map[string]string{ + "anno1": "anno1", + }, + }, + Spec: corev1.NodeSpec{}, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + {Type: NodeTerminationCondition, Status: corev1.ConditionTrue, Reason: NodeUnhealthy}, + }}, + }, + }, + expect: expect{ + node: &corev1.Node{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "v1", + Kind: "Node", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-0", + Annotations: map[string]string{ + "anno1": "anno1", + }, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + {Type: NodeTerminationCondition, Status: corev1.ConditionTrue, Reason: NodeUnhealthy}, + }}, + }, + err: false, + }, + }), + + Entry("when phase changes and condition already exists", &data{ + setup: setup{ + machine: newMachine( + &machinev1.MachineTemplateSpec{}, + &machinev1.MachineStatus{ + Node: "test-node", + 
CurrentStatus: machinev1.CurrentStatus{Phase: MachineFailed}, + }, + nil, nil, nil), + }, + action: action{ + node: &corev1.Node{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "v1", + Kind: "Node", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-0", + }, + Spec: corev1.NodeSpec{}, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + {Type: NodeTerminationCondition, Status: corev1.ConditionTrue, Reason: NodeScaledDown}, + }}, + }, + }, + expect: expect{ + node: &corev1.Node{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "v1", + Kind: "Node", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-0", + Annotations: map[string]string{}, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + {Type: NodeTerminationCondition, Status: corev1.ConditionTrue, Reason: NodeUnhealthy}, + }}, + }, + err: false, + }, + }), + + Entry("when node object does not exist", &data{ + setup: setup{ + machine: newMachine( + &machinev1.MachineTemplateSpec{}, + &machinev1.MachineStatus{ + Node: "test-node", + CurrentStatus: machinev1.CurrentStatus{Phase: MachineTerminating}, + }, + nil, nil, nil), + }, + action: action{ + node: &corev1.Node{}, + }, + expect: expect{ + node: &corev1.Node{}, + err: false, // no error should be returned if the node object does not exist, so that the remaining deletion steps can still be executed. + }, + }), + ) + + }) + }) diff --git a/pkg/util/conditions/conditions.go b/pkg/util/conditions/conditions.go new file mode 100644 index 000000000..3d480d433 --- /dev/null +++ b/pkg/util/conditions/conditions.go @@ -0,0 +1,46 @@ +package conditions + +import ( + v1 "k8s.io/api/core/v1" +) + +// CloneAndAddCondition returns a copy of the conditions slice with the given condition added or updated in place of any existing condition of the same type +func CloneAndAddCondition(conditions []v1.NodeCondition, condition v1.NodeCondition) []v1.NodeCondition { + if condition.Type == "" || condition.Status == "" { + return conditions + } + // Clone + var newConditions []v1.NodeCondition + + for _, existingCondition := range conditions { + if existingCondition.Type != condition.Type { // filter out the condition that is being updated + newConditions = append(newConditions, existingCondition) + } else { // condition with this type already exists + if existingCondition.Status == condition.Status && existingCondition.Reason == condition.Reason { + // condition status and reason are the same, keep existing transition time + condition.LastTransitionTime = existingCondition.LastTransitionTime + } + } + } + + newConditions = append(newConditions, condition) + return newConditions +} + +// AddOrUpdateCondition adds a condition to the condition list. Returns a new copy of the updated Node +func AddOrUpdateCondition(node *v1.Node, condition v1.NodeCondition) *v1.Node { + newNode := node.DeepCopy() + nodeConditions := newNode.Status.Conditions + newNode.Status.Conditions = CloneAndAddCondition(nodeConditions, condition) + return newNode +} + +// GetNodeCondition returns a condition matching the type from the node's status +func GetNodeCondition(node *v1.Node, conditionType v1.NodeConditionType) *v1.NodeCondition { + for _, cond := range node.Status.Conditions { + if cond.Type == conditionType { + return &cond + } + } + return nil +}
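For reference, a minimal standalone sketch (not part of the diff itself) of how the new conditionutils helpers introduced in pkg/util/conditions/conditions.go are expected to be used; the node fixture, the literal "Termination"/"ScaleDown" values, and the printed output are illustrative assumptions only.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	conditionutils "github.com/gardener/machine-controller-manager/pkg/util/conditions"
)

func main() {
	// A node that is still healthy and carries only a Ready condition.
	node := &v1.Node{
		Status: v1.NodeStatus{
			Conditions: []v1.NodeCondition{
				{Type: v1.NodeReady, Status: v1.ConditionTrue},
			},
		},
	}

	// Add the Termination condition introduced by this change; "ScaleDown"
	// is the reason used for healthy machines that are being deleted.
	updated := conditionutils.AddOrUpdateCondition(node, v1.NodeCondition{
		Type:               "Termination",
		Status:             v1.ConditionTrue,
		Reason:             "ScaleDown",
		Message:            "Machine Controller is scaling down machine",
		LastHeartbeatTime:  metav1.Now(),
		LastTransitionTime: metav1.Now(),
	})

	// AddOrUpdateCondition works on a deep copy, so the original node keeps
	// one condition while the returned node carries Ready and Termination.
	fmt.Println(len(node.Status.Conditions), len(updated.Status.Conditions)) // prints: 1 2

	// GetNodeCondition looks the condition up again by type.
	if cond := conditionutils.GetNodeCondition(updated, "Termination"); cond != nil {
		fmt.Println(cond.Reason) // prints: ScaleDown
	}
}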