Skip to content

Commit

Permalink
Annotate node with machine state leading to termination
Browse files Browse the repository at this point in the history
  • Loading branch information
guydc committed Aug 2, 2020
1 parent 8f36522 commit 0a93d9a
Show file tree
Hide file tree
Showing 5 changed files with 324 additions and 11 deletions.
55 changes: 51 additions & 4 deletions pkg/controller/controller_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (

"github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1"
machineapi "github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1"
annotationutils "github.com/gardener/machine-controller-manager/pkg/util/annotations"
hashutil "github.com/gardener/machine-controller-manager/pkg/util/hash"
taintutils "github.com/gardener/machine-controller-manager/pkg/util/taints"
v1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -88,8 +89,8 @@ const (
SlowStartInitialBatchSize = 1
)

// UpdateTaintBackoff is the backoff period used while updating taint
var UpdateTaintBackoff = wait.Backoff{
// UpdateNodeBackoff is the backoff period used while updating nodes
var UpdateNodeBackoff = wait.Backoff{
Steps: 5,
Duration: 100 * time.Millisecond,
Jitter: 1.0,
Expand Down Expand Up @@ -901,7 +902,7 @@ func AddOrUpdateTaintOnNode(c clientset.Interface, nodeName string, taints ...*v
return nil
}
firstTry := true
return clientretry.RetryOnConflict(UpdateTaintBackoff, func() error {
return clientretry.RetryOnConflict(UpdateNodeBackoff, func() error {
var err error
var oldNode *v1.Node
// First we try getting node from the API server cache, as it's cheaper. If it fails
Expand Down Expand Up @@ -958,7 +959,7 @@ func RemoveTaintOffNode(c clientset.Interface, nodeName string, node *v1.Node, t
}

firstTry := true
return clientretry.RetryOnConflict(UpdateTaintBackoff, func() error {
return clientretry.RetryOnConflict(UpdateNodeBackoff, func() error {
var err error
var oldNode *v1.Node
// First we try getting node from the API server cache, as it's cheaper. If it fails
Expand Down Expand Up @@ -1032,6 +1033,52 @@ func UpdateNodeTaints(c clientset.Interface, nodeName string, oldNode *v1.Node,
return nil
}

// AddOrUpdateAnnotationsOnNode applies the given annotations to the node named
// nodeName and writes the result back through UpdateNodeAnnotations.
//
// When annotations is nil or empty there is nothing to apply and no API call is
// made. Otherwise the node is fetched, every key/value pair is applied through
// annotationutils.AddOrUpdateAnnotation, and an Update is always issued (the
// function does not check whether the annotations actually changed).
//
// The fetch/update cycle runs inside clientretry.RetryOnConflict with
// UpdateNodeBackoff. Any error from fetching or updating the node is returned.
func AddOrUpdateAnnotationsOnNode(c clientset.Interface, nodeName string, annotations map[string]string) error {
	// An empty (but non-nil) map must be treated like nil: with no entries to
	// apply there would be no updated node object to hand to
	// UpdateNodeAnnotations, which dereferences it.
	if len(annotations) == 0 {
		return nil
	}

	firstTry := true
	return clientretry.RetryOnConflict(UpdateNodeBackoff, func() error {
		var (
			err     error
			oldNode *v1.Node
		)
		// First we try getting the node from the API server cache
		// (ResourceVersion "0"), as it's cheaper. Subsequent attempts read
		// without a resource version.
		if firstTry {
			oldNode, err = c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{ResourceVersion: "0"})
			firstTry = false
		} else {
			oldNode, err = c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
		}
		if err != nil {
			return err
		}

		// Thread the node through the helper once per annotation; each call
		// receives the result of the previous one.
		newNode := oldNode
		for key, value := range annotations {
			newNode = annotationutils.AddOrUpdateAnnotation(newNode, key, value)
		}
		return UpdateNodeAnnotations(c, nodeName, oldNode, newNode)
	})
}

// UpdateNodeAnnotations updates the node on the API server so that it carries
// newNode's annotations. The object sent is a deep copy of oldNode with only
// ObjectMeta.Annotations replaced by newNode's, submitted via the nodes
// Update() method.
//
// It returns an error if oldNode or newNode is nil. An error from the Update
// call is returned unmodified so that callers can still classify it with the
// k8s.io/apimachinery api/errors helpers (for example to retry on conflict or
// to tolerate a missing node); wrapping it in a new formatted error would
// discard the API status information those helpers rely on.
func UpdateNodeAnnotations(c clientset.Interface, nodeName string, oldNode *v1.Node, newNode *v1.Node) error {
	if oldNode == nil || newNode == nil {
		return fmt.Errorf("failed to update annotations for node %q: node object is nil", nodeName)
	}

	newNodeClone := oldNode.DeepCopy()
	newNodeClone.ObjectMeta.Annotations = newNode.ObjectMeta.Annotations

	// Return the raw API error: this function is used as the body of a
	// RetryOnConflict loop, which can only detect a conflict on the original
	// status error.
	_, err := c.CoreV1().Nodes().Update(newNodeClone)
	return err
}

// WaitForCacheSync is a wrapper around cache.WaitForCacheSync that generates log messages
// indicating that the controller identified by controllerName is waiting for syncs, followed by
// either a successful or failed sync.
Expand Down
25 changes: 18 additions & 7 deletions pkg/controller/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,18 @@ func (c *controller) machineDelete(machine *v1alpha1.Machine, driver driver.Driv
// If machine was created on the cloud provider
machineID, _ := driver.GetExisting()

forceDeleteLabelPresent := machine.Labels["force-deletion"] == "True"

// update node with the machine's state prior to termination
if err = c.AnnotateTerminatingMachineNode(machine); err != nil {
if forceDeleteLabelPresent {
klog.Warningf("Annotation of node failed: %v. However, since it's a force deletion shall continue deletion of VM.", err)
} else {
klog.Error(err)
return err
}
}

if machine.Status.CurrentStatus.Phase != v1alpha1.MachineTerminating {
lastOperation := v1alpha1.LastOperation{
Description: "Deleting machine from cloud provider",
Expand Down Expand Up @@ -603,13 +615,12 @@ func (c *controller) machineDelete(machine *v1alpha1.Machine, driver driver.Driv
// Begin drain logic only when the nodeName & providerID exist's for the machine

var (
forceDeletePods = false
forceDeleteMachine = false
timeOutOccurred = false
maxEvictRetries = int32(math.Min(float64(*c.getEffectiveMaxEvictRetries(machine)), c.getEffectiveDrainTimeout(machine).Seconds()/PodEvictionRetryInterval.Seconds()))
pvDetachTimeOut = c.safetyOptions.PvDetachTimeout.Duration
timeOutDuration = c.getEffectiveDrainTimeout(machine).Duration
forceDeleteLabelPresent = machine.Labels["force-deletion"] == "True"
forceDeletePods = false
forceDeleteMachine = false
timeOutOccurred = false
maxEvictRetries = int32(math.Min(float64(*c.getEffectiveMaxEvictRetries(machine)), c.getEffectiveDrainTimeout(machine).Seconds()/PodEvictionRetryInterval.Seconds()))
pvDetachTimeOut = c.safetyOptions.PvDetachTimeout.Duration
timeOutDuration = c.getEffectiveDrainTimeout(machine).Duration
)

// Timeout value obtained by subtracting last operation with expected time out period
Expand Down
22 changes: 22 additions & 0 deletions pkg/controller/machine_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@ package controller

import (
"encoding/json"
"fmt"

"github.com/gardener/machine-controller-manager/pkg/apis/machine/validation"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/klog"

machineapi "github.com/gardener/machine-controller-manager/pkg/apis/machine"
Expand All @@ -40,6 +42,8 @@ import (
const (
// LastAppliedALTAnnotation contains the last configuration of annotations, labels & taints applied on the node object
LastAppliedALTAnnotation = "node.machine.sapcloud.io/last-applied-anno-labels-taints"
// NodeTerminationAnnotation contains the machine's phase before it was terminated
NodeTerminationAnnotation = "node.machine.sapcloud.io/termination-phase"
)

var (
Expand Down Expand Up @@ -496,3 +500,21 @@ func SyncMachineTaints(

return toBeUpdated
}

// AnnotateTerminatingMachineNode records the machine's current phase on its
// node under the NodeTerminationAnnotation key, using the target cluster's
// core client.
//
// Nothing is done (and nil is returned) when:
//   - the machine has no phase set, or is already in the MachineTerminating
//     phase;
//   - the machine has no node name recorded in its status;
//   - the node is reported as not found by the API server.
//
// Any other error from annotating the node is returned to the caller.
func (c *controller) AnnotateTerminatingMachineNode(machine *v1alpha1.Machine) error {
	phase := machine.Status.CurrentStatus.Phase
	if phase == "" || phase == v1alpha1.MachineTerminating {
		return nil
	}

	// A machine without a node name has no node object to annotate; skip
	// instead of issuing an API request with an empty name.
	nodeName := machine.Status.Node
	if nodeName == "" {
		return nil
	}

	annotations := map[string]string{NodeTerminationAnnotation: fmt.Sprintf("%v", phase)}
	err := AddOrUpdateAnnotationsOnNode(c.targetCoreClient, nodeName, annotations)
	if apierrors.IsNotFound(err) {
		// A missing node object is not treated as a failure.
		return nil
	}
	return err
}
203 changes: 203 additions & 0 deletions pkg/controller/machine_util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1850,4 +1850,207 @@ var _ = Describe("machine_util", func() {
)

})

Describe("#AnnotateTerminatingMachineNode", func() {

type setup struct {
machine *machinev1.Machine
}
type action struct {
node *corev1.Node
}
type expect struct {
node *corev1.Node
err bool
}
type data struct {
setup setup
action action
expect expect
}

DescribeTable("##table",
func(data *data) {
stop := make(chan struct{})
defer close(stop)

controlObjects := []runtime.Object{}
coreObjects := []runtime.Object{}

machineObject := data.setup.machine

nodeObject := data.action.node
coreObjects = append(coreObjects, nodeObject)
controlObjects = append(controlObjects, machineObject)

c, trackers := createController(stop, testNamespace, controlObjects, nil, coreObjects)
defer trackers.Stop()
waitForCacheSync(stop, c)

err := c.AnnotateTerminatingMachineNode(machineObject)

waitForCacheSync(stop, c)

if !data.expect.err {
Expect(err).To(BeNil())
} else {
Expect(err).To(HaveOccurred())
}

updatedNodeObject, _ := c.targetCoreClient.CoreV1().Nodes().Get(nodeObject.Name, metav1.GetOptions{})

if data.expect.node != nil {
Expect(updatedNodeObject.Annotations).Should(Equal(data.expect.node.Annotations))
}
},

Entry("when machine phase is failed", &data{
setup: setup{
machine: newMachine(
&machinev1.MachineTemplateSpec{},
&machinev1.MachineStatus{
Node: "test-node",
CurrentStatus: machinev1.CurrentStatus{Phase: MachineFailed},
},
nil, nil, nil),
},
action: action{
node: &corev1.Node{
TypeMeta: metav1.TypeMeta{
APIVersion: "v1",
Kind: "Node",
},
ObjectMeta: metav1.ObjectMeta{
Name: "test-node-0",
Annotations: map[string]string{
"anno1": "anno1",
},
},
Spec: corev1.NodeSpec{},
},
},
expect: expect{
node: &corev1.Node{
TypeMeta: metav1.TypeMeta{
APIVersion: "v1",
Kind: "Node",
},
ObjectMeta: metav1.ObjectMeta{
Name: "test-node-0",
Annotations: map[string]string{
"anno1": "anno1",
NodeTerminationAnnotation: "Failed",
},
},
},
err: false,
},
}),

Entry("when machine phase is terminating", &data{
setup: setup{
machine: newMachine(
&machinev1.MachineTemplateSpec{},
&machinev1.MachineStatus{
Node: "test-node",
CurrentStatus: machinev1.CurrentStatus{Phase: MachineTerminating},
},
nil, nil, nil),
},
action: action{
node: &corev1.Node{
TypeMeta: metav1.TypeMeta{
APIVersion: "v1",
Kind: "Node",
},
ObjectMeta: metav1.ObjectMeta{
Name: "test-node-0",
Annotations: map[string]string{
"anno1": "anno1",
},
},
Spec: corev1.NodeSpec{},
},
},
expect: expect{
node: &corev1.Node{
TypeMeta: metav1.TypeMeta{
APIVersion: "v1",
Kind: "Node",
},
ObjectMeta: metav1.ObjectMeta{
Name: "test-node-0",
Annotations: map[string]string{
"anno1": "anno1",
},
},
},
err: false,
},
}),

Entry("when annotation already exists", &data{
setup: setup{
machine: newMachine(
&machinev1.MachineTemplateSpec{},
&machinev1.MachineStatus{
Node: "test-node",
CurrentStatus: machinev1.CurrentStatus{Phase: MachineRunning},
},
nil, nil, nil),
},
action: action{
node: &corev1.Node{
TypeMeta: metav1.TypeMeta{
APIVersion: "v1",
Kind: "Node",
},
ObjectMeta: metav1.ObjectMeta{
Name: "test-node-0",
Annotations: map[string]string{
NodeTerminationAnnotation: "Failed",
},
},
Spec: corev1.NodeSpec{},
},
},
expect: expect{
node: &corev1.Node{
TypeMeta: metav1.TypeMeta{
APIVersion: "v1",
Kind: "Node",
},
ObjectMeta: metav1.ObjectMeta{
Name: "test-node-0",
Annotations: map[string]string{
NodeTerminationAnnotation: "Running",
},
},
},
err: false,
},
}),

Entry("when node object does not exist", &data{
setup: setup{
machine: newMachine(
&machinev1.MachineTemplateSpec{},
&machinev1.MachineStatus{
Node: "test-node",
CurrentStatus: machinev1.CurrentStatus{Phase: MachineTerminating},
},
nil, nil, nil),
},
action: action{
node: &corev1.Node{},
},
expect: expect{
node: &corev1.Node{},
err: false, // we should not return error if node-object does not exist to ensure rest of the steps are then executed.
},
}),
)

})

})
Loading

0 comments on commit 0a93d9a

Please sign in to comment.