diff --git a/pkg/controller/controller_suite_test.go b/pkg/controller/controller_suite_test.go index aaff5c131..fa0960182 100644 --- a/pkg/controller/controller_suite_test.go +++ b/pkg/controller/controller_suite_test.go @@ -312,6 +312,8 @@ func newMachines( labels = make(map[string]string, 0) } + currentTime := metav1.Now() + for i := range machines { m := &v1alpha1.Machine{ TypeMeta: metav1.TypeMeta{ @@ -324,6 +326,7 @@ func newMachines( Labels: labels, Annotations: annotations, CreationTimestamp: metav1.Now(), + DeletionTimestamp: ¤tTime, }, Spec: *newMachineSpec(&specTemplate.Spec, i), } diff --git a/pkg/controller/machine.go b/pkg/controller/machine.go index 9e6bb3ae9..e95e9c2d1 100644 --- a/pkg/controller/machine.go +++ b/pkg/controller/machine.go @@ -42,6 +42,7 @@ import ( "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" "github.com/gardener/machine-controller-manager/pkg/apis/machine/validation" "github.com/gardener/machine-controller-manager/pkg/driver" + utiltime "github.com/gardener/machine-controller-manager/pkg/util/time" ) const ( @@ -624,9 +625,37 @@ func (c *controller) machineDelete(machine *v1alpha1.Machine, driver driver.Driv forceDeleteLabelPresent = machine.Labels["force-deletion"] == "True" ) - // Timeout value obtained by subtracting last operation with expected time out period - timeOut := metav1.Now().Add(-timeOutDuration).Sub(machine.Status.CurrentStatus.LastUpdateTime.Time) - timeOutOccurred = timeOut > 0 + if machine.Status.CurrentStatus.Phase != v1alpha1.MachineTerminating { + lastOperation := v1alpha1.LastOperation{ + Description: "Deleting machine from cloud provider", + State: v1alpha1.MachineStateProcessing, + Type: v1alpha1.MachineOperationDelete, + LastUpdateTime: metav1.Now(), + } + currentStatus := v1alpha1.CurrentStatus{ + Phase: v1alpha1.MachineTerminating, + TimeoutActive: false, + LastUpdateTime: metav1.Now(), + } + + err = c.deleteBootstrapToken(machine.Name) + if err != nil { + klog.Warning(err) + } + + machine, err = c.updateMachineStatus(machine, lastOperation, currentStatus) + if err != nil && apierrors.IsNotFound(err) { + // Object no longer exists and has been deleted + klog.Warning(err) + return nil + } else if err != nil { + // Any other type of errors + klog.Error(err) + return err + } + } + + timeOutOccurred = utiltime.HasTimeOutOccurred(*machine.DeletionTimestamp, timeOutDuration) if forceDeleteLabelPresent || timeOutOccurred { // To perform forceful machine drain/delete either one of the below conditions must be satified @@ -638,11 +667,18 @@ func (c *controller) machineDelete(machine *v1alpha1.Machine, driver driver.Driv maxEvictRetries = 1 klog.V(2).Infof( - "Force deletion has been triggerred for machine %q due to ForceDeletionLabel:%t, Timeout:%t", + "Force delete/drain has been triggerred for machine %q due to Label:%t, timeout:%t", machine.Name, forceDeleteLabelPresent, timeOutOccurred, ) + } else { + klog.V(2).Infof( + "Normal delete/drain has been triggerred for machine %q with drain-timeout:%v & maxEvictRetries:%d", + machine.Name, + timeOutDuration, + maxEvictRetries, + ) } // If machine was created on the cloud provider @@ -660,36 +696,6 @@ func (c *controller) machineDelete(machine *v1alpha1.Machine, driver driver.Driv } } - if machine.Status.CurrentStatus.Phase != v1alpha1.MachineTerminating { - lastOperation := v1alpha1.LastOperation{ - Description: "Deleting machine from cloud provider", - State: v1alpha1.MachineStateProcessing, - Type: v1alpha1.MachineOperationDelete, - LastUpdateTime: metav1.Now(), - } - currentStatus := v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineTerminating, - TimeoutActive: false, - LastUpdateTime: metav1.Now(), - } - - err = c.deleteBootstrapToken(machine.Name) - if err != nil { - klog.Warning(err) - } - - machine, err = c.updateMachineStatus(machine, lastOperation, currentStatus) - if err != nil && apierrors.IsNotFound(err) { - // Object no longer exists and has been deleted - klog.Warning(err) - return nil - } else if err != nil { - // Any other type of errors - klog.Error(err) - return err - } - } - if machineID != "" && nodeName != "" { // Begin drain logic only when the nodeName & providerID exist's for the machine buf := bytes.NewBuffer([]byte{}) diff --git a/pkg/util/provider/machinecontroller/controller_suite_test.go b/pkg/util/provider/machinecontroller/controller_suite_test.go index 586731e88..cc587a998 100644 --- a/pkg/util/provider/machinecontroller/controller_suite_test.go +++ b/pkg/util/provider/machinecontroller/controller_suite_test.go @@ -335,6 +335,7 @@ func newMachines( Labels: labels, Annotations: annotations, CreationTimestamp: creationTimestamp, + DeletionTimestamp: &creationTimestamp, //TODO: Add parametrize this }, Spec: *newMachineSpec(&specTemplate.Spec, i), } diff --git a/pkg/util/provider/machinecontroller/machine_test.go b/pkg/util/provider/machinecontroller/machine_test.go index 1bec6fec1..1d6c56bbd 100644 --- a/pkg/util/provider/machinecontroller/machine_test.go +++ b/pkg/util/provider/machinecontroller/machine_test.go @@ -1684,6 +1684,115 @@ var _ = Describe("machine", func() { ), }, }), + Entry("Drain machine failure before drain timeout, hence deletion fails", &data{ + setup: setup{ + secrets: []*corev1.Secret{ + { + ObjectMeta: *newObjectMeta(objMeta, 0), + }, + }, + machineClasses: []*v1alpha1.MachineClass{ + { + ObjectMeta: *newObjectMeta(objMeta, 0), + SecretRef: newSecretReference(objMeta, 0), + }, + }, + machines: newMachines( + 1, + &v1alpha1.MachineTemplateSpec{ + ObjectMeta: *newObjectMeta(objMeta, 0), + Spec: v1alpha1.MachineSpec{ + Class: v1alpha1.ClassSpec{ + Kind: "MachineClass", + Name: "machine-0", + }, + ProviderID: "fakeID", + }, + }, + &v1alpha1.MachineStatus{ + Node: "fakeNode", + CurrentStatus: v1alpha1.CurrentStatus{ + Phase: v1alpha1.MachineTerminating, + LastUpdateTime: metav1.NewTime(time.Now().Add(-3 * time.Minute)), + }, + LastOperation: v1alpha1.LastOperation{ + Description: machineutils.InitiateDrain, + State: v1alpha1.MachineStateProcessing, + Type: v1alpha1.MachineOperationDelete, + LastUpdateTime: metav1.NewTime(time.Now().Add(-3 * time.Minute)), + }, + }, + nil, + map[string]string{ + machineutils.MachinePriority: "3", + }, + map[string]string{ + "node": "fakeID-0", + }, + true, + metav1.NewTime(time.Now().Add(-3*time.Minute)), + ), + nodes: []*corev1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakeID-0", + }, + }, + }, + fakeResourceActions: &customfake.ResourceActions{ + Node: customfake.Actions{ + Update: "Failed to update node", + }, + }, + }, + action: action{ + machine: "machine-0", + fakeDriver: &driver.FakeDriver{ + VMExists: true, + ProviderID: "fakeID-0", + NodeName: "fakeNode-0", + Err: nil, + }, + }, + expect: expect{ + err: fmt.Errorf("failed to create update conditions for node \"fakeID-0\": Failed to update node"), + retry: machineutils.ShortRetry, + machine: newMachine( + &v1alpha1.MachineTemplateSpec{ + ObjectMeta: *newObjectMeta(objMeta, 0), + Spec: v1alpha1.MachineSpec{ + Class: v1alpha1.ClassSpec{ + Kind: "MachineClass", + Name: "machine-0", + }, + ProviderID: "fakeID", + }, + }, + &v1alpha1.MachineStatus{ + Node: "fakeNode", + CurrentStatus: v1alpha1.CurrentStatus{ + Phase: v1alpha1.MachineTerminating, + LastUpdateTime: metav1.Now(), + }, + LastOperation: v1alpha1.LastOperation{ + Description: fmt.Sprintf("Drain failed due to failure in update of node conditions - %s. Will retry in next sync. %s", "failed to create update conditions for node \"fakeID-0\": Failed to update node", machineutils.InitiateDrain), + State: v1alpha1.MachineStateFailed, + Type: v1alpha1.MachineOperationDelete, + LastUpdateTime: metav1.Now(), + }, + }, + nil, + map[string]string{ + machineutils.MachinePriority: "3", + }, + map[string]string{ + "node": "fakeID-0", + }, + true, + metav1.Now(), + ), + }, + }), Entry("Drain machine failure after drain timeout, hence deletion continues", &data{ setup: setup{ secrets: []*corev1.Secret{ @@ -1730,7 +1839,7 @@ var _ = Describe("machine", func() { "node": "fakeID-0", }, true, - metav1.Now(), + metav1.NewTime(time.Now().Add(-3*time.Hour)), ), nodes: []*corev1.Node{ { diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 6efa84c8a..fce97052c 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -40,6 +40,7 @@ import ( "github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/status" "github.com/gardener/machine-controller-manager/pkg/util/provider/machineutils" utilstrings "github.com/gardener/machine-controller-manager/pkg/util/strings" + utiltime "github.com/gardener/machine-controller-manager/pkg/util/time" corev1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1" @@ -912,9 +913,7 @@ func (c *controller) drainNode(deleteMachineRequest *driver.DeleteMachineRequest if skipDrain { state = v1alpha1.MachineStateProcessing } else { - // Timeout value obtained by subtracting last operation with expected time out period - timeOut := metav1.Now().Add(-timeOutDuration).Sub(machine.Status.CurrentStatus.LastUpdateTime.Time) - timeOutOccurred = timeOut > 0 + timeOutOccurred = utiltime.HasTimeOutOccurred(*machine.DeletionTimestamp, timeOutDuration) if forceDeleteLabelPresent || timeOutOccurred { // To perform forceful machine drain/delete either one of the below conditions must be satified diff --git a/pkg/util/time/time.go b/pkg/util/time/time.go new file mode 100644 index 000000000..21a5fe7f9 --- /dev/null +++ b/pkg/util/time/time.go @@ -0,0 +1,31 @@ +/* +Copyright (c) 2020 SAP SE or an SAP affiliate company. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package time is used to provide the core functionalities of machine-controller-manager +package time + +import ( + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// HasTimeOutOccurred returns true, when time.Now() is more than time + period +func HasTimeOutOccurred(timeStamp metav1.Time, period time.Duration) bool { + // Timeout value obtained by subtracting last operation with expected time out period + timeOut := metav1.Now().Add(-period).Sub(timeStamp.Time) + return timeOut > 0 +} diff --git a/pkg/util/time/time_suite_test.go b/pkg/util/time/time_suite_test.go new file mode 100644 index 000000000..69f1ac6f7 --- /dev/null +++ b/pkg/util/time/time_suite_test.go @@ -0,0 +1,13 @@ +package time_test + +import ( + "testing" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" +) + +func TestTime(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Time Suite") +} diff --git a/pkg/util/time/time_test.go b/pkg/util/time/time_test.go new file mode 100644 index 000000000..d64a69a72 --- /dev/null +++ b/pkg/util/time/time_test.go @@ -0,0 +1,77 @@ +/* +Copyright (c) 2017 SAP SE or an SAP affiliate company. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +package time + +import ( + "time" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/ginkgo/extensions/table" + . "github.com/onsi/gomega" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +var _ = Describe("time", func() { + Describe("#hasTimeOutOccurred", func() { + type setup struct { + timeStamp metav1.Time + period time.Duration + } + type action struct { + } + type expect struct { + timeOutOccurred bool + } + type data struct { + setup setup + action action + expect expect + } + DescribeTable("##TimeOut scenarios", + func(data *data) { + timeOutOccurred := HasTimeOutOccurred(data.setup.timeStamp, data.setup.period) + Expect(timeOutOccurred).To(Equal(data.expect.timeOutOccurred)) + }, + Entry("Time stamp is one hour ago and period is 30mins", &data{ + setup: setup{ + timeStamp: metav1.Time{Time: time.Now().Add(-time.Hour)}, + period: 30 * time.Minute, + }, + expect: expect{ + timeOutOccurred: true, + }, + }), + Entry("Time stamp is one hour ago and period is 90mins", &data{ + setup: setup{ + timeStamp: metav1.Time{Time: time.Now().Add(-time.Hour)}, + period: 90 * time.Minute, + }, + expect: expect{ + timeOutOccurred: false, + }, + }), + Entry("Time stamp is now and period is 5mins", &data{ + setup: setup{ + timeStamp: metav1.Time{Time: time.Now()}, + period: 5 * time.Minute, + }, + expect: expect{ + timeOutOccurred: false, + }, + }), + ) + }) +})