Skip to content

Commit

Permalink
Fixes issues with machines being force drained
Browse files Browse the repository at this point in the history
The machines were being force drained during race conditions due to PR gardener#492.
This commit fixes that by
	- Make the drain calculation based on deletionTimestamp
	- Reverted logic from PR gardener#492 to first set machine Termination phase
  • Loading branch information
prashanth26 committed Nov 23, 2020
1 parent d27195c commit 3736023
Show file tree
Hide file tree
Showing 7 changed files with 169 additions and 37 deletions.
3 changes: 3 additions & 0 deletions pkg/controller/controller_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,8 @@ func newMachines(
labels = make(map[string]string, 0)
}

currentTime := metav1.Now()

for i := range machines {
m := &v1alpha1.Machine{
TypeMeta: metav1.TypeMeta{
Expand All @@ -324,6 +326,7 @@ func newMachines(
Labels: labels,
Annotations: annotations,
CreationTimestamp: metav1.Now(),
DeletionTimestamp: &currentTime,
},
Spec: *newMachineSpec(&specTemplate.Spec, i),
}
Expand Down
74 changes: 40 additions & 34 deletions pkg/controller/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ import (
"github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1"
"github.com/gardener/machine-controller-manager/pkg/apis/machine/validation"
"github.com/gardener/machine-controller-manager/pkg/driver"
utiltime "github.com/gardener/machine-controller-manager/pkg/util/time"
)

const (
Expand Down Expand Up @@ -624,9 +625,37 @@ func (c *controller) machineDelete(machine *v1alpha1.Machine, driver driver.Driv
forceDeleteLabelPresent = machine.Labels["force-deletion"] == "True"
)

// Timeout value obtained by subtracting last operation with expected time out period
timeOut := metav1.Now().Add(-timeOutDuration).Sub(machine.Status.CurrentStatus.LastUpdateTime.Time)
timeOutOccurred = timeOut > 0
if machine.Status.CurrentStatus.Phase != v1alpha1.MachineTerminating {
lastOperation := v1alpha1.LastOperation{
Description: "Deleting machine from cloud provider",
State: v1alpha1.MachineStateProcessing,
Type: v1alpha1.MachineOperationDelete,
LastUpdateTime: metav1.Now(),
}
currentStatus := v1alpha1.CurrentStatus{
Phase: v1alpha1.MachineTerminating,
TimeoutActive: false,
LastUpdateTime: metav1.Now(),
}

err = c.deleteBootstrapToken(machine.Name)
if err != nil {
klog.Warning(err)
}

machine, err = c.updateMachineStatus(machine, lastOperation, currentStatus)
if err != nil && apierrors.IsNotFound(err) {
// Object no longer exists and has been deleted
klog.Warning(err)
return nil
} else if err != nil {
// Any other type of errors
klog.Error(err)
return err
}
}

timeOutOccurred = utiltime.HasTimeOutOccurred(*machine.DeletionTimestamp, timeOutDuration)

if forceDeleteLabelPresent || timeOutOccurred {
// To perform forceful machine drain/delete either one of the below conditions must be satified
Expand All @@ -638,11 +667,18 @@ func (c *controller) machineDelete(machine *v1alpha1.Machine, driver driver.Driv
maxEvictRetries = 1

klog.V(2).Infof(
"Force deletion has been triggerred for machine %q due to ForceDeletionLabel:%t, Timeout:%t",
"Force delete/drain has been triggerred for machine %q due to Label:%t, timeout:%t",
machine.Name,
forceDeleteLabelPresent,
timeOutOccurred,
)
} else {
klog.V(2).Infof(
"Normal delete/drain has been triggerred for machine %q with drain-timeout:%v & maxEvictRetries:%d",
machine.Name,
timeOutDuration,
maxEvictRetries,
)
}

// If machine was created on the cloud provider
Expand All @@ -660,36 +696,6 @@ func (c *controller) machineDelete(machine *v1alpha1.Machine, driver driver.Driv
}
}

if machine.Status.CurrentStatus.Phase != v1alpha1.MachineTerminating {
lastOperation := v1alpha1.LastOperation{
Description: "Deleting machine from cloud provider",
State: v1alpha1.MachineStateProcessing,
Type: v1alpha1.MachineOperationDelete,
LastUpdateTime: metav1.Now(),
}
currentStatus := v1alpha1.CurrentStatus{
Phase: v1alpha1.MachineTerminating,
TimeoutActive: false,
LastUpdateTime: metav1.Now(),
}

err = c.deleteBootstrapToken(machine.Name)
if err != nil {
klog.Warning(err)
}

machine, err = c.updateMachineStatus(machine, lastOperation, currentStatus)
if err != nil && apierrors.IsNotFound(err) {
// Object no longer exists and has been deleted
klog.Warning(err)
return nil
} else if err != nil {
// Any other type of errors
klog.Error(err)
return err
}
}

if machineID != "" && nodeName != "" {
// Begin drain logic only when the nodeName & providerID exist's for the machine
buf := bytes.NewBuffer([]byte{})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ func newMachines(
Labels: labels,
Annotations: annotations,
CreationTimestamp: creationTimestamp,
DeletionTimestamp: &creationTimestamp, //TODO: Add parametrize this
},
Spec: *newMachineSpec(&specTemplate.Spec, i),
}
Expand Down
5 changes: 2 additions & 3 deletions pkg/util/provider/machinecontroller/machine_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import (
"github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/status"
"github.com/gardener/machine-controller-manager/pkg/util/provider/machineutils"
utilstrings "github.com/gardener/machine-controller-manager/pkg/util/strings"
utiltime "github.com/gardener/machine-controller-manager/pkg/util/time"

corev1 "k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -912,9 +913,7 @@ func (c *controller) drainNode(deleteMachineRequest *driver.DeleteMachineRequest
if skipDrain {
state = v1alpha1.MachineStateProcessing
} else {
// Timeout value obtained by subtracting last operation with expected time out period
timeOut := metav1.Now().Add(-timeOutDuration).Sub(machine.Status.CurrentStatus.LastUpdateTime.Time)
timeOutOccurred = timeOut > 0
timeOutOccurred = utiltime.HasTimeOutOccurred(*machine.DeletionTimestamp, timeOutDuration)

if forceDeleteLabelPresent || timeOutOccurred {
// To perform forceful machine drain/delete either one of the below conditions must be satified
Expand Down
33 changes: 33 additions & 0 deletions pkg/util/time/time.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
Copyright (c) 2020 SAP SE or an SAP affiliate company. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package time is used to provide the core functionalities of machine-controller-manager
package time

import (
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/klog"
)

// HasTimeOutOccurred trues true, when time.Now() is more than time + period
func HasTimeOutOccurred(timeStamp metav1.Time, period time.Duration) bool {
// Timeout value obtained by subtracting last operation with expected time out period
klog.Error(timeStamp)
timeOut := metav1.Now().Add(-period).Sub(timeStamp.Time)
return timeOut > 0
}
13 changes: 13 additions & 0 deletions pkg/util/time/time_suite_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package time_test

import (
"testing"

. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)

func TestTime(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Time Suite")
}
77 changes: 77 additions & 0 deletions pkg/util/time/time_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
Copyright (c) 2017 SAP SE or an SAP affiliate company. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package time

import (
"time"

. "github.com/onsi/ginkgo"
. "github.com/onsi/ginkgo/extensions/table"
. "github.com/onsi/gomega"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

var _ = Describe("time", func() {
Describe("#hasTimeOutOccurred", func() {
type setup struct {
timeStamp metav1.Time
period time.Duration
}
type action struct {
}
type expect struct {
timeOutOccurred bool
}
type data struct {
setup setup
action action
expect expect
}
DescribeTable("##TimeOut scenarios",
func(data *data) {
timeOutOccurred := HasTimeOutOccurred(data.setup.timeStamp, data.setup.period)
Expect(timeOutOccurred).To(Equal(data.expect.timeOutOccurred))
},
Entry("Time stamp is one hour ago and period is 30mins", &data{
setup: setup{
timeStamp: metav1.Time{Time: time.Now().Add(-time.Hour)},
period: 30 * time.Minute,
},
expect: expect{
timeOutOccurred: true,
},
}),
Entry("Time stamp is one hour ago and period is 90mins", &data{
setup: setup{
timeStamp: metav1.Time{Time: time.Now().Add(-time.Hour)},
period: 90 * time.Minute,
},
expect: expect{
timeOutOccurred: false,
},
}),
Entry("Time stamp is now and peroid is 5mins", &data{
setup: setup{
timeStamp: metav1.Time{Time: time.Now()},
period: 5 * time.Minute,
},
expect: expect{
timeOutOccurred: false,
},
}),
)
})
})

0 comments on commit 3736023

Please sign in to comment.