check for taints on low nodes and honor them
jw-s committed Feb 21, 2020
1 parent 137b9b7 commit cddcd24
Showing 2 changed files with 212 additions and 7 deletions.
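In short: before evicting a pod from an over-utilized node, the strategy now collects the taints of every under-utilized ("low") node and skips any pod that does not tolerate the taints of at least one of them, since such a pod would have nowhere to land (see the sketch after the first file's diff).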
37 changes: 31 additions & 6 deletions pkg/descheduler/strategies/lownodeutilization.go
@@ -19,10 +19,12 @@ package strategies
import (
"sort"

"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog"

+ helperUtil "k8s.io/kubernetes/pkg/apis/core/v1/helper"
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
@@ -161,7 +163,9 @@ func evictPodsFromTargetNodes(client clientset.Interface, evictionPolicyGroupVer

// upper bound on total number of pods/cpu/memory to be moved
var totalPods, totalCPU, totalMem float64
+ var taintsOfLowNodes = make(map[string][]v1.Taint, len(lowNodes))
for _, node := range lowNodes {
+ taintsOfLowNodes[node.node.Name] = node.node.Spec.Taints
nodeCapacity := node.node.Status.Capacity
if len(node.node.Status.Allocatable) > 0 {
nodeCapacity = node.node.Status.Allocatable
@@ -202,18 +206,18 @@ func evictPodsFromTargetNodes(client clientset.Interface, evictionPolicyGroupVer

// sort the evictable Pods based on priority; if multiple pods share the same priority, they are further sorted by QoS tier.
sortPodsBasedOnPriority(evictablePods)
- evictPods(evictablePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
+ evictPods(evictablePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict, taintsOfLowNodes)
} else {
// TODO: Remove this when we support only priority.
// Falling back to evicting pods based on priority.
klog.V(1).Infof("Evicting pods based on QoS")
klog.V(1).Infof("There are %v non-evictable pods on the node", len(node.nonRemovablePods))
// evict best effort pods
- evictPods(node.bePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
+ evictPods(node.bePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict, taintsOfLowNodes)
// evict burstable pods
- evictPods(node.bPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
+ evictPods(node.bPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict, taintsOfLowNodes)
// evict guaranteed pods
- evictPods(node.gPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
+ evictPods(node.gPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict, taintsOfLowNodes)
}
nodepodCount[node.node] = currentPodsEvicted
podsEvicted = podsEvicted + nodepodCount[node.node]
@@ -232,15 +236,24 @@ func evictPods(inputPods []*v1.Pod,
totalCPU *float64,
totalMem *float64,
podsEvicted *int,
- dryRun bool, maxPodsToEvict int) {
+ dryRun bool,
+ maxPodsToEvict int,
+ taintsOfLowNodes map[string][]v1.Taint) {
if IsNodeAboveTargetUtilization(nodeUsage, targetThresholds) && (*totalPods > 0 || *totalCPU > 0 || *totalMem > 0) {
onePodPercentage := api.Percentage((float64(1) * 100) / float64(nodeCapacity.Pods().Value()))
for _, pod := range inputPods {
if maxPodsToEvict > 0 && *podsEvicted+1 > maxPodsToEvict {
break
}

+ if !podToleratesTaints(pod, taintsOfLowNodes) {
+ klog.V(3).Infof("Skipping eviction for Pod: %#v, doesn't tolerate node taint", pod.Name)
+ continue
+ }

cUsage := utils.GetResourceRequest(pod, v1.ResourceCPU)
mUsage := utils.GetResourceRequest(pod, v1.ResourceMemory)

success, err := evictions.EvictPod(client, pod, evictionPolicyGroupVersion, dryRun)
if !success {
klog.Warningf("Error when evicting pod: %#v (%#v)", pod.Name, err)
@@ -269,6 +282,18 @@ }
}
}

+ // podToleratesTaints returns true if a pod tolerates at least one node's taints
+ func podToleratesTaints(pod *v1.Pod, taintsOfNodes map[string][]v1.Taint) bool {
+ for nodeName, taintsForNode := range taintsOfNodes {
+ if len(pod.Spec.Tolerations) >= len(taintsForNode) && helperUtil.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, taintsForNode, nil) {
+ return true
+ }
+ klog.V(5).Infof("pod: %#v doesn't tolerate node %s's taints", pod.Name, nodeName)
+ }
+
+ return false
+ }

func SortNodesByUsage(nodes []NodeUsageMap) {
sort.Slice(nodes, func(i, j int) bool {
var ti, tj api.Percentage
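For context only (this block is not part of the commit): a minimal, self-contained sketch of how the upstream helper used above behaves. The taint and toleration values mirror the ones in TestWithTaints below; a nil filter means every taint on the node must be tolerated.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	helperUtil "k8s.io/kubernetes/pkg/apis/core/v1/helper"
)

func main() {
	// A NoSchedule taint like the one placed on n3withTaints in the test.
	taints := []v1.Taint{
		{Key: "key", Value: "value", Effect: v1.TaintEffectNoSchedule},
	}

	// Tolerates the taint: Equal operator with matching key/value; an
	// empty Effect in a toleration matches taints of any effect.
	matching := []v1.Toleration{
		{Key: "key", Operator: v1.TolerationOpEqual, Value: "value"},
	}

	// Does not tolerate the taint: the value differs.
	nonMatching := []v1.Toleration{
		{Key: "key", Operator: v1.TolerationOpEqual, Value: "other"},
	}

	fmt.Println(helperUtil.TolerationsTolerateTaintsWithFilter(matching, taints, nil))    // true
	fmt.Println(helperUtil.TolerationsTolerateTaintsWithFilter(nonMatching, taints, nil)) // false
}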
182 changes: 181 additions & 1 deletion pkg/descheduler/strategies/lownodeutilization_test.go
@@ -23,12 +23,18 @@ import (

"reflect"

"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
serializer "k8s.io/apimachinery/pkg/runtime/serializer"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/kubernetes/fake"
+ core "k8s.io/client-go/testing"
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/apis/componentconfig"
"sigs.k8s.io/descheduler/pkg/utils"
"sigs.k8s.io/descheduler/test"
)
@@ -327,3 +333,177 @@ func TestValidateThresholds(t *testing.T) {
}
}
}

func newFake(objects ...runtime.Object) *core.Fake {
scheme := runtime.NewScheme()
codecs := serializer.NewCodecFactory(scheme)
fake.AddToScheme(scheme)
o := core.NewObjectTracker(scheme, codecs.UniversalDecoder())
for _, obj := range objects {
if err := o.Add(obj); err != nil {
panic(err)
}
}

fakePtr := core.Fake{}
fakePtr.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
objs, err := o.List(
schema.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"},
schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Pod"},
action.GetNamespace(),
)
if err != nil {
return true, nil, err
}

obj := &v1.PodList{
Items: []v1.Pod{},
}
for _, pod := range objs.(*v1.PodList).Items {
podFieldSet := fields.Set(map[string]string{
"spec.nodeName": pod.Spec.NodeName,
"status.phase": string(pod.Status.Phase),
})
match := action.(core.ListAction).GetListRestrictions().Fields.Matches(podFieldSet)
if !match {
continue
}
obj.Items = append(obj.Items, *pod.DeepCopy())
}
return true, obj, nil
})
fakePtr.AddReactor("*", "*", core.ObjectReaction(o))
fakePtr.AddWatchReactor("*", core.DefaultWatchReactor(watch.NewFake(), nil))

return &fakePtr
}

func TestWithTaints(t *testing.T) {
strategy := api.DeschedulerStrategy{
Enabled: true,
Params: api.StrategyParameters{
NodeResourceUtilizationThresholds: api.NodeResourceUtilizationThresholds{
Thresholds: api.ResourceThresholds{
v1.ResourcePods: 20,
},
TargetThresholds: api.ResourceThresholds{
v1.ResourcePods: 70,
},
},
},
}

n1 := test.BuildTestNode("n1", 1000, 3000, 10)
n2 := test.BuildTestNode("n2", 1000, 3000, 10)
n3 := test.BuildTestNode("n3", 1000, 3000, 10)
n3withTaints := n3.DeepCopy()
n3withTaints.Spec.Taints = []v1.Taint{
{
Key: "key",
Value: "value",
Effect: v1.TaintEffectNoSchedule,
},
}

podThatToleratesTaint := test.BuildTestPod("tolerate_pod", 200, 0, n1.Name)
podThatToleratesTaint.Spec.Tolerations = []v1.Toleration{
{
Key: "key",
Value: "value",
},
}

tests := []struct {
nodes []*v1.Node
pods []*v1.Pod
evictionsExpected int
}{
{
nodes: []*v1.Node{n1, n2, n3},
pods: []*v1.Pod{
//Node 1 pods
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_3_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_4_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_5_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_6_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_7_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_8_%s", n1.Name), 200, 0, n1.Name),
// Node 2 pods
test.BuildTestPod(fmt.Sprintf("pod_9_%s", n1.Name), 200, 0, n2.Name),
},
evictionsExpected: 1,
},
{
nodes: []*v1.Node{n1, n3withTaints},
pods: []*v1.Pod{
//Node 1 pods
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_3_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_4_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_5_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_6_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_7_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_8_%s", n1.Name), 200, 0, n1.Name),
// Node 3 pods
test.BuildTestPod(fmt.Sprintf("pod_9_%s", n3withTaints.Name), 200, 0, n3withTaints.Name),
},
evictionsExpected: 0,
},
{
nodes: []*v1.Node{n1, n3withTaints},
pods: []*v1.Pod{
//Node 1 pods
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_3_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_4_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_5_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_6_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_7_%s", n1.Name), 200, 0, n1.Name),
podThatToleratesTaint,
// Node 3 pods
test.BuildTestPod(fmt.Sprintf("pod_9_%s", n3withTaints.Name), 200, 0, n3withTaints.Name),
},
evictionsExpected: 1,
},
}

for _, item := range tests {
var objs []runtime.Object
for _, node := range item.nodes {
objs = append(objs, node)
}

for _, pod := range item.pods {
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
objs = append(objs, pod)
}

fakePtr := newFake(objs...)
var evictionCounter int
fakePtr.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
if action.GetSubresource() != "eviction" || action.GetResource().Resource != "pods" {
return false, nil, nil
}
evictionCounter++
return true, nil, nil
})

ds := &options.DeschedulerServer{
Client: &fake.Clientset{Fake: *fakePtr},
DeschedulerConfiguration: componentconfig.DeschedulerConfiguration{
EvictLocalStoragePods: false,
},
}

nodePodCount := InitializeNodePodCount(item.nodes)
LowNodeUtilization(ds, strategy, "policy/v1", item.nodes, nodePodCount)

if item.evictionsExpected != evictionCounter {
t.Errorf("Expected %v evictions, got %v", item.evictionsExpected, evictionCounter)
}
}
}
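Assuming a standard checkout of the descheduler repository (the package path comes from the diff header above), the new test can be run in isolation with the usual Go tooling:

go test ./pkg/descheduler/strategies/ -run TestWithTaints -v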
