From d49d805feacca6b8747437cca0e10b5a1c46d2a8 Mon Sep 17 00:00:00 2001 From: Andres Morey Date: Fri, 27 Aug 2021 11:07:25 +0300 Subject: [PATCH] Add `label-with-exclude-from-external-lbs` CLI argument to enable graceful removal/addition from external load balancers Previously, kured issued the system reboot command without first removing nodes from any connected external load balancers (ELBs). This behavior caused downtime on restart because ELBs send traffic to kube-proxy pods running on nodes until the ELB health checks fail or the node is de-registered explicitly. This patch solves the problem by adding a command line argument (`label-with-exclude-from-external-lbs`) that, when enabled, adds a "node.kubernetes.io/exclude-from-external-load-balancers" label to nodes undergoing kured reboot. This label tells the Kubernetes control plane to de-register the affected node from any connected ELBs. The node label is removed after restart which causes the control plane to re-register the node with the ELBs. Close https://github.com/weaveworks/kured/issues/358 --- README.md | 63 +++++++++++++++++++------------------- cmd/kured/main.go | 69 ++++++++++++++++++++++++++++++++++++++++++ cmd/kured/main_test.go | 61 +++++++++++++++++++++++++++++++++++++ kured-ds.yaml | 1 + 4 files changed, 163 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 5b3f50b00..0da95680e 100644 --- a/README.md +++ b/README.md @@ -85,37 +85,38 @@ The following arguments can be passed to kured via the daemonset pod template: ```console Flags: - --alert-filter-regexp regexp.Regexp alert names to ignore when checking for active alerts - --alert-firing-only bool only consider firing alerts when checking for active alerts - --blocking-pod-selector stringArray label selector identifying pods whose presence should prevent reboots - --drain-grace-period int time in seconds given to each pod to terminate gracefully, if negative, the default value specified in the pod will be used (default: -1) - --skip-wait-for-delete-timeout int when seconds is greater than zero, skip waiting for the pods whose deletion timestamp is older than N seconds while draining a node (default: 0) - --ds-name string name of daemonset on which to place lock (default "kured") - --ds-namespace string namespace containing daemonset on which to place lock (default "kube-system") - --end-time string schedule reboot only before this time of day (default "23:59:59") - --force-reboot bool force a reboot even if the drain is still running (default: false) - --drain-timeout duration timeout after which the drain is aborted (default: 0, infinite time) - -h, --help help for kured - --lock-annotation string annotation in which to record locking node (default "weave.works/kured-node-lock") - --lock-release-delay duration hold lock after reboot by this duration (default: 0, disabled) - --lock-ttl duration expire lock annotation after this duration (default: 0, disabled) - --message-template-drain string message template used to notify about a node being drained (default "Draining node %s") - --message-template-reboot string message template used to notify about a node being rebooted (default "Rebooting node %s") - --notify-url url for reboot notifications (cannot use with --slack-hook-url flags) - --period duration reboot check period (default 1h0m0s) - --prefer-no-schedule-taint string Taint name applied during pending node reboot (to prevent receiving additional pods from other rebooting nodes). Disabled by default. Set e.g. to "weave.works/kured-node-reboot" to enable tainting. - --prometheus-url string Prometheus instance to probe for active alerts - --reboot-command string command to run when a reboot is required by the sentinel (default "/sbin/systemctl reboot") - --reboot-days strings schedule reboot on these days (default [su,mo,tu,we,th,fr,sa]) - --reboot-delay duration add a delay after drain finishes but before the reboot command is issued (default 0, no time) - --reboot-sentinel string path to file whose existence signals need to reboot (default "/var/run/reboot-required") - --reboot-sentinel-command string command for which a successful run signals need to reboot (default ""). If non-empty, sentinel file will be ignored. - --slack-channel string slack channel for reboot notfications - --slack-hook-url string slack hook URL for reboot notfications [deprecated in favor of --notify-url] - --slack-username string slack username for reboot notfications (default "kured") - --start-time string schedule reboot only after this time of day (default "0:00") - --time-zone string use this timezone for schedule inputs (default "UTC") - --log-format string log format specified as text or json, defaults to "text" + --alert-filter-regexp regexp.Regexp alert names to ignore when checking for active alerts + --alert-firing-only bool only consider firing alerts when checking for active alerts + --blocking-pod-selector stringArray label selector identifying pods whose presence should prevent reboots + --drain-grace-period int time in seconds given to each pod to terminate gracefully, if negative, the default value specified in the pod will be used (default: -1) + --skip-wait-for-delete-timeout int when seconds is greater than zero, skip waiting for the pods whose deletion timestamp is older than N seconds while draining a node (default: 0) + --ds-name string name of daemonset on which to place lock (default "kured") + --ds-namespace string namespace containing daemonset on which to place lock (default "kube-system") + --end-time string schedule reboot only before this time of day (default "23:59:59") + --force-reboot bool force a reboot even if the drain is still running (default: false) + --drain-timeout duration timeout after which the drain is aborted (default: 0, infinite time) + -h, --help help for kured + --label-with-exclude-from-external-lbs add "node.kubernetes.io/exclude-from-external-load-balancers" label to nodes undergoing kured reboot (default: false) + --lock-annotation string annotation in which to record locking node (default "weave.works/kured-node-lock") + --lock-release-delay duration hold lock after reboot by this duration (default: 0, disabled) + --lock-ttl duration expire lock annotation after this duration (default: 0, disabled) + --message-template-drain string message template used to notify about a node being drained (default "Draining node %s") + --message-template-reboot string message template used to notify about a node being rebooted (default "Rebooting node %s") + --notify-url url for reboot notifications (cannot use with --slack-hook-url flags) + --period duration reboot check period (default 1h0m0s) + --prefer-no-schedule-taint string Taint name applied during pending node reboot (to prevent receiving additional pods from other rebooting nodes). Disabled by default. Set e.g. to "weave.works/kured-node-reboot" to enable tainting. + --prometheus-url string Prometheus instance to probe for active alerts + --reboot-command string command to run when a reboot is required by the sentinel (default "/sbin/systemctl reboot") + --reboot-days strings schedule reboot on these days (default [su,mo,tu,we,th,fr,sa]) + --reboot-delay duration add a delay after drain finishes but before the reboot command is issued (default 0, no time) + --reboot-sentinel string path to file whose existence signals need to reboot (default "/var/run/reboot-required") + --reboot-sentinel-command string command for which a successful run signals need to reboot (default ""). If non-empty, sentinel file will be ignored. + --slack-channel string slack channel for reboot notfications + --slack-hook-url string slack hook URL for reboot notfications [deprecated in favor of --notify-url] + --slack-username string slack username for reboot notfications (default "kured") + --start-time string schedule reboot only after this time of day (default "0:00") + --time-zone string use this timezone for schedule inputs (default "UTC") + --log-format string log format specified as text or json, defaults to "text" ``` ### Reboot Sentinel File & Period diff --git a/cmd/kured/main.go b/cmd/kured/main.go index 528dc7669..42134a119 100644 --- a/cmd/kured/main.go +++ b/cmd/kured/main.go @@ -71,6 +71,7 @@ var ( rebootEnd string timezone string annotateNodes bool + labelWithELBX bool // Metrics rebootRequiredGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ @@ -89,6 +90,15 @@ const ( KuredMostRecentRebootNeededAnnotation string = "weave.works/kured-most-recent-reboot-needed" ) +const ( + // ExcludeFromELBsLabelKey is a label key that tells the K8S control plane to exclude a node from external load balancers + ExcludeFromELBsLabelKey = "node.kubernetes.io/exclude-from-external-load-balancers" + // ExcludeFromELBsLabelVal is a label value used to track label placement by kured + ExcludeFromELBsLabelVal = "kured-remove-after-reboot" + // ExcludeFromELBsLabelKeyEscaped is the escaped label key value passed to the Patch() function + ExcludeFromELBsLabelKeyEscaped = "node.kubernetes.io~1exclude-from-external-load-balancers" +) + func init() { prometheus.MustRegister(rebootRequiredGauge) } @@ -164,6 +174,8 @@ func main() { rootCmd.PersistentFlags().BoolVar(&annotateNodes, "annotate-nodes", false, "if set, the annotations 'weave.works/kured-reboot-in-progress' and 'weave.works/kured-most-recent-reboot-needed' will be given to nodes undergoing kured reboots") + rootCmd.PersistentFlags().BoolVar(&labelWithELBX, "label-with-exclude-from-external-lbs", false, + "if set, the label 'node.kubernetes.io/exclude-from-external-load-balancers' will be added to nodes undergoing kured reboots") rootCmd.PersistentFlags().StringVar(&logFormat, "log-format", "text", "use text or json log format") @@ -359,6 +371,54 @@ func release(lock *daemonsetlock.DaemonSetLock) { } } +func enableExcludeFromELBs(client kubernetes.Interface, nodeID string) error { + log.Infof("Adding ExcludeFromELBs label to node") + + // Add ExcludeFromELBs node label + labelPatch := fmt.Sprintf(`[{"op":"add","path":"/metadata/labels/%s","value":"%s" }]`, ExcludeFromELBsLabelKeyEscaped, ExcludeFromELBsLabelVal) + _, err := client.CoreV1().Nodes().Patch(context.Background(), nodeID, types.JSONPatchType, []byte(labelPatch), metav1.PatchOptions{}) + if err != nil { + log.Errorf("Unable to add ExcludeFromELBs label to node: %s", err.Error()) + return err + } + + return nil +} + +func disableExcludeFromELBs(client kubernetes.Interface, nodeID string) error { + ctx := context.Background() + + // Get node + node, err := client.CoreV1().Nodes().Get(ctx, nodeID, metav1.GetOptions{}) + if err != nil { + log.Warnf("Unable to find node: %s", nodeID) + return err + } + + // Check ExcludeFromELBs node label + labelVal, ok := node.Labels[ExcludeFromELBsLabelKey] + if !ok { + return nil + } + + // Different label value found + if labelVal != ExcludeFromELBsLabelVal { + log.Debugf("Found ExcludeFromELBs label on node with value: '%s' (no action taken)", labelVal) + return nil + } + + // Remove ExcludeFromELBs node label + labelPatch := fmt.Sprintf(`[{"op":"remove","path":"/metadata/labels/%s"}]`, ExcludeFromELBsLabelKeyEscaped) + _, err = client.CoreV1().Nodes().Patch(ctx, nodeID, types.JSONPatchType, []byte(labelPatch), metav1.PatchOptions{}) + if err != nil { + log.Errorf("Unable to remove ExcludeFromELBs label from node: %s", err.Error()) + return err + } + + log.Infof("Removed ExcludeFromELBs label from node") + return nil +} + func drain(client *kubernetes.Clientset, node *v1.Node) { nodename := node.GetName() @@ -493,6 +553,9 @@ func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []s log.Fatal(err) } + // Remove ExcludeFromELBs label immediately to allow ELB registration + disableExcludeFromELBs(client, nodeID) + lock := daemonsetlock.New(client, nodeID, dsNamespace, dsName, lockAnnotation) nodeMeta := nodeMeta{} @@ -501,6 +564,7 @@ func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []s if err != nil { log.Fatalf("Error retrieving node object via k8s API: %v", err) } + if !nodeMeta.Unschedulable { uncordon(client, node) } @@ -514,6 +578,7 @@ func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []s deleteNodeAnnotation(client, nodeID, KuredRebootInProgressAnnotation) } } + throttle(releaseDelay) release(lock) } @@ -584,6 +649,10 @@ func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []s continue } + if labelWithELBX { + enableExcludeFromELBs(client, nodeID) + } + drain(client, node) if rebootDelay > 0 { diff --git a/cmd/kured/main_test.go b/cmd/kured/main_test.go index 24e8efb5b..52ccf2e33 100644 --- a/cmd/kured/main_test.go +++ b/cmd/kured/main_test.go @@ -1,6 +1,7 @@ package main import ( + "context" "reflect" "testing" @@ -8,6 +9,9 @@ import ( "github.com/spf13/cobra" "github.com/weaveworks/kured/pkg/alerts" assert "gotest.tools/v3/assert" + k8sv1 "k8s.io/api/core/v1" + k8smetav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + k8sfake "k8s.io/client-go/kubernetes/fake" papi "github.com/prometheus/client_golang/api" ) @@ -233,3 +237,60 @@ func Test_rebootRequired_fatals(t *testing.T) { } } + +func newK8STestClient() *k8sfake.Clientset { + // Init test client and add a node + client := k8sfake.NewSimpleClientset() + + node := &k8sv1.Node{ + ObjectMeta: k8smetav1.ObjectMeta{ + Name: "test-node-id", + Labels: map[string]string{"key1": "val1"}, // seed map so patch `add` operation will work + }, + } + + _, err := client.CoreV1().Nodes().Create(context.TODO(), node, k8smetav1.CreateOptions{}) + if err != nil { + log.Fatal(err.Error()) + } + + return client +} + +func Test_enableExcludeFromELBs(t *testing.T) { + testclient := newK8STestClient() + + // Test that method returns error if node doesn't exist + err := enableExcludeFromELBs(testclient, "doesnt-exist") + assert.Error(t, err, "nodes \"doesnt-exist\" not found") + + // Test that method adds ExcludeFromELBs label if node exists + err = enableExcludeFromELBs(testclient, "test-node-id") + assert.Assert(t, err == nil) + + nodes, err := testclient.CoreV1().Nodes().List(context.TODO(), k8smetav1.ListOptions{}) + labels := nodes.Items[0].Labels + assert.Equal(t, labels[ExcludeFromELBsLabelKey], ExcludeFromELBsLabelVal) +} + +func Test_disableExcludeFromELBs(t *testing.T) { + testclient := newK8STestClient() + + // Test that method returns error if node doesn't exist + err := disableExcludeFromELBs(testclient, "doesnt-exist") + assert.Error(t, err, "nodes \"doesnt-exist\" not found") + + // Test that method executes silently on existing node without pre-existing ExcludeFromELBs label + err = disableExcludeFromELBs(testclient, "test-node-id") + assert.Assert(t, err == nil) + + // Add ExcludeFromELBs label and check that method removes it + enableExcludeFromELBs(testclient, "test-node-id") + err = disableExcludeFromELBs(testclient, "test-node-id") + assert.Assert(t, err == nil) + + nodes, err := testclient.CoreV1().Nodes().List(context.TODO(), k8smetav1.ListOptions{}) + labels := nodes.Items[0].Labels + _, ok := labels[ExcludeFromELBsLabelKey] + assert.Assert(t, !ok) +} diff --git a/kured-ds.yaml b/kured-ds.yaml index 96622de7e..05d93abc8 100644 --- a/kured-ds.yaml +++ b/kured-ds.yaml @@ -74,5 +74,6 @@ spec: # - --end-time=23:59:59 # - --time-zone=UTC # - --annotate-nodes=false +# - --label-with-exclude-from-external-lbs=false # - --lock-release-delay=30m # - --log-format=text