Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Drain node during scale down #183

Merged
merged 13 commits into from
Mar 23, 2022
5 changes: 5 additions & 0 deletions changes/unreleased/Added-20220321-134942.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
kind: Added
body: Scale down will drain active connections before removing pod
time: 2022-03-21T13:49:42.627023537-03:00
custom:
Issue: "183"
2 changes: 1 addition & 1 deletion pkg/controllers/clientroutinglabel_reconcile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
)

var _ = Describe("subscriptionlabel_reconcile", func() {
var _ = Describe("clientroutinglabel_reconcile", func() {
ctx := context.Background()

It("should add label to pods that have at least one shard subscription", func() {
Expand Down
91 changes: 91 additions & 0 deletions pkg/controllers/drainnode_reconcile.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
(c) Copyright [2021-2022] Micro Focus or one of its affiliates.
Licensed under the Apache License, Version 2.0 (the "License");
You may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controllers

import (
"context"
"fmt"

vapi "github.com/vertica/vertica-kubernetes/api/v1beta1"
"github.com/vertica/vertica-kubernetes/pkg/cmds"
verrors "github.com/vertica/vertica-kubernetes/pkg/errors"
"github.com/vertica/vertica-kubernetes/pkg/events"
"github.com/vertica/vertica-kubernetes/pkg/names"
corev1 "k8s.io/api/core/v1"
ctrl "sigs.k8s.io/controller-runtime"
)

// DrainNodeReconciler is the reconcile actor that waits for client sessions
// to leave pods that are pending delete before the scale down proceeds.
type DrainNodeReconciler struct {
	// VRec is the parent reconciler; its event recorder is used to post the
	// drain retry event.
	VRec *VerticaDBReconciler
	// Vdb is the CRD we are acting on.
	Vdb *vapi.VerticaDB
	// PRunner executes the vsql session query inside a pod.
	PRunner cmds.PodRunner
	// PFacts holds the collected per-pod facts (pending delete, up node, ...)
	// that decide which pods need to be checked.
	PFacts *PodFacts
}

// MakeDrainNodeReconciler builds a DrainNodeReconciler from the given
// collaborators and hands it back as a ReconcileActor.
func MakeDrainNodeReconciler(vdbrecon *VerticaDBReconciler,
	vdb *vapi.VerticaDB, prunner cmds.PodRunner, pfacts *PodFacts) ReconcileActor {
	actor := new(DrainNodeReconciler)
	actor.VRec = vdbrecon
	actor.Vdb = vdb
	actor.PRunner = prunner
	actor.PFacts = pfacts
	return actor
}

// Reconcile drains the pods that are about to be scaled down. For every pod
// that is both pending delete and an up node it checks for active
// connections, and stops at the first pod whose check aborts the reconcile
// (an error or a requeue) so that the pod is not removed from the cluster
// while clients are still attached.
func (s *DrainNodeReconciler) Reconcile(ctx context.Context, req *ctrl.Request) (ctrl.Result, error) {
	if collectErr := s.PFacts.Collect(ctx, s.Vdb); collectErr != nil {
		return ctrl.Result{}, collectErr
	}

	// Note: this reconciler depends on the client routing reconciler having
	// already run and directed traffic away from pending delete pods.
	for _, pod := range s.PFacts.Detail {
		// Only pods that are being removed and are still up need draining.
		if !pod.pendingDelete || !pod.upNode {
			continue
		}
		res, err := s.reconcilePod(ctx, pod)
		if verrors.IsReconcileAborted(res, err) {
			return res, err
		}
	}

	return ctrl.Result{}, nil
}

// reconcilePod runs the drain check for one pod. It counts the sessions on the
// pod's Vertica node, excluding the session that issues the query itself, and
// asks for a requeue when any are found.
func (s *DrainNodeReconciler) reconcilePod(ctx context.Context, pf *PodFact) (ctrl.Result, error) {
	// Format for the session count query; the node name is filled in below.
	const sessionCountFmt = "select count(*)" +
		" from sessions" +
		" where node_name = '%s'" +
		" and session_id not in (" +
		" select session_id from current_session" +
		" )"

	query := fmt.Sprintf(sessionCountFmt, pf.vnodeName)
	stdout, _, err := s.PRunner.ExecVSQL(ctx, pf.name, names.ServerContainer, "-tAc", query)
	if err != nil {
		return ctrl.Result{}, err
	}

	// Nothing connected: the pod is drained and the reconcile can move on.
	if !anyActiveConnections(stdout) {
		return ctrl.Result{Requeue: false}, nil
	}

	// An active connection remains. Record an event and requeue, which causes
	// the retry to go through the exponential backoff algorithm.
	s.VRec.EVRec.Eventf(s.Vdb, corev1.EventTypeWarning, events.DrainNodeRetry,
		"Pod '%s' has active connections preventing the drain from succeeding", pf.name.Name)
	return ctrl.Result{Requeue: true}, nil
}
94 changes: 94 additions & 0 deletions pkg/controllers/drainnode_reconcile_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
(c) Copyright [2021-2022] Micro Focus or one of its affiliates.
Licensed under the Apache License, Version 2.0 (the "License");
You may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controllers

import (
"context"

. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
vapi "github.com/vertica/vertica-kubernetes/api/v1beta1"
"github.com/vertica/vertica-kubernetes/pkg/cmds"
"github.com/vertica/vertica-kubernetes/pkg/names"
"github.com/vertica/vertica-kubernetes/pkg/test"
ctrl "sigs.k8s.io/controller-runtime"
)

var _ = Describe("drainnode_reconcile", func() {
	ctx := context.Background()

	// sessionQuery is the leading part of the vsql statement the drain
	// reconciler issues; it is used to find that command in the fake runner.
	const sessionQuery = "select count(*) from session"

	// newTwoPodVDB returns a VerticaDB with one subcluster of two pods.
	newTwoPodVDB := func() *vapi.VerticaDB {
		vdb := vapi.MakeVDB()
		vdb.Spec.Subclusters = []vapi.Subcluster{
			{Name: "sc1", Size: 2},
		}
		return vdb
	}

	It("should query sessions if pod is pending delete", func() {
		vdb := newTwoPodVDB()
		test.CreatePods(ctx, k8sClient, vdb, test.AllPodsRunning)
		defer test.DeletePods(ctx, k8sClient, vdb)
		// Shrink the subcluster after the pods exist so that one pod becomes
		// pending delete.
		vdb.Spec.Subclusters[0].Size--
		test.CreateVDB(ctx, k8sClient, vdb)
		defer test.DeleteVDB(ctx, k8sClient, vdb)

		runner := &cmds.FakePodRunner{}
		podFacts := MakePodFacts(k8sClient, runner)
		actor := MakeDrainNodeReconciler(vdbRec, vdb, runner, &podFacts)
		Expect(actor.Reconcile(ctx, &ctrl.Request{})).Should(Equal(ctrl.Result{}))
		Expect(runner.FindCommands(sessionQuery)).Should(HaveLen(1))
	})

	It("should not query sessions if no pod is pending delete", func() {
		vdb := newTwoPodVDB()
		test.CreatePods(ctx, k8sClient, vdb, test.AllPodsRunning)
		defer test.DeletePods(ctx, k8sClient, vdb)
		test.CreateVDB(ctx, k8sClient, vdb)
		defer test.DeleteVDB(ctx, k8sClient, vdb)

		runner := &cmds.FakePodRunner{}
		podFacts := MakePodFacts(k8sClient, runner)
		actor := MakeDrainNodeReconciler(vdbRec, vdb, runner, &podFacts)
		Expect(actor.Reconcile(ctx, &ctrl.Request{})).Should(Equal(ctrl.Result{}))
		Expect(runner.FindCommands(sessionQuery)).Should(BeEmpty())
	})

	It("should requeue if one pending delete pod has active connections", func() {
		vdb := newTwoPodVDB()
		test.CreatePods(ctx, k8sClient, vdb, test.AllPodsRunning)
		defer test.DeletePods(ctx, k8sClient, vdb)
		// Shrink the subcluster after the pods exist so that one pod becomes
		// pending delete.
		vdb.Spec.Subclusters[0].Size--
		test.CreateVDB(ctx, k8sClient, vdb)
		defer test.DeleteVDB(ctx, k8sClient, vdb)

		runner := &cmds.FakePodRunner{Results: make(cmds.CmdResults)}
		podFacts := MakePodFacts(k8sClient, runner)
		Expect(podFacts.Collect(ctx, vdb)).Should(Succeed())
		// Make the session count query on the pending delete pod report ten
		// active connections.
		pendingDeletePod := names.GenPodName(vdb, &vdb.Spec.Subclusters[0], 1)
		runner.Results[pendingDeletePod] = []cmds.CmdResult{
			{Stdout: "10\n"},
		}

		actor := MakeDrainNodeReconciler(vdbRec, vdb, runner, &podFacts)
		Expect(actor.Reconcile(ctx, &ctrl.Request{})).Should(Equal(ctrl.Result{Requeue: true}))
		Expect(runner.FindCommands(sessionQuery)).Should(HaveLen(1))
	})
})
6 changes: 3 additions & 3 deletions pkg/controllers/onlineupgrade_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -783,13 +783,13 @@ func (o *OnlineUpgradeReconciler) isSubclusterIdle(ctx context.Context, scName s

// Parse the output. We requeue if there is an active connection. This
// will rely on the UpgradeRequeueTime that is set to default
return ctrl.Result{Requeue: o.doesScHaveActiveConnections(stdout)}, nil
return ctrl.Result{Requeue: anyActiveConnections(stdout)}, nil
}

// doesScHaveActiveConnections will parse the output from vsql to see if there
// anyActiveConnections will parse the output from vsql to see if there
// are any active connections. Returns true if there is at least one
// connection.
func (o *OnlineUpgradeReconciler) doesScHaveActiveConnections(stdout string) bool {
func anyActiveConnections(stdout string) bool {
lines := strings.Split(stdout, "\n")
res := strings.Trim(lines[0], " ")
// As a convenience for test, allow empty string to be treated as having no
Expand Down
2 changes: 2 additions & 0 deletions pkg/controllers/verticadb_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ func (r *VerticaDBReconciler) constructActors(log logr.Logger, vdb *vapi.Vertica
// Remove Service label for any pods that are pending delete. This will
// cause the Service object to stop routing traffic to them.
MakeClientRoutingLabelReconciler(r, vdb, pfacts, DelNodeApplyMethod, ""),
// Wait for any nodes that are pending delete with active connections to leave.
MakeDrainNodeReconciler(r, vdb, prunner, pfacts),
// Handles calls to admintools -t db_remove_subcluster
MakeDBRemoveSubclusterReconciler(r, log, vdb, prunner, pfacts),
MakeStatusReconciler(r.Client, r.Scheme, log, vdb, pfacts),
Expand Down
1 change: 1 addition & 0 deletions pkg/events/event.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,5 @@ const (
OperatorUpgrade = "OperatorUpgrade"
InvalidUpgradePath = "InvalidUpgradePath"
RebalanceShards = "RebalanceShards"
DrainNodeRetry = "DrainNodeRetry"
)
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@

set -o errexit

# Some e2e tests run long running vsql connections. This is a helper script to
# wait for that connection to be established.

NAMESPACE=$1
SC_TYPE=$2
POD=test-long-running-connection-$SC_TYPE
POD=$2

while ! kubectl get pod -n $NAMESPACE $POD 2> /dev/null; do sleep 0.1; done
echo "Waiting for pod to be in ready state..."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ apiVersion: kuttl.dev/v1beta1
kind: TestStep
commands:
- command: bash -c "kustomize build long-running-connection-primary/overlay | kubectl -n $NAMESPACE apply -f - "
- command: ./wait-for-connection.sh $NAMESPACE primary
- command: ../../../scripts/wait-for-long-running-connection.sh $NAMESPACE test-long-running-connection-primary
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ apiVersion: kuttl.dev/v1beta1
kind: TestStep
commands:
- command: bash -c "kustomize build long-running-connection-secondary/overlay | kubectl -n $NAMESPACE apply -f - "
- command: ./wait-for-connection.sh $NAMESPACE secondary
- command: ../../../scripts/wait-for-long-running-connection.sh $NAMESPACE test-long-running-connection-secondary
17 changes: 17 additions & 0 deletions tests/e2e/scale-down-drain/05-create-communal-creds.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# (c) Copyright [2021-2022] Micro Focus or one of its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: kuttl.dev/v1beta1
kind: TestStep
commands:
- script: kustomize build ../../manifests/communal-creds/overlay | kubectl apply -f - --namespace $NAMESPACE
20 changes: 20 additions & 0 deletions tests/e2e/scale-down-drain/10-assert.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# (c) Copyright [2021-2022] Micro Focus or one of its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: v1
kind: Pod
metadata:
labels:
control-plane: controller-manager
status:
phase: Running
17 changes: 17 additions & 0 deletions tests/e2e/scale-down-drain/10-deploy-operator.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# (c) Copyright [2021-2022] Micro Focus or one of its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: kuttl.dev/v1beta1
kind: TestStep
commands:
- command: sh -c "cd ../../.. && make deploy-operator NAMESPACE=$NAMESPACE"
21 changes: 21 additions & 0 deletions tests/e2e/scale-down-drain/15-assert.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# (c) Copyright [2021-2022] Micro Focus or one of its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: vertica.com/v1beta1
kind: VerticaDB
metadata:
name: v-scale-down-drain
status:
installCount: 3
addedToDBCount: 1
upNodeCount: 1
17 changes: 17 additions & 0 deletions tests/e2e/scale-down-drain/15-create-db.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# (c) Copyright [2021-2022] Micro Focus or one of its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: kuttl.dev/v1beta1
kind: TestStep
commands:
- command: bash -c "kustomize build setup-vdb/overlay | kubectl -n $NAMESPACE apply -f - "
25 changes: 25 additions & 0 deletions tests/e2e/scale-down-drain/20-assert.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# (c) Copyright [2021-2022] Micro Focus or one of its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: vertica.com/v1beta1
kind: VerticaDB
metadata:
name: v-scale-down-drain
status:
subclusters:
- installCount: 1
addedToDBCount: 1
upNodeCount: 1
- installCount: 2
addedToDBCount: 2
upNodeCount: 2
1 change: 1 addition & 0 deletions tests/e2e/scale-down-drain/20-wait-for-secondary.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Intentionally empty to give this step a name in kuttl
Loading