diff --git a/.changelog/2962.txt b/.changelog/2962.txt
index f707e4828f..f4550c2781 100644
--- a/.changelog/2962.txt
+++ b/.changelog/2962.txt
@@ -1,3 +1,3 @@
-```releast-note:feature
+```release-note:feature
 api-gateway: (Consul Enterprise) Add JWT authentication and authorization for API Gateway and HTTPRoutes.
 ```
diff --git a/.changelog/3000.txt b/.changelog/3000.txt
new file mode 100644
index 0000000000..1e6be21f78
--- /dev/null
+++ b/.changelog/3000.txt
@@ -0,0 +1,36 @@
+```release-note:breaking-change
+server: set `leave_on_terminate` to `true` and set the server pod disruption budget `maxUnavailable` to `1`.
+
+This change makes server rollouts faster and more reliable. However, there is now a potential for reduced reliability if users accidentally
+scale the statefulset down. Servers now leave the raft pool when they are stopped gracefully, which reduces fault
+tolerance. For example, with 5 servers, you can tolerate the loss of 2 servers' data, as raft guarantees data is replicated to
+a majority of nodes (3). However, if you accidentally scale the statefulset down to 3, the raft quorum will now be 2, and
+if you lose 2 servers, you may lose data. Before this change, the quorum would have remained at 3.
+
+During a regular rollout, the number of servers is reduced by 1 at a time, which doesn't affect quorum when running
+an odd number of servers, e.g. quorum for 5 servers is 3, and quorum for 4 servers is also 3. That's why the pod disruption
+budget is now set to 1.
+
+If a server is stopped ungracefully, e.g. due to a node loss, it will not leave the raft pool, so fault tolerance won't be affected.
+
+For the vast majority of users, this change will be beneficial; however, if you wish to remain with the old settings, you
+can set:
+
+  server:
+    extraConfig: |
+      {"leave_on_terminate": false}
+    disruptionBudget:
+      maxUnavailable:
+
+```
+
+```release-note:breaking-change
+server: set `autopilot.min_quorum` to the correct quorum value to ensure autopilot doesn't prune servers needed for quorum. Also set `autopilot.disable_upgrade_migration` to `true` as that setting is meant for blue/green deploys, not rolling deploys.
+
+These settings make sense for most use cases; however, if you have a specific reason to use the old settings, you can use the following config to keep them:
+
+  server:
+    extraConfig: |
+      {"autopilot": {"min_quorum": 0, "disable_upgrade_migration": false}}
+
+```
diff --git a/acceptance/ci-inputs/aks_acceptance_test_packages.yaml b/acceptance/ci-inputs/aks_acceptance_test_packages.yaml
index 9b36143c6b..b2874fd373 100644
--- a/acceptance/ci-inputs/aks_acceptance_test_packages.yaml
+++ b/acceptance/ci-inputs/aks_acceptance_test_packages.yaml
@@ -4,4 +4,4 @@
 # Cloud package is not included in test suite as it is triggered from a non consul-k8s repo and requires HCP credentials
 - {runner: 0, test-packages: "connect peering snapshot-agent wan-federation"}
 - {runner: 1, test-packages: "consul-dns example partitions metrics sync"}
-- {runner: 2, test-packages: "basic cli config-entries api-gateway ingress-gateway terminating-gateway vault"}
\ No newline at end of file
+- {runner: 2, test-packages: "basic cli config-entries api-gateway ingress-gateway terminating-gateway vault server"}
\ No newline at end of file
diff --git a/acceptance/ci-inputs/eks_acceptance_test_packages.yaml b/acceptance/ci-inputs/eks_acceptance_test_packages.yaml
index 9b36143c6b..b2874fd373 100644
--- a/acceptance/ci-inputs/eks_acceptance_test_packages.yaml
+++ b/acceptance/ci-inputs/eks_acceptance_test_packages.yaml
@@ -4,4 +4,4 @@
 # Cloud package is not included in test suite as it is triggered from a non consul-k8s repo and requires HCP credentials
 - {runner: 0, test-packages: "connect peering snapshot-agent wan-federation"}
 - {runner: 1, test-packages: "consul-dns example partitions metrics sync"}
-- {runner: 2, test-packages: "basic cli config-entries api-gateway ingress-gateway terminating-gateway vault"}
\ No newline at end of file
+- {runner: 2, test-packages: "basic cli config-entries api-gateway ingress-gateway terminating-gateway vault server"}
\ No newline at end of file
diff --git a/acceptance/ci-inputs/gke_acceptance_test_packages.yaml b/acceptance/ci-inputs/gke_acceptance_test_packages.yaml
index 9b36143c6b..b2874fd373 100644
--- a/acceptance/ci-inputs/gke_acceptance_test_packages.yaml
+++ b/acceptance/ci-inputs/gke_acceptance_test_packages.yaml
@@ -4,4 +4,4 @@
 # Cloud package is not included in test suite as it is triggered from a non consul-k8s repo and requires HCP credentials
 - {runner: 0, test-packages: "connect peering snapshot-agent wan-federation"}
 - {runner: 1, test-packages: "consul-dns example partitions metrics sync"}
-- {runner: 2, test-packages: "basic cli config-entries api-gateway ingress-gateway terminating-gateway vault"}
\ No newline at end of file
+- {runner: 2, test-packages: "basic cli config-entries api-gateway ingress-gateway terminating-gateway vault server"}
\ No newline at end of file
diff --git a/acceptance/ci-inputs/kind_acceptance_test_packages.yaml b/acceptance/ci-inputs/kind_acceptance_test_packages.yaml
index 7e670fd0aa..a4e09abd9c 100644
--- a/acceptance/ci-inputs/kind_acceptance_test_packages.yaml
+++ b/acceptance/ci-inputs/kind_acceptance_test_packages.yaml
@@ -6,7 +6,7 @@
 - {runner: 1, test-packages: "peering"}
 - {runner: 2, test-packages: "sameness"}
 - {runner: 3, test-packages: "connect snapshot-agent wan-federation"}
-- {runner: 4, test-packages: "cli vault metrics"}
+- {runner: 4, test-packages: "cli vault metrics server"}
 - {runner: 5, test-packages: "api-gateway ingress-gateway sync example consul-dns"}
 - {runner: 6, test-packages: "config-entries terminating-gateway basic"}
 - {runner: 7, test-packages: "mesh_v2 tenancy_v2"}
diff --git a/acceptance/tests/example/main_test.go b/acceptance/tests/example/main_test.go
index f92fff8a59..e35893a1d3 100644
--- a/acceptance/tests/example/main_test.go
+++ b/acceptance/tests/example/main_test.go
@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: MPL-2.0
 
 // Rename package to your test package.
+// NOTE: Remember to add your test package to acceptance/ci-inputs so it gets run in CI.
 package example
 
 import (
diff --git a/acceptance/tests/server/main_test.go b/acceptance/tests/server/main_test.go
new file mode 100644
index 0000000000..497df9dca2
--- /dev/null
+++ b/acceptance/tests/server/main_test.go
@@ -0,0 +1,18 @@
+// Copyright (c) HashiCorp, Inc.
+// SPDX-License-Identifier: MPL-2.0
+
+package server
+
+import (
+    "os"
+    "testing"
+
+    testsuite "github.com/hashicorp/consul-k8s/acceptance/framework/suite"
+)
+
+var suite testsuite.Suite
+
+func TestMain(m *testing.M) {
+    suite = testsuite.NewSuite(m)
+    os.Exit(suite.Run())
+}
diff --git a/acceptance/tests/server/server_test.go b/acceptance/tests/server/server_test.go
new file mode 100644
index 0000000000..5511671935
--- /dev/null
+++ b/acceptance/tests/server/server_test.go
@@ -0,0 +1,91 @@
+// Copyright (c) HashiCorp, Inc.
+// SPDX-License-Identifier: MPL-2.0
+
+package server
+
+import (
+    "encoding/json"
+    "fmt"
+    "testing"
+    "time"
+
+    "github.com/hashicorp/consul-k8s/acceptance/framework/consul"
+    "github.com/hashicorp/consul-k8s/acceptance/framework/helpers"
+    "github.com/hashicorp/consul-k8s/acceptance/framework/k8s"
+    "github.com/hashicorp/consul-k8s/acceptance/framework/logger"
+    "github.com/hashicorp/go-multierror"
+    "github.com/stretchr/testify/require"
+)
+
+// Test that when servers are restarted, they don't lose leadership.
+func TestServerRestart(t *testing.T) {
+    cfg := suite.Config()
+    if cfg.EnableCNI || cfg.EnableTransparentProxy {
+        t.Skipf("skipping because -enable-cni or -enable-transparent-proxy is set and server restart " +
+            "is already tested without those settings and those settings don't affect this test")
+    }
+
+    ctx := suite.Environment().DefaultContext(t)
+    replicas := 3
+    releaseName := helpers.RandomName()
+    helmValues := map[string]string{
+        "global.enabled":        "false",
+        "connectInject.enabled": "false",
+        "server.enabled":        "true",
+        "server.replicas":       fmt.Sprintf("%d", replicas),
+        "server.affinity":       "null", // Allow >1 pods per node so we can test in minikube with one node.
+    }
+    consulCluster := consul.NewHelmCluster(t, helmValues, suite.Environment().DefaultContext(t), suite.Config(), releaseName)
+    consulCluster.Create(t)
+
+    // Start a separate goroutine to check if at any point more than one server is without
+    // a leader. We expect the server that is restarting to be without a leader because it hasn't
+    // yet rejoined the cluster, but the other servers should have a leader.
+    expReadyPods := replicas - 1
+    var unmarshallErrs error
+    timesWithoutLeader := 0
+    done := make(chan bool)
+    defer close(done)
+    go func() {
+        for {
+            select {
+            case <-done:
+                return
+            default:
+                out, err := k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptions(t), "get", fmt.Sprintf("statefulset/%s-consul-server", releaseName),
+                    "-o", "jsonpath={.status}")
+                if err != nil {
+                    // Not failing the test on this error to reduce flakiness.
+                    logger.Logf(t, "kubectl err: %s: %s", err, out)
+                    break
+                }
+                type statefulsetOut struct {
+                    ReadyReplicas *int `json:"readyReplicas,omitempty"`
+                }
+                var jsonOut statefulsetOut
+                if err = json.Unmarshal([]byte(out), &jsonOut); err != nil {
+                    unmarshallErrs = multierror.Append(unmarshallErrs, err)
+                } else if jsonOut.ReadyReplicas == nil || *jsonOut.ReadyReplicas < expReadyPods {
+                    // note: for some k8s API reason, when readyReplicas is 0 it is not included in the JSON output,
+                    // which is why we check for nil here.
+                    timesWithoutLeader++
+                }
+                time.Sleep(1 * time.Second)
+            }
+        }
+    }()
+
+    // Restart servers.
+    out, err := k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptions(t), "rollout", "restart", fmt.Sprintf("statefulset/%s-consul-server", releaseName))
+    require.NoError(t, err, out)
+
+    // Wait for the restart to finish.
+    start := time.Now()
+    out, err = k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptions(t), "rollout", "status", "--timeout", "5m", "--watch", fmt.Sprintf("statefulset/%s-consul-server", releaseName))
+    require.NoError(t, err, out, "rollout status command errored, this likely means the rollout didn't complete in time")
+
+    // Check results.
+    require.NoError(t, unmarshallErrs, "there were some JSON unmarshal errors, this is likely a bug")
+    logger.Logf(t, "restart took %s, there were %d instances where more than one server had no leader", time.Since(start), timesWithoutLeader)
+    require.Equal(t, 0, timesWithoutLeader, "there were %d instances where more than one server had no leader", timesWithoutLeader)
+}
diff --git a/charts/consul/templates/_helpers.tpl b/charts/consul/templates/_helpers.tpl
index decb40319c..b78987f908 100644
--- a/charts/consul/templates/_helpers.tpl
+++ b/charts/consul/templates/_helpers.tpl
@@ -166,24 +166,30 @@ Expand the name of the chart.
 {{- end -}}
 
 {{/*
-Compute the maximum number of unavailable replicas for the PodDisruptionBudget.
-This defaults to (n/2)-1 where n is the number of members of the server cluster.
-Special case of replica equaling 3 and allowing a minor disruption of 1 otherwise
-use the integer value
-Add a special case for replicas=1, where it should default to 0 as well.
+Calculate the max number of server pods that may be voluntarily disrupted.
+When there is 1 server, this is set to 0 because that pod should not be disrupted. This is an edge
+case: it likely makes no practical difference with a single server, but it matches the previous
+configuration, so it is preserved here.
+Otherwise this is now always 1, as part of the move to set leave_on_terminate
+to true. With leave_on_terminate set to true, whenever a server pod is stopped, the number of raft peers
+is reduced. If the number of servers is odd and the count is reduced by 1, the quorum size doesn't change,
+but if it's reduced by more than 1, the quorum size can change, which is why this is now hardcoded to 1.
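+For example: with 5 servers the quorum is 3; a rolling update stops 1 pod, leaving 4 peers whose
+quorum is still 3. If 2 pods were disrupted at once, the raft pool would shrink to 3 peers with a
+quorum of 2, cutting the fault tolerance from 2 servers to 1. Hardcoding 1 avoids that window.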
 */}}
-{{- define "consul.pdb.maxUnavailable" -}}
+{{- define "consul.server.pdb.maxUnavailable" -}}
 {{- if eq (int .Values.server.replicas) 1 -}}
 {{ 0 }}
 {{- else if .Values.server.disruptionBudget.maxUnavailable -}}
 {{ .Values.server.disruptionBudget.maxUnavailable -}}
 {{- else -}}
-{{- if eq (int .Values.server.replicas) 3 -}}
-{{- 1 -}}
-{{- else -}}
-{{- sub (div (int .Values.server.replicas) 2) 1 -}}
+{{ 1 }}
 {{- end -}}
 {{- end -}}
+
+{{- define "consul.server.autopilotMinQuorum" -}}
+{{- add (div (int .Values.server.replicas) 2) 1 -}}
 {{- end -}}
 
 {{- define "consul.pdb.connectInject.maxUnavailable" -}}
diff --git a/charts/consul/templates/server-config-configmap.yaml b/charts/consul/templates/server-config-configmap.yaml
index 8cd726f445..4ce79794b2 100644
--- a/charts/consul/templates/server-config-configmap.yaml
+++ b/charts/consul/templates/server-config-configmap.yaml
@@ -56,7 +56,12 @@ data:
       "enabled": true
       },
 {{- end }}
-      "server": true
+      "server": true,
+      "leave_on_terminate": true,
+      "autopilot": {
+        "min_quorum": {{ template "consul.server.autopilotMinQuorum" . }},
+        "disable_upgrade_migration": true
+      }
     }
 {{- $vaultConnectCAEnabled := and .Values.global.secretsBackend.vault.connectCA.address .Values.global.secretsBackend.vault.connectCA.rootPKIPath .Values.global.secretsBackend.vault.connectCA.intermediatePKIPath -}}
 {{- if and .Values.global.secretsBackend.vault.enabled $vaultConnectCAEnabled }}
diff --git a/charts/consul/templates/server-disruptionbudget.yaml b/charts/consul/templates/server-disruptionbudget.yaml
index edf9c1c57f..56805edc2a 100644
--- a/charts/consul/templates/server-disruptionbudget.yaml
+++ b/charts/consul/templates/server-disruptionbudget.yaml
@@ -17,7 +17,7 @@ metadata:
     release: {{ .Release.Name }}
     component: server
 spec:
-  maxUnavailable: {{ template "consul.pdb.maxUnavailable" . }}
+  maxUnavailable: {{ template "consul.server.pdb.maxUnavailable" . }}
   selector:
     matchLabels:
       app: {{ template "consul.name" . }}
diff --git a/charts/consul/test/unit/server-config-configmap.bats b/charts/consul/test/unit/server-config-configmap.bats
index 7ed7f57a9c..53f20a91f6 100755
--- a/charts/consul/test/unit/server-config-configmap.bats
+++ b/charts/consul/test/unit/server-config-configmap.bats
@@ -1307,4 +1307,62 @@ load _helpers
     yq -r '.data["server.json"]' | jq -r .log_level | tee /dev/stderr)
   [ "${configmap}" = "DEBUG" ]
-}
\ No newline at end of file
+}
+
+#--------------------------------------------------------------------
+# server.autopilot.min_quorum
+
+@test "server/ConfigMap: autopilot.min_quorum=1 when replicas=1" {
+  cd `chart_dir`
+  local actual=$(helm template \
+      -s templates/server-config-configmap.yaml \
+      --set 'server.replicas=1' \
+      . | tee /dev/stderr |
+      yq -r '.data["server.json"]' | jq -r .autopilot.min_quorum | tee /dev/stderr)
+
+  [ "${actual}" = "1" ]
+}
+
+@test "server/ConfigMap: autopilot.min_quorum=2 when replicas=2" {
+  cd `chart_dir`
+  local actual=$(helm template \
+      -s templates/server-config-configmap.yaml \
+      --set 'server.replicas=2' \
+      . | tee /dev/stderr |
+      yq -r '.data["server.json"]' | jq -r .autopilot.min_quorum | tee /dev/stderr)
+
+  [ "${actual}" = "2" ]
+}
+
+@test "server/ConfigMap: autopilot.min_quorum=2 when replicas=3" {
+  cd `chart_dir`
+  local actual=$(helm template \
+      -s templates/server-config-configmap.yaml \
+      --set 'server.replicas=3' \
+      . | tee /dev/stderr |
+      yq -r '.data["server.json"]' | jq -r .autopilot.min_quorum | tee /dev/stderr)
+
+  [ "${actual}" = "2" ]
+}
+
+@test "server/ConfigMap: autopilot.min_quorum=3 when replicas=4" {
+  cd `chart_dir`
+  local actual=$(helm template \
+      -s templates/server-config-configmap.yaml \
+      --set 'server.replicas=4' \
+      . | tee /dev/stderr |
+      yq -r '.data["server.json"]' | jq -r .autopilot.min_quorum | tee /dev/stderr)
+
+  [ "${actual}" = "3" ]
+}
+
+@test "server/ConfigMap: autopilot.min_quorum=3 when replicas=5" {
+  cd `chart_dir`
+  local actual=$(helm template \
+      -s templates/server-config-configmap.yaml \
+      --set 'server.replicas=5' \
+      . | tee /dev/stderr |
+      yq -r '.data["server.json"]' | jq -r .autopilot.min_quorum | tee /dev/stderr)
+
+  [ "${actual}" = "3" ]
+}
diff --git a/charts/consul/test/unit/server-disruptionbudget.bats b/charts/consul/test/unit/server-disruptionbudget.bats
index eb076ac775..5d30d8b628 100755
--- a/charts/consul/test/unit/server-disruptionbudget.bats
+++ b/charts/consul/test/unit/server-disruptionbudget.bats
@@ -59,6 +59,16 @@ load _helpers
   [ "${actual}" = "0" ]
 }
 
+@test "server/DisruptionBudget: correct maxUnavailable with replicas=2" {
+  cd `chart_dir`
+  local actual=$(helm template \
+      -s templates/server-disruptionbudget.yaml \
+      --set 'server.replicas=2' \
+      . | tee /dev/stderr |
+      yq '.spec.maxUnavailable' | tee /dev/stderr)
+  [ "${actual}" = "1" ]
+}
+
 @test "server/DisruptionBudget: correct maxUnavailable with replicas=3" {
   cd `chart_dir`
   local actual=$(helm template \
@@ -97,7 +107,7 @@ load _helpers
     --set 'server.replicas=6' \
     . | tee /dev/stderr |
     yq '.spec.maxUnavailable' | tee /dev/stderr)
-  [ "${actual}" = "2" ]
+  [ "${actual}" = "1" ]
 }
 
 @test "server/DisruptionBudget: correct maxUnavailable with replicas=7" {
@@ -107,7 +117,7 @@ load _helpers
    --set 'server.replicas=7' \
     . | tee /dev/stderr |
     yq '.spec.maxUnavailable' | tee /dev/stderr)
-  [ "${actual}" = "2" ]
+  [ "${actual}" = "1" ]
 }
 
 @test "server/DisruptionBudget: correct maxUnavailable with replicas=8" {
@@ -117,9 +127,21 @@ load _helpers
     --set 'server.replicas=8' \
     . | tee /dev/stderr |
     yq '.spec.maxUnavailable' | tee /dev/stderr)
-  [ "${actual}" = "3" ]
+  [ "${actual}" = "1" ]
+}
+
+@test "server/DisruptionBudget: correct maxUnavailable when set with value" {
+  cd `chart_dir`
+  local actual=$(helm template \
+      -s templates/server-disruptionbudget.yaml \
+      --set 'server.replicas=5' \
+      --set 'server.disruptionBudget.maxUnavailable=5' \
+      . | tee /dev/stderr |
+      yq '.spec.maxUnavailable' | tee /dev/stderr)
+  [ "${actual}" = "5" ]
 }
 
+
 #--------------------------------------------------------------------
 # apiVersion
diff --git a/charts/consul/test/unit/server-statefulset.bats b/charts/consul/test/unit/server-statefulset.bats
index f4702af827..388d4dc986 100755
--- a/charts/consul/test/unit/server-statefulset.bats
+++ b/charts/consul/test/unit/server-statefulset.bats
@@ -792,7 +792,7 @@ load _helpers
      -s templates/server-statefulset.yaml \
      . | tee /dev/stderr |
      yq -r '.spec.template.metadata.annotations."consul.hashicorp.com/config-checksum"' | tee /dev/stderr)
-  [ "${actual}" = 0e599137f8357c786d46e1b694d7d867c541cb34d6056241a037afd0de14866b ]
+  [ "${actual}" = 59777d0692f3bf9143a09e96e58cc95296c0eaeb7565b1c49bc932954fe4423a ]
 }
 
 @test "server/StatefulSet: adds config-checksum annotation when extraConfig is provided" {
@@ -802,7 +802,7 @@ load _helpers
      --set 'server.extraConfig="{\"hello\": \"world\"}"' \
      . | tee /dev/stderr |
      yq -r '.spec.template.metadata.annotations."consul.hashicorp.com/config-checksum"' | tee /dev/stderr)
-  [ "${actual}" = 3f54c51be3473d7ae4cb91c24ba03263b7700d9a3dc3196f624ce3c6c8e93b8f ]
+  [ "${actual}" = 326eeb3fd8f497297671791c0091e7f804727b3aaeff79a47841b65571bd4cf9 ]
 }
 
 @test "server/StatefulSet: adds config-checksum annotation when config is updated" {
@@ -812,7 +812,7 @@ load _helpers
      --set 'global.acls.manageSystemACLs=true' \
      . | tee /dev/stderr |
      yq -r '.spec.template.metadata.annotations."consul.hashicorp.com/config-checksum"' | tee /dev/stderr)
-  [ "${actual}" = b44c82c9e4732433f54eeed8a299f11de0bad82a920047c8a3ad039e512ba281 ]
+  [ "${actual}" = e14ba19e37a6b79b6fd8566597464154abe9cdc4aa7079012fb3c445ac08c526 ]
 }
 
 #--------------------------------------------------------------------
diff --git a/charts/consul/values.yaml b/charts/consul/values.yaml
index 47b875d3e3..4df6ebc569 100644
--- a/charts/consul/values.yaml
+++ b/charts/consul/values.yaml
@@ -1005,8 +1005,14 @@ server:
     # the server cluster is enabled. To disable, set to `false`.
     enabled: true
 
-    # The maximum number of unavailable pods. By default, this will be
-    # automatically computed based on the `server.replicas` value to be `(n/2)-1`.
+    # The maximum number of unavailable pods. In most cases you should not change this, as it is automatically set
+    # to the correct number when left as null. This setting is retained for backwards compatibility.
+    #
+    # By default, this is set to 1 internally in the chart. When server pods are stopped gracefully, they leave the Raft
+    # consensus pool. When running an odd number of servers, one server leaving the pool does not change the quorum
+    # size, and so fault tolerance is not affected. However, if more than one server were to leave the pool, the quorum
+    # size would change. That's why this is set to 1 internally and should not be changed in most cases.
+    #
     # If you need to set this to `0`, you will need to add a
     # --set 'server.disruptionBudget.maxUnavailable=0'` flag to the helm chart installation
     # command because of a limitation in the Helm templating language.
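The same quorum arithmetic underpins the changelog, the `consul.server.autopilotMinQuorum` helper, and the `values.yaml` comment above. As a quick sanity check (a standalone sketch, not part of this diff), the following Go program mirrors the helper's `add (div (int .Values.server.replicas) 2) 1` expression and prints the resulting quorum and fault tolerance:

```go
package main

import "fmt"

// quorum mirrors the Helm expression `add (div replicas 2) 1`:
// integer division by two, then plus one.
func quorum(replicas int) int {
    return replicas/2 + 1
}

// faultTolerance is how many servers can fail before quorum is lost.
func faultTolerance(replicas int) int {
    return replicas - quorum(replicas)
}

func main() {
    for n := 1; n <= 7; n++ {
        fmt.Printf("replicas=%d  min_quorum=%d  tolerates=%d failure(s)\n",
            n, quorum(n), faultTolerance(n))
    }
}
```

The output matches the expectations encoded in the bats tests above (min_quorum 1, 2, 2, 3, 3 for replicas 1 through 5), and shows why `maxUnavailable` is pinned to 1: quorum(5) and quorum(4) are both 3, so stopping one pod of five never changes the quorum size, while stopping two at once would shrink the raft pool from 5 to 3 and the quorum from 3 to 2.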