Skip to content

Commit

Permalink
Set autopilot.min_quorum
Browse files Browse the repository at this point in the history
  • Loading branch information
lkysow committed Nov 23, 2023
1 parent e8b4e85 commit 96ef1d1
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 3 deletions.
22 changes: 22 additions & 0 deletions .changelog/3000.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,25 @@
```release-note:breaking-change
server: set `leave_on_terminate` to `true` and set the server pod disruption budget `maxUnavailable` to `1`.

This change makes server rollouts faster and more reliable. However, there is now a potential for reduced reliability if users accidentally
scale the statefulset down. Now servers will leave the raft pool when they are stopped gracefully which reduces the fault
tolerance. For example, with 5 servers, you can tolerate a loss of 2 servers' data as raft guarantees data is replicated to
a majority of nodes (3). However, if you accidentally scale the statefulset down to 3, then the raft quorum will now be 2, and
if you lose 2 servers, you may lose data. Before this change, the quorum would have remained at 3.

During a regular rollout, the number of servers will be reduced by 1 at a time, which doesn't affect quorum when running
an odd number of servers, e.g. quorum for 5 servers is 3, and quorum for 4 servers is also 3. That's why the pod disruption
budget is being set to 1 now.

If a server is stopped ungracefully, e.g. due to a node loss, it will not leave the raft pool, and so fault tolerance won't be affected.

For the vast majority of users, this change will be beneficial, however if you wish to remain with the old settings you
can set:

server:
extraConfig: |
{"leave_on_terminate": false}
disruptionBudget:
maxUnavailable: <previous setting>

```
2 changes: 1 addition & 1 deletion acceptance/tests/server/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,6 @@ func TestServerRestart(t *testing.T) {

require.NoError(t, unmarshallErrs, "there were some json unmarshall errors, this is likely a bug")

logger.Logf(t, "restart took %s, there were %d seconds without quorum", time.Now().Sub(start), noQuorumCount)
logger.Logf(t, "restart took %s, there were %d seconds without quorum", time.Since(start), noQuorumCount)
require.Equal(t, 0, noQuorumCount, "there was %d seconds without quorum", noQuorumCount)
}
4 changes: 4 additions & 0 deletions charts/consul/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,10 @@ but if it's reduced by more than 1, the quorum size can change so that's why thi
{{- end -}}
{{- end -}}
{{- define "consul.server.autopilotMinQuorum" -}}
{{- add (div (int .Values.server.replicas) 2) 1 -}}
{{- end -}}
{{- define "consul.pdb.connectInject.maxUnavailable" -}}
{{- if eq (int .Values.connectInject.replicas) 1 -}}
{{ 0 }}
Expand Down
5 changes: 4 additions & 1 deletion charts/consul/templates/server-config-configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ data:
},
{{- end }}
"server": true,
"leave_on_terminate": true
"leave_on_terminate": true,
"autopilot": {
"min_quorum": {{ template "consul.server.autopilotMinQuorum" . }}
}
}
{{- $vaultConnectCAEnabled := and .Values.global.secretsBackend.vault.connectCA.address .Values.global.secretsBackend.vault.connectCA.rootPKIPath .Values.global.secretsBackend.vault.connectCA.intermediatePKIPath -}}
{{- if and .Values.global.secretsBackend.vault.enabled $vaultConnectCAEnabled }}
Expand Down
60 changes: 59 additions & 1 deletion charts/consul/test/unit/server-config-configmap.bats
Original file line number Diff line number Diff line change
Expand Up @@ -1317,4 +1317,62 @@ load _helpers
yq -r '.data["server.json"]' | jq -r .log_level | tee /dev/stderr)

[ "${configmap}" = "DEBUG" ]
}
}

#--------------------------------------------------------------------
# server.autopilot.min_quorum

@test "server/ConfigMap: autopilot.min_quorum=1 when replicas=1" {
cd `chart_dir`
local actual=$(helm template \
-s templates/server-config-configmap.yaml \
--set 'server.replicas=1' \
. | tee /dev/stderr |
yq -r '.data["server.json"]' | jq -r .autopilot.min_quorum | tee /dev/stderr)

[ "${actual}" = "1" ]
}

@test "server/ConfigMap: autopilot.min_quorum=2 when replicas=2" {
cd `chart_dir`
local actual=$(helm template \
-s templates/server-config-configmap.yaml \
--set 'server.replicas=2' \
. | tee /dev/stderr |
yq -r '.data["server.json"]' | jq -r .autopilot.min_quorum | tee /dev/stderr)

[ "${actual}" = "2" ]
}

@test "server/ConfigMap: autopilot.min_quorum=2 when replicas=3" {
cd `chart_dir`
local actual=$(helm template \
-s templates/server-config-configmap.yaml \
--set 'server.replicas=3' \
. | tee /dev/stderr |
yq -r '.data["server.json"]' | jq -r .autopilot.min_quorum | tee /dev/stderr)

[ "${actual}" = "2" ]
}

@test "server/ConfigMap: autopilot.min_quorum=3 when replicas=4" {
cd `chart_dir`
local actual=$(helm template \
-s templates/server-config-configmap.yaml \
--set 'server.replicas=4' \
. | tee /dev/stderr |
yq -r '.data["server.json"]' | jq -r .autopilot.min_quorum | tee /dev/stderr)

[ "${actual}" = "3" ]
}

@test "server/ConfigMap: autopilot.min_quorum=3 when replicas=5" {
cd `chart_dir`
local actual=$(helm template \
-s templates/server-config-configmap.yaml \
--set 'server.replicas=5' \
. | tee /dev/stderr |
yq -r '.data["server.json"]' | jq -r .autopilot.min_quorum | tee /dev/stderr)

[ "${actual}" = "3" ]
}

0 comments on commit 96ef1d1

Please sign in to comment.