Skip to content

Commit

Permalink
Fix quorum check when recovering broken etcd cluster
Browse files (browse the repository at this point in the history)
  • Loading branch information
floryut committed Oct 26, 2021
1 parent c2d4822 commit 062f587
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 2 deletions.
3 changes: 3 additions & 0 deletions roles/etcd/tasks/join_etcd_member.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@
   register: etcd_member_in_cluster
   changed_when: false
   check_mode: no
+  retries: "{{ etcd_retries }}"
+  delay: "{{ retry_stagger | random + 3 }}"
+  until: etcd_member_in_cluster.rc == 0
   tags:
     - facts
   environment:
Expand Down
3 changes: 1 addition & 2 deletions roles/recover_control_plane/etcd/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,9 @@
   when:
     - groups['broken_etcd']

-# When there is an error, everything is printed in stderr_lines, even "is healthy" messages.
 - name: Set has_quorum fact
   set_fact:
-    has_quorum: "{{ etcd_endpoint_health.stderr_lines | select('match', '.*is healthy.*') | list | length >= etcd_endpoint_health.stderr_lines | select('match', '.*is unhealthy.*') | list | length }}"
+    has_quorum: "{{ etcd_endpoint_health.stdout_lines | select('match', '.*is healthy.*') | list | length >= etcd_endpoint_health.stderr_lines | select('match', '.*is unhealthy.*') | list | length }}"
   when:
     - groups['broken_etcd']

Expand Down

0 comments on commit 062f587

Please sign in to comment.