-
Notifications
You must be signed in to change notification settings - Fork 6.5k
/
main.yml
93 lines (85 loc) · 2.98 KB
/
main.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
---
# Run `etcdctl endpoint health` against the configured endpoints and keep the
# result in `etcd_endpoint_health`. Failures are ignored on purpose: the
# registered stdout/stderr is what the following tasks evaluate.
- name: Get etcd endpoint health
  command: "{{ bin_dir }}/etcdctl endpoint health"
  register: etcd_endpoint_health
  ignore_errors: true  # noqa ignore-errors
  # The probe never reports a change and is executed even in check mode.
  changed_when: false
  check_mode: false
  environment:
    ETCDCTL_API: "3"
    ETCDCTL_ENDPOINTS: "{{ etcd_access_addresses }}"
    ETCDCTL_CERT: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem"
    ETCDCTL_KEY: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"
    ETCDCTL_CACERT: "{{ etcd_cert_dir }}/ca.pem"
  # Explicit boolean test: only run when the broken_etcd group has members.
  when:
    - groups['broken_etcd'] | length > 0
# Derive the `healthy` fact from the stderr of the endpoint health probe.
# NOTE(review): the fact is true only when stderr matches
# 'Error: unhealthy cluster', which reads inverted relative to the name
# "healthy". The expression is kept unchanged because later tasks gate on
# `not healthy` - confirm the intended polarity before changing it.
- name: Set healthy fact
  set_fact:
    healthy: "{{ etcd_endpoint_health.stderr is match('Error: unhealthy cluster') }}"
  # Explicit boolean test: only run when the broken_etcd group has members.
  when:
    - groups['broken_etcd'] | length > 0
# Decide whether the cluster still has quorum by counting the probe output:
# "is healthy" lines on stdout versus "is unhealthy" lines on stderr.
# A quorum is a strict majority, so the healthy count must be strictly greater
# than the unhealthy count; a tie (including 0 vs 0 when the probe produced no
# endpoint lines at all) is treated as lost quorum.
- name: Set has_quorum fact
  set_fact:
    has_quorum: >-
      {{ (etcd_endpoint_health.stdout_lines | select('match', '.*is healthy.*') | list | length) >
      (etcd_endpoint_health.stderr_lines | select('match', '.*is unhealthy.*') | list | length) }}
  # Explicit boolean test: only run when the broken_etcd group has members.
  when:
    - groups['broken_etcd'] | length > 0
# Hand over to the dedicated recovery task file when broken members exist and
# the has_quorum fact is false.
- name: Recover lost etcd quorum
  include_tasks: recover_lost_quorum.yml
  when:
    # Explicit boolean test instead of relying on list truthiness.
    - groups['broken_etcd'] | length > 0
    - not has_quorum
# Delete the etcd data directory on every host of the broken_etcd group.
# The task is delegated to each broken host in turn; errors are ignored so an
# unreachable or already-clean host does not abort the play.
- name: Remove etcd data dir
  file:
    path: "{{ etcd_data_dir }}"
    state: absent
  delegate_to: "{{ item }}"
  with_items: "{{ groups['broken_etcd'] }}"
  ignore_errors: true  # noqa ignore-errors
  when:
    # Explicit boolean test instead of relying on list truthiness.
    - groups['broken_etcd'] | length > 0
    - has_quorum
# Remove every file in the certificate directory whose name contains the name
# of a broken host. `shell` is required because the path relies on glob
# expansion. Errors are ignored here and evaluated per item by the next task.
# NOTE(review): the registered name is misspelled ("cerificates"); it is kept
# as-is because the following task reads it under that name.
- name: Delete old certificates
  shell: "rm {{ etcd_cert_dir }}/*{{ item }}*"
  with_items: "{{ groups['broken_etcd'] }}"
  register: delete_old_cerificates
  ignore_errors: true  # noqa ignore-errors
  # Explicit boolean test: only run when the broken_etcd group has members.
  when: groups['broken_etcd'] | length > 0
# Walk the per-host results of the certificate deletion and abort when a
# removal failed for any reason other than the files already being absent.
- name: Fail if unable to delete old certificates
  fail:
    msg: "Unable to delete old certificates for: {{ item.item }}"
  loop: "{{ delete_old_cerificates.results }}"
  changed_when: false
  when:
    # Explicit boolean test instead of relying on list truthiness.
    - groups['broken_etcd'] | length > 0
    # A non-zero exit code is only fatal when rm did not merely report that
    # there was nothing to delete.
    - item.rc != 0
    - "'No such file or directory' not in item.stderr"
# Capture the output of `etcdctl member list` in `member_list`; the member
# removal task parses its stdout lines.
- name: Get etcd cluster members
  command: "{{ bin_dir }}/etcdctl member list"
  register: member_list
  # The listing never reports a change and is executed even in check mode.
  changed_when: false
  check_mode: false
  environment:
    ETCDCTL_API: "3"
    ETCDCTL_ENDPOINTS: "{{ etcd_access_addresses }}"
    ETCDCTL_CERT: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem"
    ETCDCTL_KEY: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"
    ETCDCTL_CACERT: "{{ etcd_cert_dir }}/ca.pem"
  when:
    # Explicit boolean test instead of relying on list truthiness.
    - groups['broken_etcd'] | length > 0
    - not healthy
    - has_quorum
# Pair every broken host (item[0]) with every line of the member listing
# (item[1]). Each line is stripped of spaces and split on commas: when field 2
# equals the host's etcd_member_name, field 0 is passed to
# `etcdctl member remove` (presumably the member ID column of etcdctl's
# default output - confirm against the etcdctl version in use).
- name: Remove broken cluster members
  command: "{{ bin_dir }}/etcdctl member remove {{ item[1].replace(' ', '').split(',')[0] }}"
  environment:
    ETCDCTL_API: "3"
    ETCDCTL_ENDPOINTS: "{{ etcd_access_addresses }}"
    ETCDCTL_CERT: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem"
    ETCDCTL_KEY: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"
    ETCDCTL_CACERT: "{{ etcd_cert_dir }}/ca.pem"
  with_nested:
    - "{{ groups['broken_etcd'] }}"
    - "{{ member_list.stdout_lines }}"
  when:
    # Explicit boolean test instead of relying on list truthiness.
    - groups['broken_etcd'] | length > 0
    - not healthy
    - has_quorum
    - hostvars[item[0]]['etcd_member_name'] == item[1].replace(' ', '').split(',')[2]