forked from milvus-io/milvus
-
Notifications
You must be signed in to change notification settings - Fork 0
197 lines (173 loc) · 8.21 KB
/
mem-stress-chaos-test.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
name: Memory Stress Chaos Test
on:
workflow_dispatch:
jobs:
test-memory-stress-chaos:
runs-on: ubuntu-latest
timeout-minutes: 40
strategy:
fail-fast: false
matrix:
pod: [datanode, indexnode, proxy, pulsar, querynode, etcd, minio]
steps:
- name: Set env param
run: |
echo "RELEASE=test-${{ matrix.pod }}-memory-stress" >> $GITHUB_ENV
- name: Creating kind cluster
uses: helm/[email protected]
- name: Print cluster information
run: |
kubectl config view
kubectl cluster-info
kubectl get nodes
kubectl get pods -n kube-system
helm version
kubectl version
- uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Install dependency
uses: nick-invision/retry@v2
with:
timeout_minutes: 5
max_attempts: 3
retry_on: error
shell: bash
command: |
pip install -r tests/python_client/requirements.txt --trusted-host https://test.pypi.org
pip install --upgrade protobuf
- name: Deploy Chaos Mesh
shell: bash
run: |
helm repo add chaos-mesh https://charts.chaos-mesh.org
helm search repo chaos-mesh
kubectl create ns chaos-testing
helm install --wait --timeout 360s chaos-mesh chaos-mesh/chaos-mesh --namespace=chaos-testing --version v2.0.3 --set chaosDaemon.runtime=containerd --set chaosDaemon.socketPath=/run/containerd/containerd.sock
kubectl get po -n chaos-testing
- name: Deploy Milvus
shell: bash
working-directory: tests/python_client/chaos
run: |
echo "latest tag:"
bash ../../../scripts/docker_image_find_tag.sh -n milvusdb/milvus -t master-latest -f master- -F -L -q
helm repo add milvus https://zilliztech.github.io/milvus-helm
helm repo update
if [[ ${{ matrix.pod }} != *"standalone"* ]]; then helm install --wait --timeout 720s ${{ env.RELEASE }} milvus/milvus -f cluster-values.yaml -n=chaos-testing; fi
if [[ ${{ matrix.pod }} == *"standalone"* ]]; then helm install --wait --timeout 720s ${{ env.RELEASE }} milvus/milvus -f standalone-values.yaml -n=chaos-testing; fi
kubectl get pods -n chaos-testing
sleep 20s
kubectl get pods -n chaos-testing
kubectl port-forward service/${{ env.RELEASE }}-milvus 19530 -n chaos-testing >/dev/null 2>&1 &
sleep 20s
# check whether port-forward success
nc -vz 127.0.0.1 19530
# check whether milvus server is healthy
python scripts/hello_milvus.py
- name: Chaos Test
timeout-minutes: 15
shell: bash
working-directory: tests/python_client/chaos
run: |
# replace chaos object
sed -i "s/TESTS_CONFIG_LOCATION =.*/TESTS_CONFIG_LOCATION = \'chaos_objects\/mem_stress\/'/g" constants.py
sed -i "s/ALL_CHAOS_YAMLS =.*/ALL_CHAOS_YAMLS = \'chaos_${{ matrix.pod }}_mem_stress.yaml\'/g" constants.py
sed -i "s/RELEASE_NAME =.*/RELEASE_NAME = \'${{ env.RELEASE }}\'/g" constants.py
cat constants.py
timeout 14m pytest -s -v test_chaos.py --host 127.0.0.1 --log-cli-level=INFO --capture=no || echo "chaos test failed"
- name: Result Analysis
timeout-minutes: 15
shell: bash
working-directory: tests/python_client/chaos/reports
run: |
echo "result analysis"
cat ${{ env.RELEASE }}.log || echo "no log file"
- name: Milvus E2E Test
timeout-minutes: 10
if: ${{ always() }}
shell: bash
working-directory: tests/python_client
run: |
kubectl get networkchaos -n chaos-testing
kubectl get pod -n chaos-testing
# wait all pod to be ready
kubectl wait --for=condition=Ready pod -l app.kubernetes.io/instance=${{ env.RELEASE }} -n chaos-testing --timeout=360s
kubectl wait --for=condition=Ready pod -l release=${{ env.RELEASE }} -n chaos-testing --timeout=360s
kubectl get pod -n chaos-testing
ps aux|grep forward|grep -v grep|awk '{print $2}'|xargs kill -9
kubectl port-forward service/${{ env.RELEASE }}-milvus 19530 -n chaos-testing >/dev/null 2>&1 &
sleep 20s
nc -vz 127.0.0.1 19530
pytest -s -v testcases/test_e2e.py --host 127.0.0.1 --log-cli-level=INFO --capture=no
python chaos/scripts/hello_milvus.py --host 127.0.0.1
- name: Export logs
if: ${{ always() }}
shell: bash
working-directory: tests/python_client/chaos
run: |
#in this step, verify whether pod has been killed by pod's age
kubectl get po -n chaos-testing
# export k8s log for chaos mesh and milvus
bash ../../scripts/export_log_k8s.sh chaos-testing ${{ env.RELEASE }} k8s_logs/chaos-test
- name: Deploy Milvus Again If Previous E2E Test Failed
timeout-minutes: 15
if: ${{ failure() }}
shell: bash
working-directory: tests/python_client/chaos
run: |
kubectl config set-context --current --namespace=chaos-testing
bash scripts/uninstall_milvus.sh ${{ env.RELEASE }}
if [ ${{ matrix.pod }} != "standalone" ]; then helm install --wait --timeout 720s ${{ env.RELEASE }} milvus/milvus -f cluster-values.yaml -n=chaos-testing; fi
if [ ${{ matrix.pod }} == "standalone" ]; then helm install --wait --timeout 720s ${{ env.RELEASE }} milvus/milvus --set cluster.enabled=false --set etcd.replicaCount=1 --set minio.mode=standalone --set pulsar.enabled=false -n=chaos-testing; fi
kubectl get pods -n chaos-testing
sleep 20s
kubectl get pods -n chaos-testing
ps aux|grep forward|grep -v grep|awk '{print $2}'|xargs kill -9
kubectl port-forward service/${{ env.RELEASE }}-milvus 19530 -n chaos-testing >/dev/null 2>&1 &
sleep 20s
# check whether port-forward success
nc -vz 127.0.0.1 19530
# check whether milvus server is healthy
python scripts/hello_milvus.py
- name: Data Consist Test
timeout-minutes: 5
if: ${{ always() }}
shell: bash
working-directory: tests/python_client/chaos
run: |
pytest -s -v test_chaos_data_consist.py --host 127.0.0.1 --log-cli-level=INFO --capture=no || echo "data consist chaos test failed"
- name: Milvus E2E Test
timeout-minutes: 10
if: ${{ always() }}
shell: bash
working-directory: tests/python_client
run: |
kubectl get pod -n chaos-testing
kubectl wait --for=condition=Ready pod -l app.kubernetes.io/instance=${{ env.RELEASE }} -n chaos-testing --timeout=360s
kubectl wait --for=condition=Ready pod -l release=${{ env.RELEASE }} -n chaos-testing --timeout=360s
kubectl get pod -n chaos-testing
ps aux|grep forward|grep -v grep|awk '{print $2}'|xargs kill -9
kubectl port-forward service/${{ env.RELEASE }}-milvus 19530 -n chaos-testing >/dev/null 2>&1 &
sleep 20s
nc -vz 127.0.0.1 19530
pytest -s -v testcases/test_e2e.py --host 127.0.0.1 --log-cli-level=INFO --capture=no
python chaos/scripts/hello_milvus.py --host 127.0.0.1
- name: Export logs
if: ${{ always() }}
shell: bash
working-directory: tests/python_client/chaos
run: |
#in this step, verify whether pod has been killed by pod's age
kubectl get po -n chaos-testing
# export k8s log for chaos mesh and milvus
bash ../../scripts/export_log_k8s.sh chaos-testing ${{ env.RELEASE }} k8s_logs/data-consist-test
bash ../../scripts/export_log_k8s.sh chaos-testing chaos-daemon k8s_logs/chaos-mesh-daemon
- name: Upload logs
if: ${{ always() }}
uses: actions/upload-artifact@v2
with:
name: logs-${{ matrix.pod }}
path: |
tests/python_client/chaos/k8s_logs
tests/python_client/chaos/reports