Skip to content

Commit

Permalink
metrics: Add check for unhealthy stores (#5612)
Browse files Browse the repository at this point in the history
close #5611

Signed-off-by: Daniël van Eeden <[email protected]>

Co-authored-by: Ti Chi Robot <[email protected]>
  • Loading branch information
dveeden and ti-chi-bot authored Oct 26, 2022
1 parent 2b51932 commit a4d9f29
Showing 1 changed file with 15 additions and 0 deletions.
15 changes: 15 additions & 0 deletions metrics/alertmanager/pd.rules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,21 @@ groups:
value: '{{ $value }}'
summary: PD_cluster_lost_connect_tikv_nums

# Warns when the per-instance sum of pd_cluster_status{type="store_unhealth_count"}
# is above zero for at least one minute. The `and` clause keeps only instances
# whose summed etcd_server_is_leader is positive (presumably so that only the
# current leader reports the alert -- confirm against the PD metrics docs).
- alert: PD_cluster_unhealthy_tikv_nums
  expr: (sum ( pd_cluster_status{type="store_unhealth_count"} ) by (instance) > 0) and (sum(etcd_server_is_leader) by (instance) > 0)
  for: 1m
  labels:
    env: ENV_LABELS_ENV
    # Copy of the rule expression above, exposed as a label; keep the two in sync.
    expr: (sum ( pd_cluster_status{type="store_unhealth_count"} ) by (instance) > 0) and (sum(etcd_server_is_leader) by (instance) > 0)
    level: warning
  annotations:
    description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, values:{{ $value }}'
    # Summary matches the alert name exactly (was misspelled "unhealth").
    summary: PD_cluster_unhealthy_tikv_nums
    value: '{{ $value }}'

- alert: PD_cluster_low_space
expr: (sum(pd_cluster_status{type="store_low_space_count"}) by (instance) > 0) and (sum(etcd_server_is_leader) by (instance) > 0)
for: 1m
Expand Down

0 comments on commit a4d9f29

Please sign in to comment.