Skip to content

Commit

Permalink
Add check for unhealthy stores
Browse files Browse the repository at this point in the history
  • Loading branch information
dveeden committed Oct 19, 2022
1 parent 4ee3537 commit 617240e
Showing 1 changed file with 15 additions and 0 deletions.
15 changes: 15 additions & 0 deletions metrics/alertmanager/pd.rules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,21 @@ groups:
value: '{{ $value }}'
summary: PD_cluster_lost_connect_tikv_nums

# Warns when PD reports one or more unhealthy stores.
# Fires once pd_cluster_status{type="store_unhealth_count"}, summed per
# instance, has stayed above 0 for 1m on an instance whose summed
# etcd_server_is_leader is also above 0.
# NOTE(review): the etcd_server_is_leader clause presumably limits the alert
# to the current PD leader so followers do not duplicate it -- confirm.
- alert: PD_cluster_unhealthy_tikv_nums
  expr: (sum ( pd_cluster_status{type="store_unhealth_count"} ) by (instance)
    > 0) and (sum(etcd_server_is_leader) by (instance) > 0)
  for: 1m
  labels:
    env: ENV_LABELS_ENV
    # Copy of the rule expression carried as a label on the fired alert.
    expr: (sum ( pd_cluster_status{type="store_unhealth_count"} ) by (instance)
      > 0) and (sum(etcd_server_is_leader) by (instance) > 0)
    level: warning
  annotations:
    description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, values:{{
      $value }}'
    # Summary is kept identical to the alert name (was misspelled
    # "PD_cluster_unhealth_tikv_nums").
    summary: PD_cluster_unhealthy_tikv_nums
    value: '{{ $value }}'

- alert: PD_cluster_low_space
expr: (sum(pd_cluster_status{type="store_low_space_count"}) by (instance) > 0) and (sum(etcd_server_is_leader) by (instance) > 0)
for: 1m
Expand Down

0 comments on commit 617240e

Please sign in to comment.