From e1dd7d78e1543b8791282b0d613e1f818509bccc Mon Sep 17 00:00:00 2001 From: Kemal Akkoyun Date: Fri, 5 Mar 2021 14:04:27 +0100 Subject: [PATCH] Fix regression introduced in previous PR Signed-off-by: Kemal Akkoyun --- examples/alerts/alerts.md | 6 +++--- examples/alerts/alerts.yaml | 6 +++--- examples/alerts/tests.yaml | 36 +++++++++++++++++----------------- mixin/alerts/sidecar.libsonnet | 4 ++-- mixin/runbook.md | 2 +- 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/examples/alerts/alerts.md b/examples/alerts/alerts.md index 13fc5f0726..be63d66b02 100644 --- a/examples/alerts/alerts.md +++ b/examples/alerts/alerts.md @@ -324,12 +324,12 @@ rules: severity: critical - alert: ThanosSidecarUnhealthy annotations: - description: Thanos Sidecar {{$labels.job}} {{$labels.pod}} is unhealthy for more - than {{ $value }} seconds. + description: Thanos Sidecar {{$labels.job}} {{$labels.instance}} is unhealthy + for more than {{$value}} seconds. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy summary: Thanos Sidecar is unhealthy. expr: | - time() - max(timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job=~"thanos-sidecar.*"})) by (job,pod) >= 240 + time() - max by (job, instance) (timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job=~"thanos-sidecar.*"})) >= 240 labels: severity: critical ``` diff --git a/examples/alerts/alerts.yaml b/examples/alerts/alerts.yaml index ad074bbf69..6d6e4a7a84 100644 --- a/examples/alerts/alerts.yaml +++ b/examples/alerts/alerts.yaml @@ -308,12 +308,12 @@ groups: severity: critical - alert: ThanosSidecarUnhealthy annotations: - description: Thanos Sidecar {{$labels.job}} {{$labels.pod}} is unhealthy for - more than {{ $value }} seconds. + description: Thanos Sidecar {{$labels.job}} {{$labels.instance}} is unhealthy + for more than {{$value}} seconds. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy summary: Thanos Sidecar is unhealthy. expr: | - time() - max(timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job=~"thanos-sidecar.*"})) by (job,pod) >= 240 + time() - max by (job, instance) (timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job=~"thanos-sidecar.*"})) >= 240 labels: severity: critical - name: thanos-store diff --git a/examples/alerts/tests.yaml b/examples/alerts/tests.yaml index b856a23223..d65135f9ee 100644 --- a/examples/alerts/tests.yaml +++ b/examples/alerts/tests.yaml @@ -7,9 +7,9 @@ evaluation_interval: 1m tests: - interval: 1m input_series: - - series: 'thanos_sidecar_last_heartbeat_success_time_seconds{namespace="production", job="thanos-sidecar", pod="thanos-sidecar-pod-0"}' + - series: 'thanos_sidecar_last_heartbeat_success_time_seconds{namespace="production", job="thanos-sidecar", instance="thanos-sidecar-0"}' values: '5 10 43 17 11 _x5 0x10' - - series: 'thanos_sidecar_last_heartbeat_success_time_seconds{namespace="production", job="thanos-sidecar", pod="thanos-sidecar-pod-1"}' + - series: 'thanos_sidecar_last_heartbeat_success_time_seconds{namespace="production", job="thanos-sidecar", instance="thanos-sidecar-1"}' values: '4 9 42 15 10 _x5 0x10' promql_expr_test: - expr: time() @@ -22,33 +22,33 @@ tests: exp_samples: - labels: '{}' value: 120 - - expr: time() - max(timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"})) by (job, pod) + - expr: time() - max(timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"})) by (job, instance) eval_time: 5m exp_samples: - - labels: '{job="thanos-sidecar", pod="thanos-sidecar-pod-0"}' + - labels: '{job="thanos-sidecar", instance="thanos-sidecar-0"}' value: 60 - - labels: '{job="thanos-sidecar", pod="thanos-sidecar-pod-1"}' + - labels: '{job="thanos-sidecar", instance="thanos-sidecar-1"}' value: 60 - - expr: time() - max(timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"})) by (job, pod) + - expr: time() - max(timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"})) by (job, instance) eval_time: 6m exp_samples: - - labels: '{job="thanos-sidecar", pod="thanos-sidecar-pod-0"}' + - labels: '{job="thanos-sidecar", instance="thanos-sidecar-0"}' value: 120 - - labels: '{job="thanos-sidecar", pod="thanos-sidecar-pod-1"}' + - labels: '{job="thanos-sidecar", instance="thanos-sidecar-1"}' value: 120 - - expr: time() - max(timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"})) by (job, pod) + - expr: time() - max(timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"})) by (job, instance) eval_time: 7m exp_samples: - - labels: '{job="thanos-sidecar", pod="thanos-sidecar-pod-0"}' + - labels: '{job="thanos-sidecar", instance="thanos-sidecar-0"}' value: 180 - - labels: '{job="thanos-sidecar", pod="thanos-sidecar-pod-1"}' + - labels: '{job="thanos-sidecar", instance="thanos-sidecar-1"}' value: 180 - - expr: time() - max(timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"})) by (job, pod) + - expr: time() - max(timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"})) by (job, instance) eval_time: 8m exp_samples: - - labels: '{job="thanos-sidecar", pod="thanos-sidecar-pod-0"}' + - labels: '{job="thanos-sidecar", instance="thanos-sidecar-0"}' value: 240 - - labels: '{job="thanos-sidecar", pod="thanos-sidecar-pod-1"}' + - labels: '{job="thanos-sidecar", instance="thanos-sidecar-1"}' value: 240 alert_rule_test: - eval_time: 1m @@ -65,17 +65,17 @@ tests: - exp_labels: severity: critical job: thanos-sidecar - pod: thanos-sidecar-pod-0 + instance: thanos-sidecar-0 exp_annotations: - description: 'Thanos Sidecar thanos-sidecar thanos-sidecar-pod-0 is unhealthy for more than 240 seconds.' + description: 'Thanos Sidecar thanos-sidecar thanos-sidecar-0 is unhealthy for more than 240 seconds.' runbook_url: 'https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy' summary: 'Thanos Sidecar is unhealthy.' - exp_labels: severity: critical job: thanos-sidecar - pod: thanos-sidecar-pod-1 + instance: thanos-sidecar-1 exp_annotations: - description: 'Thanos Sidecar thanos-sidecar thanos-sidecar-pod-1 is unhealthy for more than 240 seconds.' + description: 'Thanos Sidecar thanos-sidecar thanos-sidecar-1 is unhealthy for more than 240 seconds.' runbook_url: 'https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy' summary: 'Thanos Sidecar is unhealthy.' - eval_time: 10m diff --git a/mixin/alerts/sidecar.libsonnet b/mixin/alerts/sidecar.libsonnet index b59cd3a161..0d67486ff4 100644 --- a/mixin/alerts/sidecar.libsonnet +++ b/mixin/alerts/sidecar.libsonnet @@ -39,11 +39,11 @@ { alert: 'ThanosSidecarUnhealthy', annotations: { - description: 'Thanos Sidecar {{$labels.job}} {{$labels.pod}} is unhealthy for more than {{ $value }} seconds.', + description: 'Thanos Sidecar {{$labels.job}} {{$labels.instance}} is unhealthy for more than {{$value}} seconds.', summary: 'Thanos Sidecar is unhealthy.', }, expr: ||| - time() - max(timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{%(selector)s})) by (job,pod) >= 240 + time() - max by (job, instance) (timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{%(selector)s})) >= 240 ||| % thanos.sidecar, labels: { severity: 'critical', diff --git a/mixin/runbook.md b/mixin/runbook.md index 99d0fd627a..78da401bcd 100755 --- a/mixin/runbook.md +++ b/mixin/runbook.md @@ -87,7 +87,7 @@ |---|---|---|---|---| |ThanosSidecarPrometheusDown|Thanos Sidecar cannot connect to Prometheus|Thanos Sidecar {{$labels.job}} {{$labels.instance}} cannot connect to Prometheus.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarprometheusdown](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarprometheusdown)| |ThanosSidecarBucketOperationsFailed|Thanos Sidecar bucket operations are failing|Thanos Sidecar {{$labels.job}} {{$labels.instance}} bucket operations are failing|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarbucketoperationsfailed](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarbucketoperationsfailed)| -|ThanosSidecarUnhealthy|Thanos Sidecar is unhealthy.|Thanos Sidecar {{$labels.job}} {{$labels.pod}} is unhealthy for more than {{ $value }} seconds.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy)| +|ThanosSidecarUnhealthy|Thanos Sidecar is unhealthy.|Thanos Sidecar {{$labels.job}} {{$labels.instance}} is unhealthy for more than {{$value}} seconds.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy)| ## thanos-store