From 6e246188f1410eae013756ec69fec24548586357 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 14 Jun 2021 14:27:37 +0000 Subject: [PATCH 1/3] Add container name to operational dashboard Memory and CPU metrics are reported per cgroup, so pods with more than one container have several lines. This change allows them to be distinguished. --- operations/tempo-mixin/tempo-operational.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/operations/tempo-mixin/tempo-operational.json b/operations/tempo-mixin/tempo-operational.json index 776418a9dee..489660865c0 100644 --- a/operations/tempo-mixin/tempo-operational.json +++ b/operations/tempo-mixin/tempo-operational.json @@ -369,7 +369,7 @@ "expr": "rate(container_cpu_usage_seconds_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$component.*\", container!=\"POD\"}[$__rate_interval])", "interval": "", "intervalFactor": 5, - "legendFormat": "{{pod}}", + "legendFormat": "{{pod}} {{container}}", "refId": "A" } ], @@ -462,7 +462,7 @@ { "expr": "container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$component.*\", container!=\"POD\"}", "interval": "", - "legendFormat": "{{pod}}", + "legendFormat": "{{pod}} {{container}}", "refId": "A" } ], From 7039f617073c2c5a9d90196651bea2870356d472 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 14 Jun 2021 14:42:38 +0000 Subject: [PATCH 2/3] Filter out 'pause' container from operational dashboard These metrics are typically very low because the "pause" container is a piece of Kubernetes internals that sits and does nothing. Filtering them out of the display aids readability. Note that in some Kubernetes installations the pause container is labeled "POD", which was previously filtered out from the dashboard. See https://github.com/containerd/cri/issues/922 for background on the name. 
--- operations/tempo-mixin/tempo-operational.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/operations/tempo-mixin/tempo-operational.json b/operations/tempo-mixin/tempo-operational.json index 489660865c0..0d3393feb5f 100644 --- a/operations/tempo-mixin/tempo-operational.json +++ b/operations/tempo-mixin/tempo-operational.json @@ -366,7 +366,7 @@ "steppedLine": false, "targets": [ { - "expr": "rate(container_cpu_usage_seconds_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$component.*\", container!=\"POD\"}[$__rate_interval])", + "expr": "rate(container_cpu_usage_seconds_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$component.*\", container!=\"POD\", container!=\"\"}[$__rate_interval])", "interval": "", "intervalFactor": 5, "legendFormat": "{{pod}} {{container}}", @@ -460,7 +460,7 @@ "steppedLine": false, "targets": [ { - "expr": "container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$component.*\", container!=\"POD\"}", + "expr": "container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$component.*\", container!=\"POD\", container!=\"\"}", "interval": "", "legendFormat": "{{pod}} {{container}}", "refId": "A" From 5f0897dddf2528bd12b65ee859d13c893fc6b7d9 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 14 Jun 2021 14:51:26 +0000 Subject: [PATCH 3/3] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a302d4327f..fbdbd8a0363 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ## main / unreleased * [FEATURE] Added the ability to hedge requests with all backends [#750](https://github.com/grafana/tempo/pull/750) (@joe-elliott) +* [ENHANCEMENT] Improve readability of cpu and memory metrics on operational dashboard [#764](https://github.com/grafana/tempo/pull/764) (@bboreham) ## v1.0.0