Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: Add multi-cluster support for Nodes dashboard #2945

Merged
merged 1 commit into from
Mar 8, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 56 additions & 39 deletions docs/node-mixin/lib/prom-mixin.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,23 @@ local table = grafana70.panel.table;
type: 'datasource',
},

local clusterTemplatePrototype =
template.new(
'cluster',
'$datasource',
'',
hide=if config.showMultiCluster then '' else '2',
refresh='time',
label='Cluster',
),
local clusterTemplate =
if platform == 'Darwin' then
clusterTemplatePrototype
{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname="Darwin"}, %(clusterLabel)s)' % config }
else
clusterTemplatePrototype
{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname!="Darwin"}, %(clusterLabel)s)' % config },

local instanceTemplatePrototype =
template.new(
'instance',
Expand All @@ -38,11 +55,10 @@ local table = grafana70.panel.table;
local instanceTemplate =
if platform == 'Darwin' then
instanceTemplatePrototype
{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname="Darwin"}, instance)' % config }
{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname="Darwin"}, instance)' % config }
else
instanceTemplatePrototype
{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname!="Darwin"}, instance)' % config },

{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname!="Darwin"}, instance)' % config },

local idleCPU =
graphPanel.new(
Expand All @@ -57,9 +73,9 @@ local table = grafana70.panel.table;
.addTarget(prometheus.target(
|||
(
(1 - sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal", instance="$instance"}[$__rate_interval])))
(1 - sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal", instance="$instance", %(clusterLabel)s="$cluster"}[$__rate_interval])))
/ ignoring(cpu) group_left
count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"})
count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance", %(clusterLabel)s="$cluster"})
)
||| % config,
legendFormat='{{cpu}}',
Expand All @@ -75,10 +91,10 @@ local table = grafana70.panel.table;
min=0,
fill=0,
)
.addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='1m load average'))
.addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='5m load average'))
.addTarget(prometheus.target('node_load15{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='15m load average'))
.addTarget(prometheus.target('count(node_cpu_seconds_total{%(nodeExporterSelector)s, instance="$instance", mode="idle"})' % config, legendFormat='logical cores')),
.addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='1m load average'))
.addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='5m load average'))
.addTarget(prometheus.target('node_load15{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='15m load average'))
.addTarget(prometheus.target('count(node_cpu_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", mode="idle"})' % config, legendFormat='logical cores')),

local memoryGraphPanelPrototype =
graphPanel.new(
Expand All @@ -94,44 +110,44 @@ local table = grafana70.panel.table;
.addTarget(prometheus.target(
|||
(
node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance"}
node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
-
node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance"}
node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
-
node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance"}
node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
-
node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance"}
node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
)
||| % config,
legendFormat='memory used'
))
.addTarget(prometheus.target('node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='memory buffers'))
.addTarget(prometheus.target('node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='memory cached'))
.addTarget(prometheus.target('node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='memory free'))
.addTarget(prometheus.target('node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory buffers'))
.addTarget(prometheus.target('node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory cached'))
.addTarget(prometheus.target('node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory free'))
else if platform == 'Darwin' then
// not useful to stack
memoryGraphPanelPrototype { stack: false }
.addTarget(prometheus.target('node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='Physical Memory'))
.addTarget(prometheus.target('node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Physical Memory'))
.addTarget(prometheus.target(
|||
(
node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance"} -
node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance"} +
node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance"} +
node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance"}
node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} -
node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} +
node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} +
node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
)
||| % config, legendFormat='Memory Used'
))
.addTarget(prometheus.target(
|||
(
node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance"} -
node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance"}
node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} -
node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
)
||| % config, legendFormat='App Memory'
))
.addTarget(prometheus.target('node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='Wired Memory'))
.addTarget(prometheus.target('node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='Compressed')),
.addTarget(prometheus.target('node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Wired Memory'))
.addTarget(prometheus.target('node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Compressed')),

// NOTE: avg() is used to circumvent a label change caused by a node_exporter rollout.
local memoryGaugePanelPrototype =
Expand All @@ -155,8 +171,8 @@ local table = grafana70.panel.table;
|||
100 -
(
avg(node_memory_MemAvailable_bytes{%(nodeExporterSelector)s, instance="$instance"}) /
avg(node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance"})
avg(node_memory_MemAvailable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) /
avg(node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"})
* 100
)
||| % config,
Expand All @@ -168,12 +184,12 @@ local table = grafana70.panel.table;
|||
(
(
avg(node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance"}) -
avg(node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance"}) +
avg(node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance"}) +
avg(node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance"})
avg(node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) -
avg(node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) +
avg(node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) +
avg(node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"})
) /
avg(node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance"})
avg(node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"})
)
*
100
Expand All @@ -190,17 +206,17 @@ local table = grafana70.panel.table;
)
// TODO: Does it make sense to have those three in the same panel?
.addTarget(prometheus.target(
'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % config,
'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config,
legendFormat='{{device}} read',
intervalFactor=1,
))
.addTarget(prometheus.target(
'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % config,
'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config,
legendFormat='{{device}} written',
intervalFactor=1,
))
.addTarget(prometheus.target(
'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % config,
'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config,
legendFormat='{{device}} io time',
intervalFactor=1,
)) +
Expand Down Expand Up @@ -232,15 +248,15 @@ local table = grafana70.panel.table;
.addThresholdStep(color='red', value=0.9)
.addTarget(prometheus.target(
|||
max by (mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s, %(fsMountpointSelector)s})
max by (mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s})
||| % config,
legendFormat='',
instant=true,
format='table'
))
.addTarget(prometheus.target(
|||
max by (mountpoint) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s, %(fsMountpointSelector)s})
max by (mountpoint) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s})
||| % config,
legendFormat='',
instant=true,
Expand Down Expand Up @@ -421,7 +437,7 @@ local table = grafana70.panel.table;
fill=0,
)
.addTarget(prometheus.target(
'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__rate_interval]) * 8' % config,
'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config,
legendFormat='{{device}}',
intervalFactor=1,
)),
Expand All @@ -437,7 +453,7 @@ local table = grafana70.panel.table;
fill=0,
)
.addTarget(prometheus.target(
'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__rate_interval]) * 8' % config,
'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config,
legendFormat='{{device}}',
intervalFactor=1,
)),
Expand Down Expand Up @@ -473,6 +489,7 @@ local table = grafana70.panel.table;
local templates =
[
prometheusDatasourceTemplate,
clusterTemplate,
instanceTemplate,
],

Expand Down
Loading