From dc43b3b594f33dda86cb13a6cbcbd338155a58f8 Mon Sep 17 00:00:00 2001 From: Alex Kristiansen Date: Tue, 15 Dec 2020 12:18:13 -0800 Subject: [PATCH 1/8] dynamically generate CPU counts for metrics --- libbeat/metric/system/cpu/cpu.go | 22 +++++++------------ libbeat/metric/system/cpu/cpu_test.go | 6 +++++ libbeat/metric/system/process/process.go | 5 +---- libbeat/metric/system/process/process_test.go | 3 +++ 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/libbeat/metric/system/cpu/cpu.go b/libbeat/metric/system/cpu/cpu.go index abf274ff066..26b22bcd51c 100644 --- a/libbeat/metric/system/cpu/cpu.go +++ b/libbeat/metric/system/cpu/cpu.go @@ -26,12 +26,6 @@ import ( sigar "github.com/elastic/gosigar" ) -var ( - // NumCores is the number of CPU cores in the system. Changes to operating - // system CPU allocation after process startup are not reflected. - NumCores = runtime.NumCPU() -) - // CPU Monitor // Monitor is used to monitor the overall CPU usage of the system. @@ -83,16 +77,16 @@ type Metrics struct { } // NormalizedPercentages returns CPU percentage usage information that is -// normalized by the number of CPU cores (NumCores). The values will range from +// normalized by the number of CPU cores. The values will range from // 0 to 100%. func (m *Metrics) NormalizedPercentages() Percentages { return cpuPercentages(m.previousSample, m.currentSample, 1) } // Percentages returns CPU percentage usage information. The values range from -// 0 to 100% * NumCores. +// 0 to 100% * NumCPU. func (m *Metrics) Percentages() Percentages { - return cpuPercentages(m.previousSample, m.currentSample, NumCores) + return cpuPercentages(m.previousSample, m.currentSample, runtime.NumCPU()) } // cpuPercentages calculates the amount of CPU time used between the two given @@ -215,7 +209,7 @@ type LoadAverages struct { } // Averages return the CPU load averages. These values should range from -// 0 to NumCores. +// 0 to NumCPU. 
func (m *LoadMetrics) Averages() LoadAverages { return LoadAverages{ OneMinute: common.Round(m.sample.One, common.DefaultDecimalPlacesCount), @@ -224,12 +218,12 @@ func (m *LoadMetrics) Averages() LoadAverages { } } -// NormalizedAverages return the CPU load averages normalized by the NumCores. +// NormalizedAverages return the CPU load averages normalized by the NumCPU. // These values should range from 0 to 1. func (m *LoadMetrics) NormalizedAverages() LoadAverages { return LoadAverages{ - OneMinute: common.Round(m.sample.One/float64(NumCores), common.DefaultDecimalPlacesCount), - FiveMinute: common.Round(m.sample.Five/float64(NumCores), common.DefaultDecimalPlacesCount), - FifteenMinute: common.Round(m.sample.Fifteen/float64(NumCores), common.DefaultDecimalPlacesCount), + OneMinute: common.Round(m.sample.One/float64(runtime.NumCPU()), common.DefaultDecimalPlacesCount), + FiveMinute: common.Round(m.sample.Five/float64(runtime.NumCPU()), common.DefaultDecimalPlacesCount), + FifteenMinute: common.Round(m.sample.Fifteen/float64(runtime.NumCPU()), common.DefaultDecimalPlacesCount), } } diff --git a/libbeat/metric/system/cpu/cpu_test.go b/libbeat/metric/system/cpu/cpu_test.go index f45e43136dc..53b58bbbb5b 100644 --- a/libbeat/metric/system/cpu/cpu_test.go +++ b/libbeat/metric/system/cpu/cpu_test.go @@ -29,6 +29,12 @@ import ( "github.com/elastic/gosigar" ) +var ( + // NumCores is the number of CPU cores in the system. Changes to operating + // system CPU allocation after process startup are not reflected. 
+ NumCores = runtime.NumCPU() +) + func TestMonitorSample(t *testing.T) { cpu := &Monitor{lastSample: &gosigar.Cpu{}} s, err := cpu.Sample() diff --git a/libbeat/metric/system/process/process.go b/libbeat/metric/system/process/process.go index 03ed5085c44..76098a43f19 100644 --- a/libbeat/metric/system/process/process.go +++ b/libbeat/metric/system/process/process.go @@ -36,9 +36,6 @@ import ( sigar "github.com/elastic/gosigar" ) -// NumCPU is the number of CPUs of the host -var NumCPU = runtime.NumCPU() - // ProcsMap is a map where the keys are the names of processes and the value is the Process with that name type ProcsMap map[int]*Process @@ -365,7 +362,7 @@ func GetProcCPUPercentage(s0, s1 *Process) (normalizedPct, pct, totalPct float64 totalCPUDeltaMillis := int64(s1.Cpu.Total - s0.Cpu.Total) pct := float64(totalCPUDeltaMillis) / float64(timeDeltaMillis) - normalizedPct := pct / float64(NumCPU) + normalizedPct := pct / float64(runtime.NumCPU()) return common.Round(normalizedPct, common.DefaultDecimalPlacesCount), common.Round(pct, common.DefaultDecimalPlacesCount), diff --git a/libbeat/metric/system/process/process_test.go b/libbeat/metric/system/process/process_test.go index 5e04346edbf..f1dce3c2b26 100644 --- a/libbeat/metric/system/process/process_test.go +++ b/libbeat/metric/system/process/process_test.go @@ -33,6 +33,9 @@ import ( "github.com/elastic/gosigar" ) +// NumCPU is the number of CPUs of the host +var NumCPU = runtime.NumCPU() + func TestPids(t *testing.T) { pids, err := Pids() From 03e7a7fbefcd46a80c3b3dc96aec7e10d9e89f65 Mon Sep 17 00:00:00 2001 From: Alex Kristiansen Date: Tue, 15 Dec 2020 13:18:16 -0800 Subject: [PATCH 2/8] dynamic-numcpu --- libbeat/cmd/instance/metrics/metrics.go | 2 +- libbeat/metric/system/diskio/diskstat_linux.go | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libbeat/cmd/instance/metrics/metrics.go b/libbeat/cmd/instance/metrics/metrics.go index a1fd1f56783..425952cc7c2 100644 --- 
a/libbeat/cmd/instance/metrics/metrics.go +++ b/libbeat/cmd/instance/metrics/metrics.go @@ -264,7 +264,7 @@ func reportSystemCPUUsage(_ monitoring.Mode, V monitoring.Visitor) { V.OnRegistryStart() defer V.OnRegistryFinished() - monitoring.ReportInt(V, "cores", int64(process.NumCPU)) + monitoring.ReportInt(V, "cores", int64(runtime.NumCPU())) } func reportRuntime(_ monitoring.Mode, V monitoring.Visitor) { diff --git a/libbeat/metric/system/diskio/diskstat_linux.go b/libbeat/metric/system/diskio/diskstat_linux.go index 826aed78c27..5ab0f7e3723 100644 --- a/libbeat/metric/system/diskio/diskstat_linux.go +++ b/libbeat/metric/system/diskio/diskstat_linux.go @@ -20,10 +20,10 @@ package diskio import ( + "runtime" + "github.com/pkg/errors" "github.com/shirou/gopsutil/disk" - - "github.com/elastic/beats/v7/libbeat/metric/system/cpu" ) // GetCLKTCK emulates the _SC_CLK_TCK syscall @@ -63,7 +63,7 @@ func (stat *IOStat) CalcIOStatistics(counter disk.IOCountersStat) (IOMetric, err } // calculate the delta ms between the CloseSampling and OpenSampling - deltams := 1000.0 * float64(stat.curCPU.Total()-stat.lastCPU.Total()) / float64(cpu.NumCores) / float64(GetCLKTCK()) + deltams := 1000.0 * float64(stat.curCPU.Total()-stat.lastCPU.Total()) / float64(runtime.NumCPU()) / float64(GetCLKTCK()) if deltams <= 0 { return IOMetric{}, errors.New("The delta cpu time between close sampling and open sampling is less or equal to 0") } From a1a69d1e50c14068109221cd89043be4e016cea3 Mon Sep 17 00:00:00 2001 From: Alex Kristiansen Date: Tue, 15 Dec 2020 13:37:40 -0800 Subject: [PATCH 3/8] fix exported vars --- libbeat/metric/system/cpu/cpu_test.go | 18 +++++++++--------- libbeat/metric/system/process/process_test.go | 8 ++++---- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/libbeat/metric/system/cpu/cpu_test.go b/libbeat/metric/system/cpu/cpu_test.go index 53b58bbbb5b..300b814b23a 100644 --- a/libbeat/metric/system/cpu/cpu_test.go +++ b/libbeat/metric/system/cpu/cpu_test.go @@ 
-30,9 +30,9 @@ import ( ) var ( - // NumCores is the number of CPU cores in the system. Changes to operating + // numCores is the number of CPU cores in the system. Changes to operating // system CPU allocation after process startup are not reflected. - NumCores = runtime.NumCPU() + numCores = runtime.NumCPU() ) func TestMonitorSample(t *testing.T) { @@ -61,7 +61,7 @@ func TestMonitorSample(t *testing.T) { } func TestCoresMonitorSample(t *testing.T) { - cores := &CoresMonitor{lastSample: make([]gosigar.Cpu, NumCores)} + cores := &CoresMonitor{lastSample: make([]gosigar.Cpu, numCores)} sample, err := cores.Sample() if err != nil { t.Fatal(err) @@ -108,8 +108,8 @@ func TestMetricsRounding(t *testing.T) { // TestMetricsPercentages tests that Metrics returns the correct // percentages and normalized percentages. func TestMetricsPercentages(t *testing.T) { - NumCores = 10 - defer func() { NumCores = runtime.NumCPU() }() + numCores = 10 + defer func() { numCores = runtime.NumCPU() }() // This test simulates 30% user and 70% system (normalized), or 3% and 7% // respectively when there are 10 CPUs. 
@@ -139,8 +139,8 @@ func TestMetricsPercentages(t *testing.T) { assert.EqualValues(t, 1., pct.Total) pct = sample.Percentages() - assert.EqualValues(t, .3*float64(NumCores), pct.User) - assert.EqualValues(t, .7*float64(NumCores), pct.System) - assert.EqualValues(t, .0*float64(NumCores), pct.Idle) - assert.EqualValues(t, 1.*float64(NumCores), pct.Total) + assert.EqualValues(t, .3*float64(numCores), pct.User) + assert.EqualValues(t, .7*float64(numCores), pct.System) + assert.EqualValues(t, .0*float64(numCores), pct.Idle) + assert.EqualValues(t, 1.*float64(numCores), pct.Total) } diff --git a/libbeat/metric/system/process/process_test.go b/libbeat/metric/system/process/process_test.go index f1dce3c2b26..55997ae7fef 100644 --- a/libbeat/metric/system/process/process_test.go +++ b/libbeat/metric/system/process/process_test.go @@ -33,8 +33,8 @@ import ( "github.com/elastic/gosigar" ) -// NumCPU is the number of CPUs of the host -var NumCPU = runtime.NumCPU() +// numCPU is the number of CPUs of the host +var numCPU = runtime.NumCPU() func TestPids(t *testing.T) { pids, err := Pids() @@ -160,8 +160,8 @@ func TestProcCpuPercentage(t *testing.T) { SampleTime: p1.SampleTime.Add(time.Second), } - NumCPU = 48 - defer func() { NumCPU = runtime.NumCPU() }() + numCPU = 48 + defer func() { numCPU = runtime.NumCPU() }() totalPercentNormalized, totalPercent, totalValue := GetProcCPUPercentage(p1, p2) assert.EqualValues(t, 0.0721, totalPercentNormalized) From 5c63d15e1f9b403604ac413b016c69a490cfbc45 Mon Sep 17 00:00:00 2001 From: Alex Kristiansen Date: Tue, 15 Dec 2020 13:41:47 -0800 Subject: [PATCH 4/8] clean up CPU code --- libbeat/metric/system/cpu/cpu.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libbeat/metric/system/cpu/cpu.go b/libbeat/metric/system/cpu/cpu.go index 26b22bcd51c..c5f0695c2af 100644 --- a/libbeat/metric/system/cpu/cpu.go +++ b/libbeat/metric/system/cpu/cpu.go @@ -221,9 +221,10 @@ func (m *LoadMetrics) Averages() LoadAverages { // 
NormalizedAverages return the CPU load averages normalized by the NumCPU. // These values should range from 0 to 1. func (m *LoadMetrics) NormalizedAverages() LoadAverages { + cpus := runtime.NumCPU() return LoadAverages{ - OneMinute: common.Round(m.sample.One/float64(runtime.NumCPU()), common.DefaultDecimalPlacesCount), - FiveMinute: common.Round(m.sample.Five/float64(runtime.NumCPU()), common.DefaultDecimalPlacesCount), - FifteenMinute: common.Round(m.sample.Fifteen/float64(runtime.NumCPU()), common.DefaultDecimalPlacesCount), + OneMinute: common.Round(m.sample.One/float64(cpus), common.DefaultDecimalPlacesCount), + FiveMinute: common.Round(m.sample.Five/float64(cpus), common.DefaultDecimalPlacesCount), + FifteenMinute: common.Round(m.sample.Fifteen/float64(cpus), common.DefaultDecimalPlacesCount), } } From b684fdfc33fcd7711b182de3ea85e0421ed062b6 Mon Sep 17 00:00:00 2001 From: Alex Kristiansen Date: Wed, 16 Dec 2020 07:58:10 -0800 Subject: [PATCH 5/8] fix CPU count in system/cpu --- metricbeat/module/system/cpu/data.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metricbeat/module/system/cpu/data.go b/metricbeat/module/system/cpu/data.go index 497a91a6173..555f9e62cfa 100644 --- a/metricbeat/module/system/cpu/data.go +++ b/metricbeat/module/system/cpu/data.go @@ -97,7 +97,7 @@ func getPlatformCPUMetrics(sample *cpu.Metrics, selectors []string, event common // gather CPU metrics func collectCPUMetrics(selectors []string, sample *cpu.Metrics) mb.Event { - event := common.MapStr{"cores": cpu.NumCores} + event := common.MapStr{"cores": runtime.NumCPU()} getPlatformCPUMetrics(sample, selectors, event) //generate the host fields here, since we don't want users disabling it. 
From 7912daaa9ded3632eea177be6923ea26e50d1bb2 Mon Sep 17 00:00:00 2001 From: Alex Kristiansen Date: Wed, 16 Dec 2020 08:55:35 -0800 Subject: [PATCH 6/8] update load --- metricbeat/module/system/load/load.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/metricbeat/module/system/load/load.go b/metricbeat/module/system/load/load.go index 1e7a6fe2b3b..dd10d24cef1 100644 --- a/metricbeat/module/system/load/load.go +++ b/metricbeat/module/system/load/load.go @@ -20,6 +20,8 @@ package load import ( + "runtime" + "github.com/pkg/errors" "github.com/elastic/beats/v7/libbeat/common" @@ -58,7 +60,7 @@ func (m *MetricSet) Fetch(r mb.ReporterV2) error { normAvgs := load.NormalizedAverages() event := common.MapStr{ - "cores": cpu.NumCores, + "cores": runtime.NumCPU(), "1": avgs.OneMinute, "5": avgs.FiveMinute, "15": avgs.FifteenMinute, From 2fa43d8a4c6b2b28982bd59df12abe2ac819d885 Mon Sep 17 00:00:00 2001 From: Alex Kristiansen Date: Wed, 16 Dec 2020 08:57:07 -0800 Subject: [PATCH 7/8] add changelog --- CHANGELOG-developer.next.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG-developer.next.asciidoc b/CHANGELOG-developer.next.asciidoc index edec465f854..239bfa7d395 100644 --- a/CHANGELOG-developer.next.asciidoc +++ b/CHANGELOG-developer.next.asciidoc @@ -52,6 +52,7 @@ The list below covers the major changes between 7.0.0-rc2 and master only. - Replace `ACKCount`, `ACKEvents`, and `ACKLastEvent` callbacks with `ACKHandler` and interface in `beat.ClientConfig`. {pull}19632[19632] - Remove global ACK handler support via `SetACKHandler` from publisher pipeline. {pull}19632[19632] - Make implementing `Close` required for `reader.Reader` interfaces. {pull}20455[20455] +- Remove `NumCPU` as clients should update the CPU count on the fly in case of config changes in a VM. 
{pull}23154[23154] ==== Bugfixes From b51f33a541624e89e485d6055104b471f1515bb1 Mon Sep 17 00:00:00 2001 From: Alex Kristiansen Date: Wed, 16 Dec 2020 11:05:07 -0800 Subject: [PATCH 8/8] fix tests --- libbeat/metric/system/cpu/cpu_test.go | 3 ++- libbeat/metric/system/process/process_test.go | 11 +++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/libbeat/metric/system/cpu/cpu_test.go b/libbeat/metric/system/cpu/cpu_test.go index 300b814b23a..05e3ce035bf 100644 --- a/libbeat/metric/system/cpu/cpu_test.go +++ b/libbeat/metric/system/cpu/cpu_test.go @@ -138,7 +138,8 @@ func TestMetricsPercentages(t *testing.T) { assert.EqualValues(t, .0, pct.Idle) assert.EqualValues(t, 1., pct.Total) - pct = sample.Percentages() + //bypass the Metrics API so we can have a constant CPU value + pct = cpuPercentages(&s0, &s1, numCores) assert.EqualValues(t, .3*float64(numCores), pct.User) assert.EqualValues(t, .7*float64(numCores), pct.System) assert.EqualValues(t, .0*float64(numCores), pct.Idle) diff --git a/libbeat/metric/system/process/process_test.go b/libbeat/metric/system/process/process_test.go index 55997ae7fef..6bc6be447a5 100644 --- a/libbeat/metric/system/process/process_test.go +++ b/libbeat/metric/system/process/process_test.go @@ -160,11 +160,14 @@ func TestProcCpuPercentage(t *testing.T) { SampleTime: p1.SampleTime.Add(time.Second), } - numCPU = 48 - defer func() { numCPU = runtime.NumCPU() }() - totalPercentNormalized, totalPercent, totalValue := GetProcCPUPercentage(p1, p2) - assert.EqualValues(t, 0.0721, totalPercentNormalized) + //GetProcCPUPercentage will return a number that varies based on the host, due to NumCPU() + // So "un-normalize" it, then re-normalize with a constant. 
+ cpu := float64(runtime.NumCPU()) + unNormalized := totalPercentNormalized * cpu + normalizedTest := common.Round(unNormalized/48, common.DefaultDecimalPlacesCount) + + assert.EqualValues(t, 0.0721, normalizedTest) assert.EqualValues(t, 3.459, totalPercent) assert.EqualValues(t, 14841, totalValue) }