-
Notifications
You must be signed in to change notification settings - Fork 4.9k
/
metrics.go
204 lines (175 loc) · 7.05 KB
/
metrics.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package cpu
import (
"github.com/pkg/errors"
"github.com/elastic/beats/v7/libbeat/common"
"github.com/elastic/beats/v7/libbeat/metric/system/resolve"
"github.com/elastic/beats/v7/libbeat/opt"
)
// CPU manages the CPU metrics from /proc/stat.
// If a given metric isn't available on a given platform,
// the value will be null. All methods that use these fields
// should assume that any value can be null.
// The values are in "ticks", which translates to milliseconds of CPU time.
//
// Every field is an opt.Uint; the struct tag on each field carries the key
// name for that counter. Total sums all eight fields.
type CPU struct {
	User opt.Uint `struct:"user,omitempty"`
	Sys opt.Uint `struct:"system,omitempty"`
	Idle opt.Uint `struct:"idle,omitempty"`
	Nice opt.Uint `struct:"nice,omitempty"` // Linux, Darwin, BSD
	Irq opt.Uint `struct:"irq,omitempty"` // Linux and openbsd
	Wait opt.Uint `struct:"iowait,omitempty"` // Linux and AIX
	SoftIrq opt.Uint `struct:"softirq,omitempty"` // Linux only
	Stolen opt.Uint `struct:"steal,omitempty"` // Linux only
}
// MetricOpts defines the fields that are passed along to the formatted output.
// Each flag switches on one family of keys produced by Metrics.Format.
type MetricOpts struct {
	// Ticks adds a "ticks" entry holding the raw counter value to every reported metric.
	Ticks bool
	// Percentages adds a "pct" entry to every reported metric, plus the "total.pct" summary.
	Percentages bool
	// NormalizedPercentages adds a "norm.pct" entry to every reported metric,
	// plus the "total.norm.pct" summary. These are always computed with a CPU count of 1.
	NormalizedPercentages bool
}
// CPUMetrics carries global and per-core CPU metrics.
// It is the sample type stored by Monitor between fetches.
type CPUMetrics struct {
	// totals holds the counters aggregated over all CPUs.
	totals CPU
	// list carries the same data, broken down by CPU.
	// Its length is what Monitor.Fetch reports as the CPU count.
	list []CPU
}
// Total reports the overall CPU time, in ticks, covered by this sample.
// It is the sum of every counter in the struct.
func (cpu CPU) Total() uint64 {
	// Every counter contributes to the total, so they are simply
	// gathered up and handed to the summing helper in one go.
	counters := []opt.Uint{
		cpu.User,
		cpu.Nice,
		cpu.Sys,
		cpu.Idle,
		cpu.Wait,
		cpu.Irq,
		cpu.SoftIrq,
		cpu.Stolen,
	}
	return opt.SumOptUint(counters...)
}
/*
The below code implements a "metrics tracker" that gives us the ability to
calculate CPU percentages, as we average usage across a time period.
*/
// Monitor is used to monitor the overall CPU usage of the system over time.
// It remembers the most recently fetched sample so that the next fetch can be
// paired with it.
type Monitor struct {
	// lastSample is the sample stored by the most recent Fetch or FetchCores call;
	// both methods overwrite it.
	lastSample CPUMetrics
	// Hostfs is the resolver handed to Get on every fetch.
	Hostfs resolve.Resolver
}
// New builds a CPU metrics monitor that uses the given resolver.
// Hostfs is only relevant on linux and freebsd.
func New(hostfs resolve.Resolver) *Monitor {
	monitor := new(Monitor)
	monitor.Hostfs = hostfs
	return monitor
}
// Fetch takes a fresh sample of the aggregate CPU counters and pairs it with
// the sample stored by the previous fetch.
// The stored sample is replaced by the new one.
func (m *Monitor) Fetch() (Metrics, error) {
	sample, err := Get(m.Hostfs)
	if err != nil {
		return Metrics{}, errors.Wrap(err, "Error fetching CPU metrics")
	}

	// Build the result from the old stored sample before it is replaced.
	result := Metrics{
		previousSample: m.lastSample.totals,
		currentSample:  sample.totals,
		count:          len(sample.list),
		isTotals:       true,
	}
	m.lastSample = sample

	return result, nil
}
// FetchCores takes a fresh sample and returns one Metrics value per CPU, each
// paired with the same CPU's entry from the previously stored sample.
// The stored sample is replaced by the new one.
func (m *Monitor) FetchCores() ([]Metrics, error) {
	sample, err := Get(m.Hostfs)
	if err != nil {
		return nil, errors.Wrap(err, "Error fetching CPU metrics")
	}

	earlier := m.lastSample.list
	perCore := make([]Metrics, len(sample.list))
	for idx, current := range sample.list {
		// The number of CPUs can change between samples; a core with no
		// earlier entry is paired with an empty CPU value.
		var prior CPU
		if idx < len(earlier) {
			prior = earlier[idx]
		}
		perCore[idx] = Metrics{
			previousSample: prior,
			currentSample:  current,
			isTotals:       false,
		}
	}

	m.lastSample = sample
	return perCore, nil
}
// Metrics stores the current and the last sample collected by a Beat.
// Values are produced by Monitor.Fetch (aggregate) and Monitor.FetchCores (per CPU).
type Metrics struct {
	// previousSample is the sample stored before currentSample was taken;
	// it is an empty CPU value when no earlier sample exists.
	previousSample CPU
	// currentSample is the most recently fetched sample.
	currentSample CPU
	// count is the number of per-CPU entries in the fetched sample.
	// Only Fetch sets it; FetchCores leaves it at zero.
	count int
	// isTotals is true for aggregate metrics and false for per-core metrics.
	isTotals bool
}
// Format returns the final MapStr data object for the metrics.
//
// Which keys are emitted is controlled by opts: every counter that is set in
// the current sample is reported under its own name, and "total.pct" /
// "total.norm.pct" summaries are added when the matching percentage option is
// enabled.
//
// An error is returned when the total of the current sample is not strictly
// greater than the total of the previous sample, since no meaningful time
// delta can be computed in that case.
func (metric Metrics) Format(opts MetricOpts) (common.MapStr, error) {
	currentTotal := metric.currentSample.Total()
	previousTotal := metric.previousSample.Total()
	// The totals are unsigned, so they must be compared before subtracting:
	// a "previous" total larger than the current one would otherwise wrap
	// around to a huge positive delta instead of being rejected.
	if currentTotal <= previousTotal {
		return nil, errors.New("Previous sample is newer than current sample")
	}
	timeDelta := currentTotal - previousTotal

	// Aggregate metrics are scaled by the CPU count; per-core metrics always
	// describe exactly one CPU.
	normCPU := metric.count
	if !metric.isTotals {
		normCPU = 1
	}

	formattedMetrics := common.MapStr{}

	if opts.Percentages {
		formattedMetrics.Put("total.pct", createTotal(metric.previousSample, metric.currentSample, timeDelta, normCPU))
	}
	if opts.NormalizedPercentages {
		formattedMetrics.Put("total.norm.pct", createTotal(metric.previousSample, metric.currentSample, timeDelta, 1))
	}

	cur, prev := metric.currentSample, metric.previousSample
	counters := []struct {
		name     string
		current  opt.Uint
		previous opt.Uint
	}{
		{"user", cur.User, prev.User},
		{"system", cur.Sys, prev.Sys},
		{"idle", cur.Idle, prev.Idle},
		{"nice", cur.Nice, prev.Nice},
		{"irq", cur.Irq, prev.Irq},
		{"iowait", cur.Wait, prev.Wait},
		{"softirq", cur.SoftIrq, prev.SoftIrq},
		{"steal", cur.Stolen, prev.Stolen},
	}
	for _, counter := range counters {
		// Counters that are not set in the current sample are left out of
		// the output entirely.
		if counter.current.IsZero() {
			continue
		}
		formattedMetrics[counter.name] = fillMetric(opts, counter.current, counter.previous, timeDelta, normCPU)
	}

	return formattedMetrics, nil
}
// createTotal computes the overall busy share between two samples: numCPU
// minus the idle share, rounded to the default number of decimal places.
func createTotal(prev, cur CPU, timeDelta uint64, numCPU int) float64 {
	notBusy := cpuMetricTimeDelta(prev.Idle, cur.Idle, timeDelta, numCPU)

	// When the wait counter is set, it is treated like idle time, so it is
	// excluded from the total as per #7627.
	if waitReported := !cur.Wait.IsZero(); waitReported {
		notBusy += cpuMetricTimeDelta(prev.Wait, cur.Wait, timeDelta, numCPU)
	}

	busy := float64(numCPU) - notBusy
	return common.Round(busy, common.DefaultDecimalPlacesCount)
}
// fillMetric builds the output map for a single counter. Depending on opts it
// contains the raw tick count ("ticks"), the share scaled by numCPU ("pct")
// and the share scaled by 1 ("norm.pct").
func fillMetric(opts MetricOpts, cur, prev opt.Uint, timeDelta uint64, numCPU int) common.MapStr {
	out := common.MapStr{}

	// share is this counter's part of the time delta, scaled by the given factor.
	share := func(scale int) float64 {
		return cpuMetricTimeDelta(prev, cur, timeDelta, scale)
	}

	if opts.Ticks {
		out.Put("ticks", cur.ValueOr(0))
	}
	if opts.Percentages {
		out.Put("pct", share(numCPU))
	}
	if opts.NormalizedPercentages {
		out.Put("norm.pct", share(1))
	}

	return out
}
// CPUCount reports the CPU count recorded with these metrics.
// When available, use this instead of runtime.NumCPU().
func (metric *Metrics) CPUCount() int {
	return metric.count
}
// cpuMetricTimeDelta is a helper used by fillMetric and createTotal. It returns
// the change in one counter as a fraction of timeDelta, multiplied by numCPU
// and rounded to the default number of decimal places.
// Unset counters are treated as 0.
func cpuMetricTimeDelta(prev, current opt.Uint, timeDelta uint64, numCPU int) float64 {
	// The difference is converted to a signed value before the division;
	// presumably so that a counter that went backwards gives a negative share.
	ticks := int64(current.ValueOr(0) - prev.ValueOr(0))
	fraction := float64(ticks) / float64(timeDelta)
	scaled := fraction * float64(numCPU)
	return common.Round(scaled, common.DefaultDecimalPlacesCount)
}