-
Notifications
You must be signed in to change notification settings - Fork 9
/
metrics.go
145 lines (124 loc) · 4.17 KB
/
metrics.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
package ckit
import (
"github.com/grafana/ckit/internal/metricsutil"
"github.com/hashicorp/memberlist"
"github.com/prometheus/client_golang/prometheus"
)
// Values used for the "event" label of metrics.gossipEventsTotal.
//
// NOTE(review): eventUnkownMessage is misspelled ("Unkown"); the identifier is
// kept as-is because it may be referenced elsewhere in the package.
const (
	eventStateChange   = "state_change_message"
	eventUnkownMessage = "unknown_message"

	eventGetLocalState    = "get_local_state"
	eventMergeRemoteState = "merge_remote_state"

	eventNodeJoin     = "node_join"
	eventNodeLeave    = "node_leave"
	eventNodeUpdate   = "node_update"
	eventNodeConflict = "node_conflict"
)
// clusterNameLabel is the name of the constant label that newMetrics and
// newMemberlistCollector attach to every metric they create; its value is the
// cluster name passed to those constructors.
const clusterNameLabel = "cluster_name"
// metrics holds the set of metrics for a Node. Additional Collectors can be
// registered by calling Add.
type metrics struct {
// Embedded container; newMetrics registers every field below with it via Add.
metricsutil.Container
// Counter "cluster_node_gossip_received_events_total", partitioned by the
// "event" label.
gossipEventsTotal *prometheus.CounterVec
// Gauge "cluster_node_peers", partitioned by the "state" label.
nodePeers *prometheus.GaugeVec
// Gauge "cluster_node_updating"; per its help text, 1 while the node is
// processing a change to the cluster state.
nodeUpdating prometheus.Gauge
// Histogram "cluster_node_update_duration_seconds", using the default
// Prometheus buckets.
nodeUpdateDuration prometheus.Histogram
// Gauge "cluster_node_update_observers".
nodeObservers prometheus.Gauge
// Info collector "cluster_node_info", created with a "state" label.
nodeInfo *metricsutil.InfoCollector
}
// Compile-time assertion that *metrics satisfies prometheus.Collector.
var _ prometheus.Collector = (*metrics)(nil)
// newMetrics builds the metric set for a Node. Every metric carries a constant
// cluster_name label whose value is clusterName, and all of them are
// registered with the embedded Container before the set is returned.
func newMetrics(clusterName string) *metrics {
	// constLabels hands out a fresh label map on each call so that no two
	// metrics share the same underlying map.
	constLabels := func() prometheus.Labels {
		return prometheus.Labels{clusterNameLabel: clusterName}
	}

	m := &metrics{
		gossipEventsTotal: prometheus.NewCounterVec(
			prometheus.CounterOpts{
				Name:        "cluster_node_gossip_received_events_total",
				Help:        "Total number of gossip messages handled by the node.",
				ConstLabels: constLabels(),
			},
			[]string{"event"},
		),

		nodePeers: prometheus.NewGaugeVec(
			prometheus.GaugeOpts{
				Name:        "cluster_node_peers",
				Help:        "Current number of healthy peers by state",
				ConstLabels: constLabels(),
			},
			[]string{"state"},
		),

		nodeUpdating: prometheus.NewGauge(prometheus.GaugeOpts{
			Name:        "cluster_node_updating",
			Help:        "1 if the node is currently processing a change to the cluster state.",
			ConstLabels: constLabels(),
		}),

		nodeUpdateDuration: prometheus.NewHistogram(prometheus.HistogramOpts{
			Name:        "cluster_node_update_duration_seconds",
			Help:        "Histogram of the latency it took to process a change to the cluster state.",
			Buckets:     prometheus.DefBuckets,
			ConstLabels: constLabels(),
		}),

		nodeObservers: prometheus.NewGauge(prometheus.GaugeOpts{
			Name:        "cluster_node_update_observers",
			Help:        "Number of internal observers waiting for changes to cluster state.",
			ConstLabels: constLabels(),
		}),

		nodeInfo: metricsutil.NewInfoCollector(metricsutil.InfoOpts{
			Name:        "cluster_node_info",
			Help:        "Info about the local node. Label values will change as the node changes state.",
			ConstLabels: constLabels(),
		}, "state"),
	}

	// Register everything with the embedded Container.
	m.Add(
		m.gossipEventsTotal,
		m.nodePeers,
		m.nodeUpdating,
		m.nodeUpdateDuration,
		m.nodeObservers,
		m.nodeInfo,
	)
	return m
}
// newMemberlistCollector returns a Collector exposing three gauges whose
// values are read from ml: the gossip protocol version, the health score, and
// the member count. Each gauge carries a constant cluster_name label set to
// clusterName.
func newMemberlistCollector(ml *memberlist.Memberlist, clusterName string) prometheus.Collector {
	// gaugeFunc builds a GaugeFunc with the shared constant label; its value
	// is produced by calling read.
	gaugeFunc := func(name, help string, read func() float64) prometheus.GaugeFunc {
		opts := prometheus.GaugeOpts{
			Name:        name,
			Help:        help,
			ConstLabels: prometheus.Labels{clusterNameLabel: clusterName},
		}
		return prometheus.NewGaugeFunc(opts, read)
	}

	collectors := new(metricsutil.Container)
	collectors.Add(
		gaugeFunc(
			"cluster_node_gossip_proto_version",
			"Gossip protocol version used by nodes to maintain the cluster",
			func() float64 {
				// NOTE(rfratto): while this is static at the time of writing, the
				// internal documentation for memberlist claims that ProtocolVersion
				// may one day be updated at runtime.
				return float64(ml.ProtocolVersion())
			},
		),
		gaugeFunc(
			"cluster_node_gossip_health_score",
			"Health value of a node; lower values means healthier. 0 is the minimum.",
			func() float64 { return float64(ml.GetHealthScore()) },
		),
		gaugeFunc(
			"cluster_node_gossip_alive_peers",
			"How many alive gossip peers a node has, including the local node.",
			func() float64 { return float64(ml.NumMembers()) },
		),
	)
	return collectors
}