From d6303888039aa182908ca32204f386d1282b1e69 Mon Sep 17 00:00:00 2001 From: Kobi Samoray Date: Wed, 29 Apr 2020 03:38:08 +0300 Subject: [PATCH] Antrea Prometheus integration (#321) Integrate with Prometheus monitoring solution. Integration of the Prometheus client into Antrea controller and agent allows the exposure of various metrics to Prometheus server. In addition to Antrea's own set of metrics, Prometheus client will also expose metrics which are defined by various components which are part of the Antrea ecosystem, e.g golang, Prometheus itself etc. See #236 --- build/yamls/antrea-eks.yml | 12 ++- build/yamls/antrea-gke.yml | 12 ++- build/yamls/antrea-ipsec.yml | 12 ++- build/yamls/antrea.yml | 12 ++- build/yamls/base/conf/antrea-agent.conf | 3 + build/yamls/base/conf/antrea-controller.conf | 3 + cmd/antrea-agent/agent.go | 11 +- cmd/antrea-agent/config.go | 3 + cmd/antrea-controller/config.go | 3 + cmd/antrea-controller/controller.go | 12 ++- go.mod | 1 + pkg/agent/agent.go | 23 +--- pkg/agent/agent_test.go | 33 ------ pkg/agent/apiserver/apiserver.go | 8 +- pkg/agent/metrics/prometheus.go | 106 +++++++++++++++++++ pkg/controller/metrics/prometheus.go | 41 +++++++ pkg/util/env/env.go | 54 ++++++++++ pkg/util/env/env_test.go | 76 +++++++++++++ 18 files changed, 353 insertions(+), 72 deletions(-) create mode 100644 pkg/agent/metrics/prometheus.go create mode 100644 pkg/controller/metrics/prometheus.go create mode 100644 pkg/util/env/env.go create mode 100644 pkg/util/env/env_test.go diff --git a/build/yamls/antrea-eks.yml b/build/yamls/antrea-eks.yml index d1057c8b80a..e835bc6e3ed 100644 --- a/build/yamls/antrea-eks.yml +++ b/build/yamls/antrea-eks.yml @@ -326,6 +326,9 @@ data: # Note that if it's set to another value, the `containerPort` of the `api` port of the # `antrea-agent` container must be set to the same value. #apiPort: 10350 + + # Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener. 
+ #enablePrometheusMetrics: false antrea-cni.conflist: | { "cniVersion":"0.3.0", @@ -348,12 +351,15 @@ data: # Note that if it's set to another value, the `containerPort` of the `api` port of the # `antrea-controller` container must be set to the same value. #apiPort: 10349 + + # Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener. + #enablePrometheusMetrics: false kind: ConfigMap metadata: annotations: {} labels: app: antrea - name: antrea-config-t4m46b8f6h + name: antrea-config-522bfh947f namespace: kube-system --- apiVersion: v1 @@ -453,7 +459,7 @@ spec: key: node-role.kubernetes.io/master volumes: - configMap: - name: antrea-config-t4m46b8f6h + name: antrea-config-522bfh947f name: antrea-config - hostPath: path: /var/log/antrea @@ -651,7 +657,7 @@ spec: operator: Exists volumes: - configMap: - name: antrea-config-t4m46b8f6h + name: antrea-config-522bfh947f name: antrea-config - hostPath: path: /etc/cni/net.d diff --git a/build/yamls/antrea-gke.yml b/build/yamls/antrea-gke.yml index 43c9af6a6e3..7eb32df692a 100644 --- a/build/yamls/antrea-gke.yml +++ b/build/yamls/antrea-gke.yml @@ -326,6 +326,9 @@ data: # Note that if it's set to another value, the `containerPort` of the `api` port of the # `antrea-agent` container must be set to the same value. #apiPort: 10350 + + # Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener. + #enablePrometheusMetrics: false antrea-cni.conflist: | { "cniVersion":"0.3.0", @@ -348,12 +351,15 @@ data: # Note that if it's set to another value, the `containerPort` of the `api` port of the # `antrea-controller` container must be set to the same value. #apiPort: 10349 + + # Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener. 
+ #enablePrometheusMetrics: false kind: ConfigMap metadata: annotations: {} labels: app: antrea - name: antrea-config-5754dg84hf + name: antrea-config-t5f26b6m9f namespace: kube-system --- apiVersion: v1 @@ -453,7 +459,7 @@ spec: key: node-role.kubernetes.io/master volumes: - configMap: - name: antrea-config-5754dg84hf + name: antrea-config-t5f26b6m9f name: antrea-config - hostPath: path: /var/log/antrea @@ -651,7 +657,7 @@ spec: operator: Exists volumes: - configMap: - name: antrea-config-5754dg84hf + name: antrea-config-t5f26b6m9f name: antrea-config - hostPath: path: /etc/cni/net.d diff --git a/build/yamls/antrea-ipsec.yml b/build/yamls/antrea-ipsec.yml index 1cf268dab98..5f2bc41ece1 100644 --- a/build/yamls/antrea-ipsec.yml +++ b/build/yamls/antrea-ipsec.yml @@ -326,6 +326,9 @@ data: # Note that if it's set to another value, the `containerPort` of the `api` port of the # `antrea-agent` container must be set to the same value. #apiPort: 10350 + + # Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener. + #enablePrometheusMetrics: false antrea-cni.conflist: | { "cniVersion":"0.3.0", @@ -348,12 +351,15 @@ data: # Note that if it's set to another value, the `containerPort` of the `api` port of the # `antrea-controller` container must be set to the same value. #apiPort: 10349 + + # Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener. 
+ #enablePrometheusMetrics: false kind: ConfigMap metadata: annotations: {} labels: app: antrea - name: antrea-config-c7579447k2 + name: antrea-config-hdhc998mb8 namespace: kube-system --- apiVersion: v1 @@ -462,7 +468,7 @@ spec: key: node-role.kubernetes.io/master volumes: - configMap: - name: antrea-config-c7579447k2 + name: antrea-config-hdhc998mb8 name: antrea-config - hostPath: path: /var/log/antrea @@ -692,7 +698,7 @@ spec: operator: Exists volumes: - configMap: - name: antrea-config-c7579447k2 + name: antrea-config-hdhc998mb8 name: antrea-config - hostPath: path: /etc/cni/net.d diff --git a/build/yamls/antrea.yml b/build/yamls/antrea.yml index ec798da85ee..b869ed6c377 100644 --- a/build/yamls/antrea.yml +++ b/build/yamls/antrea.yml @@ -326,6 +326,9 @@ data: # Note that if it's set to another value, the `containerPort` of the `api` port of the # `antrea-agent` container must be set to the same value. #apiPort: 10350 + + # Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener. + #enablePrometheusMetrics: false antrea-cni.conflist: | { "cniVersion":"0.3.0", @@ -348,12 +351,15 @@ data: # Note that if it's set to another value, the `containerPort` of the `api` port of the # `antrea-controller` container must be set to the same value. #apiPort: 10349 + + # Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener. 
+ #enablePrometheusMetrics: false kind: ConfigMap metadata: annotations: {} labels: app: antrea - name: antrea-config-428d4tg64g + name: antrea-config-m8cb9g82tf namespace: kube-system --- apiVersion: v1 @@ -453,7 +459,7 @@ spec: key: node-role.kubernetes.io/master volumes: - configMap: - name: antrea-config-428d4tg64g + name: antrea-config-m8cb9g82tf name: antrea-config - hostPath: path: /var/log/antrea @@ -651,7 +657,7 @@ spec: operator: Exists volumes: - configMap: - name: antrea-config-428d4tg64g + name: antrea-config-m8cb9g82tf name: antrea-config - hostPath: path: /etc/cni/net.d diff --git a/build/yamls/base/conf/antrea-agent.conf b/build/yamls/base/conf/antrea-agent.conf index 108596bbf02..6229b30706c 100644 --- a/build/yamls/base/conf/antrea-agent.conf +++ b/build/yamls/base/conf/antrea-agent.conf @@ -46,3 +46,6 @@ # Note that if it's set to another value, the `containerPort` of the `api` port of the # `antrea-agent` container must be set to the same value. #apiPort: 10350 + +# Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener. +#enablePrometheusMetrics: false diff --git a/build/yamls/base/conf/antrea-controller.conf b/build/yamls/base/conf/antrea-controller.conf index 3823f0723c3..d6dda5bbccd 100644 --- a/build/yamls/base/conf/antrea-controller.conf +++ b/build/yamls/base/conf/antrea-controller.conf @@ -2,3 +2,6 @@ # Note that if it's set to another value, the `containerPort` of the `api` port of the # `antrea-controller` container must be set to the same value. #apiPort: 10349 + +# Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener. 
+#enablePrometheusMetrics: false diff --git a/cmd/antrea-agent/agent.go b/cmd/antrea-agent/agent.go index 685bd388afe..cfef57e51aa 100644 --- a/cmd/antrea-agent/agent.go +++ b/cmd/antrea-agent/agent.go @@ -30,6 +30,7 @@ import ( "github.com/vmware-tanzu/antrea/pkg/agent/controller/networkpolicy" "github.com/vmware-tanzu/antrea/pkg/agent/controller/noderoute" "github.com/vmware-tanzu/antrea/pkg/agent/interfacestore" + "github.com/vmware-tanzu/antrea/pkg/agent/metrics" "github.com/vmware-tanzu/antrea/pkg/agent/openflow" "github.com/vmware-tanzu/antrea/pkg/agent/querier" "github.com/vmware-tanzu/antrea/pkg/agent/route" @@ -161,11 +162,19 @@ func run(o *Options) error { ovsBridgeClient, networkPolicyController) + if o.config.EnablePrometheusMetrics { + metrics.InitializePrometheusMetrics(o.config.OVSBridge, ifaceStore, ofClient) + } + agentMonitor := monitor.NewAgentMonitor(crdClient, agentQuerier) go agentMonitor.Run(stopCh) - apiServer, err := apiserver.New(agentQuerier, networkPolicyController, o.config.APIPort) + apiServer, err := apiserver.New( + agentQuerier, + networkPolicyController, + o.config.APIPort, + o.config.EnablePrometheusMetrics) if err != nil { return fmt.Errorf("error when creating agent API server: %v", err) } diff --git a/cmd/antrea-agent/config.go b/cmd/antrea-agent/config.go index 4dfaace5109..25de498b6cd 100644 --- a/cmd/antrea-agent/config.go +++ b/cmd/antrea-agent/config.go @@ -75,4 +75,7 @@ type AgentConfig struct { // APIPort is the port for the antrea-agent APIServer to serve on. // Defaults to 10350. APIPort int `yaml:"apiPort,omitempty"` + // Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener + // Defaults to false. 
+ EnablePrometheusMetrics bool `yaml:"enablePrometheusMetrics,omitempty"` } diff --git a/cmd/antrea-controller/config.go b/cmd/antrea-controller/config.go index cca42592507..1c52730be92 100644 --- a/cmd/antrea-controller/config.go +++ b/cmd/antrea-controller/config.go @@ -25,4 +25,7 @@ type ControllerConfig struct { // APIPort is the port for the antrea-controller APIServer to serve on. // Defaults to 10349. APIPort int `yaml:"apiPort,omitempty"` + // Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener + // Defaults to false. + EnablePrometheusMetrics bool `yaml:"enablePrometheusMetrics,omitempty"` } diff --git a/cmd/antrea-controller/controller.go b/cmd/antrea-controller/controller.go index 250650ffefa..848096b3b83 100644 --- a/cmd/antrea-controller/controller.go +++ b/cmd/antrea-controller/controller.go @@ -31,6 +31,7 @@ import ( "github.com/vmware-tanzu/antrea/pkg/apiserver" "github.com/vmware-tanzu/antrea/pkg/apiserver/openapi" "github.com/vmware-tanzu/antrea/pkg/apiserver/storage" + "github.com/vmware-tanzu/antrea/pkg/controller/metrics" "github.com/vmware-tanzu/antrea/pkg/controller/networkpolicy" "github.com/vmware-tanzu/antrea/pkg/controller/networkpolicy/store" "github.com/vmware-tanzu/antrea/pkg/controller/querier" @@ -81,7 +82,8 @@ func run(o *Options) error { addressGroupStore, appliedToGroupStore, networkPolicyStore, - controllerQuerier) + controllerQuerier, + o.config.EnablePrometheusMetrics) if err != nil { return fmt.Errorf("error creating API server config: %v", err) } @@ -103,6 +105,10 @@ func run(o *Options) error { go apiServer.GenericAPIServer.PrepareRun().Run(stopCh) + if o.config.EnablePrometheusMetrics { + metrics.InitializePrometheusMetrics() + } + <-stopCh klog.Info("Stopping Antrea controller") return nil @@ -113,7 +119,8 @@ func createAPIServerConfig(kubeconfig string, addressGroupStore storage.Interface, appliedToGroupStore storage.Interface, networkPolicyStore storage.Interface, - controllerQuerier 
querier.ControllerQuerier) (*apiserver.Config, error) { + controllerQuerier querier.ControllerQuerier, + enableMetrics bool) (*apiserver.Config, error) { // TODO: // 1. Support user-provided certificate. secureServing := genericoptions.NewSecureServingOptions().WithLoopback() @@ -155,6 +162,7 @@ func createAPIServerConfig(kubeconfig string, openapi.GetOpenAPIDefinitions, genericopenapi.NewDefinitionNamer(apiserver.Scheme)) serverConfig.OpenAPIConfig.Info.Title = "Antrea" + serverConfig.EnableMetrics = enableMetrics return apiserver.NewConfig( serverConfig, diff --git a/go.mod b/go.mod index b11d5c74b94..2313ca2e09c 100644 --- a/go.mod +++ b/go.mod @@ -25,6 +25,7 @@ require ( github.com/imdario/mergo v0.3.7 // indirect github.com/j-keck/arping v1.0.0 github.com/kevinburke/ssh_config v0.0.0-20190725054713-01f96b0aa0cd + github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829 github.com/satori/go.uuid v1.2.0 github.com/sirupsen/logrus v1.4.1 github.com/spf13/cobra v0.0.5 diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index 54454608358..9d56fc0813f 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -37,12 +37,11 @@ import ( "github.com/vmware-tanzu/antrea/pkg/agent/types" "github.com/vmware-tanzu/antrea/pkg/agent/util" "github.com/vmware-tanzu/antrea/pkg/ovs/ovsconfig" + "github.com/vmware-tanzu/antrea/pkg/util/env" ) const ( maxRetryForHostLink = 5 - // nodeNameEnvKey is environment variable. - nodeNameEnvKey = "NODE_NAME" // ipsecPSKEnvKey is environment variable. ipsecPSKEnvKey = "ANTREA_IPSEC_PSK" roundNumKey = "roundNum" // round number key in externalIDs. @@ -436,7 +435,7 @@ func (i *Initializer) setupDefaultTunnelInterface(tunnelPortName string) error { // initNodeLocalConfig retrieves node's subnet CIDR from node.spec.PodCIDR, which is used for IPAM and setup // host gateway interface. 
func (i *Initializer) initNodeLocalConfig() error { - nodeName, err := getNodeName() + nodeName, err := env.GetNodeName() if err != nil { return err } @@ -476,24 +475,6 @@ func (i *Initializer) initNodeLocalConfig() error { return nil } -// getNodeName returns the node's name used in Kubernetes, based on the priority: -// - Environment variable NODE_NAME, which should be set by Downward API -// - OS's hostname -func getNodeName() (string, error) { - nodeName := os.Getenv(nodeNameEnvKey) - if nodeName != "" { - return nodeName, nil - } - klog.Infof("Environment variable %s not found, using hostname instead", nodeNameEnvKey) - var err error - nodeName, err = os.Hostname() - if err != nil { - klog.Errorf("Failed to get local hostname: %v", err) - return "", err - } - return nodeName, nil -} - // readIPSecPSK reads the IPSec PSK value from environment variable // ANTREA_IPSEC_PSK, when enableIPSecTunnel is set to true. func (i *Initializer) readIPSecPSK() error { diff --git a/pkg/agent/agent_test.go b/pkg/agent/agent_test.go index 33930ad4641..f7fbaff4448 100644 --- a/pkg/agent/agent_test.go +++ b/pkg/agent/agent_test.go @@ -17,7 +17,6 @@ package agent import ( "fmt" "net" - "os" "testing" mock "github.com/golang/mock/gomock" @@ -30,38 +29,6 @@ import ( ovsconfigtest "github.com/vmware-tanzu/antrea/pkg/ovs/ovsconfig/testing" ) -func TestGetNodeName(t *testing.T) { - hostName, err := os.Hostname() - if err != nil { - t.Fatalf("Failed to retrieve hostname, %v", err) - } - testTable := map[string]string{ - "node1": "node1", - "node_12": "node_12", - "": hostName, - "node-1234": "node-1234", - } - - for k, v := range testTable { - compareNodeName(k, v, t) - } -} - -func compareNodeName(k, v string, t *testing.T) { - if k != "" { - _ = os.Setenv(nodeNameEnvKey, k) - defer os.Unsetenv(nodeNameEnvKey) - } - nodeName, err := getNodeName() - if err != nil { - t.Errorf("Failure with expected name %s, %v", k, err) - return - } - if nodeName != v { - t.Errorf("Failed to retrieve 
nodename, want: %s, get: %s", v, nodeName) - } -} - func newAgentInitializer(ovsBridgeClient ovsconfig.OVSBridgeClient, ifaceStore interfacestore.InterfaceStore) *Initializer { return &Initializer{ovsBridgeClient: ovsBridgeClient, ifaceStore: ifaceStore, hostGateway: "gw0"} } diff --git a/pkg/agent/apiserver/apiserver.go b/pkg/agent/apiserver/apiserver.go index 79bc6701660..00753913f44 100644 --- a/pkg/agent/apiserver/apiserver.go +++ b/pkg/agent/apiserver/apiserver.go @@ -64,8 +64,9 @@ func installHandlers(aq agentquerier.AgentQuerier, npq querier.AgentNetworkPolic } // New creates an APIServer for running in antrea agent. -func New(aq agentquerier.AgentQuerier, npq querier.AgentNetworkPolicyInfoQuerier, bindPort int) (*agentAPIServer, error) { - cfg, err := newConfig(bindPort) +func New(aq agentquerier.AgentQuerier, npq querier.AgentNetworkPolicyInfoQuerier, bindPort int, + enableMetrics bool) (*agentAPIServer, error) { + cfg, err := newConfig(bindPort, enableMetrics) if err != nil { return nil, err } @@ -77,7 +78,7 @@ func New(aq agentquerier.AgentQuerier, npq querier.AgentNetworkPolicyInfoQuerier return &agentAPIServer{GenericAPIServer: s}, nil } -func newConfig(bindPort int) (*genericapiserver.CompletedConfig, error) { +func newConfig(bindPort int, enableMetrics bool) (*genericapiserver.CompletedConfig, error) { secureServing := genericoptions.NewSecureServingOptions().WithLoopback() authentication := genericoptions.NewDelegatingAuthenticationOptions() authorization := genericoptions.NewDelegatingAuthorizationOptions() @@ -114,6 +115,7 @@ func newConfig(bindPort int) (*genericapiserver.CompletedConfig, error) { GitTreeState: antreaversion.GitTreeState, GitCommit: antreaversion.GetGitSHA(), } + serverConfig.EnableMetrics = enableMetrics completedServerCfg := serverConfig.Complete(nil) return &completedServerCfg, nil diff --git a/pkg/agent/metrics/prometheus.go b/pkg/agent/metrics/prometheus.go new file mode 100644 index 00000000000..a52abfa8d7d --- /dev/null 
+++ b/pkg/agent/metrics/prometheus.go
@@ -0,0 +1,122 @@
+// Copyright 2020 Antrea Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package metrics
+
+import (
+	"strconv"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"k8s.io/klog"
+
+	"github.com/vmware-tanzu/antrea/pkg/agent/interfacestore"
+	"github.com/vmware-tanzu/antrea/pkg/agent/openflow"
+	"github.com/vmware-tanzu/antrea/pkg/util/env"
+)
+
+// ovsStatManager implements prometheus.Collector. It exports the flow count of
+// each OVS flow table reported by the OpenFlow client, labelled with the bridge name.
+type ovsStatManager struct {
+	ofClient     openflow.Client
+	ovsBridge    string
+	ovsTableDesc *prometheus.Desc
+}
+
+// getOVSStatistics returns the current flow count of every flow table reported
+// by the OpenFlow client, keyed by the table ID in decimal notation.
+func (c *ovsStatManager) getOVSStatistics() map[string]float64 {
+	flowTableStatus := c.ofClient.GetFlowTableStatus()
+	ovsFlowsByTable := make(map[string]float64, len(flowTableStatus))
+	for _, tableStatus := range flowTableStatus {
+		ovsFlowsByTable[strconv.Itoa(int(tableStatus.ID))] = float64(tableStatus.FlowCount)
+	}
+	return ovsFlowsByTable
+}
+
+// Describe implements prometheus.Collector. It sends the descriptor of the
+// per-table flow count metric to ch.
+func (c *ovsStatManager) Describe(ch chan<- *prometheus.Desc) {
+	ch <- c.ovsTableDesc
+}
+
+// Collect implements prometheus.Collector. It sends one gauge sample per OVS
+// flow table to ch, with the table ID as the value of the table_id label.
+func (c *ovsStatManager) Collect(ch chan<- prometheus.Metric) {
+	for tableID, tableFlowCount := range c.getOVSStatistics() {
+		ch <- prometheus.MustNewConstMetric(
+			c.ovsTableDesc,
+			prometheus.GaugeValue,
+			tableFlowCount,
+			tableID,
+		)
+	}
+}
+
+// newOVSStatManager returns a collector for the antrea_agent_ovs_flow_table
+// metric, which carries ovsBridge as the constant "bridge" label.
+func newOVSStatManager(ovsBridge string, ofClient openflow.Client) *ovsStatManager {
+	return &ovsStatManager{
+		ofClient:  ofClient,
+		ovsBridge: ovsBridge,
+		ovsTableDesc: prometheus.NewDesc(
+			"antrea_agent_ovs_flow_table",
+			"OVS flow table flow count.",
+			[]string{"table_id"},
+			prometheus.Labels{"bridge": ovsBridge},
+		),
+	}
+}
+
+// InitializePrometheusMetrics creates the Antrea agent metrics and registers them
+// with Prometheus: antrea_agent_local_pod_count, antrea_agent_runtime_info and
+// antrea_agent_ovs_flow_table. Failures are logged and do not abort initialization.
+func InitializePrometheusMetrics(
+	ovsBridge string,
+	ifaceStore interfacestore.InterfaceStore,
+	ofClient openflow.Client) {
+
+	klog.Info("Initializing prometheus metrics")
+	podCount := prometheus.NewGaugeFunc(
+		prometheus.GaugeOpts{
+			Name: "antrea_agent_local_pod_count",
+			Help: "Number of pods on local node which are managed by the Antrea Agent.",
+		},
+		func() float64 { return float64(ifaceStore.GetContainerInterfaceNum()) },
+	)
+	// NewGaugeFunc only creates the metric: it is not exposed until it is registered.
+	if err := prometheus.Register(podCount); err != nil {
+		klog.Errorf("Failed to register antrea_agent_local_pod_count with Prometheus: %v", err)
+	}
+
+	nodeName, err := env.GetNodeName()
+	if err != nil {
+		// Best-effort: the runtime info gauge is still exported, with an empty k8s_nodename label.
+		klog.Errorf("Failed to retrieve agent K8S node name: %v", err)
+	}
+
+	gaugeHost := prometheus.NewGauge(prometheus.GaugeOpts{
+		Name:        "antrea_agent_runtime_info",
+		Help:        "Antrea agent runtime info , defined as labels. The value of the gauge is always set to 1.",
+		ConstLabels: prometheus.Labels{"k8s_nodename": nodeName, "k8s_podname": env.GetPodName()},
+	})
+	gaugeHost.Set(1)
+	if err := prometheus.Register(gaugeHost); err != nil {
+		klog.Errorf("Failed to register antrea_agent_runtime_info with Prometheus: %v", err)
+	}
+
+	ovsStats := newOVSStatManager(ovsBridge, ofClient)
+	if err := prometheus.Register(ovsStats); err != nil {
+		klog.Errorf("Failed to register antrea_agent_ovs_flow_table with Prometheus: %v", err)
+	}
+}
diff --git a/pkg/controller/metrics/prometheus.go b/pkg/controller/metrics/prometheus.go
new file mode 100644
index 00000000000..e2ce558cc13
--- /dev/null
+++ b/pkg/controller/metrics/prometheus.go
@@ -0,0 +1,41 @@
+// Copyright 2020 Antrea Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package metrics
+
+import (
+	"github.com/prometheus/client_golang/prometheus"
+	"k8s.io/klog"
+
+	"github.com/vmware-tanzu/antrea/pkg/util/env"
+)
+
+// InitializePrometheusMetrics registers the antrea_controller_runtime_info gauge with Prometheus.
+func InitializePrometheusMetrics() {
+	nodeName, err := env.GetNodeName()
+	if err != nil {
+		// Best-effort: the gauge is still exported, with an empty k8s_nodename label.
+		klog.Errorf("Failed to retrieve controller K8S node name: %v", err)
+	}
+	klog.Info("Initializing prometheus metrics")
+	gaugeHost := prometheus.NewGauge(prometheus.GaugeOpts{
+		Name:        "antrea_controller_runtime_info",
+		Help:        "Antrea controller runtime info, defined as labels. The value of the gauge is always set to 1.",
+		ConstLabels: prometheus.Labels{"k8s_nodename": nodeName, "k8s_podname": env.GetPodName()},
+	})
+	gaugeHost.Set(1)
+	if err = prometheus.Register(gaugeHost); err != nil {
+		klog.Errorf("Failed to register antrea_controller_runtime_info with Prometheus: %v", err)
+	}
+}
diff --git a/pkg/util/env/env.go b/pkg/util/env/env.go
new file mode 100644
index 00000000000..b036979c0cb
--- /dev/null
+++ b/pkg/util/env/env.go
@@ -0,0 +1,54 @@
+// Copyright 2020 Antrea Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package env
+
+import (
+	"fmt"
+	"os"
+
+	"k8s.io/klog"
+)
+
+// Names of the environment variables read by this package.
+const (
+	nodeNameEnvKey = "NODE_NAME"
+	podNameEnvKey  = "POD_NAME"
+)
+
+// GetNodeName returns the node's name used in Kubernetes, based on the priority:
+// - Environment variable NODE_NAME, which should be set by Downward API
+// - OS's hostname
+// A hostname lookup failure is returned, not logged: reporting is left to the caller.
+func GetNodeName() (string, error) {
+	if nodeName := os.Getenv(nodeNameEnvKey); nodeName != "" {
+		return nodeName, nil
+	}
+	klog.Infof("Environment variable %s not found, using hostname instead", nodeNameEnvKey)
+	hostName, err := os.Hostname()
+	if err != nil {
+		return "", fmt.Errorf("failed to get local hostname: %v", err)
+	}
+	return hostName, nil
+}
+
+// GetPodName returns the value of the POD_NAME environment variable, i.e. the name of
+// the Pod where the code executes; a warning is logged if it is empty or unset.
+func GetPodName() string {
+	podName := os.Getenv(podNameEnvKey)
+	if podName == "" {
+		klog.Warningf("Environment variable %s not found", podNameEnvKey)
+	}
+	return podName
+}
diff --git a/pkg/util/env/env_test.go b/pkg/util/env/env_test.go
new file mode 100644
index 00000000000..8e76eab83ff
--- /dev/null
+++ b/pkg/util/env/env_test.go
@@ -0,0 +1,96 @@
+// Copyright 2020 Antrea Authors
+
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package env
+
+import (
+	"os"
+	"testing"
+)
+
+// setEnv sets the environment variable key to value, or unsets it when value is
+// empty, and returns a function that restores the original state of key.
+func setEnv(t *testing.T, key, value string) func() {
+	old, found := os.LookupEnv(key)
+	var err error
+	if value == "" {
+		err = os.Unsetenv(key)
+	} else {
+		err = os.Setenv(key, value)
+	}
+	if err != nil {
+		t.Fatalf("Failed to prepare environment variable %s: %v", key, err)
+	}
+	return func() {
+		if found {
+			_ = os.Setenv(key, old) // best-effort restore
+		} else {
+			_ = os.Unsetenv(key) // best-effort restore
+		}
+	}
+}
+
+func TestGetNodeName(t *testing.T) {
+	hostName, err := os.Hostname()
+	if err != nil {
+		t.Fatalf("Failed to retrieve hostname: %v", err)
+	}
+	testTable := map[string]string{
+		"node1":     "node1",
+		"node_12":   "node_12",
+		"":          hostName,
+		"node-1234": "node-1234",
+	}
+
+	for k, v := range testTable {
+		compareNodeName(k, v, t)
+	}
+}
+
+// compareNodeName checks that GetNodeName returns v when NODE_NAME is set to k
+// (or unset, when k is empty).
+func compareNodeName(k, v string, t *testing.T) {
+	defer setEnv(t, nodeNameEnvKey, k)()
+	nodeName, err := GetNodeName()
+	if err != nil {
+		t.Errorf("Failure with expected name %s: %v", k, err)
+		return
+	}
+	if nodeName != v {
+		t.Errorf("Failed to retrieve nodename, want: %s, get: %s", v, nodeName)
+	}
+}
+
+func TestGetPodName(t *testing.T) {
+	testTable := map[string]string{
+		"pod1":                               "pod1",
+		"pod-1212-x":                         "pod-1212-x",
+		"antrea-controller-577f4ffb4b-njprt": "antrea-controller-577f4ffb4b-njprt",
+		"":                                   "",
+	}
+
+	for k, v := range testTable {
+		comparePodName(k, v, t)
+	}
+}
+
+// comparePodName checks that GetPodName returns v when POD_NAME is set to k
+// (or unset, when k is empty).
+func comparePodName(k, v string, t *testing.T) {
+	defer setEnv(t, podNameEnvKey, k)()
+	if podName := GetPodName(); podName != v {
+		t.Errorf("Failed to retrieve pod name, want: %s, get: %s", v, podName)
+	}
+}