diff --git a/cmd/frontend/internal/env/config.go b/cmd/frontend/internal/env/config.go index c4816825..83f9091f 100644 --- a/cmd/frontend/internal/env/config.go +++ b/cmd/frontend/internal/env/config.go @@ -44,4 +44,6 @@ type Config struct { DelayConnectivity time.Duration `default:"1s" desc:"Delay between checks with connectivity"` DelayNoConnectivity time.Duration `default:"3s" desc:"Delay between checks without connectivity"` MaxSessionErrors int `default:"5" desc:"Max session errors when checking Bird until denounce"` + MetricsEnabled bool `default:"false" desc:"Enable the metrics collection" split_words:"true"` + MetricsPort int `default:"2224" desc:"Specify the port used to expose the metrics" split_words:"true"` } diff --git a/cmd/frontend/main.go b/cmd/frontend/main.go index 6135ae30..1f438e74 100644 --- a/cmd/frontend/main.go +++ b/cmd/frontend/main.go @@ -28,6 +28,8 @@ import ( "github.com/go-logr/logr" "github.com/kelseyhightower/envconfig" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" "google.golang.org/grpc" "google.golang.org/grpc/backoff" "google.golang.org/grpc/keepalive" @@ -38,7 +40,9 @@ import ( "github.com/nordix/meridio/cmd/frontend/internal/frontend" "github.com/nordix/meridio/pkg/health" "github.com/nordix/meridio/pkg/health/connection" + linuxKernel "github.com/nordix/meridio/pkg/kernel" "github.com/nordix/meridio/pkg/log" + "github.com/nordix/meridio/pkg/metrics" "github.com/nordix/meridio/pkg/retry" "github.com/nordix/meridio/pkg/security/credentials" ) @@ -155,6 +159,47 @@ func main() { // start watching events of interest via NSP go watchConfig(ctx, cancel, c, fe) + hostname, err := os.Hostname() + if err != nil { + log.Fatal(logger, "Unable to get hostname", "error", err) + } + interfaceMetrics := linuxKernel.NewInterfaceMetrics([]metric.ObserveOption{ + metric.WithAttributes(attribute.String("Hostname", hostname)), + metric.WithAttributes(attribute.String("Trench", config.TrenchName)), + 
metric.WithAttributes(attribute.String("Attractor", config.AttractorName)), + }) + interfaceMetrics.Register(config.ExternalInterface) + + if config.MetricsEnabled { + func() { + _, err = metrics.Init(ctx) + if err != nil { + logger.Error(err, "Unable to init metrics collector") + cancel() + return + } + + err = interfaceMetrics.Collect() + if err != nil { + logger.Error(err, "Unable to start interface metrics collector") + cancel() + return + } + + metricsServer := metrics.Server{ + IP: "", + Port: config.MetricsPort, + } + go func() { + err := metricsServer.Start(ctx) + if err != nil { + logger.Error(err, "Unable to start metrics server") + cancel() + } + }() + }() + } + <-ctx.Done() logger.Info("FE shutting down") } diff --git a/cmd/stateless-lb/main.go b/cmd/stateless-lb/main.go index 5d8aa990..4307e559 100644 --- a/cmd/stateless-lb/main.go +++ b/cmd/stateless-lb/main.go @@ -56,10 +56,13 @@ import ( "github.com/nordix/meridio/pkg/networking" "github.com/nordix/meridio/pkg/nsm" "github.com/nordix/meridio/pkg/nsm/interfacemonitor" + nsmmetrics "github.com/nordix/meridio/pkg/nsm/metrics" "github.com/nordix/meridio/pkg/retry" "github.com/nordix/meridio/pkg/security/credentials" "github.com/sirupsen/logrus" "github.com/vishvananda/netlink" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" "google.golang.org/grpc" "google.golang.org/grpc/backoff" "google.golang.org/grpc/keepalive" @@ -167,6 +170,17 @@ func main() { }, } + hostname, err := os.Hostname() + if err != nil { + log.Fatal(logger, "Unable to get hostname", "error", err) + } + + interfaceMetrics := linuxKernel.NewInterfaceMetrics([]metric.ObserveOption{ + metric.WithAttributes(attribute.String("Hostname", hostname)), + metric.WithAttributes(attribute.String("Trench", config.TrenchName)), + metric.WithAttributes(attribute.String("Conduit", config.ConduitName)), + }) + lbFactory := nfqlb.NewLbFactory(nfqlb.WithNFQueue(config.Nfqueue)) nfa, err := 
nfqlb.NewNetfilterAdaptor(nfqlb.WithNFQueue(config.Nfqueue), nfqlb.WithNFQueueFanout(config.NfqueueFanout)) if err != nil { @@ -198,6 +212,7 @@ func main() { noop.MECHANISM: null.NewServer(), }), interfaceMonitorEndpoint, + nsmmetrics.NewServer(interfaceMetrics), sendfd.NewServer(), } @@ -256,13 +271,6 @@ func main() { return } - hostname, err := os.Hostname() - if err != nil { - logger.Error(err, "Unable to get hostname") - cancel() - return - } - err = flow.CollectMetrics( flow.WithHostname(hostname), flow.WithTrenchName(config.TrenchName), @@ -274,6 +282,13 @@ func main() { return } + err = interfaceMetrics.Collect() + if err != nil { + logger.Error(err, "Unable to start interface metrics collector") + cancel() + return + } + metricsServer := metrics.Server{ IP: "", Port: config.MetricsPort, diff --git a/config/templates/charts/meridio/deployment/stateless-lb-frontend.yaml b/config/templates/charts/meridio/deployment/stateless-lb-frontend.yaml index 55b3e8ed..75ee7423 100644 --- a/config/templates/charts/meridio/deployment/stateless-lb-frontend.yaml +++ b/config/templates/charts/meridio/deployment/stateless-lb-frontend.yaml @@ -184,6 +184,9 @@ spec: - name: frontend image: {{ .Values.registry }}/{{ .Values.repository }}/{{ .Values.frontEnd.image }}:{{ .Values.version }} imagePullPolicy: # Kubernetes default according to image tag + ports: + - name: metrics + containerPort: 2224 startupProbe: # will be filled by operator if not specified exec: command: @@ -244,6 +247,8 @@ spec: value: # to be filled by operator - name: NFE_LOG_LEVEL value: # to be filled by operator + - name: NFE_METRICS_ENABLED + value: "true" securityContext: runAsNonRoot: true readOnlyRootFilesystem: true diff --git a/docs/observability/dashboard.json b/docs/observability/dashboard.json index 4155b070..daf8aaeb 100644 --- a/docs/observability/dashboard.json +++ b/docs/observability/dashboard.json @@ -81,12 +81,12 @@ "overrides": [] }, "gridPos": { - "h": 8, + "h": 20, "w": 12, "x": 0, "y": 0 
}, - "id": 1, + "id": 7, "options": { "legend": { "calcs": [], @@ -99,6 +99,7 @@ "sort": "none" } }, + "pluginVersion": "10.1.5", "targets": [ { "datasource": { @@ -107,16 +108,153 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "sum by(Flow, Stream, Conduit, Trench) (rate(meridio_conduit_stream_flow_matches_total[$__rate_interval]))", + "exemplar": false, + "expr": "sum by(Trench, Conduit, Attractor) (rate(meridio_interface_rx_bytes_total[$__rate_interval]))", + "format": "time_series", "fullMetaSearch": false, "includeNullMetadata": true, - "legendFormat": "{{Flow}}.{{Stream}}.{{Conduit}}.{{Trench}}", + "instant": false, + "legendFormat": "rx_bytes.{{Conduit}}{{Attractor}}.{{Trench}}", "range": true, - "refId": "A", + "refId": "rx bytes", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "sum by(Trench, Conduit, Attractor) (rate(meridio_interface_tx_bytes_total[$__rate_interval]))", + "format": "time_series", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "tx_bytes.{{Conduit}}{{Attractor}}.{{Trench}}", + "range": true, + "refId": "tx bytes", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "sum by(Trench, Conduit, Attractor) (rate(meridio_interface_rx_packets_total[$__rate_interval]))", + "format": "time_series", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "rx_packets.{{Conduit}}{{Attractor}}.{{Trench}}", + "range": true, + "refId": "rx packets", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "sum by(Trench, 
Conduit, Attractor) (rate(meridio_interface_tx_packets_total[$__rate_interval]))", + "format": "time_series", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "tx_packets.{{Conduit}}{{Attractor}}.{{Trench}}", + "range": true, + "refId": "tx packets", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "sum by(Trench, Conduit, Attractor) (rate(meridio_interface_rx_errors_total[$__rate_interval]))", + "format": "time_series", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "rx_errors.{{Conduit}}{{Attractor}}.{{Trench}}", + "range": true, + "refId": "rx errors", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "sum by(Trench, Conduit, Attractor) (rate(meridio_interface_tx_errors_total[$__rate_interval]))", + "format": "time_series", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "tx_errors.{{Conduit}}{{Attractor}}.{{Trench}}", + "range": true, + "refId": "tx errors", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "sum by(Trench, Conduit, Attractor) (rate(meridio_interface_rx_dropped_total[$__rate_interval]))", + "format": "time_series", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "rx_dropped.{{Conduit}}{{Attractor}}.{{Trench}}", + "range": true, + "refId": "rx dropped", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + 
"editorMode": "builder", + "exemplar": false, + "expr": "sum by(Trench, Conduit, Attractor) (rate(meridio_interface_tx_dropped_total[$__rate_interval]))", + "format": "time_series", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "tx_dropped.{{Conduit}}{{Attractor}}.{{Trench}}", + "range": true, + "refId": "tx dropped", "useBackend": false } ], - "title": "Flow Match (packet per second)", + "title": "Interface Metrics", + "transformations": [], "type": "timeseries" }, { @@ -172,7 +310,7 @@ }, "showHeader": true }, - "pluginVersion": "10.1.4", + "pluginVersion": "10.1.5", "targets": [ { "datasource": { @@ -231,8 +369,467 @@ "pod": 13, "service": 14 }, + "renameByName": { + "Value": "Packets" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 12 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(Flow, Stream, Conduit, Trench) (rate(meridio_conduit_stream_flow_matches_total[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{Flow}}.{{Stream}}.{{Conduit}}.{{Trench}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Flow Match (packet per second)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 4, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "meridio_interface_rx_bytes_total", + "format": "table", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "rx bytes", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "meridio_interface_tx_bytes_total", + "format": "table", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "tx bytes", + 
"useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "meridio_interface_rx_packets_total", + "format": "table", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "rx packets", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "meridio_interface_tx_packets_total", + "format": "table", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "tx packets", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "meridio_interface_rx_errors_total", + "format": "table", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "rx errors", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "meridio_interface_tx_errors_total", + "format": "table", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "tx errors", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "meridio_interface_rx_dropped_total", + "format": "table", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": true, + "legendFormat": 
"__auto", + "range": false, + "refId": "rx dropped", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "meridio_interface_tx_dropped_total", + "format": "table", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "tx dropped", + "useBackend": false + } + ], + "title": "Interface Metrics", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "MAC_Address": false, + "Time": true, + "__name__": true, + "container": true, + "endpoint": true, + "instance": true, + "job": true, + "namespace": true, + "otel_scope_name": true, + "pod": true + }, + "indexByName": { + "Attractor": 1, + "Conduit": 2, + "Hostname": 3, + "IP_Addresses": 6, + "Interface_Name": 4, + "MAC_Address": 5, + "Time": 7, + "Trench": 0, + "Value #rx bytes": 16, + "Value #rx dropped": 22, + "Value #rx errors": 20, + "Value #rx packets": 18, + "Value #tx bytes": 17, + "Value #tx dropped": 23, + "Value #tx errors": 21, + "Value #tx packets": 19, + "__name__": 8, + "container": 9, + "endpoint": 10, + "instance": 11, + "job": 12, + "namespace": 13, + "otel_scope_name": 14, + "pod": 15 + }, "renameByName": {} } + }, + { + "id": "groupBy", + "options": { + "fields": { + "Attractor": { + "aggregations": [], + "operation": "groupby" + }, + "Conduit": { + "aggregations": [], + "operation": "groupby" + }, + "Hostname": { + "aggregations": [], + "operation": "groupby" + }, + "IP_Addresses": { + "aggregations": [], + "operation": "groupby" + }, + "Interface_Name": { + "aggregations": [], + "operation": "groupby" + }, + "MAC_Address": { + "aggregations": [], + "operation": "groupby" + }, + "Trench": { + "aggregations": [], + "operation": "groupby" + }, + "Value #rx bytes": { + "aggregations": [ + "lastNotNull" + ], 
+ "operation": "aggregate" + }, + "Value #rx dropped": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "Value #rx errors": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "Value #rx packets": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "Value #tx bytes": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "Value #tx dropped": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "Value #tx errors": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "Value #tx packets": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + } + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "Value #rx bytes (lastNotNull)": "RX Bytes", + "Value #rx dropped (lastNotNull)": "RX Dropped", + "Value #rx errors (lastNotNull)": "RX Errors", + "Value #rx packets (lastNotNull)": "RX Packets", + "Value #tx bytes (lastNotNull)": "TX Bytes", + "Value #tx dropped (lastNotNull)": "TX Dropped", + "Value #tx errors (lastNotNull)": "TX Errors", + "Value #tx packets (lastNotNull)": "TX Packets" + } + } } ], "type": "table" diff --git a/docs/observability/metrics.md b/docs/observability/metrics.md index 6fbabc1a..538d1ba0 100644 --- a/docs/observability/metrics.md +++ b/docs/observability/metrics.md @@ -2,7 +2,7 @@ ## Metric List -### meridio.interface.`METRIC_TYPE` (Planned) +### meridio.interface.`METRIC_TYPE` `METRIC_TYPE`: rx_packets, tx_packets, rx_bytes, tx_bytes, rx_errors, tx_errors, rx_dropped, tx_dropped diff --git a/pkg/kernel/metrics.go b/pkg/kernel/metrics.go new file mode 100644 index 00000000..a72855b2 --- /dev/null +++ b/pkg/kernel/metrics.go @@ -0,0 +1,264 @@ +/* +Copyright (c) 2023 Nordix Foundation + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package kernel
+
+import (
+	"context"
+	"fmt"
+	"net"
+	"sync"
+
+	meridioMetrics "github.com/nordix/meridio/pkg/metrics"
+	"github.com/vishvananda/netlink"
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/metric"
+)
+
+// InterfaceMetrics reports the kernel statistics of a set of registered
+// network interfaces through OpenTelemetry observable counters.
+// Interfaces can be registered and unregistered at any time.
+type InterfaceMetrics struct {
+	// meter is used to create the observable counters.
+	meter metric.Meter
+	// interfaces is the set of interface names to observe.
+	interfaces map[string]struct{}
+	// metricAttributes are attached to every observation.
+	metricAttributes []metric.ObserveOption
+	// mu guards interfaces.
+	mu sync.Mutex
+}
+
+// NewInterfaceMetrics returns an InterfaceMetrics with no registered
+// interface. The meter is obtained from the global OpenTelemetry meter
+// provider, and metricAttributes are added to every observation.
+func NewInterfaceMetrics(metricAttributes []metric.ObserveOption) *InterfaceMetrics {
+	meter := otel.GetMeterProvider().Meter(meridioMetrics.METER_NAME)
+	im := &InterfaceMetrics{
+		meter:            meter,
+		interfaces:       map[string]struct{}{},
+		metricAttributes: metricAttributes,
+	}
+
+	return im
+}
+
+// Register adds interfaceName to the set of observed interfaces.
+// Registering an already registered name is a no-op, and an empty name is
+// ignored since it can never match a link.
+func (im *InterfaceMetrics) Register(interfaceName string) {
+	if interfaceName == "" {
+		return
+	}
+	im.mu.Lock()
+	defer im.mu.Unlock()
+	im.interfaces[interfaceName] = struct{}{}
+}
+
+// Unregister removes interfaceName from the set of observed interfaces.
+// Unregistering an unknown name is a no-op.
+func (im *InterfaceMetrics) Unregister(interfaceName string) {
+	im.mu.Lock()
+	defer im.mu.Unlock()
+	delete(im.interfaces, interfaceName)
+}
+
+// Collect creates the observable counters (rx/tx bytes, packets, errors and
+// dropped) reporting the statistics of the registered interfaces. It returns
+// an error as soon as the creation of a counter fails.
+func (im *InterfaceMetrics) Collect() error {
+	// One entry per interface counter, listed in registration order.
+	counters := []struct {
+		name        string
+		unit        string
+		description string
+		value       func(*netlink.LinkStatistics) int64
+	}{
+		{
+			name:        meridioMetrics.MERIDIO_INTERFACE_RX_BYTES,
+			unit:        "bytes",
+			description: "Counts number of received bytes for a network interface.",
+			value:       func(stats *netlink.LinkStatistics) int64 { return int64(stats.RxBytes) },
+		},
+		{
+			name:        meridioMetrics.MERIDIO_INTERFACE_TX_BYTES,
+			unit:        "bytes",
+			description: "Counts number of transmitted bytes for a network interface.",
+			value:       func(stats *netlink.LinkStatistics) int64 { return int64(stats.TxBytes) },
+		},
+		{
+			name:        meridioMetrics.MERIDIO_INTERFACE_RX_PACKETS,
+			unit:        "packets",
+			description: "Counts number of received packets for a network interface.",
+			value:       func(stats *netlink.LinkStatistics) int64 { return int64(stats.RxPackets) },
+		},
+		{
+			name:        meridioMetrics.MERIDIO_INTERFACE_TX_PACKETS,
+			unit:        "packets",
+			description: "Counts number of transmitted packets for a network interface.",
+			value:       func(stats *netlink.LinkStatistics) int64 { return int64(stats.TxPackets) },
+		},
+		{
+			name:        meridioMetrics.MERIDIO_INTERFACE_RX_ERRORS,
+			unit:        "errors",
+			description: "Counts number of received errors for a network interface.",
+			value:       func(stats *netlink.LinkStatistics) int64 { return int64(stats.RxErrors) },
+		},
+		{
+			name:        meridioMetrics.MERIDIO_INTERFACE_TX_ERRORS,
+			unit:        "errors",
+			description: "Counts number of transmitted errors for a network interface.",
+			value:       func(stats *netlink.LinkStatistics) int64 { return int64(stats.TxErrors) },
+		},
+		{
+			name:        meridioMetrics.MERIDIO_INTERFACE_RX_DROPPED,
+			unit:        "dropped",
+			description: "Counts number of received dropped for a network interface.",
+			value:       func(stats *netlink.LinkStatistics) int64 { return int64(stats.RxDropped) },
+		},
+		{
+			name:        meridioMetrics.MERIDIO_INTERFACE_TX_DROPPED,
+			unit:        "dropped",
+			description: "Counts number of transmitted dropped for a network interface.",
+			value:       func(stats *netlink.LinkStatistics) int64 { return int64(stats.TxDropped) },
+		},
+	}
+
+	for _, counter := range counters {
+		// Per-iteration copy so that each callback keeps its own extractor
+		// whatever the loop-variable semantics of the Go version in use.
+		valueFunc := counter.value
+		_, err := im.meter.Int64ObservableCounter(
+			counter.name,
+			metric.WithUnit(counter.unit),
+			metric.WithDescription(counter.description),
+			metric.WithInt64Callback(func(ctx context.Context, observer metric.Int64Observer) error {
+				return im.observe(ctx, observer, valueFunc)
+			}),
+		)
+		if err != nil {
+			return fmt.Errorf("interface metrics, failed to Int64ObservableCounter (%s): %w", counter.name, err)
+		}
+	}
+
+	return nil
+}
+
+// observe reports, for every registered interface that currently exists and
+// is up, the value extracted by valueFunc from the link statistics. Each
+// observation carries the interface name, the attributes given at
+// construction, the MAC address and the IP addresses of the link.
+// Interfaces that cannot be found, are down or expose no statistics are
+// skipped. It always returns nil.
+func (im *InterfaceMetrics) observe(ctx context.Context, observer metric.Int64Observer, valueFunc func(*netlink.LinkStatistics) int64) error {
+	im.mu.Lock()
+	defer im.mu.Unlock()
+
+	for interfaceName := range im.interfaces {
+		link := getLinkByName(interfaceName)
+		if link == nil {
+			continue
+		}
+		// Statistics is a pointer; skip the link rather than dereference nil
+		// inside valueFunc.
+		stats := link.Attrs().Statistics
+		if stats == nil {
+			continue
+		}
+
+		// Same attribute order as before: interface name, configured
+		// attributes, MAC address, IP addresses.
+		metricAttributes := make([]metric.ObserveOption, 0, len(im.metricAttributes)+3)
+		metricAttributes = append(metricAttributes, metric.WithAttributes(attribute.String("Interface Name", interfaceName)))
+		metricAttributes = append(metricAttributes, im.metricAttributes...)
+		metricAttributes = append(metricAttributes,
+			metric.WithAttributes(attribute.String("MAC Address", link.Attrs().HardwareAddr.String())),
+			metric.WithAttributes(attribute.StringSlice("IP Addresses", listIPs(link))),
+		)
+
+		observer.Observe(valueFunc(stats), metricAttributes...)
+	}
+	return nil
+}
+
+// getLinkByName returns the link named interfaceName, or nil if the lookup
+// fails, the link has no attributes, or the link is not up.
+func getLinkByName(interfaceName string) netlink.Link {
+	link, err := netlink.LinkByName(interfaceName)
+	if err != nil ||
+		link == nil ||
+		link.Attrs() == nil ||
+		link.Attrs().Flags&net.FlagUp != net.FlagUp {
+		return nil
+	}
+	return link
+}
+
+// listIPs returns the addresses (in CIDR notation) configured on link,
+// leaving out IPv6 link-local addresses. It returns an empty, non-nil slice
+// if the addresses cannot be listed.
+func listIPs(link netlink.Link) []string {
+	res := []string{}
+	addresses, err := netlink.AddrList(link, netlink.FAMILY_ALL)
+	if err != nil {
+		return res
+	}
+
+	for _, addr := range addresses {
+		if isLinkLocalIPv6(addr.IP) {
+			continue
+		}
+		res = append(res, addr.IPNet.String())
+	}
+
+	return res
+}
+
+// isLinkLocalIPv6 reports whether ip is an IPv6 link-local unicast or
+// multicast address.
+func isLinkLocalIPv6(ip net.IP) bool {
+	return ip.To4() == nil && (ip.IsLinkLocalMulticast() || ip.IsLinkLocalUnicast())
+}
diff --git a/pkg/metrics/const.go b/pkg/metrics/const.go
index c78404ee..2424b84f 100644
--- a/pkg/metrics/const.go
+++ b/pkg/metrics/const.go
@@ -18,6 +18,14 @@ package metrics
 const (
 	MERIDIO_CONDUIT_STREAM_FLOW_MATCHES = "meridio.conduit.stream.flow.matches"
+	MERIDIO_INTERFACE_RX_PACKETS        = "meridio.interface.rx_packets"
+	MERIDIO_INTERFACE_TX_PACKETS        = "meridio.interface.tx_packets"
+	MERIDIO_INTERFACE_RX_BYTES          = "meridio.interface.rx_bytes"
+	MERIDIO_INTERFACE_TX_BYTES          = "meridio.interface.tx_bytes"
+	MERIDIO_INTERFACE_RX_ERRORS         = "meridio.interface.rx_errors"
+	MERIDIO_INTERFACE_TX_ERRORS         = "meridio.interface.tx_errors"
+	MERIDIO_INTERFACE_RX_DROPPED        = "meridio.interface.rx_dropped"
+	MERIDIO_INTERFACE_TX_DROPPED        = "meridio.interface.tx_dropped"
 	METER_NAME                          = "Meridio"
 )
diff --git a/pkg/nsm/metrics/server.go b/pkg/nsm/metrics/server.go
new file mode
100644
index 00000000..cafea48a
--- /dev/null
+++ b/pkg/nsm/metrics/server.go
@@ -0,0 +1,77 @@
+/*
+Copyright (c) 2023 Nordix Foundation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package metrics provides a network service chain element that tracks the
+// interfaces of NSM connections for kernel interface metrics collection.
+package metrics
+
+import (
+	"context"
+
+	"github.com/networkservicemesh/api/pkg/api/networkservice"
+	"github.com/networkservicemesh/api/pkg/api/networkservice/mechanisms/common"
+	"github.com/networkservicemesh/sdk/pkg/networkservice/core/next"
+	"github.com/nordix/meridio/pkg/kernel"
+	"google.golang.org/protobuf/types/known/emptypb"
+)
+
+// metricsServer is a chain element that keeps the set of interfaces observed
+// by kernel.InterfaceMetrics in sync with the connections it sees.
+type metricsServer struct {
+	InterfaceMetrics *kernel.InterfaceMetrics
+}
+
+// NewServer returns a chain element that registers the interface of every
+// successfully requested connection in interfaceMetrics and unregisters it
+// when the connection is closed.
+func NewServer(interfaceMetrics *kernel.InterfaceMetrics) networkservice.NetworkServiceServer {
+	return &metricsServer{
+		InterfaceMetrics: interfaceMetrics,
+	}
+}
+
+// Request forwards the request to the rest of the chain and, if it succeeds,
+// registers the interface named in the request's mechanism parameters.
+// Registering only after success keeps the interfaces of requests that failed
+// further down the chain out of the observed set.
+func (ms *metricsServer) Request(ctx context.Context, request *networkservice.NetworkServiceRequest) (*networkservice.Connection, error) {
+	interfaceName := interfaceNameOf(request.GetConnection())
+	conn, err := next.Server(ctx).Request(ctx, request)
+	if err != nil {
+		return conn, err
+	}
+	if interfaceName != "" {
+		ms.InterfaceMetrics.Register(interfaceName)
+	}
+	return conn, nil
+}
+
+// Close unregisters the interface named in the connection's mechanism
+// parameters and forwards the close to the rest of the chain.
+func (ms *metricsServer) Close(ctx context.Context, conn *networkservice.Connection) (*emptypb.Empty, error) {
+	if interfaceName := interfaceNameOf(conn); interfaceName != "" {
+		ms.InterfaceMetrics.Unregister(interfaceName)
+	}
+	return next.Server(ctx).Close(ctx, conn)
+}
+
+// interfaceNameOf returns the interface name stored in the mechanism
+// parameters of conn, or "" when conn, its mechanism or its parameters are
+// unset. The generated protobuf getters accept nil receivers, so no explicit
+// nil checks are needed.
+func interfaceNameOf(conn *networkservice.Connection) string {
+	return conn.GetMechanism().GetParameters()[common.InterfaceNameKey]
+}