diff --git a/cmd/exporter/init.go b/cmd/exporter/init.go index b3542d55..9c36174b 100644 --- a/cmd/exporter/init.go +++ b/cmd/exporter/init.go @@ -14,6 +14,7 @@ import ( _ "github.com/alibaba/kubeskoop/pkg/exporter/probe/procsock" _ "github.com/alibaba/kubeskoop/pkg/exporter/probe/procsoftnet" _ "github.com/alibaba/kubeskoop/pkg/exporter/probe/proctcpsummary" + _ "github.com/alibaba/kubeskoop/pkg/exporter/probe/rdma" _ "github.com/alibaba/kubeskoop/pkg/exporter/probe/tracebiolatency" _ "github.com/alibaba/kubeskoop/pkg/exporter/probe/tracekernel" _ "github.com/alibaba/kubeskoop/pkg/exporter/probe/tracenetiftxlatency" diff --git a/go.mod b/go.mod index 678b47dd..4b24e0bb 100644 --- a/go.mod +++ b/go.mod @@ -47,7 +47,7 @@ require ( github.com/vishvananda/netns v0.0.4 golang.org/x/exp v0.0.0-20221204150635-6dcec336b2bb golang.org/x/sync v0.1.0 - golang.org/x/sys v0.8.0 + golang.org/x/sys v0.10.0 google.golang.org/grpc v1.56.2 google.golang.org/protobuf v1.33.0 gopkg.in/yaml.v3 v3.0.1 @@ -203,3 +203,5 @@ require ( sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect sigs.k8s.io/yaml v1.3.0 // indirect ) + +replace github.com/vishvananda/netlink v1.2.1-beta.2 => github.com/bswang/netlink v1.0.1-0.20240423021740-86cd4b5bb65d diff --git a/go.sum b/go.sum index e929fe04..ae376baf 100644 --- a/go.sum +++ b/go.sum @@ -187,6 +187,8 @@ github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdn github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= github.com/bshuster-repo/logrus-logstash-hook v0.4.1/go.mod h1:zsTqEiSzDgAa/8GZR7E1qaXrhYNDKBYy5/dWPTIflbk= +github.com/bswang/netlink v1.0.1-0.20240423021740-86cd4b5bb65d h1:v/jMfwlqJxCqdVtcNIiHwKDnfTBWGjNWFbfS+HYuFfc= +github.com/bswang/netlink v1.0.1-0.20240423021740-86cd4b5bb65d/go.mod h1:whJevzBpTrid75eZy99s3DqCmy05NfibNaF2Ol5Ox5A= 
github.com/buger/jsonparser v0.0.0-20180808090653-f4dd9f5a6b44/go.mod h1:bbYlZJ7hK1yFx9hf58LP0zeX7UjIGs20ufpu3evjr+s= github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0= github.com/bugsnag/bugsnag-go v0.0.0-20141110184014-b1d153021fcd/go.mod h1:2oa8nejYd4cQ/b0hMIopN0lCRxU0bueqREvZLWFrtK8= @@ -1120,8 +1122,6 @@ github.com/vishvananda/netlink v0.0.0-20181108222139-023a6dafdcdf/go.mod h1:+SR5 github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho= github.com/vishvananda/netlink v1.1.1-0.20210330154013-f5de75959ad5/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho= -github.com/vishvananda/netlink v1.2.1-beta.2 h1:Llsql0lnQEbHj0I1OuKyp8otXp0r3q0mPkuhwHfStVs= -github.com/vishvananda/netlink v1.2.1-beta.2/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho= github.com/vishvananda/netns v0.0.0-20180720170159-13995c7128cc/go.mod h1:ZjcWmFBXmLKZu9Nxj3WKYEafiSqer2rnvPr0en9UNpI= github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= @@ -1476,8 +1476,8 @@ golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= -golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA= +golang.org/x/sys v0.10.0/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
diff --git a/pkg/exporter/probe/rdma/erdma.go b/pkg/exporter/probe/rdma/erdma.go
new file mode 100644
index 00000000..c82955cf
--- /dev/null
+++ b/pkg/exporter/probe/rdma/erdma.go
@@ -0,0 +1,84 @@
+package rdma
+
+import (
+	"github.com/alibaba/kubeskoop/pkg/exporter/probe"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/samber/lo"
+)
+
+const (
+	// rdmaERdmaProbeName prefixes the name of every ERDMA counter registered below.
+	rdmaERdmaProbeName = "erdma_"
+)
+
+var (
+	// erdmaStatisticCounterEntries maps each ERDMA statistic key to the help
+	// text of the counter registered for it.
+	erdmaStatisticCounterEntries = map[string]string{
+		"accept_failed_cnt": "The total number of failed connection accept attempts.",
+		"accept_success_cnt": "The total number of successful connection accept attempts.",
+		"accept_total_cnt": "The total number of connection accept attempts, successful or not.",
+		"cmdq_comp_cnt": "The total number of command queue completions processed.",
+		"cmdq_cq_armed_cnt": "The total number of command queue completion events that have been armed.",
+		"cmdq_eq_event_cnt": "The total number of command queue event queue events received.",
+		"cmdq_eq_notify_cnt": "The total number of command queue event queue notifications triggered.",
+		"cmdq_submitted_cnt": "The total number of command queue submissions.",
+		"connect_failed_cnt": "The total number of failed connection attempts.",
+		"connect_reset_cnt": "The total number of connection attempts that have been reset.",
+		"connect_success_cnt": "The total number of successful connection attempts.",
+		"connect_timeout_cnt": "The total number of connection attempts that timed out.",
+		"connect_total_cnt": "The total number of connection attempts, successful or not.",
+		"erdma_aeq_event_cnt": "The total number of ERDMA asynchronous event queue events received.",
+		"erdma_aeq_notify_cnt": "The total number of ERDMA asynchronous event queue notifications triggered.",
+		"hw_bps_limit_drop_cnt": "The total number of packets dropped due to hardware bandwidth limit.",
+		"hw_disable_drop_cnt": "The total number of packets dropped due to hardware being disabled.",
+		"hw_pps_limit_drop_cnt": "The total number of packets dropped due to hardware packets-per-second limit.",
+		"hw_rx_bps_limit_drop_cnt": "The total number of received packets dropped due to hardware receive bandwidth limit.",
+		"hw_rx_bytes_cnt": "The total number of bytes received by the hardware.",
+		"hw_rx_disable_drop_cnt": "The total number of received packets dropped due to receive hardware being disabled.",
+		"hw_rx_packets_cnt": "The total number of packets received by the hardware.",
+		"hw_rx_pps_limit_drop_cnt": "The total number of received packets dropped due to hardware receive packets-per-second limit.",
+		"hw_tx_bytes_cnt": "The total number of bytes transmitted by the hardware.",
+		"hw_tx_packets_cnt": "The total number of packets transmitted by the hardware.",
+		"hw_tx_reqs_cnt": "The total number of transmit requests processed by the hardware.",
+		"listen_create_cnt": "The total number of successfully created listen sockets.",
+		"listen_destroy_cnt": "The total number of destroyed listen sockets.",
+		"listen_failed_cnt": "The total number of failed attempts to create listen sockets.",
+		"listen_ipv6_cnt": "The total number of listen sockets created for IPv6 addresses.",
+		"listen_success_cnt": "The total number of successful listen operations.",
+		"reject_cnt": "The total number of received connection requests that were rejected.",
+		"reject_failed_cnt": "The total number of failed attempts to reject connection requests.",
+		"verbs_alloc_mr_cnt": "The total number of successful memory region allocations using verbs API.",
+		"verbs_alloc_mr_failed_cnt": "The total number of failed memory region allocation attempts using verbs API.",
+		"verbs_alloc_pd_cnt": "The total number of successful protection domain allocations using verbs API.",
+		"verbs_alloc_pd_failed_cnt": "The total number of failed protection domain allocation attempts using verbs API.",
+		"verbs_alloc_uctx_cnt": "The total number of successful user context allocations using verbs API.",
+		"verbs_alloc_uctx_failed_cnt": "The total number of failed user context allocation attempts using verbs API.",
+		"verbs_create_cq_cnt": "The total number of successful completion queue creations using verbs API.",
+		"verbs_create_cq_failed_cnt": "The total number of failed completion queue creation attempts using verbs API.",
+		"verbs_destroy_cq_failed_cnt": "The total number of failed completion queue deletion using verbs API.",
+		"verbs_create_qp_cnt": "The total number of successful queue pair creations using verbs API.",
+		"verbs_create_qp_failed_cnt": "The total number of failed queue pair creation attempts using verbs API.",
+		"verbs_destroy_qp_cnt": "The total number of destroyed queue pairs using verbs API.",
+		"verbs_dealloc_pd_cnt": "The total number of deallocated protection domains using verbs API.",
+		"verbs_dealloc_uctx_cnt": "The total number of deallocated user contexts using verbs API.",
+		"verbs_dereg_mr_cnt": "The total number of successful memory region deregistrations using verbs API.",
+		"verbs_dereg_mr_failed_cnt": "The total number of failed memory region deregistration attempts using verbs API.",
+		"verbs_destroy_cq_cnt": "The total number of destroyed completion queues using verbs API.",
+		"verbs_destroy_qp_failed_cnt": "The total number of failed attempts to destroy queue pairs (QPs) using verbs API.",
+		"verbs_get_dma_mr_cnt": "The total number of successful direct memory access (DMA) memory region acquisitions using verbs API.",
+		"verbs_get_dma_mr_failed_cnt": "The total number of failed attempts to acquire direct memory access (DMA) memory regions using verbs API.",
+		"verbs_reg_usr_mr_cnt": "The total number of user memory regions successfully registered with the verbs API.",
+		"verbs_reg_usr_mr_failed_cnt": "The total number of failed attempts to register user memory regions with the verbs API.",
+	}
+	// erdmaMetrics holds one counter definition per entry of
+	// erdmaStatisticCounterEntries, labelled with rdmaDevPortLabels.
+	erdmaMetrics = lo.Map(lo.Keys(erdmaStatisticCounterEntries), func(k string, _ int) probe.SingleMetricsOpts {
+		return probe.SingleMetricsOpts{
+			Name:           rdmaERdmaProbeName + k,
+			VariableLabels: rdmaDevPortLabels,
+			Help:           erdmaStatisticCounterEntries[k],
+			ValueType:      prometheus.CounterValue,
+		}
+	})
+)
diff --git a/pkg/exporter/probe/rdma/mellanox.go b/pkg/exporter/probe/rdma/mellanox.go
new file mode 100644
index 00000000..26195af1
--- /dev/null
+++ b/pkg/exporter/probe/rdma/mellanox.go
@@ -0,0 +1,58 @@
+package rdma
+
+import (
+	"github.com/alibaba/kubeskoop/pkg/exporter/probe"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/samber/lo"
+)
+
+const (
+	// rdmaMellanoxProbeName prefixes the name of every mlx5 counter registered below.
+	rdmaMellanoxProbeName = "mellanox_mlx5_"
+)
+
+var (
+	// mlx5 maps each mlx5 statistic key to the help text of the counter
+	// registered for it.
+	mlx5 = map[string]string{
+		"rx_write_requests": "The number of received WRITE requests for the associated QPs.",
+		"rx_read_requests": "The number of received READ requests for the associated QPs.",
+		"rx_atomic_requests": "The number of received ATOMIC request for the associated QPs.",
+		"out_of_buffer": "The number of drops occurred due to lack of WQE for the associated QPs.",
+		"out_of_sequence": "The number of out of sequence packets received.",
+		"duplicate_request": "Number of duplicate request packets.",
+		"rnr_nak_retry_err": "The number of received RNR NAK packets. The QP retry limit was not exceeded.",
+		"packet_seq_err": "The number of received NAK sequence error packets. The QP retry limit was not exceeded.",
+		"implied_nak_seq_err": "Number of time the requested decided an ACK with a PSN larger than the expected PSN for an RDMA read or response.",
+		"local_ack_timeout_err": "The number of times QP's ack timer expired for RC, XRC, DCT QPs at the sender side.",
+		"rx_dct_connect": "The number of received connection request for the associated DCTs.",
+		"resp_local_length_error": "The number of times responder detected local length errors.",
+		"resp_cqe_error": "The number of times responder detected CQEs completed with errors.",
+		"req_cqe_error": "The number of times requester detected CQEs completed with errors.",
+		"req_remote_invalid_request": "The number of times requester detected remote invalid request errors.",
+		"req_remote_access_errors": "The number of times requester detected remote access errors.",
+		"resp_remote_access_errors": "The number of times responder detected remote access errors.",
+		"resp_cqe_flush_error": "The number of times responder detected CQEs completed with flushed errors.",
+		"req_cqe_flush_error": "The number of times requester detected CQEs completed with flushed errors.",
+		"roce_adp_retrans": "The number of adaptive retransmissions for RoCE traffic",
+		"roce_adp_retrans_to": "The number of times RoCE traffic reached timeout due to adaptive retransmission",
+		"roce_slow_restart": "The number of times RoCE slow restart was used",
+		"roce_slow_restart_cnps": "The number of times RoCE slow restart generated CNP packets",
+		"roce_slow_restart_trans": "The number of times RoCE slow restart changed state to slow restart",
+		"rp_cnp_ignored": "The number of CNP packets received and ignored by the Reaction Point HCA.",
+		"rp_cnp_handled": "The number of CNP packets handled by the Reaction Point HCA to throttle the transmission rate.",
+		"np_ecn_marked_roce_packets": "The number of RoCEv2 packets received by the notification point which were marked for experiencing the congestion (ECN bits where '11' on the ingress RoCE traffic) .",
+		"np_cnp_sent": "The number of CNP packets sent by the Notification Point when it noticed congestion experienced in the RoCEv2 IP header (ECN bits).",
+		"rx_icrc_encapsulated": "The number of RoCE packets with ICRC errors.",
+	}
+	// mlx5Metrics holds one counter definition per entry of mlx5, labelled
+	// with rdmaDevPortLabels.
+	mlx5Metrics = lo.Map(lo.Keys(mlx5), func(k string, _ int) probe.SingleMetricsOpts {
+		return probe.SingleMetricsOpts{
+			Name:           rdmaMellanoxProbeName + k,
+			VariableLabels: rdmaDevPortLabels,
+			Help:           mlx5[k],
+			ValueType:      prometheus.CounterValue,
+		}
+	})
+)
diff --git a/pkg/exporter/probe/rdma/rdma.go b/pkg/exporter/probe/rdma/rdma.go
new file mode 100644
index 00000000..72952909
--- /dev/null
+++ b/pkg/exporter/probe/rdma/rdma.go
@@ -0,0 +1,146 @@
+package rdma
+
+import (
+	"context"
+	"fmt"
+	"strconv"
+	"strings"
+
+	"github.com/alibaba/kubeskoop/pkg/exporter/nettop"
+	"github.com/alibaba/kubeskoop/pkg/exporter/probe"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/samber/lo"
+	log "github.com/sirupsen/logrus"
+	"github.com/vishvananda/netlink"
+)
+
+const (
+	probeName = "rdma"
+
+	// Values of the "type" label; a known type equals the link-name prefix it is derived from.
+	linkTypeMlx5    = "mlx5"
+	linkTypeERDMA   = "erdma"
+	linkTypeUnknown = "unknown"
+)
+
+var (
+	resourceSummaryEntries = []string{"cm_id", "cq", "ctx", "mr", "pd", "qp"}
+	rdmaDevLabels          = []string{"device", "type"}
+	rdmaDevPortLabels      = append(rdmaDevLabels, "port")
+
+	// statisticNamePrefixes maps a link type to the prefix its port counters
+	// are registered under, so emitted names match mlx5Metrics and erdmaMetrics.
+	statisticNamePrefixes = map[string]string{
+		linkTypeMlx5:  rdmaMellanoxProbeName,
+		linkTypeERDMA: rdmaERdmaProbeName,
+	}
+)
+
+func init() {
+	probe.MustRegisterMetricsProbe(probeName, metricsProbeCreator)
+}
+
+// metricsProbeCreator builds the rdma probe: one gauge per resource summary
+// entry plus the mlx5 and erdma per-port counters.
+func metricsProbeCreator() (probe.MetricsProbe, error) {
+	p := &metricsProbe{}
+
+	opts := probe.BatchMetricsOpts{
+		Namespace:      probe.MetricsNamespace,
+		Subsystem:      probeName,
+		VariableLabels: probe.StandardMetricsLabels,
+		SingleMetricsOpts: lo.Map(resourceSummaryEntries, func(entry string, _ int) probe.SingleMetricsOpts {
+			return probe.SingleMetricsOpts{Name: entry, VariableLabels: rdmaDevLabels, Help: fmt.Sprintf("rdma resource summary %s", entry), ValueType: prometheus.GaugeValue}
+		}),
+	}
+	opts.SingleMetricsOpts = append(opts.SingleMetricsOpts, mlx5Metrics...)
+	opts.SingleMetricsOpts = append(opts.SingleMetricsOpts, erdmaMetrics...)
+	batchMetrics := probe.NewBatchMetrics(opts, p.collectOnce)
+	return probe.NewMetricsProbe(probeName, p, batchMetrics), nil
+}
+
+// metricsProbe carries no state; collectOnce queries netlink on every call.
+type metricsProbe struct {
+}
+
+// Start does nothing and always returns nil.
+func (p *metricsProbe) Start(_ context.Context) error {
+	return nil
+}
+
+// Stop does nothing and always returns nil.
+func (p *metricsProbe) Stop(_ context.Context) error {
+	return nil
+}
+
+// collectOnce emits the resource summary of every RDMA device and, for
+// devices of a known link type, the statistics of each port. A device whose
+// link or statistics cannot be read is logged and skipped.
+func (p *metricsProbe) collectOnce(emit probe.Emit) error {
+	// rdma only collect host network
+	entity, err := nettop.GetHostNetworkEntity()
+	if err != nil {
+		return fmt.Errorf("get host network entity: %w", err)
+	}
+	rdmaRes, err := netlink.RdmaResourceList()
+	if err != nil {
+		return fmt.Errorf("list rdma resources: %w", err)
+	}
+	if len(rdmaRes) == 0 {
+		return nil
+	}
+	standardLabelValues := probe.BuildStandardMetricsLabelValues(entity)
+	for _, res := range rdmaRes {
+		link, err := netlink.RdmaLinkByName(res.Name)
+		if err != nil {
+			log.Errorf("failed get rdma link %v, error: %v", res.Name, err)
+			continue
+		}
+		linkType := rdmaLinkType(link)
+		deviceLabelValues := appendLabels(standardLabelValues, res.Name, linkType)
+		for resKey, resVal := range res.RdmaResourceSummaryEntries {
+			emit(resKey, deviceLabelValues, float64(resVal))
+		}
+		// Only known link types have port counters registered for them.
+		prefix, known := statisticNamePrefixes[linkType]
+		if !known {
+			continue
+		}
+		linkStatistics, err := netlink.RdmaStatistic(link)
+		if err != nil {
+			log.Errorf("failed get rdma statistics %v, error: %v", res.Name, err)
+			continue
+		}
+
+		for _, port := range linkStatistics.RdmaPortStatistics {
+			devicePortLabelValues := appendLabels(deviceLabelValues, strconv.FormatUint(uint64(port.PortIndex), 10))
+			for statKey, statVal := range port.Statistics {
+				emit(prefix+statKey, devicePortLabelValues, float64(statVal))
+			}
+		}
+	}
+	return nil
+}
+
+// appendLabels returns a freshly allocated slice holding base followed by
+// extra, so label slices built from a shared base never alias each other.
+func appendLabels(base []string, extra ...string) []string {
+	out := make([]string, 0, len(base)+len(extra))
+	out = append(out, base...)
+	return append(out, extra...)
+}
+
+// rdmaLinkType classifies a link by the part of its name before the first
+// underscore; a nil link or an unrecognised prefix yields linkTypeUnknown.
+func rdmaLinkType(link *netlink.RdmaLink) string {
+	if link == nil {
+		return linkTypeUnknown
+	}
+	vendor, _, _ := strings.Cut(link.Attrs.Name, "_")
+	switch vendor {
+	case linkTypeMlx5, linkTypeERDMA:
+		return vendor
+	default:
+		return linkTypeUnknown
+	}
+}