-
Notifications
You must be signed in to change notification settings - Fork 238
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add volume attachment time & csi grpc execution time metrics
- Loading branch information
Showing
14 changed files
with
420 additions
and
116 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
package common | ||
|
||
import ( | ||
"context" | ||
"time" | ||
|
||
"github.com/container-storage-interface/spec/lib/go/csi" | ||
"github.com/kubernetes-sigs/alibaba-cloud-csi-driver/pkg/metric" | ||
"github.com/prometheus/client_golang/prometheus" | ||
"google.golang.org/grpc/codes" | ||
"google.golang.org/grpc/status" | ||
) | ||
|
||
func WrapControllerServerWithMetricRecorder(driverType string, server csi.ControllerServer) csi.ControllerServer { | ||
return &ControllerServerWithMetricRecorder{driverType, server} | ||
} | ||
|
||
// ControllerServerWithMetricRecorder wraps a csi.ControllerServer and records
// execution-time metrics for the publish/unpublish calls it forwards.
// Field order matters: the constructor in this file uses an unkeyed literal.
type ControllerServerWithMetricRecorder struct { | ||
// driverType is used as the value of the driver-type metric label.
driverType string | ||
// The embedded server receives every forwarded call.
csi.ControllerServer | ||
} | ||
|
||
func (cs *ControllerServerWithMetricRecorder) observeExecTime(time int64, statType metric.VolumeStatType, volumeId string, err error) { | ||
errCode := getCodeFromError(err).String() | ||
metric.VolumeStatCollector.Metrics[statType].With(prometheus.Labels{ | ||
metric.VolumeStatTypeLabelName: cs.driverType, | ||
metric.VolumeStatIdLabelName: volumeId, | ||
metric.VolumeStatErrCodeLabelName: errCode, | ||
}).Observe(float64(time)) | ||
} | ||
|
||
func getCodeFromError(err error) codes.Code { | ||
status, ok := status.FromError(err) | ||
if ok { | ||
return status.Code() | ||
} | ||
return codes.Unknown | ||
} | ||
|
||
func (cs *ControllerServerWithMetricRecorder) CreateVolume(context context.Context, req *csi.CreateVolumeRequest) (*csi.CreateVolumeResponse, error) { | ||
return cs.ControllerServer.CreateVolume(context, req) | ||
} | ||
|
||
func (cs *ControllerServerWithMetricRecorder) DeleteVolume(context context.Context, req *csi.DeleteVolumeRequest) (*csi.DeleteVolumeResponse, error) { | ||
return cs.ControllerServer.DeleteVolume(context, req) | ||
} | ||
|
||
func (cs *ControllerServerWithMetricRecorder) ControllerPublishVolume(context context.Context, req *csi.ControllerPublishVolumeRequest) (*csi.ControllerPublishVolumeResponse, error) { | ||
startTime := time.Now() | ||
resp, err := cs.ControllerServer.ControllerPublishVolume(context, req) | ||
execTime := time.Since(startTime).Milliseconds() | ||
cs.observeExecTime(execTime, metric.ControllerPublishExecTimeStat, req.GetVolumeId(), err) | ||
return resp, err | ||
} | ||
|
||
func (cs *ControllerServerWithMetricRecorder) ControllerUnpublishVolume(context context.Context, req *csi.ControllerUnpublishVolumeRequest) (*csi.ControllerUnpublishVolumeResponse, error) { | ||
startTime := time.Now() | ||
resp, err := cs.ControllerServer.ControllerUnpublishVolume(context, req) | ||
execTime := time.Since(startTime).Milliseconds() | ||
cs.observeExecTime(execTime, metric.ControllerUnpublishExecTimeStat, req.GetVolumeId(), err) | ||
return resp, err | ||
} | ||
|
||
func (cs *ControllerServerWithMetricRecorder) ValidateVolumeCapabilities(context context.Context, req *csi.ValidateVolumeCapabilitiesRequest) (*csi.ValidateVolumeCapabilitiesResponse, error) { | ||
return cs.ControllerServer.ValidateVolumeCapabilities(context, req) | ||
} | ||
|
||
func (cs *ControllerServerWithMetricRecorder) CreateSnapshot(context context.Context, req *csi.CreateSnapshotRequest) (*csi.CreateSnapshotResponse, error) { | ||
return cs.ControllerServer.CreateSnapshot(context, req) | ||
} | ||
|
||
func (cs *ControllerServerWithMetricRecorder) DeleteSnapshot(context context.Context, req *csi.DeleteSnapshotRequest) (*csi.DeleteSnapshotResponse, error) { | ||
return cs.ControllerServer.DeleteSnapshot(context, req) | ||
} | ||
|
||
func (cs *ControllerServerWithMetricRecorder) ControllerExpandVolume(context context.Context, req *csi.ControllerExpandVolumeRequest) (*csi.ControllerExpandVolumeResponse, error) { | ||
return cs.ControllerServer.ControllerExpandVolume(context, req) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
package common | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"time" | ||
|
||
"github.com/container-storage-interface/spec/lib/go/csi" | ||
"github.com/kubernetes-sigs/alibaba-cloud-csi-driver/pkg/metric" | ||
"github.com/kubernetes-sigs/alibaba-cloud-csi-driver/pkg/options" | ||
"github.com/prometheus/client_golang/prometheus" | ||
"github.com/sirupsen/logrus" | ||
v1 "k8s.io/api/core/v1" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/client-go/kubernetes" | ||
"k8s.io/client-go/tools/clientcmd" | ||
) | ||
|
||
// Keys read from NodePublishVolumeRequest.VolumeContext to identify the pod a
// volume is being published for.
// NOTE(review): presumably populated by kubelet when the CSIDriver object
// enables podInfoOnMount — confirm against the deployment manifests.
const ( | ||
podNameKey = "csi.storage.k8s.io/pod.name" | ||
podNamespaceKey = "csi.storage.k8s.io/pod.namespace" | ||
) | ||
|
||
func WrapNodeServerWithMetricRecorder(driverType string, server csi.NodeServer) csi.NodeServer { | ||
config, err := clientcmd.BuildConfigFromFlags(options.MasterURL, options.Kubeconfig) | ||
if err != nil { | ||
logrus.Errorf("initializing kubernetes config for node server with metric recorder failed: %s", err) | ||
return &NodeServerWithMetricRecorder{server, driverType, nil} | ||
} | ||
clientset, err := kubernetes.NewForConfig(config) | ||
if err != nil { | ||
logrus.Errorf("initializing kubernetes clientset for node server with metric recorder failed: %s", err) | ||
return &NodeServerWithMetricRecorder{server, driverType, nil} | ||
} | ||
return &NodeServerWithMetricRecorder{server, driverType, clientset} | ||
} | ||
|
||
// NodeServerWithMetricRecorder wraps a csi.NodeServer and records
// execution-time and volume-attachment-time metrics for the calls it forwards.
// Field order matters: the constructor in this file uses unkeyed literals.
type NodeServerWithMetricRecorder struct { | ||
// The embedded server receives every forwarded call.
csi.NodeServer | ||
// driverType is used as the value of the driver-type metric label.
driverType string | ||
// clientset is used to look up pods; it is nil when client setup failed,
// in which case attachment-time observation is skipped.
clientset *kubernetes.Clientset | ||
} | ||
|
||
func (ns *NodeServerWithMetricRecorder) observeExecTime(time int64, statType metric.VolumeStatType, volumeId string, err error) { | ||
errCode := getCodeFromError(err).String() | ||
metric.VolumeStatCollector.Metrics[statType].With(prometheus.Labels{ | ||
metric.VolumeStatTypeLabelName: ns.driverType, | ||
metric.VolumeStatIdLabelName: volumeId, | ||
metric.VolumeStatErrCodeLabelName: errCode, | ||
}).Observe(float64(time)) | ||
} | ||
|
||
func (ns *NodeServerWithMetricRecorder) observeVolumeAttachmentTime(curTime int64, req *csi.NodePublishVolumeRequest, err error) { | ||
errCode := getCodeFromError(err).String() | ||
if ns.clientset == nil { | ||
return | ||
} | ||
podName, podNamespace := req.VolumeContext[podNameKey], req.VolumeContext[podNamespaceKey] | ||
if podName == "" || podNamespace == "" { | ||
logrus.Warnf("observeVolumeAttachmentTime: empty pod name/namespace: %s, %s", podName, podNamespace) | ||
return | ||
} | ||
pod, err := ns.clientset.CoreV1().Pods(podNamespace).Get(context.Background(), podName, metav1.GetOptions{}) | ||
if err != nil { | ||
logrus.Errorf("error getting pod %s/%s when observing volume attachment time for volume %s", podNamespace, podName, req.GetVolumeId()) | ||
return | ||
} | ||
podStartTime, err := getPodStartTimestamp(pod) | ||
if err != nil { | ||
logrus.Errorf("error getting scheduled time for pod %s/%s when observing volume attachment time for volume %s", podNamespace, podName, req.GetVolumeId()) | ||
return | ||
} | ||
|
||
metric.VolumeStatCollector.Metrics[metric.VolumeAttachTimeStat].With(prometheus.Labels{ | ||
metric.VolumeStatTypeLabelName: ns.driverType, | ||
metric.VolumeStatIdLabelName: req.GetVolumeId(), | ||
metric.VolumeStatErrCodeLabelName: errCode, | ||
}).Observe(float64(curTime) - float64(podStartTime)) | ||
} | ||
|
||
func getPodStartTimestamp(pod *v1.Pod) (int64, error) { | ||
startTime := pod.Status.StartTime | ||
if startTime == nil { | ||
return 0, fmt.Errorf("no start time found for pod %s/%s ", pod.GetNamespace(), pod.GetName()) | ||
} | ||
return startTime.Time.UnixMilli(), nil | ||
} | ||
|
||
func (ns *NodeServerWithMetricRecorder) NodeStageVolume(context context.Context, req *csi.NodeStageVolumeRequest) (*csi.NodeStageVolumeResponse, error) { | ||
startTime := time.Now() | ||
resp, err := ns.NodeServer.NodeStageVolume(context, req) | ||
execTime := time.Since(startTime).Milliseconds() | ||
ns.observeExecTime(execTime, metric.NodeStageExecTimeStat, req.GetVolumeId(), err) | ||
return resp, err | ||
} | ||
|
||
func (ns *NodeServerWithMetricRecorder) NodeUnstageVolume(context context.Context, req *csi.NodeUnstageVolumeRequest) (*csi.NodeUnstageVolumeResponse, error) { | ||
startTime := time.Now() | ||
resp, err := ns.NodeServer.NodeUnstageVolume(context, req) | ||
execTime := time.Since(startTime).Milliseconds() | ||
ns.observeExecTime(execTime, metric.NodeUnstageExecTimeStat, req.GetVolumeId(), err) | ||
return resp, err | ||
} | ||
|
||
func (ns *NodeServerWithMetricRecorder) NodePublishVolume(context context.Context, req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error) { | ||
startTime := time.Now() | ||
resp, err := ns.NodeServer.NodePublishVolume(context, req) | ||
curTime := time.Now() | ||
execTime := curTime.Sub(startTime) | ||
ns.observeExecTime(execTime.Milliseconds(), metric.NodePublishExecTimeStat, req.GetVolumeId(), err) | ||
ns.observeVolumeAttachmentTime(curTime.UnixMilli(), req, err) | ||
return resp, err | ||
} | ||
|
||
func (ns *NodeServerWithMetricRecorder) NodeUnpublishVolume(context context.Context, req *csi.NodeUnpublishVolumeRequest) (*csi.NodeUnpublishVolumeResponse, error) { | ||
startTime := time.Now() | ||
resp, err := ns.NodeServer.NodeUnpublishVolume(context, req) | ||
time := time.Since(startTime).Milliseconds() | ||
ns.observeExecTime(time, metric.NodeUnpublishExecTimeStat, req.GetVolumeId(), err) | ||
return resp, err | ||
} | ||
|
||
func (ns *NodeServerWithMetricRecorder) NodeGetVolumeStats(context context.Context, req *csi.NodeGetVolumeStatsRequest) (*csi.NodeGetVolumeStatsResponse, error) { | ||
return ns.NodeServer.NodeGetVolumeStats(context, req) | ||
} | ||
|
||
func (ns *NodeServerWithMetricRecorder) NodeExpandVolume(context context.Context, req *csi.NodeExpandVolumeRequest) (*csi.NodeExpandVolumeResponse, error) { | ||
return ns.NodeServer.NodeExpandVolume(context, req) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.