Improve logging, handling of empty metric values
s-fairchild authored and cadenmarchese committed Jul 8, 2024
1 parent cf51f26 commit 3db4360
Showing 3 changed files with 87 additions and 62 deletions.
94 changes: 60 additions & 34 deletions pkg/backend/metrics.go
@@ -17,12 +17,12 @@ import (
func (ocb *openShiftClusterBackend) emitMetrics(log *logrus.Entry, doc *api.OpenShiftClusterDocument, operationType, provisioningState api.ProvisioningState, backendErr error) map[string]string {
dimensions := map[string]string{}

ocb.gatherOperationMetrics(operationType, provisioningState, backendErr, dimensions)
ocb.gatherCorrelationID(doc, dimensions)
ocb.gatherMiscMetrics(doc, dimensions)
ocb.gatherAuthMetrics(doc, dimensions)
ocb.gatherNetworkMetrics(doc, dimensions)
ocb.gatherNodeMetrics(doc, dimensions)
ocb.gatherOperationMetrics(log, operationType, provisioningState, backendErr, dimensions)
ocb.gatherCorrelationID(log, doc, dimensions)
ocb.gatherMiscMetrics(log, doc, dimensions)
ocb.gatherAuthMetrics(log, doc, dimensions)
ocb.gatherNetworkMetrics(log, doc, dimensions)
ocb.gatherNodeMetrics(log, doc, dimensions)

ocb.logMetricDimensions(log, operationType, dimensions)
ocb.m.EmitGauge(ocb.getMetricName(operationType), metricValue, dimensions)
@@ -44,49 +44,64 @@ func (ocb *openShiftClusterBackend) getResultType(backendErr error) utillog.Resu
return resultType
}

func (ocb *openShiftClusterBackend) getStringMetricValue(log *logrus.Entry, metricName, value string) string {
if value != "" {
return value
}

log.Warnf("%s %s", metricFailToCollectErr, metricName)
return empty
}

func (ocb *openShiftClusterBackend) logMetricDimensions(log *logrus.Entry, operationType api.ProvisioningState, dimensions map[string]string) {
for metric, value := range dimensions {
log.Info(fmt.Sprintf("%s.%s: %s = %s", metricPackage, operationType, metric, value))
}
}

func (m *openShiftClusterBackend) gatherCorrelationID(doc *api.OpenShiftClusterDocument, dimensions map[string]string) {
func (ocb *openShiftClusterBackend) gatherCorrelationID(log *logrus.Entry, doc *api.OpenShiftClusterDocument, dimensions map[string]string) {
if doc.CorrelationData != nil {
dimensions[correlationDataIdMetricName] = doc.CorrelationData.CorrelationID
dimensions[correlationDataClientRequestIdMetricName] = doc.CorrelationData.ClientRequestID
dimensions[correlationDataRequestIdMetricName] = doc.CorrelationData.RequestID
dimensions[correlationDataIdMetricName] = ocb.getStringMetricValue(log, correlationDataIdMetricName, doc.CorrelationData.CorrelationID)
dimensions[correlationDataClientRequestIdMetricName] = ocb.getStringMetricValue(log, correlationDataClientRequestIdMetricName, doc.CorrelationData.ClientRequestID)
dimensions[correlationDataRequestIdMetricName] = ocb.getStringMetricValue(log, correlationDataRequestIdMetricName, doc.CorrelationData.RequestID)
} else {
log.Warnf("%s %s", metricFailToCollectErr, correlationDataMetricName)
dimensions[correlationDataIdMetricName] = empty
dimensions[correlationDataClientRequestIdMetricName] = empty
dimensions[correlationDataRequestIdMetricName] = empty
}
}

func (ocb *openShiftClusterBackend) gatherOperationMetrics(operationType, provisioningState api.ProvisioningState, backendErr error, dimensions map[string]string) {
func (ocb *openShiftClusterBackend) gatherOperationMetrics(log *logrus.Entry, operationType, provisioningState api.ProvisioningState, backendErr error, dimensions map[string]string) {
// These are provided internally by endLease, not expected to be ""
dimensions[operationTypeMetricName] = operationType.String()
dimensions[provisioningStateMetricName] = provisioningState.String()
dimensions[resultTypeMetricName] = string(ocb.getResultType(backendErr))

dimensions[resultTypeMetricName] = ocb.getStringMetricValue(log, resultTypeMetricName, string(ocb.getResultType(backendErr)))
}

func (ocb *openShiftClusterBackend) gatherMiscMetrics(doc *api.OpenShiftClusterDocument, dimensions map[string]string) {
dimensions[subscriptionIdMetricName] = ocb.env.SubscriptionID()
dimensions[resourceIdMetricName] = doc.ResourceID
if doc.OpenShiftCluster != nil {
dimensions[clusterNameMetricName] = doc.OpenShiftCluster.Name
dimensions[locationMetricName] = doc.OpenShiftCluster.Location
dimensions[ocpVersionMetricName] = doc.OpenShiftCluster.Properties.ClusterProfile.Version
dimensions[rpVersionMetricName] = doc.OpenShiftCluster.Properties.ProvisionedBy
dimensions[resourecGroupMetricName] = doc.OpenShiftCluster.Properties.ClusterProfile.ResourceGroupID

for flag, feature := range doc.OpenShiftCluster.Properties.OperatorFlags {
dimensions[fmt.Sprintf("%s-%s", operatorFlagsMetricName, flag)] = feature
}
func (ocb *openShiftClusterBackend) gatherMiscMetrics(log *logrus.Entry, doc *api.OpenShiftClusterDocument, dimensions map[string]string) {
dimensions[subscriptionIdMetricName] = ocb.getStringMetricValue(log, subscriptionIdMetricName, ocb.env.SubscriptionID())
dimensions[resourceIdMetricName] = ocb.getStringMetricValue(log, resourceIdMetricName, doc.ResourceID)

dimensions[clusterNameMetricName] = ocb.getStringMetricValue(log, clusterNameMetricName, doc.OpenShiftCluster.Name)
dimensions[clusterIdMetricName] = ocb.getStringMetricValue(log, clusterIdMetricName, doc.OpenShiftCluster.ID)
dimensions[locationMetricName] = ocb.getStringMetricValue(log, locationMetricName, doc.OpenShiftCluster.Location)
dimensions[ocpVersionMetricName] = ocb.getStringMetricValue(log, ocpVersionMetricName, doc.OpenShiftCluster.Properties.ClusterProfile.Version)
dimensions[rpVersionMetricName] = ocb.getStringMetricValue(log, rpVersionMetricName, doc.OpenShiftCluster.Properties.ProvisionedBy)
dimensions[resourecGroupMetricName] = ocb.getStringMetricValue(log, resourecGroupMetricName, doc.OpenShiftCluster.Properties.ClusterProfile.ResourceGroupID)

for flag, feature := range doc.OpenShiftCluster.Properties.OperatorFlags {
flagMetricName := fmt.Sprintf("%s-%s", operatorFlagsMetricName, flag)
dimensions[flagMetricName] = ocb.getStringMetricValue(log, flagMetricName, feature)
}

dimensions[asyncOperationsIdMetricName] = doc.AsyncOperationID
dimensions[asyncOperationsIdMetricName] = ocb.getStringMetricValue(log, asyncOperationsIdMetricName, doc.AsyncOperationID)

if doc.OpenShiftCluster.Properties.WorkerProfiles != nil {
dimensions[workerProfileCountMetricName] = strconv.FormatInt(int64(len(doc.OpenShiftCluster.Properties.WorkerProfiles)), 10)
} else {
dimensions[workerProfileCountMetricName] = ocb.getStringMetricValue(log, workerProfileCountMetricName, "")
}

if doc.OpenShiftCluster.Tags != nil {
Expand All @@ -96,37 +111,37 @@ func (ocb *openShiftClusterBackend) gatherMiscMetrics(doc *api.OpenShiftClusterD
}
}

func (ocb *openShiftClusterBackend) gatherNodeMetrics(doc *api.OpenShiftClusterDocument, dimensions map[string]string) {
func (ocb *openShiftClusterBackend) gatherNodeMetrics(log *logrus.Entry, doc *api.OpenShiftClusterDocument, dimensions map[string]string) {
if doc.OpenShiftCluster.Properties.MasterProfile.DiskEncryptionSetID != "" {
dimensions[masterProfileEncryptionSetIdMetricName] = enabled
} else {
dimensions[masterProfileEncryptionSetIdMetricName] = disabled
}

mp := doc.OpenShiftCluster.Properties.MasterProfile
dimensions[masterProfileVmSizeMetricName] = string(mp.VMSize)
dimensions[masterProfileVmSizeMetricName] = ocb.getStringMetricValue(log, masterProfileVmSizeMetricName, string(mp.VMSize))

if doc.OpenShiftCluster.Properties.MasterProfile.EncryptionAtHost == api.EncryptionAtHostEnabled {
dimensions[masterEncryptionAtHostMetricName] = string(api.EncryptionAtHostEnabled)
} else if doc.OpenShiftCluster.Properties.MasterProfile.EncryptionAtHost == api.EncryptionAtHostDisabled {
dimensions[masterEncryptionAtHostMetricName] = string(api.EncryptionAtHostDisabled)
} else {
log.Warnf("%s %s", metricFailToCollectErr, masterEncryptionAtHostMetricName)
dimensions[masterEncryptionAtHostMetricName] = unknown
}

if len(doc.OpenShiftCluster.Properties.WorkerProfiles) > 0 {
wp := doc.OpenShiftCluster.Properties.WorkerProfiles[0]
dimensions[workerVmSizeMetricName] = string(wp.VMSize)
dimensions[workerVmDiskSizeMetricName] = strconv.FormatInt(int64(wp.DiskSizeGB), 10)

dimensions[workerVmSizeMetricName] = string(wp.VMSize)
dimensions[workerVmSizeMetricName] = ocb.getStringMetricValue(log, workerVmSizeMetricName, string(wp.VMSize))
dimensions[workerVmDiskSizeMetricName] = strconv.FormatInt(int64(wp.DiskSizeGB), 10)

if wp.EncryptionAtHost == api.EncryptionAtHostEnabled {
dimensions[workerEncryptionAtHostMetricName] = string(api.EncryptionAtHostEnabled)
} else if wp.EncryptionAtHost == api.EncryptionAtHostDisabled {
dimensions[workerEncryptionAtHostMetricName] = string(api.EncryptionAtHostDisabled)
} else {
log.Warnf("%s %s", metricFailToCollectErr, workerEncryptionAtHostMetricName)
dimensions[workerEncryptionAtHostMetricName] = unknown
}
}
@@ -136,16 +151,18 @@ func (ocb *openShiftClusterBackend) gatherNodeMetrics(doc *api.OpenShiftClusterD
} else if doc.OpenShiftCluster.Properties.ClusterProfile.FipsValidatedModules == api.FipsValidatedModulesDisabled {
dimensions[fipsMetricName] = string(api.FipsValidatedModulesDisabled)
} else {
log.Warnf("%s %s", metricFailToCollectErr, fipsMetricName)
dimensions[fipsMetricName] = unknown
}
}

func (ocb *openShiftClusterBackend) gatherAuthMetrics(doc *api.OpenShiftClusterDocument, dimensions map[string]string) {
func (ocb *openShiftClusterBackend) gatherAuthMetrics(log *logrus.Entry, doc *api.OpenShiftClusterDocument, dimensions map[string]string) {
if doc.OpenShiftCluster.Properties.PlatformWorkloadIdentityProfile != nil {
dimensions[clusterIdentityMetricName] = clusterIdentityManagedIdMetricName
} else if doc.OpenShiftCluster.Properties.ServicePrincipalProfile != nil {
dimensions[clusterIdentityMetricName] = clusterIdentityServicePrincipalMetricName
} else {
log.Warnf("%s %s", metricFailToCollectErr, clusterIdentityMetricName)
dimensions[clusterIdentityMetricName] = unknown
}

@@ -156,13 +173,14 @@ func (ocb *openShiftClusterBackend) gatherAuthMetrics(doc *api.OpenShiftClusterD
}
}

func (ocb *openShiftClusterBackend) gatherNetworkMetrics(doc *api.OpenShiftClusterDocument, dimensions map[string]string) {
func (ocb *openShiftClusterBackend) gatherNetworkMetrics(log *logrus.Entry, doc *api.OpenShiftClusterDocument, dimensions map[string]string) {
for _, p := range doc.OpenShiftCluster.Properties.IngressProfiles {
if p.Visibility == api.VisibilityPrivate {
dimensions[ingressProfileMetricName] = fmt.Sprintf("%s.%s", string(api.VisibilityPrivate), p.Name)
} else if p.Visibility == api.VisibilityPublic {
dimensions[ingressProfileMetricName] = fmt.Sprintf("%s.%s", string(api.VisibilityPublic), p.Name)
} else {
log.Warnf("%s %s", metricFailToCollectErr, ingressProfileMetricName)
dimensions[ingressProfileMetricName] = unknown
}
}
@@ -172,6 +190,7 @@ func (ocb *openShiftClusterBackend) gatherNetworkMetrics(doc *api.OpenShiftClust
} else if doc.OpenShiftCluster.Properties.NetworkProfile.OutboundType == api.OutboundTypeLoadbalancer {
dimensions[networkProfileOutboundTypeMetricName] = string(api.OutboundTypeLoadbalancer)
} else {
log.Warnf("%s %s", metricFailToCollectErr, networkProfileManagedOutboundIpsMetricName)
dimensions[networkProfileOutboundTypeMetricName] = unknown
}

@@ -188,7 +207,10 @@ func (ocb *openShiftClusterBackend) gatherNetworkMetrics(doc *api.OpenShiftClust
}

domain, err := dns.ManagedDomain(ocb.env, doc.OpenShiftCluster.Properties.ClusterProfile.Domain)
if err == nil {
if err != nil {
dimensions[clusterProfileDomainMetricName] = empty
log.Warnf("%s %s, due to %s", metricFailToCollectErr, clusterProfileDomainMetricName, err.Error())
} else {
if domain != "" {
dimensions[clusterProfileDomainMetricName] = custom
} else {
@@ -198,13 +220,17 @@ func (ocb *openShiftClusterBackend) gatherNetworkMetrics(doc *api.OpenShiftClust

if doc.OpenShiftCluster.Properties.NetworkProfile.LoadBalancerProfile.ManagedOutboundIPs != nil {
dimensions[networkProfileManagedOutboundIpsMetricName] = strconv.FormatInt(int64(doc.OpenShiftCluster.Properties.NetworkProfile.LoadBalancerProfile.ManagedOutboundIPs.Count), 10)
} else {
log.Warnf("%s %s", metricFailToCollectErr, networkProfileManagedOutboundIpsMetricName)
dimensions[networkProfileManagedOutboundIpsMetricName] = unknown
}

if doc.OpenShiftCluster.Properties.NetworkProfile.PreconfiguredNSG == api.PreconfiguredNSGEnabled {
dimensions[networkProfilePreConfiguredNSGMetricName] = string(api.PreconfiguredNSGEnabled)
} else if doc.OpenShiftCluster.Properties.NetworkProfile.PreconfiguredNSG == api.PreconfiguredNSGDisabled {
dimensions[networkProfilePreConfiguredNSGMetricName] = string(api.PreconfiguredNSGDisabled)
} else {
log.Warnf("%s %s", metricFailToCollectErr, networkProfilePreConfiguredNSGMetricName)
dimensions[networkProfilePreConfiguredNSGMetricName] = unknown
}

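The core of the metrics.go change is the new getStringMetricValue helper: every string dimension now passes through one place that logs a warning and substitutes the "empty" sentinel when a value is missing, instead of silently emitting a blank dimension. Below is a minimal, standalone sketch of that pattern; the package layout, main function, and local constants are illustrative stand-ins, not the repository's code, and only logrus is assumed.

```go
package main

import (
	"fmt"

	"github.com/sirupsen/logrus"
)

const (
	empty                  = "empty"
	metricFailToCollectErr = "failed to collect metric:"
)

// getStringMetricValue keeps non-empty values as-is; otherwise it logs a
// warning and returns the "empty" sentinel so the dimension is still emitted
// with a known placeholder instead of a blank string.
func getStringMetricValue(log *logrus.Entry, metricName, value string) string {
	if value != "" {
		return value
	}
	log.Warnf("%s %s", metricFailToCollectErr, metricName)
	return empty
}

func main() {
	log := logrus.NewEntry(logrus.New())

	fmt.Println(getStringMetricValue(log, "location", "eastus")) // eastus
	fmt.Println(getStringMetricValue(log, "location", ""))       // empty (and a warning is logged)
}
```

The same fallback is what the gather* methods rely on once they receive the request-scoped log entry as their first argument.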
31 changes: 17 additions & 14 deletions pkg/backend/metrics_const.go
@@ -4,15 +4,16 @@ package backend
// Licensed under the Apache License 2.0.

const (
metricPackage = "backend.openshiftcluster"
metricValue int64 = 1
enabled = "Enabled"
disabled = "Disabled"
custom = "Custom"
defaultSet = "Default"
unknown = "unknown"
empty = "empty"
managed = "managed"
metricPackage = "backend.openshiftcluster"
metricValue int64 = 1
enabled = "Enabled"
disabled = "Disabled"
custom = "Custom"
defaultSet = "Default"
unknown = "unknown"
empty = "empty"
managed = "managed"
metricFailToCollectErr = "failed to collect metric:"

encryptionAtHostMetricName = "encryptionathost"
diskSizeMetricName = "disksize"
@@ -56,11 +57,13 @@ const (
operatorFlagsMetricName = "operatorflags"

asyncOperationsIdMetricName = "async_operationsid"
rpVersionMetricName = "rpversion"
ocpVersionMetricName = "ocpversion"
clusterNameMetricName = "clustername"
resourecGroupMetricName = "resourcegroup"
locationMetricName = "location"
openshiftClusterMetricName = "openshiftcluster"
rpVersionMetricName = openshiftClusterMetricName + "." + "rpversion"
ocpVersionMetricName = openshiftClusterMetricName + "." + "ocpversion"
clusterNameMetricName = openshiftClusterMetricName + "." + "clustername"
clusterIdMetricName = openshiftClusterMetricName + "." + "clusterid"
resourecGroupMetricName = openshiftClusterMetricName + "." + "resourcegroup"
locationMetricName = openshiftClusterMetricName + "." + "location"
resourceIdMetricName = "resourceid"
subscriptionIdMetricName = "subscriptionid"

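metrics_const.go also moves several per-cluster dimension names onto a dotted prefix composed from a shared base constant, so related dimensions group together under openshiftcluster.*. A small compile-checkable sketch of that convention (the standalone package and main are illustrative, the constant values mirror the diff above):

```go
package main

import "fmt"

// Per-cluster dimension names share a dotted prefix so they group together
// in the metrics backend (openshiftcluster.rpversion, openshiftcluster.clusterid, ...).
const (
	openshiftClusterMetricName = "openshiftcluster"

	rpVersionMetricName   = openshiftClusterMetricName + "." + "rpversion"
	ocpVersionMetricName  = openshiftClusterMetricName + "." + "ocpversion"
	clusterNameMetricName = openshiftClusterMetricName + "." + "clustername"
	clusterIdMetricName   = openshiftClusterMetricName + "." + "clusterid"
)

func main() {
	fmt.Println(rpVersionMetricName, ocpVersionMetricName, clusterNameMetricName, clusterIdMetricName)
}
```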
24 changes: 10 additions & 14 deletions pkg/backend/metrics_test.go
@@ -104,14 +104,10 @@ func TestEmitMetrics(t *testing.T) {
FipsValidatedModules: api.FipsValidatedModulesEnabled,
},
NetworkProfile: api.NetworkProfile{
LoadBalancerProfile: &api.LoadBalancerProfile{
ManagedOutboundIPs: &api.ManagedOutboundIPs{
Count: 1,
},
},
PodCIDR: "10.128.0.1/14",
ServiceCIDR: "172.30.0.1/16",
PreconfiguredNSG: api.PreconfiguredNSGEnabled,
LoadBalancerProfile: &api.LoadBalancerProfile{},
PodCIDR: "10.128.0.1/14",
ServiceCIDR: "172.30.0.1/16",
PreconfiguredNSG: api.PreconfiguredNSGEnabled,
},
OperatorFlags: api.OperatorFlags{"testFlag": "true"},
WorkerProfiles: []api.WorkerProfile{
@@ -204,12 +200,12 @@
}

dimensions := map[string]string{}
ocb.gatherOperationMetrics(tt.operationType, tt.provisioningState, tt.backendErr, dimensions)
ocb.gatherCorrelationID(tt.doc, dimensions)
ocb.gatherMiscMetrics(tt.doc, dimensions)
ocb.gatherAuthMetrics(tt.doc, dimensions)
ocb.gatherNetworkMetrics(tt.doc, dimensions)
ocb.gatherNodeMetrics(tt.doc, dimensions)
ocb.gatherOperationMetrics(log, tt.operationType, tt.provisioningState, tt.backendErr, dimensions)
ocb.gatherCorrelationID(log, tt.doc, dimensions)
ocb.gatherMiscMetrics(log, tt.doc, dimensions)
ocb.gatherAuthMetrics(log, tt.doc, dimensions)
ocb.gatherNetworkMetrics(log, tt.doc, dimensions)
ocb.gatherNodeMetrics(log, tt.doc, dimensions)

emitter.EXPECT().EmitGauge(ocb.getMetricName(tt.operationType), metricValue, dimensions).MaxTimes(1)

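Because the gather* methods now take a *logrus.Entry, a test can also assert that a warning is emitted when a value is missing. The sketch below is a hypothetical, self-contained example of that style using logrus's test hook; fallbackMetricValue is a local stand-in for the repository's getStringMetricValue method, not the actual code, and the package name is illustrative.

```go
package main

import (
	"strings"
	"testing"

	"github.com/sirupsen/logrus"
	"github.com/sirupsen/logrus/hooks/test"
)

// fallbackMetricValue is a stand-in for the helper under discussion:
// keep non-empty values, otherwise warn and substitute the "empty" sentinel.
func fallbackMetricValue(log *logrus.Entry, metricName, value string) string {
	if value != "" {
		return value
	}
	log.Warnf("failed to collect metric: %s", metricName)
	return "empty"
}

func TestFallbackWarnsOnEmptyValue(t *testing.T) {
	logger, hook := test.NewNullLogger()
	log := logrus.NewEntry(logger)

	if got := fallbackMetricValue(log, "location", ""); got != "empty" {
		t.Errorf("got %q, want %q", got, "empty")
	}

	// The helper should have logged a warning naming the missing metric.
	entry := hook.LastEntry()
	if entry == nil || entry.Level != logrus.WarnLevel || !strings.Contains(entry.Message, "location") {
		t.Errorf("expected a warning mentioning the missing metric, got %+v", entry)
	}
}
```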
