Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[receiver/vcenter] Adds vSAN metrics for Virtual Machines #34082

Merged
merged 9 commits into from
Jul 26, 2024
27 changes: 27 additions & 0 deletions .chloggen/vcenterreceiver-vm-vsan.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: 'enhancement'

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: 'vcenterreceiver'

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Adds a number of vSAN metrics for Virtual Machines, all disabled by default.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [33556]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: []
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ dist/
# Miscellaneous files
*.sw[op]
*.DS_Store
__debug_bin*
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
__debug_bin*

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@djaglowski These were building up in my directory as I've been debugging and showing up as files that could be committed. Any reason not to ignore them?


# Coverage
coverage/*
Expand Down
2 changes: 2 additions & 0 deletions cmd/otelcontribcol/go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

274 changes: 263 additions & 11 deletions receiver/vcenterreceiver/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,35 +7,45 @@ import (
"context"
"errors"
"fmt"
"maps"
"net/url"
"reflect"
"strconv"
"strings"
"time"

"github.com/vmware/govmomi"
"github.com/vmware/govmomi/find"
"github.com/vmware/govmomi/object"
"github.com/vmware/govmomi/performance"
"github.com/vmware/govmomi/property"
"github.com/vmware/govmomi/view"
"github.com/vmware/govmomi/vim25"
"github.com/vmware/govmomi/vim25/mo"
"github.com/vmware/govmomi/vim25/soap"
vt "github.com/vmware/govmomi/vim25/types"
"github.com/vmware/govmomi/vsan"
"github.com/vmware/govmomi/vsan/types"
"go.uber.org/zap"
)

// vcenterClient is a client that collects data from a vCenter endpoint.
type vcenterClient struct {
moClient *govmomi.Client
vimDriver *vim25.Client
finder *find.Finder
pc *property.Collector
pm *performance.Manager
vm *view.Manager
cfg *Config
logger *zap.Logger
moClient *govmomi.Client
vimDriver *vim25.Client
vsanDriver *vsan.Client
finder *find.Finder
pm *performance.Manager
vm *view.Manager
cfg *Config
}

var newVcenterClient = defaultNewVcenterClient

func defaultNewVcenterClient(c *Config) *vcenterClient {
func defaultNewVcenterClient(l *zap.Logger, c *Config) *vcenterClient {
return &vcenterClient{
cfg: c,
logger: l,
cfg: c,
}
}

Expand Down Expand Up @@ -70,10 +80,15 @@ func (vc *vcenterClient) EnsureConnection(ctx context.Context) error {
}
vc.moClient = client
vc.vimDriver = client.Client
vc.pc = property.DefaultCollector(vc.vimDriver)
vc.finder = find.NewFinder(vc.vimDriver)
vc.pm = performance.NewManager(vc.vimDriver)
vc.vm = view.NewManager(vc.vimDriver)
vsanDriver, err := vsan.NewClient(ctx, vc.vimDriver)
if err != nil {
vc.logger.Info(fmt.Errorf("could not create VSAN client: %w", err).Error())
} else {
vc.vsanDriver = vsanDriver
}
return nil
}

Expand Down Expand Up @@ -320,3 +335,240 @@ func (vc *vcenterClient) PerfMetricsQuery(
resultsByRef: resultsByRef,
}, nil
}

// VSANQueryResults aggregates the vSAN metric data returned by a query.
type VSANQueryResults struct {
	// MetricResultsByUUID maps a resource UUID string to the vSAN metric
	// results collected for that resource.
	MetricResultsByUUID map[string]*VSANMetricResults
}

// VSANMetricResults holds the vSAN metric data belonging to one resource.
type VSANMetricResults struct {
	// UUID identifies the resource these metrics relate to.
	UUID string
	// MetricDetails carries the returned value info, one entry per metric.
	MetricDetails []*VSANMetricDetails
}

// VSANMetricDetails holds the vSAN data reported for one metric.
type VSANMetricDetails struct {
	// MetricLabel is the label of the metric.
	MetricLabel string
	// Interval is the metric interval, expressed in seconds.
	Interval int32
	// Timestamps lists the timestamp of every metric value.
	Timestamps []*time.Time
	// Values lists every value reported for the metric label.
	Values []int64
}

// vSANQueryType identifies which kind of entity a vSAN query targets.
type vSANQueryType string

// VSANQueryTypeVirtualMachines is the query type covering virtual machines.
const VSANQueryTypeVirtualMachines vSANQueryType = "virtual-machine:*"

// getLabelsForQueryType returns the vSAN metric labels to request for the
// given query type. An unrecognised query type yields an empty, non-nil slice.
func (vc *vcenterClient) getLabelsForQueryType(queryType vSANQueryType) []string {
	if queryType == VSANQueryTypeVirtualMachines {
		return []string{
			"iopsRead",
			"iopsWrite",
			"throughputRead",
			"throughputWrite",
			"latencyRead",
			"latencyWrite",
		}
	}

	return []string{}
}

// VSANVirtualMachines queries virtual machine vSAN performance metrics for
// every cluster in clusterRefs.
//
// The error produced by the query is passed through handleVSANError before
// being returned, so the returned error may be nil even though the query
// itself reported a problem.
func (vc *vcenterClient) VSANVirtualMachines(
	ctx context.Context,
	clusterRefs []*vt.ManagedObjectReference,
) (*VSANQueryResults, error) {
	const queryType = VSANQueryTypeVirtualMachines

	results, queryErr := vc.vSANQuery(ctx, queryType, clusterRefs)
	return results, vc.handleVSANError(queryErr, queryType)
}

// vSANQuery runs a vSAN query of the given type against each cluster in
// clusterRefs and merges the per-cluster results into a single result set
// keyed by UUID.
//
// The first cluster query that fails stops the iteration; the results merged
// so far are returned together with that error.
func (vc *vcenterClient) vSANQuery(
	ctx context.Context,
	queryType vSANQueryType,
	clusterRefs []*vt.ManagedObjectReference,
) (*VSANQueryResults, error) {
	merged := &VSANQueryResults{
		MetricResultsByUUID: make(map[string]*VSANMetricResults),
	}

	for i := range clusterRefs {
		clusterResults, err := vc.vSANQueryByCluster(ctx, queryType, clusterRefs[i])
		if err != nil {
			return merged, err
		}

		// Later clusters overwrite earlier entries that share a UUID.
		maps.Copy(merged.MetricResultsByUUID, clusterResults.MetricResultsByUUID)
	}

	return merged, nil
}

// vSANQueryByCluster runs a vSAN query of the given type against a single
// cluster and returns the converted results keyed by UUID.
//
// When no vSAN client is available an empty result set is returned without
// error. A failure of the query itself returns a nil result; a failure while
// converting a returned entity returns the results converted so far together
// with the error.
func (vc *vcenterClient) vSANQueryByCluster(
	ctx context.Context,
	queryType vSANQueryType,
	clusterRef *vt.ManagedObjectReference,
) (*VSANQueryResults, error) {
	queryResults := &VSANQueryResults{
		MetricResultsByUUID: map[string]*VSANMetricResults{},
	}
	// Not all vCenters support vSAN so just return an empty result
	if vc.vsanDriver == nil {
		return queryResults, nil
	}

	// The same instant is used for both ends of the requested time range.
	now := time.Now()
	spec := types.VsanPerfQuerySpec{
		EntityRefId: string(queryType),
		StartTime:   &now,
		EndTime:     &now,
		Labels:      vc.getLabelsForQueryType(queryType),
	}

	rawResults, err := vc.vsanDriver.VsanPerfQueryPerf(ctx, clusterRef, []types.VsanPerfQuerySpec{spec})
	if err != nil {
		return nil, fmt.Errorf("problem retrieving %s vSAN metrics for cluster %s: %w", queryType, clusterRef.Value, err)
	}

	for _, rawResult := range rawResults {
		metricResults, convErr := vc.convertVSANResultToMetricResults(rawResult)
		switch {
		case convErr == nil:
			queryResults.MetricResultsByUUID[metricResults.UUID] = metricResults
		case metricResults != nil:
			// A partial result still carries the UUID, so include it in the message.
			return queryResults, fmt.Errorf("problem processing %s [%s] vSAN metrics for cluster %s: %w", queryType, metricResults.UUID, clusterRef.Value, convErr)
		default:
			return queryResults, fmt.Errorf("problem processing %s vSAN metrics for cluster %s: %w", queryType, clusterRef.Value, convErr)
		}
	}

	return queryResults, nil
}

// handleVSANError decides whether an error from a vSAN query should be
// surfaced to the caller.
//
// Only the error directly wrapped by err is inspected. If that wrapped error
// is a SOAP fault whose detail type name (or, when no detail is present, whose
// fault string) is "NotSupported" or "NotFound", the condition is logged at
// debug level and nil is returned. In every other case, including a nil err,
// err is returned unchanged.
func (vc *vcenterClient) handleVSANError(
	err error,
	queryType vSANQueryType,
) error {
	wrapped := errors.Unwrap(err)
	if wrapped == nil || !soap.IsSoapFault(wrapped) {
		return err
	}

	fault := soap.ToSoapFault(wrapped)
	reason := fault.String
	if detail := fault.Detail.Fault; detail != nil {
		// Prefer the Go type name of the fault detail over the fault string.
		reason = reflect.TypeOf(detail).Name()
	}

	if reason == "NotSupported" {
		vc.logger.Debug(fmt.Sprintf("%s vSAN metrics not supported: %s", queryType, err.Error()))
		return nil
	}
	if reason == "NotFound" {
		vc.logger.Debug(fmt.Sprintf("no %s vSAN metrics found: %s", queryType, err.Error()))
		return nil
	}

	return err
}

// convertVSANResultToMetricResults converts one raw vSAN CSV entity result
// into a VSANMetricResults.
//
// The UUID is taken from the result's EntityRefId, SampleInfo is split on
// commas into timestamps, and each entry of Value is converted into metric
// details against those timestamps.
//
// If the UUID cannot be extracted, a nil result is returned with the error.
// If a timestamp or metric fails to parse, the partially filled result (which
// still carries the UUID) is returned with the error.
func (vc *vcenterClient) convertVSANResultToMetricResults(vSANResult types.VsanPerfEntityMetricCSV) (*VSANMetricResults, error) {
	uuid, err := vc.uuidFromEntityRefID(vSANResult.EntityRefId)
	if err != nil {
		return nil, err
	}

	converted := &VSANMetricResults{
		UUID:          uuid,
		MetricDetails: []*VSANMetricDetails{},
	}

	// Parse all timestamps
	zoneName, _ := time.Now().Local().Zone()
	sampleTimes := strings.Split(vSANResult.SampleInfo, ",")
	timestamps := make([]time.Time, 0, len(sampleTimes))
	for _, sampleTime := range sampleTimes {
		// Assuming the collector is making the request in the same time zone as the localized response
		// from the vSAN API. Not a great assumption, but otherwise it will almost definitely be wrong
		// if we assume that it is UTC. There is precedent for this method at least.
		parsed, parseErr := time.Parse("2006-01-02 15:04:05 MST", sampleTime+" "+zoneName)
		if parseErr != nil {
			return converted, fmt.Errorf("problem parsing timestamp from %s: %w", sampleTime, parseErr)
		}

		timestamps = append(timestamps, parsed)
	}

	// Parse all metrics
	for i := range vSANResult.Value {
		details, convErr := vc.convertVSANValueToMetricDetails(vSANResult.Value[i], timestamps)
		if convErr != nil {
			return converted, convErr
		}

		converted.MetricDetails = append(converted.MetricDetails, details)
	}

	return converted, nil
}

// convertVSANValueToMetricDetails converts one raw vSAN CSV metric series into
// a VSANMetricDetails, pairing each parsed value with the timestamp at the
// same position in timestamps.
//
// The series' Values string is split on commas and every element is parsed as
// a base-10 int64. When the series reports no collection interval, a warning
// is logged and 300 seconds is assumed.
//
// An error (with a nil result) is returned when the number of values differs
// from the number of timestamps or when a value cannot be parsed; the parse
// error is wrapped so callers can inspect the underlying cause.
func (vc *vcenterClient) convertVSANValueToMetricDetails(
	vSANValue types.VsanPerfMetricSeriesCSV,
	timestamps []time.Time,
) (*VSANMetricDetails, error) {
	metricLabel := vSANValue.MetricId.Label
	metricInterval := vSANValue.MetricId.MetricsCollectInterval
	// If not found assume the interval is 5m
	if metricInterval == 0 {
		vc.logger.Warn(fmt.Sprintf("no interval found for vSAN metric [%s] so assuming 5m", metricLabel))
		metricInterval = 300
	}

	valueStrings := strings.Split(vSANValue.Values, ",")
	if len(valueStrings) != len(timestamps) {
		return nil, fmt.Errorf("number of timestamps [%d] doesn't match number of values [%d] for metric %s", len(timestamps), len(valueStrings), metricLabel)
	}

	metricDetails := VSANMetricDetails{
		MetricLabel: metricLabel,
		Interval:    metricInterval,
		Timestamps:  make([]*time.Time, 0, len(valueStrings)),
		Values:      make([]int64, 0, len(valueStrings)),
	}

	// Match up timestamps with metric values
	for i, valueString := range valueStrings {
		value, err := strconv.ParseInt(valueString, 10, 64)
		if err != nil {
			return nil, fmt.Errorf("problem converting value [%s] for metric %s: %w", valueString, metricLabel, err)
		}

		// The stored pointer refers to the element of the caller's slice, not a copy.
		metricDetails.Timestamps = append(metricDetails.Timestamps, &timestamps[i])
		metricDetails.Values = append(metricDetails.Values, value)
	}

	return &metricDetails, nil
}

// uuidFromEntityRefID extracts the UUID portion of an EntityRefId, i.e.
// everything after the first ':' in id. An error is returned when id contains
// no ':'.
func (vc *vcenterClient) uuidFromEntityRefID(id string) (string, error) {
	_, uuid, found := strings.Cut(id, ":")
	if !found {
		return "", fmt.Errorf("no ':' found in EntityRefId [%s] to parse UUID", id)
	}

	return uuid, nil
}
Loading