
Commit

feat(inputs.vsphere): Collect resource pools metrics and add resource pool tag in VM metrics (#10574)

* Collect Resource Pool metrics; Add rpname tag on VM metrics

* Update vSphere readme file

* Update vSphere readme file

* Correct typo in vSphere Readme

* Correct Markdown of metrics.md

* Fix metrics file

* Fix code in endpoint (filter); add some tests

* Update plugins/inputs/vsphere/endpoint.go

That's right, I'll commit this suggestion.

Co-authored-by: Sebastian Spaink <[email protected]>

* Removed Context and Endpoint from getResourcePoolName func

Co-authored-by: Simon LAMBERT <[email protected]>
Co-authored-by: Sebastian Spaink <[email protected]>
3 people authored May 12, 2022
1 parent b36953d commit fa72335
Showing 6 changed files with 233 additions and 71 deletions.
53 changes: 53 additions & 0 deletions plugins/inputs/vsphere/METRICS.md
@@ -193,6 +193,59 @@ vmop.numSVMotion.latest
vmop.numXVMotion.latest
```

## Resource Pool Metrics

```metrics
cpu.usagemhz.average
cpu.cpuentitlement.latest
cpu.usagemhz.minimum
cpu.usagemhz.maximum
cpu.capacity.entitlement.average
cpu.capacity.usage.average
cpu.capacity.demand.average
cpu.capacity.contention.average
cpu.corecount.provisioned.average
cpu.corecount.contention.average
disk.throughput.usage.average
disk.throughput.contention.average
mem.capacity.contention.average
mem.overhead.average
mem.consumed.average
mem.granted.average
mem.active.average
mem.shared.average
mem.zero.average
mem.swapped.average
mem.vmmemctl.average
mem.capacity.provisioned.average
mem.capacity.entitlement.average
mem.capacity.usage.average
mem.mementitlement.latest
mem.compressed.average
mem.compressionRate.average
mem.decompressionRate.average
mem.overhead.minimum
mem.consumed.minimum
mem.granted.minimum
mem.active.minimum
mem.shared.minimum
mem.zero.minimum
mem.swapped.minimum
mem.vmmemctl.maximum
mem.overhead.maximum
mem.consumed.maximum
mem.granted.maximum
mem.active.maximum
mem.shared.maximum
mem.zero.maximum
mem.swapped.maximum
mem.vmmemctl.minimum
net.throughput.usage.average
net.throughput.contention.summation
power.power.average
power.energy.summation
```

## Cluster Metrics

```metrics
27 changes: 23 additions & 4 deletions plugins/inputs/vsphere/README.md
Expand Up @@ -4,6 +4,7 @@ The VMware vSphere plugin uses the vSphere API to gather metrics from multiple v

* Clusters
* Hosts
* Resource Pools
* VMs
* Datastores

@@ -140,7 +141,14 @@ vm_metric_exclude = [ "*" ]
# cluster_metric_exclude = [] ## Nothing excluded by default
# cluster_instances = false ## false by default

## Resource Pools
# resource_pool_include = [ "/*/host/**"] # Inventory path to resource pools to collect (by default all are collected)
# resource_pool_exclude = [] # Inventory paths to exclude
# resource_pool_metric_include = [] ## if omitted or empty, all metrics are collected
# resource_pool_metric_exclude = [] ## Nothing excluded by default
# resource_pool_instances = false ## false by default

## Datastores
# datastore_include = [ "/*/datastore/**"] # Inventory path to datastores to collect (by default all are collected)
# datastore_exclude = [] # Inventory paths to exclude
# datastore_metric_include = [] ## if omitted or empty, all metrics are collected
@@ -252,10 +260,13 @@ to a file system. A vSphere inventory has a structure similar to this:
| | | +-VM1
| | | +-VM2
| | | +-hadoop1
| | +-ResourcePool1
| | | +-VM3
| | | +-VM4
| +-Host2 # Dummy cluster created for non-clustered host
| | +-Host2
| | | +-VM5
| | | +-VM6
+-vm # VM folder (created by system)
| +-VM1
| +-VM2
@@ -289,7 +300,7 @@ We can extend this to looking at a cluster level: ```/DC0/host/Cluster1/*/hadoop
vCenter keeps two different kinds of metrics, known as realtime and historical metrics.

* Realtime metrics: Available at a 20-second granularity. These metrics are stored in memory and are very fast and cheap to query. Our tests have shown that a complete set of realtime metrics for 7000 virtual machines can be obtained in less than 20 seconds. Realtime metrics are only available on **ESXi host** and **virtual machine** resources, and are only stored for one hour in vCenter.
* Historical metrics: Available at 5-minute, 30-minute, 2-hour and 24-hour rollup levels. The vSphere Telegraf plugin only uses the most granular rollup, which defaults to 5 minutes but can be changed in vCenter to other interval durations. These metrics are stored in the vCenter database and can be expensive and slow to query. Historical metrics are the only type of metrics available for **clusters**, **datastores**, **resource pools** and **datacenters**.

For more information, refer to the vSphere documentation here: <https://pubs.vmware.com/vsphere-50/index.jsp?topic=%2Fcom.vmware.wssdk.pg.doc_50%2FPG_Ch16_Performance.18.2.html>

@@ -314,6 +325,7 @@ This will disrupt the metric collection and can result in missed samples. The be
datastore_metric_exclude = ["*"]
cluster_metric_exclude = ["*"]
datacenter_metric_exclude = ["*"]
resourcepool_metric_exclude = ["*"]

collect_concurrency = 5
discover_concurrency = 5
@@ -400,6 +412,12 @@ When the vSphere plugin queries vCenter for historical statistics it queries for
* Res CPU: active, max, running
* System: operating system uptime, uptime
* Virtual Disk: seeks, # reads/writes, latency, load
* Resource Pool stats:
* Memory: total, usage, active, latency, swap, shared, vmmemctl
* CPU: capacity, usage, corecount
* Disk: throughput
* Network: throughput
* Power: energy, usage
* Datastore stats:
* Disk: Capacity, provisioned, used

@@ -415,6 +433,7 @@ For a detailed list of commonly available metrics, please refer to [METRICS.md](
* cluster (vcenter cluster)
* esxhost (name of ESXi host)
* guest (guest operating system id)
* rpname (name of resource pool)
* cpu stats for Host and VM
* cpu (cpu core - not all CPU fields will have this tag)
* datastore stats for Host and VM
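Taken together, the new resource pool options slot into the plugin configuration like this (a hedged sketch: option names are those introduced by this PR, the vCenter URL and credentials are placeholders, and all commented values are the defaults):

```toml
[[inputs.vsphere]]
  vcenters = ["https://vcenter.local/sdk"]  # hypothetical vCenter endpoint
  username = "user"
  password = "secret"

  ## Resource Pools
  # resource_pool_include = [ "/*/host/**" ]  # inventory paths to resource pools
  # resource_pool_exclude = []                # inventory paths to exclude
  # resource_pool_metric_include = []         # empty: collect all metrics
  # resource_pool_metric_exclude = []         # nothing excluded by default
  # resource_pool_instances = false
```

Resource pools are a historical-only resource, so as with clusters and datastores these metrics arrive at the historical interval rather than every 20 seconds.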
68 changes: 68 additions & 0 deletions plugins/inputs/vsphere/endpoint.go
@@ -100,6 +100,7 @@ type objectRef struct {
parentRef *types.ManagedObjectReference //Pointer because it must be nillable
guest string
dcname string
rpname string
customValues map[string]string
lookup map[string]string
}
@@ -165,6 +166,24 @@ func NewEndpoint(ctx context.Context, parent *VSphere, address *url.URL, log tel
getObjects: getClusters,
parent: "datacenter",
},
"resourcepool": {
name: "resourcepool",
vcName: "ResourcePool",
pKey: "rpname",
parentTag: "clustername",
enabled: anythingEnabled(parent.ResourcePoolMetricExclude),
realTime: false,
sampling: int32(time.Duration(parent.HistoricalInterval).Seconds()),
objects: make(objectMap),
filters: newFilterOrPanic(parent.ResourcePoolMetricInclude, parent.ResourcePoolMetricExclude),
paths: parent.ResourcePoolInclude,
excludePaths: parent.ResourcePoolExclude,
simple: isSimple(parent.ResourcePoolMetricInclude, parent.ResourcePoolMetricExclude),
include: parent.ResourcePoolMetricInclude,
collectInstances: parent.ResourcePoolInstances,
getObjects: getResourcePools,
parent: "cluster",
},
"host": {
name: "host",
vcName: "HostSystem",
@@ -653,6 +672,35 @@ func getClusters(ctx context.Context, e *Endpoint, resourceFilter *ResourceFilte
return m, nil
}

//noinspection GoUnusedParameter
func getResourcePools(ctx context.Context, e *Endpoint, resourceFilter *ResourceFilter) (objectMap, error) {
var resources []mo.ResourcePool
err := resourceFilter.FindAll(ctx, &resources)
if err != nil {
return nil, err
}
m := make(objectMap)
for _, r := range resources {
m[r.ExtensibleManagedObject.Reference().Value] = &objectRef{
name: r.Name,
ref: r.ExtensibleManagedObject.Reference(),
parentRef: r.Parent,
customValues: e.loadCustomAttributes(&r.ManagedEntity),
}
}
return m, nil
}

func getResourcePoolName(rp types.ManagedObjectReference, rps objectMap) string {
	// Loop through the resource pool objectMap to find the matching reference
	for _, r := range rps {
		if r.ref == rp {
			return r.name
		}
	}
	return "Resources" // Default: vSphere names every root resource pool "Resources"
}

//noinspection GoUnusedParameter
func getHosts(ctx context.Context, e *Endpoint, resourceFilter *ResourceFilter) (objectMap, error) {
var resources []mo.HostSystem
@@ -681,13 +729,29 @@ func getVMs(ctx context.Context, e *Endpoint, resourceFilter *ResourceFilter) (o
return nil, err
}
m := make(objectMap)
client, err := e.clientFactory.GetClient(ctx)
if err != nil {
return nil, err
}
// Create a ResourcePool filter and fetch the full list of resource pools
rprf := ResourceFilter{
	finder:       &Finder{client},
	resType:      "ResourcePool",
	paths:        []string{"/*/host/**"},
	excludePaths: nil,
}
resourcePools, err := getResourcePools(ctx, e, &rprf)
if err != nil {
return nil, err
}
for _, r := range resources {
if r.Runtime.PowerState != "poweredOn" {
continue
}
guest := "unknown"
uuid := ""
lookup := make(map[string]string)
// Get the name of the VM resource pool
rpname := getResourcePoolName(*r.ResourcePool, resourcePools)

// Extract host name
if r.Guest != nil && r.Guest.HostName != "" {
@@ -755,6 +819,7 @@ func getVMs(ctx context.Context, e *Endpoint, resourceFilter *ResourceFilter) (o
parentRef: r.Runtime.Host,
guest: guest,
altID: uuid,
rpname: rpname,
customValues: e.loadCustomAttributes(&r.ManagedEntity),
lookup: lookup,
}
@@ -1191,6 +1256,9 @@ func (e *Endpoint) populateTags(objectRef *objectRef, resourceType string, resou
if resourceType == "vm" && objectRef.altID != "" {
t["uuid"] = objectRef.altID
}
if resourceType == "vm" && objectRef.rpname != "" {
t["rpname"] = objectRef.rpname
}

// Map parent reference
parent, found := e.getParent(objectRef, resource)
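The `getResourcePoolName` helper added in this file is a linear scan over the resource pool map, falling back to `"Resources"` when no reference matches (the name vSphere gives every cluster's root pool, which is what unassigned VMs report). A self-contained sketch of the same logic with the govmomi types replaced by minimal stand-ins:

```go
package main

import "fmt"

// ref is a stand-in for types.ManagedObjectReference; only value
// equality matters for the lookup.
type ref struct{ Value string }

// objectRef is a trimmed version of the plugin's objectRef struct.
type objectRef struct {
	name string
	ref  ref
}

// poolName mirrors getResourcePoolName: scan the known pools for a
// matching reference, defaulting to the root pool name "Resources".
func poolName(rp ref, pools map[string]*objectRef) string {
	for _, p := range pools {
		if p.ref == rp {
			return p.name
		}
	}
	return "Resources"
}

func main() {
	pools := map[string]*objectRef{
		"resgroup-8": {name: "ResourcePool1", ref: ref{Value: "resgroup-8"}},
	}
	fmt.Println(poolName(ref{Value: "resgroup-8"}, pools))  // ResourcePool1
	fmt.Println(poolName(ref{Value: "resgroup-99"}, pools)) // Resources
}
```

Because the map is keyed by the reference's `Value`, a direct map lookup would also work; the linear scan keeps the helper indifferent to how the objectMap is keyed.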
6 changes: 4 additions & 2 deletions plugins/inputs/vsphere/finder.go
@@ -246,6 +246,7 @@ func matchName(f property.Filter, props []types.DynamicProperty) bool {
func init() {
childTypes = map[string][]string{
"HostSystem": {"VirtualMachine"},
"ResourcePool": {"VirtualMachine"},
"ComputeResource": {"HostSystem", "ResourcePool", "VirtualApp"},
"ClusterComputeResource": {"HostSystem", "ResourcePool", "VirtualApp"},
"Datacenter": {"Folder"},
@@ -260,9 +261,10 @@ func init() {
}

addFields = map[string][]string{
"HostSystem":   {"parent", "summary.customValue", "customValue"},
"ResourcePool": {"parent", "customValue"},
"VirtualMachine": {"runtime.host", "config.guestId", "config.uuid", "runtime.powerState",
	"summary.customValue", "guest.net", "guest.hostName", "resourcePool", "customValue"},
"Datastore": {"parent", "info", "customValue"},
"ClusterComputeResource": {"parent", "customValue"},
"Datacenter": {"parent", "customValue"},
