inputs.vsphere, reports duplicate points #11168

Closed
yogeshprasad opened this issue May 23, 2022 · 30 comments · Fixed by #12259
Labels
bug (unexpected problem or unintended behavior)

Comments

@yogeshprasad

Relevant telegraf.conf

[agent]
  ## Default data collection interval for all inputs
  interval = "60s"

# Read metrics from one or many vCenters
[[inputs.vsphere]]
  ## List of vCenter URLs to be monitored. These three lines must be uncommented
  ## and edited for the plugin to work.
  vcenters = [ "https://vcenter.local/sdk" ]
  username = "[email protected]"
  password = "secret"

  ## VMs
  ## Typical VM metrics (if omitted or empty, all metrics are collected)
  # vm_include = [ "/*/vm/**"] # Inventory path to VMs to collect (by default all are collected)
  # vm_exclude = [] # Inventory paths to exclude
  vm_metric_include = [
    "cpu.demand.average",
    "cpu.idle.summation",
    "cpu.latency.average",
    "cpu.readiness.average",
    "cpu.ready.summation",
    "cpu.run.summation",
    "cpu.usagemhz.average",
    "cpu.used.summation",
    "cpu.wait.summation",
    "mem.active.average",
    "mem.granted.average",
    "mem.latency.average",
    "mem.swapin.average",
    "mem.swapinRate.average",
    "mem.swapout.average",
    "mem.swapoutRate.average",
    "mem.usage.average",
    "mem.vmmemctl.average",
    "net.bytesRx.average",
    "net.bytesTx.average",
    "net.droppedRx.summation",
    "net.droppedTx.summation",
    "net.usage.average",
    "power.power.average",
    "virtualDisk.numberReadAveraged.average",
    "virtualDisk.numberWriteAveraged.average",
    "virtualDisk.read.average",
    "virtualDisk.readOIO.latest",
    "virtualDisk.throughput.usage.average",
    "virtualDisk.totalReadLatency.average",
    "virtualDisk.totalWriteLatency.average",
    "virtualDisk.write.average",
    "virtualDisk.writeOIO.latest",
    "sys.uptime.latest",
  ]
  # vm_metric_exclude = [] ## Nothing is excluded by default
  # vm_instances = true ## true by default

  ## Hosts
  ## Typical host metrics (if omitted or empty, all metrics are collected)
  # host_include = [ "/*/host/**"] # Inventory path to hosts to collect (by default all are collected)
  # host_exclude = [] # Inventory paths to exclude
  host_metric_include = [
    "cpu.coreUtilization.average",
    "cpu.costop.summation",
    "cpu.demand.average",
    "cpu.idle.summation",
    "cpu.latency.average",
    "cpu.readiness.average",
    "cpu.ready.summation",
    "cpu.swapwait.summation",
    "cpu.usage.average",
    "cpu.usagemhz.average",
    "cpu.used.summation",
    "cpu.utilization.average",
    "cpu.wait.summation",
    "disk.deviceReadLatency.average",
    "disk.deviceWriteLatency.average",
    "disk.kernelReadLatency.average",
    "disk.kernelWriteLatency.average",
    "disk.numberReadAveraged.average",
    "disk.numberWriteAveraged.average",
    "disk.read.average",
    "disk.totalReadLatency.average",
    "disk.totalWriteLatency.average",
    "disk.write.average",
    "mem.active.average",
    "mem.latency.average",
    "mem.state.latest",
    "mem.swapin.average",
    "mem.swapinRate.average",
    "mem.swapout.average",
    "mem.swapoutRate.average",
    "mem.totalCapacity.average",
    "mem.usage.average",
    "mem.vmmemctl.average",
    "net.bytesRx.average",
    "net.bytesTx.average",
    "net.droppedRx.summation",
    "net.droppedTx.summation",
    "net.errorsRx.summation",
    "net.errorsTx.summation",
    "net.usage.average",
    "power.power.average",
    "storageAdapter.numberReadAveraged.average",
    "storageAdapter.numberWriteAveraged.average",
    "storageAdapter.read.average",
    "storageAdapter.write.average",
    "sys.uptime.latest",
  ]
  ## Collect IP addresses? Valid values are "ipv4" and "ipv6"
  # ip_addresses = ["ipv6", "ipv4" ]

  # host_metric_exclude = [] ## Nothing excluded by default
  # host_instances = true ## true by default


  ## Clusters
  # cluster_include = [ "/*/host/**"] # Inventory path to clusters to collect (by default all are collected)
  # cluster_exclude = [] # Inventory paths to exclude
  # cluster_metric_include = [] ## if omitted or empty, all metrics are collected
  # cluster_metric_exclude = [] ## Nothing excluded by default
  # cluster_instances = false ## false by default

  ## Resource Pools
  # datastore_include = [ "/*/host/**"] # Inventory path to datastores to collect (by default all are collected)
  # datastore_exclude = [] # Inventory paths to exclude
  # datastore_metric_include = [] ## if omitted or empty, all metrics are collected
  # datastore_metric_exclude = [] ## Nothing excluded by default
  # datastore_instances = false ## false by default

  ## Datastores
  # datastore_include = [ "/*/datastore/**"] # Inventory path to datastores to collect (by default all are collected)
  # datastore_exclude = [] # Inventory paths to exclude
  # datastore_metric_include = [] ## if omitted or empty, all metrics are collected
  # datastore_metric_exclude = [] ## Nothing excluded by default
  # datastore_instances = false ## false by default

  ## Datacenters
  # datacenter_include = [ "/*/host/**"] # Inventory path to datacenters to collect (by default all are collected)
  # datacenter_exclude = [] # Inventory paths to exclude
  datacenter_metric_include = [] ## if omitted or empty, all metrics are collected
  datacenter_metric_exclude = [ "*" ] ## Datacenters are not collected by default.
  # datacenter_instances = false ## false by default

  ## Plugin Settings
  ## separator character to use for measurement and field names (default: "_")
  # separator = "_"

  ## number of objects to retrieve per query for realtime resources (vms and hosts)
  ## set to 64 for vCenter 5.5 and 6.0 (default: 256)
  # max_query_objects = 256

  ## number of metrics to retrieve per query for non-realtime resources (clusters and datastores)
  ## set to 64 for vCenter 5.5 and 6.0 (default: 256)
  # max_query_metrics = 256

  ## number of go routines to use for collection and discovery of objects and metrics
  # collect_concurrency = 1
  # discover_concurrency = 1

  ## the interval before (re)discovering objects subject to metrics collection (default: 300s)
  # object_discovery_interval = "300s"

  ## timeout applies to any of the API requests made to vCenter
  # timeout = "60s"

  ## When set to true, all samples are sent as integers. This makes the output
  ## data types backwards compatible with Telegraf 1.9 or lower. Normally all
  ## samples from vCenter, with the exception of percentages, are integer
  ## values, but under some conditions, some averaging takes place internally in
  ## the plugin. Setting this flag to "false" will send values as floats to
  ## preserve the full precision when averaging takes place.
  # use_int_samples = true

  ## The number of vSphere 5 minute metric collection cycles to look back for non-realtime metrics. In 
  ## some versions (6.7, 7.0 and possibly more), certain metrics, such as cluster metrics, may be reported
  ## with a significant delay (>30min). If this happens, try increasing this number. Please note that increasing
  ## it too much may cause performance issues.
  # metric_lookback = 3

  ## Custom attributes from vCenter can be very useful for queries in order to slice the
  ## metrics along different dimensions and for forming ad-hoc relationships. They are disabled
  ## by default, since they can add a considerable amount of tags to the resulting metrics. To
  ## enable, simply set custom_attribute_exclude to [] (empty set) and use custom_attribute_include
  ## to select the attributes you want to include.
  # custom_attribute_include = []
  # custom_attribute_exclude = ["*"]

  ## Optional SSL Config
  # ssl_ca = "/path/to/cafile"
  # ssl_cert = "/path/to/certfile"
  # ssl_key = "/path/to/keyfile"
  ## Use SSL but skip chain & host verification
  # insecure_skip_verify = false

Logs from Telegraf

No relevant logs

System info

Telegraf v1.22.4

Docker

No response

Steps to reproduce

  1. Set interval = "60s" in the agent section
  2. Enable the vSphere input plugin with default settings
  3. Run Telegraf for 5 minutes

Expected behavior

There should not be any duplicate points

Actual behavior

Nearly 10% of the points are duplicates.

Additional info

This is the function that creates duplicate points, since the timestamps are adjusted here:
https://github.com/influxdata/telegraf/blob/master/plugins/inputs/vsphere/endpoint.go#L1104

@yogeshprasad added the bug label May 23, 2022
@powersj
Contributor

powersj commented May 24, 2022

Hi,

Would you be willing to share some of the example duplicate points from a telegraf log? It would be helpful to use the [[outputs.file]] output to see what metrics you are getting and if they do show up multiple times.
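
For reference, a minimal file output along these lines can be used to capture the raw metrics to a local file for comparison (the file path here is just an example):

[[outputs.file]]
  ## Write gathered metrics to a local file in line protocol for inspection
  files = ["/tmp/metrics.out"]
  data_format = "influx"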

Thanks!

@powersj added the waiting for response label May 24, 2022
@yogeshprasad
Author

Hi @powersj
Here is the file metrics.out.zip; it has data for more than 10 minutes, and nearly 25% of the data is duplicated.
Some of the metrics:

"vsphere.vm.mem.usage.average" source="skajagar-win2008" 1653477360000 6.99 "esxhostname"="gty-a003.test.com" "dcname"="WF_BGLR_INT_DC" "clustername"="WF_BGLR_INT_Cluster01" "_wavefront_source"="proxy::abcd-efgh-241-239.test.com" "vcenter"="int-vc.test.com" "guest"="winLonghorn64" "uuid"="420b5c3c-6f1b-d231-314b-23b88901cd98" "vmname"="skajagar-win2008" "moid"="vm-129" "guesthostname"="WIN-PR69FI29KLE"
--->"vsphere.vm.mem.usage.average" source="skajagar-win2008" 1653477360000 6.99 "esxhostname"="gty-a003.test.com" "dcname"="WF_BGLR_INT_DC" "clustername"="WF_BGLR_INT_Cluster01" "_wavefront_source"="proxy::abcd-efgh-241-239.test.com" "vcenter"="int-vc.test.com" "guest"="winLonghorn64" "vmname"="skajagar-win2008" "uuid"="420b5c3c-6f1b-d231-314b-23b88901cd98" "moid"="vm-129" "guesthostname"="WIN-PR69FI29KLE"

"vsphere.host.cpu.used.summation" source="gty-a003.test.com" 1653477360000 43.0 "esxhostname"="gty-a003.test.com" "dcname"="WF_BGLR_INT_DC" "clustername"="WF_BGLR_INT_Cluster01" "_wavefront_source"="proxy::abcd-efgh-241-239.test.com" "vcenter"="int-vc.test.com" "cpu"="81" "moid"="host-17"
--->"vsphere.host.cpu.used.summation" source="gty-a003.test.com" 1653477360000 48.0 "esxhostname"="gty-a003.test.com" "dcname"="WF_BGLR_INT_DC" "clustername"="WF_BGLR_INT_Cluster01" "_wavefront_source"="proxy::abcd-efgh-241-239.test.com" "vcenter"="int-vc.test.com" "cpu"="81" "moid"="host-17"

"vsphere.host.cpu.coreUtilization.average" source="gty-a003.test.com" 1653477360000 1.04 "esxhostname"="gty-a003.test.com" "dcname"="WF_BGLR_INT_DC" "clustername"="WF_BGLR_INT_Cluster01" "_wavefront_source"="proxy::abcd-efgh-241-239.test.com" "vcenter"="int-vc.test.com" "cpu"="31" "moid"="host-17"
--->"vsphere.host.cpu.coreUtilization.average" source="gty-a003.test.com" 1653477360000 0.98 "esxhostname"="gty-a003.test.com" "dcname"="WF_BGLR_INT_DC" "clustername"="WF_BGLR_INT_Cluster01" "_wavefront_source"="proxy::abcd-efgh-241-239.test.com" "vcenter"="int-vc.test.com" "cpu"="31" "moid"="host-17"

@telegraf-tiger bot removed the waiting for response label May 26, 2022
@powersj
Contributor

powersj commented May 27, 2022

Which output are you pushing this to? And which output produced the samples in your comment above?

When looking at the metrics.out file, I started at the timestamp you first referenced, 1653477360000, and searched for "winLonghorn64". There are some very similar lines, but they are different.

For example:

- vsphere_vm_power,clustername=WF_BGLR_INT_Cluster01,dcname=WF_BGLR_INT_DC,esxhostname=gty-a003.test.com,guest=winLonghorn64,guesthostname=WIN-PR69FI29KLE,host=telegraf_rhel_nossl,moid=vm-129,source=skajagar-win2008,uuid=420b5c3c-6f1b-d231-314b-23b88901cd98,vcenter=int-vc.test.com,vmname=skajagar-win2008 power_average=244i 1653477360000000000
+ vsphere_vm_power,clustername=WF_BGLR_INT_Cluster01,dcname=WF_BGLR_INT_DC,esxhostname=gty-a003.test.com,guest=winLonghorn64,guesthostname=WIN-PR69FI29KLE,host=telegraf_rhel_nossl,moid=vm-129,source=skajagar-win2008,uuid=420b5c3c-6f1b-d231-314b-23b88901cd98,vcenter=int-vc.test.com,vmname=skajagar-win2008 power_average=247i 1653477360000000000

In the above, the tags are all the same, but the field value reported for power_average is different between the two.

A similar example looking at the memory output:

- vsphere_vm_mem,clustername=WF_BGLR_INT_Cluster01,dcname=WF_BGLR_INT_DC,esxhostname=gty-a003.test.com,guest=winLonghorn64,guesthostname=WIN-FCCHP2L47JJ,host=telegraf_rhel_nossl,moid=vm-131,source=skajagar-win2016-Activated,uuid=420bc4e3-d966-403e-f6f7-7bb6223b6fae,vcenter=int-vc.test.com,vmname=skajagar-win2016-Activated swapinRate_average=0i,active_average=419429i,swapout_average=0i,swapin_average=0i,swapoutRate_average=0i,latency_average=0,vmmemctl_average=0i,usage_average=9.99,granted_average=4194304i 1653477360000000000
+ vsphere_vm_mem,clustername=WF_BGLR_INT_Cluster01,dcname=WF_BGLR_INT_DC,esxhostname=gty-a003.test.com,guest=winLonghorn64,guesthostname=WIN-FCCHP2L47JJ,host=telegraf_rhel_nossl,moid=vm-131,source=skajagar-win2016-Activated,uuid=420bc4e3-d966-403e-f6f7-7bb6223b6fae,vcenter=int-vc.test.com,vmname=skajagar-win2016-Activated active_average=398458i,granted_average=4194304i,swapout_average=0i,swapin_average=0i,swapoutRate_average=0i,swapinRate_average=0i,latency_average=0,vmmemctl_average=0i,usage_average=9.49 1653477360000000000

Here the active_average is different as well.

So I would not say that these are duplicates. What I do wonder about is why the metric is showing up twice with the same timestamp.

Can you please provide the rest of your configuration?

@powersj added the waiting for response label May 27, 2022
@yogeshprasad
Author

@powersj Thanks for the analysis.
As you mentioned, in the example below everything is the same except the value, and that is exactly the problem. How can a resource have two different values at the same time? Because of this we are not able to conclude which value is the correct one.

- vsphere_vm_power,clustername=WF_BGLR_INT_Cluster01,dcname=WF_BGLR_INT_DC,esxhostname=gty-a003.test.com,guest=winLonghorn64,guesthostname=WIN-PR69FI29KLE,host=telegraf_rhel_nossl,moid=vm-129,source=skajagar-win2008,uuid=420b5c3c-6f1b-d231-314b-23b88901cd98,vcenter=int-vc.test.com,vmname=skajagar-win2008 power_average=244i 1653477360000000000
+ vsphere_vm_power,clustername=WF_BGLR_INT_Cluster01,dcname=WF_BGLR_INT_DC,esxhostname=gty-a003.test.com,guest=winLonghorn64,guesthostname=WIN-PR69FI29KLE,host=telegraf_rhel_nossl,moid=vm-129,source=skajagar-win2008,uuid=420b5c3c-6f1b-d231-314b-23b88901cd98,vcenter=int-vc.test.com,vmname=skajagar-win2008 power_average=247i 1653477360000000000

@telegraf-tiger bot removed the waiting for response label May 28, 2022
@powersj
Contributor

powersj commented Jun 1, 2022

Since I don't think the plugin would suddenly start generating duplicate metrics, it looks like you may have two plugins running at the same time.

Some follow up questions:

  • How are you running Telegraf?
  • How many configuration files do you have?
  • Can you provide your entire configuration file(s) with secrets removed?
  • Do you have multiple vsphere input plugins configured? Can you provide the start of the logs which shows what plugins are loaded?

If you add the following to your config, does the output change?

[[inputs.vsphere]]
  name_override = "vsphere_local"
  vcenters = [ "https://vcenter.local/sdk" ]

I would expect to see a single metric called "vsphere_local".

@powersj added the waiting for response label Jun 1, 2022
@yogeshprasad
Author

  • Running Telegraf as a Service
2022-06-02T13:17:30Z I! Starting Telegraf 1.22.3
2022-06-02T13:17:30Z I! Loaded inputs: vsphere
2022-06-02T13:17:30Z I! Loaded aggregators:
2022-06-02T13:17:30Z I! Loaded processors:
2022-06-02T13:17:30Z I! Loaded outputs: file wavefront
2022-06-02T13:17:30Z I! Tags enabled: host=sk-tel-centos-8
2022-06-02T13:17:30Z W! Deprecated inputs: 0 and 1 options
2022-06-02T13:17:30Z I! [agent] Config: Interval:1m0s, Quiet:false, Hostname:"sk-tel-centos-8", Flush Interval:10s
2022-06-02T13:17:30Z I! [inputs.vsphere] Starting plugin
2022-06-02T13:17:31Z I! connected to Wavefront proxy at address: 19.201.21.22:2878
  • How many configuration files do you have?

    1. telegraf.conf
    2. vsphere.conf
    3. files.conf
    4. wavefront.conf
  • Do you have multiple vsphere input plugins configured? Can you provide the start of the logs which shows what plugins are loaded?

No, we have only one vsphere plugin enabled.

  • If you add the following to your config, does the output change?

Yes, all metric names got replaced with vsphere_local.

@telegraf-tiger bot removed the waiting for response label Jun 2, 2022
@powersj
Contributor

powersj commented Jun 2, 2022

  • What version of vsphere are you running?
  • If you exclude all the vm_metrics with vm_metric_exclude = [ "*" ] and delete the vm_metric_include array, do you still get duplicates?

@powersj added the waiting for response label Jun 2, 2022
@yogeshprasad
Author

What version of vsphere are you running?

vSphere Client version 7.0.1.00200

If you exclude all the vm_metrics with vm_metric_exclude = [ "*" ] and delete the vm_metric_include array, do you still get duplicates?

We are getting duplicates for all kinds of metrics.

I suspect these two places, since the timestamps are adjusted here:
https://github.com/influxdata/telegraf/blob/master/plugins/inputs/vsphere/endpoint.go#L1104
https://github.com/influxdata/telegraf/blob/master/plugins/inputs/vsphere/endpoint.go#L936

@telegraf-tiger bot removed the waiting for response label Jun 3, 2022
@powersj
Contributor

powersj commented Jun 3, 2022

We are getting duplicates for all kinds of metrics.

Does that mean you still are getting duplicates with excluding VM metrics?

@yogeshprasad
Author

I disabled the VM metrics as you suggested, captured the results for 7 minutes, and found nearly 33% duplicates:

Total Points: 30113
Duplicate points: 10008
Duplicate %: 33.2348155281772

@powersj
Contributor

powersj commented Jun 10, 2022

Can you do one more thing: run with --debug and get me the full logs? The plugin appears to have quite a few debug statements, and I'd like to follow along with what is happening.

@powersj added the waiting for response label Jun 10, 2022
@yogeshprasad
Author

Please find the log file with debug enabled
out.log

@telegraf-tiger bot removed the waiting for response label Jun 13, 2022
@yogeshprasad
Author

@powersj are we still looking into it?

@powersj
Contributor

powersj commented Jun 22, 2022

It's on my list, but not something I've gotten back around to. I have only briefly looked at the log you provided, and I do believe the next step is to add a bit more logging to see where duplicates are getting created.

@yogeshprasad
Author

If you need any help, please let me know; I can add extra logs wherever you suggest and provide you with the output.

@yogeshprasad
Author

@powersj any idea when this will be prioritized?
I have to communicate the same to the customers.

@powersj
Contributor

powersj commented Jun 30, 2022

Thanks again for the logs.

It would be really nice to isolate this down to a metric that we know is duplicated. For example, we identified the power.power.average metric as a duplicate. Can you run and collect that metric for only a VM, excluding all other metrics, with debugging still enabled? I believe this config, with the correct address and credentials, would do this:

[agent]
  interval = "60s"
  debug = true
  
[[inputs.vsphere]]
  vcenters = [ "https://vcenter.local/sdk" ]
  username = "[email protected]"
  password = "secret"

  vm_metric_include = ["power.power.average"]
  
  host_metric_exclude = ["*"]
  cluster_metric_exclude = ["*"]
  datastore_metric_exclude = ["*"]
  datacenter_metric_exclude = ["*"]
  resourcepool_metric_exclude = ["*"]

Then look at the data and let me know if you still see the duplicates. If so, please include the debug log.

If that does not produce duplicates, then I would make the following change to the config:

--- host_metric_exclude = ["*"]
+++ host_metric_exclude = ["power.power.average"]

Based on the inventory path example, it does look like there are multiple ways to reference a VM: either via the host folder path or the VM path. I have not looked deeper into this, but that seems like an obvious place where duplicate metrics could be showing up.

@yogeshprasad
Author

@powersj
Thanks for taking a look into it. I see other metrics as duplicates as well. To me, the line below looks suspicious:
https://github.com/influxdata/telegraf/blob/master/plugins/inputs/vsphere/endpoint.go#L1121

As we know, real-time metrics are available at 20-second granularity. Let's say we have a one-minute refresh rate, and the last time we reported points was at 10:30, with timestamps {10:30:00, 10:31:00}. The next refresh will happen at 10:31, and for real-time metrics we will get 3 points since they have 20-second granularity; let's say those 3 points have timestamps {10:30:20, 10:30:40, 10:31:00}. After Truncate this gets converted to two points with timestamps {10:30:00, 10:31:00}, and in this case the first point becomes a duplicate because we already reported a point with that timestamp in the last refresh.
Below is the output after adding extra logging:

2022-07-01T14:41:06Z D! [inputs.vsphere] Before Align timestamp: [{{} 2022-07-01 14:40:20 +0000 UTC 20} {{} 2022-07-01 14:40:40 +0000 UTC 20} {{} 2022-07-01 14:41:00 +0000 UTC 20}] values: [2 17 1]
2022-07-01T14:41:06Z D! [inputs.vsphere] After Align timestamp: [{{} 2022-07-01 14:40:00 +0000 UTC 20} {{} 2022-07-01 14:41:00 +0000 UTC 20}] values: [9.5 1]
2022-07-01T14:41:06Z D! [inputs.vsphere] Before Align timestamp: [{{} 2022-07-01 14:40:20 +0000 UTC 20} {{} 2022-07-01 14:40:40 +0000 UTC 20} {{} 2022-07-01 14:41:00 +0000 UTC 20}] values: [0 0 0]
2022-07-01T14:41:06Z D! [inputs.vsphere] After Align timestamp: [{{} 2022-07-01 14:40:00 +0000 UTC 20} {{} 2022-07-01 14:41:00 +0000 UTC 20}] values: [0 0]
2022-07-01T14:41:06Z D! [inputs.vsphere] Before Align timestamp: [{{} 2022-07-01 14:40:20 +0000 UTC 20} {{} 2022-07-01 14:40:40 +0000 UTC 20} {{} 2022-07-01 14:41:00 +0000 UTC 20}] values: [0 0 0]
2022-07-01T14:41:06Z D! [inputs.vsphere] After Align timestamp: [{{} 2022-07-01 14:40:00 +0000 UTC 20} {{} 2022-07-01 14:41:00 +0000 UTC 20}] values: [0 0]
2022-07-01T14:41:06Z D! [inputs.vsphere] Before Align timestamp: [{{} 2022-07-01 14:40:20 +0000 UTC 20} {{} 2022-07-01 14:40:40 +0000 UTC 20} {{} 2022-07-01 14:41:00 +0000 UTC 20}] values: [68 201 98]
2022-07-01T14:41:06Z D! [inputs.vsphere] After Align timestamp: [{{} 2022-07-01 14:40:00 +0000 UTC 20} {{} 2022-07-01 14:41:00 +0000 UTC 20}] values: [134.5 98]

I did some experiments: with the alignSamples function removed I found no duplicates, and with it in place I get roughly 25% duplicates.
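
A minimal sketch (not the plugin's actual implementation) of the truncation effect described above, assuming a 60-second interval and the 20-second real-time samples from the debug log:

package main

import (
	"fmt"
	"time"
)

func main() {
	interval := time.Minute

	// Sample timestamps returned in one collection cycle at 20-second granularity.
	samples := []time.Time{
		time.Date(2022, 7, 1, 14, 40, 20, 0, time.UTC),
		time.Date(2022, 7, 1, 14, 40, 40, 0, time.UTC),
		time.Date(2022, 7, 1, 14, 41, 0, 0, time.UTC),
	}

	// Truncating each timestamp to the 60-second boundary collapses the first two
	// samples onto 14:40:00, a timestamp the previous cycle may already have emitted.
	for _, ts := range samples {
		fmt.Printf("%s -> %s\n", ts.Format(time.RFC3339), ts.Truncate(interval).Format(time.RFC3339))
	}
}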

@yogeshprasad
Author

@powersj does this analysis make any sense or am I missing something here?

@powersj
Contributor

powersj commented Jul 7, 2022

As we know, real-time metrics are available at 20-second granularity

We do not all know this, at least I did not :) Telegraf has hundreds of plugins connecting to various services and software, but none of the maintainers currently know all of the plugins on a deep level. I do see the discussion about real-time vs. historical metrics in the README, but it was not yet clear to me that it could be the culprit.

As such, I was still really hoping to at least see the logs with a single metric included, to learn more about how a single metric makes its way through the plugin, see how the vSphere interval was set, and check whether any padding was occurring.

After Truncate this gets converted to two points with timestamps {10:30:00, 10:31:00}, and in this case the first point becomes a duplicate because we already reported a point with that timestamp in the last refresh

Said another way, your hypothesis is that at Telegraf's flush interval N+1 we produce a metric that is identical to a metric from flush interval N, due to the alignSamples function? It is not clear to me why this function was even added in #5113 or what problem it solves.

@prydin do you have details or help you could provide on why a user might be seeing duplicate metrics come out of the alignSamples function?

@yogeshprasad
Author

@powersj since we are not getting a response from @prydin, can we make alignSamples configurable?

@prydin
Contributor

prydin commented Aug 3, 2022

Sorry for the delay. Let me have a look at alignSamples. The idea was to avoid duplicates, not create them, so something is clearly amiss there.

@yogeshprasad
Author

Hi @prydin did you get a chance to look into it?

@powersj
Contributor

powersj commented Oct 12, 2022

Hi,

Wanted to check in and see if you both were able to resolve the issue?

@powersj added the waiting for response label Oct 12, 2022
@prydin
Contributor

prydin commented Oct 13, 2022

@powersj I'm trying to reproduce this in my lab right now. I'll get back to you once I have an idea what's going on.

@telegraf-tiger bot removed the waiting for response label Oct 13, 2022
@powersj
Contributor

powersj commented Oct 13, 2022

Thanks!

@powersj added the waiting for response label Oct 13, 2022
@telegraf-tiger
Contributor

Hello! I am closing this issue due to inactivity. I hope you were able to resolve your problem, if not please try posting this question in our Community Slack or Community Page. Thank you!

@prydin
Contributor

prydin commented Oct 28, 2022

Not sure why this got auto-closed. @powersj I think I might have found a workaround: have you tried adding metric_lookback = 0 to your config file? This seems to solve the problem. Make sure you're not seeing any gaps in the data.
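
For reference, a sketch of where that setting would go, reusing the placeholder vCenter address from the config above:

[[inputs.vsphere]]
  vcenters = [ "https://vcenter.local/sdk" ]
  username = "[email protected]"
  password = "secret"

  ## Workaround suggested above: do not look back extra 5-minute collection cycles
  metric_lookback = 0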

@telegraf-tiger bot removed the waiting for response label Oct 28, 2022
@powersj
Contributor

powersj commented Oct 28, 2022

@yogeshprasad can you try the above suggestion?

@powersj added the waiting for response label Oct 28, 2022
@powersj reopened this Nov 17, 2022
@powersj
Contributor

powersj commented Nov 17, 2022

@yogeshprasad not sure if you have already, but can you try the PR in #12259?

@telegraf-tiger bot removed the waiting for response label Nov 17, 2022