diff --git a/auditbeat/docs/troubleshooting.asciidoc b/auditbeat/docs/troubleshooting.asciidoc index cd5906710f8..19eb279272b 100644 --- a/auditbeat/docs/troubleshooting.asciidoc +++ b/auditbeat/docs/troubleshooting.asciidoc @@ -8,6 +8,7 @@ following tips: * <> * <> +* <> * <> //sets block macro for getting-help.asciidoc included in next section @@ -26,5 +27,15 @@ include::{libbeat-dir}/getting-help.asciidoc[] include::{libbeat-dir}/debugging.asciidoc[] +//sets block macro for metrics-in-logs.asciidoc included in next section +[id="understand-{beatname_lc}-logs"] +[role="xpack"] +== Understand metrics in {beatname_uc} logs + +++++ +Understand logged metrics +++++ + +include::{libbeat-dir}/metrics-in-logs.asciidoc[] diff --git a/filebeat/docs/troubleshooting.asciidoc b/filebeat/docs/troubleshooting.asciidoc index 0edffce46cf..f16042bc871 100644 --- a/filebeat/docs/troubleshooting.asciidoc +++ b/filebeat/docs/troubleshooting.asciidoc @@ -8,6 +8,7 @@ following tips: * <> * <> +* <> * <> //sets block macro for getting-help.asciidoc included in next section @@ -26,5 +27,15 @@ include::{libbeat-dir}/getting-help.asciidoc[] include::{libbeat-dir}/debugging.asciidoc[] +//sets block macro for metrics-in-logs.asciidoc included in next section +[id="understand-{beatname_lc}-logs"] +[role="xpack"] +== Understand metrics in {beatname_uc} logs + +++++ +Understand logged metrics +++++ + +include::{libbeat-dir}/metrics-in-logs.asciidoc[] diff --git a/heartbeat/docs/troubleshooting.asciidoc b/heartbeat/docs/troubleshooting.asciidoc index 12c429715df..ebed4a6863f 100644 --- a/heartbeat/docs/troubleshooting.asciidoc +++ b/heartbeat/docs/troubleshooting.asciidoc @@ -8,6 +8,7 @@ following tips: * <> * <> +* <> * <> //sets block macro for getting-help.asciidoc included in next section @@ -26,5 +27,14 @@ include::{libbeat-dir}/getting-help.asciidoc[] include::{libbeat-dir}/debugging.asciidoc[] +//sets block macro for metrics-in-logs.asciidoc included in next section +[id="understand-{beatname_lc}-logs"] +[role="xpack"] +== Understand metrics in {beatname_uc} logs +++++ +Understand logged metrics +++++ + +include::{libbeat-dir}/metrics-in-logs.asciidoc[] diff --git a/libbeat/docs/metrics-in-logs.asciidoc b/libbeat/docs/metrics-in-logs.asciidoc new file mode 100644 index 00000000000..c499e7462f4 --- /dev/null +++ b/libbeat/docs/metrics-in-logs.asciidoc @@ -0,0 +1,242 @@ + + +Every 30 seconds (by default), {beatname_uc} collects a _snapshot_ of metrics about itself. From this snapshot, {beatname_uc} computes a _delta snapshot_; this delta snapshot contains any metrics that have _changed_ since the last snapshot. Note that the values of the metrics are the values when the snapshot is taken, _NOT_ the _difference_ in values from the last snapshot. + +If this delta snapshot contains _any_ metrics (indicating at least one metric that has changed since the last snapshot), this delta snapshot is serialized as JSON and emitted in {beatname_uc}'s logs at the `INFO` log level. Here is an example of such a log entry: + +[source,json] +---- +{"log.level":"info","@timestamp":"2023-07-14T12:50:36.811Z","log.logger":"monitoring","log.origin":{"file.name":"log/log.go","file.line":187},"message":"Non-zero metrics in the last 30s","service.name":"filebeat","monitoring":{"metrics":{"beat":{"cgroup":{"memory":{"mem":{"usage":{"bytes":0}}}},"cpu":{"system":{"ticks":692690,"time":{"ms":60}},"total":{"ticks":3167250,"time":{"ms":150},"value":3167250},"user":{"ticks":2474560,"time":{"ms":90}}},"handles":{"limit":{"hard":1048576,"soft":1048576},"open":32},"info":{"ephemeral_id":"2bab8688-34c0-4522-80af-db86948d547d","uptime":{"ms":617670096},"version":"8.6.2"},"memstats":{"gc_next":57189272,"memory_alloc":43589824,"memory_total":275281335792,"rss":183574528},"runtime":{"goroutines":212}},"filebeat":{"events":{"active":5,"added":52,"done":49},"harvester":{"open_files":6,"running":6,"started":1}},"libbeat":{"config":{"module":{"running":15}},"output":{"events":{"acked":48,"active":0,"batches":6,"total":48},"read":{"bytes":210},"write":{"bytes":26923}},"pipeline":{"clients":15,"events":{"active":5,"filtered":1,"published":51,"total":52},"queue":{"acked":48}}},"registrar":{"states":{"current":14,"update":49},"writes":{"success":6,"total":6}},"system":{"load":{"1":0.91,"15":0.37,"5":0.4,"norm":{"1":0.1138,"15":0.0463,"5":0.05}}}},"ecs.version":"1.6.0"}} +---- + +[discrete] +== Details + +Focussing on the `.monitoring.metrics` field, and formatting the JSON, it's value is: + +[source,json] +---- +{ + "beat": { + "cgroup": { + "memory": { + "mem": { + "usage": { + "bytes": 0 + } + } + } + }, + "cpu": { + "system": { + "ticks": 692690, + "time": { + "ms": 60 + } + }, + "total": { + "ticks": 3167250, + "time": { + "ms": 150 + }, + "value": 3167250 + }, + "user": { + "ticks": 2474560, + "time": { + "ms": 90 + } + } + }, + "handles": { + "limit": { + "hard": 1048576, + "soft": 1048576 + }, + "open": 32 + }, + "info": { + "ephemeral_id": "2bab8688-34c0-4522-80af-db86948d547d", + "uptime": { + "ms": 617670096 + }, + "version": "8.6.2" + }, + "memstats": { + "gc_next": 57189272, + "memory_alloc": 43589824, + "memory_total": 275281335792, + "rss": 183574528 + }, + "runtime": { + "goroutines": 212 + } + }, + "filebeat": { + "events": { + "active": 5, + "added": 52, + "done": 49 + }, + "harvester": { + "open_files": 6, + "running": 6, + "started": 1 + } + }, + "libbeat": { + "config": { + "module": { + "running": 15 + } + }, + "output": { + "events": { + "acked": 48, + "active": 0, + "batches": 6, + "total": 48 + }, + "read": { + "bytes": 210 + }, + "write": { + "bytes": 26923 + } + }, + "pipeline": { + "clients": 15, + "events": { + "active": 5, + "filtered": 1, + "published": 51, + "total": 52 + }, + "queue": { + "acked": 48 + } + } + }, + "registrar": { + "states": { + "current": 14, + "update": 49 + }, + "writes": { + "success": 6, + "total": 6 + } + }, + "system": { + "load": { + "1": 0.91, + "5": 0.4, + "15": 0.37, + "norm": { + "1": 0.1138, + "5": 0.05, + "15": 0.0463 + } + } + } +} +---- + +The following tables explain the meaning of the most important fields under `.monitoring.metrics` and also provide hints that might be helpful in troubleshooting {beatname_uc} issues. + +[cols="1,1,2,2"] +|=== +| Field path (relative to `.monitoring.metrics`) | Type | Meaning | Troubleshooting hints + +| `.beat` | Object | Information that is common to all Beats, e.g. version, goroutines, file handles, CPU, memory | +| `.libbeat` | Object | Information about the publisher pipeline and output, also common to all Beats | +ifeval::["{beatname_lc}"=="filebeat"] +| `.filebeat` | Object | Information specific to {filebeat}, e.g. harvester, events | +endif::[] +|=== + +[cols="1,1,2,2"] +|=== +| Field path (relative to `.monitoring.metrics.beat`) | Type | Meaning | Troubleshooting hints + +| `.runtime.goroutines` | Integer | Number of goroutines running | If this number grows over time, it indicates a goroutine leak +|=== + +[cols="1,1,2,2"] +|=== +| Field path (relative to `.monitoring.metrics.libbeat`) | Type | Meaning | Troubleshooting hints + +| `.pipeline.events.active` | Integer | Number of events currently in the libbeat publisher pipeline. | If this number grows over time, it may indicate that {beatname_uc} is producing events faster than the output can consume them. Consider increasing the number of output workers (if this setting is supported by the output; {es} and {ls} outputs support this setting). The pipeline includes events currently being processed as well as events in the queue. So this metric can sometimes end up slightly higher than the queue size. If this metric reaches the maximum queue size (`queue.mem.events` for the in-memory queue), it almost certainly indicates backpressure on {beatname_uc}, implying that {beatname_uc} may need to temporarily stop ingesting more events from the source until this backpressure is relieved. +| `.output.events.total` | Integer | Number of events currently being processed by the output. | If this number grows over time, it may indicate that the output destination (e.g. {ls} pipeline or {es} cluster) is not able to accept events at the same or faster rate than what {beatname_uc} is sending to it. +| `.output.events.acked` | Integer | Number of events acknowledged by the output destination. | Generally, we want this number to be the same as `.output.events.total` as this indicates that the output destination has reliably received all the events sent to it. +| `.output.events.failed` | Integer | Number of events that {beatname_uc} tried to send to the output destination, but the destination failed to receive them. | Generally, we want this field to be absent or its value to be zero. When the value is greater than zero, it's useful to check {beatname_uc}'s logs right before this log entry's `@timestamp` to see if there are any connectivity issues with the output destination. Note that failed events are not lost or dropped; they will be sent back to the publisher pipeline for retrying later. +|=== + +ifeval::["{beatname_lc}"=="filebeat"] +[cols="1,1,2,2"] +|=== +| Field path (relative to `.monitoring.metrics.filebeat`) | Type | Meaning | Troubleshooting hints + +| `.events.active` | Integer | Number of events being actively processed by {filebeat} (including events {filebeat} has already sent to the libbeat publisher pipeline, but not including events the pipeline has sent to the output). | If this number grows over time, it may indicate that {filebeat} inputs are harvesting events too fast for the pipeline and output to keep up. +|=== +endif::[] + +[discrete] +== Useful commands + +[discrete] +=== Parse monitoring metrics from unstructured {beatname_uc} logs + +For {beatname_uc} versions that emit unstructured logs, the following script can be +used to parse monitoring metrics from such logs: https://github.com/elastic/beats/blob/main/script/metrics_from_log_file.sh. + + +ifeval::["{beatname_lc}"=="filebeat"] +[discrete] +=== Check if {filebeat} is processing events + +[source] +---- +$ cat beat.log | jq -r '[.["@timestamp"],.monitoring.metrics.filebeat.events.active,.monitoring.metrics.libbeat.pipeline.events.active,.monitoring.metrics.libbeat.output.events.total,.monitoring.metrics.libbeat.output.events.acked,.monitoring.metrics.libbeat.output.events.failed//0] | @tsv' | sort +---- + +Example output: + +[source] +---- +2023-07-14T11:24:36.811Z 1 1 38033 38033 0 +2023-07-14T11:25:06.811Z 1 1 17 17 0 +2023-07-14T11:25:36.812Z 1 1 16 16 0 +2023-07-14T11:26:06.811Z 1 1 17 17 0 +2023-07-14T11:26:36.811Z 2 2 21 21 0 +2023-07-14T11:27:06.812Z 1 1 18 18 0 +2023-07-14T11:27:36.811Z 1 1 17 17 0 +2023-07-14T11:28:06.811Z 1 1 18 18 0 +2023-07-14T11:28:36.811Z 1 1 16 16 0 +2023-07-14T11:37:06.811Z 1 1 270 270 0 +2023-07-14T11:37:36.811Z 1 1 16 16 0 +2023-07-14T11:38:06.811Z 1 1 17 17 0 +2023-07-14T11:38:36.811Z 1 1 16 16 0 +2023-07-14T11:41:36.811Z 3 3 323 323 0 +2023-07-14T11:42:06.811Z 3 3 17 17 0 +2023-07-14T11:42:36.812Z 4 4 18 18 0 +2023-07-14T11:43:06.811Z 4 4 17 17 0 +2023-07-14T11:43:36.811Z 2 2 17 17 0 +2023-07-14T11:47:06.811Z 0 0 117 117 0 +2023-07-14T11:47:36.811Z 2 2 14 14 0 +2023-07-14T11:48:06.811Z 3 3 17 17 0 +2023-07-14T11:48:36.811Z 2 2 17 17 0 +2023-07-14T12:49:36.811Z 3 3 2008 1960 48 +2023-07-14T12:50:06.812Z 2 2 18 18 0 +2023-07-14T12:50:36.811Z 5 5 48 48 0 +---- + +The columns here are: + +1. `.@timestamp` +2. `.monitoring.metrics.filebeat.events.active` +3. `.monitoring.metrics.libbeat.pipeline.events.active` +4. `.monitoring.metrics.libbeat.output.events.total` +5. `.monitoring.metrics.libbeat.output.events.acked` +6. `.monitoring.metrics.libbeat.output.events.failed` +endif::[] diff --git a/metricbeat/docs/troubleshooting.asciidoc b/metricbeat/docs/troubleshooting.asciidoc index 823ad0d3357..f1876af0f27 100644 --- a/metricbeat/docs/troubleshooting.asciidoc +++ b/metricbeat/docs/troubleshooting.asciidoc @@ -7,6 +7,7 @@ If you have issues installing or running {beatname_uc}, read the following tips: * <> * <> +* <> * <> //sets block macro for getting-help.asciidoc included in next section @@ -24,3 +25,15 @@ include::{libbeat-dir}/getting-help.asciidoc[] == Debug include::{libbeat-dir}/debugging.asciidoc[] + +//sets block macro for metrics-in-logs.asciidoc included in next section + +[id="understand-{beatname_lc}-logs"] +[role="xpack"] +== Understand metrics in {beatname_uc} logs + +++++ +Understand logged metrics +++++ + +include::{libbeat-dir}/metrics-in-logs.asciidoc[] \ No newline at end of file diff --git a/packetbeat/docs/troubleshooting.asciidoc b/packetbeat/docs/troubleshooting.asciidoc index f6a3fb23be3..ef210371ad0 100644 --- a/packetbeat/docs/troubleshooting.asciidoc +++ b/packetbeat/docs/troubleshooting.asciidoc @@ -8,6 +8,7 @@ following tips: * <> * <> +* <> * <> * <> @@ -29,6 +30,18 @@ include::{libbeat-dir}/debugging.asciidoc[] //sets block macro for recording-trace content included in next section +//sets block macro for metrics-in-logs.asciidoc included in next section + +[id="understand-{beatname_lc}-logs"] +[role="xpack"] +== Understand metrics in {beatname_uc} logs + +++++ +Understand logged metrics +++++ + +include::{libbeat-dir}/metrics-in-logs.asciidoc[] + [[recording-trace]] == Record a trace diff --git a/winlogbeat/docs/troubleshooting.asciidoc b/winlogbeat/docs/troubleshooting.asciidoc index bc9f7dddacc..e14d6802c2a 100644 --- a/winlogbeat/docs/troubleshooting.asciidoc +++ b/winlogbeat/docs/troubleshooting.asciidoc @@ -7,6 +7,7 @@ If you have issues installing or running Winlogbeat, read the following tips: * <> * <> +* <> * <> //sets block macro for getting-help.asciidoc included in next section @@ -25,3 +26,15 @@ include::{libbeat-dir}/getting-help.asciidoc[] == Debug include::{libbeat-dir}/debugging.asciidoc[] + +//sets block macro for metrics-in-logs.asciidoc included in next section + +[id="understand-{beatname_lc}-logs"] +[role="xpack"] +== Understand metrics in {beatname_uc} logs + +++++ +Understand logged metrics +++++ + +include::{libbeat-dir}/metrics-in-logs.asciidoc[] \ No newline at end of file diff --git a/x-pack/functionbeat/docs/troubleshooting.asciidoc b/x-pack/functionbeat/docs/troubleshooting.asciidoc index b18a37a2fe1..777f24edd82 100644 --- a/x-pack/functionbeat/docs/troubleshooting.asciidoc +++ b/x-pack/functionbeat/docs/troubleshooting.asciidoc @@ -9,6 +9,7 @@ following tips: * <> * <> +* <> * <> //sets block macro for getting-help.asciidoc included in next section @@ -29,5 +30,14 @@ include::{libbeat-dir}/getting-help.asciidoc[] include::{libbeat-dir}/debugging.asciidoc[] +//sets block macro for metrics-in-logs.asciidoc included in next section +[id="understand-{beatname_lc}-logs"] +[role="xpack"] +== Understand metrics in {beatname_uc} logs + +++++ +Understand logged metrics +++++ +include::{libbeat-dir}/metrics-in-logs.asciidoc[]