From 9a7d92f960e2e32cfe32ad90db625e55d32da230 Mon Sep 17 00:00:00 2001 From: Chris Mark Date: Thu, 23 Jul 2020 16:42:07 +0300 Subject: [PATCH] Cherry-pick #20084 to 7.9: Fix terminating pod autodiscover issue (#20193) --- CHANGELOG.next.asciidoc | 1 + .../autodiscover/providers/kubernetes/pod.go | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 45c1e1f2c07..5ffbe0d4c69 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -89,6 +89,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Fix goroutine leak and Elasticsearch output file descriptor leak when output reloading is in use. {issue}10491[10491] {pull}17381[17381] - Fix Elasticsearch license endpoint URL referenced in error message. {issue}17880[17880] {pull}18030[18030] - Change `decode_json_fields` processor, to merge parsed json objects with existing objects in the event instead of fully replacing them. {pull}17958[17958] +- Fix terminating pod autodiscover issue. {pull}20084[20084] - Fix seccomp policy for calls to `chmod` and `chown`. 
{pull}20054[20054] *Auditbeat* diff --git a/libbeat/autodiscover/providers/kubernetes/pod.go b/libbeat/autodiscover/providers/kubernetes/pod.go index c856f790a6e..39df134b809 100644 --- a/libbeat/autodiscover/providers/kubernetes/pod.go +++ b/libbeat/autodiscover/providers/kubernetes/pod.go @@ -138,7 +138,7 @@ func (p *pod) OnUpdate(obj interface{}) { switch pod.Status.Phase { case kubernetes.PodSucceeded, kubernetes.PodFailed: // If Pod is in a phase where all containers in the have terminated emit a stop event - p.logger.Debugf("Watcher Pod update (terminating): %+v", obj) + p.logger.Debugf("Watcher Pod update (terminated): %+v", obj) time.AfterFunc(p.config.CleanupTimeout, func() { p.emit(pod, "stop") }) return case kubernetes.PodPending: @@ -146,6 +146,22 @@ func (p *pod) OnUpdate(obj interface{}) { return } + // here handle the case when a Pod is in `Terminating` phase. + // In this case the pod is neither `PodSucceeded` nor `PodFailed` and + // hence requires special handling. + if pod.GetObjectMeta().GetDeletionTimestamp() != nil { + p.logger.Debugf("Watcher Pod update (terminating): %+v", obj) + // Pod is terminating, don't reload its configuration and ignore the event + // if some container is still running, we will receive more events when containers + // terminate. + for _, container := range pod.Status.ContainerStatuses { + if container.State.Running != nil { + return + } + } + time.AfterFunc(p.config.CleanupTimeout, func() { p.emit(pod, "stop") }) + } + p.logger.Debugf("Watcher Pod update: %+v", obj) p.emit(pod, "stop") p.emit(pod, "start")