From 3ed8bc1c521e7bd0cf0150dfdb72038ab377a40a Mon Sep 17 00:00:00 2001 From: Seena Fallah Date: Mon, 28 Nov 2022 09:18:14 +0100 Subject: [PATCH] compact: retry on cleanPartialMarked errors if possible (#5922) cleanPartialMarked calls SyncMetas, which can return retriable errors. By checking for retriable errors and retrying, we can prevent the compactor from shutting down the HTTP server. Signed-off-by: Seena Fallah Signed-off-by: Seena Fallah --- cmd/thanos/compact.go | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 33a7618416..6cb4eae3e9 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -557,7 +557,19 @@ func runCompact( // since one iteration potentially could take a long time. if conf.cleanupBlocksInterval > 0 { g.Add(func() error { - return runutil.Repeat(conf.cleanupBlocksInterval, ctx.Done(), cleanPartialMarked) + return runutil.Repeat(conf.cleanupBlocksInterval, ctx.Done(), func() error { + err := cleanPartialMarked() + if err != nil && compact.IsRetryError(err) { + // The RetryError signals that we hit an retriable error (transient error, no connection). + // You should alert on this being triggered too frequently. + level.Error(logger).Log("msg", "retriable error", "err", err) + compactMetrics.retried.Inc() + + return nil + } + + return err + }) }, func(error) { cancel() })