From 9651013a51cedc1218b4b304dd7c50887274581b Mon Sep 17 00:00:00 2001 From: Michael Hoffmann Date: Wed, 24 Apr 2024 16:00:16 +0200 Subject: [PATCH] Sidecar: mark as unqueryable if prometheus is down (#7297) If the prometheus that belongs to a sidecar is down we don't need to query the sidecar. This PR makes it so that we take the sidecar out of the endpoint set then. We do the same for all other store APIs by returning an error in the info/Info gRPC call if they are marked as not ready. Signed-off-by: Michael Hoffmann --- CHANGELOG.md | 1 + cmd/thanos/query.go | 6 +++--- cmd/thanos/receive.go | 6 +++--- cmd/thanos/rule.go | 6 +++--- cmd/thanos/sidecar.go | 6 +++--- cmd/thanos/store.go | 6 +++--- pkg/info/info.go | 17 +++++++++++------ pkg/query/endpointset.go | 11 ++++++++--- pkg/query/endpointset_test.go | 11 +++++------ 9 files changed, 40 insertions(+), 30 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8564bfc778f..68afcd15460 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#7233](https://github.com/thanos-io/thanos/pull/7233): UI: Showing Block Size Stats - [#7280](https://github.com/thanos-io/thanos/pull/7281): Adding User-Agent to request logs - [#7219](https://github.com/thanos-io/thanos/pull/7219): Receive: add `--remote-write.client-tls-secure` and `--remote-write.client-tls-skip-verify` flags to stop relying on grpc server config to determine grpc client secure/skipVerify. 
+- [#7297](https://github.com/thanos-io/thanos/pull/7297): *: mark as not queryable if status is not ready ### Changed diff --git a/cmd/thanos/query.go b/cmd/thanos/query.go index 9300940aa1a..750927b3e61 100644 --- a/cmd/thanos/query.go +++ b/cmd/thanos/query.go @@ -803,7 +803,7 @@ func runQuery( infoSrv := info.NewInfoServer( component.Query.String(), info.WithLabelSetFunc(func() []labelpb.ZLabelSet { return proxy.LabelSet() }), - info.WithStoreInfoFunc(func() *infopb.StoreInfo { + info.WithStoreInfoFunc(func() (*infopb.StoreInfo, error) { if httpProbe.IsReady() { mint, maxt := proxy.TimeRange() return &infopb.StoreInfo{ @@ -812,9 +812,9 @@ func runQuery( SupportsSharding: true, SupportsWithoutReplicaLabels: true, TsdbInfos: proxy.TSDBInfos(), - } + }, nil } - return nil + return nil, errors.New("Not ready") }), info.WithExemplarsInfoFunc(), info.WithRulesInfoFunc(), diff --git a/cmd/thanos/receive.go b/cmd/thanos/receive.go index 22ebccf0d2a..05467ad38e3 100644 --- a/cmd/thanos/receive.go +++ b/cmd/thanos/receive.go @@ -347,7 +347,7 @@ func runReceive( infoSrv := info.NewInfoServer( component.Receive.String(), info.WithLabelSetFunc(func() []labelpb.ZLabelSet { return proxy.LabelSet() }), - info.WithStoreInfoFunc(func() *infopb.StoreInfo { + info.WithStoreInfoFunc(func() (*infopb.StoreInfo, error) { if httpProbe.IsReady() { minTime, maxTime := proxy.TimeRange() return &infopb.StoreInfo{ @@ -356,9 +356,9 @@ func runReceive( SupportsSharding: true, SupportsWithoutReplicaLabels: true, TsdbInfos: proxy.TSDBInfos(), - } + }, nil } - return nil + return nil, errors.New("Not ready") }), info.WithExemplarsInfoFunc(), ) diff --git a/cmd/thanos/rule.go b/cmd/thanos/rule.go index 3a14d294658..c23d7d70e14 100644 --- a/cmd/thanos/rule.go +++ b/cmd/thanos/rule.go @@ -741,7 +741,7 @@ func runRule( info.WithLabelSetFunc(func() []labelpb.ZLabelSet { return tsdbStore.LabelSet() }), - info.WithStoreInfoFunc(func() *infopb.StoreInfo { + info.WithStoreInfoFunc(func() 
(*infopb.StoreInfo, error) { if httpProbe.IsReady() { mint, maxt := tsdbStore.TimeRange() return &infopb.StoreInfo{ @@ -750,9 +750,9 @@ func runRule( SupportsSharding: true, SupportsWithoutReplicaLabels: true, TsdbInfos: tsdbStore.TSDBInfos(), - } + }, nil } - return nil + return nil, errors.New("Not ready") }), ) storeServer := store.NewLimitedStoreServer(store.NewInstrumentedStoreServer(reg, tsdbStore), reg, conf.storeRateLimits) diff --git a/cmd/thanos/sidecar.go b/cmd/thanos/sidecar.go index 74ab3090fa2..9b8c2fedede 100644 --- a/cmd/thanos/sidecar.go +++ b/cmd/thanos/sidecar.go @@ -280,7 +280,7 @@ func runSidecar( info.WithLabelSetFunc(func() []labelpb.ZLabelSet { return promStore.LabelSet() }), - info.WithStoreInfoFunc(func() *infopb.StoreInfo { + info.WithStoreInfoFunc(func() (*infopb.StoreInfo, error) { if httpProbe.IsReady() { mint, maxt := promStore.Timestamps() return &infopb.StoreInfo{ @@ -289,9 +289,9 @@ func runSidecar( SupportsSharding: true, SupportsWithoutReplicaLabels: true, TsdbInfos: promStore.TSDBInfos(), - } + }, nil } - return nil + return nil, errors.New("Not ready") }), info.WithExemplarsInfoFunc(), info.WithRulesInfoFunc(), diff --git a/cmd/thanos/store.go b/cmd/thanos/store.go index 3b923417450..6c752ce15d4 100644 --- a/cmd/thanos/store.go +++ b/cmd/thanos/store.go @@ -491,7 +491,7 @@ func runStore( info.WithLabelSetFunc(func() []labelpb.ZLabelSet { return bs.LabelSet() }), - info.WithStoreInfoFunc(func() *infopb.StoreInfo { + info.WithStoreInfoFunc(func() (*infopb.StoreInfo, error) { if httpProbe.IsReady() { mint, maxt := bs.TimeRange() return &infopb.StoreInfo{ @@ -500,9 +500,9 @@ func runStore( SupportsSharding: true, SupportsWithoutReplicaLabels: true, TsdbInfos: bs.TSDBInfos(), - } + }, nil } - return nil + return nil, errors.New("Not ready") }), ) diff --git a/pkg/info/info.go b/pkg/info/info.go index f61fdef1875..0b5feaa113d 100644 --- a/pkg/info/info.go +++ b/pkg/info/info.go @@ -6,9 +6,10 @@ package info import ( "context" + 
"google.golang.org/grpc" + "github.com/thanos-io/thanos/pkg/info/infopb" "github.com/thanos-io/thanos/pkg/store/labelpb" - "google.golang.org/grpc" ) // InfoServer implements the corresponding protobuf interface @@ -20,7 +21,7 @@ type InfoServer struct { component string getLabelSet func() []labelpb.ZLabelSet - getStoreInfo func() *infopb.StoreInfo + getStoreInfo func() (*infopb.StoreInfo, error) getExemplarsInfo func() *infopb.ExemplarsInfo getRulesInfo func() *infopb.RulesInfo getTargetsInfo func() *infopb.TargetsInfo @@ -38,7 +39,7 @@ func NewInfoServer( component: component, // By default, do not return info for any API. getLabelSet: func() []labelpb.ZLabelSet { return nil }, - getStoreInfo: func() *infopb.StoreInfo { return nil }, + getStoreInfo: func() (*infopb.StoreInfo, error) { return nil, nil }, getExemplarsInfo: func() *infopb.ExemplarsInfo { return nil }, getRulesInfo: func() *infopb.RulesInfo { return nil }, getTargetsInfo: func() *infopb.TargetsInfo { return nil }, @@ -74,10 +75,10 @@ func WithLabelSetFunc(getLabelSet ...func() []labelpb.ZLabelSet) ServerOptionFun // WithStoreInfoFunc determines the function that should be executed to obtain // the store information. If no function is provided, the default empty // store info is returned. Only the first function from the list is considered. -func WithStoreInfoFunc(getStoreInfo ...func() *infopb.StoreInfo) ServerOptionFunc { +func WithStoreInfoFunc(getStoreInfo ...func() (*infopb.StoreInfo, error)) ServerOptionFunc { if len(getStoreInfo) == 0 { return func(s *InfoServer) { - s.getStoreInfo = func() *infopb.StoreInfo { return &infopb.StoreInfo{} } + s.getStoreInfo = func() (*infopb.StoreInfo, error) { return &infopb.StoreInfo{}, nil } } } @@ -170,10 +171,14 @@ func RegisterInfoServer(infoSrv infopb.InfoServer) func(*grpc.Server) { // Info returns the information about label set and available APIs exposed by the component. 
func (srv *InfoServer) Info(ctx context.Context, req *infopb.InfoRequest) (*infopb.InfoResponse, error) { + storeInfo, err := srv.getStoreInfo() + if err != nil { + return nil, err + } return &infopb.InfoResponse{ LabelSets: srv.getLabelSet(), ComponentType: srv.component, - Store: srv.getStoreInfo(), + Store: storeInfo, Exemplars: srv.getExemplarsInfo(), Rules: srv.getRulesInfo(), Targets: srv.getTargetsInfo(), diff --git a/pkg/query/endpointset.go b/pkg/query/endpointset.go index ce1b63678e9..2c3bdb28682 100644 --- a/pkg/query/endpointset.go +++ b/pkg/query/endpointset.go @@ -15,15 +15,16 @@ import ( "time" "unicode/utf8" - "github.com/thanos-io/thanos/pkg/api/query/querypb" - "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/prometheus/model/labels" "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "github.com/thanos-io/thanos/pkg/api/query/querypb" "github.com/thanos-io/thanos/pkg/component" "github.com/thanos-io/thanos/pkg/exemplars/exemplarspb" "github.com/thanos-io/thanos/pkg/info/infopb" @@ -108,7 +109,11 @@ func (es *GRPCEndpointSpec) ReplicaKey() string { func (es *endpointRef) Metadata(ctx context.Context, infoClient infopb.InfoClient, storeClient storepb.StoreClient) (*endpointMetadata, error) { if infoClient != nil { resp, err := infoClient.Info(ctx, &infopb.InfoRequest{}, grpc.WaitForReady(true)) - if err == nil { + if err != nil { + if status.Convert(err).Code() != codes.Unimplemented { + return nil, err + } + } else { return &endpointMetadata{resp}, nil } } diff --git a/pkg/query/endpointset_test.go b/pkg/query/endpointset_test.go index 21a3bc6c94d..2bcb036f980 100644 --- a/pkg/query/endpointset_test.go +++ b/pkg/query/endpointset_test.go @@ -14,20 +14,19 @@ import ( "testing" "time" + "github.com/efficientgo/core/testutil" + "github.com/pkg/errors" "github.com/stretchr/testify/require" - - 
"github.com/prometheus/prometheus/model/labels" - "github.com/thanos-io/thanos/pkg/store" - "golang.org/x/sync/errgroup" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" - "github.com/efficientgo/core/testutil" - "github.com/pkg/errors" promtestutil "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/prometheus/model/labels" + "github.com/thanos-io/thanos/pkg/component" "github.com/thanos-io/thanos/pkg/info/infopb" + "github.com/thanos-io/thanos/pkg/store" "github.com/thanos-io/thanos/pkg/store/labelpb" "github.com/thanos-io/thanos/pkg/store/storepb" )