Skip to content

Commit

Permalink
Upgrade Thanos and simplify store-gateway metrics (#142)
Browse files Browse the repository at this point in the history
* Upgraded Thanos

Signed-off-by: Marco Pracucci <[email protected]>

* Enable index-header lazy loading in local dev env

Signed-off-by: Marco Pracucci <[email protected]>

* Updated CHANGELOG

Signed-off-by: Marco Pracucci <[email protected]>
  • Loading branch information
pracucci authored Aug 17, 2021
1 parent c084db8 commit 628cbe6
Show file tree
Hide file tree
Showing 18 changed files with 234 additions and 269 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
* [CHANGE] Query-frontend: Enable query stats by default, they can still be disabled with `-frontend.query-stats-enabled=false`. #83
* [CHANGE] Ingester: default `-ingester.min-ready-duration` reduced from 1m to 15s. #126
* [CHANGE] Ingester: `-ingester.min-ready-duration` now start counting the delay after the ring's health checks have passed instead of when the ring client was started. #126
* [CHANGE] Blocks storage: memcached client DNS resolution switched from golang built-in to [`miekg/dns`](https://github.com/miekg/dns). #142
* [FEATURE] Query Frontend: Add `cortex_query_fetched_chunks_total` per-user counter to expose the number of chunks fetched as part of queries. This metric can be enabled with the `-frontend.query-stats-enabled` flag (or its respective YAML config option `query_stats_enabled`). #31
* [FEATURE] Query Frontend: Add experimental querysharding for the blocks storage. You can now enabled querysharding for blocks storage (`-store.engine=blocks`) by setting `-querier.parallelise-shardable-queries` to `true`. The following additional config and exported metrics have been added. #79 #80 #100
* New config options:
Expand All @@ -36,7 +37,7 @@
* [ENHANCEMENT] Ingester: added option `-ingester.readiness-check-ring-health` to disable the ring health check in the readiness endpoint. When disabled, the health checks are run against only the ingester itself instead of all ingesters in the ring. #48 #126
* [ENHANCEMENT] Added option `-distributor.excluded-zones` to exclude ingesters running in specific zones both on write and read path. #51
* [ENHANCEMENT] Store-gateway: added `cortex_bucket_store_sent_chunk_size_bytes` metric, tracking the size of chunks sent from store-gateway to querier. #123
* [ENHANCEMENT] Store-gateway: reduced CPU and memory utilization due to exported metrics aggregation for instances with a large number of tenants. #123
* [ENHANCEMENT] Store-gateway: reduced CPU and memory utilization due to exported metrics aggregation for instances with a large number of tenants. #123 #142
* [BUGFIX] Upgrade Prometheus. TSDB now waits for pending readers before truncating Head block, fixing the `chunk not found` error and preventing wrong query results. #16
* [BUGFIX] Compactor: fixed panic while collecting Prometheus metrics. #28

Expand Down
1 change: 1 addition & 0 deletions development/tsdb-blocks-storage-s3/config/mimir.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ blocks_storage:
bucket_store:
sync_dir: /tmp/mimir-tsdb-querier
consistency_delay: 5s
index_header_lazy_loading_enabled: true

index_cache:
backend: memcached
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ require (
github.com/sony/gobreaker v0.4.1
github.com/spf13/afero v1.3.4
github.com/stretchr/testify v1.7.0
github.com/thanos-io/thanos v0.19.1-0.20210804003402-cd18b6155169
github.com/thanos-io/thanos v0.19.1-0.20210816083900-2be2db775cbc
github.com/uber/jaeger-client-go v2.29.1+incompatible
github.com/weaveworks/common v0.0.0-20210419092856-009d1eebd624
go.etcd.io/bbolt v1.3.5
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1540,8 +1540,8 @@ github.com/thanos-io/thanos v0.13.1-0.20210204123931-82545cdd16fe/go.mod h1:ZLDG
github.com/thanos-io/thanos v0.13.1-0.20210224074000-659446cab117/go.mod h1:kdqFpzdkveIKpNNECVJd75RPvgsAifQgJymwCdfev1w=
github.com/thanos-io/thanos v0.13.1-0.20210226164558-03dace0a1aa1/go.mod h1:gMCy4oCteKTT7VuXVvXLTPGzzjovX1VPE5p+HgL1hyU=
github.com/thanos-io/thanos v0.13.1-0.20210401085038-d7dff0c84d17/go.mod h1:zU8KqE+6A+HksK4wiep8e/3UvCZLm+Wrw9AqZGaAm9k=
github.com/thanos-io/thanos v0.19.1-0.20210804003402-cd18b6155169 h1:eAUloaaBjWh5u1spQ5QpLq6s9UM+UtzfyFei1WVYark=
github.com/thanos-io/thanos v0.19.1-0.20210804003402-cd18b6155169/go.mod h1:Xskx78e0CYL6w0yDNOZHGdvwQMlsuzPsePmPtbp9Xuk=
github.com/thanos-io/thanos v0.19.1-0.20210816083900-2be2db775cbc h1:pJj2HmzCsf58IiqVdhSJFfLiVcK0aT5w3Z+a7iB8CHM=
github.com/thanos-io/thanos v0.19.1-0.20210816083900-2be2db775cbc/go.mod h1:Xskx78e0CYL6w0yDNOZHGdvwQMlsuzPsePmPtbp9Xuk=
github.com/themihai/gomemcache v0.0.0-20180902122335-24332e2d58ab h1:7ZR3hmisBWw77ZpO1/o86g+JV3VKlk3d48jopJxzTjU=
github.com/themihai/gomemcache v0.0.0-20180902122335-24332e2d58ab/go.mod h1:eheTFp954zcWZXCU8d0AT76ftsQOTo4DTqkN/h3k1MY=
github.com/tidwall/pretty v0.0.0-20180105212114-65a9db5fad51/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
Expand Down
2 changes: 2 additions & 0 deletions pkg/compactor/compactor.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ var (
reg,
blocksMarkedForDeletion,
garbageCollectedBlocks,
prometheus.NewCounter(prometheus.CounterOpts{}), // Do not track blocks marked for no-compact cause it's not supported by Mimir yet.
metadata.NoneFunc)
}

Expand Down Expand Up @@ -661,6 +662,7 @@ func (c *Compactor) compactUser(ctx context.Context, userID string) error {
path.Join(c.compactorCfg.DataDir, "compact"),
bucket,
c.compactorCfg.CompactionConcurrency,
false, // Do not skip blocks with out of order chunks.
)
if err != nil {
return errors.Wrap(err, "failed to create bucket compactor")
Expand Down
11 changes: 1 addition & 10 deletions pkg/storegateway/bucket.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ import (
"github.com/thanos-io/thanos/pkg/block/metadata"
"github.com/thanos-io/thanos/pkg/compact/downsample"
"github.com/thanos-io/thanos/pkg/component"
"github.com/thanos-io/thanos/pkg/extprom"
"github.com/thanos-io/thanos/pkg/gate"
"github.com/thanos-io/thanos/pkg/model"
"github.com/thanos-io/thanos/pkg/objstore"
Expand Down Expand Up @@ -106,7 +105,6 @@ type BucketStoreStats struct {
// When used with in-memory cache, memory usage should decrease overall, thanks to postings being smaller.
type BucketStore struct {
logger log.Logger
reg prometheus.Registerer // TODO(metalmatze) remove and add via BucketStoreOption
metrics *BucketStoreMetrics
bkt objstore.InstrumentedBucketReader
fetcher block.MetadataFetcher
Expand Down Expand Up @@ -169,13 +167,6 @@ func WithLogger(logger log.Logger) BucketStoreOption {
}
}

// WithRegistry sets a registry that BucketStore uses to register metrics with.
func WithRegistry(reg prometheus.Registerer) BucketStoreOption {
return func(s *BucketStore) {
s.reg = reg
}
}

// WithIndexCache sets a indexCache to use instead of a noopCache.
func WithIndexCache(cache storecache.IndexCache) BucketStoreOption {
return func(s *BucketStore) {
Expand Down Expand Up @@ -256,7 +247,7 @@ func NewBucketStore(
}

// Depend on the options
s.indexReaderPool = indexheader.NewReaderPool(s.logger, lazyIndexReaderEnabled, lazyIndexReaderIdleTimeout, extprom.WrapRegistererWithPrefix("thanos_bucket_store_", s.reg))
s.indexReaderPool = indexheader.NewReaderPool(s.logger, lazyIndexReaderEnabled, lazyIndexReaderIdleTimeout, metrics.indexHeaderReaderMetrics)

if err := os.MkdirAll(dir, 0750); err != nil {
return nil, errors.Wrap(err, "create dir")
Expand Down
73 changes: 6 additions & 67 deletions pkg/storegateway/bucket_store_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ package storegateway
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"

"github.com/grafana/mimir/pkg/util"
"github.com/thanos-io/thanos/pkg/block/indexheader"
"github.com/thanos-io/thanos/pkg/extprom"
)

// BucketStoreMetrics holds all the metrics tracked by BucketStore. These metrics
Expand Down Expand Up @@ -47,6 +47,8 @@ type BucketStoreMetrics struct {

seriesFetchDuration prometheus.Histogram
postingsFetchDuration prometheus.Histogram

indexHeaderReaderMetrics *indexheader.ReaderPoolMetrics
}

func NewBucketStoreMetrics(reg prometheus.Registerer) *BucketStoreMetrics {
Expand Down Expand Up @@ -172,70 +174,7 @@ func NewBucketStoreMetrics(reg prometheus.Registerer) *BucketStoreMetrics {
},
})

return &m
}

// IndexHeaderMetrics aggregates metrics exported by Thanos index-header lazy loader
// and re-exports those aggregates as Mimir metrics.
type IndexHeaderMetrics struct {
regs *util.UserRegistries

indexHeaderLazyLoadCount *prometheus.Desc
indexHeaderLazyLoadFailedCount *prometheus.Desc
indexHeaderLazyUnloadCount *prometheus.Desc
indexHeaderLazyUnloadFailedCount *prometheus.Desc
indexHeaderLazyLoadDuration *prometheus.Desc
}

func NewIndexHeaderMetrics() *IndexHeaderMetrics {
return &IndexHeaderMetrics{
regs: util.NewUserRegistries(),

indexHeaderLazyLoadCount: prometheus.NewDesc(
"cortex_bucket_store_indexheader_lazy_load_total",
"Total number of index-header lazy load operations.",
nil, nil),
indexHeaderLazyLoadFailedCount: prometheus.NewDesc(
"cortex_bucket_store_indexheader_lazy_load_failed_total",
"Total number of failed index-header lazy load operations.",
nil, nil),
indexHeaderLazyUnloadCount: prometheus.NewDesc(
"cortex_bucket_store_indexheader_lazy_unload_total",
"Total number of index-header lazy unload operations.",
nil, nil),
indexHeaderLazyUnloadFailedCount: prometheus.NewDesc(
"cortex_bucket_store_indexheader_lazy_unload_failed_total",
"Total number of failed index-header lazy unload operations.",
nil, nil),
indexHeaderLazyLoadDuration: prometheus.NewDesc(
"cortex_bucket_store_indexheader_lazy_load_duration_seconds",
"Duration of the index-header lazy loading in seconds.",
nil, nil),
}
}
m.indexHeaderReaderMetrics = indexheader.NewReaderPoolMetrics(extprom.WrapRegistererWithPrefix("cortex_bucket_store_", reg))

func (m *IndexHeaderMetrics) AddUserRegistry(user string, reg *prometheus.Registry) {
m.regs.AddUserRegistry(user, reg)
}

func (m *IndexHeaderMetrics) RemoveUserRegistry(user string) {
m.regs.RemoveUserRegistry(user, false)
}

func (m *IndexHeaderMetrics) Describe(out chan<- *prometheus.Desc) {
out <- m.indexHeaderLazyLoadCount
out <- m.indexHeaderLazyLoadFailedCount
out <- m.indexHeaderLazyUnloadCount
out <- m.indexHeaderLazyUnloadFailedCount
out <- m.indexHeaderLazyLoadDuration
}

func (m *IndexHeaderMetrics) Collect(out chan<- prometheus.Metric) {
data := m.regs.BuildMetricFamiliesPerUser()

data.SendSumOfCounters(out, m.indexHeaderLazyLoadCount, "thanos_bucket_store_indexheader_lazy_load_total")
data.SendSumOfCounters(out, m.indexHeaderLazyLoadFailedCount, "thanos_bucket_store_indexheader_lazy_load_failed_total")
data.SendSumOfCounters(out, m.indexHeaderLazyUnloadCount, "thanos_bucket_store_indexheader_lazy_unload_total")
data.SendSumOfCounters(out, m.indexHeaderLazyUnloadFailedCount, "thanos_bucket_store_indexheader_lazy_unload_failed_total")
data.SendSumOfHistograms(out, m.indexHeaderLazyLoadDuration, "thanos_bucket_store_indexheader_lazy_load_duration_seconds")
return &m
}
145 changes: 0 additions & 145 deletions pkg/storegateway/bucket_store_metrics_test.go

This file was deleted.

8 changes: 1 addition & 7 deletions pkg/storegateway/bucket_stores.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ type BucketStores struct {
bucket objstore.Bucket
logLevel logging.Level
bucketStoreMetrics *BucketStoreMetrics
indexHeaderMetrics *IndexHeaderMetrics
metaFetcherMetrics *MetadataFetcherMetrics
shardingStrategy ShardingStrategy

Expand Down Expand Up @@ -107,7 +106,6 @@ func NewBucketStores(cfg tsdb.BlocksStorageConfig, shardingStrategy ShardingStra
stores: map[string]*BucketStore{},
logLevel: logLevel,
bucketStoreMetrics: NewBucketStoreMetrics(reg),
indexHeaderMetrics: NewIndexHeaderMetrics(),
metaFetcherMetrics: NewMetadataFetcherMetrics(),
queryGate: queryGate,
partitioner: newGapBasedPartitioner(cfg.BucketStore.PartitionerMaxGapBytes, reg),
Expand Down Expand Up @@ -148,7 +146,7 @@ func NewBucketStores(cfg tsdb.BlocksStorageConfig, shardingStrategy ShardingStra
}

if reg != nil {
reg.MustRegister(u.indexHeaderMetrics, u.metaFetcherMetrics)
reg.MustRegister(u.metaFetcherMetrics)
}

return u, nil
Expand Down Expand Up @@ -407,7 +405,6 @@ func (u *BucketStores) closeEmptyBucketStore(userID string) error {
u.storesMu.Unlock()

u.metaFetcherMetrics.RemoveUserRegistry(userID)
u.indexHeaderMetrics.RemoveUserRegistry(userID)
return bs.Close()
}

Expand Down Expand Up @@ -498,10 +495,8 @@ func (u *BucketStores) getOrCreateStore(userID string) (*BucketStore, error) {
}
}

bucketStoreReg := prometheus.NewRegistry()
bucketStoreOpts := []BucketStoreOption{
WithLogger(userLogger),
WithRegistry(bucketStoreReg),
WithIndexCache(u.indexCache),
WithQueryGate(u.queryGate),
WithChunkPool(u.chunksPool),
Expand Down Expand Up @@ -533,7 +528,6 @@ func (u *BucketStores) getOrCreateStore(userID string) (*BucketStore, error) {

u.stores[userID] = bs
u.metaFetcherMetrics.AddUserRegistry(userID, fetcherReg)
u.indexHeaderMetrics.AddUserRegistry(userID, bucketStoreReg)

return bs, nil
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/storegateway/bucket_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1421,7 +1421,7 @@ func TestBucketSeries_OneBlock_InMemIndexCacheSegfault(t *testing.T) {
bkt: objstore.WithNoopInstr(bkt),
logger: logger,
indexCache: indexCache,
indexReaderPool: indexheader.NewReaderPool(log.NewNopLogger(), false, 0, nil),
indexReaderPool: indexheader.NewReaderPool(log.NewNopLogger(), false, 0, indexheader.NewReaderPoolMetrics(nil)),
metrics: NewBucketStoreMetrics(nil),
blockSets: map[uint64]*bucketBlockSet{
labels.Labels{{Name: "ext1", Value: "1"}}.Hash(): {blocks: [][]*bucketBlock{{b1, b2}}},
Expand Down
Loading

0 comments on commit 628cbe6

Please sign in to comment.