Skip to content

Commit

Permalink
add offline dedup compactor
Browse files Browse the repository at this point in the history
Signed-off-by: yeya24 <[email protected]>
  • Loading branch information
yeya24 committed May 18, 2021
1 parent eacc686 commit 6884bbb
Show file tree
Hide file tree
Showing 11 changed files with 588 additions and 52 deletions.
23 changes: 22 additions & 1 deletion cmd/thanos/compact.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,16 @@ import (
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/common/model"
"github.com/prometheus/common/route"
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/tsdb"

blocksAPI "github.com/thanos-io/thanos/pkg/api/blocks"
"github.com/thanos-io/thanos/pkg/block"
"github.com/thanos-io/thanos/pkg/block/metadata"
"github.com/thanos-io/thanos/pkg/compact"
"github.com/thanos-io/thanos/pkg/compact/downsample"
"github.com/thanos-io/thanos/pkg/component"
"github.com/thanos-io/thanos/pkg/dedup"
"github.com/thanos-io/thanos/pkg/extflag"
"github.com/thanos-io/thanos/pkg/extkingpin"
"github.com/thanos-io/thanos/pkg/extprom"
Expand Down Expand Up @@ -302,9 +305,20 @@ func runCompact(
cancel()
}
}()

var mergeFunc storage.VerticalChunkSeriesMergeFunc
switch conf.dedupFunc {
case compact.DedupAlgorithmPenalty:
mergeFunc = dedup.NewDedupChunkSeriesMerger()

if len(conf.dedupReplicaLabels) == 0 {
return errors.New("penalty based deduplication needs at least one replica label specified")
}
}

// Instantiate the compactor with different time slices. Timestamps in TSDB
// are in milliseconds.
comp, err := tsdb.NewLeveledCompactor(ctx, reg, logger, levels, downsample.NewPool())
comp, err := tsdb.NewLeveledCompactor(ctx, reg, logger, levels, downsample.NewPool(), mergeFunc)
if err != nil {
return errors.Wrap(err, "create compactor")
}
Expand Down Expand Up @@ -564,6 +578,7 @@ type compactConfig struct {
maxBlockIndexSize units.Base2Bytes
hashFunc string
enableVerticalCompaction bool
dedupFunc string
}

func (cc *compactConfig) registerFlag(cmd extkingpin.FlagClause) {
Expand Down Expand Up @@ -627,6 +642,12 @@ func (cc *compactConfig) registerFlag(cmd extkingpin.FlagClause) {
"NOTE: This flag is ignored and (enabled) when --deduplication.replica-label flag is set.").
Hidden().Default("false").BoolVar(&cc.enableVerticalCompaction)

cmd.Flag("compact.dedup-func", "Experimental. Deduplication algorithm for merging overlapping blocks. "+
"Possible values are: \"\", \"penalty\". If no value is specified, the default compact deduplication merger is used, which performs 1:1 deduplication for samples. "+
"When set to penalty, penalty based deduplication algorithm will be used. At least one replica label has to be set via --deduplication.replica-label flag.").
Default("").EnumVar(&cc.dedupFunc, compact.DedupAlgorithmPenalty, "")

// Update this. This flag works for both dedup version compactor and the original compactor.
cmd.Flag("deduplication.replica-label", "Label to treat as a replica indicator of blocks that can be deduplicated (repeated flag). This will merge multiple replica blocks into one. This process is irreversible."+
"Experimental. When it is set to true, compactor will ignore the given labels so that vertical compaction can merge the blocks."+
"Please note that this uses a NAIVE algorithm for merging (no smart replica deduplication, just chaining samples together)."+
Expand Down
9 changes: 9 additions & 0 deletions docs/components/compact.md
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,15 @@ Flags:
happen at the end of an iteration.
--compact.concurrency=1 Number of goroutines to use when compacting
groups.
--compact.dedup-func= Experimental. Deduplication algorithm for
merging overlapping blocks. Possible values are:
"", "penalty". If no value is specified, the
default compact deduplication merger is used,
which performs 1:1 deduplication for samples.
When set to penalty, penalty based deduplication
algorithm will be used. At least one replica
label has to be set via
--deduplication.replica-label flag.
--consistency-delay=30m Minimum age of fresh (non-compacted) blocks
before they are being processed. Malformed
blocks older than the maximum of
Expand Down
24 changes: 12 additions & 12 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
module github.com/thanos-io/thanos

require (
cloud.google.com/go v0.79.0
cloud.google.com/go v0.81.0
cloud.google.com/go/storage v1.10.0
github.com/Azure/azure-pipeline-go v0.2.2
github.com/Azure/azure-storage-blob-go v0.8.0
Expand All @@ -21,7 +21,7 @@ require (
github.com/felixge/fgprof v0.9.1
github.com/fsnotify/fsnotify v1.4.9
github.com/go-kit/kit v0.10.0
github.com/go-openapi/strfmt v0.20.0
github.com/go-openapi/strfmt v0.20.1
github.com/gogo/protobuf v1.3.2
github.com/gogo/status v1.0.3
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e
Expand Down Expand Up @@ -50,24 +50,24 @@ require (
github.com/prometheus/alertmanager v0.21.1-0.20210422101724-8176f78a70e1
github.com/prometheus/client_golang v1.10.0
github.com/prometheus/client_model v0.2.0
github.com/prometheus/common v0.21.0
github.com/prometheus/common v0.23.0
github.com/prometheus/exporter-toolkit v0.5.1
github.com/prometheus/prometheus v1.8.2-0.20210421143221-52df5ef7a3be
github.com/uber/jaeger-client-go v2.25.0+incompatible
github.com/uber/jaeger-lib v2.4.0+incompatible
github.com/prometheus/prometheus v1.8.2-0.20210518163837-0a8912433a45
github.com/uber/jaeger-client-go v2.28.0+incompatible
github.com/uber/jaeger-lib v2.4.1+incompatible
github.com/weaveworks/common v0.0.0-20210419092856-009d1eebd624
go.elastic.co/apm v1.11.0
go.elastic.co/apm/module/apmot v1.11.0
go.uber.org/atomic v1.7.0
go.uber.org/automaxprocs v1.2.0
go.uber.org/goleak v1.1.10
golang.org/x/crypto v0.0.0-20210220033148-5ea612d1eb83
golang.org/x/oauth2 v0.0.0-20210323180902-22b0adad7558
golang.org/x/oauth2 v0.0.0-20210427180440-81ed05c6b58c
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c
golang.org/x/text v0.3.5
google.golang.org/api v0.42.0
google.golang.org/genproto v0.0.0-20210312152112-fc591d9ea70f
google.golang.org/grpc v1.36.0
golang.org/x/text v0.3.6
google.golang.org/api v0.46.0
google.golang.org/genproto v0.0.0-20210429181445-86c259c2b4ab
google.golang.org/grpc v1.37.0
gopkg.in/alecthomas/kingpin.v2 v2.2.6
gopkg.in/fsnotify.v1 v1.4.7
gopkg.in/yaml.v2 v2.4.0
Expand All @@ -84,7 +84,7 @@ replace (
// TODO: Remove this: https://github.com/thanos-io/thanos/issues/3967.
github.com/minio/minio-go/v7 => github.com/bwplotka/minio-go/v7 v7.0.11-0.20210324165441-f9927e5255a6
// Make sure Prometheus version is pinned as Prometheus semver does not include Go APIs.
github.com/prometheus/prometheus => github.com/prometheus/prometheus v1.8.2-0.20210421143221-52df5ef7a3be
github.com/prometheus/prometheus => github.com/prometheus/prometheus v1.8.2-0.20210518163837-0a8912433a45
github.com/sercand/kuberesolver => github.com/sercand/kuberesolver v2.4.0+incompatible
google.golang.org/grpc => google.golang.org/grpc v1.29.1

Expand Down
Loading

0 comments on commit 6884bbb

Please sign in to comment.