-
Notifications
You must be signed in to change notification settings - Fork 2.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Parallelize vertical compactions inside a single group #5936
Changes from 6 commits
912172b
d71c4d1
c911229
afe27df
321efdc
31885e1
86af2a8
1248a96
09bdb1a
b58d2bf
c804045
d754370
24fd336
a808840
7c6dba5
a7d4a7b
f476edb
2662961
407e98a
25705dc
e3e94c6
ea766ad
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -542,26 +542,28 @@ func (ps *CompactionProgressCalculator) ProgressCalculate(ctx context.Context, g | |
if len(plan) == 0 { | ||
continue | ||
} | ||
groupCompactions[g.key]++ | ||
|
||
toRemove := make(map[ulid.ULID]struct{}, len(plan)) | ||
metas := make([]*tsdb.BlockMeta, 0, len(plan)) | ||
for _, p := range plan { | ||
metas = append(metas, &p.BlockMeta) | ||
toRemove[p.BlockMeta.ULID] = struct{}{} | ||
} | ||
g.deleteFromGroup(toRemove) | ||
for _, groupTask := range plan { | ||
groupCompactions[g.key]++ | ||
toRemove := make(map[ulid.ULID]struct{}, len(groupTask)) | ||
metas := make([]*tsdb.BlockMeta, 0, len(groupTask)) | ||
for _, meta := range groupTask { | ||
metas = append(metas, &meta.BlockMeta) | ||
toRemove[meta.BlockMeta.ULID] = struct{}{} | ||
} | ||
g.deleteFromGroup(toRemove) | ||
groupBlocks[g.key] += len(groupTask) | ||
|
||
groupBlocks[g.key] += len(plan) | ||
newMeta := tsdb.CompactBlockMetas(ulid.MustNew(uint64(time.Now().Unix()), nil), metas...) | ||
if err := g.AppendMeta(&metadata.Meta{BlockMeta: *newMeta, Thanos: metadata.Thanos{Downsample: metadata.ThanosDownsample{Resolution: g.Resolution()}, Labels: g.Labels().Map()}}); err != nil { | ||
return errors.Wrapf(err, "append meta") | ||
} | ||
} | ||
|
||
if len(g.metasByMinTime) == 0 { | ||
continue | ||
} | ||
|
||
newMeta := tsdb.CompactBlockMetas(ulid.MustNew(uint64(time.Now().Unix()), nil), metas...) | ||
if err := g.AppendMeta(&metadata.Meta{BlockMeta: *newMeta, Thanos: metadata.Thanos{Downsample: metadata.ThanosDownsample{Resolution: g.Resolution()}, Labels: g.Labels().Map()}}); err != nil { | ||
return errors.Wrapf(err, "append meta") | ||
} | ||
tmpGroups = append(tmpGroups, g) | ||
} | ||
|
||
|
@@ -727,7 +729,7 @@ func (rs *RetentionProgressCalculator) ProgressCalculate(ctx context.Context, gr | |
type Planner interface { | ||
// Plan returns a list of blocks that should be compacted into single one. | ||
// The blocks can be overlapping. The provided metadata has to be ordered by minTime. | ||
Plan(ctx context.Context, metasByMinTime []*metadata.Meta) ([]*metadata.Meta, error) | ||
Plan(ctx context.Context, metasByMinTime []*metadata.Meta) ([]CompactionTask, error) | ||
} | ||
|
||
// Compactor provides compaction against an underlying storage of time series data. | ||
|
@@ -751,7 +753,7 @@ type Compactor interface { | |
|
||
// Compact plans and runs a single compaction against the group. The compacted result | ||
// is uploaded into the bucket the blocks were retrieved from. | ||
func (cg *Group) Compact(ctx context.Context, dir string, planner Planner, comp Compactor) (shouldRerun bool, compID ulid.ULID, rerr error) { | ||
func (cg *Group) Compact(ctx context.Context, dir string, planner Planner, comp Compactor) (shouldRerun bool, rerr error) { | ||
cg.compactionRunsStarted.Inc() | ||
|
||
subDir := filepath.Join(dir, cg.Key()) | ||
|
@@ -768,19 +770,19 @@ func (cg *Group) Compact(ctx context.Context, dir string, planner Planner, comp | |
}() | ||
|
||
if err := os.MkdirAll(subDir, 0750); err != nil { | ||
return false, ulid.ULID{}, errors.Wrap(err, "create compaction group dir") | ||
return false, errors.Wrap(err, "create compaction group dir") | ||
} | ||
|
||
err := tracing.DoInSpanWithErr(ctx, "compaction_group", func(ctx context.Context) (err error) { | ||
shouldRerun, compID, err = cg.compact(ctx, subDir, planner, comp) | ||
shouldRerun, err = cg.compact(ctx, subDir, planner, comp) | ||
return err | ||
}, opentracing.Tags{"group.key": cg.Key()}) | ||
if err != nil { | ||
cg.compactionFailures.Inc() | ||
return false, ulid.ULID{}, err | ||
return false, err | ||
} | ||
cg.compactionRunsCompleted.Inc() | ||
return shouldRerun, compID, nil | ||
return shouldRerun, nil | ||
} | ||
|
||
// Issue347Error is a type wrapper for errors that should invoke repair process for broken block. | ||
|
@@ -974,7 +976,7 @@ func RepairIssue347(ctx context.Context, logger log.Logger, bkt objstore.Bucket, | |
return nil | ||
} | ||
|
||
func (cg *Group) compact(ctx context.Context, dir string, planner Planner, comp Compactor) (shouldRerun bool, compID ulid.ULID, _ error) { | ||
func (cg *Group) compact(ctx context.Context, dir string, planner Planner, comp Compactor) (shouldRerun bool, _ error) { | ||
cg.mtx.Lock() | ||
defer cg.mtx.Unlock() | ||
|
||
|
@@ -984,45 +986,74 @@ func (cg *Group) compact(ctx context.Context, dir string, planner Planner, comp | |
// TODO(bwplotka): It would really nice if we could still check for other overlaps than replica. In fact this should be checked | ||
// in syncer itself. Otherwise with vertical compaction enabled we will sacrifice this important check. | ||
if !cg.enableVerticalCompaction { | ||
return false, ulid.ULID{}, halt(errors.Wrap(err, "pre compaction overlap check")) | ||
return false, halt(errors.Wrap(err, "pre compaction overlap check")) | ||
} | ||
|
||
overlappingBlocks = true | ||
} | ||
|
||
var toCompact []*metadata.Meta | ||
var tasks []CompactionTask | ||
if err := tracing.DoInSpanWithErr(ctx, "compaction_planning", func(ctx context.Context) (e error) { | ||
toCompact, e = planner.Plan(ctx, cg.metasByMinTime) | ||
tasks, e = planner.Plan(ctx, cg.metasByMinTime) | ||
return e | ||
}); err != nil { | ||
return false, ulid.ULID{}, errors.Wrap(err, "plan compaction") | ||
return false, errors.Wrap(err, "plan compaction") | ||
} | ||
if len(toCompact) == 0 { | ||
if len(tasks) == 0 { | ||
// Nothing to do. | ||
return false, ulid.ULID{}, nil | ||
return false, nil | ||
} | ||
|
||
level.Info(cg.logger).Log("msg", "compaction available and planned; downloading blocks", "plan", fmt.Sprintf("%v", toCompact)) | ||
|
||
// Due to #183 we verify that none of the blocks in the plan have overlapping sources. | ||
// This is one potential source of how we could end up with duplicated chunks. | ||
uniqueSources := map[ulid.ULID]struct{}{} | ||
for _, task := range tasks { | ||
for _, m := range task { | ||
for _, s := range m.Compaction.Sources { | ||
if _, ok := uniqueSources[s]; ok { | ||
return false, halt(errors.Errorf("overlapping sources detected for plan %v", task)) | ||
} | ||
uniqueSources[s] = struct{}{} | ||
} | ||
} | ||
} | ||
|
||
level.Info(cg.logger).Log("msg", "compaction available and planned; downloading blocks", "plan", fmt.Sprintf("%v", tasks)) | ||
|
||
var ( | ||
wg sync.WaitGroup | ||
mu sync.Mutex | ||
groupErr errutil.MultiError | ||
rerunGroup bool | ||
) | ||
for _, task := range tasks { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am not sure how best to handle this, since we will have unbounded concurrency. We can have per-group concurrency in the short term, but that can still lead to one group slowing down everything else. Long term, maybe we want a single queue for tasks so that we can have global concurrency for all tasks. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we might need that sooner rather than later - it's common for users to crash their compactor with even TWO compactions at the same time. Why not use the usual workers approach? (e.g. at most 5 compactions at one time) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also, we have compactions at a bigger layer (the caller of this method, I believe) - can we have one concurrency loop so as not to get too complex in terms of unpredictable concurrency? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do you mean at most 5 parallel compactions inside a single group? Or 5 parallel vertical compactions across all groups? The former is easier to implement, the latter is better but will make this PR bigger :) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should have concurrency on a task level now, so users can also run one task at a time if they want to. |
||
wg.Add(1) | ||
go func(task CompactionTask) { | ||
defer wg.Done() | ||
rerunTask, err := cg.compactBlocks(ctx, dir, task, comp, overlappingBlocks) | ||
|
||
mu.Lock() | ||
defer mu.Unlock() | ||
rerunGroup = rerunGroup || rerunTask | ||
groupErr.Add(err) | ||
}(task) | ||
} | ||
wg.Wait() | ||
|
||
return rerunGroup, groupErr.Err() | ||
} | ||
|
||
func (cg *Group) compactBlocks(ctx context.Context, dir string, task CompactionTask, comp Compactor, overlappingBlocks bool) (bool, error) { | ||
// Once we have a plan we need to download the actual data. | ||
groupCompactionBegin := time.Now() | ||
begin := groupCompactionBegin | ||
compactionBegin := time.Now() | ||
begin := compactionBegin | ||
|
||
g, errCtx := errgroup.WithContext(ctx) | ||
g.SetLimit(cg.compactBlocksFetchConcurrency) | ||
|
||
toCompactDirs := make([]string, 0, len(toCompact)) | ||
for _, m := range toCompact { | ||
toCompactDirs := make([]string, 0, len(task)) | ||
for _, m := range task { | ||
bdir := filepath.Join(dir, m.ULID.String()) | ||
for _, s := range m.Compaction.Sources { | ||
if _, ok := uniqueSources[s]; ok { | ||
return false, ulid.ULID{}, halt(errors.Errorf("overlapping sources detected for plan %v", toCompact)) | ||
} | ||
uniqueSources[s] = struct{}{} | ||
} | ||
func(ctx context.Context, meta *metadata.Meta) { | ||
g.Go(func() error { | ||
if err := tracing.DoInSpanWithErr(ctx, "compaction_block_download", func(ctx context.Context) error { | ||
|
@@ -1062,33 +1093,35 @@ func (cg *Group) compact(ctx context.Context, dir string, planner Planner, comp | |
|
||
toCompactDirs = append(toCompactDirs, bdir) | ||
} | ||
|
||
sourceBlockStr := fmt.Sprintf("%v", toCompactDirs) | ||
|
||
if err := g.Wait(); err != nil { | ||
return false, ulid.ULID{}, err | ||
return false, err | ||
} | ||
|
||
level.Info(cg.logger).Log("msg", "downloaded and verified blocks; compacting blocks", "plan", sourceBlockStr, "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds()) | ||
|
||
var compID ulid.ULID | ||
begin = time.Now() | ||
if err := tracing.DoInSpanWithErr(ctx, "compaction", func(ctx context.Context) (e error) { | ||
compID, e = comp.Compact(dir, toCompactDirs, nil) | ||
return e | ||
}); err != nil { | ||
return false, ulid.ULID{}, halt(errors.Wrapf(err, "compact blocks %v", toCompactDirs)) | ||
return false, halt(errors.Wrapf(err, "compact blocks %v", toCompactDirs)) | ||
} | ||
if compID == (ulid.ULID{}) { | ||
// Prometheus compactor found that the compacted block would have no samples. | ||
level.Info(cg.logger).Log("msg", "compacted block would have no samples, deleting source blocks", "blocks", sourceBlockStr) | ||
for _, meta := range toCompact { | ||
for _, meta := range task { | ||
if meta.Stats.NumSamples == 0 { | ||
if err := cg.deleteBlock(meta.ULID, filepath.Join(dir, meta.ULID.String())); err != nil { | ||
level.Warn(cg.logger).Log("msg", "failed to mark for deletion an empty block found during compaction", "block", meta.ULID) | ||
} | ||
} | ||
} | ||
// Even though this block was empty, there may be more work to do. | ||
return true, ulid.ULID{}, nil | ||
return true, nil | ||
} | ||
cg.compactions.Inc() | ||
if overlappingBlocks { | ||
|
@@ -1107,26 +1140,26 @@ func (cg *Group) compact(ctx context.Context, dir string, planner Planner, comp | |
SegmentFiles: block.GetSegmentFiles(bdir), | ||
}, nil) | ||
if err != nil { | ||
return false, ulid.ULID{}, errors.Wrapf(err, "failed to finalize the block %s", bdir) | ||
return false, errors.Wrapf(err, "failed to finalize the block %s", bdir) | ||
} | ||
|
||
if err = os.Remove(filepath.Join(bdir, "tombstones")); err != nil { | ||
return false, ulid.ULID{}, errors.Wrap(err, "remove tombstones") | ||
return false, errors.Wrap(err, "remove tombstones") | ||
} | ||
|
||
// Ensure the output block is valid. | ||
err = tracing.DoInSpanWithErr(ctx, "compaction_verify_index", func(ctx context.Context) error { | ||
return block.VerifyIndex(cg.logger, index, newMeta.MinTime, newMeta.MaxTime) | ||
}) | ||
if !cg.acceptMalformedIndex && err != nil { | ||
return false, ulid.ULID{}, halt(errors.Wrapf(err, "invalid result block %s", bdir)) | ||
return false, halt(errors.Wrapf(err, "invalid result block %s", bdir)) | ||
} | ||
|
||
// Ensure the output block is not overlapping with anything else, | ||
// unless vertical compaction is enabled. | ||
if !cg.enableVerticalCompaction { | ||
if err := cg.areBlocksOverlapping(newMeta, toCompact...); err != nil { | ||
return false, ulid.ULID{}, halt(errors.Wrapf(err, "resulted compacted block %s overlaps with something", bdir)) | ||
if err := cg.areBlocksOverlapping(newMeta, task...); err != nil { | ||
return false, halt(errors.Wrapf(err, "resulted compacted block %s overlaps with something", bdir)) | ||
} | ||
} | ||
|
||
|
@@ -1136,26 +1169,26 @@ func (cg *Group) compact(ctx context.Context, dir string, planner Planner, comp | |
return block.Upload(ctx, cg.logger, cg.bkt, bdir, cg.hashFunc, objstore.WithUploadConcurrency(cg.blockFilesConcurrency)) | ||
}) | ||
if err != nil { | ||
return false, ulid.ULID{}, retry(errors.Wrapf(err, "upload of %s failed", compID)) | ||
return false, retry(errors.Wrapf(err, "upload of %s failed", compID)) | ||
} | ||
level.Info(cg.logger).Log("msg", "uploaded block", "result_block", compID, "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds()) | ||
|
||
// Mark for deletion the blocks we just compacted from the group and bucket so they do not get included | ||
// into the next planning cycle. | ||
// Eventually the block we just uploaded should get synced into the group again (including sync-delay). | ||
for _, meta := range toCompact { | ||
for _, meta := range task { | ||
err = tracing.DoInSpanWithErr(ctx, "compaction_block_delete", func(ctx context.Context) error { | ||
return cg.deleteBlock(meta.ULID, filepath.Join(dir, meta.ULID.String())) | ||
}, opentracing.Tags{"block.id": meta.ULID}) | ||
if err != nil { | ||
return false, ulid.ULID{}, retry(errors.Wrapf(err, "mark old block for deletion from bucket")) | ||
return false, retry(errors.Wrapf(err, "mark old block for deletion from bucket")) | ||
} | ||
cg.groupGarbageCollectedBlocks.Inc() | ||
} | ||
|
||
level.Info(cg.logger).Log("msg", "finished compacting blocks", "result_block", compID, "source_blocks", sourceBlockStr, | ||
"duration", time.Since(groupCompactionBegin), "duration_ms", time.Since(groupCompactionBegin).Milliseconds()) | ||
return true, compID, nil | ||
"duration", time.Since(compactionBegin), "duration_ms", time.Since(compactionBegin).Milliseconds()) | ||
return true, nil | ||
} | ||
|
||
func (cg *Group) deleteBlock(id ulid.ULID, bdir string) error { | ||
|
@@ -1247,43 +1280,57 @@ func (c *BucketCompactor) Compact(ctx context.Context) (rerr error) { | |
go func() { | ||
defer wg.Done() | ||
for g := range groupChan { | ||
shouldRerunGroup, _, err := g.Compact(workCtx, c.compactDir, c.planner, c.comp) | ||
if err == nil { | ||
shouldRerunGroup, compactErrs := g.Compact(workCtx, c.compactDir, c.planner, c.comp) | ||
if compactErrs == nil { | ||
if shouldRerunGroup { | ||
mtx.Lock() | ||
finishedAllGroups = false | ||
mtx.Unlock() | ||
} | ||
continue | ||
} | ||
errs, ok := compactErrs.(errutil.NonNilMultiError) | ||
if !ok { | ||
errs = []error{compactErrs} | ||
} | ||
|
||
if IsIssue347Error(err) { | ||
if err := RepairIssue347(workCtx, c.logger, c.bkt, c.sy.metrics.blocksMarkedForDeletion, err); err == nil { | ||
mtx.Lock() | ||
finishedAllGroups = false | ||
mtx.Unlock() | ||
continue | ||
var nonRecoverableErrs errutil.MultiError | ||
for _, err := range errs { | ||
if IsIssue347Error(err) { | ||
if err := RepairIssue347(workCtx, c.logger, c.bkt, c.sy.metrics.blocksMarkedForDeletion, err); err == nil { | ||
mtx.Lock() | ||
finishedAllGroups = false | ||
mtx.Unlock() | ||
continue | ||
} | ||
} | ||
} | ||
// If block has out of order chunk and it has been configured to skip it, | ||
// then we can mark the block for no compaction so that the next compaction run | ||
// will skip it. | ||
if IsOutOfOrderChunkError(err) && c.skipBlocksWithOutOfOrderChunks { | ||
if err := block.MarkForNoCompact( | ||
ctx, | ||
c.logger, | ||
c.bkt, | ||
err.(OutOfOrderChunksError).id, | ||
metadata.OutOfOrderChunksNoCompactReason, | ||
"OutofOrderChunk: marking block with out-of-order series/chunks to as no compact to unblock compaction", g.blocksMarkedForNoCompact); err == nil { | ||
mtx.Lock() | ||
finishedAllGroups = false | ||
mtx.Unlock() | ||
continue | ||
|
||
// If block has out of order chunk and it has been configured to skip it, | ||
// then we can mark the block for no compaction so that the next compaction run | ||
// will skip it. | ||
if IsOutOfOrderChunkError(err) && c.skipBlocksWithOutOfOrderChunks { | ||
if err := block.MarkForNoCompact( | ||
ctx, | ||
c.logger, | ||
c.bkt, | ||
err.(OutOfOrderChunksError).id, | ||
metadata.OutOfOrderChunksNoCompactReason, | ||
"OutofOrderChunk: marking block with out-of-order series/chunks to as no compact to unblock compaction", g.blocksMarkedForNoCompact, | ||
); err == nil { | ||
mtx.Lock() | ||
finishedAllGroups = false | ||
mtx.Unlock() | ||
continue | ||
} | ||
} | ||
|
||
nonRecoverableErrs.Add(err) | ||
} | ||
|
||
if nonRecoverableErrs.Err() != nil { | ||
errChan <- errors.Wrapf(nonRecoverableErrs.Err(), "group %s", g.Key()) | ||
return | ||
} | ||
errChan <- errors.Wrapf(err, "group %s", g.Key()) | ||
return | ||
} | ||
}() | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The ULID return value was not used anywhere, which is why I've removed it here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is to fit into the Prometheus interface. In particular, in Prometheus an empty ULID meant special behaviour when compacting blocks - we rely on it to check whether compaction really compacted anything.
I think it's fine to change the interface if you want - it's not super important to allow Prometheus to use Thanos compaction or anything like that. Just be careful with Cortex @yeya24 and Mimir @pracucci, who might use this interface/our structs here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I checked on the Cortex side and we are not using it either. I guess the same goes for Mimir, as I heard they are moving away from importing the Thanos main repo directly.
I don't have a strong preference here. OK to clean it up or keep it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for asking! I confirm we're not running the Thanos compactor or store-gateway in Mimir anymore.