diff --git a/br/pkg/config/kv.go b/br/pkg/config/kv.go index e211887099854..f1f3752a3085c 100644 --- a/br/pkg/config/kv.go +++ b/br/pkg/config/kv.go @@ -7,7 +7,7 @@ import ( "github.com/docker/go-units" ) -type ConfigTerm[T uint | uint64] struct { +type ConfigTerm[T any] struct { Value T Modified bool } @@ -16,6 +16,7 @@ type KVConfig struct { ImportGoroutines ConfigTerm[uint] MergeRegionSize ConfigTerm[uint64] MergeRegionKeyCount ConfigTerm[uint64] + SplitRegionOnTable ConfigTerm[bool] } func ParseImportThreadsFromConfig(resp []byte) (uint, error) { @@ -35,6 +36,19 @@ func ParseImportThreadsFromConfig(resp []byte) (uint, error) { return c.Import.Threads, nil } +func ParseSplitRegionOnTable(resp []byte) (bool, error) { + type coprocessor struct { + SplitRegionOnTable bool `json:"split-region-on-table"` + } + + type config struct { + Cop coprocessor `json:"coprocessor"` + } + var c config + e := json.Unmarshal(resp, &c) + return c.Cop.SplitRegionOnTable, e +} + func ParseMergeRegionSizeFromConfig(resp []byte) (uint64, uint64, error) { type coprocessor struct { RegionSplitSize string `json:"region-split-size"` diff --git a/br/pkg/conn/conn.go b/br/pkg/conn/conn.go index cdb81a011c8a5..c6f9f9da6b137 100644 --- a/br/pkg/conn/conn.go +++ b/br/pkg/conn/conn.go @@ -313,6 +313,7 @@ func (mgr *Mgr) ProcessTiKVConfigs(ctx context.Context, cfg *kvconfig.KVConfig, mergeRegionSize := cfg.MergeRegionSize mergeRegionKeyCount := cfg.MergeRegionKeyCount importGoroutines := cfg.ImportGoroutines + splitRegionOnTable := cfg.SplitRegionOnTable if mergeRegionSize.Modified && mergeRegionKeyCount.Modified && importGoroutines.Modified { log.Info("no need to retrieve the config from tikv if user has set the config") @@ -324,6 +325,14 @@ func (mgr *Mgr) ProcessTiKVConfigs(ctx context.Context, cfg *kvconfig.KVConfig, if err != nil { return err } + if !splitRegionOnTable.Modified { + splitTable, e := kvconfig.ParseSplitRegionOnTable(respBytes) + if e != nil { + log.Warn("Failed to parse split region on table from config", logutil.ShortError(e)) + return e + } + splitRegionOnTable.Value = splitRegionOnTable.Value || splitTable + } if !mergeRegionSize.Modified || !mergeRegionKeyCount.Modified { size, keys, e := kvconfig.ParseMergeRegionSizeFromConfig(respBytes) if e != nil { @@ -347,6 +356,7 @@ func (mgr *Mgr) ProcessTiKVConfigs(ctx context.Context, cfg *kvconfig.KVConfig, } } // replace the value + cfg.SplitRegionOnTable = splitRegionOnTable cfg.MergeRegionSize = mergeRegionSize cfg.MergeRegionKeyCount = mergeRegionKeyCount cfg.ImportGoroutines = importGoroutines diff --git a/br/pkg/restore/internal/log_split/BUILD.bazel b/br/pkg/restore/internal/log_split/BUILD.bazel index ffe8b4f5f1dea..d929b04c003ad 100644 --- a/br/pkg/restore/internal/log_split/BUILD.bazel +++ b/br/pkg/restore/internal/log_split/BUILD.bazel @@ -10,10 +10,9 @@ go_library( visibility = ["//br/pkg/restore:__subpackages__"], deps = [ "//br/pkg/logutil", - "//br/pkg/restore/internal/utils", + "//br/pkg/restore/internal/snap_split", "//br/pkg/restore/split", "//br/pkg/restore/utils", - "//br/pkg/rtree", "//br/pkg/utils", "//pkg/kv", "//pkg/tablecodec", @@ -40,7 +39,7 @@ go_test( flaky = True, shard_count = 4, deps = [ - "//br/pkg/restore/internal/utils", + "//br/pkg/restore/internal/snap_split", "//br/pkg/restore/split", "//br/pkg/restore/utils", "//br/pkg/utiltest", diff --git a/br/pkg/restore/internal/log_split/split.go b/br/pkg/restore/internal/log_split/split.go index 1c2319d513425..eb9b3165ce761 100644 --- 
a/br/pkg/restore/internal/log_split/split.go +++ b/br/pkg/restore/internal/log_split/split.go @@ -24,10 +24,9 @@ import ( "github.com/pingcap/errors" backuppb "github.com/pingcap/kvproto/pkg/brpb" "github.com/pingcap/log" - "github.com/pingcap/tidb/br/pkg/restore/internal/utils" + snapsplit "github.com/pingcap/tidb/br/pkg/restore/internal/snap_split" "github.com/pingcap/tidb/br/pkg/restore/split" restoreutils "github.com/pingcap/tidb/br/pkg/restore/utils" - "github.com/pingcap/tidb/br/pkg/rtree" "github.com/pingcap/tidb/pkg/tablecodec" "github.com/pingcap/tidb/pkg/util" "github.com/pingcap/tidb/pkg/util/codec" @@ -139,11 +138,11 @@ func (helper *LogSplitHelper) Merge(file *backuppb.DataFileInfo) { }) } -type splitFunc = func(context.Context, *utils.RegionSplitter, uint64, int64, *split.RegionInfo, []Valued) error +type splitFunc = func(context.Context, *snapsplit.RegionSplitter, uint64, int64, *split.RegionInfo, []Valued) error func (helper *LogSplitHelper) splitRegionByPoints( ctx context.Context, - regionSplitter *utils.RegionSplitter, + regionSplitter *snapsplit.RegionSplitter, initialLength uint64, initialNumber int64, region *split.RegionInfo, @@ -176,14 +175,10 @@ func (helper *LogSplitHelper) splitRegionByPoints( newRegions, errSplit := regionSplitter.SplitWaitAndScatter(ctx, region, splitPoints) if errSplit != nil { log.Warn("failed to split the scaned region", zap.Error(errSplit)) - _, startKey, _ := codec.DecodeBytes(region.Region.StartKey, nil) - ranges := make([]rtree.Range, 0, len(splitPoints)) - for _, point := range splitPoints { - ranges = append(ranges, rtree.Range{StartKey: startKey, EndKey: point}) - startKey = point - } - - return regionSplitter.ExecuteSplit(ctx, ranges) + sort.Slice(splitPoints, func(i, j int) bool { + return bytes.Compare(splitPoints[i], splitPoints[j]) < 0 + }) + return regionSplitter.ExecuteSplit(ctx, splitPoints) } select { case <-ctx.Done(): @@ -205,7 +200,7 @@ func SplitPoint( ) (err error) { // common status var ( - regionSplitter *utils.RegionSplitter = utils.NewRegionSplitter(client) + regionSplitter *snapsplit.RegionSplitter = snapsplit.NewRegionSplitter(client) ) // region traverse status var ( @@ -357,7 +352,7 @@ func (helper *LogSplitHelper) Split(ctx context.Context) error { } } - regionSplitter := utils.NewRegionSplitter(helper.client) + regionSplitter := snapsplit.NewRegionSplitter(helper.client) // It is too expensive to stop recovery and wait for a small number of regions // to complete scatter, so the maximum waiting time is reduced to 1 minute. 
_ = regionSplitter.WaitForScatterRegionsTimeout(ctx, scatterRegions, time.Minute) diff --git a/br/pkg/restore/internal/log_split/split_test.go b/br/pkg/restore/internal/log_split/split_test.go index 578d9eefb447d..acbf61ae12f29 100644 --- a/br/pkg/restore/internal/log_split/split_test.go +++ b/br/pkg/restore/internal/log_split/split_test.go @@ -23,7 +23,7 @@ import ( backuppb "github.com/pingcap/kvproto/pkg/brpb" "github.com/pingcap/kvproto/pkg/import_sstpb" logsplit "github.com/pingcap/tidb/br/pkg/restore/internal/log_split" - "github.com/pingcap/tidb/br/pkg/restore/internal/utils" + snapsplit "github.com/pingcap/tidb/br/pkg/restore/internal/snap_split" "github.com/pingcap/tidb/br/pkg/restore/split" restoreutils "github.com/pingcap/tidb/br/pkg/restore/utils" "github.com/pingcap/tidb/br/pkg/utiltest" @@ -66,7 +66,7 @@ func TestSplitPoint(t *testing.T) { client.AppendRegion(keyWithTablePrefix(tableID, "j"), keyWithTablePrefix(tableID+1, "a")) iter := logsplit.NewSplitHelperIteratorForTest(splitHelper, tableID, rewriteRules) - err := logsplit.SplitPoint(ctx, iter, client, func(ctx context.Context, rs *utils.RegionSplitter, u uint64, o int64, ri *split.RegionInfo, v []logsplit.Valued) error { + err := logsplit.SplitPoint(ctx, iter, client, func(ctx context.Context, rs *snapsplit.RegionSplitter, u uint64, o int64, ri *split.RegionInfo, v []logsplit.Valued) error { require.Equal(t, u, uint64(0)) require.Equal(t, o, int64(0)) require.Equal(t, ri.Region.StartKey, keyWithTablePrefix(tableID, "a")) @@ -124,7 +124,7 @@ func TestSplitPoint2(t *testing.T) { firstSplit := true iter := logsplit.NewSplitHelperIteratorForTest(splitHelper, tableID, rewriteRules) - err := logsplit.SplitPoint(ctx, iter, client, func(ctx context.Context, rs *utils.RegionSplitter, u uint64, o int64, ri *split.RegionInfo, v []logsplit.Valued) error { + err := logsplit.SplitPoint(ctx, iter, client, func(ctx context.Context, rs *snapsplit.RegionSplitter, u uint64, o int64, ri *split.RegionInfo, v []logsplit.Valued) error { if firstSplit { require.Equal(t, u, uint64(0)) require.Equal(t, o, int64(0)) diff --git a/br/pkg/restore/internal/utils/BUILD.bazel b/br/pkg/restore/internal/snap_split/BUILD.bazel similarity index 73% rename from br/pkg/restore/internal/utils/BUILD.bazel rename to br/pkg/restore/internal/snap_split/BUILD.bazel index 9d791b3d5fefd..ab6df360220d6 100644 --- a/br/pkg/restore/internal/utils/BUILD.bazel +++ b/br/pkg/restore/internal/snap_split/BUILD.bazel @@ -1,16 +1,12 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( - name = "utils", + name = "snap_split", srcs = ["split.go"], - importpath = "github.com/pingcap/tidb/br/pkg/restore/internal/utils", + importpath = "github.com/pingcap/tidb/br/pkg/restore/internal/snap_split", visibility = ["//br/pkg/restore:__subpackages__"], deps = [ - "//br/pkg/errors", - "//br/pkg/logutil", "//br/pkg/restore/split", - "//br/pkg/rtree", - "@com_github_opentracing_opentracing_go//:opentracing-go", "@com_github_pingcap_errors//:errors", "@com_github_pingcap_log//:log", "@org_uber_go_zap//:zap", @@ -18,17 +14,16 @@ go_library( ) go_test( - name = "utils_test", + name = "snap_split_test", timeout = "short", srcs = ["split_test.go"], flaky = True, - shard_count = 5, + shard_count = 4, deps = [ - ":utils", + ":snap_split", "//br/pkg/restore/split", "//br/pkg/restore/utils", "//br/pkg/rtree", - "//pkg/tablecodec", "//pkg/util/codec", "@com_github_pingcap_kvproto//pkg/import_sstpb", "@com_github_stretchr_testify//require", diff --git 
a/br/pkg/restore/internal/utils/split.go b/br/pkg/restore/internal/snap_split/split.go similarity index 70% rename from br/pkg/restore/internal/utils/split.go rename to br/pkg/restore/internal/snap_split/split.go index 82ff17acc2817..fca4a69cb5e6b 100644 --- a/br/pkg/restore/internal/utils/split.go +++ b/br/pkg/restore/internal/snap_split/split.go @@ -1,18 +1,14 @@ // Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. -package utils +package snapsplit import ( "context" "time" - "github.com/opentracing/opentracing-go" "github.com/pingcap/errors" "github.com/pingcap/log" - berrors "github.com/pingcap/tidb/br/pkg/errors" - "github.com/pingcap/tidb/br/pkg/logutil" "github.com/pingcap/tidb/br/pkg/restore/split" - "github.com/pingcap/tidb/br/pkg/rtree" "go.uber.org/zap" ) @@ -41,37 +37,15 @@ func (rs *RegionSplitter) SplitWaitAndScatter(ctx context.Context, region *split // note: all ranges and rewrite rules must have raw key. func (rs *RegionSplitter) ExecuteSplit( ctx context.Context, - ranges []rtree.Range, + sortedSplitKeys [][]byte, ) error { - if len(ranges) == 0 { - log.Info("skip split regions, no range") + if len(sortedSplitKeys) == 0 { + log.Info("skip split regions, no split keys") return nil } - if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { - span1 := span.Tracer().StartSpan("RegionSplitter.Split", opentracing.ChildOf(span.Context())) - defer span1.Finish() - ctx = opentracing.ContextWithSpan(ctx, span1) - } - - // Sort the range for getting the min and max key of the ranges - // TODO: this sort may not needed if we sort tables after creatation outside. - sortedRanges, errSplit := SortRanges(ranges) - if errSplit != nil { - return errors.Trace(errSplit) - } - if len(sortedRanges) == 0 { - log.Info("skip split regions after sorted, no range") - return nil - } - sortedKeys := make([][]byte, 0, len(sortedRanges)) - totalRangeSize := uint64(0) - for _, r := range sortedRanges { - sortedKeys = append(sortedKeys, r.EndKey) - totalRangeSize += r.Size - } - // the range size must be greater than 0 here - return rs.executeSplitByRanges(ctx, sortedKeys) + log.Info("execute split sorted keys", zap.Int("keys count", len(sortedSplitKeys))) + return rs.executeSplitByRanges(ctx, sortedSplitKeys) } func (rs *RegionSplitter) executeSplitByRanges( @@ -151,20 +125,3 @@ func (rs *RegionSplitter) WaitForScatterRegionsTimeout(ctx context.Context, regi leftRegions, _ := rs.client.WaitRegionsScattered(ctx2, regionInfos) return leftRegions } - -// SortRanges checks if the range overlapped and sort them. 
-func SortRanges(ranges []rtree.Range) ([]rtree.Range, error) { - rangeTree := rtree.NewRangeTree() - for _, rg := range ranges { - if out := rangeTree.InsertRange(rg); out != nil { - log.Error("insert ranges overlapped", - logutil.Key("startKeyOut", out.StartKey), - logutil.Key("endKeyOut", out.EndKey), - logutil.Key("startKeyIn", rg.StartKey), - logutil.Key("endKeyIn", rg.EndKey)) - return nil, errors.Annotatef(berrors.ErrInvalidRange, "ranges overlapped") - } - } - sortedRanges := rangeTree.GetSortedRanges() - return sortedRanges, nil -} diff --git a/br/pkg/restore/internal/utils/split_test.go b/br/pkg/restore/internal/snap_split/split_test.go similarity index 52% rename from br/pkg/restore/internal/utils/split_test.go rename to br/pkg/restore/internal/snap_split/split_test.go index 4eeacb69f61c9..0507950d589c5 100644 --- a/br/pkg/restore/internal/utils/split_test.go +++ b/br/pkg/restore/internal/snap_split/split_test.go @@ -1,17 +1,18 @@ // Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. -package utils_test +package snapsplit_test import ( + "bytes" "context" + "sort" "testing" "github.com/pingcap/kvproto/pkg/import_sstpb" - "github.com/pingcap/tidb/br/pkg/restore/internal/utils" + snapsplit "github.com/pingcap/tidb/br/pkg/restore/internal/snap_split" "github.com/pingcap/tidb/br/pkg/restore/split" restoreutils "github.com/pingcap/tidb/br/pkg/restore/utils" "github.com/pingcap/tidb/br/pkg/rtree" - "github.com/pingcap/tidb/pkg/tablecodec" "github.com/pingcap/tidb/pkg/util/codec" "github.com/stretchr/testify/require" ) @@ -20,13 +21,13 @@ func TestScanEmptyRegion(t *testing.T) { mockPDCli := split.NewMockPDClientForSplit() mockPDCli.SetRegions([][]byte{{}, {12}, {34}, {}}) client := split.NewClient(mockPDCli, nil, nil, 100, 4) - ranges := initRanges() - // make ranges has only one - ranges = ranges[0:1] - regionSplitter := utils.NewRegionSplitter(client) + keys := initKeys() + // make keys has only one + keys = keys[0:1] + regionSplitter := snapsplit.NewRegionSplitter(client) ctx := context.Background() - err := regionSplitter.ExecuteSplit(ctx, ranges) + err := regionSplitter.ExecuteSplit(ctx, keys) // should not return error with only one range entry require.NoError(t, err) } @@ -35,7 +36,7 @@ func TestSplitEmptyRegion(t *testing.T) { mockPDCli := split.NewMockPDClientForSplit() mockPDCli.SetRegions([][]byte{{}, {12}, {34}, {}}) client := split.NewClient(mockPDCli, nil, nil, 100, 4) - regionSplitter := utils.NewRegionSplitter(client) + regionSplitter := snapsplit.NewRegionSplitter(client) err := regionSplitter.ExecuteSplit(context.Background(), nil) require.NoError(t, err) } @@ -53,17 +54,21 @@ func TestSplitAndScatter(t *testing.T) { mockPDCli := split.NewMockPDClientForSplit() mockPDCli.SetRegions(rangeBoundaries) client := split.NewClient(mockPDCli, nil, nil, 100, 4) - regionSplitter := utils.NewRegionSplitter(client) + regionSplitter := snapsplit.NewRegionSplitter(client) ctx := context.Background() ranges := initRanges() rules := initRewriteRules() - for i, rg := range ranges { + splitKeys := make([][]byte, 0, len(ranges)) + for _, rg := range ranges { tmp, err := restoreutils.RewriteRange(&rg, rules) require.NoError(t, err) - ranges[i] = *tmp + splitKeys = append(splitKeys, tmp.EndKey) } - err := regionSplitter.ExecuteSplit(ctx, ranges) + sort.Slice(splitKeys, func(i, j int) bool { + return bytes.Compare(splitKeys[i], splitKeys[j]) < 0 + }) + err := regionSplitter.ExecuteSplit(ctx, splitKeys) require.NoError(t, err) regions := mockPDCli.Regions.ScanRange(nil, nil, 100) 
expected := [][]byte{[]byte(""), []byte("aay"), []byte("bba"), []byte("bbf"), []byte("bbh"), []byte("bbj"), []byte("cca"), []byte("xxe"), []byte("xxz"), []byte("")} @@ -86,20 +91,15 @@ func encodeBytes(keys [][]byte) { func TestRawSplit(t *testing.T) { // Fix issue #36490. - ranges := []rtree.Range{ - { - StartKey: []byte{0}, - EndKey: []byte{}, - }, - } + splitKeys := [][]byte{{}} ctx := context.Background() rangeBoundaries := [][]byte{[]byte(""), []byte("aay"), []byte("bba"), []byte("bbh"), []byte("cca"), []byte("")} mockPDCli := split.NewMockPDClientForSplit() mockPDCli.SetRegions(rangeBoundaries) client := split.NewClient(mockPDCli, nil, nil, 100, 4, split.WithRawKV()) - regionSplitter := utils.NewRegionSplitter(client) - err := regionSplitter.ExecuteSplit(ctx, ranges) + regionSplitter := snapsplit.NewRegionSplitter(client) + err := regionSplitter.ExecuteSplit(ctx, splitKeys) require.NoError(t, err) regions := mockPDCli.Regions.ScanRange(nil, nil, 100) @@ -110,6 +110,16 @@ func TestRawSplit(t *testing.T) { } } +// keys: aae, aaz, ccf, ccj +func initKeys() [][]byte { + return [][]byte{ + []byte("aae"), + []byte("aaz"), + []byte("ccf"), + []byte("ccj"), + } +} + // range: [aaa, aae), [aae, aaz), [ccd, ccf), [ccf, ccj) func initRanges() []rtree.Range { var ranges [4]rtree.Range @@ -146,89 +156,3 @@ func initRewriteRules() *restoreutils.RewriteRules { Data: rules[:], } } - -func TestSortRange(t *testing.T) { - dataRules := []*import_sstpb.RewriteRule{ - {OldKeyPrefix: tablecodec.GenTableRecordPrefix(1), NewKeyPrefix: tablecodec.GenTableRecordPrefix(4)}, - {OldKeyPrefix: tablecodec.GenTableRecordPrefix(2), NewKeyPrefix: tablecodec.GenTableRecordPrefix(5)}, - } - rewriteRules := &restoreutils.RewriteRules{ - Data: dataRules, - } - ranges1 := []rtree.Range{ - { - StartKey: append(tablecodec.GenTableRecordPrefix(1), []byte("aaa")...), - EndKey: append(tablecodec.GenTableRecordPrefix(1), []byte("bbb")...), Files: nil, - }, - } - for i, rg := range ranges1 { - tmp, _ := restoreutils.RewriteRange(&rg, rewriteRules) - ranges1[i] = *tmp - } - rs1, err := utils.SortRanges(ranges1) - require.NoErrorf(t, err, "sort range1 failed: %v", err) - rangeEquals(t, rs1, []rtree.Range{ - { - StartKey: append(tablecodec.GenTableRecordPrefix(4), []byte("aaa")...), - EndKey: append(tablecodec.GenTableRecordPrefix(4), []byte("bbb")...), Files: nil, - }, - }) - - ranges2 := []rtree.Range{ - { - StartKey: append(tablecodec.GenTableRecordPrefix(1), []byte("aaa")...), - EndKey: append(tablecodec.GenTableRecordPrefix(2), []byte("bbb")...), Files: nil, - }, - } - for _, rg := range ranges2 { - _, err := restoreutils.RewriteRange(&rg, rewriteRules) - require.Error(t, err) - require.Regexp(t, "table id mismatch.*", err.Error()) - } - - ranges3 := []rtree.Range{ - {StartKey: []byte("aaa"), EndKey: []byte("aae")}, - {StartKey: []byte("aae"), EndKey: []byte("aaz")}, - {StartKey: []byte("ccd"), EndKey: []byte("ccf")}, - {StartKey: []byte("ccf"), EndKey: []byte("ccj")}, - } - rewriteRules1 := &restoreutils.RewriteRules{ - Data: []*import_sstpb.RewriteRule{ - { - OldKeyPrefix: []byte("aa"), - NewKeyPrefix: []byte("xx"), - }, { - OldKeyPrefix: []byte("cc"), - NewKeyPrefix: []byte("bb"), - }, - }, - } - for i, rg := range ranges3 { - tmp, _ := restoreutils.RewriteRange(&rg, rewriteRules1) - ranges3[i] = *tmp - } - rs3, err := utils.SortRanges(ranges3) - require.NoErrorf(t, err, "sort range1 failed: %v", err) - rangeEquals(t, rs3, []rtree.Range{ - {StartKey: []byte("bbd"), EndKey: []byte("bbf"), Files: nil}, - {StartKey: 
[]byte("bbf"), EndKey: []byte("bbj"), Files: nil}, - {StartKey: []byte("xxa"), EndKey: []byte("xxe"), Files: nil}, - {StartKey: []byte("xxe"), EndKey: []byte("xxz"), Files: nil}, - }) - - // overlap ranges - ranges4 := []rtree.Range{ - {StartKey: []byte("aaa"), EndKey: []byte("aae")}, - {StartKey: []byte("aaa"), EndKey: []byte("aaz")}, - } - _, err = utils.SortRanges(ranges4) - require.Error(t, err) -} - -func rangeEquals(t *testing.T, obtained, expected []rtree.Range) { - require.Equal(t, len(expected), len(obtained)) - for i := range obtained { - require.Equal(t, expected[i].StartKey, obtained[i].StartKey) - require.Equal(t, expected[i].EndKey, obtained[i].EndKey) - } -} diff --git a/br/pkg/restore/snap_client/BUILD.bazel b/br/pkg/restore/snap_client/BUILD.bazel index 6f01151d99bc6..53d9dbf2a3244 100644 --- a/br/pkg/restore/snap_client/BUILD.bazel +++ b/br/pkg/restore/snap_client/BUILD.bazel @@ -3,13 +3,12 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "snap_client", srcs = [ - "batcher.go", "client.go", - "context_manager.go", "import.go", "pipeline_items.go", + "placement_rule_manager.go", "systable_restore.go", - "zap.go", + "tikv_sender.go", ], importpath = "github.com/pingcap/tidb/br/pkg/restore/snap_client", visibility = ["//visibility:public"], @@ -27,10 +26,9 @@ go_library( "//br/pkg/restore/internal/import_client", "//br/pkg/restore/internal/prealloc_db", "//br/pkg/restore/internal/prealloc_table_id", - "//br/pkg/restore/internal/utils", + "//br/pkg/restore/internal/snap_split", "//br/pkg/restore/split", "//br/pkg/restore/utils", - "//br/pkg/rtree", "//br/pkg/storage", "//br/pkg/summary", "//br/pkg/utils", @@ -75,32 +73,30 @@ go_test( name = "snap_client_test", timeout = "short", srcs = [ - "batcher_test.go", "client_test.go", - "context_manager_test.go", "export_test.go", "import_test.go", "main_test.go", - "pipeline_items_test.go", + "placement_rule_manager_test.go", "systable_restore_test.go", + "tikv_sender_test.go", ], embed = [":snap_client"], flaky = True, - shard_count = 23, + shard_count = 17, deps = [ "//br/pkg/errors", "//br/pkg/glue", "//br/pkg/gluetidb", - "//br/pkg/logutil", "//br/pkg/metautil", "//br/pkg/mock", "//br/pkg/restore", "//br/pkg/restore/internal/import_client", "//br/pkg/restore/utils", - "//br/pkg/rtree", "//br/pkg/utils", "//br/pkg/utiltest", "//pkg/domain", + "//pkg/kv", "//pkg/parser/model", "//pkg/parser/mysql", "//pkg/session", @@ -115,11 +111,9 @@ go_test( "@com_github_pingcap_kvproto//pkg/import_sstpb", "@com_github_pingcap_kvproto//pkg/kvrpcpb", "@com_github_pingcap_kvproto//pkg/metapb", - "@com_github_pingcap_log//:log", "@com_github_stretchr_testify//require", "@com_github_tikv_pd_client//:client", "@org_golang_x_exp//slices", "@org_uber_go_goleak//:goleak", - "@org_uber_go_zap//:zap", ], ) diff --git a/br/pkg/restore/snap_client/batcher.go b/br/pkg/restore/snap_client/batcher.go deleted file mode 100644 index 8795a3044397c..0000000000000 --- a/br/pkg/restore/snap_client/batcher.go +++ /dev/null @@ -1,456 +0,0 @@ -// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. - -package snapclient - -import ( - "context" - "sync" - "sync/atomic" - "time" - - "github.com/opentracing/opentracing-go" - backuppb "github.com/pingcap/kvproto/pkg/brpb" - "github.com/pingcap/log" - "github.com/pingcap/tidb/br/pkg/glue" - "github.com/pingcap/tidb/br/pkg/restore/utils" - "github.com/pingcap/tidb/br/pkg/rtree" - "github.com/pingcap/tidb/br/pkg/summary" - "go.uber.org/zap" -) - -// SendType is the 'type' of a send. 
-// when we make a 'send' command to worker, we may want to flush all pending ranges (when auto commit enabled), -// or, we just want to clean overflowing ranges(when just adding a table to batcher). -type SendType int - -const ( - // SendUntilLessThanBatch will make the batcher send batch until - // its remaining range is less than its batchSizeThreshold. - SendUntilLessThanBatch SendType = iota - // SendAll will make the batcher send all pending ranges. - SendAll - // SendAllThenClose will make the batcher send all pending ranges and then close itself. - SendAllThenClose -) - -// Batcher collects ranges to restore and send batching split/ingest request. -type Batcher struct { - cachedTables []TableWithRange - cachedTablesMu *sync.Mutex - - // autoCommitJoiner is for joining the background batch sender. - autoCommitJoiner chan<- struct{} - // everythingIsDone is for waiting for worker done: that is, after we send a - // signal to autoCommitJoiner, we must give it enough time to get things done. - // Then, it should notify us by this wait group. - // Use wait group instead of a trivial channel for further extension. - everythingIsDone *sync.WaitGroup - // sendErr is for output error information. - sendErr chan<- error - // sendCh is for communiate with sendWorker. - sendCh chan<- SendType - // outCh is for output the restored table, so it can be sent to do something like checksum. - outCh chan<- *CreatedTable - - updateCh glue.Progress - - sender BatchSender - manager ContextManager - batchSizeThreshold int - size int32 - - checkpointSetWithTableID map[int64]map[string]struct{} -} - -// Len calculate the current size of this batcher. -func (b *Batcher) Len() int { - return int(atomic.LoadInt32(&b.size)) -} - -// contextCleaner is the worker goroutine that cleaning the 'context' -// (e.g. make regions leave restore mode). -func (b *Batcher) contextCleaner(ctx context.Context, tables <-chan []CreatedTable) { - defer func() { - if ctx.Err() != nil { - log.Info("restore canceled, cleaning in background context") - b.manager.Close(context.Background()) - } else { - b.manager.Close(ctx) - } - }() - defer b.everythingIsDone.Done() - for { - select { - case <-ctx.Done(): - return - case tbls, ok := <-tables: - if !ok { - return - } - if err := b.manager.Leave(ctx, tbls); err != nil { - b.sendErr <- err - return - } - for _, tbl := range tbls { - cloneTable := tbl - b.outCh <- &cloneTable - } - } - } -} - -// NewBatcher creates a new batcher by a sender and a context manager. -// the former defines how the 'restore' a batch(i.e. send, or 'push down' the task to where). -// the context manager defines the 'lifetime' of restoring tables(i.e. how to enter 'restore' mode, and how to exit). -// this batcher will work background, send batches per second, or batch size reaches limit. -// and it will emit full-restored tables to the output channel returned. 
-func NewBatcher( - ctx context.Context, - sender BatchSender, - manager ContextManager, - errCh chan<- error, - updateCh glue.Progress, -) (*Batcher, chan *CreatedTable) { - outCh := defaultOutputTableChan() - sendChan := make(chan SendType, 2) - b := &Batcher{ - sendErr: errCh, - outCh: outCh, - sender: sender, - manager: manager, - sendCh: sendChan, - updateCh: updateCh, - cachedTablesMu: new(sync.Mutex), - everythingIsDone: new(sync.WaitGroup), - batchSizeThreshold: 1, - } - b.everythingIsDone.Add(2) - go b.sendWorker(ctx, sendChan) - restoredTables := make(chan []CreatedTable, defaultChannelSize) - go b.contextCleaner(ctx, restoredTables) - sink := chanTableSink{restoredTables, errCh} - sender.PutSink(sink) - return b, outCh -} - -// EnableAutoCommit enables the batcher commit batch periodically even batcher size isn't big enough. -// we make this function for disable AutoCommit in some case. -func (b *Batcher) EnableAutoCommit(ctx context.Context, delay time.Duration) { - if b.autoCommitJoiner != nil { - // IMO, making two auto commit goroutine wouldn't be a good idea. - // If desire(e.g. change the peroid of auto commit), please disable auto commit firstly. - log.L().DPanic("enabling auto commit on a batcher that auto commit has been enabled, which isn't allowed") - } - joiner := make(chan struct{}) - go b.autoCommitWorker(ctx, joiner, delay) - b.autoCommitJoiner = joiner -} - -// DisableAutoCommit blocks the current goroutine until the worker can gracefully stop, -// and then disable auto commit. -func (b *Batcher) DisableAutoCommit() { - b.joinAutoCommitWorker() - b.autoCommitJoiner = nil -} - -func (b *Batcher) waitUntilSendDone() { - b.sendCh <- SendAllThenClose - b.everythingIsDone.Wait() -} - -// joinAutoCommitWorker blocks the current goroutine until the worker can gracefully stop. -// return immediately when auto commit disabled. -func (b *Batcher) joinAutoCommitWorker() { - if b.autoCommitJoiner != nil { - log.Debug("gracefully stopping worker goroutine") - b.autoCommitJoiner <- struct{}{} - close(b.autoCommitJoiner) - log.Debug("gracefully stopped worker goroutine") - } -} - -// sendWorker is the 'worker' that send all ranges to TiKV. -// TODO since all operations are asynchronous now, it's possible to remove this worker. -func (b *Batcher) sendWorker(ctx context.Context, send <-chan SendType) { - sendUntil := func(lessOrEqual int) { - for b.Len() > lessOrEqual { - b.Send(ctx) - } - } - - for sendType := range send { - switch sendType { - case SendUntilLessThanBatch: - sendUntil(b.batchSizeThreshold) - case SendAll: - sendUntil(0) - case SendAllThenClose: - sendUntil(0) - b.sender.Close() - b.everythingIsDone.Done() - return - } - } -} - -func (b *Batcher) autoCommitWorker(ctx context.Context, joiner <-chan struct{}, delay time.Duration) { - tick := time.NewTicker(delay) - defer tick.Stop() - for { - select { - case <-joiner: - log.Debug("graceful stop signal received") - return - case <-ctx.Done(): - b.sendErr <- ctx.Err() - return - case <-tick.C: - if b.Len() > 0 { - log.Debug("sending batch because time limit exceed", zap.Int("size", b.Len())) - b.asyncSend(SendAll) - } - } - } -} - -func (b *Batcher) asyncSend(t SendType) { - // add a check here so we won't replica sending. - if len(b.sendCh) == 0 { - b.sendCh <- t - } -} - -// DrainResult is the collection of some ranges and theirs metadata. -type DrainResult struct { - // TablesToSend are tables that would be send at this batch. 
- TablesToSend []CreatedTable - // BlankTablesAfterSend are tables that will be full-restored after this batch send. - BlankTablesAfterSend []CreatedTable - // RewriteRules are the rewrite rules for the tables. - // the key is the table id after rewritten. - RewriteRulesMap map[int64]*utils.RewriteRules - Ranges []rtree.Range - // Record which part of ranges belongs to the table - TableEndOffsetInRanges []int -} - -// Files returns all files of this drain result. -func (result DrainResult) Files() []TableIDWithFiles { - tableIDWithFiles := make([]TableIDWithFiles, 0, len(result.TableEndOffsetInRanges)) - var startOffset int = 0 - for i, endOffset := range result.TableEndOffsetInRanges { - tableID := result.TablesToSend[i].Table.ID - ranges := result.Ranges[startOffset:endOffset] - // each range has at least a default file + a write file - files := make([]*backuppb.File, 0, len(ranges)*2) - for _, rg := range ranges { - files = append(files, rg.Files...) - } - var rules *utils.RewriteRules - if r, ok := result.RewriteRulesMap[tableID]; ok { - rules = r - } - tableIDWithFiles = append(tableIDWithFiles, TableIDWithFiles{ - TableID: tableID, - Files: files, - RewriteRules: rules, - }) - - // update start offset - startOffset = endOffset - } - - return tableIDWithFiles -} - -func newDrainResult() DrainResult { - return DrainResult{ - TablesToSend: make([]CreatedTable, 0), - BlankTablesAfterSend: make([]CreatedTable, 0), - RewriteRulesMap: utils.EmptyRewriteRulesMap(), - Ranges: make([]rtree.Range, 0), - TableEndOffsetInRanges: make([]int, 0), - } -} - -// fileterOutRanges filter out the files from `drained-range` that exists in the checkpoint set. -func (b *Batcher) filterOutRanges(checkpointSet map[string]struct{}, drained []rtree.Range) []rtree.Range { - progress := int(0) - totalKVs := uint64(0) - totalBytes := uint64(0) - for i, rg := range drained { - newFiles := make([]*backuppb.File, 0, len(rg.Files)) - for _, f := range rg.Files { - rangeKey := getFileRangeKey(f.Name) - if _, exists := checkpointSet[rangeKey]; exists { - // the range has been import done, so skip it and - // update the summary information - progress += 1 - totalKVs += f.TotalKvs - totalBytes += f.TotalBytes - } else { - newFiles = append(newFiles, f) - } - } - // the newFiles may be empty - drained[i].Files = newFiles - } - if progress > 0 { - // (split/scatter + download/ingest) / (default cf + write cf) - b.updateCh.IncBy(int64(progress) * 2 / 2) - summary.CollectSuccessUnit(summary.TotalKV, progress, totalKVs) - summary.CollectSuccessUnit(summary.SkippedKVCountByCheckpoint, progress, totalKVs) - summary.CollectSuccessUnit(summary.TotalBytes, progress, totalBytes) - summary.CollectSuccessUnit(summary.SkippedBytesByCheckpoint, progress, totalBytes) - } - return drained -} - -// drainRanges 'drains' ranges from current tables. 
-// for example, let a '-' character be a range, assume we have: -// |---|-----|-------| -// |t1 |t2 |t3 | -// after we run drainRanges() with batchSizeThreshold = 6, let '*' be the ranges will be sent this batch : -// |***|***--|-------| -// |t1 |t2 |-------| -// -// drainRanges() will return: -// TablesToSend: [t1, t2] (so we can make them enter restore mode) -// BlankTableAfterSend: [t1] (so we can make them leave restore mode after restoring this batch) -// RewriteRules: rewrite rules for [t1, t2] (so we can restore them) -// Ranges: those stared ranges (so we can restore them) -// -// then, it will leaving the batcher's cachedTables like this: -// |--|-------| -// |t2|t3 | -// as you can see, all restored ranges would be removed. -func (b *Batcher) drainRanges() DrainResult { - result := newDrainResult() - - b.cachedTablesMu.Lock() - defer b.cachedTablesMu.Unlock() - - for offset, thisTable := range b.cachedTables { - t, exists := b.checkpointSetWithTableID[thisTable.Table.ID] - thisTableLen := len(thisTable.Range) - collected := len(result.Ranges) - - result.RewriteRulesMap[thisTable.Table.ID] = thisTable.RewriteRule - result.TablesToSend = append(result.TablesToSend, thisTable.CreatedTable) - - // the batch is full, we should stop here! - // we use strictly greater than because when we send a batch at equal, the offset should plus one. - // (because the last table is sent, we should put it in emptyTables), and this will introduce extra complex. - if thisTableLen+collected > b.batchSizeThreshold { - drainSize := b.batchSizeThreshold - collected - thisTableRanges := thisTable.Range - - var drained []rtree.Range - drained, b.cachedTables[offset].Range = thisTableRanges[:drainSize], thisTableRanges[drainSize:] - log.Debug("draining partial table to batch", - zap.Stringer("db", thisTable.OldTable.DB.Name), - zap.Stringer("table", thisTable.Table.Name), - zap.Int("size", thisTableLen), - zap.Int("drained", drainSize), - ) - // Firstly calculated the batcher size, and then - // filter out ranges by checkpoint. - atomic.AddInt32(&b.size, -int32(len(drained))) - if exists { - drained = b.filterOutRanges(t, drained) - } - result.Ranges = append(result.Ranges, drained...) - result.TableEndOffsetInRanges = append(result.TableEndOffsetInRanges, len(result.Ranges)) - b.cachedTables = b.cachedTables[offset:] - return result - } - - result.BlankTablesAfterSend = append(result.BlankTablesAfterSend, thisTable.CreatedTable) - // Firstly calculated the batcher size, and then filter out ranges by checkpoint. - atomic.AddInt32(&b.size, -int32(len(thisTable.Range))) - // let's 'drain' the ranges of current table. This op must not make the batch full. - if exists { - result.Ranges = append(result.Ranges, b.filterOutRanges(t, thisTable.Range)...) - } else { - result.Ranges = append(result.Ranges, thisTable.Range...) - } - result.TableEndOffsetInRanges = append(result.TableEndOffsetInRanges, len(result.Ranges)) - // clear the table length. - b.cachedTables[offset].Range = []rtree.Range{} - log.Debug("draining table to batch", - zap.Stringer("db", thisTable.OldTable.DB.Name), - zap.Stringer("table", thisTable.Table.Name), - zap.Int("size", thisTableLen), - ) - } - - // all tables are drained. - b.cachedTables = []TableWithRange{} - return result -} - -// Send sends all pending requests in the batcher. -// returns tables sent FULLY in the current batch. 
-func (b *Batcher) Send(ctx context.Context) { - if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { - span1 := span.Tracer().StartSpan("Batcher.Send", opentracing.ChildOf(span.Context())) - defer span1.Finish() - ctx = opentracing.ContextWithSpan(ctx, span1) - } - - drainResult := b.drainRanges() - tbs := drainResult.TablesToSend - ranges := drainResult.Ranges - log.Info("restore batch start", rtree.ZapRanges(ranges), zapTables(tbs)) - // Leave is called at b.contextCleaner - if err := b.manager.Enter(ctx, drainResult.TablesToSend); err != nil { - b.sendErr <- err - return - } - b.sender.RestoreBatch(drainResult) -} - -func (b *Batcher) sendIfFull() { - if b.Len() >= b.batchSizeThreshold { - log.Debug("sending batch because batcher is full", zap.Int("size", b.Len())) - b.asyncSend(SendUntilLessThanBatch) - } -} - -// Add adds a task to the Batcher. -func (b *Batcher) Add(tbs TableWithRange) { - b.cachedTablesMu.Lock() - log.Debug("adding table to batch", - zap.Stringer("db", tbs.OldTable.DB.Name), - zap.Stringer("table", tbs.Table.Name), - zap.Int64("old id", tbs.OldTable.Info.ID), - zap.Int64("new id", tbs.Table.ID), - zap.Int("table size", len(tbs.Range)), - zap.Int("batch size", b.Len()), - ) - b.cachedTables = append(b.cachedTables, tbs) - atomic.AddInt32(&b.size, int32(len(tbs.Range))) - b.cachedTablesMu.Unlock() - - b.sendIfFull() -} - -// Close closes the batcher, sending all pending requests, close updateCh. -func (b *Batcher) Close() { - log.Info("sending batch lastly on close", zap.Int("size", b.Len())) - b.DisableAutoCommit() - b.waitUntilSendDone() - close(b.outCh) - close(b.sendCh) -} - -// SetThreshold sets the threshold that how big the batch size reaching need to send batch. -// note this function isn't goroutine safe yet, -// just set threshold before anything starts(e.g. EnableAutoCommit), please. -func (b *Batcher) SetThreshold(newThreshold int) { - b.batchSizeThreshold = newThreshold -} - -func (b *Batcher) SetCheckpoint(sets map[int64]map[string]struct{}) { - b.checkpointSetWithTableID = sets -} diff --git a/br/pkg/restore/snap_client/batcher_test.go b/br/pkg/restore/snap_client/batcher_test.go deleted file mode 100644 index f18bf9ade1adb..0000000000000 --- a/br/pkg/restore/snap_client/batcher_test.go +++ /dev/null @@ -1,387 +0,0 @@ -// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. - -package snapclient_test - -import ( - "bytes" - "context" - "sync" - "testing" - "time" - - "github.com/pingcap/errors" - "github.com/pingcap/kvproto/pkg/import_sstpb" - "github.com/pingcap/log" - "github.com/pingcap/tidb/br/pkg/metautil" - snapclient "github.com/pingcap/tidb/br/pkg/restore/snap_client" - "github.com/pingcap/tidb/br/pkg/restore/utils" - "github.com/pingcap/tidb/br/pkg/rtree" - "github.com/pingcap/tidb/pkg/parser/model" - "github.com/stretchr/testify/require" - "go.uber.org/zap" -) - -type drySender struct { - mu *sync.Mutex - - rewriteRules *utils.RewriteRules - ranges []rtree.Range - nBatch int - - sink snapclient.TableSink -} - -func (sender *drySender) PutSink(sink snapclient.TableSink) { - sender.sink = sink -} - -func (sender *drySender) RestoreBatch(ranges snapclient.DrainResult) { - sender.mu.Lock() - defer sender.mu.Unlock() - log.Info("fake restore range", rtree.ZapRanges(ranges.Ranges)) - sender.nBatch++ - for _, r := range ranges.RewriteRulesMap { - sender.rewriteRules.Append(*r) - } - sender.ranges = append(sender.ranges, ranges.Ranges...) - sender.sink.EmitTables(ranges.BlankTablesAfterSend...) 
-} - -func (sender *drySender) Close() { - sender.sink.Close() -} - -func waitForSend() { - time.Sleep(50 * time.Millisecond) -} - -func (sender *drySender) Ranges() []rtree.Range { - return sender.ranges -} - -func newDrySender() *drySender { - return &drySender{ - rewriteRules: utils.EmptyRewriteRule(), - ranges: []rtree.Range{}, - mu: new(sync.Mutex), - } -} - -type recordCurrentTableManager struct { - lock sync.Mutex - m map[int64]bool -} - -func (manager *recordCurrentTableManager) Close(ctx context.Context) { - manager.lock.Lock() - defer manager.lock.Unlock() - if len(manager.m) > 0 { - log.Panic("When closing, there are still some tables doesn't be sent", - zap.Any("tables", manager.m)) - } -} - -func newMockManager() *recordCurrentTableManager { - return &recordCurrentTableManager{ - m: make(map[int64]bool), - } -} - -func (manager *recordCurrentTableManager) Enter(_ context.Context, tables []snapclient.CreatedTable) error { - manager.lock.Lock() - defer manager.lock.Unlock() - for _, t := range tables { - log.Info("entering", zap.Int64("table ID", t.Table.ID)) - manager.m[t.Table.ID] = true - } - return nil -} - -func (manager *recordCurrentTableManager) Leave(_ context.Context, tables []snapclient.CreatedTable) error { - manager.lock.Lock() - defer manager.lock.Unlock() - for _, t := range tables { - if !manager.m[t.Table.ID] { - return errors.Errorf("Table %d is removed before added", t.Table.ID) - } - log.Info("leaving", zap.Int64("table ID", t.Table.ID)) - delete(manager.m, t.Table.ID) - } - return nil -} - -func (manager *recordCurrentTableManager) Has(tables ...snapclient.TableWithRange) bool { - manager.lock.Lock() - defer manager.lock.Unlock() - ids := make([]int64, 0, len(tables)) - currentIDs := make([]int64, 0, len(manager.m)) - for _, t := range tables { - ids = append(ids, t.Table.ID) - } - for id, contains := range manager.m { - if contains { - currentIDs = append(currentIDs, id) - } - } - log.Info("testing", zap.Int64s("should has ID", ids), zap.Int64s("has ID", currentIDs)) - for _, i := range ids { - if !manager.m[i] { - return false - } - } - return true -} - -func (sender *drySender) HasRewriteRuleOfKey(prefix string) bool { - sender.mu.Lock() - defer sender.mu.Unlock() - for _, rule := range sender.rewriteRules.Data { - if bytes.Equal([]byte(prefix), rule.OldKeyPrefix) { - return true - } - } - return false -} - -func (sender *drySender) RangeLen() int { - sender.mu.Lock() - defer sender.mu.Unlock() - return len(sender.ranges) -} - -func (sender *drySender) BatchCount() int { - return sender.nBatch -} - -func fakeTableWithRange(id int64, rngs []rtree.Range) snapclient.TableWithRange { - tbl := &metautil.Table{ - DB: &model.DBInfo{}, - Info: &model.TableInfo{ - ID: id, - }, - } - tblWithRng := snapclient.TableWithRange{ - CreatedTable: snapclient.CreatedTable{ - RewriteRule: utils.EmptyRewriteRule(), - Table: tbl.Info, - OldTable: tbl, - }, - Range: rngs, - } - return tblWithRng -} - -func fakeRewriteRules(oldPrefix string, newPrefix string) *utils.RewriteRules { - return &utils.RewriteRules{ - Data: []*import_sstpb.RewriteRule{ - { - OldKeyPrefix: []byte(oldPrefix), - NewKeyPrefix: []byte(newPrefix), - }, - }, - } -} - -func fakeRange(startKey, endKey string) rtree.Range { - return rtree.Range{ - StartKey: []byte(startKey), - EndKey: []byte(endKey), - } -} - -func join(nested [][]rtree.Range) (plain []rtree.Range) { - for _, ranges := range nested { - plain = append(plain, ranges...) - } - return plain -} - -// TestBasic tests basic workflow of batcher. 
-func TestBasic(t *testing.T) { - ctx := context.Background() - errCh := make(chan error, 8) - sender := newDrySender() - manager := newMockManager() - batcher, _ := snapclient.NewBatcher(ctx, sender, manager, errCh, nil) - batcher.SetThreshold(2) - - tableRanges := [][]rtree.Range{ - {fakeRange("aaa", "aab")}, - {fakeRange("baa", "bab"), fakeRange("bac", "bad")}, - {fakeRange("caa", "cab"), fakeRange("cac", "cad")}, - } - - simpleTables := []snapclient.TableWithRange{} - for i, ranges := range tableRanges { - simpleTables = append(simpleTables, fakeTableWithRange(int64(i), ranges)) - } - for _, tbl := range simpleTables { - batcher.Add(tbl) - } - - batcher.Close() - rngs := sender.Ranges() - - require.Equal(t, rngs, join(tableRanges)) - select { - case err := <-errCh: - t.Fatal(errors.Trace(err)) - default: - } -} - -func TestAutoSend(t *testing.T) { - ctx := context.Background() - errCh := make(chan error, 8) - sender := newDrySender() - manager := newMockManager() - batcher, _ := snapclient.NewBatcher(ctx, sender, manager, errCh, nil) - batcher.SetThreshold(1024) - - simpleTable := fakeTableWithRange(1, []rtree.Range{fakeRange("caa", "cab"), fakeRange("cac", "cad")}) - - batcher.Add(simpleTable) - require.Greater(t, batcher.Len(), 0) - - // enable auto commit. - batcher.EnableAutoCommit(ctx, 100*time.Millisecond) - time.Sleep(200 * time.Millisecond) - - require.Greater(t, sender.RangeLen(), 0) - require.Equal(t, 0, batcher.Len()) - - batcher.Close() - - rngs := sender.Ranges() - require.Equal(t, simpleTable.Range, rngs) - select { - case err := <-errCh: - t.Fatal(errors.Trace(err)) - default: - } -} - -func TestSplitRangeOnSameTable(t *testing.T) { - ctx := context.Background() - errCh := make(chan error, 8) - sender := newDrySender() - manager := newMockManager() - batcher, _ := snapclient.NewBatcher(ctx, sender, manager, errCh, nil) - batcher.SetThreshold(2) - - simpleTable := fakeTableWithRange(1, []rtree.Range{ - fakeRange("caa", "cab"), fakeRange("cac", "cad"), - fakeRange("cae", "caf"), fakeRange("cag", "cai"), - fakeRange("caj", "cak"), fakeRange("cal", "cam"), - fakeRange("can", "cao"), fakeRange("cap", "caq"), - }) - - batcher.Add(simpleTable) - batcher.Close() - require.Equal(t, 4, sender.BatchCount()) - - rngs := sender.Ranges() - require.Equal(t, simpleTable.Range, rngs) - select { - case err := <-errCh: - t.Fatal(errors.Trace(err)) - default: - } -} - -func TestRewriteRules(t *testing.T) { - tableRanges := [][]rtree.Range{ - {fakeRange("aaa", "aab")}, - {fakeRange("baa", "bab"), fakeRange("bac", "bad")}, - { - fakeRange("caa", "cab"), fakeRange("cac", "cad"), - fakeRange("cae", "caf"), fakeRange("cag", "cai"), - fakeRange("caj", "cak"), fakeRange("cal", "cam"), - fakeRange("can", "cao"), fakeRange("cap", "caq"), - }, - } - rewriteRules := []*utils.RewriteRules{ - fakeRewriteRules("a", "ada"), - fakeRewriteRules("b", "bob"), - fakeRewriteRules("c", "cpp"), - } - - tables := make([]snapclient.TableWithRange, 0, len(tableRanges)) - for i, ranges := range tableRanges { - table := fakeTableWithRange(int64(i), ranges) - table.RewriteRule = rewriteRules[i] - tables = append(tables, table) - } - - ctx := context.Background() - errCh := make(chan error, 8) - sender := newDrySender() - manager := newMockManager() - batcher, _ := snapclient.NewBatcher(ctx, sender, manager, errCh, nil) - batcher.SetThreshold(2) - - batcher.Add(tables[0]) - waitForSend() - require.Equal(t, 0, sender.RangeLen()) - - batcher.Add(tables[1]) - waitForSend() - require.True(t, sender.HasRewriteRuleOfKey("a")) 
- require.True(t, sender.HasRewriteRuleOfKey("b")) - require.True(t, manager.Has(tables[1])) - require.Equal(t, 2, sender.RangeLen()) - - batcher.Add(tables[2]) - batcher.Close() - require.True(t, sender.HasRewriteRuleOfKey("c")) - require.Equal(t, join(tableRanges), sender.Ranges()) - - select { - case err := <-errCh: - t.Fatal(errors.Trace(err)) - default: - } -} - -func TestBatcherLen(t *testing.T) { - ctx := context.Background() - errCh := make(chan error, 8) - sender := newDrySender() - manager := newMockManager() - batcher, _ := snapclient.NewBatcher(ctx, sender, manager, errCh, nil) - batcher.SetThreshold(15) - - simpleTable := fakeTableWithRange(1, []rtree.Range{ - fakeRange("caa", "cab"), fakeRange("cac", "cad"), - fakeRange("cae", "caf"), fakeRange("cag", "cai"), - fakeRange("caj", "cak"), fakeRange("cal", "cam"), - fakeRange("can", "cao"), fakeRange("cap", "caq"), - }) - - simpleTable2 := fakeTableWithRange(2, []rtree.Range{ - fakeRange("caa", "cab"), fakeRange("cac", "cad"), - fakeRange("cae", "caf"), fakeRange("cag", "cai"), - fakeRange("caj", "cak"), fakeRange("cal", "cam"), - fakeRange("can", "cao"), fakeRange("cap", "caq"), - }) - - batcher.Add(simpleTable) - waitForSend() - require.Equal(t, 8, batcher.Len()) - require.False(t, manager.Has(simpleTable)) - require.False(t, manager.Has(simpleTable2)) - - batcher.Add(simpleTable2) - waitForSend() - require.Equal(t, 1, batcher.Len()) - require.True(t, manager.Has(simpleTable2)) - require.False(t, manager.Has(simpleTable)) - batcher.Close() - require.Equal(t, 0, batcher.Len()) - - select { - case err := <-errCh: - t.Fatal(errors.Trace(err)) - default: - } -} diff --git a/br/pkg/restore/snap_client/client.go b/br/pkg/restore/snap_client/client.go index 061fad6388016..7162e4b88cefc 100644 --- a/br/pkg/restore/snap_client/client.go +++ b/br/pkg/restore/snap_client/client.go @@ -20,7 +20,6 @@ import ( "context" "crypto/tls" "encoding/json" - "fmt" "slices" "strings" "sync" @@ -44,10 +43,8 @@ import ( importclient "github.com/pingcap/tidb/br/pkg/restore/internal/import_client" tidallocdb "github.com/pingcap/tidb/br/pkg/restore/internal/prealloc_db" tidalloc "github.com/pingcap/tidb/br/pkg/restore/internal/prealloc_table_id" - internalutils "github.com/pingcap/tidb/br/pkg/restore/internal/utils" "github.com/pingcap/tidb/br/pkg/restore/split" restoreutils "github.com/pingcap/tidb/br/pkg/restore/utils" - "github.com/pingcap/tidb/br/pkg/rtree" "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/br/pkg/summary" "github.com/pingcap/tidb/br/pkg/utils" @@ -672,12 +669,43 @@ func (rc *SnapClient) getRebasedTables() map[restore.UniqueTableName]bool { return rc.rebasedTablesMap } +// CreateTables create tables, and generate their information. +// this function will use workers as the same number of sessionPool, +// leave sessionPool nil to send DDLs sequential. 
+func (rc *SnapClient) CreateTables( + ctx context.Context, + tables []*metautil.Table, + newTS uint64, +) ([]*CreatedTable, error) { + log.Info("start create tables", zap.Int("total count", len(tables))) + rc.generateRebasedTables(tables) + + // try to restore tables in batch + if rc.batchDdlSize > minBatchDdlSize && len(rc.dbPool) > 0 { + tables, err := rc.createTablesBatch(ctx, tables, newTS) + if err == nil { + return tables, nil + } else if !utils.FallBack2CreateTable(err) { + return nil, errors.Trace(err) + } + // fall back to old create table (sequential create table) + log.Info("fall back to the sequential create table") + } + + // restore tables in db pool + if len(rc.dbPool) > 0 { + return rc.createTablesSingle(ctx, rc.dbPool, tables, newTS) + } + // restore tables in one db + return rc.createTablesSingle(ctx, []*tidallocdb.DB{rc.db}, tables, newTS) +} + func (rc *SnapClient) createTables( ctx context.Context, db *tidallocdb.DB, tables []*metautil.Table, newTS uint64, -) ([]CreatedTable, error) { +) ([]*CreatedTable, error) { log.Info("client to create tables") if rc.IsSkipCreateSQL() { log.Info("skip create table and alter autoIncID") @@ -687,7 +715,7 @@ func (rc *SnapClient) createTables( return nil, errors.Trace(err) } } - cts := make([]CreatedTable, 0, len(tables)) + cts := make([]*CreatedTable, 0, len(tables)) for _, table := range tables { newTableInfo, err := restore.GetTableSchema(rc.dom, table.DB.Name, table.Info.Name) if err != nil { @@ -701,7 +729,7 @@ func (rc *SnapClient) createTables( newTableInfo.IsCommonHandle) } rules := restoreutils.GetRewriteRules(newTableInfo, table.Info, newTS, true) - ct := CreatedTable{ + ct := &CreatedTable{ RewriteRule: rules, Table: newTableInfo, OldTable: table, @@ -712,11 +740,17 @@ func (rc *SnapClient) createTables( return cts, nil } -func (rc *SnapClient) createTablesInWorkerPool(ctx context.Context, tables []*metautil.Table, newTS uint64, outCh chan<- CreatedTable) error { +func (rc *SnapClient) createTablesBatch(ctx context.Context, tables []*metautil.Table, newTS uint64) ([]*CreatedTable, error) { eg, ectx := errgroup.WithContext(ctx) rater := logutil.TraceRateOver(logutil.MetricTableCreatedCounter) workers := tidbutil.NewWorkerPool(uint(len(rc.dbPool)), "Create Tables Worker") numOfTables := len(tables) + createdTables := struct { + sync.Mutex + tables []*CreatedTable + }{ + tables: make([]*CreatedTable, 0, len(tables)), + } for lastSent := 0; lastSent < numOfTables; lastSent += int(rc.batchDdlSize) { end := min(lastSent+int(rc.batchDdlSize), len(tables)) @@ -735,21 +769,19 @@ func (rc *SnapClient) createTablesInWorkerPool(ctx context.Context, tables []*me log.Error("create tables fail", zap.Error(err)) return err } - for _, ct := range cts { - log.Debug("table created and send to next", - zap.Int("output chan size", len(outCh)), - zap.Stringer("table", ct.OldTable.Info.Name), - zap.Stringer("database", ct.OldTable.DB.Name)) - outCh <- ct - rater.Inc() - rater.L().Info("table created", - zap.Stringer("table", ct.OldTable.Info.Name), - zap.Stringer("database", ct.OldTable.DB.Name)) - } + rater.Add(float64(len(cts))) + rater.L().Info("tables created", zap.Int("num", len(cts))) + createdTables.Lock() + createdTables.tables = append(createdTables.tables, cts...) 
+ createdTables.Unlock() return err }) } - return eg.Wait() + if err := eg.Wait(); err != nil { + return nil, errors.Trace(err) + } + + return createdTables.tables, nil } func (rc *SnapClient) createTable( @@ -757,28 +789,28 @@ func (rc *SnapClient) createTable( db *tidallocdb.DB, table *metautil.Table, newTS uint64, -) (CreatedTable, error) { +) (*CreatedTable, error) { if rc.IsSkipCreateSQL() { log.Info("skip create table and alter autoIncID", zap.Stringer("table", table.Info.Name)) } else { err := db.CreateTable(ctx, table, rc.getRebasedTables(), rc.supportPolicy, rc.policyMap) if err != nil { - return CreatedTable{}, errors.Trace(err) + return nil, errors.Trace(err) } } newTableInfo, err := restore.GetTableSchema(rc.dom, table.DB.Name, table.Info.Name) if err != nil { - return CreatedTable{}, errors.Trace(err) + return nil, errors.Trace(err) } if newTableInfo.IsCommonHandle != table.Info.IsCommonHandle { - return CreatedTable{}, errors.Annotatef(berrors.ErrRestoreModeMismatch, + return nil, errors.Annotatef(berrors.ErrRestoreModeMismatch, "Clustered index option mismatch. Restored cluster's @@tidb_enable_clustered_index should be %v (backup table = %v, created table = %v).", restore.TransferBoolToValue(table.Info.IsCommonHandle), table.Info.IsCommonHandle, newTableInfo.IsCommonHandle) } rules := restoreutils.GetRewriteRules(newTableInfo, table.Info, newTS, true) - et := CreatedTable{ + et := &CreatedTable{ RewriteRule: rules, Table: newTableInfo, OldTable: table, @@ -786,30 +818,49 @@ func (rc *SnapClient) createTable( return et, nil } -func (rc *SnapClient) createTablesWithSoleDB(ctx context.Context, - createOneTable func(ctx context.Context, db *tidallocdb.DB, t *metautil.Table) error, - tables []*metautil.Table) error { - for _, t := range tables { - if err := createOneTable(ctx, rc.db, t); err != nil { - return errors.Trace(err) - } - } - return nil -} - -func (rc *SnapClient) createTablesWithDBPool(ctx context.Context, - createOneTable func(ctx context.Context, db *tidallocdb.DB, t *metautil.Table) error, - tables []*metautil.Table) error { +func (rc *SnapClient) createTablesSingle( + ctx context.Context, + dbPool []*tidallocdb.DB, + tables []*metautil.Table, + newTS uint64, +) ([]*CreatedTable, error) { eg, ectx := errgroup.WithContext(ctx) - workers := tidbutil.NewWorkerPool(uint(len(rc.dbPool)), "DDL workers") - for _, t := range tables { - table := t + workers := tidbutil.NewWorkerPool(uint(len(dbPool)), "DDL workers") + rater := logutil.TraceRateOver(logutil.MetricTableCreatedCounter) + createdTables := struct { + sync.Mutex + tables []*CreatedTable + }{ + tables: make([]*CreatedTable, 0, len(tables)), + } + for _, tbl := range tables { + table := tbl workers.ApplyWithIDInErrorGroup(eg, func(id uint64) error { - db := rc.dbPool[id%uint64(len(rc.dbPool))] - return createOneTable(ectx, db, table) + db := dbPool[id%uint64(len(dbPool))] + rt, err := rc.createTable(ectx, db, table, newTS) + if err != nil { + log.Error("create table failed", + zap.Error(err), + zap.Stringer("db", table.DB.Name), + zap.Stringer("table", table.Info.Name)) + return errors.Trace(err) + } + rater.Inc() + rater.L().Info("table created", + zap.Stringer("table", table.Info.Name), + zap.Stringer("database", table.DB.Name)) + + createdTables.Lock() + createdTables.tables = append(createdTables.tables, rt) + createdTables.Unlock() + return nil }) } - return eg.Wait() + if err := eg.Wait(); err != nil { + return nil, errors.Trace(err) + } + + return createdTables.tables, nil } // InitFullClusterRestore init 
fullClusterRestore and set SkipGrantTable as needed @@ -921,128 +972,6 @@ func (rc *SnapClient) setSpeedLimit(ctx context.Context, rateLimit uint64) error return nil } -func getFileRangeKey(f string) string { - // the backup date file pattern is `{store_id}_{region_id}_{epoch_version}_{key}_{ts}_{cf}.sst` - // so we need to compare with out the `_{cf}.sst` suffix - idx := strings.LastIndex(f, "_") - if idx < 0 { - panic(fmt.Sprintf("invalid backup data file name: '%s'", f)) - } - - return f[:idx] -} - -// isFilesBelongToSameRange check whether two files are belong to the same range with different cf. -func isFilesBelongToSameRange(f1, f2 string) bool { - return getFileRangeKey(f1) == getFileRangeKey(f2) -} - -func drainFilesByRange(files []*backuppb.File) ([]*backuppb.File, []*backuppb.File) { - if len(files) == 0 { - return nil, nil - } - idx := 1 - for idx < len(files) { - if !isFilesBelongToSameRange(files[idx-1].Name, files[idx].Name) { - break - } - idx++ - } - - return files[:idx], files[idx:] -} - -// RestoreSSTFiles tries to restore the files. -func (rc *SnapClient) RestoreSSTFiles( - ctx context.Context, - tableIDWithFiles []TableIDWithFiles, - updateCh glue.Progress, -) (err error) { - start := time.Now() - fileCount := 0 - defer func() { - elapsed := time.Since(start) - if err == nil { - log.Info("Restore files", zap.Duration("take", elapsed)) - summary.CollectSuccessUnit("files", fileCount, elapsed) - } - }() - - log.Debug("start to restore files", zap.Int("files", fileCount)) - - if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { - span1 := span.Tracer().StartSpan("Client.RestoreSSTFiles", opentracing.ChildOf(span.Context())) - defer span1.Finish() - ctx = opentracing.ContextWithSpan(ctx, span1) - } - - eg, ectx := errgroup.WithContext(ctx) - err = rc.setSpeedLimit(ctx, rc.rateLimit) - if err != nil { - return errors.Trace(err) - } - - var rangeFiles []*backuppb.File - var leftFiles []*backuppb.File -LOOPFORTABLE: - for _, tableIDWithFile := range tableIDWithFiles { - tableID := tableIDWithFile.TableID - files := tableIDWithFile.Files - rules := tableIDWithFile.RewriteRules - fileCount += len(files) - for rangeFiles, leftFiles = drainFilesByRange(files); len(rangeFiles) != 0; rangeFiles, leftFiles = drainFilesByRange(leftFiles) { - if ectx.Err() != nil { - log.Warn("Restoring encountered error and already stopped, give up remained files.", - zap.Int("remained", len(leftFiles)), - logutil.ShortError(ectx.Err())) - // We will fetch the error from the errgroup then (If there were). - // Also note if the parent context has been canceled or something, - // breaking here directly is also a reasonable behavior. - break LOOPFORTABLE - } - filesReplica := rangeFiles - rc.fileImporter.WaitUntilUnblock() - rc.workerPool.ApplyOnErrorGroup(eg, func() (restoreErr error) { - fileStart := time.Now() - defer func() { - if restoreErr == nil { - log.Info("import files done", logutil.Files(filesReplica), - zap.Duration("take", time.Since(fileStart))) - updateCh.Inc() - } - }() - if importErr := rc.fileImporter.ImportSSTFiles(ectx, filesReplica, rules, rc.cipher, rc.dom.Store().GetCodec().GetAPIVersion()); importErr != nil { - return errors.Trace(importErr) - } - - // the data of this range has been import done - if rc.checkpointRunner != nil && len(filesReplica) > 0 { - rangeKey := getFileRangeKey(filesReplica[0].Name) - // The checkpoint range shows this ranges of kvs has been restored into - // the table corresponding to the table-id. 
- if err := checkpoint.AppendRangesForRestore(ectx, rc.checkpointRunner, tableID, rangeKey); err != nil { - return errors.Trace(err) - } - } - return nil - }) - } - } - - if err := eg.Wait(); err != nil { - summary.CollectFailureUnit("file", err) - log.Error( - "restore files failed", - zap.Error(err), - ) - return errors.Trace(err) - } - // Once the parent context canceled and there is no task running in the errgroup, - // we may break the for loop without error in the errgroup. (Will this happen?) - // At that time, return the error in the context here. - return ctx.Err() -} - func (rc *SnapClient) execChecksum( ctx context.Context, tbl *CreatedTable, @@ -1131,7 +1060,7 @@ func (rc *SnapClient) WaitForFilesRestored(ctx context.Context, files []*backupp log.Info("import sst files done", logutil.Files(files)) updateCh.Inc() }() - return rc.fileImporter.ImportSSTFiles(ectx, []*backuppb.File{fileReplica}, restoreutils.EmptyRewriteRule(), rc.cipher, rc.backupMeta.ApiVersion) + return rc.fileImporter.ImportSSTFiles(ectx, []TableIDWithFiles{{Files: []*backuppb.File{fileReplica}, RewriteRule: restoreutils.EmptyRewriteRule()}}, rc.cipher, rc.backupMeta.ApiVersion) }) } if err := eg.Wait(); err != nil { @@ -1168,33 +1097,3 @@ func (rc *SnapClient) RestoreRaw( ) return nil } - -// SplitRanges implements TiKVRestorer. It splits region by -// data range after rewrite. -func (rc *SnapClient) SplitRanges( - ctx context.Context, - ranges []rtree.Range, - updateCh glue.Progress, - isRawKv bool, -) error { - splitClientOpts := make([]split.ClientOptionalParameter, 0, 2) - splitClientOpts = append(splitClientOpts, split.WithOnSplit(func(keys [][]byte) { - for range keys { - updateCh.Inc() - } - })) - if isRawKv { - splitClientOpts = append(splitClientOpts, split.WithRawKV()) - } - - splitter := internalutils.NewRegionSplitter(split.NewClient( - rc.pdClient, - rc.pdHTTPClient, - rc.tlsConf, - maxSplitKeysOnce, - rc.storeCount+1, - splitClientOpts..., - )) - - return splitter.ExecuteSplit(ctx, ranges) -} diff --git a/br/pkg/restore/snap_client/client_test.go b/br/pkg/restore/snap_client/client_test.go index 12b91b4937556..b43324d9fca64 100644 --- a/br/pkg/restore/snap_client/client_test.go +++ b/br/pkg/restore/snap_client/client_test.go @@ -77,7 +77,7 @@ func TestCreateTables(t *testing.T) { }, } } - rules, newTables, err := client.CreateTables(m.Domain, tables, 0) + rules, newTables, err := client.CreateTablesTest(m.Domain, tables, 0) require.NoError(t, err) // make sure tables and newTables have same order for i, tbl := range tables { @@ -191,7 +191,7 @@ func TestCheckTargetClusterFreshWithTable(t *testing.T) { Collate: "utf8mb4_bin", }, } - _, _, err = client.CreateTables(cluster.Domain, []*metautil.Table{table}, 0) + _, _, err = client.CreateTablesTest(cluster.Domain, []*metautil.Table{table}, 0) require.NoError(t, err) require.True(t, berrors.ErrRestoreNotFreshCluster.Equal(client.CheckTargetClusterFresh(ctx))) diff --git a/br/pkg/restore/snap_client/export_test.go b/br/pkg/restore/snap_client/export_test.go index c519fd0421200..ff6bb30e4920f 100644 --- a/br/pkg/restore/snap_client/export_test.go +++ b/br/pkg/restore/snap_client/export_test.go @@ -35,6 +35,8 @@ var ( GetSSTMetaFromFile = getSSTMetaFromFile GetKeyRangeByMode = getKeyRangeByMode + MapTableToFiles = mapTableToFiles + GetFileRangeKey = getFileRangeKey ) // MockClient create a fake Client used to test. 
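Reviewer note (not part of the diff): with this change the importer no longer receives a flat `[]*backuppb.File` plus one shared rewrite rule; callers hand it `TableIDWithFiles` groups, each carrying the files of one table together with that table's rewrite rules. A minimal sketch of the new call shape, mirroring the updated call sites in this diff; the helper name `groupForTable` is made up for illustration:

```go
package snapclient_test

import (
	backuppb "github.com/pingcap/kvproto/pkg/brpb"
	snapclient "github.com/pingcap/tidb/br/pkg/restore/snap_client"
	restoreutils "github.com/pingcap/tidb/br/pkg/restore/utils"
)

// groupForTable wraps the backup files of a single table, together with that
// table's rewrite rules, into the group type the importer now consumes.
// Illustrative helper only, not part of this change.
func groupForTable(tableID int64, files []*backuppb.File, rules *restoreutils.RewriteRules) []snapclient.TableIDWithFiles {
	return []snapclient.TableIDWithFiles{{
		TableID:     tableID,
		Files:       files,
		RewriteRule: rules,
	}}
}
```

A caller then imports one group with `importer.ImportSSTFiles(ctx, groupForTable(id, files, rules), cipher, apiVersion)`, which is the same pattern the updated tests below use (with `restoreutils.EmptyRewriteRule()` when no rewriting is needed).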
@@ -55,7 +57,7 @@ func MockCallSetSpeedLimit(ctx context.Context, fakeImportClient importclient.Im } // CreateTables creates multiple tables, and returns their rewrite rules. -func (rc *SnapClient) CreateTables( +func (rc *SnapClient) CreateTablesTest( dom *domain.Domain, tables []*metautil.Table, newTS uint64, @@ -65,28 +67,22 @@ func (rc *SnapClient) CreateTables( Data: make([]*import_sstpb.RewriteRule, 0), } newTables := make([]*model.TableInfo, 0, len(tables)) - errCh := make(chan error, 1) tbMapping := map[string]int{} for i, t := range tables { tbMapping[t.Info.Name.String()] = i } - dataCh := rc.GoCreateTables(context.TODO(), tables, newTS, errCh) - for et := range dataCh { - rules := et.RewriteRule + createdTables, err := rc.CreateTables(context.TODO(), tables, newTS) + if err != nil { + return nil, nil, err + } + for _, table := range createdTables { + rules := table.RewriteRule rewriteRules.Data = append(rewriteRules.Data, rules.Data...) - newTables = append(newTables, et.Table) + newTables = append(newTables, table.Table) } // Let's ensure that it won't break the original order. slices.SortFunc(newTables, func(i, j *model.TableInfo) int { return cmp.Compare(tbMapping[i.Name.String()], tbMapping[j.Name.String()]) }) - - select { - case err, ok := <-errCh: - if ok { - return nil, nil, errors.Trace(err) - } - default: - } return rewriteRules, newTables, nil } diff --git a/br/pkg/restore/snap_client/import.go b/br/pkg/restore/snap_client/import.go index cdab5a678628a..24b44a1430d5b 100644 --- a/br/pkg/restore/snap_client/import.go +++ b/br/pkg/restore/snap_client/import.go @@ -42,7 +42,6 @@ import ( "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/util/codec" kvutil "github.com/tikv/client-go/v2/util" - "go.uber.org/multierr" "go.uber.org/zap" "golang.org/x/exp/maps" "golang.org/x/sync/errgroup" @@ -275,8 +274,7 @@ func getKeyRangeByMode(mode KvMode) func(f *backuppb.File, rules *restoreutils.R // getKeyRangeForFiles gets the maximum range on files. func (importer *SnapFileImporter) getKeyRangeForFiles( - files []*backuppb.File, - rewriteRules *restoreutils.RewriteRules, + filesGroup []TableIDWithFiles, ) ([]byte, []byte, error) { var ( startKey, endKey []byte @@ -284,21 +282,21 @@ func (importer *SnapFileImporter) getKeyRangeForFiles( err error ) getRangeFn := getKeyRangeByMode(importer.kvMode) - for _, f := range files { - start, end, err = getRangeFn(f, rewriteRules) - if err != nil { - return nil, nil, errors.Trace(err) - } - if len(startKey) == 0 || bytes.Compare(start, startKey) < 0 { - startKey = start - } - if len(endKey) == 0 || bytes.Compare(endKey, end) < 0 { - endKey = end + for _, files := range filesGroup { + for _, f := range files.Files { + start, end, err = getRangeFn(f, files.RewriteRule) + if err != nil { + return nil, nil, errors.Trace(err) + } + if len(startKey) == 0 || bytes.Compare(start, startKey) < 0 { + startKey = start + } + if len(endKey) == 0 || bytes.Compare(endKey, end) < 0 { + endKey = end + } } } - log.Debug("rewrite file keys", logutil.Files(files), - logutil.Key("startKey", startKey), logutil.Key("endKey", endKey)) return startKey, endKey, nil } @@ -306,16 +304,12 @@ func (importer *SnapFileImporter) getKeyRangeForFiles( // All rules must contain encoded keys. 
func (importer *SnapFileImporter) ImportSSTFiles( ctx context.Context, - files []*backuppb.File, - rewriteRules *restoreutils.RewriteRules, + filesGroup []TableIDWithFiles, cipher *backuppb.CipherInfo, apiVersion kvrpcpb.APIVersion, ) error { - start := time.Now() - log.Debug("import file", logutil.Files(files)) - // Rewrite the start key and end key of file to scan regions - startKey, endKey, err := importer.getKeyRangeForFiles(files, rewriteRules) + startKey, endKey, err := importer.getKeyRangeForFiles(filesGroup) if err != nil { return errors.Trace(err) } @@ -328,69 +322,53 @@ func (importer *SnapFileImporter) ImportSSTFiles( return errors.Trace(errScanRegion) } - log.Debug("scan regions", logutil.Files(files), zap.Int("count", len(regionInfos))) + log.Debug("scan regions", logutil.Key("start key", startKey), logutil.Key("end key", endKey), zap.Int("count", len(regionInfos))) + start := time.Now() // Try to download and ingest the file in every region - regionLoop: for _, regionInfo := range regionInfos { info := regionInfo // Try to download file. - downloadMetas, errDownload := importer.download(ctx, info, files, rewriteRules, cipher, apiVersion) + downloadMetas, errDownload := importer.download(ctx, info, filesGroup, cipher, apiVersion) if errDownload != nil { - for _, e := range multierr.Errors(errDownload) { - switch errors.Cause(e) { // nolint:errorlint - case berrors.ErrKVRewriteRuleNotFound, berrors.ErrKVRangeIsEmpty: - // Skip this region - log.Warn("download file skipped", - logutil.Files(files), - logutil.Region(info.Region), - logutil.Key("startKey", startKey), - logutil.Key("endKey", endKey), - logutil.Key("file-simple-start", files[0].StartKey), - logutil.Key("file-simple-end", files[0].EndKey), - logutil.ShortError(e)) - continue regionLoop - } - } log.Warn("download file failed, retry later", - logutil.Files(files), logutil.Region(info.Region), logutil.Key("startKey", startKey), logutil.Key("endKey", endKey), logutil.ShortError(errDownload)) return errors.Trace(errDownload) } - log.Debug("download file done", - zap.String("file-sample", files[0].Name), zap.Stringer("take", time.Since(start)), - logutil.Key("start", files[0].StartKey), logutil.Key("end", files[0].EndKey)) + log.Debug("download file done", zap.Stringer("take", time.Since(start)), + logutil.Key("start", startKey), logutil.Key("end", endKey)) start = time.Now() - if errIngest := importer.ingest(ctx, files, info, downloadMetas); errIngest != nil { + if errIngest := importer.ingest(ctx, info, downloadMetas); errIngest != nil { log.Warn("ingest file failed, retry later", - logutil.Files(files), + logutil.Key("start", startKey), + logutil.Key("end", endKey), logutil.SSTMetas(downloadMetas), logutil.Region(info.Region), zap.Error(errIngest)) return errors.Trace(errIngest) } - log.Debug("ingest file done", zap.String("file-sample", files[0].Name), zap.Stringer("take", time.Since(start))) - } - - for _, f := range files { - summary.CollectSuccessUnit(summary.TotalKV, 1, f.TotalKvs) - summary.CollectSuccessUnit(summary.TotalBytes, 1, f.TotalBytes) + log.Debug("ingest file done", logutil.Key("start", startKey), logutil.Key("end", endKey), zap.Stringer("take", time.Since(start))) } return nil }, utils.NewImportSSTBackoffer()) if err != nil { - log.Error("import sst file failed after retry, stop the whole progress", logutil.Files(files), zap.Error(err)) + log.Error("import sst file failed after retry, stop the whole progress", zapFilesGroup(filesGroup), zap.Error(err)) return errors.Trace(err) } + for _, files := range 
filesGroup { + for _, f := range files.Files { + summary.CollectSuccessUnit(summary.TotalKV, 1, f.TotalKvs) + summary.CollectSuccessUnit(summary.TotalBytes, 1, f.TotalBytes) + } + } return nil } // getSSTMetaFromFile compares the keys in file, region and rewrite rules, then returns a sst conn. // The range of the returned sst meta is [regionRule.NewKeyPrefix, append(regionRule.NewKeyPrefix, 0xff)]. func getSSTMetaFromFile( - id []byte, file *backuppb.File, region *metapb.Region, regionRule *import_sstpb.RewriteRule, @@ -452,8 +430,9 @@ func getSSTMetaFromFile( logutil.Key("startKey", rangeStart), logutil.Key("endKey", rangeEnd)) + uid := uuid.New() return &import_sstpb.SSTMeta{ - Uuid: id, + Uuid: uid[:], CfName: cfName, Range: &import_sstpb.Range{ Start: rangeStart, @@ -472,21 +451,18 @@ func getSSTMetaFromFile( func (importer *SnapFileImporter) download( ctx context.Context, regionInfo *split.RegionInfo, - files []*backuppb.File, - rewriteRules *restoreutils.RewriteRules, + filesGroup []TableIDWithFiles, cipher *backuppb.CipherInfo, apiVersion kvrpcpb.APIVersion, ) ([]*import_sstpb.SSTMeta, error) { - var ( - downloadMetas = make([]*import_sstpb.SSTMeta, 0, len(files)) - ) + var downloadMetas []*import_sstpb.SSTMeta errDownload := utils.WithRetry(ctx, func() error { var e error // we treat Txn kv file as Raw kv file. because we don't have table id to decode if importer.kvMode == Raw || importer.kvMode == Txn { - downloadMetas, e = importer.downloadRawKVSST(ctx, regionInfo, files, cipher, apiVersion) + downloadMetas, e = importer.downloadRawKVSST(ctx, regionInfo, filesGroup, cipher, apiVersion) } else { - downloadMetas, e = importer.downloadSST(ctx, regionInfo, files, rewriteRules, cipher, apiVersion) + downloadMetas, e = importer.downloadSST(ctx, regionInfo, filesGroup, cipher, apiVersion) } failpoint.Inject("restore-storage-error", func(val failpoint.Value) { @@ -499,11 +475,11 @@ func (importer *SnapFileImporter) download( e = status.Error(codes.Unavailable, "the connection to TiKV has been cut by a neko, meow :3") }) if isDecryptSstErr(e) { - log.Info("fail to decrypt when download sst, try again with no-crypt", logutil.Files(files)) + log.Info("fail to decrypt when download sst, try again with no-crypt", zapFilesGroup(filesGroup)) if importer.kvMode == Raw || importer.kvMode == Txn { - downloadMetas, e = importer.downloadRawKVSST(ctx, regionInfo, files, nil, apiVersion) + downloadMetas, e = importer.downloadRawKVSST(ctx, regionInfo, filesGroup, nil, apiVersion) } else { - downloadMetas, e = importer.downloadSST(ctx, regionInfo, files, rewriteRules, nil, apiVersion) + downloadMetas, e = importer.downloadSST(ctx, regionInfo, filesGroup, nil, apiVersion) } } if e != nil { @@ -516,18 +492,28 @@ func (importer *SnapFileImporter) download( return downloadMetas, errDownload } +// Notice that the KvMode must be TiDB. func (importer *SnapFileImporter) buildDownloadRequest( file *backuppb.File, rewriteRules *restoreutils.RewriteRules, regionInfo *split.RegionInfo, cipher *backuppb.CipherInfo, -) (*import_sstpb.DownloadRequest, import_sstpb.SSTMeta, error) { - uid := uuid.New() - id := uid[:] +) (*import_sstpb.DownloadRequest, import_sstpb.SSTMeta, bool, error) { // Get the rewrite rule for the file. 
fileRule := restoreutils.FindMatchedRewriteRule(file, rewriteRules) if fileRule == nil { - return nil, import_sstpb.SSTMeta{}, errors.Trace(berrors.ErrKVRewriteRuleNotFound) + log.Warn("download file skipped", logutil.Region(regionInfo.Region), zap.Error(berrors.ErrKVRewriteRuleNotFound)) + return nil, import_sstpb.SSTMeta{}, true, nil + } + + // Check whether the range of the file overlaps with the region + encodedStartKey := restoreutils.RewriteAndEncodeRawKey(file.StartKey, fileRule) + if len(regionInfo.Region.EndKey) > 0 && bytes.Compare(encodedStartKey, regionInfo.Region.EndKey) >= 0 { + return nil, import_sstpb.SSTMeta{}, true, nil + } + encodedEndKey := restoreutils.RewriteAndEncodeRawKey(file.EndKey, fileRule) + if bytes.Compare(encodedEndKey, regionInfo.Region.StartKey) <= 0 { + return nil, import_sstpb.SSTMeta{}, true, nil } // For the legacy version of TiKV, we need to encode the key prefix, since in the legacy @@ -544,9 +530,9 @@ func (importer *SnapFileImporter) buildDownloadRequest( rule.NewKeyPrefix = restoreutils.EncodeKeyPrefix(fileRule.GetNewKeyPrefix()) } - sstMeta, err := getSSTMetaFromFile(id, file, regionInfo.Region, &rule, importer.rewriteMode) + sstMeta, err := getSSTMetaFromFile(file, regionInfo.Region, &rule, importer.rewriteMode) if err != nil { - return nil, import_sstpb.SSTMeta{}, err + return nil, import_sstpb.SSTMeta{}, false, err } req := &import_sstpb.DownloadRequest{ @@ -565,14 +551,13 @@ func (importer *SnapFileImporter) buildDownloadRequest( RequestSource: kvutil.BuildRequestSource(true, kv.InternalTxnBR, kvutil.ExplicitTypeBR), }, } - return req, *sstMeta, nil + return req, *sstMeta, false, nil } func (importer *SnapFileImporter) downloadSST( ctx context.Context, regionInfo *split.RegionInfo, - files []*backuppb.File, - rewriteRules *restoreutils.RewriteRules, + filesGroup []TableIDWithFiles, cipher *backuppb.CipherInfo, apiVersion kvrpcpb.APIVersion, ) ([]*import_sstpb.SSTMeta, error) { @@ -580,14 +565,20 @@ func (importer *SnapFileImporter) downloadSST( downloadMetasMap := make(map[string]import_sstpb.SSTMeta) resultMetasMap := make(map[string]*import_sstpb.SSTMeta) downloadReqsMap := make(map[string]*import_sstpb.DownloadRequest) - for _, file := range files { - req, sstMeta, err := importer.buildDownloadRequest(file, rewriteRules, regionInfo, cipher) - if err != nil { - return nil, errors.Trace(err) + for _, files := range filesGroup { + for _, file := range files.Files { + req, sstMeta, skip, err := importer.buildDownloadRequest(file, files.RewriteRule, regionInfo, cipher) + if err != nil { + return nil, errors.Trace(err) + } + // the range of the file does not overlap with the region + if skip { + continue + } + sstMeta.ApiVersion = apiVersion + downloadMetasMap[file.Name] = sstMeta + downloadReqsMap[file.Name] = req } - sstMeta.ApiVersion = apiVersion - downloadMetasMap[file.Name] = sstMeta - downloadReqsMap[file.Name] = req } eg, ectx := errgroup.WithContext(ctx) @@ -603,11 +594,7 @@ func (importer *SnapFileImporter) downloadSST( defer func() { importer.releaseToken(tokenCh) }() - for _, file := range files { - req, ok := downloadReqsMap[file.Name] - if !ok { - return errors.New("not found file key for download request") - } + for fileName, req := range downloadReqsMap { var err error var resp *import_sstpb.DownloadResponse resp, err = utils.WithRetryV2(ectx, utils.NewDownloadSSTBackoffer(), func(ctx context.Context) (*import_sstpb.DownloadResponse, error) { @@ -622,31 +609,32 @@ func (importer *SnapFileImporter) downloadSST( return 
errors.Annotate(berrors.ErrKVDownloadFailed, resp.GetError().GetMessage()) } if resp.GetIsEmpty() { - return errors.Trace(berrors.ErrKVRangeIsEmpty) + log.Warn("download file skipped", zap.String("filename", fileName), + logutil.Region(regionInfo.Region), zap.Error(berrors.ErrKVRangeIsEmpty)) + continue } mu.Lock() - sstMeta, ok := downloadMetasMap[file.Name] + sstMeta, ok := downloadMetasMap[fileName] if !ok { mu.Unlock() - return errors.Errorf("not found file %s for download sstMeta", file.Name) + return errors.Errorf("not found file %s for download sstMeta", fileName) } sstMeta.Range = &import_sstpb.Range{ Start: restoreutils.TruncateTS(resp.Range.GetStart()), End: restoreutils.TruncateTS(resp.Range.GetEnd()), } - resultMetasMap[file.Name] = &sstMeta + resultMetasMap[fileName] = &sstMeta mu.Unlock() log.Debug("download from peer", + zap.String("filename", fileName), logutil.Region(regionInfo.Region), - logutil.File(file), logutil.Peer(peer), logutil.Key("resp-range-start", resp.Range.Start), logutil.Key("resp-range-end", resp.Range.End), zap.Bool("resp-isempty", resp.IsEmpty), zap.Uint32("resp-crc32", resp.Crc32), - zap.Int("len files", len(files)), ) } return nil @@ -661,85 +649,94 @@ func (importer *SnapFileImporter) downloadSST( func (importer *SnapFileImporter) downloadRawKVSST( ctx context.Context, regionInfo *split.RegionInfo, - files []*backuppb.File, + filesGroup []TableIDWithFiles, cipher *backuppb.CipherInfo, apiVersion kvrpcpb.APIVersion, ) ([]*import_sstpb.SSTMeta, error) { - downloadMetas := make([]*import_sstpb.SSTMeta, 0, len(files)) - for _, file := range files { - uid := uuid.New() - id := uid[:] - // Empty rule - var rule import_sstpb.RewriteRule - sstMeta, err := getSSTMetaFromFile(id, file, regionInfo.Region, &rule, RewriteModeLegacy) - if err != nil { - return nil, err - } + downloadMetas := make([]*import_sstpb.SSTMeta, 0, len(filesGroup)*2) + for _, files := range filesGroup { + for _, file := range files.Files { + // Empty rule + var rule import_sstpb.RewriteRule + sstMeta, err := getSSTMetaFromFile(file, regionInfo.Region, &rule, RewriteModeLegacy) + if err != nil { + return nil, err + } - // Cut the SST file's range to fit in the restoring range. - if bytes.Compare(importer.rawStartKey, sstMeta.Range.GetStart()) > 0 { - sstMeta.Range.Start = importer.rawStartKey - } - if len(importer.rawEndKey) > 0 && - (len(sstMeta.Range.GetEnd()) == 0 || bytes.Compare(importer.rawEndKey, sstMeta.Range.GetEnd()) <= 0) { - sstMeta.Range.End = importer.rawEndKey - sstMeta.EndKeyExclusive = true - } - if bytes.Compare(sstMeta.Range.GetStart(), sstMeta.Range.GetEnd()) > 0 { - return nil, errors.Trace(berrors.ErrKVRangeIsEmpty) - } + // Cut the SST file's range to fit in the restoring range. 
+ if bytes.Compare(importer.rawStartKey, sstMeta.Range.GetStart()) > 0 { + sstMeta.Range.Start = importer.rawStartKey + } + if len(importer.rawEndKey) > 0 && + (len(sstMeta.Range.GetEnd()) == 0 || bytes.Compare(importer.rawEndKey, sstMeta.Range.GetEnd()) <= 0) { + sstMeta.Range.End = importer.rawEndKey + sstMeta.EndKeyExclusive = true + } + if bytes.Compare(sstMeta.Range.GetStart(), sstMeta.Range.GetEnd()) > 0 { + log.Warn("download file skipped", zap.String("filename", file.Name), + logutil.Region(regionInfo.Region), zap.Error(berrors.ErrKVRangeIsEmpty)) + continue + } - req := &import_sstpb.DownloadRequest{ - Sst: *sstMeta, - StorageBackend: importer.backend, - Name: file.GetName(), - RewriteRule: rule, - IsRawKv: true, - CipherInfo: cipher, - StorageCacheId: importer.cacheKey, - } - log.Debug("download SST", logutil.SSTMeta(sstMeta), logutil.Region(regionInfo.Region)) - - var atomicResp atomic.Pointer[import_sstpb.DownloadResponse] - eg, ectx := errgroup.WithContext(ctx) - for _, p := range regionInfo.Region.GetPeers() { - peer := p - eg.Go(func() error { - resp, err := importer.importClient.DownloadSST(ectx, peer.GetStoreId(), req) - if err != nil { - return errors.Trace(err) - } - if resp.GetError() != nil { - return errors.Annotate(berrors.ErrKVDownloadFailed, resp.GetError().GetMessage()) - } - if resp.GetIsEmpty() { - return errors.Trace(berrors.ErrKVRangeIsEmpty) - } + req := &import_sstpb.DownloadRequest{ + Sst: *sstMeta, + StorageBackend: importer.backend, + Name: file.GetName(), + RewriteRule: rule, + IsRawKv: true, + CipherInfo: cipher, + StorageCacheId: importer.cacheKey, + } + log.Debug("download SST", logutil.SSTMeta(sstMeta), logutil.Region(regionInfo.Region)) + + var atomicResp atomic.Pointer[import_sstpb.DownloadResponse] + eg, ectx := errgroup.WithContext(ctx) + for _, p := range regionInfo.Region.GetPeers() { + peer := p + eg.Go(func() error { + resp, err := importer.importClient.DownloadSST(ectx, peer.GetStoreId(), req) + if err != nil { + return errors.Trace(err) + } + if resp.GetError() != nil { + return errors.Annotate(berrors.ErrKVDownloadFailed, resp.GetError().GetMessage()) + } + if resp.GetIsEmpty() { + log.Warn("download file skipped", zap.String("filename", file.Name), + logutil.Region(regionInfo.Region), zap.Error(berrors.ErrKVRangeIsEmpty)) + return nil + } - atomicResp.Store(resp) - return nil - }) - } + atomicResp.Store(resp) + return nil + }) + } - if err := eg.Wait(); err != nil { - return nil, err - } + if err := eg.Wait(); err != nil { + return nil, err + } - downloadResp := atomicResp.Load() - sstMeta.Range.Start = downloadResp.Range.GetStart() - sstMeta.Range.End = downloadResp.Range.GetEnd() - sstMeta.ApiVersion = apiVersion - downloadMetas = append(downloadMetas, sstMeta) + downloadResp := atomicResp.Load() + if downloadResp == nil { + continue + } + sstMeta.Range.Start = downloadResp.Range.GetStart() + sstMeta.Range.End = downloadResp.Range.GetEnd() + sstMeta.ApiVersion = apiVersion + downloadMetas = append(downloadMetas, sstMeta) + } } return downloadMetas, nil } func (importer *SnapFileImporter) ingest( ctx context.Context, - files []*backuppb.File, info *split.RegionInfo, downloadMetas []*import_sstpb.SSTMeta, ) error { + if len(downloadMetas) == 0 { + return nil + } tokenCh := importer.ingestTokensMap.acquireTokenCh(info.Leader.GetStoreId(), importer.concurrencyPerStore) select { case <-ctx.Done(): @@ -780,7 +777,6 @@ func (importer *SnapFileImporter) ingest( } // do not get region info, wait a second and GetRegion() again. 
log.Warn("ingest get region by key return nil", logutil.Region(info.Region), - logutil.Files(files), logutil.SSTMetas(downloadMetas), ) time.Sleep(time.Second) @@ -791,7 +787,6 @@ func (importer *SnapFileImporter) ingest( return errors.Trace(berrors.ErrKVEpochNotMatch) } log.Debug("ingest sst returns not leader error, retry it", - logutil.Files(files), logutil.SSTMetas(downloadMetas), logutil.Region(info.Region), zap.Stringer("newLeader", newInfo.Leader)) diff --git a/br/pkg/restore/snap_client/import_test.go b/br/pkg/restore/snap_client/import_test.go index 71d5a758f39c7..38600a1ae9d46 100644 --- a/br/pkg/restore/snap_client/import_test.go +++ b/br/pkg/restore/snap_client/import_test.go @@ -103,7 +103,7 @@ func TestGetSSTMetaFromFile(t *testing.T) { StartKey: []byte("t2abc"), EndKey: []byte("t3a"), } - sstMeta, err := snapclient.GetSSTMetaFromFile([]byte{}, file, region, rule, snapclient.RewriteModeLegacy) + sstMeta, err := snapclient.GetSSTMetaFromFile(file, region, rule, snapclient.RewriteModeLegacy) require.Nil(t, err) require.Equal(t, "t2abc", string(sstMeta.GetRange().GetStart())) require.Equal(t, "t2\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff", string(sstMeta.GetRange().GetEnd())) @@ -171,7 +171,7 @@ func TestSnapImporter(t *testing.T) { files, rules := generateFiles() for _, file := range files { importer.WaitUntilUnblock() - err = importer.ImportSSTFiles(ctx, []*backuppb.File{file}, rules, nil, kvrpcpb.APIVersion_V1) + err = importer.ImportSSTFiles(ctx, []snapclient.TableIDWithFiles{{Files: []*backuppb.File{file}, RewriteRule: rules}}, nil, kvrpcpb.APIVersion_V1) require.NoError(t, err) } err = importer.Close() @@ -192,7 +192,7 @@ func TestSnapImporterRaw(t *testing.T) { files, rules := generateFiles() for _, file := range files { importer.WaitUntilUnblock() - err = importer.ImportSSTFiles(ctx, []*backuppb.File{file}, rules, nil, kvrpcpb.APIVersion_V1) + err = importer.ImportSSTFiles(ctx, []snapclient.TableIDWithFiles{{Files: []*backuppb.File{file}, RewriteRule: rules}}, nil, kvrpcpb.APIVersion_V1) require.NoError(t, err) } err = importer.Close() diff --git a/br/pkg/restore/snap_client/pipeline_items.go b/br/pkg/restore/snap_client/pipeline_items.go index 79f47d3a9c71c..b3da9c87660ca 100644 --- a/br/pkg/restore/snap_client/pipeline_items.go +++ b/br/pkg/restore/snap_client/pipeline_items.go @@ -16,23 +16,17 @@ package snapclient import ( "context" - "sort" - "sync" "time" - "github.com/opentracing/opentracing-go" "github.com/pingcap/errors" backuppb "github.com/pingcap/kvproto/pkg/brpb" "github.com/pingcap/log" "github.com/pingcap/tidb/br/pkg/glue" "github.com/pingcap/tidb/br/pkg/logutil" "github.com/pingcap/tidb/br/pkg/metautil" - tidallocdb "github.com/pingcap/tidb/br/pkg/restore/internal/prealloc_db" restoreutils "github.com/pingcap/tidb/br/pkg/restore/utils" - "github.com/pingcap/tidb/br/pkg/rtree" "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/br/pkg/summary" - "github.com/pingcap/tidb/br/pkg/utils" "github.com/pingcap/tidb/pkg/domain/infosync" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/parser/model" @@ -40,6 +34,7 @@ import ( "github.com/pingcap/tidb/pkg/util/engine" pdhttp "github.com/tikv/pd/client/http" "go.uber.org/zap" + "go.uber.org/zap/zapcore" "golang.org/x/sync/errgroup" ) @@ -49,31 +44,6 @@ const defaultChannelSize = 1024 // checksum tasks. const defaultChecksumConcurrency = 64 -// TableSink is the 'sink' of restored data by a sender. 
-type TableSink interface { - EmitTables(tables ...CreatedTable) - EmitError(error) - Close() -} - -type chanTableSink struct { - outCh chan<- []CreatedTable - errCh chan<- error -} - -func (sink chanTableSink) EmitTables(tables ...CreatedTable) { - sink.outCh <- tables -} - -func (sink chanTableSink) EmitError(err error) { - sink.errCh <- err -} - -func (sink chanTableSink) Close() { - // ErrCh may has multi sender part, don't close it. - close(sink.outCh) -} - // CreatedTable is a table created on restore process, // but not yet filled with data. type CreatedTable struct { @@ -82,18 +52,6 @@ type CreatedTable struct { OldTable *metautil.Table } -func defaultOutputTableChan() chan *CreatedTable { - return make(chan *CreatedTable, defaultChannelSize) -} - -// TableWithRange is a CreatedTable that has been bind to some of key ranges. -type TableWithRange struct { - CreatedTable - - // Range has been rewrited by rewrite rules. - Range []rtree.Range -} - type TableIDWithFiles struct { TableID int64 @@ -101,252 +59,45 @@ type TableIDWithFiles struct { // RewriteRules is the rewrite rules for the specify table. // because these rules belongs to the *one table*. // we can hold them here. - RewriteRules *restoreutils.RewriteRules -} - -// BatchSender is the abstract of how the batcher send a batch. -type BatchSender interface { - // PutSink sets the sink of this sender, user to this interface promise - // call this function at least once before first call to `RestoreBatch`. - PutSink(sink TableSink) - // RestoreBatch will send the restore request. - RestoreBatch(ranges DrainResult) - Close() -} - -// TiKVRestorer is the minimal methods required for restoring. -// It contains the primitive APIs extract from `restore.Client`, so some of arguments may seem redundant. -// Maybe TODO: make a better abstraction? -type TiKVRestorer interface { - // SplitRanges split regions implicated by the ranges and rewrite rules. - // After spliting, it also scatters the fresh regions. - SplitRanges(ctx context.Context, - ranges []rtree.Range, - updateCh glue.Progress, - isRawKv bool) error - // RestoreSSTFiles import the files to the TiKV. - RestoreSSTFiles(ctx context.Context, - tableIDWithFiles []TableIDWithFiles, - updateCh glue.Progress) error -} - -type tikvSender struct { - client TiKVRestorer - - updateCh glue.Progress - - sink TableSink - inCh chan<- DrainResult - - wg *sync.WaitGroup - - tableWaiters *sync.Map -} - -func (b *tikvSender) PutSink(sink TableSink) { - // don't worry about visibility, since we will call this before first call to - // RestoreBatch, which is a sync point. - b.sink = sink -} - -func (b *tikvSender) RestoreBatch(ranges DrainResult) { - log.Info("restore batch: waiting ranges", zap.Int("range", len(b.inCh))) - b.inCh <- ranges -} - -// NewTiKVSender make a sender that send restore requests to TiKV. -func NewTiKVSender( - ctx context.Context, - cli TiKVRestorer, - updateCh glue.Progress, - splitConcurrency uint, -) (BatchSender, error) { - inCh := make(chan DrainResult, defaultChannelSize) - midCh := make(chan drainResultAndDone, defaultChannelSize) - - sender := &tikvSender{ - client: cli, - updateCh: updateCh, - inCh: inCh, - wg: new(sync.WaitGroup), - tableWaiters: new(sync.Map), - } - - sender.wg.Add(2) - go sender.splitWorker(ctx, inCh, midCh, splitConcurrency) - outCh := make(chan drainResultAndDone, defaultChannelSize) - // block on splitting and scattering regions. 
- // in coarse-grained mode, wait all regions are split and scattered is - // no longer a time-consuming operation, then we can batch download files - // as much as enough and reduce the time of blocking restore. - go sender.blockPipelineWorker(ctx, midCh, outCh) - go sender.restoreWorker(ctx, outCh) - return sender, nil -} - -func (b *tikvSender) Close() { - close(b.inCh) - b.wg.Wait() - log.Debug("tikv sender closed") -} - -type drainResultAndDone struct { - result DrainResult - done func() -} - -func (b *tikvSender) blockPipelineWorker(ctx context.Context, - inCh <-chan drainResultAndDone, - outCh chan<- drainResultAndDone, -) { - defer close(outCh) - res := make([]drainResultAndDone, 0, defaultChannelSize) - for dr := range inCh { - res = append(res, dr) - } - - for _, dr := range res { - select { - case <-ctx.Done(): - return - default: - outCh <- dr - } - } + RewriteRule *restoreutils.RewriteRules } -func (b *tikvSender) splitWorker(ctx context.Context, - ranges <-chan DrainResult, - next chan<- drainResultAndDone, - concurrency uint, -) { - defer log.Debug("split worker closed") - eg, ectx := errgroup.WithContext(ctx) - defer func() { - b.wg.Done() - if err := eg.Wait(); err != nil { - b.sink.EmitError(err) - } - close(next) - log.Info("TiKV Sender: split worker exits.") - }() - - start := time.Now() - defer func() { - elapsed := time.Since(start) - summary.CollectDuration("split region", elapsed) - }() +type zapFilesGroupMarshaler []TableIDWithFiles - pool := tidbutil.NewWorkerPool(concurrency, "split") - for { - select { - case <-ectx.Done(): - return - case result, ok := <-ranges: - if !ok { - return - } - // When the batcher has sent all ranges from a table, it would - // mark this table 'all done'(BlankTablesAfterSend), and then we can send it to checksum. - // - // When there a sole worker sequentially running those batch tasks, everything is fine, however, - // in the context of multi-workers, that become buggy, for example: - // |------table 1, ranges 1------|------table 1, ranges 2------| - // The batcher send batches: [ - // {Ranges: ranges 1}, - // {Ranges: ranges 2, BlankTablesAfterSend: table 1} - // ] - // And there are two workers runs concurrently: - // worker 1: {Ranges: ranges 1} - // worker 2: {Ranges: ranges 2, BlankTablesAfterSend: table 1} - // And worker 2 finished its job before worker 1 done. Note the table wasn't restored fully, - // hence the checksum would fail. - done := b.registerTableIsRestoring(result.TablesToSend) - pool.ApplyOnErrorGroup(eg, func() error { - err := b.client.SplitRanges(ectx, result.Ranges, b.updateCh, false) - if err != nil { - log.Error("failed on split range", rtree.ZapRanges(result.Ranges), zap.Error(err)) - return err - } - next <- drainResultAndDone{ - result: result, - done: done, - } - return nil - }) - } - } +// MarshalLogObjectForFiles is an internal util function to zap something having `Files` field. +func MarshalLogObjectForFiles(files []TableIDWithFiles, encoder zapcore.ObjectEncoder) error { + return zapFilesGroupMarshaler(files).MarshalLogObject(encoder) } -// registerTableIsRestoring marks some tables as 'current restoring'. -// Returning a function that mark the restore has been done. 
-func (b *tikvSender) registerTableIsRestoring(ts []CreatedTable) func() { - wgs := make([]*sync.WaitGroup, 0, len(ts)) - for _, t := range ts { - i, _ := b.tableWaiters.LoadOrStore(t.Table.ID, new(sync.WaitGroup)) - wg := i.(*sync.WaitGroup) - wg.Add(1) - wgs = append(wgs, wg) - } - return func() { - for _, wg := range wgs { - wg.Done() +func (fgs zapFilesGroupMarshaler) MarshalLogObject(encoder zapcore.ObjectEncoder) error { + elements := make([]string, 0) + total := 0 + totalKVs := uint64(0) + totalBytes := uint64(0) + totalSize := uint64(0) + for _, fg := range fgs { + for _, f := range fg.Files { + total += 1 + elements = append(elements, f.GetName()) + totalKVs += f.GetTotalKvs() + totalBytes += f.GetTotalBytes() + totalSize += f.GetSize_() } } + encoder.AddInt("total", total) + _ = encoder.AddArray("files", logutil.AbbreviatedArrayMarshaler(elements)) + encoder.AddUint64("totalKVs", totalKVs) + encoder.AddUint64("totalBytes", totalBytes) + encoder.AddUint64("totalSize", totalSize) + return nil } -// waitTablesDone block the current goroutine, -// till all tables provided are no more ‘current restoring’. -func (b *tikvSender) waitTablesDone(ts []CreatedTable) { - for _, t := range ts { - wg, ok := b.tableWaiters.LoadAndDelete(t.Table.ID) - if !ok { - log.Panic("bug! table done before register!", - zap.Any("wait-table-map", b.tableWaiters), - zap.Stringer("table", t.Table.Name)) - } - wg.(*sync.WaitGroup).Wait() - } +func zapFilesGroup(filesGroup []TableIDWithFiles) zap.Field { + return zap.Object("files", zapFilesGroupMarshaler(filesGroup)) } -func (b *tikvSender) restoreWorker(ctx context.Context, ranges <-chan drainResultAndDone) { - eg, ectx := errgroup.WithContext(ctx) - defer func() { - log.Info("TiKV Sender: restore worker prepare to close.") - if err := eg.Wait(); err != nil { - b.sink.EmitError(err) - } - b.sink.Close() - b.wg.Done() - log.Info("TiKV Sender: restore worker exits.") - }() - for { - select { - case <-ectx.Done(): - return - case r, ok := <-ranges: - if !ok { - return - } - - files := r.result.Files() - // There has been a worker in the `RestoreSSTFiles` procedure. - // Spawning a raw goroutine won't make too many requests to TiKV. - eg.Go(func() error { - e := b.client.RestoreSSTFiles(ectx, files, b.updateCh) - if e != nil { - log.Error("restore batch meet error", logutil.ShortError(e), zapTableIDWithFiles(files)) - r.done() - return e - } - log.Info("restore batch done", rtree.ZapRanges(r.result.Ranges), zapTableIDWithFiles(files)) - r.done() - b.waitTablesDone(r.result.BlankTablesAfterSend) - b.sink.EmitTables(r.result.BlankTablesAfterSend...) - return nil - }) - } - } +func defaultOutputTableChan() chan *CreatedTable { + return make(chan *CreatedTable, defaultChannelSize) } func concurrentHandleTablesCh( @@ -390,189 +141,6 @@ func concurrentHandleTablesCh( } } -// GoCreateTables create tables, and generate their information. -// this function will use workers as the same number of sessionPool, -// leave sessionPool nil to send DDLs sequential. -func (rc *SnapClient) GoCreateTables( - ctx context.Context, - tables []*metautil.Table, - newTS uint64, - errCh chan<- error, -) <-chan CreatedTable { - // Could we have a smaller size of tables? 
- log.Info("start create tables") - - rc.generateRebasedTables(tables) - if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { - span1 := span.Tracer().StartSpan("Client.GoCreateTables", opentracing.ChildOf(span.Context())) - defer span1.Finish() - ctx = opentracing.ContextWithSpan(ctx, span1) - } - outCh := make(chan CreatedTable, len(tables)) - rater := logutil.TraceRateOver(logutil.MetricTableCreatedCounter) - - var err error - - if rc.batchDdlSize > minBatchDdlSize && len(rc.dbPool) > 0 { - err = rc.createTablesInWorkerPool(ctx, tables, newTS, outCh) - if err == nil { - defer log.Debug("all tables are created") - close(outCh) - return outCh - } else if !utils.FallBack2CreateTable(err) { - errCh <- err - close(outCh) - return outCh - } - // fall back to old create table (sequential create table) - log.Info("fall back to the sequential create table") - } - - createOneTable := func(c context.Context, db *tidallocdb.DB, t *metautil.Table) error { - select { - case <-c.Done(): - return c.Err() - default: - } - rt, err := rc.createTable(c, db, t, newTS) - if err != nil { - log.Error("create table failed", - zap.Error(err), - zap.Stringer("db", t.DB.Name), - zap.Stringer("table", t.Info.Name)) - return errors.Trace(err) - } - log.Debug("table created and send to next", - zap.Int("output chan size", len(outCh)), - zap.Stringer("table", t.Info.Name), - zap.Stringer("database", t.DB.Name)) - outCh <- rt - rater.Inc() - rater.L().Info("table created", - zap.Stringer("table", t.Info.Name), - zap.Stringer("database", t.DB.Name)) - return nil - } - go func() { - defer close(outCh) - defer log.Debug("all tables are created") - var err error - if len(rc.dbPool) > 0 { - err = rc.createTablesWithDBPool(ctx, createOneTable, tables) - } else { - err = rc.createTablesWithSoleDB(ctx, createOneTable, tables) - } - if err != nil { - errCh <- err - } - }() - - return outCh -} - -func (rc *SnapClient) GoBlockCreateTablesPipeline(ctx context.Context, sz int, inCh <-chan CreatedTable) <-chan CreatedTable { - outCh := make(chan CreatedTable, sz) - - go func() { - defer close(outCh) - cachedTables := make([]CreatedTable, 0, sz) - for tbl := range inCh { - cachedTables = append(cachedTables, tbl) - } - - sort.Slice(cachedTables, func(a, b int) bool { - return cachedTables[a].Table.ID < cachedTables[b].Table.ID - }) - - for _, tbl := range cachedTables { - select { - case <-ctx.Done(): - return - default: - outCh <- tbl - } - } - }() - return outCh -} - -// GoValidateFileRanges validate files by a stream of tables and yields -// tables with range. -func (rc *SnapClient) GoValidateFileRanges( - ctx context.Context, - tableStream <-chan CreatedTable, - fileOfTable map[int64][]*backuppb.File, - splitSizeBytes, splitKeyCount uint64, - errCh chan<- error, -) <-chan TableWithRange { - // Could we have a smaller outCh size? - outCh := make(chan TableWithRange, len(fileOfTable)) - go func() { - defer close(outCh) - defer log.Info("all range generated") - for { - select { - case <-ctx.Done(): - errCh <- ctx.Err() - return - case t, ok := <-tableStream: - if !ok { - return - } - files := fileOfTable[t.OldTable.Info.ID] - if partitions := t.OldTable.Info.Partition; partitions != nil { - log.Debug("table partition", - zap.Stringer("database", t.OldTable.DB.Name), - zap.Stringer("table", t.Table.Name), - zap.Any("partition info", partitions), - ) - for _, partition := range partitions.Definitions { - files = append(files, fileOfTable[partition.ID]...) 
- } - } - for _, file := range files { - err := restoreutils.ValidateFileRewriteRule(file, t.RewriteRule) - if err != nil { - errCh <- err - return - } - } - // Merge small ranges to reduce split and scatter regions. - ranges, stat, err := restoreutils.MergeAndRewriteFileRanges( - files, t.RewriteRule, splitSizeBytes, splitKeyCount) - if err != nil { - errCh <- err - return - } - log.Info("merge and validate file", - zap.Stringer("database", t.OldTable.DB.Name), - zap.Stringer("table", t.Table.Name), - zap.Int("Files(total)", stat.TotalFiles), - zap.Int("File(write)", stat.TotalWriteCFFile), - zap.Int("File(default)", stat.TotalDefaultCFFile), - zap.Int("Region(total)", stat.TotalRegions), - zap.Int("Regoin(keys avg)", stat.RegionKeysAvg), - zap.Int("Region(bytes avg)", stat.RegionBytesAvg), - zap.Int("Merged(regions)", stat.MergedRegions), - zap.Int("Merged(keys avg)", stat.MergedRegionKeysAvg), - zap.Int("Merged(bytes avg)", stat.MergedRegionBytesAvg)) - - tableWithRange := TableWithRange{ - CreatedTable: t, - Range: ranges, - } - log.Debug("sending range info", - zap.Stringer("table", t.Table.Name), - zap.Int("files", len(files)), - zap.Int("range size", len(ranges)), - zap.Int("output channel size", len(outCh))) - outCh <- tableWithRange - } - } - }() - return outCh -} - // GoValidateChecksum forks a goroutine to validate checksum after restore. // it returns a channel fires a struct{} when all things get done. func (rc *SnapClient) GoValidateChecksum( diff --git a/br/pkg/restore/snap_client/pipeline_items_test.go b/br/pkg/restore/snap_client/pipeline_items_test.go deleted file mode 100644 index 97660996b116a..0000000000000 --- a/br/pkg/restore/snap_client/pipeline_items_test.go +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright 2024 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package snapclient_test - -import ( - "context" - "sync" - "testing" - "time" - - "github.com/pingcap/errors" - backuppb "github.com/pingcap/kvproto/pkg/brpb" - "github.com/pingcap/log" - berrors "github.com/pingcap/tidb/br/pkg/errors" - "github.com/pingcap/tidb/br/pkg/glue" - "github.com/pingcap/tidb/br/pkg/logutil" - snapclient "github.com/pingcap/tidb/br/pkg/restore/snap_client" - "github.com/pingcap/tidb/br/pkg/rtree" - "github.com/pingcap/tidb/pkg/parser/model" - "github.com/stretchr/testify/require" -) - -type fakeRestorer struct { - mu sync.Mutex - errorInSplit bool - splitRanges []rtree.Range - restoredFiles []*backuppb.File - tableIDIsInsequence bool -} - -func (f *fakeRestorer) SplitRanges(ctx context.Context, ranges []rtree.Range, updateCh glue.Progress, isRawKv bool) error { - f.mu.Lock() - defer f.mu.Unlock() - - if ctx.Err() != nil { - return ctx.Err() - } - f.splitRanges = append(f.splitRanges, ranges...) - if f.errorInSplit { - err := errors.Annotatef(berrors.ErrRestoreSplitFailed, - "the key space takes many efforts and finally get together, how dare you split them again... 
:<") - log.Error("error happens :3", logutil.ShortError(err)) - return err - } - return nil -} - -func (f *fakeRestorer) RestoreSSTFiles(ctx context.Context, tableIDWithFiles []snapclient.TableIDWithFiles, updateCh glue.Progress) error { - f.mu.Lock() - defer f.mu.Unlock() - - if ctx.Err() != nil { - return ctx.Err() - } - for i, tableIDWithFile := range tableIDWithFiles { - if int64(i) != tableIDWithFile.TableID { - f.tableIDIsInsequence = false - } - f.restoredFiles = append(f.restoredFiles, tableIDWithFile.Files...) - } - err := errors.Annotatef(berrors.ErrRestoreWriteAndIngest, "the files to restore are taken by a hijacker, meow :3") - log.Error("error happens :3", logutil.ShortError(err)) - return err -} - -func fakeRanges(keys ...string) (r snapclient.DrainResult) { - for i := range keys { - if i+1 == len(keys) { - return - } - r.Ranges = append(r.Ranges, rtree.Range{ - StartKey: []byte(keys[i]), - EndKey: []byte(keys[i+1]), - Files: []*backuppb.File{{Name: "fake.sst"}}, - }) - r.TableEndOffsetInRanges = append(r.TableEndOffsetInRanges, len(r.Ranges)) - r.TablesToSend = append(r.TablesToSend, snapclient.CreatedTable{ - Table: &model.TableInfo{ - ID: int64(i), - }, - }) - } - return -} - -type errorInTimeSink struct { - ctx context.Context - errCh chan error - t *testing.T -} - -func (e errorInTimeSink) EmitTables(tables ...snapclient.CreatedTable) {} - -func (e errorInTimeSink) EmitError(err error) { - e.errCh <- err -} - -func (e errorInTimeSink) Close() {} - -func (e errorInTimeSink) Wait() { - select { - case <-e.ctx.Done(): - e.t.Logf("The context is canceled but no error happen") - e.t.FailNow() - case <-e.errCh: - } -} - -func assertErrorEmitInTime(ctx context.Context, t *testing.T) errorInTimeSink { - errCh := make(chan error, 1) - return errorInTimeSink{ - ctx: ctx, - errCh: errCh, - t: t, - } -} - -func TestSplitFailed(t *testing.T) { - ranges := []snapclient.DrainResult{ - fakeRanges("aax", "abx", "abz"), - fakeRanges("abz", "bbz", "bcy"), - fakeRanges("bcy", "cad", "xxy"), - } - r := &fakeRestorer{errorInSplit: true, tableIDIsInsequence: true} - sender, err := snapclient.NewTiKVSender(context.TODO(), r, nil, 1) - require.NoError(t, err) - dctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - sink := assertErrorEmitInTime(dctx, t) - sender.PutSink(sink) - for _, r := range ranges { - sender.RestoreBatch(r) - } - sink.Wait() - sender.Close() - require.GreaterOrEqual(t, len(r.splitRanges), 2) - require.Len(t, r.restoredFiles, 0) - require.True(t, r.tableIDIsInsequence) -} - -func TestRestoreFailed(t *testing.T) { - ranges := []snapclient.DrainResult{ - fakeRanges("aax", "abx", "abz"), - fakeRanges("abz", "bbz", "bcy"), - fakeRanges("bcy", "cad", "xxy"), - } - r := &fakeRestorer{ - tableIDIsInsequence: true, - } - sender, err := snapclient.NewTiKVSender(context.TODO(), r, nil, 1) - require.NoError(t, err) - dctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - sink := assertErrorEmitInTime(dctx, t) - sender.PutSink(sink) - for _, r := range ranges { - sender.RestoreBatch(r) - } - var wg sync.WaitGroup - wg.Add(1) - go func() { - defer wg.Done() - sink.Wait() - }() - sink.Close() - sender.Close() - wg.Wait() - require.GreaterOrEqual(t, len(r.restoredFiles), 1) - require.True(t, r.tableIDIsInsequence) -} diff --git a/br/pkg/restore/snap_client/context_manager.go b/br/pkg/restore/snap_client/placement_rule_manager.go similarity index 51% rename from br/pkg/restore/snap_client/context_manager.go rename 
to br/pkg/restore/snap_client/placement_rule_manager.go index 294f774630db6..70915b0e93f34 100644 --- a/br/pkg/restore/snap_client/context_manager.go +++ b/br/pkg/restore/snap_client/placement_rule_manager.go @@ -20,7 +20,6 @@ import ( "encoding/hex" "fmt" "strconv" - "sync" "time" "github.com/pingcap/errors" @@ -31,7 +30,6 @@ import ( "github.com/pingcap/tidb/br/pkg/conn/util" berrors "github.com/pingcap/tidb/br/pkg/errors" "github.com/pingcap/tidb/br/pkg/restore/split" - "github.com/pingcap/tidb/pkg/parser/model" "github.com/pingcap/tidb/pkg/tablecodec" "github.com/pingcap/tidb/pkg/util/codec" pd "github.com/tikv/pd/client" @@ -39,143 +37,115 @@ import ( "go.uber.org/zap" ) -// ContextManager is the struct to manage a TiKV 'context' for restore. -// Batcher will call Enter when any table should be restore on batch, -// so you can do some prepare work here(e.g. set placement rules for online restore). -type ContextManager interface { - // Enter make some tables 'enter' this context(a.k.a., prepare for restore). - Enter(ctx context.Context, tables []CreatedTable) error - // Leave make some tables 'leave' this context(a.k.a., restore is done, do some post-works). - Leave(ctx context.Context, tables []CreatedTable) error - // Close closes the context manager, sometimes when the manager is 'killed' and should do some cleanup - // it would be call. - Close(ctx context.Context) +// PlacementRuleManager manages to set the placement rule of tables to label constraint key `exclusive`, +// and unset the rule. +type PlacementRuleManager interface { + SetPlacementRule(ctx context.Context, tables []*CreatedTable) error + ResetPlacementRules(ctx context.Context) error } -// NewBRContextManager makes a BR context manager, that is, -// set placement rules for online restore when enter(see ), -// unset them when leave. -func NewBRContextManager(ctx context.Context, pdClient pd.Client, pdHTTPCli pdhttp.Client, tlsConf *tls.Config, isOnline bool) (ContextManager, error) { - manager := &brContextManager{ - // toolClient reuse the split.SplitClient to do miscellaneous things. It doesn't - // call split related functions so set the arguments to arbitrary values. - toolClient: split.NewClient(pdClient, pdHTTPCli, tlsConf, maxSplitKeysOnce, 3), - isOnline: isOnline, +const ( + restoreLabelKey = "exclusive" + restoreLabelValue = "restore" +) - hasTable: make(map[int64]CreatedTable), +// loadRestoreStores loads the stores used to restore data. This function is called only when is online. +func loadRestoreStores(ctx context.Context, pdClient util.StoreMeta) ([]uint64, error) { + restoreStores := make([]uint64, 0) + stores, err := conn.GetAllTiKVStoresWithRetry(ctx, pdClient, util.SkipTiFlash) + if err != nil { + return nil, errors.Trace(err) } - - err := manager.loadRestoreStores(ctx, pdClient) - return manager, errors.Trace(err) -} - -type brContextManager struct { - toolClient split.SplitClient - restoreStores []uint64 - isOnline bool - - // This 'set' of table ID allow us to handle each table just once. 
- hasTable map[int64]CreatedTable - mu sync.Mutex -} - -func (manager *brContextManager) Close(ctx context.Context) { - tbls := make([]*model.TableInfo, 0, len(manager.hasTable)) - for _, tbl := range manager.hasTable { - tbls = append(tbls, tbl.Table) + for _, s := range stores { + if s.GetState() != metapb.StoreState_Up { + continue + } + for _, l := range s.GetLabels() { + if l.GetKey() == restoreLabelKey && l.GetValue() == restoreLabelValue { + restoreStores = append(restoreStores, s.GetId()) + break + } + } } - manager.splitPostWork(ctx, tbls) + log.Info("load restore stores", zap.Uint64s("store-ids", restoreStores)) + return restoreStores, nil } -func (manager *brContextManager) Enter(ctx context.Context, tables []CreatedTable) error { - placementRuleTables := make([]*model.TableInfo, 0, len(tables)) - manager.mu.Lock() - defer manager.mu.Unlock() +// NewPlacementRuleManager sets and unset placement rules for online restore. +func NewPlacementRuleManager(ctx context.Context, pdClient pd.Client, pdHTTPCli pdhttp.Client, tlsConf *tls.Config, isOnline bool) (PlacementRuleManager, error) { + if !isOnline { + return offlinePlacementRuleManager{}, nil + } - for _, tbl := range tables { - if _, ok := manager.hasTable[tbl.Table.ID]; !ok { - placementRuleTables = append(placementRuleTables, tbl.Table) - } - manager.hasTable[tbl.Table.ID] = tbl + restoreStores, err := loadRestoreStores(ctx, pdClient) + if err != nil { + return nil, errors.Trace(err) + } + if len(restoreStores) == 0 { + log.Warn("The cluster has not any TiKV node with the specify label, so skip setting placement rules", + zap.String("label-key", restoreLabelKey), zap.String("label-value", restoreLabelValue)) + return offlinePlacementRuleManager{}, nil } - return manager.splitPrepareWork(ctx, placementRuleTables) + return &onlinePlacementRuleManager{ + // toolClient reuse the split.SplitClient to do miscellaneous things. It doesn't + // call split related functions so set the arguments to arbitrary values. + toolClient: split.NewClient(pdClient, pdHTTPCli, tlsConf, maxSplitKeysOnce, 3), + + restoreStores: restoreStores, + restoreTables: make(map[int64]struct{}), + }, nil } -func (manager *brContextManager) Leave(ctx context.Context, tables []CreatedTable) error { - manager.mu.Lock() - defer manager.mu.Unlock() - placementRuleTables := make([]*model.TableInfo, 0, len(tables)) +// An offline placement rule manager, which does nothing for placement rule. +type offlinePlacementRuleManager struct{} - for _, table := range tables { - placementRuleTables = append(placementRuleTables, table.Table) - } +// SetPlacementRule implements the interface `PlacementRuleManager`, it does nothing actually. +func (offlinePlacementRuleManager) SetPlacementRule(ctx context.Context, tables []*CreatedTable) error { + return nil +} - manager.splitPostWork(ctx, placementRuleTables) - log.Info("restore table done", zapTables(tables)) - for _, tbl := range placementRuleTables { - delete(manager.hasTable, tbl.ID) - } +// ResetPlacementRules implements the interface `PlacementRuleManager`, it does nothing actually. 
+func (offlinePlacementRuleManager) ResetPlacementRules(ctx context.Context) error { return nil } -func (manager *brContextManager) splitPostWork(ctx context.Context, tables []*model.TableInfo) { - err := manager.resetPlacementRules(ctx, tables) - if err != nil { - log.Warn("reset placement rules failed", zap.Error(err)) - return - } +// An online placement rule manager, it sets the placement rule of tables to label constraint key `exclusive`, +// and unsets the rule. +type onlinePlacementRuleManager struct { + toolClient split.SplitClient + + restoreStores []uint64 + restoreTables map[int64]struct{} } -func (manager *brContextManager) splitPrepareWork(ctx context.Context, tables []*model.TableInfo) error { - err := manager.setupPlacementRules(ctx, tables) - if err != nil { - log.Error("setup placement rules failed", zap.Error(err)) - return errors.Trace(err) +// SetPlacementRule sets the placement rule of tables to label constraint key `exclusive`, +func (manager *onlinePlacementRuleManager) SetPlacementRule(ctx context.Context, tables []*CreatedTable) error { + for _, tbl := range tables { + manager.restoreTables[tbl.Table.ID] = struct{}{} + if tbl.Table.Partition != nil && tbl.Table.Partition.Definitions != nil { + for _, def := range tbl.Table.Partition.Definitions { + manager.restoreTables[def.ID] = struct{}{} + } + } } - err = manager.waitPlacementSchedule(ctx, tables) + err := manager.setupPlacementRules(ctx) if err != nil { - log.Error("wait placement schedule failed", zap.Error(err)) + log.Error("setup placement rules failed", zap.Error(err)) return errors.Trace(err) } - return nil -} -const ( - restoreLabelKey = "exclusive" - restoreLabelValue = "restore" -) - -// loadRestoreStores loads the stores used to restore data. This function is called only when is online. -func (manager *brContextManager) loadRestoreStores(ctx context.Context, pdClient util.StoreMeta) error { - if !manager.isOnline { - return nil - } - stores, err := conn.GetAllTiKVStoresWithRetry(ctx, pdClient, util.SkipTiFlash) + err = manager.waitPlacementSchedule(ctx) if err != nil { + log.Error("wait placement schedule failed", zap.Error(err)) return errors.Trace(err) } - for _, s := range stores { - if s.GetState() != metapb.StoreState_Up { - continue - } - for _, l := range s.GetLabels() { - if l.GetKey() == restoreLabelKey && l.GetValue() == restoreLabelValue { - manager.restoreStores = append(manager.restoreStores, s.GetId()) - break - } - } - } - log.Info("load restore stores", zap.Uint64s("store-ids", manager.restoreStores)) return nil } // SetupPlacementRules sets rules for the tables' regions. 
-func (manager *brContextManager) setupPlacementRules(ctx context.Context, tables []*model.TableInfo) error { - if !manager.isOnline || len(manager.restoreStores) == 0 { - return nil - } +func (manager *onlinePlacementRuleManager) setupPlacementRules(ctx context.Context) error { log.Info("start setting placement rules") rule, err := manager.toolClient.GetPlacementRule(ctx, "pd", "default") if err != nil { @@ -188,10 +158,10 @@ func (manager *brContextManager) setupPlacementRules(ctx context.Context, tables Op: "in", Values: []string{restoreLabelValue}, }) - for _, t := range tables { - rule.ID = getRuleID(t.ID) - rule.StartKeyHex = hex.EncodeToString(codec.EncodeBytes([]byte{}, tablecodec.EncodeTablePrefix(t.ID))) - rule.EndKeyHex = hex.EncodeToString(codec.EncodeBytes([]byte{}, tablecodec.EncodeTablePrefix(t.ID+1))) + for tableID := range manager.restoreTables { + rule.ID = getRuleID(tableID) + rule.StartKeyHex = hex.EncodeToString(codec.EncodeBytes([]byte{}, tablecodec.EncodeTablePrefix(tableID))) + rule.EndKeyHex = hex.EncodeToString(codec.EncodeBytes([]byte{}, tablecodec.EncodeTablePrefix(tableID+1))) err = manager.toolClient.SetPlacementRule(ctx, rule) if err != nil { return errors.Trace(err) @@ -201,22 +171,24 @@ func (manager *brContextManager) setupPlacementRules(ctx context.Context, tables return nil } -func (manager *brContextManager) checkRegions(ctx context.Context, tables []*model.TableInfo) (bool, string, error) { - for i, t := range tables { - start := codec.EncodeBytes([]byte{}, tablecodec.EncodeTablePrefix(t.ID)) - end := codec.EncodeBytes([]byte{}, tablecodec.EncodeTablePrefix(t.ID+1)) +func (manager *onlinePlacementRuleManager) checkRegions(ctx context.Context) (bool, string, error) { + progress := 0 + for tableID := range manager.restoreTables { + start := codec.EncodeBytes([]byte{}, tablecodec.EncodeTablePrefix(tableID)) + end := codec.EncodeBytes([]byte{}, tablecodec.EncodeTablePrefix(tableID+1)) ok, regionProgress, err := manager.checkRange(ctx, start, end) if err != nil { return false, "", errors.Trace(err) } if !ok { - return false, fmt.Sprintf("table %v/%v, %s", i, len(tables), regionProgress), nil + return false, fmt.Sprintf("table %v/%v, %s", progress, len(manager.restoreTables), regionProgress), nil } + progress += 1 } return true, "", nil } -func (manager *brContextManager) checkRange(ctx context.Context, start, end []byte) (bool, string, error) { +func (manager *onlinePlacementRuleManager) checkRange(ctx context.Context, start, end []byte) (bool, string, error) { regions, err := manager.toolClient.ScanRegions(ctx, start, end, -1) if err != nil { return false, "", errors.Trace(err) @@ -236,10 +208,7 @@ func (manager *brContextManager) checkRange(ctx context.Context, start, end []by } // waitPlacementSchedule waits PD to move tables to restore stores. 
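Reviewer note (not part of the diff): the rename from `ContextManager` to `PlacementRuleManager` also collapses `Enter`/`Leave`/`Close` into an explicit set/reset pair. A minimal sketch of the intended lifecycle, assuming placeholder names for the PD clients and created tables; the wrapper `withRestorePlacement` is made up for illustration:

```go
package snapclient_test

import (
	"context"
	"crypto/tls"

	"github.com/pingcap/errors"
	snapclient "github.com/pingcap/tidb/br/pkg/restore/snap_client"
	pd "github.com/tikv/pd/client"
	pdhttp "github.com/tikv/pd/client/http"
)

// withRestorePlacement pins the created tables' regions to the labelled restore
// stores while fn runs, then removes the temporary placement rules again.
// Sketch only: for offline restores NewPlacementRuleManager returns a no-op
// manager, so the same code path works in both modes.
func withRestorePlacement(
	ctx context.Context,
	pdCli pd.Client,
	pdHTTPCli pdhttp.Client,
	tlsConf *tls.Config,
	isOnline bool,
	tables []*snapclient.CreatedTable,
	fn func() error,
) error {
	manager, err := snapclient.NewPlacementRuleManager(ctx, pdCli, pdHTTPCli, tlsConf, isOnline)
	if err != nil {
		return errors.Trace(err)
	}
	if err := manager.SetPlacementRule(ctx, tables); err != nil {
		return errors.Trace(err)
	}
	defer func() {
		// Best-effort cleanup; a failed reset only leaves stale rules behind.
		_ = manager.ResetPlacementRules(ctx)
	}()
	return fn()
}
```

Because the offline manager is a no-op, call sites no longer need an `isOnline` branch around these calls.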
-func (manager *brContextManager) waitPlacementSchedule(ctx context.Context, tables []*model.TableInfo) error { - if !manager.isOnline || len(manager.restoreStores) == 0 { - return nil - } +func (manager *onlinePlacementRuleManager) waitPlacementSchedule(ctx context.Context) error { log.Info("start waiting placement schedule") ticker := time.NewTicker(time.Second * 10) failpoint.Inject("wait-placement-schedule-quicker-ticker", func() { @@ -250,7 +219,7 @@ func (manager *brContextManager) waitPlacementSchedule(ctx context.Context, tabl for { select { case <-ticker.C: - ok, progress, err := manager.checkRegions(ctx, tables) + ok, progress, err := manager.checkRegions(ctx) if err != nil { return errors.Trace(err) } @@ -270,17 +239,14 @@ func getRuleID(tableID int64) string { } // resetPlacementRules removes placement rules for tables. -func (manager *brContextManager) resetPlacementRules(ctx context.Context, tables []*model.TableInfo) error { - if !manager.isOnline || len(manager.restoreStores) == 0 { - return nil - } +func (manager *onlinePlacementRuleManager) ResetPlacementRules(ctx context.Context) error { log.Info("start resetting placement rules") var failedTables []int64 - for _, t := range tables { - err := manager.toolClient.DeletePlacementRule(ctx, "pd", getRuleID(t.ID)) + for tableID := range manager.restoreTables { + err := manager.toolClient.DeletePlacementRule(ctx, "pd", getRuleID(tableID)) if err != nil { - log.Info("failed to delete placement rule for table", zap.Int64("table-id", t.ID)) - failedTables = append(failedTables, t.ID) + log.Info("failed to delete placement rule for table", zap.Int64("table-id", tableID)) + failedTables = append(failedTables, tableID) } } if len(failedTables) > 0 { diff --git a/br/pkg/restore/snap_client/context_manager_test.go b/br/pkg/restore/snap_client/placement_rule_manager_test.go similarity index 84% rename from br/pkg/restore/snap_client/context_manager_test.go rename to br/pkg/restore/snap_client/placement_rule_manager_test.go index c13326d21653a..8ff29e6dc0aa6 100644 --- a/br/pkg/restore/snap_client/context_manager_test.go +++ b/br/pkg/restore/snap_client/placement_rule_manager_test.go @@ -33,8 +33,8 @@ import ( pd "github.com/tikv/pd/client" ) -func generateTables() []snapclient.CreatedTable { - return []snapclient.CreatedTable{ +func generateTables() []*snapclient.CreatedTable { + return []*snapclient.CreatedTable{ { Table: &model.TableInfo{ ID: 1, @@ -56,26 +56,15 @@ func generateTables() []snapclient.CreatedTable { } } -func TestContextManagerOfflineLeave(t *testing.T) { +func TestContextManagerOffline(t *testing.T) { ctx := context.Background() - brContextManager, err := snapclient.NewBRContextManager(ctx, nil, nil, nil, false) + placementRuleManager, err := snapclient.NewPlacementRuleManager(ctx, nil, nil, nil, false) require.NoError(t, err) tables := generateTables() - err = brContextManager.Enter(ctx, tables) + err = placementRuleManager.SetPlacementRule(ctx, tables) require.NoError(t, err) - err = brContextManager.Leave(ctx, tables) + err = placementRuleManager.ResetPlacementRules(ctx) require.NoError(t, err) - brContextManager.Close(ctx) -} - -func TestContextManagerOfflineClose(t *testing.T) { - ctx := context.Background() - brContextManager, err := snapclient.NewBRContextManager(ctx, nil, nil, nil, false) - require.NoError(t, err) - tables := generateTables() - err = brContextManager.Enter(ctx, tables) - require.NoError(t, err) - brContextManager.Close(ctx) } func TestContextManagerOnlineNoStores(t *testing.T) { @@ -105,14 
+94,13 @@ func TestContextManagerOnlineNoStores(t *testing.T) { pdClient := utiltest.NewFakePDClient(stores, false, nil) pdHTTPCli := utiltest.NewFakePDHTTPClient() - brContextManager, err := snapclient.NewBRContextManager(ctx, pdClient, pdHTTPCli, nil, true) + placementRuleManager, err := snapclient.NewPlacementRuleManager(ctx, pdClient, pdHTTPCli, nil, true) require.NoError(t, err) tables := generateTables() - err = brContextManager.Enter(ctx, tables) + err = placementRuleManager.SetPlacementRule(ctx, tables) require.NoError(t, err) - err = brContextManager.Leave(ctx, tables) + err = placementRuleManager.ResetPlacementRules(ctx) require.NoError(t, err) - brContextManager.Close(ctx) } func generateRegions() []*pd.Region { @@ -248,12 +236,11 @@ func TestContextManagerOnlineLeave(t *testing.T) { pdClient := utiltest.NewFakePDClient(stores, false, nil) pdClient.SetRegions(regions) pdHTTPCli := utiltest.NewFakePDHTTPClient() - brContextManager, err := snapclient.NewBRContextManager(ctx, pdClient, pdHTTPCli, nil, true) + placementRuleManager, err := snapclient.NewPlacementRuleManager(ctx, pdClient, pdHTTPCli, nil, true) require.NoError(t, err) tables := generateTables() - err = brContextManager.Enter(ctx, tables) + err = placementRuleManager.SetPlacementRule(ctx, tables) require.NoError(t, err) - err = brContextManager.Leave(ctx, tables) + err = placementRuleManager.ResetPlacementRules(ctx) require.NoError(t, err) - brContextManager.Close(ctx) } diff --git a/br/pkg/restore/snap_client/tikv_sender.go b/br/pkg/restore/snap_client/tikv_sender.go new file mode 100644 index 0000000000000..eaf3bf411efac --- /dev/null +++ b/br/pkg/restore/snap_client/tikv_sender.go @@ -0,0 +1,392 @@ +// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. + +package snapclient + +import ( + "context" + "fmt" + "sort" + "strings" + "time" + + "github.com/pingcap/errors" + backuppb "github.com/pingcap/kvproto/pkg/brpb" + "github.com/pingcap/log" + "github.com/pingcap/tidb/br/pkg/checkpoint" + "github.com/pingcap/tidb/br/pkg/glue" + "github.com/pingcap/tidb/br/pkg/logutil" + snapsplit "github.com/pingcap/tidb/br/pkg/restore/internal/snap_split" + "github.com/pingcap/tidb/br/pkg/restore/split" + restoreutils "github.com/pingcap/tidb/br/pkg/restore/utils" + "github.com/pingcap/tidb/br/pkg/summary" + "github.com/pingcap/tidb/pkg/tablecodec" + "go.uber.org/zap" + "golang.org/x/sync/errgroup" +) + +// mapTableToFiles makes a map that mapping table ID to its backup files. +// aware that one file can and only can hold one table. +func mapTableToFiles(files []*backuppb.File) (map[int64][]*backuppb.File, int) { + result := map[int64][]*backuppb.File{} + // count the write cf file that hint for split key slice size + maxSplitKeyCount := 0 + for _, file := range files { + tableID := tablecodec.DecodeTableID(file.GetStartKey()) + tableEndID := tablecodec.DecodeTableID(file.GetEndKey()) + if tableID != tableEndID { + log.Panic("key range spread between many files.", + zap.String("file name", file.Name), + logutil.Key("startKey", file.StartKey), + logutil.Key("endKey", file.EndKey)) + } + if tableID == 0 { + log.Panic("invalid table key of file", + zap.String("file name", file.Name), + logutil.Key("startKey", file.StartKey), + logutil.Key("endKey", file.EndKey)) + } + result[tableID] = append(result[tableID], file) + if file.Cf == restoreutils.WriteCFName { + maxSplitKeyCount += 1 + } + } + return result, maxSplitKeyCount +} + +// fileterOutFiles filter out files that exists in the checkpoint set. 
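As a quick illustration of the grouping invariant that `mapTableToFiles` enforces above, here is a condensed, standalone mirror of it: every backup file must start and end inside one table's key space, and each write-CF file adds one to the split-key capacity hint. The literal cf names stand in for `restoreutils.WriteCFName`.

```go
package main

import (
	"fmt"

	backuppb "github.com/pingcap/kvproto/pkg/brpb"
	"github.com/pingcap/tidb/pkg/tablecodec"
)

func main() {
	files := []*backuppb.File{
		{Name: "t1-write.sst", StartKey: tablecodec.EncodeTablePrefix(1), EndKey: tablecodec.EncodeTablePrefix(1), Cf: "write"},
		{Name: "t1-default.sst", StartKey: tablecodec.EncodeTablePrefix(1), EndKey: tablecodec.EncodeTablePrefix(1), Cf: "default"},
		{Name: "t2-write.sst", StartKey: tablecodec.EncodeTablePrefix(2), EndKey: tablecodec.EncodeTablePrefix(2), Cf: "write"},
	}

	byTable := map[int64][]*backuppb.File{}
	writeCFHint := 0 // upper bound on split keys: one per write-CF file
	for _, f := range files {
		start := tablecodec.DecodeTableID(f.GetStartKey())
		end := tablecodec.DecodeTableID(f.GetEndKey())
		if start != end || start == 0 {
			panic("a backup file must stay within one table's key space")
		}
		byTable[start] = append(byTable[start], f)
		if f.Cf == "write" { // the real code compares against restoreutils.WriteCFName
			writeCFHint++
		}
	}
	fmt.Println(len(byTable[1]), len(byTable[2]), writeCFHint) // 2 1 2
}
```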
+func filterOutFiles(checkpointSet map[string]struct{}, files []*backuppb.File, updateCh glue.Progress) []*backuppb.File { + progress := int(0) + totalKVs := uint64(0) + totalBytes := uint64(0) + newFiles := make([]*backuppb.File, 0, len(files)) + for _, file := range files { + rangeKey := getFileRangeKey(file.Name) + if _, exists := checkpointSet[rangeKey]; exists { + // the range has been import done, so skip it and + // update the summary information + progress += 1 + totalKVs += file.TotalKvs + totalBytes += file.TotalBytes + } else { + newFiles = append(newFiles, file) + } + } + if progress > 0 { + // (split/scatter + download/ingest) / (default cf + write cf) + updateCh.IncBy(int64(progress) * 2 / 2) + summary.CollectSuccessUnit(summary.TotalKV, progress, totalKVs) + summary.CollectSuccessUnit(summary.SkippedKVCountByCheckpoint, progress, totalKVs) + summary.CollectSuccessUnit(summary.TotalBytes, progress, totalBytes) + summary.CollectSuccessUnit(summary.SkippedBytesByCheckpoint, progress, totalBytes) + } + return newFiles +} + +// SortAndValidateFileRanges sort, merge and validate files by tables and yields tables with range. +func SortAndValidateFileRanges( + createdTables []*CreatedTable, + allFiles []*backuppb.File, + checkpointSetWithTableID map[int64]map[string]struct{}, + splitSizeBytes, splitKeyCount uint64, + splitOnTable bool, + updateCh glue.Progress, +) ([][]byte, [][]TableIDWithFiles, error) { + // sort the created table by downstream stream table id + sort.Slice(createdTables, func(a, b int) bool { + return createdTables[a].Table.ID < createdTables[b].Table.ID + }) + // mapping table ID to its backup files + fileOfTable, hintSplitKeyCount := mapTableToFiles(allFiles) + // sort, merge, and validate files in each tables, and generate split keys by the way + var ( + // to generate region split keys, merge the small ranges over the adjacent tables + sortedSplitKeys = make([][]byte, 0, hintSplitKeyCount) + groupSize = uint64(0) + groupCount = uint64(0) + lastKey []byte = nil + + // group the files by the generated split keys + tableIDWithFilesGroup = make([][]TableIDWithFiles, 0, hintSplitKeyCount) + lastFilesGroup []TableIDWithFiles = nil + + // statistic + mergedRangeCount = 0 + ) + + log.Info("start to merge ranges", zap.Uint64("kv size threshold", splitSizeBytes), zap.Uint64("kv count threshold", splitKeyCount)) + // Notice that TiDB does not split partition even if the config `split-table` is on. + for _, table := range createdTables { + files := fileOfTable[table.OldTable.Info.ID] + if partitions := table.OldTable.Info.Partition; partitions != nil { + for _, partition := range partitions.Definitions { + files = append(files, fileOfTable[partition.ID]...) + } + } + for _, file := range files { + if err := restoreutils.ValidateFileRewriteRule(file, table.RewriteRule); err != nil { + return nil, nil, errors.Trace(err) + } + } + // Merge small ranges to reduce split and scatter regions. + // Notice that the files having the same start key and end key are in the same range. 
+ sortedRanges, stat, err := restoreutils.MergeAndRewriteFileRanges( + files, table.RewriteRule, splitSizeBytes, splitKeyCount) + if err != nil { + return nil, nil, errors.Trace(err) + } + log.Info("merge and validate file", + zap.Stringer("database", table.OldTable.DB.Name), + zap.Stringer("table", table.Table.Name), + zap.Int("Files(total)", stat.TotalFiles), + zap.Int("File(write)", stat.TotalWriteCFFile), + zap.Int("File(default)", stat.TotalDefaultCFFile), + zap.Int("Region(total)", stat.TotalRegions), + zap.Int("Regoin(keys avg)", stat.RegionKeysAvg), + zap.Int("Region(bytes avg)", stat.RegionBytesAvg), + zap.Int("Merged(regions)", stat.MergedRegions), + zap.Int("Merged(keys avg)", stat.MergedRegionKeysAvg), + zap.Int("Merged(bytes avg)", stat.MergedRegionBytesAvg)) + + // skip some ranges if recorded by checkpoint + // Notice that skip ranges after select split keys in order to make the split keys + // always the same. + checkpointSet := checkpointSetWithTableID[table.Table.ID] + + // Generate the split keys, and notice that the way to generate split keys must be deterministic + // and regardless of the current cluster region distribution. Therefore, when restore fails, the + // generated split keys keep the same as before the next time we retry to restore. + // + // Here suppose that all the ranges is in the one region at beginning. + // In general, the ids of tables, which are created in the previous stage, are continuously because: + // + // 1. Before create tables, the cluster global id is allocated to ${GLOBAL_ID}; + // 2. Suppose the ids of tables to be created are {t_i}, which t_i < t_j if i < j. + // 3. BR preallocate the global id from ${GLOBAL_ID} to t_max, so the table ids, which are larger + // than ${GLOBAL_ID}, has the same downstream ids. + // 4. Then BR creates tables, and the table ids, which are less than or equal to ${GLOBAL_ID}, are + // allocated to [t_max + 1, ...) in the downstream cluster. + // 5. Therefore, the BR-created tables are usually continuously. + // + // Besides, the prefix of the existing region's start key and end key should not be `t{restored_table_id}`. + for _, rg := range sortedRanges { + // split key generation + afterMergedGroupSize := groupSize + rg.Size + afterMergedGroupCount := groupCount + rg.Count + if afterMergedGroupSize > splitSizeBytes || afterMergedGroupCount > splitKeyCount { + log.Info("merge ranges across tables due to kv size/count threshold exceeded", zap.Uint64("merged kv size", groupSize), zap.Uint64("merged kv count", groupCount), + zap.Int("merged range count", mergedRangeCount)) + groupSize, groupCount = rg.Size, rg.Count + mergedRangeCount = 0 + // can not merge files anymore, so generate a new split key + if lastKey != nil { + sortedSplitKeys = append(sortedSplitKeys, lastKey) + } + // then generate a new files group + if lastFilesGroup != nil { + tableIDWithFilesGroup = append(tableIDWithFilesGroup, lastFilesGroup) + lastFilesGroup = nil + } + } else { + groupSize = afterMergedGroupSize + groupCount = afterMergedGroupCount + } + // override the previous key, which may not become a split key. + lastKey = rg.EndKey + mergedRangeCount += len(rg.Files) + // checkpoint filter out the import done files in the previous restore executions. + // Notice that skip ranges after select split keys in order to make the split keys + // always the same. 
+ newFiles := filterOutFiles(checkpointSet, rg.Files, updateCh) + // append the new files into the group + if len(newFiles) > 0 { + if len(lastFilesGroup) == 0 || lastFilesGroup[len(lastFilesGroup)-1].TableID != table.Table.ID { + lastFilesGroup = append(lastFilesGroup, TableIDWithFiles{ + TableID: table.Table.ID, + Files: nil, + RewriteRule: table.RewriteRule, + }) + } + lastFilesGroup[len(lastFilesGroup)-1].Files = append(lastFilesGroup[len(lastFilesGroup)-1].Files, newFiles...) + } + } + + // If the config split-table/split-region-on-table is on, it skip merging ranges over tables. + if splitOnTable { + log.Info("merge ranges across tables due to split on table", zap.Uint64("merged kv size", groupSize), zap.Uint64("merged kv count", groupCount), + zap.Int("merged range count", mergedRangeCount)) + groupSize, groupCount = 0, 0 + mergedRangeCount = 0 + // Besides, ignore the table's last key that might be chosen as a split key, because there + // is already a table split key. + lastKey = nil + if lastFilesGroup != nil { + tableIDWithFilesGroup = append(tableIDWithFilesGroup, lastFilesGroup) + lastFilesGroup = nil + } + } + } + // append the key of the last range anyway + if lastKey != nil { + sortedSplitKeys = append(sortedSplitKeys, lastKey) + } + // append the last files group anyway + if lastFilesGroup != nil { + log.Info("merge ranges across tables due to the last group", zap.Uint64("merged kv size", groupSize), zap.Uint64("merged kv count", groupCount), + zap.Int("merged range count", mergedRangeCount)) + tableIDWithFilesGroup = append(tableIDWithFilesGroup, lastFilesGroup) + } + return sortedSplitKeys, tableIDWithFilesGroup, nil +} + +func (rc *SnapClient) RestoreTables( + ctx context.Context, + placementRuleManager PlacementRuleManager, + createdTables []*CreatedTable, + allFiles []*backuppb.File, + checkpointSetWithTableID map[int64]map[string]struct{}, + splitSizeBytes, splitKeyCount uint64, + splitOnTable bool, + updateCh glue.Progress, +) error { + if err := placementRuleManager.SetPlacementRule(ctx, createdTables); err != nil { + return errors.Trace(err) + } + defer func() { + err := placementRuleManager.ResetPlacementRules(ctx) + if err != nil { + log.Warn("failed to reset placement rules", zap.Error(err)) + } + }() + + start := time.Now() + sortedSplitKeys, tableIDWithFilesGroup, err := SortAndValidateFileRanges(createdTables, allFiles, checkpointSetWithTableID, splitSizeBytes, splitKeyCount, splitOnTable, updateCh) + if err != nil { + return errors.Trace(err) + } + log.Info("Merge ranges", zap.Duration("take", time.Since(start))) + + start = time.Now() + if err = rc.SplitRanges(ctx, sortedSplitKeys, updateCh, false); err != nil { + return errors.Trace(err) + } + log.Info("Split regions", zap.Duration("take", time.Since(start))) + + start = time.Now() + if err = rc.RestoreSSTFiles(ctx, tableIDWithFilesGroup, updateCh); err != nil { + return errors.Trace(err) + } + elapsed := time.Since(start) + log.Info("Retore files", zap.Duration("take", elapsed)) + + summary.CollectSuccessUnit("files", len(allFiles), elapsed) + return nil +} + +// SplitRanges implements TiKVRestorer. It splits region by +// data range after rewrite. 
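To make the split-key selection in `SortAndValidateFileRanges` easier to follow, here is a simplified, self-contained sketch of just that accumulation loop, with string end keys standing in for the real encoded row keys; file grouping, checkpoint filtering, and the split-on-table reset are omitted.

```go
package main

import "fmt"

// rangeStat is a stand-in for the merged ranges produced per table above:
// only the fields the split-key selection cares about.
type rangeStat struct {
	endKey      string
	size, count uint64
}

// pickSplitKeys mirrors the accumulation loop in SortAndValidateFileRanges:
// keep folding adjacent ranges into one group until adding the next range
// would exceed either threshold, then emit the last end key seen as a split key.
func pickSplitKeys(ranges []rangeStat, splitSize, splitKeys uint64) []string {
	var (
		keys                []string
		groupSize, groupCnt uint64
		lastKey             string
	)
	for _, rg := range ranges {
		if groupSize+rg.size > splitSize || groupCnt+rg.count > splitKeys {
			if lastKey != "" {
				keys = append(keys, lastKey)
			}
			groupSize, groupCnt = rg.size, rg.count
		} else {
			groupSize += rg.size
			groupCnt += rg.count
		}
		lastKey = rg.endKey // overridden each iteration; only becomes a split key at the next cut
	}
	if lastKey != "" {
		keys = append(keys, lastKey) // the end of the final group is always a split key
	}
	return keys
}

func main() {
	ranges := []rangeStat{
		{"k1", 100, 100}, {"k2", 100, 100}, {"k3", 100, 100}, {"k4", 100, 100},
	}
	// With a 250-byte/250-key budget the cut happens before k3: keys are [k2 k4].
	fmt.Println(pickSplitKeys(ranges, 250, 250))
}
```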
+func (rc *SnapClient) SplitRanges( + ctx context.Context, + sortedSplitKeys [][]byte, + updateCh glue.Progress, + isRawKv bool, +) error { + splitClientOpts := make([]split.ClientOptionalParameter, 0, 2) + splitClientOpts = append(splitClientOpts, split.WithOnSplit(func(keys [][]byte) { + for range keys { + updateCh.Inc() + } + })) + if isRawKv { + splitClientOpts = append(splitClientOpts, split.WithRawKV()) + } + + splitter := snapsplit.NewRegionSplitter(split.NewClient( + rc.pdClient, + rc.pdHTTPClient, + rc.tlsConf, + maxSplitKeysOnce, + rc.storeCount+1, + splitClientOpts..., + )) + + return splitter.ExecuteSplit(ctx, sortedSplitKeys) +} + +func getFileRangeKey(f string) string { + // the backup date file pattern is `{store_id}_{region_id}_{epoch_version}_{key}_{ts}_{cf}.sst` + // so we need to compare with out the `_{cf}.sst` suffix + idx := strings.LastIndex(f, "_") + if idx < 0 { + panic(fmt.Sprintf("invalid backup data file name: '%s'", f)) + } + + return f[:idx] +} + +// RestoreSSTFiles tries to restore the files. +func (rc *SnapClient) RestoreSSTFiles( + ctx context.Context, + tableIDWithFilesGroup [][]TableIDWithFiles, + updateCh glue.Progress, +) error { + if err := rc.setSpeedLimit(ctx, rc.rateLimit); err != nil { + return errors.Trace(err) + } + + eg, ectx := errgroup.WithContext(ctx) + for _, tableIDWithFiles := range tableIDWithFilesGroup { + if ectx.Err() != nil { + log.Warn("Restoring encountered error and already stopped, give up remained files.", + logutil.ShortError(ectx.Err())) + // We will fetch the error from the errgroup then (If there were). + // Also note if the parent context has been canceled or something, + // breaking here directly is also a reasonable behavior. + break + } + filesReplica := tableIDWithFiles + rc.fileImporter.WaitUntilUnblock() + rc.workerPool.ApplyOnErrorGroup(eg, func() (restoreErr error) { + fileStart := time.Now() + defer func() { + if restoreErr == nil { + log.Info("import files done", zapFilesGroup(filesReplica), + zap.Duration("take", time.Since(fileStart))) + updateCh.Inc() + } + }() + if importErr := rc.fileImporter.ImportSSTFiles(ectx, filesReplica, rc.cipher, rc.dom.Store().GetCodec().GetAPIVersion()); importErr != nil { + return errors.Trace(importErr) + } + + // the data of this range has been import done + if rc.checkpointRunner != nil && len(filesReplica) > 0 { + for _, filesGroup := range filesReplica { + rangeKeySet := make(map[string]struct{}) + for _, file := range filesGroup.Files { + rangeKey := getFileRangeKey(file.Name) + rangeKeySet[rangeKey] = struct{}{} + } + for rangeKey := range rangeKeySet { + // The checkpoint range shows this ranges of kvs has been restored into + // the table corresponding to the table-id. + if err := checkpoint.AppendRangesForRestore(ectx, rc.checkpointRunner, filesGroup.TableID, rangeKey); err != nil { + return errors.Trace(err) + } + } + } + } + + return nil + }) + } + + if err := eg.Wait(); err != nil { + summary.CollectFailureUnit("file", err) + log.Error("restore files failed", zap.Error(err)) + return errors.Trace(err) + } + // Once the parent context canceled and there is no task running in the errgroup, + // we may break the for loop without error in the errgroup. (Will this happen?) + // At that time, return the error in the context here. 
+ return ctx.Err() +} diff --git a/br/pkg/restore/snap_client/tikv_sender_test.go b/br/pkg/restore/snap_client/tikv_sender_test.go new file mode 100644 index 0000000000000..624a623a96153 --- /dev/null +++ b/br/pkg/restore/snap_client/tikv_sender_test.go @@ -0,0 +1,633 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package snapclient_test + +import ( + "fmt" + "math/rand" + "testing" + + backuppb "github.com/pingcap/kvproto/pkg/brpb" + "github.com/pingcap/tidb/br/pkg/glue" + "github.com/pingcap/tidb/br/pkg/metautil" + snapclient "github.com/pingcap/tidb/br/pkg/restore/snap_client" + restoreutils "github.com/pingcap/tidb/br/pkg/restore/utils" + "github.com/pingcap/tidb/pkg/kv" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/tablecodec" + "github.com/stretchr/testify/require" +) + +func TestMapTableToFiles(t *testing.T) { + filesOfTable1 := []*backuppb.File{ + { + Name: "table1-1.sst", + StartKey: tablecodec.EncodeTablePrefix(1), + EndKey: tablecodec.EncodeTablePrefix(1), + Cf: restoreutils.WriteCFName, + }, + { + Name: "table1-2.sst", + StartKey: tablecodec.EncodeTablePrefix(1), + EndKey: tablecodec.EncodeTablePrefix(1), + Cf: restoreutils.WriteCFName, + }, + { + Name: "table1-3.sst", + StartKey: tablecodec.EncodeTablePrefix(1), + EndKey: tablecodec.EncodeTablePrefix(1), + }, + } + filesOfTable2 := []*backuppb.File{ + { + Name: "table2-1.sst", + StartKey: tablecodec.EncodeTablePrefix(2), + EndKey: tablecodec.EncodeTablePrefix(2), + Cf: restoreutils.WriteCFName, + }, + { + Name: "table2-2.sst", + StartKey: tablecodec.EncodeTablePrefix(2), + EndKey: tablecodec.EncodeTablePrefix(2), + }, + } + + result, hintSplitKeyCount := snapclient.MapTableToFiles(append(filesOfTable2, filesOfTable1...)) + + require.Equal(t, filesOfTable1, result[1]) + require.Equal(t, filesOfTable2, result[2]) + require.Equal(t, 3, hintSplitKeyCount) +} + +type MockUpdateCh struct { + glue.Progress +} + +func (m MockUpdateCh) IncBy(cnt int64) {} + +func generateCreatedTables(t *testing.T, upstreamTableIDs []int64, upstreamPartitionIDs map[int64][]int64, downstreamID func(upstream int64) int64) []*snapclient.CreatedTable { + createdTables := make([]*snapclient.CreatedTable, 0, len(upstreamTableIDs)) + triggerID := 0 + for _, upstreamTableID := range upstreamTableIDs { + downstreamTableID := downstreamID(upstreamTableID) + createdTable := &snapclient.CreatedTable{ + Table: &model.TableInfo{ + ID: downstreamTableID, + Name: model.NewCIStr(fmt.Sprintf("tbl-%d", upstreamTableID)), + Indices: []*model.IndexInfo{ + {Name: model.NewCIStr("idx1"), ID: 1}, + {Name: model.NewCIStr("idx2"), ID: 2}, + {Name: model.NewCIStr("idx3"), ID: 3}, + }, + }, + OldTable: &metautil.Table{ + DB: &model.DBInfo{Name: model.NewCIStr("test")}, + Info: &model.TableInfo{ + ID: upstreamTableID, + Indices: []*model.IndexInfo{ + {Name: model.NewCIStr("idx1"), ID: 1}, + {Name: model.NewCIStr("idx2"), ID: 2}, + {Name: model.NewCIStr("idx3"), ID: 3}, + }, + }, + }, + } + partitionIDs, 
exists := upstreamPartitionIDs[upstreamTableID] + if exists { + triggerID += 1 + downDefs := make([]model.PartitionDefinition, 0, len(partitionIDs)) + upDefs := make([]model.PartitionDefinition, 0, len(partitionIDs)) + for _, partitionID := range partitionIDs { + downDefs = append(downDefs, model.PartitionDefinition{ + Name: model.NewCIStr(fmt.Sprintf("p_%d", partitionID)), + ID: downstreamID(partitionID), + }) + upDefs = append(upDefs, model.PartitionDefinition{ + Name: model.NewCIStr(fmt.Sprintf("p_%d", partitionID)), + ID: partitionID, + }) + } + createdTable.OldTable.Info.Partition = &model.PartitionInfo{ + Definitions: upDefs, + } + createdTable.Table.Partition = &model.PartitionInfo{ + Definitions: downDefs, + } + } + // generate rewrite rules + createdTable.RewriteRule = restoreutils.GetRewriteRules(createdTable.Table, createdTable.OldTable.Info, 0, true) + createdTables = append(createdTables, createdTable) + } + + require.Equal(t, len(upstreamPartitionIDs), triggerID) + disorderTables(createdTables) + return createdTables +} + +func disorderTables(createdTables []*snapclient.CreatedTable) { + // Each position will be replaced by a random table + for i := range createdTables { + randIndex := rand.Int() % len(createdTables) + tmp := createdTables[i] + createdTables[i] = createdTables[randIndex] + createdTables[randIndex] = tmp + } +} + +func file(tableID int64, startRow, endRow int, totalKvs, totalBytes uint64, cf string) *backuppb.File { + return &backuppb.File{ + Name: fmt.Sprintf("file_%d_%d_%s.sst", tableID, startRow, cf), + StartKey: tablecodec.EncodeRowKeyWithHandle(tableID, kv.IntHandle(startRow)), + EndKey: tablecodec.EncodeRowKeyWithHandle(tableID, kv.IntHandle(endRow)), + TotalKvs: totalKvs, + TotalBytes: totalBytes, + Cf: cf, + } +} + +func key(tableID int64, row int) []byte { + return tablecodec.EncodeRowKeyWithHandle(downstreamID(tableID), kv.IntHandle(row)) +} + +func files(logicalTableID, physicalTableID int64, startRows []int, cfs []string) snapclient.TableIDWithFiles { + files := make([]*backuppb.File, 0, len(startRows)) + for i, startRow := range startRows { + files = append(files, &backuppb.File{Name: fmt.Sprintf("file_%d_%d_%s.sst", physicalTableID, startRow, cfs[i])}) + } + return snapclient.TableIDWithFiles{ + TableID: downstreamID(logicalTableID), + Files: files, + } +} + +func pfiles(logicalTableID int64, physicalTableIDs []int64, startRowss [][]int, cfss [][]string) snapclient.TableIDWithFiles { + files := make([]*backuppb.File, 0, len(startRowss)*2) + for i, physicalTableID := range physicalTableIDs { + for j, startRow := range startRowss[i] { + files = append(files, &backuppb.File{Name: fmt.Sprintf("file_%d_%d_%s.sst", physicalTableID, startRow, cfss[i][j])}) + } + } + + return snapclient.TableIDWithFiles{ + TableID: downstreamID(logicalTableID), + Files: files, + } +} + +func downstreamID(upstream int64) int64 { return upstream + 1000 } + +func cptKey(tableID int64, startRow int, cf string) string { + return snapclient.GetFileRangeKey(fmt.Sprintf("file_%d_%d_%s.sst", tableID, startRow, cf)) +} + +func TestSortAndValidateFileRanges(t *testing.T) { + updateCh := MockUpdateCh{} + + d := restoreutils.DefaultCFName + w := restoreutils.WriteCFName + cases := []struct { + // created tables + upstreamTableIDs []int64 + upstreamPartitionIDs map[int64][]int64 + + // files + files []*backuppb.File + + // checkpoint set + checkpointSetWithTableID map[int64]map[string]struct{} + + // config + splitSizeBytes uint64 + splitKeyCount uint64 + splitOnTable bool + + // 
expected result + splitKeys [][]byte + tableIDWithFilesGroups [][]snapclient.TableIDWithFiles + }{ + { // large sst, split-on-table, no checkpoint + upstreamTableIDs: []int64{100, 200, 300}, + upstreamPartitionIDs: map[int64][]int64{100: {101, 102, 103}, 200: {201, 202, 203}, 300: {301, 302, 303}}, + files: []*backuppb.File{ + file(100, 1, 2, 100, 100, w), file(100, 1, 2, 100, 100, d), + file(102, 1, 2, 100, 100, w), + file(202, 1, 2, 100, 100, w), file(202, 1, 2, 100, 100, d), + file(202, 2, 3, 100, 100, w), file(202, 2, 3, 100, 100, d), + file(302, 1, 2, 100, 100, w), + }, + checkpointSetWithTableID: nil, + splitSizeBytes: 80, + splitKeyCount: 80, + splitOnTable: true, + splitKeys: [][]byte{ + key(100, 2) /*split table key*/, key(202, 2), /*split table key*/ + }, + tableIDWithFilesGroups: [][]snapclient.TableIDWithFiles{ + {files(100, 100, []int{1, 1}, []string{w, d})}, + {files(100, 102, []int{1}, []string{w})}, + {files(200, 202, []int{1, 1}, []string{w, d})}, + {files(200, 202, []int{2, 2}, []string{w, d})}, + {files(300, 302, []int{1}, []string{w})}, + }, + }, + { // large sst, split-on-table, checkpoint + upstreamTableIDs: []int64{100, 200, 300}, + upstreamPartitionIDs: map[int64][]int64{100: {101, 102, 103}, 200: {201, 202, 203}, 300: {301, 302, 303}}, + files: []*backuppb.File{ + file(100, 1, 2, 100, 100, w), file(100, 1, 2, 100, 100, d), + file(102, 1, 2, 100, 100, w), + file(202, 1, 2, 100, 100, w), file(202, 1, 2, 100, 100, d), + file(202, 2, 3, 100, 100, w), file(202, 2, 3, 100, 100, d), + file(302, 1, 2, 100, 100, w), + }, + checkpointSetWithTableID: map[int64]map[string]struct{}{ + downstreamID(100): {cptKey(100, 1, w): struct{}{}}, + downstreamID(200): {cptKey(202, 1, w): struct{}{}}, + }, + splitSizeBytes: 80, + splitKeyCount: 80, + splitOnTable: true, + splitKeys: [][]byte{ + key(100, 2) /*split table key*/, key(202, 2), /*split table key*/ + }, + tableIDWithFilesGroups: [][]snapclient.TableIDWithFiles{ + //{files(100, 100, []int{1, 1}, []string{w, d})}, + {files(100, 102, []int{1}, []string{w})}, + //{files(200, 202, []int{1, 1}, []string{w, d})}, + {files(200, 202, []int{2, 2}, []string{w, d})}, + {files(300, 302, []int{1}, []string{w})}, + }, + }, + { // large sst, no split-on-table, no checkpoint + upstreamTableIDs: []int64{100, 200, 300}, + upstreamPartitionIDs: map[int64][]int64{100: {101, 102, 103}, 200: {201, 202, 203}, 300: {301, 302, 303}}, + files: []*backuppb.File{ + file(100, 1, 2, 100, 100, w), file(100, 1, 2, 100, 100, d), + file(102, 1, 2, 100, 100, w), + file(202, 1, 2, 100, 100, w), file(202, 1, 2, 100, 100, d), + file(202, 2, 3, 100, 100, w), file(202, 2, 3, 100, 100, d), + file(302, 1, 2, 100, 100, w), + }, + checkpointSetWithTableID: nil, + splitSizeBytes: 80, + splitKeyCount: 80, + splitOnTable: false, + splitKeys: [][]byte{ + key(100, 2), key(102, 2), key(202, 2), key(202, 3), key(302, 2), + }, + tableIDWithFilesGroups: [][]snapclient.TableIDWithFiles{ + {files(100, 100, []int{1, 1}, []string{w, d})}, + {files(100, 102, []int{1}, []string{w})}, + {files(200, 202, []int{1, 1}, []string{w, d})}, + {files(200, 202, []int{2, 2}, []string{w, d})}, + {files(300, 302, []int{1}, []string{w})}, + }, + }, + { // large sst, no split-on-table, checkpoint + upstreamTableIDs: []int64{100, 200, 300}, + upstreamPartitionIDs: map[int64][]int64{100: {101, 102, 103}, 200: {201, 202, 203}, 300: {301, 302, 303}}, + files: []*backuppb.File{ + file(100, 1, 2, 100, 100, w), file(100, 1, 2, 100, 100, d), + file(102, 1, 2, 100, 100, w), + file(202, 1, 2, 100, 100, w), 
file(202, 1, 2, 100, 100, d), + file(202, 2, 3, 100, 100, w), file(202, 2, 3, 100, 100, d), + file(302, 1, 2, 100, 100, w), + }, + checkpointSetWithTableID: map[int64]map[string]struct{}{ + downstreamID(100): {cptKey(100, 1, w): struct{}{}}, + downstreamID(200): {cptKey(202, 1, w): struct{}{}}, + }, + splitSizeBytes: 80, + splitKeyCount: 80, + splitOnTable: false, + splitKeys: [][]byte{ + key(100, 2), key(102, 2), key(202, 2), key(202, 3), key(302, 2), + }, + tableIDWithFilesGroups: [][]snapclient.TableIDWithFiles{ + //{files(100, 100, []int{1, 1}, []string{w, d})}, + {files(100, 102, []int{1}, []string{w})}, + //{files(200, 202, []int{1, 1}, []string{w, d})}, + {files(200, 202, []int{2, 2}, []string{w, d})}, + {files(300, 302, []int{1}, []string{w})}, + }, + }, + { // small sst 1, split-table, no checkpoint + upstreamTableIDs: []int64{100, 200, 300}, + upstreamPartitionIDs: map[int64][]int64{100: {101, 102, 103}, 200: {201, 202, 203}, 300: {301, 302, 303}}, + files: []*backuppb.File{ + file(100, 1, 2, 100, 100, w), file(100, 1, 2, 100, 100, d), + file(102, 1, 2, 100, 100, w), + file(202, 1, 2, 100, 100, w), file(202, 1, 2, 100, 100, d), + file(202, 2, 3, 100, 100, w), file(202, 2, 3, 100, 100, d), + file(302, 1, 2, 100, 100, w), + }, + checkpointSetWithTableID: nil, + splitSizeBytes: 350, + splitKeyCount: 350, + splitOnTable: true, + splitKeys: [][]byte{ + key(202, 2), + }, + tableIDWithFilesGroups: [][]snapclient.TableIDWithFiles{ + {pfiles(100, []int64{100, 102}, [][]int{{1, 1}, {1}}, [][]string{{w, d}, {w}})}, + {files(200, 202, []int{1, 1}, []string{w, d})}, + {files(200, 202, []int{2, 2}, []string{w, d})}, + {files(300, 302, []int{1}, []string{w})}, + }, + }, + { // small sst 1, split-table, checkpoint + upstreamTableIDs: []int64{100, 200, 300}, + upstreamPartitionIDs: map[int64][]int64{100: {101, 102, 103}, 200: {201, 202, 203}, 300: {301, 302, 303}}, + files: []*backuppb.File{ + file(100, 1, 2, 100, 100, w), file(100, 1, 2, 100, 100, d), + file(102, 1, 2, 100, 100, w), + file(202, 1, 2, 100, 100, w), file(202, 1, 2, 100, 100, d), + file(202, 2, 3, 100, 100, w), file(202, 2, 3, 100, 100, d), + file(302, 1, 2, 100, 100, w), + }, + checkpointSetWithTableID: map[int64]map[string]struct{}{ + downstreamID(100): {cptKey(100, 1, w): struct{}{}}, + downstreamID(200): {cptKey(202, 1, w): struct{}{}}, + }, + splitSizeBytes: 350, + splitKeyCount: 350, + splitOnTable: true, + splitKeys: [][]byte{ + key(202, 2), + }, + tableIDWithFilesGroups: [][]snapclient.TableIDWithFiles{ + {files(100, 102, []int{1}, []string{w})}, + {files(200, 202, []int{2, 2}, []string{w, d})}, + {files(300, 302, []int{1}, []string{w})}, + }, + }, + { // small sst 1, no split-table, no checkpoint + upstreamTableIDs: []int64{100, 200, 300}, + upstreamPartitionIDs: map[int64][]int64{100: {101, 102, 103}, 200: {201, 202, 203}, 300: {301, 302, 303}}, + files: []*backuppb.File{ + file(100, 1, 2, 100, 100, w), file(100, 1, 2, 100, 100, d), + file(102, 1, 2, 100, 100, w), + file(202, 1, 2, 100, 100, w), file(202, 1, 2, 100, 100, d), + file(202, 2, 3, 100, 100, w), file(202, 2, 3, 100, 100, d), + file(302, 1, 2, 100, 100, w), + }, + checkpointSetWithTableID: nil, + splitSizeBytes: 350, + splitKeyCount: 350, + splitOnTable: false, + splitKeys: [][]byte{ + key(102, 2), key(202, 2), key(302, 2), + }, + tableIDWithFilesGroups: [][]snapclient.TableIDWithFiles{ + {pfiles(100, []int64{100, 102}, [][]int{{1, 1}, {1}}, [][]string{{w, d}, {w}})}, + {files(200, 202, []int{1, 1}, []string{w, d})}, + {files(200, 202, []int{2, 2}, []string{w, 
d}), files(300, 302, []int{1}, []string{w})}, + }, + }, + { // small sst 1, no split-table, checkpoint + upstreamTableIDs: []int64{100, 200, 300}, + upstreamPartitionIDs: map[int64][]int64{100: {101, 102, 103}, 200: {201, 202, 203}, 300: {301, 302, 303}}, + files: []*backuppb.File{ + file(100, 1, 2, 100, 100, w), file(100, 1, 2, 100, 100, d), + file(102, 1, 2, 100, 100, w), + file(202, 1, 2, 100, 100, w), file(202, 1, 2, 100, 100, d), + file(202, 2, 3, 100, 100, w), file(202, 2, 3, 100, 100, d), + file(302, 1, 2, 100, 100, w), + }, + checkpointSetWithTableID: map[int64]map[string]struct{}{ + downstreamID(100): {cptKey(100, 1, w): struct{}{}}, + downstreamID(200): {cptKey(202, 1, w): struct{}{}}, + }, + splitSizeBytes: 350, + splitKeyCount: 350, + splitOnTable: false, + splitKeys: [][]byte{ + key(102, 2), key(202, 2), key(302, 2), + }, + tableIDWithFilesGroups: [][]snapclient.TableIDWithFiles{ + {files(100, 102, []int{1}, []string{w})}, + {files(200, 202, []int{2, 2}, []string{w, d}), files(300, 302, []int{1}, []string{w})}, + }, + }, + { // small sst 2, split-table, no checkpoint + upstreamTableIDs: []int64{100, 200, 300}, + upstreamPartitionIDs: map[int64][]int64{100: {101, 102, 103}, 200: {201, 202, 203}, 300: {301, 302, 303}}, + files: []*backuppb.File{ + file(100, 1, 2, 100, 100, w), file(100, 1, 2, 100, 100, d), + file(102, 1, 2, 100, 100, w), + file(202, 1, 2, 100, 100, w), file(202, 1, 2, 100, 100, d), + file(202, 2, 3, 100, 100, w), file(202, 2, 3, 100, 100, d), + file(302, 1, 2, 100, 100, w), + }, + checkpointSetWithTableID: nil, + splitSizeBytes: 450, + splitKeyCount: 450, + splitOnTable: true, + splitKeys: [][]byte{}, + tableIDWithFilesGroups: [][]snapclient.TableIDWithFiles{ + {pfiles(100, []int64{100, 102}, [][]int{{1, 1}, {1}}, [][]string{{w, d}, {w}})}, + {files(200, 202, []int{1, 1, 2, 2}, []string{w, d, w, d})}, + {files(300, 302, []int{1}, []string{w})}, + }, + }, + { // small sst 2, split-table, checkpoint + upstreamTableIDs: []int64{100, 200, 300}, + upstreamPartitionIDs: map[int64][]int64{100: {101, 102, 103}, 200: {201, 202, 203}, 300: {301, 302, 303}}, + files: []*backuppb.File{ + file(100, 1, 2, 100, 100, w), file(100, 1, 2, 100, 100, d), + file(102, 1, 2, 100, 100, w), + file(202, 1, 2, 100, 100, w), file(202, 1, 2, 100, 100, d), + file(202, 2, 3, 100, 100, w), file(202, 2, 3, 100, 100, d), + file(302, 1, 2, 100, 100, w), + }, + checkpointSetWithTableID: map[int64]map[string]struct{}{ + downstreamID(100): {cptKey(100, 1, w): struct{}{}}, + downstreamID(200): {cptKey(202, 1, w): struct{}{}}, + }, + splitSizeBytes: 450, + splitKeyCount: 450, + splitOnTable: true, + splitKeys: [][]byte{}, + tableIDWithFilesGroups: [][]snapclient.TableIDWithFiles{ + {files(100, 102, []int{1}, []string{w})}, + {files(200, 202, []int{2, 2}, []string{w, d})}, + {files(300, 302, []int{1}, []string{w})}, + }, + }, + { // small sst 2, no split-table, no checkpoint + upstreamTableIDs: []int64{100, 200, 300}, + upstreamPartitionIDs: map[int64][]int64{100: {101, 102, 103}, 200: {201, 202, 203}, 300: {301, 302, 303}}, + files: []*backuppb.File{ + file(100, 1, 2, 100, 100, w), file(100, 1, 2, 100, 100, d), + file(102, 1, 2, 100, 100, w), + file(202, 1, 2, 100, 100, w), file(202, 1, 2, 100, 100, d), + file(202, 2, 3, 100, 100, w), file(202, 2, 3, 100, 100, d), + file(302, 1, 2, 100, 100, w), + }, + checkpointSetWithTableID: nil, + splitSizeBytes: 450, + splitKeyCount: 450, + splitOnTable: false, + splitKeys: [][]byte{ + key(102, 2), key(202, 3), key(302, 2), + }, + tableIDWithFilesGroups: 
[][]snapclient.TableIDWithFiles{ + {pfiles(100, []int64{100, 102}, [][]int{{1, 1}, {1}}, [][]string{{w, d}, {w}})}, + {files(200, 202, []int{1, 1, 2, 2}, []string{w, d, w, d})}, + {files(300, 302, []int{1}, []string{w})}, + }, + }, + { // small sst 2, no split-table, checkpoint + upstreamTableIDs: []int64{100, 200, 300}, + upstreamPartitionIDs: map[int64][]int64{100: {101, 102, 103}, 200: {201, 202, 203}, 300: {301, 302, 303}}, + files: []*backuppb.File{ + file(100, 1, 2, 100, 100, w), file(100, 1, 2, 100, 100, d), + file(102, 1, 2, 100, 100, w), + file(202, 1, 2, 100, 100, w), file(202, 1, 2, 100, 100, d), + file(202, 2, 3, 100, 100, w), file(202, 2, 3, 100, 100, d), + file(302, 1, 2, 100, 100, w), + }, + checkpointSetWithTableID: map[int64]map[string]struct{}{ + downstreamID(100): {cptKey(100, 1, w): struct{}{}}, + downstreamID(200): {cptKey(202, 1, w): struct{}{}}, + }, + splitSizeBytes: 450, + splitKeyCount: 450, + splitOnTable: false, + splitKeys: [][]byte{ + key(102, 2), key(202, 3), key(302, 2), + }, + tableIDWithFilesGroups: [][]snapclient.TableIDWithFiles{ + {files(100, 102, []int{1}, []string{w})}, + {files(200, 202, []int{2, 2}, []string{w, d})}, + {files(300, 302, []int{1}, []string{w})}, + }, + }, + { // small sst 3, no split-table, no checkpoint + upstreamTableIDs: []int64{100, 200, 300}, + upstreamPartitionIDs: map[int64][]int64{100: {101, 102, 103}, 200: {201, 202, 203}, 300: {301, 302, 303}}, + files: []*backuppb.File{ + file(100, 1, 2, 100, 100, w), file(100, 1, 2, 100, 100, d), + file(102, 1, 2, 100, 100, w), + file(202, 1, 2, 100, 100, w), file(202, 1, 2, 100, 100, d), + file(202, 2, 3, 100, 100, w), file(202, 2, 3, 100, 100, d), + file(302, 1, 2, 100, 100, w), + }, + checkpointSetWithTableID: nil, + splitSizeBytes: 501, + splitKeyCount: 501, + splitOnTable: false, + splitKeys: [][]byte{ + key(102, 2), key(302, 2), + }, + tableIDWithFilesGroups: [][]snapclient.TableIDWithFiles{ + {pfiles(100, []int64{100, 102}, [][]int{{1, 1}, {1}}, [][]string{{w, d}, {w}})}, + {files(200, 202, []int{1, 1, 2, 2}, []string{w, d, w, d}), files(300, 302, []int{1}, []string{w})}, + }, + }, + { // small sst 3, no split-table, checkpoint + upstreamTableIDs: []int64{100, 200, 300}, + upstreamPartitionIDs: map[int64][]int64{100: {101, 102, 103}, 200: {201, 202, 203}, 300: {301, 302, 303}}, + files: []*backuppb.File{ + file(100, 1, 2, 100, 100, w), file(100, 1, 2, 100, 100, d), + file(102, 1, 2, 100, 100, w), + file(202, 1, 2, 100, 100, w), file(202, 1, 2, 100, 100, d), + file(202, 2, 3, 100, 100, w), file(202, 2, 3, 100, 100, d), + file(302, 1, 2, 100, 100, w), + }, + checkpointSetWithTableID: map[int64]map[string]struct{}{ + downstreamID(100): {cptKey(100, 1, w): struct{}{}}, + downstreamID(200): {cptKey(202, 1, w): struct{}{}}, + }, + splitSizeBytes: 501, + splitKeyCount: 501, + splitOnTable: false, + splitKeys: [][]byte{ + key(102, 2), key(302, 2), + }, + tableIDWithFilesGroups: [][]snapclient.TableIDWithFiles{ + {files(100, 102, []int{1}, []string{w})}, + {files(200, 202, []int{2, 2}, []string{w, d}), files(300, 302, []int{1}, []string{w})}, + }, + }, + { // small sst 4, no split-table, no checkpoint + upstreamTableIDs: []int64{100, 200, 300}, + upstreamPartitionIDs: map[int64][]int64{100: {101, 102, 103}, 200: {201, 202, 203}, 300: {301, 302, 303}}, + files: []*backuppb.File{ + file(100, 1, 2, 100, 100, w), file(100, 1, 2, 100, 100, d), + file(102, 1, 2, 100, 100, w), + file(202, 1, 2, 100, 100, w), file(202, 1, 2, 100, 100, d), + file(202, 2, 3, 400, 400, w), file(202, 2, 3, 80, 80, d), + 
file(302, 1, 2, 10, 10, w), + }, + checkpointSetWithTableID: nil, + splitSizeBytes: 501, + splitKeyCount: 501, + splitOnTable: false, + splitKeys: [][]byte{ + key(202, 2), key(302, 2), + }, + tableIDWithFilesGroups: [][]snapclient.TableIDWithFiles{ + {pfiles(100, []int64{100, 102}, [][]int{{1, 1}, {1}}, [][]string{{w, d}, {w}}), files(200, 202, []int{1, 1}, []string{w, d})}, + {files(200, 202, []int{2, 2}, []string{w, d}), files(300, 302, []int{1}, []string{w})}, + }, + }, + { // small sst 4, no split-table, checkpoint + upstreamTableIDs: []int64{100, 200, 300}, + upstreamPartitionIDs: map[int64][]int64{100: {101, 102, 103}, 200: {201, 202, 203}, 300: {301, 302, 303}}, + files: []*backuppb.File{ + file(100, 1, 2, 100, 100, w), file(100, 1, 2, 100, 100, d), + file(102, 1, 2, 100, 100, w), + file(202, 1, 2, 100, 100, w), file(202, 1, 2, 100, 100, d), + file(202, 2, 3, 400, 400, w), file(202, 2, 3, 80, 80, d), + file(302, 1, 2, 10, 10, w), + }, + checkpointSetWithTableID: map[int64]map[string]struct{}{ + downstreamID(100): {cptKey(100, 1, w): struct{}{}}, + downstreamID(200): {cptKey(202, 1, w): struct{}{}}, + }, + splitSizeBytes: 501, + splitKeyCount: 501, + splitOnTable: false, + splitKeys: [][]byte{ + key(202, 2), key(302, 2), + }, + tableIDWithFilesGroups: [][]snapclient.TableIDWithFiles{ + {files(100, 102, []int{1}, []string{w})}, + {files(200, 202, []int{2, 2}, []string{w, d}), files(300, 302, []int{1}, []string{w})}, + }, + }, + } + + for _, cs := range cases { + createdTables := generateCreatedTables(t, cs.upstreamTableIDs, cs.upstreamPartitionIDs, downstreamID) + splitKeys, tableIDWithFilesGroups, err := snapclient.SortAndValidateFileRanges(createdTables, cs.files, cs.checkpointSetWithTableID, cs.splitSizeBytes, cs.splitKeyCount, cs.splitOnTable, updateCh) + require.NoError(t, err) + require.Equal(t, cs.splitKeys, splitKeys) + require.Equal(t, len(cs.tableIDWithFilesGroups), len(tableIDWithFilesGroups)) + for i, expectFilesGroup := range cs.tableIDWithFilesGroups { + actualFilesGroup := tableIDWithFilesGroups[i] + require.Equal(t, len(expectFilesGroup), len(actualFilesGroup)) + for j, expectFiles := range expectFilesGroup { + actualFiles := actualFilesGroup[j] + require.Equal(t, expectFiles.TableID, actualFiles.TableID) + for k, expectFile := range expectFiles.Files { + actualFile := actualFiles.Files[k] + require.Equal(t, expectFile.Name, actualFile.Name) + } + } + } + } +} diff --git a/br/pkg/restore/snap_client/zap.go b/br/pkg/restore/snap_client/zap.go deleted file mode 100644 index 453b3337d5e82..0000000000000 --- a/br/pkg/restore/snap_client/zap.go +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2024 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package snapclient - -import ( - "fmt" - - "github.com/pingcap/errors" - "github.com/pingcap/tidb/br/pkg/logutil" - "github.com/pingcap/tidb/br/pkg/utils" - "go.uber.org/zap" - "go.uber.org/zap/zapcore" -) - -// ZapTables make zap field of table for debuging, including table names. 
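The checkpoint keys used by these cases (`cptKey`) are derived from `GetFileRangeKey`, which trims everything from the final underscore so the write-CF and default-CF files of one range share a single checkpoint key. A standalone re-implementation, for illustration only:

```go
package main

import (
	"fmt"
	"strings"
)

// fileRangeKey mirrors getFileRangeKey: drop the final "_{cf}.sst" suffix so
// both column-family files of the same range map to one checkpoint key.
func fileRangeKey(name string) string {
	idx := strings.LastIndex(name, "_")
	if idx < 0 {
		panic(fmt.Sprintf("invalid backup data file name: '%s'", name))
	}
	return name[:idx]
}

func main() {
	fmt.Println(fileRangeKey("file_202_1_write.sst"))   // file_202_1
	fmt.Println(fileRangeKey("file_202_1_default.sst")) // file_202_1
}
```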
-func zapTables(tables []CreatedTable) zapcore.Field { - return logutil.AbbreviatedArray("tables", tables, func(input any) []string { - tables := input.([]CreatedTable) - names := make([]string, 0, len(tables)) - for _, t := range tables { - names = append(names, fmt.Sprintf("%s.%s", - utils.EncloseName(t.OldTable.DB.Name.String()), - utils.EncloseName(t.OldTable.Info.Name.String()))) - } - return names - }) -} - -type zapTableIDWithFilesMarshaler []TableIDWithFiles - -func zapTableIDWithFiles(fs []TableIDWithFiles) zap.Field { - return zap.Object("files", zapTableIDWithFilesMarshaler(fs)) -} - -func (fs zapTableIDWithFilesMarshaler) MarshalLogObject(encoder zapcore.ObjectEncoder) error { - for _, f := range fs { - encoder.AddInt64("table-id", f.TableID) - if err := logutil.MarshalLogObjectForFiles(f.Files, encoder); err != nil { - return errors.Trace(err) - } - } - return nil -} diff --git a/br/pkg/restore/utils/merge.go b/br/pkg/restore/utils/merge.go index 837eacd9937c5..14809396143ed 100644 --- a/br/pkg/restore/utils/merge.go +++ b/br/pkg/restore/utils/merge.go @@ -3,12 +3,15 @@ package utils import ( + "bytes" + "log" "strings" "github.com/pingcap/errors" backuppb "github.com/pingcap/kvproto/pkg/brpb" berrors "github.com/pingcap/tidb/br/pkg/errors" "github.com/pingcap/tidb/br/pkg/rtree" + "go.uber.org/zap" ) // MergeRangesStat holds statistics for the MergeRanges. @@ -34,9 +37,9 @@ func MergeAndRewriteFileRanges( rewriteRules *RewriteRules, splitSizeBytes, splitKeyCount uint64, -) ([]rtree.Range, *MergeRangesStat, error) { +) ([]rtree.RangeStats, *MergeRangesStat, error) { if len(files) == 0 { - return []rtree.Range{}, &MergeRangesStat{}, nil + return []rtree.RangeStats{}, &MergeRangesStat{}, nil } totalBytes := uint64(0) totalKvs := uint64(0) @@ -48,6 +51,14 @@ func MergeAndRewriteFileRanges( for _, file := range files { filesMap[string(file.StartKey)] = append(filesMap[string(file.StartKey)], file) + // Assert that it has the same end key. + if !bytes.Equal(filesMap[string(file.StartKey)][0].EndKey, file.EndKey) { + log.Panic("there are two files having the same start key, but different end key", + zap.ByteString("start key", file.StartKey), + zap.ByteString("file 1 end key", file.EndKey), + zap.ByteString("file 2 end key", filesMap[string(file.StartKey)][0].EndKey), + ) + } // We skips all default cf files because we don't range overlap. if file.Cf == WriteCFName || strings.Contains(file.GetName(), WriteCFName) { writeCFFile++ @@ -58,7 +69,7 @@ func MergeAndRewriteFileRanges( totalKvs += file.TotalKvs } if writeCFFile == 0 && defaultCFFile == 0 { - return []rtree.Range{}, nil, errors.Annotatef(berrors.ErrRestoreInvalidBackup, + return []rtree.RangeStats{}, nil, errors.Annotatef(berrors.ErrRestoreInvalidBackup, "unknown backup data from neither Wrtie CF nor Default CF") } @@ -69,18 +80,19 @@ func MergeAndRewriteFileRanges( } // Check if files are overlapped - rangeTree := rtree.NewRangeTree() + rangeTree := rtree.NewRangeStatsTree() for key := range filesMap { files := filesMap[key] rangeSize := uint64(0) + rangeCount := uint64(0) for _, f := range filesMap[key] { - rangeSize += f.Size_ + rangeSize += f.TotalBytes + rangeCount += f.TotalKvs } rg := &rtree.Range{ StartKey: files[0].GetStartKey(), EndKey: files[0].GetEndKey(), Files: files, - Size: rangeSize, } // rewrite Range for split. // so that splitRanges no need to handle rewrite rules any more. 
@@ -89,7 +101,12 @@ func MergeAndRewriteFileRanges( return nil, nil, errors.Annotatef(berrors.ErrInvalidRange, "unable to rewrite range files %+v", files) } - if out := rangeTree.InsertRange(*tmpRng); out != nil { + rgstats := rtree.RangeStats{ + Range: tmpRng, + Size: rangeSize, + Count: rangeCount, + } + if out := rangeTree.InsertRange(rgstats); out != nil { return nil, nil, errors.Annotatef(berrors.ErrInvalidRange, "duplicate range %s files %+v", out, files) } diff --git a/br/pkg/restore/utils/rewrite_rule.go b/br/pkg/restore/utils/rewrite_rule.go index 5d9878fd3f689..938cfcace6da3 100644 --- a/br/pkg/restore/utils/rewrite_rule.go +++ b/br/pkg/restore/utils/rewrite_rule.go @@ -238,12 +238,16 @@ func rewriteRawKey(key []byte, rewriteRules *RewriteRules) ([]byte, *import_sstp } if len(key) > 0 { rule := matchOldPrefix(key, rewriteRules) - ret := bytes.Replace(key, rule.GetOldKeyPrefix(), rule.GetNewKeyPrefix(), 1) - return codec.EncodeBytes([]byte{}, ret), rule + return RewriteAndEncodeRawKey(key, rule), rule } return nil, nil } +func RewriteAndEncodeRawKey(key []byte, rule *import_sstpb.RewriteRule) []byte { + ret := bytes.Replace(key, rule.GetOldKeyPrefix(), rule.GetNewKeyPrefix(), 1) + return codec.EncodeBytes([]byte{}, ret) +} + func matchOldPrefix(key []byte, rewriteRules *RewriteRules) *import_sstpb.RewriteRule { for _, rule := range rewriteRules.Data { if bytes.HasPrefix(key, rule.GetOldKeyPrefix()) { diff --git a/br/pkg/rtree/merge_fuzz_test.go b/br/pkg/rtree/merge_fuzz_test.go index df6e304cc1e52..3bb2896520f66 100644 --- a/br/pkg/rtree/merge_fuzz_test.go +++ b/br/pkg/rtree/merge_fuzz_test.go @@ -15,8 +15,8 @@ func FuzzMerge(f *testing.F) { baseKeyB := tablecodec.EncodeIndexSeekKey(42, 1, nil) f.Add([]byte(baseKeyA), []byte(baseKeyB)) f.Fuzz(func(t *testing.T, a, b []byte) { - left := rtree.Range{StartKey: a, Files: []*backup.File{{TotalKvs: 1, TotalBytes: 1}}} - right := rtree.Range{StartKey: b, Files: []*backup.File{{TotalKvs: 1, TotalBytes: 1}}} + left := rtree.RangeStats{Range: &rtree.Range{StartKey: a, Files: []*backup.File{{TotalKvs: 1, TotalBytes: 1}}}} + right := rtree.RangeStats{Range: &rtree.Range{StartKey: b, Files: []*backup.File{{TotalKvs: 1, TotalBytes: 1}}}} rtree.NeedsMerge(&left, &right, 42, 42) }) } diff --git a/br/pkg/rtree/rtree.go b/br/pkg/rtree/rtree.go index 0c7d0ed5ce460..c8be7028c9005 100644 --- a/br/pkg/rtree/rtree.go +++ b/br/pkg/rtree/rtree.go @@ -20,7 +20,6 @@ type Range struct { StartKey []byte EndKey []byte Files []*backuppb.File - Size uint64 } // BytesAndKeys returns total bytes and keys in a range. @@ -85,8 +84,67 @@ func (rg *Range) Less(than btree.Item) bool { return bytes.Compare(rg.StartKey, ta.StartKey) < 0 } +var _ btree.Item = &RangeStats{} + +// RangeStats represents a restore merge result. +type RangeStats struct { + *Range + Size uint64 + Count uint64 +} + +// Less impls btree.Item. +func (rg *RangeStats) Less(than btree.Item) bool { + // rg.StartKey < than.StartKey + ta := than.(*RangeStats) + return bytes.Compare(rg.StartKey, ta.StartKey) < 0 +} + +type RangeStatsTree struct { + *btree.BTree +} + +func NewRangeStatsTree() RangeStatsTree { + return RangeStatsTree{ + BTree: btree.New(32), + } +} + +// InsertRange inserts ranges into the range tree. +// It returns a non-nil range if there are soe overlapped ranges. 
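`RewriteAndEncodeRawKey` factors the prefix replacement plus the `codec.EncodeBytes` wrapping out of `rewriteRawKey`. A minimal usage sketch, assuming an illustrative rule that maps upstream table 100 to downstream table 1100 (the prefixes are built with `tablecodec.EncodeTablePrefix`, as elsewhere in this patch):

```go
package main

import (
	"fmt"

	"github.com/pingcap/kvproto/pkg/import_sstpb"
	restoreutils "github.com/pingcap/tidb/br/pkg/restore/utils"
	"github.com/pingcap/tidb/pkg/kv"
	"github.com/pingcap/tidb/pkg/tablecodec"
)

func main() {
	// Illustrative rule: rewrite keys of upstream table 100 into downstream table 1100.
	rule := &import_sstpb.RewriteRule{
		OldKeyPrefix: tablecodec.EncodeTablePrefix(100),
		NewKeyPrefix: tablecodec.EncodeTablePrefix(1100),
	}
	// A raw (unencoded) row key in the upstream table's key space.
	upstreamKey := tablecodec.EncodeRowKeyWithHandle(100, kv.IntHandle(1))

	// Replace the table prefix and wrap the result with codec.EncodeBytes,
	// yielding the encoded key that BR hands to split/scatter.
	encoded := restoreutils.RewriteAndEncodeRawKey(upstreamKey, rule)
	fmt.Printf("%x\n", encoded)
}
```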
+func (rangeTree *RangeStatsTree) InsertRange(rg RangeStats) *RangeStats { + out := rangeTree.ReplaceOrInsert(&rg) + if out == nil { + return nil + } + return out.(*RangeStats) +} + +// MergedRanges output the sortedRanges having merged according to given `splitSizeBytes` and `splitKeyCount`. +func (rangeTree *RangeStatsTree) MergedRanges(splitSizeBytes, splitKeyCount uint64) []RangeStats { + var mergeTargetIndex int = -1 + sortedRanges := make([]RangeStats, 0, rangeTree.Len()) + rangeTree.Ascend(func(item btree.Item) bool { + rg := item.(*RangeStats) + if mergeTargetIndex < 0 || !NeedsMerge(&sortedRanges[mergeTargetIndex], rg, splitSizeBytes, splitKeyCount) { + // unintialized or the sortedRanges[mergeTargetIndex] does not need to merged + mergeTargetIndex += 1 + sortedRanges = append(sortedRanges, *rg) + } else { + // need to merge from rg to sortedRages[mergeTargetIndex] + sortedRanges[mergeTargetIndex].EndKey = rg.EndKey + sortedRanges[mergeTargetIndex].Size += rg.Size + sortedRanges[mergeTargetIndex].Count += rg.Count + sortedRanges[mergeTargetIndex].Files = append(sortedRanges[mergeTargetIndex].Files, rg.Files...) + } + + return true + }) + return sortedRanges +} + // NeedsMerge checks whether two ranges needs to be merged. -func NeedsMerge(left, right *Range, splitSizeBytes, splitKeyCount uint64) bool { +func NeedsMerge(left, right *RangeStats, splitSizeBytes, splitKeyCount uint64) bool { leftBytes, leftKeys := left.BytesAndKeys() rightBytes, rightKeys := right.BytesAndKeys() if rightBytes == 0 { @@ -217,28 +275,6 @@ func (rangeTree *RangeTree) InsertRange(rg Range) *Range { return out.(*Range) } -// MergedRanges output the sortedRanges having merged according to given `splitSizeBytes` and `splitKeyCount`. -func (rangeTree *RangeTree) MergedRanges(splitSizeBytes, splitKeyCount uint64) []Range { - var mergeTargetIndex int = -1 - sortedRanges := make([]Range, 0, rangeTree.Len()) - rangeTree.Ascend(func(item btree.Item) bool { - rg := item.(*Range) - if mergeTargetIndex < 0 || !NeedsMerge(&sortedRanges[mergeTargetIndex], rg, splitSizeBytes, splitKeyCount) { - // unintialized or the sortedRanges[mergeTargetIndex] does not need to merged - mergeTargetIndex += 1 - sortedRanges = append(sortedRanges, *rg) - } else { - // need to merge from rg to sortedRages[mergeTargetIndex] - sortedRanges[mergeTargetIndex].EndKey = rg.EndKey - sortedRanges[mergeTargetIndex].Size += rg.Size - sortedRanges[mergeTargetIndex].Files = append(sortedRanges[mergeTargetIndex].Files, rg.Files...) - } - - return true - }) - return sortedRanges -} - // GetSortedRanges collects and returns sorted ranges. 
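A small usage sketch of the new `RangeStatsTree`, closely following the parameters of `TestRangeTreeMerge` below so the expected merge factor is grounded in the test: 100 adjacent one-key record ranges under one table should collapse roughly 10-to-1 under a 10-byte/10-key budget.

```go
package main

import (
	"fmt"

	backuppb "github.com/pingcap/kvproto/pkg/brpb"
	"github.com/pingcap/tidb/br/pkg/rtree"
	"github.com/pingcap/tidb/pkg/kv"
	"github.com/pingcap/tidb/pkg/tablecodec"
)

func main() {
	tree := rtree.NewRangeStatsTree()
	for i := 0; i < 100; i++ {
		tree.InsertRange(rtree.RangeStats{
			Range: &rtree.Range{
				StartKey: tablecodec.EncodeRowKeyWithHandle(1, kv.IntHandle(int64(i))),
				EndKey:   tablecodec.EncodeRowKeyWithHandle(1, kv.IntHandle(int64(i+1))),
				Files:    []*backuppb.File{{Name: fmt.Sprintf("%03d.sst", i), TotalKvs: 1, TotalBytes: 1}},
			},
			Size:  1,
			Count: 1,
		})
	}
	// Mirroring TestRangeTreeMerge: with a 10-byte/10-key budget, every 10
	// adjacent 1-byte ranges should merge into one range.
	merged := tree.MergedRanges(10, 10)
	fmt.Println(len(merged)) // expected: 10
}
```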
func (rangeTree *RangeTree) GetSortedRanges() []Range { sortedRanges := make([]Range, 0, rangeTree.Len()) diff --git a/br/pkg/rtree/rtree_test.go b/br/pkg/rtree/rtree_test.go index f5b702e04072b..adc337dd8cf8f 100644 --- a/br/pkg/rtree/rtree_test.go +++ b/br/pkg/rtree/rtree_test.go @@ -189,22 +189,24 @@ func encodeTableRecord(prefix kv.Key, rowID uint64) []byte { } func TestRangeTreeMerge(t *testing.T) { - rangeTree := rtree.NewRangeTree() + rangeTree := rtree.NewRangeStatsTree() tablePrefix := tablecodec.GenTableRecordPrefix(1) for i := uint64(0); i < 10000; i += 1 { - item := rtree.Range{ - StartKey: encodeTableRecord(tablePrefix, i), - EndKey: encodeTableRecord(tablePrefix, i+1), - Files: []*backuppb.File{ - { - Name: fmt.Sprintf("%20d", i), - TotalKvs: 1, - TotalBytes: 1, + item := rtree.RangeStats{ + Range: &rtree.Range{ + StartKey: encodeTableRecord(tablePrefix, i), + EndKey: encodeTableRecord(tablePrefix, i+1), + Files: []*backuppb.File{ + { + Name: fmt.Sprintf("%20d", i), + TotalKvs: 1, + TotalBytes: 1, + }, }, }, Size: i, } - rangeTree.Update(item) + rangeTree.InsertRange(item) } sortedRanges := rangeTree.MergedRanges(10, 10) require.Equal(t, 1000, len(sortedRanges)) diff --git a/br/pkg/task/BUILD.bazel b/br/pkg/task/BUILD.bazel index 60f80c77a5f86..616a661f3be49 100644 --- a/br/pkg/task/BUILD.bazel +++ b/br/pkg/task/BUILD.bazel @@ -50,6 +50,7 @@ go_library( "//br/pkg/utils", "//br/pkg/version", "//pkg/config", + "//pkg/ddl", "//pkg/domain", "//pkg/infoschema", "//pkg/kv", @@ -58,13 +59,11 @@ go_library( "//pkg/sessionctx/stmtctx", "//pkg/sessionctx/variable", "//pkg/statistics/handle", - "//pkg/tablecodec", "//pkg/types", "//pkg/util", "//pkg/util/cdcutil", "//pkg/util/collate", "//pkg/util/engine", - "//pkg/util/mathutil", "//pkg/util/table-filter", "@com_github_docker_go_units//:go-units", "@com_github_fatih_color//:color", @@ -111,7 +110,7 @@ go_test( ], embed = [":task"], flaky = True, - shard_count = 34, + shard_count = 33, deps = [ "//br/pkg/backup", "//br/pkg/config", diff --git a/br/pkg/task/config_test.go b/br/pkg/task/config_test.go index 3090570046644..85532f019863d 100644 --- a/br/pkg/task/config_test.go +++ b/br/pkg/task/config_test.go @@ -286,40 +286,3 @@ func mockBackupMeta(mockSchemas []*backuppb.Schema, mockFiles []*backuppb.File) Schemas: mockSchemas, } } - -func TestMapTableToFiles(t *testing.T) { - filesOfTable1 := []*backuppb.File{ - { - Name: "table1-1.sst", - StartKey: tablecodec.EncodeTablePrefix(1), - EndKey: tablecodec.EncodeTablePrefix(1), - }, - { - Name: "table1-2.sst", - StartKey: tablecodec.EncodeTablePrefix(1), - EndKey: tablecodec.EncodeTablePrefix(1), - }, - { - Name: "table1-3.sst", - StartKey: tablecodec.EncodeTablePrefix(1), - EndKey: tablecodec.EncodeTablePrefix(1), - }, - } - filesOfTable2 := []*backuppb.File{ - { - Name: "table2-1.sst", - StartKey: tablecodec.EncodeTablePrefix(2), - EndKey: tablecodec.EncodeTablePrefix(2), - }, - { - Name: "table2-2.sst", - StartKey: tablecodec.EncodeTablePrefix(2), - EndKey: tablecodec.EncodeTablePrefix(2), - }, - } - - result := MapTableToFiles(append(filesOfTable2, filesOfTable1...)) - - require.Equal(t, filesOfTable1, result[1]) - require.Equal(t, filesOfTable2, result[2]) -} diff --git a/br/pkg/task/restore.go b/br/pkg/task/restore.go index 8bc6383be78b6..cb915c31fda3e 100644 --- a/br/pkg/task/restore.go +++ b/br/pkg/task/restore.go @@ -8,13 +8,13 @@ import ( "fmt" "slices" "strings" + "sync/atomic" "time" "github.com/docker/go-units" "github.com/google/uuid" "github.com/opentracing/opentracing-go" 
"github.com/pingcap/errors" - "github.com/pingcap/failpoint" backuppb "github.com/pingcap/kvproto/pkg/brpb" "github.com/pingcap/log" "github.com/pingcap/tidb/br/pkg/checkpoint" @@ -33,14 +33,12 @@ import ( "github.com/pingcap/tidb/br/pkg/utils" "github.com/pingcap/tidb/br/pkg/version" "github.com/pingcap/tidb/pkg/config" + "github.com/pingcap/tidb/pkg/ddl" "github.com/pingcap/tidb/pkg/domain" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/parser/model" - "github.com/pingcap/tidb/pkg/tablecodec" - "github.com/pingcap/tidb/pkg/util" "github.com/pingcap/tidb/pkg/util/collate" "github.com/pingcap/tidb/pkg/util/engine" - "github.com/pingcap/tidb/pkg/util/mathutil" "github.com/spf13/cobra" "github.com/spf13/pflag" "github.com/tikv/client-go/v2/tikv" @@ -1044,10 +1042,11 @@ func runRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf } } } - // We make bigger errCh so we won't block on multi-part failed. - errCh := make(chan error, 32) - tableStream := client.GoCreateTables(ctx, tables, newTS, errCh) + createdTables, err := client.CreateTables(ctx, tables, newTS) + if err != nil { + return errors.Trace(err) + } if len(files) == 0 { log.Info("no files, empty databases and tables are restored") @@ -1067,37 +1066,24 @@ func runRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf } // Hijack the tableStream and rewrite the rewrite rules. - tableStream = util.ChanMap(tableStream, func(t snapclient.CreatedTable) snapclient.CreatedTable { + for _, createdTable := range createdTables { // Set the keyspace info for the checksum requests - t.RewriteRule.OldKeyspace = oldKeyspace - t.RewriteRule.NewKeyspace = newKeyspace + createdTable.RewriteRule.OldKeyspace = oldKeyspace + createdTable.RewriteRule.NewKeyspace = newKeyspace - for _, rule := range t.RewriteRule.Data { + for _, rule := range createdTable.RewriteRule.Data { rule.OldKeyPrefix = append(append([]byte{}, oldKeyspace...), rule.OldKeyPrefix...) rule.NewKeyPrefix = codec.EncodeKey(rule.NewKeyPrefix) } - return t - }) + } } if cfg.tiflashRecorder != nil { - tableStream = util.ChanMap(tableStream, func(t snapclient.CreatedTable) snapclient.CreatedTable { - if cfg.tiflashRecorder != nil { - cfg.tiflashRecorder.Rewrite(t.OldTable.Info.ID, t.Table.ID) - } - return t - }) + for _, createdTable := range createdTables { + cfg.tiflashRecorder.Rewrite(createdTable.OldTable.Info.ID, createdTable.Table.ID) + } } - // Block on creating tables before restore starts. since create table is no longer a heavy operation any more. - tableStream = client.GoBlockCreateTablesPipeline(ctx, maxRestoreBatchSizeLimit, tableStream) - - tableFileMap := MapTableToFiles(files) - log.Debug("mapped table to files", zap.Any("result map", tableFileMap)) - - rangeStream := client.GoValidateFileRanges( - ctx, tableStream, tableFileMap, kvConfigs.MergeRegionSize.Value, kvConfigs.MergeRegionKeyCount.Value, errCh) - rangeSize := EstimateRangeSize(files) summary.CollectInt("restore ranges", rangeSize) log.Info("range and file prepared", zap.Int("file count", len(files)), zap.Int("range count", rangeSize)) @@ -1111,13 +1097,6 @@ func runRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf } } - // Restore sst files in batch. 
- batchSize := mathutil.MaxInt - failpoint.Inject("small-batch-size", func(v failpoint.Value) { - log.Info("failpoint small batch size is on", zap.Int("size", v.(int))) - batchSize = v.(int) - }) - // Split/Scatter + Download/Ingest progressLen := int64(rangeSize + len(files)) if cfg.Checksum { @@ -1127,28 +1106,26 @@ func runRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf progressLen += int64(len(tables)) } // Redirect to log if there is no log file to avoid unreadable output. - updateCh := g.StartProgress( - ctx, - cmdName, - progressLen, - !cfg.LogProgress) + updateCh := g.StartProgress(ctx, cmdName, progressLen, !cfg.LogProgress) defer updateCh.Close() - sender, err := snapclient.NewTiKVSender(ctx, client, updateCh, cfg.PDConcurrency) + + placementRuleManager, err := snapclient.NewPlacementRuleManager(ctx, mgr.GetPDClient(), mgr.GetPDHTTPClient(), mgr.GetTLSConfig(), cfg.Online) if err != nil { return errors.Trace(err) } - manager, err := snapclient.NewBRContextManager(ctx, mgr.GetPDClient(), mgr.GetPDHTTPClient(), mgr.GetTLSConfig(), cfg.Online) - if err != nil { + if err := client.RestoreTables(ctx, placementRuleManager, createdTables, files, checkpointSetWithTableID, + kvConfigs.MergeRegionSize.Value, kvConfigs.MergeRegionKeyCount.Value, + // If the command is from BR binary, the ddl.EnableSplitTableRegion is always 0, + // If the command is from BRIE SQL, the ddl.EnableSplitTableRegion is TiDB config split-table. + kvConfigs.SplitRegionOnTable.Value || atomic.LoadUint32(&ddl.EnableSplitTableRegion) == 1, + updateCh, + ); err != nil { return errors.Trace(err) } - batcher, afterTableRestoredCh := snapclient.NewBatcher(ctx, sender, manager, errCh, updateCh) - batcher.SetCheckpoint(checkpointSetWithTableID) - batcher.SetThreshold(batchSize) - batcher.EnableAutoCommit(ctx, cfg.BatchFlushInterval) - go restoreTableStream(ctx, rangeStream, batcher, errCh) - var finish <-chan struct{} - postHandleCh := afterTableRestoredCh + // We make bigger errCh so we won't block on multi-part failed. + errCh := make(chan error, 32) + postHandleCh := afterTableRestoredCh(ctx, createdTables) // pipeline checksum if cfg.Checksum { @@ -1164,7 +1141,7 @@ func runRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf postHandleCh = client.GoWaitTiFlashReady(ctx, postHandleCh, updateCh, errCh) } - finish = dropToBlackhole(ctx, postHandleCh, errCh) + finish := dropToBlackhole(ctx, postHandleCh, errCh) // Reset speed limit. ResetSpeedLimit must be called after client.InitBackupMeta has been called. defer func() { @@ -1369,30 +1346,6 @@ func EstimateRangeSize(files []*backuppb.File) int { return result } -// MapTableToFiles makes a map that mapping table ID to its backup files. -// aware that one file can and only can hold one table. 
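The `RestoreTables` call above folds two signals into a single split-on-table decision: the TiKV-side `coprocessor.split-region-on-table` value collected into `kvConfigs.SplitRegionOnTable`, and TiDB's `ddl.EnableSplitTableRegion` switch, which stays 0 for the standalone BR binary and follows the TiDB `split-table` config under BRIE SQL. A small sketch of that derivation follows; `shouldSplitOnTable` and `enableSplitTableRegion` are illustrative names standing in for the real fields, not BR APIs.

```go
// Hypothetical helper showing how the boolean passed to RestoreTables is derived.
package main

import (
	"fmt"
	"sync/atomic"
)

// enableSplitTableRegion plays the role of ddl.EnableSplitTableRegion: always 0
// in the standalone BR binary, set from the TiDB config when running as BRIE SQL.
var enableSplitTableRegion uint32

func shouldSplitOnTable(splitOnTableFromTiKV bool) bool {
	// splitOnTableFromTiKV stands in for kvConfigs.SplitRegionOnTable.Value.
	return splitOnTableFromTiKV || atomic.LoadUint32(&enableSplitTableRegion) == 1
}

func main() {
	fmt.Println(shouldSplitOnTable(false)) // false: neither side asks for per-table regions
	atomic.StoreUint32(&enableSplitTableRegion, 1)
	fmt.Println(shouldSplitOnTable(false)) // true: BRIE SQL inherits TiDB's split-table setting
}
```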
-func MapTableToFiles(files []*backuppb.File) map[int64][]*backuppb.File { - result := map[int64][]*backuppb.File{} - for _, file := range files { - tableID := tablecodec.DecodeTableID(file.GetStartKey()) - tableEndID := tablecodec.DecodeTableID(file.GetEndKey()) - if tableID != tableEndID { - log.Panic("key range spread between many files.", - zap.String("file name", file.Name), - logutil.Key("startKey", file.StartKey), - logutil.Key("endKey", file.EndKey)) - } - if tableID == 0 { - log.Panic("invalid table key of file", - zap.String("file name", file.Name), - logutil.Key("startKey", file.StartKey), - logutil.Key("endKey", file.EndKey)) - } - result[tableID] = append(result[tableID], file) - } - return result -} - // dropToBlackhole drop all incoming tables into black hole, // i.e. don't execute checksum, just increase the process anyhow. func dropToBlackhole( @@ -1464,39 +1417,6 @@ func enableTiDBConfig() func() { return restoreConfig } -// restoreTableStream blocks current goroutine and restore a stream of tables, -// by send tables to batcher. -func restoreTableStream( - ctx context.Context, - inputCh <-chan snapclient.TableWithRange, - batcher *snapclient.Batcher, - errCh chan<- error, -) { - oldTableCount := 0 - defer func() { - // when things done, we must clean pending requests. - batcher.Close() - log.Info("doing postwork", - zap.Int("table count", oldTableCount), - ) - }() - - for { - select { - case <-ctx.Done(): - errCh <- ctx.Err() - return - case t, ok := <-inputCh: - if !ok { - return - } - oldTableCount += 1 - - batcher.Add(t) - } - } -} - func getTiFlashNodeCount(ctx context.Context, pdClient pd.Client) (uint64, error) { tiFlashStores, err := conn.GetAllTiKVStoresWithRetry(ctx, pdClient, connutil.TiFlashOnly) if err != nil { @@ -1711,3 +1631,18 @@ func checkIsInActions(action model.ActionType, actions map[model.ActionType]stru _, ok := actions[action] return ok } + +func afterTableRestoredCh(ctx context.Context, createdTables []*snapclient.CreatedTable) <-chan *snapclient.CreatedTable { + ch := make(chan *snapclient.CreatedTable) + go func() { + for _, createdTable := range createdTables { + select { + case <-ctx.Done(): + return + case ch <- createdTable: + } + } + close(ch) + }() + return ch +} diff --git a/br/pkg/task/restore_raw.go b/br/pkg/task/restore_raw.go index b53459a386a05..962d0ec1b835d 100644 --- a/br/pkg/task/restore_raw.go +++ b/br/pkg/task/restore_raw.go @@ -16,6 +16,7 @@ import ( "github.com/pingcap/tidb/br/pkg/restore" snapclient "github.com/pingcap/tidb/br/pkg/restore/snap_client" restoreutils "github.com/pingcap/tidb/br/pkg/restore/utils" + "github.com/pingcap/tidb/br/pkg/rtree" "github.com/pingcap/tidb/br/pkg/summary" "github.com/spf13/cobra" "github.com/spf13/pflag" @@ -145,7 +146,7 @@ func RunRestoreRaw(c context.Context, g glue.Glue, cmdName string, cfg *RestoreR !cfg.LogProgress) // RawKV restore does not need to rewrite keys. 
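`afterTableRestoredCh` exists because the post-restore stages (checksum and, for example, `client.GoWaitTiFlashReady`) still consume a channel, so the already-materialized `createdTables` slice is adapted back into one. Below is a generic sketch of the same pattern with an illustrative name; like the helper in the diff, it returns without closing the channel when the context is cancelled, so consumers are expected to watch the context as well.

```go
// sliceToChan is an illustrative, generic form of afterTableRestoredCh; it is
// not part of the BR codebase.
package main

import (
	"context"
	"fmt"
)

// sliceToChan feeds the elements of a slice into an unbuffered channel so that
// existing channel-based pipeline stages can stay unchanged. On cancellation the
// goroutine returns early and leaves the channel open, matching the diff.
func sliceToChan[T any](ctx context.Context, items []T) <-chan T {
	ch := make(chan T)
	go func() {
		for _, item := range items {
			select {
			case <-ctx.Done():
				return
			case ch <- item:
			}
		}
		close(ch)
	}()
	return ch
}

func main() {
	ctx := context.Background()
	for v := range sliceToChan(ctx, []string{"t1", "t2", "t3"}) {
		fmt.Println(v)
	}
}
```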
- err = client.SplitRanges(ctx, ranges, updateCh, true) + err = client.SplitRanges(ctx, getEndKeys(ranges), updateCh, true) if err != nil { return errors.Trace(err) } @@ -169,3 +170,11 @@ func RunRestoreRaw(c context.Context, g glue.Glue, cmdName string, cfg *RestoreR summary.SetSuccessStatus(true) return nil } + +func getEndKeys(ranges []rtree.RangeStats) [][]byte { + endKeys := make([][]byte, 0, len(ranges)) + for _, rg := range ranges { + endKeys = append(endKeys, rg.EndKey) + } + return endKeys +} diff --git a/br/pkg/task/restore_txn.go b/br/pkg/task/restore_txn.go index 00039eb51370e..c1ecefd2f4e62 100644 --- a/br/pkg/task/restore_txn.go +++ b/br/pkg/task/restore_txn.go @@ -88,7 +88,7 @@ func RunRestoreTxn(c context.Context, g glue.Glue, cmdName string, cfg *Config) !cfg.LogProgress) // RawKV restore does not need to rewrite keys. - err = client.SplitRanges(ctx, ranges, updateCh, false) + err = client.SplitRanges(ctx, getEndKeys(ranges), updateCh, false) if err != nil { return errors.Trace(err) } diff --git a/br/tests/br_small_batch_size/run.sh b/br/tests/br_small_batch_size/run.sh deleted file mode 100755 index 3fe09fca81063..0000000000000 --- a/br/tests/br_small_batch_size/run.sh +++ /dev/null @@ -1,81 +0,0 @@ -#!/bin/sh -# -# Copyright 2020 PingCAP, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) - -random_values() { - length=$1 - count=$2 - python -c " -import random -import string -for ignored in range($count): - print(''.join(random.choice(string.ascii_letters) for _ in range($length)))" | - awk '{print "(1" $1 "1)"}' | - tr "\n1" ",'" | - sed 's/,$//' -} - -create_and_insert() { - table_name=$1 - record_count=$2 - run_sql "CREATE TABLE $DB.$table_name(k varchar(256) primary key)" - stmt="INSERT INTO $DB.$table_name VALUES `random_values 255 $record_count`" - echo $stmt | mysql -uroot -h127.0.0.1 -P4000 -} - -check_size() { - table_name=$1 - record_count=$2 - - count=`run_sql 'select count(*) from $DB.$table_name' | awk '/count/{print $2}'` - - if [ $count -ne $record_count ]; then - echo "check size failed: $count vs $record_count" - fi -} - -set -eu -DB="$TEST_NAME" -TABLE="usertable" - -run_sql "CREATE DATABASE $DB;" - -record_counts=(10000 10010 10086) -for i in $record_counts; do - create_and_insert "t$i" $i -done -go-ycsb load mysql -P $CUR/workload -p mysql.host=$TIDB_IP -p mysql.port=$TIDB_PORT -p mysql.user=root -p mysql.db=$DB - - -echo "backup start..." -backup_dir="$TEST_DIR/${TEST_NAME}_backup" -rm -rf $backup_dir -run_br backup full -s "local://$backup_dir" --pd $PD_ADDR - -run_sql "drop database $DB" - - -echo "restore start..." 
-GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/task/small-batch-size=return(2)" \ -run_br restore full -s "local://$backup_dir" --pd $PD_ADDR --ratelimit 1024 - -for i in $record_counts; do - check_size "t$i" $i -done -check_size $TABLE 10000 - -run_sql "DROP DATABASE $DB" diff --git a/br/tests/br_small_batch_size/workload b/br/tests/br_small_batch_size/workload deleted file mode 100644 index caba5e1caabd0..0000000000000 --- a/br/tests/br_small_batch_size/workload +++ /dev/null @@ -1,12 +0,0 @@ -recordcount=30000 -operationcount=0 -workload=core - -readallfields=true - -readproportion=0 -updateproportion=0 -scanproportion=0 -insertproportion=0 - -requestdistribution=uniform \ No newline at end of file diff --git a/br/tests/br_split_region_fail/run.sh b/br/tests/br_split_region_fail/run.sh index 87751695fab00..cc35015271231 100644 --- a/br/tests/br_split_region_fail/run.sh +++ b/br/tests/br_split_region_fail/run.sh @@ -48,7 +48,7 @@ echo "restore start..." unset BR_LOG_TO_TERM GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/restore/split/not-leader-error=1*return(true)->1*return(false);\ github.com/pingcap/tidb/br/pkg/restore/split/somewhat-retryable-error=3*return(true)" \ -run_br restore full -s "local://$TEST_DIR/$DB" --pd $PD_ADDR --ratelimit 1024 --log-file $LOG || true +run_br restore full -s "local://$TEST_DIR/$DB" --pd $PD_ADDR --ratelimit 1024 --merge-region-key-count 1 --log-file $LOG || true BR_LOG_TO_TERM=1 grep "a error occurs on split region" $LOG && \ diff --git a/br/tests/run_group_br_tests.sh b/br/tests/run_group_br_tests.sh index 9fe7fd643a58b..ae9f17d7a462b 100755 --- a/br/tests/run_group_br_tests.sh +++ b/br/tests/run_group_br_tests.sh @@ -25,7 +25,7 @@ groups=( ["G02"]="br_full_cluster_restore br_full_index br_incremental_ddl br_pitr_failpoint" ["G03"]='br_incompatible_tidb_config br_incremental br_incremental_index br_incremental_only_ddl br_incremental_same_table br_insert_after_restore br_key_locked br_log_test br_move_backup br_mv_index br_other br_partition_add_index br_tidb_placement_policy br_tiflash br_tiflash_conflict' ["G04"]='br_range br_replica_read br_restore_TDE_enable br_restore_log_task_enable br_s3 br_shuffle_leader br_shuffle_region br_single_table' - ["G05"]='br_skip_checksum br_small_batch_size br_split_region_fail br_systables br_table_filter br_txn br_stats br_clustered_index br_crypter' + ["G05"]='br_skip_checksum br_split_region_fail br_systables br_table_filter br_txn br_stats br_clustered_index br_crypter' ["G06"]='br_tikv_outage br_tikv_outage3' ["G07"]='br_pitr' ["G08"]='br_tikv_outage2 br_ttl br_views_and_sequences br_z_gc_safepoint br_autorandom' diff --git a/pkg/util/misc_test.go b/pkg/util/misc_test.go index c7f882ceb5530..eeab4999f652f 100644 --- a/pkg/util/misc_test.go +++ b/pkg/util/misc_test.go @@ -17,7 +17,6 @@ package util import ( "bytes" "crypto/x509/pkix" - "fmt" "testing" "time" @@ -199,17 +198,3 @@ func assertChannel[T any](t *testing.T, ch <-chan T, items ...T) { t.Fatal("channel not closed: blocked") } } - -func TestChannelMap(t *testing.T) { - ch := make(chan int, 4) - ch <- 1 - ch <- 2 - ch <- 3 - - tableCh := ChanMap(ch, func(i int) string { - return fmt.Sprintf("table%d", i) - }) - close(ch) - - assertChannel(t, tableCh, "table1", "table2", "table3") -} diff --git a/pkg/util/util.go b/pkg/util/util.go index fc0e17f4d7ad0..ea85c64be4dc9 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -84,21 +84,6 @@ func GetJSON(client *http.Client, url string, v any) error { return errors.Trace(json.NewDecoder(resp.Body).Decode(v)) 
} -// ChanMap creates a channel which applies the function over the input Channel. -// Hint of Resource Leakage: -// In golang, channel isn't an interface so we must create a goroutine for handling the inputs. -// Hence the input channel must be closed properly or this function may leak a goroutine. -func ChanMap[T, R any](c <-chan T, f func(T) R) <-chan R { - outCh := make(chan R) - go func() { - defer close(outCh) - for item := range c { - outCh <- f(item) - } - }() - return outCh -} - // Str2Int64Map converts a string to a map[int64]struct{}. func Str2Int64Map(str string) map[int64]struct{} { strs := strings.Split(str, ",")
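With table creation now returning a slice and the rewrite-rule and tiflash-recorder steps running as plain loops in `runRestore`, `util.ChanMap` loses its only caller, which is why the helper, its goroutine-leak caveat, and `TestChannelMap` are removed together. The comparison below is illustration only: a local copy of the mapping helper next to the equivalent slice loop, not code from the BR tree.

```go
// Side-by-side illustration of the pattern change behind removing util.ChanMap.
package main

import "fmt"

// chanMap is a local copy of the removed helper: it applies f to every element
// flowing through a channel, which suited the old streamed table pipeline.
func chanMap[T, R any](c <-chan T, f func(T) R) <-chan R {
	outCh := make(chan R)
	go func() {
		defer close(outCh)
		for item := range c {
			outCh <- f(item)
		}
	}()
	return outCh
}

func main() {
	// Channel style: transform elements as they stream past.
	in := make(chan int, 3)
	in <- 1
	in <- 2
	in <- 3
	close(in)
	for s := range chanMap(in, func(i int) string { return fmt.Sprintf("table%d", i) }) {
		fmt.Println(s)
	}

	// Slice style: the same transformation over materialized results, as done
	// now that CreateTables returns all created tables up front.
	out := make([]string, 0, 3)
	for _, i := range []int{1, 2, 3} {
		out = append(out, fmt.Sprintf("table%d", i))
	}
	fmt.Println(out)
}
```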