From 9129590f5b0c006e08d449ff868212e74c691f90 Mon Sep 17 00:00:00 2001 From: 3pointer Date: Fri, 25 Mar 2022 21:52:34 +0800 Subject: [PATCH] br: fix the missing retry for pd batch scan error (#33420) close pingcap/tidb#33419 --- br/pkg/restore/split.go | 13 ++++++++++--- br/pkg/restore/split_test.go | 5 +++++ br/pkg/restore/util_test.go | 10 ++++++++++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/br/pkg/restore/split.go b/br/pkg/restore/split.go index 91af891f13fb0..9c28d0f3f9a9c 100644 --- a/br/pkg/restore/split.go +++ b/br/pkg/restore/split.go @@ -438,11 +438,18 @@ func PaginateScanRegion( } var regions []*RegionInfo - err := utils.WithRetry(ctx, func() error { + var err error + // we don't need to return multierr. since there only 3 times retry. + // in most case 3 times retry have the same error. so we just return the last error. + // actually we'd better remove all multierr in br/lightning. + // because it's not easy to check multierr equals normal error. + // see https://github.com/pingcap/tidb/issues/33419. + _ = utils.WithRetry(ctx, func() error { regions = []*RegionInfo{} scanStartKey := startKey for { - batch, err := client.ScanRegions(ctx, scanStartKey, endKey, limit) + var batch []*RegionInfo + batch, err = client.ScanRegions(ctx, scanStartKey, endKey, limit) if err != nil { return errors.Trace(err) } @@ -458,7 +465,7 @@ func PaginateScanRegion( break } } - if err := CheckRegionConsistency(startKey, endKey, regions); err != nil { + if err = CheckRegionConsistency(startKey, endKey, regions); err != nil { log.Warn("failed to scan region, retrying", logutil.ShortError(err)) return err } diff --git a/br/pkg/restore/split_test.go b/br/pkg/restore/split_test.go index 5f1911905b185..e07946a5b25af 100644 --- a/br/pkg/restore/split_test.go +++ b/br/pkg/restore/split_test.go @@ -39,6 +39,7 @@ type TestClient struct { supportBatchScatter bool scattered map[uint64]bool + InjectErr bool } func NewTestClient( @@ -215,6 +216,10 @@ func (c *TestClient) GetOperator(ctx context.Context, regionID uint64) (*pdpb.Ge } func (c *TestClient) ScanRegions(ctx context.Context, key, endKey []byte, limit int) ([]*restore.RegionInfo, error) { + if c.InjectErr { + return nil, errors.New("mock scan error") + } + infos := c.regionsInfo.ScanRange(key, endKey, limit) regions := make([]*restore.RegionInfo, 0, len(infos)) for _, info := range infos { diff --git a/br/pkg/restore/util_test.go b/br/pkg/restore/util_test.go index 16348d5212f9a..cde804ba89b08 100644 --- a/br/pkg/restore/util_test.go +++ b/br/pkg/restore/util_test.go @@ -10,6 +10,7 @@ import ( backuppb "github.com/pingcap/kvproto/pkg/brpb" "github.com/pingcap/kvproto/pkg/import_sstpb" "github.com/pingcap/kvproto/pkg/metapb" + berrors "github.com/pingcap/tidb/br/pkg/errors" "github.com/pingcap/tidb/br/pkg/restore" "github.com/pingcap/tidb/tablecodec" "github.com/pingcap/tidb/util/codec" @@ -228,6 +229,7 @@ func TestPaginateScanRegion(t *testing.T) { var batch []*restore.RegionInfo _, err := restore.PaginateScanRegion(ctx, NewTestClient(stores, regionMap, 0), []byte{}, []byte{}, 3) require.Error(t, err) + require.True(t, berrors.ErrPDBatchScanRegion.Equal(err)) require.Regexp(t, ".*scan region return empty result.*", err.Error()) regionMap, regions = makeRegions(1) @@ -268,12 +270,20 @@ func TestPaginateScanRegion(t *testing.T) { _, err = restore.PaginateScanRegion(ctx, NewTestClient(stores, regionMap, 0), []byte{2}, []byte{1}, 3) require.Error(t, err) + require.True(t, berrors.ErrRestoreInvalidRange.Equal(err)) require.Regexp(t, ".*startKey >= endKey.*", err.Error()) + tc := NewTestClient(stores, regionMap, 0) + tc.InjectErr = true + _, err = restore.PaginateScanRegion(ctx, tc, regions[1].Region.EndKey, regions[5].Region.EndKey, 3) + require.Error(t, err) + require.Regexp(t, ".*mock scan error.*", err.Error()) + // make the regionMap losing some region, this will cause scan region check fails delete(regionMap, uint64(3)) _, err = restore.PaginateScanRegion(ctx, NewTestClient(stores, regionMap, 0), regions[1].Region.EndKey, regions[5].Region.EndKey, 3) require.Error(t, err) + require.True(t, berrors.ErrPDBatchScanRegion.Equal(err)) require.Regexp(t, ".*region endKey not equal to next region startKey.*", err.Error()) }