br: defer not in for-loop in the sendSplitRegionRequest #33636

Merged (5 commits) on Apr 2, 2022. The diff below shows the changes from all commits.
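Some context on the title: in Go, a deferred call runs when the enclosing function returns, not at the end of the loop iteration that scheduled it. The old sendSplitRegionRequest dialed a gRPC connection and deferred conn.Close() inside its retry loop, so every connection from every retry stayed open until the whole function returned. Moving the loop body into a helper function, as this PR does, makes each deferred Close fire as soon as that attempt finishes. Here is a minimal, self-contained sketch of the pitfall and the fix, with toy names standing in for the real BR code:

```go
package main

import "fmt"

// conn is a stand-in for the gRPC connection in the real code.
type conn struct{ id int }

func (c *conn) Close() { fmt.Printf("conn %d closed\n", c.id) }

func dial(id int) *conn { return &conn{id: id} }

// leaky keeps every connection open until the function returns,
// because defer fires on function exit, not on loop-iteration exit.
func leaky(retries int) {
	for i := 0; i < retries; i++ {
		c := dial(i)
		defer c.Close() // all Closes pile up until leaky returns
	}
	fmt.Println("all retries done, nothing closed yet")
}

// fixed mirrors the PR: the loop body lives in its own function,
// so the deferred Close runs at the end of every iteration.
func fixed(retries int) {
	for i := 0; i < retries; i++ {
		attempt(i)
	}
}

func attempt(i int) {
	c := dial(i)
	defer c.Close() // fires when attempt returns, i.e. once per iteration
	// ... send the split request here ...
}

func main() {
	leaky(3)
	fixed(3)
}
```

Running the sketch, leaky prints nothing until all three attempts are done, while fixed closes each connection before dialing the next.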
146 changes: 80 additions & 66 deletions br/pkg/restore/split_client.go
```diff
@@ -299,82 +299,96 @@ func (c *pdClient) sendSplitRegionRequest(
 ) (*kvrpcpb.SplitRegionResponse, error) {
     var splitErrors error
     for i := 0; i < splitRegionMaxRetryTime; i++ {
-        var peer *metapb.Peer
-        // scanRegions may return empty Leader in https://github.com/tikv/pd/blob/v4.0.8/server/grpc_service.go#L524
-        // so we also need to check Leader.Id != 0
-        if regionInfo.Leader != nil && regionInfo.Leader.Id != 0 {
-            peer = regionInfo.Leader
-        } else {
-            if len(regionInfo.Region.Peers) == 0 {
-                return nil, multierr.Append(splitErrors,
-                    errors.Annotatef(berrors.ErrRestoreNoPeer, "region[%d] doesn't have any peer", regionInfo.Region.GetId()))
-            }
-            peer = regionInfo.Region.Peers[0]
-        }
-        storeID := peer.GetStoreId()
-        store, err := c.GetStore(ctx, storeID)
-        if err != nil {
-            return nil, multierr.Append(splitErrors, err)
-        }
-        opt := grpc.WithInsecure()
-        if c.tlsConf != nil {
-            opt = grpc.WithTransportCredentials(credentials.NewTLS(c.tlsConf))
-        }
-        conn, err := grpc.Dial(store.GetAddress(), opt)
-        if err != nil {
-            return nil, multierr.Append(splitErrors, err)
-        }
-        defer conn.Close()
-        client := tikvpb.NewTikvClient(conn)
-        resp, err := splitRegionWithFailpoint(ctx, regionInfo, peer, client, keys, c.isRawKv)
-        if err != nil {
-            return nil, multierr.Append(splitErrors, err)
-        }
-        if resp.RegionError != nil {
-            log.Warn("fail to split region",
-                logutil.Region(regionInfo.Region),
-                zap.Stringer("regionErr", resp.RegionError))
-            splitErrors = multierr.Append(splitErrors,
-                errors.Annotatef(berrors.ErrRestoreSplitFailed, "split region failed: err=%v", resp.RegionError))
-            if nl := resp.RegionError.NotLeader; nl != nil {
-                if leader := nl.GetLeader(); leader != nil {
-                    regionInfo.Leader = leader
-                } else {
-                    newRegionInfo, findLeaderErr := c.GetRegionByID(ctx, nl.RegionId)
-                    if findLeaderErr != nil {
-                        return nil, multierr.Append(splitErrors, findLeaderErr)
-                    }
-                    if !checkRegionEpoch(newRegionInfo, regionInfo) {
-                        return nil, multierr.Append(splitErrors, berrors.ErrKVEpochNotMatch)
-                    }
-                    log.Info("find new leader", zap.Uint64("new leader", newRegionInfo.Leader.Id))
-                    regionInfo = newRegionInfo
-                }
-                log.Info("split region meet not leader error, retrying",
-                    zap.Int("retry times", i),
-                    zap.Uint64("regionID", regionInfo.Region.Id),
-                    zap.Any("new leader", regionInfo.Leader),
-                )
-                continue
-            }
-            // TODO: we don't handle RegionNotMatch and RegionNotFound here,
-            // because I think we don't have enough information to retry.
-            // But maybe we can handle them here by some information the error itself provides.
-            if resp.RegionError.ServerIsBusy != nil ||
-                resp.RegionError.StaleCommand != nil {
-                log.Warn("a error occurs on split region",
-                    zap.Int("retry times", i),
-                    zap.Uint64("regionID", regionInfo.Region.Id),
-                    zap.String("error", resp.RegionError.Message),
-                    zap.Any("error verbose", resp.RegionError),
-                )
-                continue
-            }
-            return nil, errors.Trace(splitErrors)
-        }
-        return resp, nil
+        retry, result, err := sendSplitRegionRequest(c, ctx, regionInfo, keys, &splitErrors, i)
+        if retry {
+            continue
+        }
+        if err != nil {
+            return nil, multierr.Append(splitErrors, err)
+        }
+        if result != nil {
+            return result, nil
+        }
+        return nil, errors.Trace(splitErrors)
     }
     return nil, errors.Trace(splitErrors)
 }
+
+func sendSplitRegionRequest(c *pdClient, ctx context.Context, regionInfo *RegionInfo, keys [][]byte, splitErrors *error, retry int) (bool, *kvrpcpb.SplitRegionResponse, error) {
+    var peer *metapb.Peer
+    // scanRegions may return empty Leader in https://github.com/tikv/pd/blob/v4.0.8/server/grpc_service.go#L524
+    // so we also need to check Leader.Id != 0
+    if regionInfo.Leader != nil && regionInfo.Leader.Id != 0 {
+        peer = regionInfo.Leader
+    } else {
+        if len(regionInfo.Region.Peers) == 0 {
+            return false, nil,
+                errors.Annotatef(berrors.ErrRestoreNoPeer, "region[%d] doesn't have any peer", regionInfo.Region.GetId())
+        }
+        peer = regionInfo.Region.Peers[0]
+    }
+    storeID := peer.GetStoreId()
+    store, err := c.GetStore(ctx, storeID)
+    if err != nil {
+        return false, nil, err
+    }
+    opt := grpc.WithInsecure()
+    if c.tlsConf != nil {
+        opt = grpc.WithTransportCredentials(credentials.NewTLS(c.tlsConf))
+    }
+    conn, err := grpc.Dial(store.GetAddress(), opt)
+    if err != nil {
+        return false, nil, err
+    }
+    defer conn.Close()
+    client := tikvpb.NewTikvClient(conn)
+    resp, err := splitRegionWithFailpoint(ctx, regionInfo, peer, client, keys, c.isRawKv)
+    if err != nil {
+        return false, nil, err
+    }
+    if resp.RegionError != nil {
+        log.Warn("fail to split region",
+            logutil.Region(regionInfo.Region),
+            zap.Stringer("regionErr", resp.RegionError))
+        *splitErrors = multierr.Append(*splitErrors,
+            errors.Annotatef(berrors.ErrRestoreSplitFailed, "split region failed: err=%v", resp.RegionError))
+        if nl := resp.RegionError.NotLeader; nl != nil {
+            if leader := nl.GetLeader(); leader != nil {
+                regionInfo.Leader = leader
+            } else {
+                newRegionInfo, findLeaderErr := c.GetRegionByID(ctx, nl.RegionId)
+                if findLeaderErr != nil {
+                    return false, nil, findLeaderErr
+                }
+                if !checkRegionEpoch(newRegionInfo, regionInfo) {
+                    return false, nil, berrors.ErrKVEpochNotMatch
+                }
+                log.Info("find new leader", zap.Uint64("new leader", newRegionInfo.Leader.Id))
+                regionInfo = newRegionInfo
+            }
+            log.Info("split region meet not leader error, retrying",
+                zap.Int("retry times", retry),
+                zap.Uint64("regionID", regionInfo.Region.Id),
+                zap.Any("new leader", regionInfo.Leader),
+            )
+            return true, nil, nil
+        }
+        // TODO: we don't handle RegionNotMatch and RegionNotFound here,
+        // because I think we don't have enough information to retry.
+        // But maybe we can handle them here by some information the error itself provides.
+        if resp.RegionError.ServerIsBusy != nil ||
+            resp.RegionError.StaleCommand != nil {
+            log.Warn("a error occurs on split region",
+                zap.Int("retry times", retry),
+                zap.Uint64("regionID", regionInfo.Region.Id),
+                zap.String("error", resp.RegionError.Message),
+                zap.Any("error verbose", resp.RegionError),
+            )
+            return true, nil, nil
+        }
+        return false, nil, nil
+    }
+    return false, resp, nil
+}
 
 func (c *pdClient) BatchSplitRegionsWithOrigin(
```
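A design choice worth noting in the extracted helper: it signals control flow through a (retry, resp, err) triple and accumulates per-attempt failures into the splitErrors *error out-parameter, so one running multierr survives across loop iterations. Below is a rough, self-contained sketch of that accumulation pattern using go.uber.org/multierr; the attempt function and its fake "not leader" failures are invented for illustration and are not part of the PR.

```go
package main

import (
	"fmt"

	"go.uber.org/multierr"
)

// attempt stands in for the extracted helper: it returns retry=true on a
// transient failure and records the failure into *accum for the caller.
func attempt(i int, accum *error) (retry bool, err error) {
	if i < 2 { // pretend the first two attempts hit "not leader"
		*accum = multierr.Append(*accum, fmt.Errorf("attempt %d: not leader", i))
		return true, nil
	}
	return false, nil // success: nothing appended, no retry
}

func main() {
	var accum error
	for i := 0; i < 5; i++ {
		retry, err := attempt(i, &accum)
		if retry {
			continue
		}
		if err != nil {
			accum = multierr.Append(accum, err)
		}
		break
	}
	// multierr.Errors flattens the accumulated value back into a slice.
	for _, e := range multierr.Errors(accum) {
		fmt.Println(e)
	}
}
```

Passing *error keeps the helper's signature honest about which failures are fatal (returned as err) and which are merely recorded for the final report, mirroring how the PR's caller only traces splitErrors after retries are exhausted.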