Skip to content

Commit

Permalink
fix server is busy case
Browse files Browse the repository at this point in the history
Signed-off-by: you06 <[email protected]>
  • Loading branch information
you06 committed Aug 2, 2023
1 parent 533d7eb commit e607df2
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 28 deletions.
40 changes: 22 additions & 18 deletions internal/locate/region_request.go
Original file line number Diff line number Diff line change
Expand Up @@ -617,7 +617,23 @@ func (state *accessFollower) next(bo *retry.Backoffer, selector *replicaSelector
logutil.BgLogger().Warn("unable to find stores with given labels")
}
leader := selector.replicas[state.leaderIdx]
if leader.isEpochStale() || state.IsLeaderExhausted(leader) {
leaderEpochStale := leader.isEpochStale()
if leaderEpochStale || state.IsLeaderExhausted(leader) {
// In stale-read, the request will fall back to the leader after the local follower fails.
// If the leader is also unavailable, we can fall back to the followers and use the replica-read flag again:
// the remote follower has not been tried yet, and the local follower can be retried without the stale-read flag.
if state.isStaleRead {
selector.state = &tryFollower{
fallbackFromLeader: true,
leaderIdx: state.leaderIdx,
lastIdx: state.leaderIdx,
labels: state.option.labels,
}
if leaderEpochStale {
selector.regionCache.scheduleReloadRegion(selector.region)
}
return nil, stateChanged{}
}
metrics.TiKVReplicaSelectorFailureCounter.WithLabelValues("exhausted").Inc()
selector.invalidateRegion()
return nil, nil
Expand Down Expand Up @@ -935,8 +951,9 @@ func (s *replicaSelector) updateLeader(leader *metapb.Peer) {
s.region.invalidate(StoreNotFound)
}

// For some reason, the leader is unreachable by now, try followers instead.
func (s *replicaSelector) fallback2Follower() bool {
// For some reason, the leader is unreachable by now, so try followers instead.
// The state is changed in accessFollower.next when the leader is unavailable.
func (s *replicaSelector) canFallback2Follower() bool {
if s == nil || s.state == nil {
return false
}
Expand All @@ -947,16 +964,7 @@ func (s *replicaSelector) fallback2Follower() bool {
if !state.isStaleRead {
return false
}
if state.lastIdx != state.leaderIdx {
return false
}
s.state = &tryFollower{
fallbackFromLeader: true,
leaderIdx: state.leaderIdx,
lastIdx: state.leaderIdx,
labels: state.option.labels,
}
return true
return state.lastIdx == state.leaderIdx
}

func (s *replicaSelector) invalidateRegion() {
Expand Down Expand Up @@ -1452,10 +1460,6 @@ func (s *RegionRequestSender) onSendFail(bo *retry.Backoffer, ctx *RPCContext, e
} else if ctx.Meta != nil {
if s.replicaSelector != nil {
s.replicaSelector.onSendFailure(bo, err)
if s.replicaSelector.fallback2Follower() {
// if the leader is unreachable, we need to reload the latest region.
s.regionCache.scheduleReloadRegion(s.replicaSelector.region)
}
} else {
s.regionCache.OnSendFail(bo, ctx, s.NeedReloadRegion(ctx), err)
}
Expand Down Expand Up @@ -1642,7 +1646,7 @@ func (s *RegionRequestSender) onRegionError(bo *retry.Backoffer, ctx *RPCContext
logutil.BgLogger().Warn("tikv reports `ServerIsBusy` retry later",
zap.String("reason", regionErr.GetServerIsBusy().GetReason()),
zap.Stringer("ctx", ctx))
if s.replicaSelector.fallback2Follower() {
if s.replicaSelector.canFallback2Follower() {
// immediately retry on followers.
return true, nil
}
Expand Down
16 changes: 6 additions & 10 deletions internal/locate/region_request_state_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,6 @@ func TestRegionCacheStaleRead(t *testing.T) {
followerSuccessReadType: SuccessLeaderRead,
},
{
debug: true,
do: followerDownAndUp,
leaderRegionValid: true,
leaderAsyncReload: None[bool](),
Expand Down Expand Up @@ -330,12 +329,11 @@ func TestRegionCacheStaleRead(t *testing.T) {
followerSuccessReadType: SuccessStaleRead,
},
{
do: leaderDownAndUp,
leaderRegionValid: false,
leaderAsyncReload: Some(true),
// TODO: "z1" can be available by trying follower, unless pseudo region error is returned and retry.
leaderSuccessReplica: []string{},
leaderSuccessReadType: ReadFail,
do: leaderDownAndUp,
leaderRegionValid: true,
leaderAsyncReload: Some(true),
leaderSuccessReplica: []string{"z2", "z3"},
leaderSuccessReadType: SuccessFollowerRead,
followerRegionValid: true,
followerAsyncReload: None[bool](),
followerSuccessReplica: []string{"z2"},
Expand Down Expand Up @@ -375,6 +373,7 @@ func TestRegionCacheStaleRead(t *testing.T) {
followerSuccessReadType: SuccessLeaderRead,
},
{
debug: true,
do: leaderServerIsBusy,
recoverable: true,
leaderRegionValid: true,
Expand Down Expand Up @@ -477,7 +476,6 @@ func TestRegionCacheStaleRead(t *testing.T) {
followerSuccessReadType: SuccessFollowerRead,
},
{
debug: true,
do: leaderDown,
extra: []func(suite *testRegionCacheStaleReadSuite){followerDataIsNotReady},
recoverable: true,
Expand All @@ -491,7 +489,6 @@ func TestRegionCacheStaleRead(t *testing.T) {
followerSuccessReadType: SuccessFollowerRead,
},
{
debug: true,
do: leaderDown,
extra: []func(suite *testRegionCacheStaleReadSuite){followerServerIsBusy},
recoverable: true,
Expand All @@ -505,7 +502,6 @@ func TestRegionCacheStaleRead(t *testing.T) {
followerSuccessReadType: SuccessFollowerRead,
},
{
debug: true,
do: leaderDown,
extra: []func(suite *testRegionCacheStaleReadSuite){followerDown},
recoverable: true,
Expand Down

0 comments on commit e607df2

Please sign in to comment.