Skip to content

Commit

Permalink
[pick-7.1] region_request: ignore resource group errors that not rela…
Browse files Browse the repository at this point in the history
…tive storage layer (#1354) (#1464)

ref #1322

Signed-off-by: nolouch <[email protected]>
  • Loading branch information
nolouch authored Nov 1, 2024
1 parent 2784a8f commit f01fc67
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 11 deletions.
30 changes: 19 additions & 11 deletions internal/locate/region_request.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ import (
"github.com/tikv/client-go/v2/metrics"
"github.com/tikv/client-go/v2/tikvrpc"
"github.com/tikv/client-go/v2/util"
"github.com/tikv/pd/client/errs"
pderr "github.com/tikv/pd/client/errs"
)

Expand Down Expand Up @@ -1679,7 +1680,9 @@ func (s *RegionRequestSender) sendReqToRegion(
}

if err != nil {
s.rpcError = err
if isRPCError(err) {
s.rpcError = err
}

// Because in rpc logic, context.Cancel() will be transferred to rpcContext.Cancel error. For rpcContext cancel,
// we need to retry the request. But for context cancel active, for example, limitExec gets the required rows,
Expand All @@ -1701,6 +1704,11 @@ func (s *RegionRequestSender) sendReqToRegion(
return
}

// isRPCError reports whether err counts as an RPC error.
// It returns false for a nil error and for any error that
// ErrClientResourceGroupThrottled does not report as NotEqual;
// every other error yields true.
func isRPCError(err error) bool {
	if err == nil {
		return false
	}
	// Resource group throttling is deliberately excluded.
	notThrottled := errs.ErrClientResourceGroupThrottled.NotEqual(err)
	return notThrottled
}

func (s *RegionRequestSender) getStoreToken(st *Store, limit int64) error {
// Checking limit is not thread safe, preferring this for avoiding load in loop.
count := st.tokenCount.Load()
Expand Down Expand Up @@ -1747,16 +1755,6 @@ func (s *RegionRequestSender) onSendFail(bo *retry.Backoffer, ctx *RPCContext, e
}
}

if ctx.Store != nil && ctx.Store.storeType == tikvrpc.TiFlashCompute {
s.regionCache.InvalidateTiFlashComputeStoresIfGRPCError(err)
} else if ctx.Meta != nil {
if s.replicaSelector != nil {
s.replicaSelector.onSendFailure(bo, err)
} else {
s.regionCache.OnSendFail(bo, ctx, s.NeedReloadRegion(ctx), err)
}
}

// don't need to retry for ResourceGroup error
if errors.Is(err, pderr.ErrClientResourceGroupThrottled) {
return err
Expand All @@ -1769,6 +1767,16 @@ func (s *RegionRequestSender) onSendFail(bo *retry.Backoffer, ctx *RPCContext, e
return err
}

if ctx.Store != nil && ctx.Store.storeType == tikvrpc.TiFlashCompute {
s.regionCache.InvalidateTiFlashComputeStoresIfGRPCError(err)
} else if ctx.Meta != nil {
if s.replicaSelector != nil {
s.replicaSelector.onSendFailure(bo, err)
} else {
s.regionCache.OnSendFail(bo, ctx, s.NeedReloadRegion(ctx), err)
}
}

// Retry on send request failure when it's not canceled.
// When a store is not available, the leader of related region should be elected quickly.
// TODO: the number of retries should be limited, since the region may be unavailable
Expand Down
36 changes: 36 additions & 0 deletions internal/locate/region_request_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ import (
"github.com/tikv/client-go/v2/internal/mockstore/mocktikv"
"github.com/tikv/client-go/v2/internal/retry"
"github.com/tikv/client-go/v2/tikvrpc"
pderr "github.com/tikv/pd/client/errs"
"google.golang.org/grpc"
)

Expand Down Expand Up @@ -144,6 +145,41 @@ func (s *testRegionRequestToSingleStoreSuite) TestOnRegionError() {
}()
}

// TestOnSendFailByResourceGroupThrottled checks how the region request sender
// behaves when the underlying client fails every call with
// ErrClientResourceGroupThrottled: SendReq must return an error, the epoch of
// store 1 must be unchanged afterwards, and rpcError must stay nil.
func (s *testRegionRequestToSingleStoreSuite) TestOnSendFailByResourceGroupThrottled() {
	putReq := tikvrpc.NewRequest(tikvrpc.CmdRawPut, &kvrpcpb.RawPutRequest{
		Key:   []byte("key"),
		Value: []byte("value"),
	})
	loc, err := s.cache.LocateRegionByID(s.bo, s.region)
	s.Nil(err)
	s.NotNil(loc)

	// Remember the real client so it can be put back once the test is done.
	originalClient := s.regionRequestSender.client
	defer func() { s.regionRequestSender.client = originalClient }()

	// Fetch store 1 under the store mutex and record its epoch before sending.
	s.regionRequestSender.regionCache.storeMu.Lock()
	before := s.regionRequestSender.regionCache.storeMu.stores[1]
	s.regionRequestSender.regionCache.storeMu.Unlock()
	epochBefore := before.epoch

	// Replace the client with a stub that always fails with the throttled error.
	s.regionRequestSender.client = &fnClient{fn: func(_ context.Context, _ string, _ *tikvrpc.Request, _ time.Duration) (*tikvrpc.Response, error) {
		return nil, pderr.ErrClientResourceGroupThrottled
	}}

	backoffer := retry.NewBackofferWithVars(context.Background(), 5, nil)
	_, _, sendErr := s.regionRequestSender.SendReq(backoffer, putReq, loc.Region, time.Second)
	s.NotNil(sendErr)

	// Fetch store 1 again after the failed send.
	s.regionRequestSender.regionCache.storeMu.Lock()
	after := s.regionRequestSender.regionCache.storeMu.stores[1]
	s.regionRequestSender.regionCache.storeMu.Unlock()

	// The store must not have been marked for refill, so its epoch is unchanged.
	s.Equal(epochBefore, after.epoch)
	// A throttled error must not be recorded as an RPC error.
	s.Nil(s.regionRequestSender.rpcError)
}

func (s *testRegionRequestToSingleStoreSuite) TestOnSendFailedWithStoreRestart() {
req := tikvrpc.NewRequest(tikvrpc.CmdRawPut, &kvrpcpb.RawPutRequest{
Key: []byte("key"),
Expand Down

0 comments on commit f01fc67

Please sign in to comment.