Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(6.1) lock_resolver: handle pessimistic locks in BatchResolveLocks (#794) #859

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .github/workflows/integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name: Integration Test

on:
push:
branches: [ master ]
branches: [ master, tidb-6.1 ]
pull_request:
branches: [ master ]
branches: [ master, tidb-6.1 ]

jobs:

Expand Down Expand Up @@ -53,14 +53,14 @@ jobs:
uses: shrink/actions-docker-extract@v1
id: extract-pd
with:
image: pingcap/pd:nightly
image: pingcap/pd:v6.1.6
path: /pd-server

- name: Fetch TiKV
uses: shrink/actions-docker-extract@v1
id: extract-tikv
with:
image: pingcap/tikv:nightly
image: pingcap/tikv:v6.1.6
path: /tikv-server

- name: Run PD & TiKV
Expand Down Expand Up @@ -92,14 +92,14 @@ jobs:
uses: shrink/actions-docker-extract@v1
id: extract-pd
with:
image: pingcap/pd:nightly
image: pingcap/pd:v6.1.6
path: /pd-server

- name: Fetch TiKV
uses: shrink/actions-docker-extract@v1
id: extract-tikv
with:
image: pingcap/tikv:nightly
image: pingcap/tikv:v6.1.6
path: /tikv-server

- name: Run PD & TiKV
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name: Unit Test

on:
push:
branches: [ master ]
branches: [ master, tidb-6.1 ]
pull_request:
branches: [ master ]
branches: [ master, tidb-6.1 ]

jobs:
test:
Expand Down
2 changes: 2 additions & 0 deletions .golangci.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
run:
timeout: 5m
linters:
disable-all: true
enable:
Expand Down
217 changes: 217 additions & 0 deletions integration_tests/lock_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,11 @@ package tikv_test
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"math"
"net/http"
"sync"
"sync/atomic"
"testing"
Expand All @@ -47,8 +50,11 @@ import (
"github.com/pingcap/failpoint"
deadlockpb "github.com/pingcap/kvproto/pkg/deadlock"
"github.com/pingcap/kvproto/pkg/kvrpcpb"
"github.com/pingcap/kvproto/pkg/metapb"
"github.com/pkg/errors"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
"github.com/tikv/client-go/v2/config"
tikverr "github.com/tikv/client-go/v2/error"
"github.com/tikv/client-go/v2/kv"
"github.com/tikv/client-go/v2/oracle"
Expand All @@ -57,6 +63,7 @@ import (
"github.com/tikv/client-go/v2/txnkv"
"github.com/tikv/client-go/v2/txnkv/transaction"
"github.com/tikv/client-go/v2/txnkv/txnlock"
"github.com/tikv/client-go/v2/util"
)

var getMaxBackoff = tikv.ConfigProbe{}.GetGetMaxBackoff()
Expand All @@ -65,6 +72,10 @@ func TestLock(t *testing.T) {
suite.Run(t, new(testLockSuite))
}

// TestLockWithTiKV runs testLockWithTiKVSuite, whose tests are intended to
// exercise lock handling against a real TiKV cluster (individual tests may
// skip themselves when running on the mock store instead).
func TestLockWithTiKV(t *testing.T) {
	suite.Run(t, new(testLockWithTiKVSuite))
}

type testLockSuite struct {
suite.Suite
store tikv.StoreProbe
Expand Down Expand Up @@ -1007,3 +1018,209 @@ func (s *testLockSuite) TestLockWaitTimeLimit() {
s.Nil(txn1.Rollback())
s.Nil(txn2.Rollback())
}

// testLockWithTiKVSuite contains lock-related tests that primarily target a
// real TiKV cluster. The store is created in SetupTest and closed in
// TearDownTest.
type testLockWithTiKVSuite struct {
	suite.Suite
	store tikv.StoreProbe
}

// SetupTest creates the store used by each test: a real TiKV-backed store
// when the -with-tikv flag is set, otherwise the unistore-backed mock.
func (s *testLockWithTiKVSuite) SetupTest() {
	if !*withTiKV {
		s.store = tikv.StoreProbe{KVStore: NewTestUniStore(s.T())}
		return
	}
	s.store = tikv.StoreProbe{KVStore: NewTestStore(s.T())}
}

// TearDownTest closes the store created in SetupTest.
func (s *testLockWithTiKVSuite) TearDownTest() {
	s.store.Close()
}

// trySetTiKVConfig sets the config item `name` (a dot-separated path such as
// "pessimistic-txn.in-memory") to `value` on every up TiKV store through the
// status-address HTTP config API. It is best-effort: failures on individual
// stores are logged and skipped. The returned function restores the previous
// value on every store that was actually modified.
func (s *testLockWithTiKVSuite) trySetTiKVConfig(name string, value interface{}) func() {
	stores, err := s.store.GetPDClient().GetAllStores(context.Background())
	s.NoError(err)

	type configItem struct {
		url   string
		name  string
		value interface{}
	}

	// Configs that were changed and must be restored by the returned closure.
	var recoverConfigs []configItem

	httpScheme := "http"
	if c, err := config.GetGlobalConfig().Security.ToTLSConfig(); err == nil && c != nil {
		httpScheme = "https"
	}

	t := s.Suite.T()

	// setCfg posts {name: value} to the given store's config endpoint.
	setCfg := func(url, name string, value interface{}) error {
		postBody, err := json.Marshal(map[string]interface{}{name: value})
		if err != nil {
			return err
		}
		resp, err := http.Post(url, "text/json", bytes.NewReader(postBody))
		if err != nil {
			return err
		}
		s.NoError(resp.Body.Close())
		if resp.StatusCode != 200 {
			return errors.Errorf("post config got unexpected status code: %v, request body: %s", resp.StatusCode, postBody)
		}
		t.Logf("set config for tikv at %s finished: %s", url, string(postBody))
		return nil
	}

	// readCfg walks the dot-separated path `name` in a decoded config map and
	// returns the current value. This replaces a previous hard-coded lookup of
	// "pessimistic-txn.in-memory", which made the helper wrong for any other
	// config item despite accepting `name` as a parameter.
	readCfg := func(cfg map[string]interface{}, name string) (interface{}, error) {
		var cur interface{} = cfg
		for _, part := range bytes.Split([]byte(name), []byte(".")) {
			m, ok := cur.(map[string]interface{})
			if !ok {
				return nil, errors.Errorf("config item %q not found in store config", name)
			}
			cur = m[string(part)]
		}
		return cur, nil
	}

storeIter:
	for _, store := range stores {
		if store.State != metapb.StoreState_Up {
			continue
		}
		// Skip stores whose engine label marks them as non-TiKV (e.g. TiFlash).
		for _, label := range store.Labels {
			if label.Key == "engine" && label.Value != "tikv" {
				continue storeIter
			}
		}

		err := func() (err error) {
			// Convert any panic (e.g. unexpected config structure) into an
			// error so one bad store doesn't abort the whole loop.
			defer func() {
				if r := recover(); r != nil {
					err = errors.Errorf("set config for store at %v panicked: %v", store.StatusAddress, r)
				}
			}()

			url := fmt.Sprintf("%s://%s/config", httpScheme, store.StatusAddress)
			resp, err := http.Get(url)
			if err != nil {
				return err
			}
			defer resp.Body.Close()

			if resp.StatusCode != 200 {
				return errors.Errorf("unexpected response status: %v", resp.Status)
			}
			oldCfgRaw, err := io.ReadAll(resp.Body)
			if err != nil {
				return err
			}

			oldCfg := make(map[string]interface{})
			err = json.Unmarshal(oldCfgRaw, &oldCfg)
			if err != nil {
				return err
			}

			oldValue, err := readCfg(oldCfg, name)
			if err != nil {
				return err
			}
			if assert.ObjectsAreEqual(oldValue, value) {
				// Already the desired value; nothing to change or restore.
				return nil
			}

			err = setCfg(url, name, value)
			if err != nil {
				return err
			}

			recoverConfigs = append(recoverConfigs, configItem{
				url:   url,
				name:  name,
				value: oldValue,
			})

			return nil
		}()

		if err != nil {
			t.Logf("failed to set config for store at %s: %v", store.StatusAddress, err)
		}
	}

	// Prevent goleak from complaining about its internal connections.
	http.DefaultClient.CloseIdleConnections()

	if len(recoverConfigs) > 0 {
		// Sleep for a while to ensure the new configs are applied.
		time.Sleep(time.Second)
	}

	return func() {
		for _, item := range recoverConfigs {
			// Use a fresh error variable instead of reassigning the captured
			// outer `err` as the original code did.
			if err := setCfg(item.url, item.name, item.value); err != nil {
				t.Logf("failed to recover config for store at %s: %v", item.url, err)
			}
		}

		// Prevent goleak from complaining about its internal connections.
		http.DefaultClient.CloseIdleConnections()
	}
}

// TestBatchResolveLocks leaves both a stale pessimistic lock (on k1) and a
// stale prewrite lock (on k3) behind by skipping cleanup via failpoints, then
// runs the GC resolve-locks phase and verifies the data is consistent
// afterwards. It only runs against a real TiKV cluster, with in-memory
// pessimistic locks disabled so the stale locks are actually persisted.
func (s *testLockWithTiKVSuite) TestBatchResolveLocks() {
	if *withTiKV {
		recoverFunc := s.trySetTiKVConfig("pessimistic-txn.in-memory", false)
		defer recoverFunc()
	} else {
		s.T().Skip("this test only works with tikv")
	}

	// Skip async pessimistic rollback and secondary-key commit so stale locks
	// remain, and limit 2PC request batch size so keys are sent separately.
	s.NoError(failpoint.Enable("tikvclient/beforeAsyncPessimisticRollback", `return("skip")`))
	s.NoError(failpoint.Enable("tikvclient/beforeCommitSecondaries", `return("skip")`))
	s.NoError(failpoint.Enable("tikvclient/twoPCRequestBatchSizeLimit", `return("skip")`))
	defer func() {
		s.NoError(failpoint.Disable("tikvclient/beforeAsyncPessimisticRollback"))
		s.NoError(failpoint.Disable("tikvclient/beforeCommitSecondaries"))
		s.NoError(failpoint.Disable("tikvclient/twoPCRequestBatchSizeLimit"))
	}()

	k1, k2, k3 := []byte("k1"), []byte("k2"), []byte("k3")
	v2, v3 := []byte("v2"), []byte("v3")

	ctx := context.WithValue(context.Background(), util.SessionID, uint64(1))

	txn, err := s.store.Begin()
	s.NoError(err)
	txn.SetPessimistic(true)

	{
		// Produce write conflict on key k2.
		txn2, err := s.store.Begin()
		s.NoError(err)
		s.NoError(txn2.Set(k2, []byte("v0")))
		s.NoError(txn2.Commit(ctx))
	}

	lockCtx := kv.NewLockCtx(txn.StartTS(), 200, time.Now())
	err = txn.LockKeys(ctx, lockCtx, k1, k2)
	s.IsType(&tikverr.ErrWriteConflict{}, errors.Cause(err))

	// k1 has txn's stale pessimistic lock now.

	forUpdateTS, err := s.store.CurrentTimestamp(oracle.GlobalTxnScope)
	s.NoError(err)
	lockCtx = kv.NewLockCtx(forUpdateTS, 200, time.Now())
	s.NoError(txn.LockKeys(ctx, lockCtx, k2, k3))

	s.NoError(txn.Set(k2, v2))
	s.NoError(txn.Set(k3, v3))
	s.NoError(txn.Commit(ctx))

	// k3 has txn's stale prewrite lock now.

	// Perform ScanLock - BatchResolveLock.
	currentTS, err := s.store.CurrentTimestamp(oracle.GlobalTxnScope)
	s.NoError(err)
	s.NoError(s.store.GCResolveLockPhase(ctx, currentTS, 1))

	// Check data consistency.
	readTS, err := s.store.CurrentTimestamp(oracle.GlobalTxnScope)
	s.NoError(err) // this error was previously ignored before using readTS
	snapshot := s.store.GetSnapshot(readTS)
	_, err = snapshot.Get(ctx, k1)
	s.Equal(tikverr.ErrNotExist, err)
	v, err := snapshot.Get(ctx, k2)
	s.NoError(err)
	s.Equal(v2, v)
	v, err = snapshot.Get(ctx, k3)
	s.NoError(err)
	s.Equal(v3, v)
}
5 changes: 5 additions & 0 deletions tikv/test_probe.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,11 @@ func (s StoreProbe) SetSafeTS(storeID, safeTS uint64) {
s.setSafeTS(storeID, safeTS)
}

// GCResolveLockPhase performs the resolve-locks phase of GC: it delegates to
// the store's internal resolveLocks, scanning locks up to the given safepoint
// and resolving them with the specified concurrency. Exposed for testing only.
func (s StoreProbe) GCResolveLockPhase(ctx context.Context, safepoint uint64, concurrency int) error {
	return s.resolveLocks(ctx, safepoint, concurrency)
}

// LockResolverProbe wraps a LockResolver and exposes internal stats for testing purpose.
type LockResolverProbe struct {
*txnlock.LockResolverProbe
Expand Down
16 changes: 16 additions & 0 deletions txnkv/txnlock/lock_resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,11 +235,27 @@ func (lr *LockResolver) BatchResolveLocks(bo *retry.Backoffer, locks []*Lock, lo
txnInfos := make(map[uint64]uint64)
startTime := time.Now()
for _, l := range expiredLocks {
logutil.Logger(bo.GetCtx()).Debug("BatchResolveLocks handling lock", zap.Stringer("lock", l))

if _, ok := txnInfos[l.TxnID]; ok {
continue
}
metrics.LockResolverCountWithExpired.Inc()

if l.LockType == kvrpcpb.Op_PessimisticLock {
// BatchResolveLocks forces resolving the locks ignoring whether they are expired.
// For pessimistic locks, committing them makes no sense, but it won't affect transaction
// correctness if we always roll back them.
// Pessimistic locks need special handling logic because their primary may not point
// to the real primary of that transaction, and their state cannot be put in `txnInfos`.
// (see: https://github.com/pingcap/tidb/issues/42937).
err := lr.resolvePessimisticLock(bo, l)
if err != nil {
return false, err
}
continue
}

// Use currentTS = math.MaxUint64 means rollback the txn, no matter the lock is expired or not!
status, err := lr.getTxnStatus(bo, l.TxnID, l.Primary, 0, math.MaxUint64, true, false, l)
if err != nil {
Expand Down