Merge #45124
45124: storage/concurrency: changes to lock table to handle requests that have the same key as ReadWrite and ReadOnly r=nvanbenschoten a=sumeerbhola

- This new functionality is covered by the dup_access test and the
  randomized test.
- The existing basic test lost some coverage of inactive waiters, so
  there is now a separate non_active_waiter test.
- The comment on future extensions to shared and upgrade locks now
  includes a discussion of non-transactional requests and of how active
  and inactive wait states will work.
- There was a bug in the code that handles a lock discovered by a
  reader, which was exposed by changes to the basic test:
  lockTableGuardImpl.mu.locks was being incorrectly updated to add
  the *lockState.

Release note: None

Co-authored-by: sumeerbhola <[email protected]>
craig[bot] and sumeerbhola committed Feb 18, 2020
2 parents 734d8c1 + 149928e commit f5670db
Showing 5 changed files with 861 additions and 99 deletions.
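
The scenario in the commit title is a single request whose span set declares the same key under both SpanReadWrite and SpanReadOnly. As a rough, hypothetical illustration (not code from this commit), such a span set could be built as below, assuming the spanset.AddMVCC call used by the test change further down; the package name, helper name, key, timestamp, and import paths are invented for the sketch based on this tree's pkg/storage layout.

	package example

	import (
		"github.com/cockroachdb/cockroach/pkg/roachpb"
		"github.com/cockroachdb/cockroach/pkg/storage/spanset"
		"github.com/cockroachdb/cockroach/pkg/util/hlc"
	)

	// dupAccessSpans declares key "a" as a locking write and again as a
	// non-locking read at the same timestamp. The lock table changes in this
	// commit aim to make such a request wait at a conflicting lock only at
	// its strongest (SpanReadWrite) access.
	func dupAccessSpans() *spanset.SpanSet {
		spans := &spanset.SpanSet{}
		span := roachpb.Span{Key: roachpb.Key("a")}
		ts := hlc.Timestamp{WallTime: 10}
		spans.AddMVCC(spanset.SpanReadWrite, span, ts) // stronger access
		spans.AddMVCC(spanset.SpanReadOnly, span, ts)  // same key, weaker access
		return spans
	}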
93 changes: 65 additions & 28 deletions pkg/storage/concurrency/lock_table.go
@@ -231,7 +231,7 @@ type lockTableGuardImpl struct {
// The key for the lockState is contained in the Span specified by
// spans[sa][ss][index].
ss spanset.SpanScope
sa spanset.SpanAccess
sa spanset.SpanAccess // Iterates from stronger to weaker strength
index int

mu struct {
@@ -241,8 +241,8 @@ type lockTableGuardImpl struct {
state waitingState
signal chan struct{}

// locks for which this request has a reservation or is in the queue or
// actively waiting as a reader.
// locks for which this request has a reservation or is in the queue of
// writers (active or inactive) or actively waiting as a reader.
//
// TODO(sbhola): investigate whether the logic to maintain this locks map
// can be simplified so it doesn't need to be adjusted by various
@@ -483,19 +483,29 @@ type lockWaitQueue struct {
// - Holders: only shared locks are compatible with themselves, so there can
// be one of (a) no holder (b) multiple shared lock holders, (c) one
// exclusive holder, (d) one upgrade holder. Non-locking reads will
// continue to wait in waitingReaders for only an incompatible exclusive
// holder.
// wait in waitingReaders for only an incompatible exclusive holder.
//
// - Reservers: This follows the same pattern as holders. Non-locking reads
// do not wait on reservers.
//
// - Queueing and dependencies: All potential lockers will wait in the same
// queue. A sequence of consecutive requests that have the potential to
// acquire a shared lock will jointly reserve that shared lock. Such
// requests cannot jump ahead of requests with a lower seqnum just because
// there is currently a shared lock reservation (this can cause lockTable
// induced deadlocks). Such joint reservations can be partially broken by
// a waiter desiring an exclusive or upgrade lock.
// - Queueing and dependencies: All potential lockers and non-transactional
// writers will wait in the same queue. A sequence of consecutive requests
// that have the potential to acquire a shared lock will jointly reserve
// that shared lock. Such requests cannot jump ahead of requests with a
// lower seqnum just because there is currently a shared lock reservation
// (this can cause lockTable induced deadlocks). Such joint reservations
// can be partially broken by a waiter desiring an exclusive or upgrade
// lock. Like the current code, non-transactional writes will wait for
// reservations that have a lower sequence num, but not make their own
// reservation. Additionally, they can partially break joint reservations.
//
// Reservations that are (partially or fully) broken cause requests to
// reenter the queue as inactive waiters. This is no different than the
// current behavior. Each request can specify the same key in spans for
// ReadOnly, ReadShared, ReadUpgrade, ReadWrite. The spans will be
// iterated over in decreasing order of strength, to only wait at a lock
// at the highest strength (this is similar to the current behavior using
// accessDecreasingStrength).
//
// For dependencies, a waiter desiring an exclusive or upgrade lock always
// conflicts with the holder(s) or reserver(s) so that is the dependency
@@ -757,6 +767,25 @@ func (l *lockState) tryActiveWait(g *lockTableGuardImpl, sa spanset.SpanAccess,
if g.ts.Less(waitForTs) {
return false
}
g.mu.Lock()
_, alsoHasStrongerAccess := g.mu.locks[l]
g.mu.Unlock()

// If the request already has this lock in its locks map, it must also be
// writing to this key and must be either a reservation holder or inactive
// waiter at this lock. The former has already been handled above. For the
// latter, it must have had its reservation broken. Since this is a weaker
// access we defer to the stronger access and don't wait here.
//
// For non-transactional requests that have the key specified as both
// SpanReadOnly and SpanReadWrite, the request never acquires a
// reservation, so using the locks map to detect this duplication of the
// key is not possible. In the rare case, the lock is now held at a
// timestamp that is not compatible with this request and it will wait
// here -- there is no correctness issue with doing that.
if alsoHasStrongerAccess {
return false
}
}

// Incompatible with whoever is holding lock or reservation.
@@ -950,9 +979,13 @@ func (l *lockState) acquireLock(
panic("lockTable bug")
}
}
g.doneWaitingAtLock(false, l)
} else {
g.mu.Lock()
delete(g.mu.locks, l)
g.mu.Unlock()
}
l.queuedWriters.Remove(curr)
g.doneWaitingAtLock(false, l)
}
}

@@ -986,13 +1019,6 @@ func (l *lockState) discoveredLock(
l.holder.holder[lock.Replicated].ts = ts
}

g.mu.Lock()
_, presentHere := g.mu.locks[l]
if !presentHere {
g.mu.locks[l] = struct{}{}
}
g.mu.Unlock()

// Queue the existing reservation holder. Note that this reservation
// holder may not be equal to g due to two reasons (a) the reservation
// of g could have been broken even though g is holding latches (see
@@ -1012,7 +1038,16 @@ func (l *lockState) discoveredLock(
informWaiters = false
}

if !presentHere && sa == spanset.SpanReadWrite {
g.mu.Lock()
_, presentHere := g.mu.locks[l]
addToQueue := !presentHere && sa == spanset.SpanReadWrite
if addToQueue {
// Since g will place itself in queue as inactive waiter below.
g.mu.locks[l] = struct{}{}
}
g.mu.Unlock()

if addToQueue {
// Put self in queue as inactive waiter.
qg := &queuedGuard{
guard: g,
@@ -1357,6 +1392,7 @@ func (t *lockTableImpl) ScanAndEnqueue(req Request, guard lockTableGuard) lockTa
table: t,
spans: req.Spans,
ts: req.Timestamp,
sa: spanset.NumSpanAccess - 1,
index: -1,
}
if req.Txn != nil {
@@ -1368,7 +1404,7 @@ func (t *lockTableImpl) ScanAndEnqueue(req Request, guard lockTableGuard) lockTa
g = guard.(*lockTableGuardImpl)
g.mu.Lock()
g.key = nil
g.sa = spanset.SpanAccess(0)
g.sa = spanset.NumSpanAccess - 1
g.ss = spanset.SpanScope(0)
g.index = -1
g.mu.startWait = false
@@ -1510,10 +1546,12 @@ func (t *lockTableImpl) clearMostLocks() {
}
}

// Given the key with scope ss must be in spans, returns the strongest access
// specified in the spans.
func findAccessInSpans(
key roachpb.Key, ss spanset.SpanScope, spans *spanset.SpanSet,
) (spanset.SpanAccess, error) {
for sa := spanset.SpanAccess(0); sa < spanset.NumSpanAccess; sa++ {
for sa := spanset.NumSpanAccess - 1; sa >= 0; sa-- {
s := spans.GetSpans(sa, ss)
// First span that starts after key
i := sort.Search(len(s), func(i int) bool {
@@ -1595,7 +1633,7 @@ func (t *lockTableImpl) UpdateLocks(intent *roachpb.Intent) error {
func stepToNextSpan(g *lockTableGuardImpl) *spanset.Span {
g.index++
for ; g.ss < spanset.NumSpanScope; g.ss++ {
for ; g.sa < spanset.NumSpanAccess; g.sa++ {
for ; g.sa >= 0; g.sa-- {
spans := g.spans.GetSpans(g.sa, g.ss)
if g.index < len(spans) {
span := &spans[g.index]
@@ -1604,7 +1642,7 @@ func stepToNextSpan(g *lockTableGuardImpl) *spanset.Span {
}
g.index = 0
}
g.sa = 0
g.sa = spanset.NumSpanAccess - 1
}
return nil
}
@@ -1625,7 +1663,6 @@ func (t *lockTableImpl) findNextLockAfter(g *lockTableGuardImpl, notify bool) {
}
for span != nil {
tree := &t.locks[g.ss]
sa := g.sa
if len(span.EndKey) == 0 {
// NB: !resumingInSameSpan
tree.mu.RLock()
@@ -1634,7 +1671,7 @@ func (t *lockTableImpl) findNextLockAfter(g *lockTableGuardImpl, notify bool) {
tree.mu.RUnlock()
if i != nil {
l := i.(*lockState)
if l.tryActiveWait(g, sa, notify) {
if l.tryActiveWait(g, g.sa, notify) {
return
}
}
@@ -1665,7 +1702,7 @@ func (t *lockTableImpl) findNextLockAfter(g *lockTableGuardImpl, notify bool) {
// Else, past the lock where it stopped waiting. We may not
// encounter that lock since it may have been garbage collected.
}
waiting = l.tryActiveWait(g, sa, notify)
waiting = l.tryActiveWait(g, g.sa, notify)
return !waiting
})
resumingInSameSpan = false
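Several of the hunks above (the sa initialization in ScanAndEnqueue, findAccessInSpans, and stepToNextSpan) switch iteration over a request's span accesses from weakest-first to strongest-first, so a key that appears at more than one strength is considered at its strongest access first. Below is a standalone, hypothetical sketch of that loop, under the same invented package and import-path assumptions as the earlier example; the helper name is made up, and the real findAccessInSpans instead uses sort.Search over the sorted spans and returns an error when the key is absent.

	package example

	import (
		"github.com/cockroachdb/cockroach/pkg/roachpb"
		"github.com/cockroachdb/cockroach/pkg/storage/spanset"
	)

	// strongestAccessFor walks accesses from strongest to weakest and returns
	// the first one whose spans contain the key, mirroring the decreasing-
	// strength order now used by the lock table.
	func strongestAccessFor(
		key roachpb.Key, ss spanset.SpanScope, spans *spanset.SpanSet,
	) (spanset.SpanAccess, bool) {
		for sa := spanset.NumSpanAccess - 1; sa >= 0; sa-- {
			for _, s := range spans.GetSpans(sa, ss) {
				if s.ContainsKey(key) {
					return sa, true // first match is the strongest access
				}
			}
		}
		return 0, false
	}
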
9 changes: 9 additions & 0 deletions pkg/storage/concurrency/lock_table_test.go
@@ -930,13 +930,22 @@ func TestLockTableConcurrentRequests(t *testing.T) {
for i := 0; i < numKeys; i++ {
span := roachpb.Span{Key: keys[keysPerm[i]]}
acc := spanset.SpanReadOnly
dupRead := false
if !onlyReads {
acc = spanset.SpanAccess(rng.Intn(int(spanset.NumSpanAccess)))
if acc == spanset.SpanReadWrite && txnMeta != nil && rng.Intn(2) == 0 {
// Acquire lock.
wi.locksToAcquire = append(wi.locksToAcquire, span.Key)
}
if acc == spanset.SpanReadWrite && rng.Intn(2) == 0 {
// Also include the key as read.
dupRead = true
}
}
spans.AddMVCC(acc, span, ts)
if dupRead {
spans.AddMVCC(spanset.SpanReadOnly, span, ts)
}
}
items = append(items, wi)
}