Skip to content

Commit

Permalink
storage: compute minority in failed consistency check
Browse files Browse the repository at this point in the history
Previously, a consistency failure was always framed as a deviation from
the leaseholder. However, in practice a mismatch is always symmetric:
the leaseholder could be right and the follower wrong; the follower
might be right; and in fact the (usually 2+) followers may agree that
the leaseholder is wrong.

We've historically found that the majority was usually right, that is,
with very few exceptions (I remember a single one), if there is one
outlier replica, that replica is usually the problematic one, or more
generally, the SHA with the lowest multiplicity is usually the one
worth suspecting (we call this a "minority" in this context).

The consistency checker currently fatals the leaseholder
indiscriminately. This is an especially poor choice that was made out of
convenience. Killing the leaseholder, which may very well have the
correct data, forces the lease to move to a follower, often resulting in
the corrupt node taking over the lease and spreading the corruption
further. In a follow-up change, we'll want to kill the replicas
representing the SHA with the lowest multiplicity instead of defaulting
to the leaseholder.

Release note (general change): Improve the consistency checker's log output.
  • Loading branch information
tbg committed Oct 24, 2019
1 parent 434442a commit 87b3b4a
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 33 deletions.
5 changes: 3 additions & 2 deletions pkg/storage/consistency_queue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ func TestCheckConsistencyInconsistent(t *testing.T) {
}
if len(diff) != 1 {
t.Errorf("diff length = %d, diff = %v", len(diff), diff)
return
}
d := diff[0]
if d.LeaseHolder || !bytes.Equal(diffKey, d.Key) || diffTimestamp != d.Timestamp {
Expand Down Expand Up @@ -348,8 +349,8 @@ func TestCheckConsistencyInconsistent(t *testing.T) {

assert.Len(t, resp.Result, 1)
assert.Equal(t, roachpb.CheckConsistencyResponse_RANGE_INCONSISTENT, resp.Result[0].Status)
assert.Contains(t, resp.Result[0].Detail, `is inconsistent`)
assert.Contains(t, resp.Result[0].Detail, `persisted stats`)
assert.Contains(t, resp.Result[0].Detail, `[minority]`)
assert.Contains(t, resp.Result[0].Detail, `stats`)
}

// TestConsistencyQueueRecomputeStats is an end-to-end test of the mechanism CockroachDB
Expand Down
90 changes: 59 additions & 31 deletions pkg/storage/replica_consistency.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,54 +106,82 @@ func (r *Replica) CheckConsistency(
return roachpb.CheckConsistencyResponse{}, roachpb.NewError(err)
}

var inconsistencyCount int
var missingCount int

res := roachpb.CheckConsistencyResponse_Result{}
res.RangeID = r.RangeID

shaToIdxs := map[string][]int{}
var missing []ConsistencyCheckResult
for i, result := range results {
expResponse := results[0].Response
if result.Err != nil {
res.Detail += fmt.Sprintf("%s: %v\n", result.Replica, result.Err)
missingCount++
missing = append(missing, result)
continue
}
if bytes.Equal(expResponse.Checksum, result.Response.Checksum) {
// Replica is consistent (or rather, agrees with the local result).
if i == 0 {
res.Detail += fmt.Sprintf("stats: %+v\n", expResponse.Persisted)
s := string(result.Response.Checksum)
shaToIdxs[s] = append(shaToIdxs[s], i)
}

// When replicas diverge, anecdotally often the minority (usually of size
// one) is in the wrong. If there's more than one smallest minority (for
// example, if three replicas all return different hashes) we pick any of
// them.
var minoritySHA string
if len(shaToIdxs) > 1 {
for sha, idxs := range shaToIdxs {
if minoritySHA == "" || len(shaToIdxs[minoritySHA]) > len(idxs) {
minoritySHA = sha
}
continue
}
inconsistencyCount++
}

// There is an inconsistency if and only if there is a minority SHA.

if minoritySHA != "" {
var buf bytes.Buffer
_, _ = fmt.Fprintf(&buf, "replica %s is inconsistent: expected checksum %x, got %x\n"+
"persisted stats: exp %+v, got %+v\n"+
"stats delta: exp %+v, got %+v\n",
result.Replica,
expResponse.Checksum, result.Response.Checksum,
expResponse.Persisted, result.Response.Persisted,
expResponse.Delta, result.Response.Delta,
)
if expResponse.Snapshot != nil && result.Response.Snapshot != nil {
diff := diffRange(expResponse.Snapshot, result.Response.Snapshot)
if report := r.store.cfg.TestingKnobs.ConsistencyTestingKnobs.BadChecksumReportDiff; report != nil {
report(*r.store.Ident, diff)
for sha, idxs := range shaToIdxs {
minority := ""
if sha == minoritySHA {
minority = " [minority]"
}
for _, idx := range idxs {
_, _ = fmt.Fprintf(&buf, "%s: checksum %x%s\n"+
"- stats: %+v\n"+
"- recomputed delta: %+v\n",
&results[idx].Replica,
sha,
minority,
&results[idx].Response.Persisted,
&results[idx].Response.Delta,
)
}
minoritySnap := results[shaToIdxs[minoritySHA][0]].Response.Snapshot
curSnap := results[shaToIdxs[sha][0]].Response.Snapshot
if sha != minoritySHA && minoritySnap != nil && curSnap != nil {
diff := diffRange(curSnap, minoritySnap)
if report := r.store.cfg.TestingKnobs.ConsistencyTestingKnobs.BadChecksumReportDiff; report != nil {
report(*r.store.Ident, diff)
}
_, _ = fmt.Fprintf(&buf, "====== diff(%x, [minority]) ======\n", sha)
_, _ = diff.WriteTo(&buf)
}
_, _ = diff.WriteTo(&buf)
}

if isQueue {
log.Error(ctx, buf.String())
}
res.Detail += buf.String()
} else {
res.Detail += fmt.Sprintf("stats: %+v\n", enginepb.MVCCStats(results[0].Response.Persisted))
}
for _, result := range missing {
res.Detail += fmt.Sprintf("%s: error: %v\n", result.Replica, result.Err)
}

delta := enginepb.MVCCStats(results[0].Response.Delta)
delta.LastUpdateNanos = 0

res.StartKey = []byte(startKey)
res.Status = roachpb.CheckConsistencyResponse_RANGE_CONSISTENT
if inconsistencyCount != 0 {
if minoritySHA != "" {
res.Status = roachpb.CheckConsistencyResponse_RANGE_INCONSISTENT
} else if args.Mode != roachpb.ChecksumMode_CHECK_STATS && delta != (enginepb.MVCCStats{}) {
if delta.ContainsEstimates {
Expand All @@ -166,7 +194,8 @@ func (r *Replica) CheckConsistency(
res.Status = roachpb.CheckConsistencyResponse_RANGE_CONSISTENT_STATS_INCORRECT
}
res.Detail += fmt.Sprintf("stats delta: %+v\n", enginepb.MVCCStats(results[0].Response.Delta))
} else if missingCount > 0 {
} else if len(missing) > 0 {
// No inconsistency was detected, but we didn't manage to inspect all replicas.
res.Status = roachpb.CheckConsistencyResponse_RANGE_INDETERMINATE
}
var resp roachpb.CheckConsistencyResponse
Expand All @@ -179,7 +208,7 @@ func (r *Replica) CheckConsistency(
return resp, nil
}

if inconsistencyCount == 0 {
if minoritySHA == "" {
// The replicas were in sync. Check that the MVCCStats haven't diverged from
// what they should be. This code originated in the realization that there
// were many bugs in our stats computations. These are being fixed, but it
Expand Down Expand Up @@ -242,14 +271,13 @@ func (r *Replica) CheckConsistency(

// Diff was printed above, so call logFunc with a short message only.
if args.WithDiff {
logFunc(ctx, "consistency check failed with %d inconsistent replicas", inconsistencyCount)
logFunc(ctx, "consistency check failed")
return resp, nil
}

// No diff was printed, so we want to re-run with diff.
// Note that this will call Fatal recursively in `CheckConsistency` (in the code above).
log.Errorf(ctx, "consistency check failed with %d inconsistent replicas; fetching details",
inconsistencyCount)
log.Errorf(ctx, "consistency check failed; fetching details")
args.WithDiff = true
args.Checkpoint = true

Expand Down

0 comments on commit 87b3b4a

Please sign in to comment.