Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allocator: select a good enough store for decom/recovery #86267

Merged
merged 2 commits into from
Aug 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 72 additions & 4 deletions pkg/kv/kvserver/allocator/allocatorimpl/allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,20 @@ var leaseRebalancingAggressiveness = settings.RegisterFloatSetting(
settings.NonNegativeFloat,
)

// recoveryStoreSelector controls the strategy for choosing a store to recover
// replicas to: either to any valid store ("good") or to a store that has low
// range count ("best"). With this set to "good", recovering from a dead node or
// from a decommissioning node can be faster, because nodes can send replicas to
// more target stores (instead of multiple nodes sending replicas to a few
// stores with a low range count).
//
// The default is "good". allocateTarget only compares the value against the
// literal "best" when picking a selector, so any other value (including a
// typo) is treated the same as "good".
var recoveryStoreSelector = settings.RegisterStringSetting(
settings.SystemOnly,
"kv.allocator.recovery_store_selector",
"if set to 'good', the allocator may recover replicas to any valid store, if set "+
"to 'best' it will pick one of the most ideal stores",
"good",
)

// AllocatorAction enumerates the various replication adjustments that may be
// recommended by the allocator.
type AllocatorAction int
Expand Down Expand Up @@ -850,21 +864,72 @@ type decisionDetails struct {
Existing string `json:",omitempty"`
}

// CandidateSelector is an interface to select a store from a list of
// candidates.
type CandidateSelector interface {
// selectOne picks a single candidate out of cl. AllocateTargetFromList
// treats a nil result as "no target selected".
selectOne(cl candidateList) *candidate
}

// BestCandidateSelector is used to choose the best store to allocate. Its
// selectOne method delegates to candidateList.selectBest.
type BestCandidateSelector struct {
// randGen is the random generator handed to candidateList.selectBest.
randGen allocatorRand
}

// NewBestCandidateSelector builds a CandidateSelector that picks among the
// best candidate stores, using the allocator's randGen for the random choice.
func (a *Allocator) NewBestCandidateSelector() CandidateSelector {
	return &BestCandidateSelector{
		randGen: a.randGen,
	}
}

// selectOne implements CandidateSelector by delegating to
// candidateList.selectBest with the selector's random generator.
func (s *BestCandidateSelector) selectOne(cl candidateList) *candidate {
return cl.selectBest(s.randGen)
}

// GoodCandidateSelector is used to choose a random store out of the stores that
// are good enough. Its selectOne method delegates to candidateList.selectGood.
type GoodCandidateSelector struct {
// randGen is the random generator handed to candidateList.selectGood.
randGen allocatorRand
}

// NewGoodCandidateSelector builds a CandidateSelector that picks a random
// store among those that are good enough, using the allocator's randGen for
// the random choice.
func (a *Allocator) NewGoodCandidateSelector() CandidateSelector {
	return &GoodCandidateSelector{
		randGen: a.randGen,
	}
}

// selectOne implements CandidateSelector by delegating to
// candidateList.selectGood with the selector's random generator.
func (s *GoodCandidateSelector) selectOne(cl candidateList) *candidate {
return cl.selectGood(s.randGen)
}

func (a *Allocator) allocateTarget(
ctx context.Context,
conf roachpb.SpanConfig,
existingVoters, existingNonVoters []roachpb.ReplicaDescriptor,
replicaStatus ReplicaStatus,
targetType TargetReplicaType,
) (roachpb.ReplicationTarget, string, error) {
candidateStoreList, aliveStoreCount, throttled := a.StorePool.GetStoreList(storepool.StoreFilterThrottled)

// If the replica is alive we are upreplicating, and in that case we want to
// allocate new replicas on the best possible store. Otherwise, the replica is
// dead or decommissioned, and we want to recover the missing replica as soon
// as possible, and therefore any store that is good enough will be
// considered.
var selector CandidateSelector
if replicaStatus == Alive || recoveryStoreSelector.Get(&a.StorePool.St.SV) == "best" {
selector = a.NewBestCandidateSelector()
} else {
selector = a.NewGoodCandidateSelector()
}

target, details := a.AllocateTargetFromList(
ctx,
candidateStoreList,
conf,
existingVoters,
existingNonVoters,
a.ScorerOptions(ctx),
selector,
// When allocating a *new* replica, we explicitly disregard nodes with any
// existing replicas. This is important for multi-store scenarios as
// otherwise, stores on the nodes that have existing replicas are simply
Expand Down Expand Up @@ -902,8 +967,9 @@ func (a *Allocator) AllocateVoter(
ctx context.Context,
conf roachpb.SpanConfig,
existingVoters, existingNonVoters []roachpb.ReplicaDescriptor,
replicaStatus ReplicaStatus,
) (roachpb.ReplicationTarget, string, error) {
return a.allocateTarget(ctx, conf, existingVoters, existingNonVoters, VoterTarget)
return a.allocateTarget(ctx, conf, existingVoters, existingNonVoters, replicaStatus, VoterTarget)
}

// AllocateNonVoter returns a suitable store for a new allocation of a
Expand All @@ -913,8 +979,9 @@ func (a *Allocator) AllocateNonVoter(
ctx context.Context,
conf roachpb.SpanConfig,
existingVoters, existingNonVoters []roachpb.ReplicaDescriptor,
replicaStatus ReplicaStatus,
) (roachpb.ReplicationTarget, string, error) {
return a.allocateTarget(ctx, conf, existingVoters, existingNonVoters, NonVoterTarget)
return a.allocateTarget(ctx, conf, existingVoters, existingNonVoters, replicaStatus, NonVoterTarget)
}

// AllocateTargetFromList returns a suitable store for a new allocation of a
Expand All @@ -926,6 +993,7 @@ func (a *Allocator) AllocateTargetFromList(
conf roachpb.SpanConfig,
existingVoters, existingNonVoters []roachpb.ReplicaDescriptor,
options ScorerOptions,
selector CandidateSelector,
allowMultipleReplsPerNode bool,
targetType TargetReplicaType,
) (roachpb.ReplicationTarget, string) {
Expand Down Expand Up @@ -967,7 +1035,7 @@ func (a *Allocator) AllocateTargetFromList(
)

log.VEventf(ctx, 3, "allocate %s: %s", targetType, candidates)
if target := candidates.selectGood(a.randGen); target != nil {
if target := selector.selectOne(candidates); target != nil {
log.VEventf(ctx, 3, "add target: %s", target)
details := decisionDetails{Target: target.compactString()}
detailsBytes, err := json.Marshal(details)
Expand Down Expand Up @@ -1101,7 +1169,7 @@ func (a Allocator) RemoveTarget(
)

log.VEventf(ctx, 3, "remove %s: %s", targetType, rankedCandidates)
if bad := rankedCandidates.selectBad(a.randGen); bad != nil {
if bad := rankedCandidates.selectWorst(a.randGen); bad != nil {
for _, exist := range existingReplicas {
if exist.StoreID == bad.store.StoreID {
log.VEventf(ctx, 3, "remove target: %s", bad)
Expand Down
46 changes: 40 additions & 6 deletions pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer.go
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,23 @@ func (cl candidateList) best() candidateList {
return cl
}

// good narrows a candidate list that is sorted by score (descending) to its
// leading run of "good enough" stores. It first filters the list through
// onlyValidAndHealthyDisk, then keeps every candidate up to (but excluding)
// the first one whose necessary flag differs from the head candidate's or
// whose diversity score is not almost equal to the head candidate's.
func (cl candidateList) good() candidateList {
	valid := cl.onlyValidAndHealthyDisk()
	if len(valid) <= 1 {
		// Nothing to compare against; zero or one candidate is returned as is.
		return valid
	}
	head := &valid[0]
	end := 1
	for end < len(valid) {
		next := &valid[end]
		if next.necessary != head.necessary ||
			!scoresAlmostEqual(next.diversityScore, head.diversityScore) {
			// First candidate outside the head's tier: cut the list here.
			break
		}
		end++
	}
	return valid[:end]
}

// worst returns all the elements in a sorted (by score reversed) candidate list
// that share the lowest constraint score (for instance, the set of candidates
// that result in the lowest diversity score for the range, or the set of
Expand Down Expand Up @@ -836,9 +853,9 @@ func (cl candidateList) betterThan(c candidate) candidateList {
return cl
}

// selectGood randomly chooses a good candidate store from a sorted (by score
// reversed) candidate list using the provided random generator.
func (cl candidateList) selectGood(randGen allocatorRand) *candidate {
// selectBest randomly chooses one of the best candidate stores from a sorted
// (by score reversed) candidate list using the provided random generator.
func (cl candidateList) selectBest(randGen allocatorRand) *candidate {
cl = cl.best()
if len(cl) == 0 {
return nil
Expand All @@ -858,9 +875,26 @@ func (cl candidateList) selectGood(randGen allocatorRand) *candidate {
return best
}

// selectBad randomly chooses a bad candidate store from a sorted (by score
// selectGood randomly chooses a good candidate store from a sorted (by score
// reversed) candidate list using the provided random generator.
func (cl candidateList) selectBad(randGen allocatorRand) *candidate {
func (cl candidateList) selectGood(randGen allocatorRand) *candidate {
	// Restrict the choice to the candidates returned by good().
	pool := cl.good()
	switch len(pool) {
	case 0:
		// No acceptable candidate at all.
		return nil
	case 1:
		// A single option needs no randomness, so the generator is not locked.
		return &pool[0]
	}
	// Pick uniformly among the remaining candidates; the generator is locked
	// only for the duration of the draw.
	randGen.Lock()
	idx := randGen.Intn(len(pool))
	randGen.Unlock()
	return &pool[idx]
}

// selectWorst randomly chooses one of the worst candidate stores from a sorted
// (by score reversed) candidate list using the provided random generator.
func (cl candidateList) selectWorst(randGen allocatorRand) *candidate {
cl = cl.worst()
if len(cl) == 0 {
return nil
Expand Down Expand Up @@ -1570,7 +1604,7 @@ func bestRebalanceTarget(
if len(option.candidates) == 0 {
continue
}
target := option.candidates.selectGood(randGen)
target := option.candidates.selectBest(randGen)
if target == nil {
continue
}
Expand Down
Loading