diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index b1abe4677df..815e04f5c54 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -68,7 +68,7 @@ func HandleOverlaps(ctx context.Context, c Cluster, overlaps []*core.RegionInfo) if c.GetRegionStats() != nil { c.GetRegionStats().ClearDefunctRegion(item.GetID()) } - c.GetLabelStats().ClearDefunctRegion(item.GetID()) + c.GetLabelStats().MarkDefunctRegion(item.GetID()) c.GetRuleManager().InvalidCache(item.GetID()) } } diff --git a/pkg/mcs/scheduling/server/cluster.go b/pkg/mcs/scheduling/server/cluster.go index 0dcb26a1a1f..5885a9cdb84 100644 --- a/pkg/mcs/scheduling/server/cluster.go +++ b/pkg/mcs/scheduling/server/cluster.go @@ -379,13 +379,12 @@ func (c *Cluster) waitSchedulersInitialized() { } } -// TODO: implement the following methods - // UpdateRegionsLabelLevelStats updates the status of the region label level by types. func (c *Cluster) UpdateRegionsLabelLevelStats(regions []*core.RegionInfo) { for _, region := range regions { c.labelStats.Observe(region, c.getStoresWithoutLabelLocked(region, core.EngineKey, core.EngineTiFlash), c.persistConfig.GetLocationLabels()) } + c.labelStats.ClearDefunctRegions() } func (c *Cluster) getStoresWithoutLabelLocked(region *core.RegionInfo, key, value string) []*core.StoreInfo { diff --git a/pkg/statistics/region_collection.go b/pkg/statistics/region_collection.go index 30197dd43ea..7e51a8a7bdd 100644 --- a/pkg/statistics/region_collection.go +++ b/pkg/statistics/region_collection.go @@ -365,6 +365,7 @@ type LabelStatistics struct { syncutil.RWMutex regionLabelStats map[uint64]string labelCounter map[string]int + defunctRegions map[uint64]struct{} } // NewLabelStatistics creates a new LabelStatistics. @@ -372,6 +373,7 @@ func NewLabelStatistics() *LabelStatistics { return &LabelStatistics{ regionLabelStats: make(map[uint64]string), labelCounter: make(map[string]int), + defunctRegions: make(map[uint64]struct{}), } } @@ -405,14 +407,26 @@ func ResetLabelStatsMetrics() { regionLabelLevelGauge.Reset() } -// ClearDefunctRegion is used to handle the overlap region. -func (l *LabelStatistics) ClearDefunctRegion(regionID uint64) { +// MarkDefunctRegion is used to handle the overlap region. +// It is used to mark the region as defunct and remove it from the label statistics later. +func (l *LabelStatistics) MarkDefunctRegion(regionID uint64) { l.Lock() defer l.Unlock() - if label, ok := l.regionLabelStats[regionID]; ok { - l.labelCounter[label]-- - delete(l.regionLabelStats, regionID) + l.defunctRegions[regionID] = struct{}{} +} + +// ClearDefunctRegions is used to handle the overlap region. +// It is used to remove the defunct regions from the label statistics. +func (l *LabelStatistics) ClearDefunctRegions() { + l.Lock() + defer l.Unlock() + for regionID := range l.defunctRegions { + if label, ok := l.regionLabelStats[regionID]; ok { + l.labelCounter[label]-- + delete(l.regionLabelStats, regionID) + } } + l.defunctRegions = make(map[uint64]struct{}) } // GetLabelCounter is only used for tests. diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index fa1b14d107e..a37f9718bdb 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -1125,6 +1125,7 @@ func TestRegionLabelIsolationLevel(t *testing.T) { opt.SetReplicationConfig(cfg) re.NoError(err) cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) + cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) for i := uint64(1); i <= 4; i++ { var labels []*metapb.StoreLabel @@ -1159,13 +1160,42 @@ func TestRegionLabelIsolationLevel(t *testing.T) { StartKey: []byte{byte(1)}, EndKey: []byte{byte(2)}, } - r := core.NewRegionInfo(region, peers[0]) - re.NoError(cluster.putRegion(r)) + r1 := core.NewRegionInfo(region, peers[0]) + re.NoError(cluster.putRegion(r1)) - cluster.UpdateRegionsLabelLevelStats([]*core.RegionInfo{r}) + cluster.UpdateRegionsLabelLevelStats([]*core.RegionInfo{r1}) counter := cluster.labelStats.GetLabelCounter() re.Equal(0, counter["none"]) re.Equal(1, counter["zone"]) + + region = &metapb.Region{ + Id: 10, + Peers: peers, + StartKey: []byte{byte(2)}, + EndKey: []byte{byte(3)}, + } + r2 := core.NewRegionInfo(region, peers[0]) + re.NoError(cluster.putRegion(r2)) + + cluster.UpdateRegionsLabelLevelStats([]*core.RegionInfo{r2}) + counter = cluster.labelStats.GetLabelCounter() + re.Equal(0, counter["none"]) + re.Equal(2, counter["zone"]) + + // issue: https://github.com/tikv/pd/issues/8700 + // step1: heartbeat a overlap region, which is used to simulate the case that the region is merged. + // step2: update region 9 and region 10, which is used to simulate the case that patrol is triggered. + // We should only count region 9. + overlapRegion := r1.Clone( + core.WithStartKey(r1.GetStartKey()), + core.WithEndKey(r2.GetEndKey()), + core.WithLeader(r2.GetPeer(8)), + ) + re.NoError(cluster.HandleRegionHeartbeat(overlapRegion)) + cluster.UpdateRegionsLabelLevelStats([]*core.RegionInfo{r1, r2}) + counter = cluster.labelStats.GetLabelCounter() + re.Equal(0, counter["none"]) + re.Equal(1, counter["zone"]) } func heartbeatRegions(re *require.Assertions, cluster *RaftCluster, regions []*core.RegionInfo) { diff --git a/server/cluster/scheduling_controller.go b/server/cluster/scheduling_controller.go index 5d617700804..8578b3480d8 100644 --- a/server/cluster/scheduling_controller.go +++ b/server/cluster/scheduling_controller.go @@ -236,6 +236,7 @@ func (sc *schedulingController) UpdateRegionsLabelLevelStats(regions []*core.Reg for _, region := range regions { sc.labelStats.Observe(region, sc.getStoresWithoutLabelLocked(region, core.EngineKey, core.EngineTiFlash), sc.opt.GetLocationLabels()) } + sc.labelStats.ClearDefunctRegions() } func (sc *schedulingController) getStoresWithoutLabelLocked(region *core.RegionInfo, key, value string) []*core.StoreInfo {