diff --git a/pkg/statistics/handle/autoanalyze/priorityqueue/BUILD.bazel b/pkg/statistics/handle/autoanalyze/priorityqueue/BUILD.bazel index 927b1f0b50522..f291eda41e196 100644 --- a/pkg/statistics/handle/autoanalyze/priorityqueue/BUILD.bazel +++ b/pkg/statistics/handle/autoanalyze/priorityqueue/BUILD.bazel @@ -3,6 +3,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "priorityqueue", srcs = [ + "analysis_job_factory.go", "calculator.go", "dynamic_partitioned_table_analysis_job.go", "interval.go", @@ -14,13 +15,17 @@ go_library( importpath = "github.com/pingcap/tidb/pkg/statistics/handle/autoanalyze/priorityqueue", visibility = ["//visibility:public"], deps = [ + "//pkg/meta/model", "//pkg/sessionctx", "//pkg/sessionctx/sysproctrack", "//pkg/sessionctx/variable", + "//pkg/statistics", "//pkg/statistics/handle/autoanalyze/exec", "//pkg/statistics/handle/logutil", "//pkg/statistics/handle/types", "//pkg/statistics/handle/util", + "//pkg/util/timeutil", + "@com_github_tikv_client_go_v2//oracle", "@org_uber_go_zap//:zap", ], ) @@ -29,6 +34,7 @@ go_test( name = "priorityqueue_test", timeout = "short", srcs = [ + "analysis_job_factory_test.go", "calculator_test.go", "dynamic_partitioned_table_analysis_job_test.go", "interval_test.go", @@ -39,15 +45,18 @@ go_test( "static_partitioned_table_analysis_job_test.go", ], flaky = True, - shard_count = 22, + shard_count = 28, deps = [ ":priorityqueue", + "//pkg/meta/model", "//pkg/parser/model", "//pkg/session", "//pkg/sessionctx", + "//pkg/statistics", "//pkg/testkit", "//pkg/testkit/testsetup", "@com_github_stretchr_testify//require", + "@com_github_tikv_client_go_v2//oracle", "@org_uber_go_goleak//:goleak", ], ) diff --git a/pkg/statistics/handle/autoanalyze/priorityqueue/analysis_job_factory.go b/pkg/statistics/handle/autoanalyze/priorityqueue/analysis_job_factory.go new file mode 100644 index 0000000000000..8e0dbaaf21837 --- /dev/null +++ b/pkg/statistics/handle/autoanalyze/priorityqueue/analysis_job_factory.go @@ -0,0 +1,382 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package priorityqueue + +import ( + "time" + + "github.com/pingcap/tidb/pkg/meta/model" + "github.com/pingcap/tidb/pkg/sessionctx" + "github.com/pingcap/tidb/pkg/statistics" + statstypes "github.com/pingcap/tidb/pkg/statistics/handle/types" + "github.com/pingcap/tidb/pkg/util/intest" + "github.com/pingcap/tidb/pkg/util/timeutil" + "github.com/tikv/client-go/v2/oracle" +) + +const ( + // unanalyzedTableDefaultChangePercentage is the default change percentage of unanalyzed table. + unanalyzedTableDefaultChangePercentage = 1 + // unanalyzedTableDefaultLastUpdateDuration is the default last update duration of unanalyzed table. + unanalyzedTableDefaultLastUpdateDuration = -30 * time.Minute +) + +// AnalysisJobFactory is responsible for creating different types of analysis jobs. +// NOTE: This struct is not thread-safe. +type AnalysisJobFactory struct { + sctx sessionctx.Context + autoAnalyzeRatio float64 + // The current TSO. + currentTs uint64 +} + +// NewAnalysisJobFactory creates a new AnalysisJobFactory. +func NewAnalysisJobFactory(sctx sessionctx.Context, autoAnalyzeRatio float64, currentTs uint64) *AnalysisJobFactory { + return &AnalysisJobFactory{ + sctx: sctx, + autoAnalyzeRatio: autoAnalyzeRatio, + currentTs: currentTs, + } +} + +// CreateNonPartitionedTableAnalysisJob creates a job for non-partitioned tables. +func (f *AnalysisJobFactory) CreateNonPartitionedTableAnalysisJob( + tableSchema string, + tblInfo *model.TableInfo, + tblStats *statistics.Table, +) AnalysisJob { + if !tblStats.IsEligibleForAnalysis() { + return nil + } + + tableStatsVer := f.sctx.GetSessionVars().AnalyzeVersion + statistics.CheckAnalyzeVerOnTable(tblStats, &tableStatsVer) + + changePercentage := f.CalculateChangePercentage(tblStats) + tableSize := f.CalculateTableSize(tblStats) + lastAnalysisDuration := f.GetTableLastAnalyzeDuration(tblStats) + indexes := f.CheckIndexesNeedAnalyze(tblInfo, tblStats) + + // No need to analyze. + // We perform a separate check because users may set the auto analyze ratio to 0, + // yet still wish to analyze newly added indexes and tables that have not been analyzed. + if changePercentage == 0 && len(indexes) == 0 { + return nil + } + + return NewNonPartitionedTableAnalysisJob( + tableSchema, + tblInfo.Name.O, + tblInfo.ID, + indexes, + tableStatsVer, + changePercentage, + tableSize, + lastAnalysisDuration, + ) +} + +// CreateStaticPartitionAnalysisJob creates a job for static partitions. +func (f *AnalysisJobFactory) CreateStaticPartitionAnalysisJob( + tableSchema string, + globalTblInfo *model.TableInfo, + partitionID int64, + partitionName string, + partitionStats *statistics.Table, +) AnalysisJob { + if !partitionStats.IsEligibleForAnalysis() { + return nil + } + + tableStatsVer := f.sctx.GetSessionVars().AnalyzeVersion + statistics.CheckAnalyzeVerOnTable(partitionStats, &tableStatsVer) + + changePercentage := f.CalculateChangePercentage(partitionStats) + tableSize := f.CalculateTableSize(partitionStats) + lastAnalysisDuration := f.GetTableLastAnalyzeDuration(partitionStats) + indexes := f.CheckIndexesNeedAnalyze(globalTblInfo, partitionStats) + + // No need to analyze. + // We perform a separate check because users may set the auto analyze ratio to 0, + // yet still wish to analyze newly added indexes and tables that have not been analyzed. + if changePercentage == 0 && len(indexes) == 0 { + return nil + } + + return NewStaticPartitionTableAnalysisJob( + tableSchema, + globalTblInfo.Name.O, + globalTblInfo.ID, + partitionName, + partitionID, + indexes, + tableStatsVer, + changePercentage, + tableSize, + lastAnalysisDuration, + ) +} + +// CreateDynamicPartitionedTableAnalysisJob creates a job for dynamic partitioned tables. +func (f *AnalysisJobFactory) CreateDynamicPartitionedTableAnalysisJob( + tableSchema string, + globalTblInfo *model.TableInfo, + globalTblStats *statistics.Table, + partitionStats map[PartitionIDAndName]*statistics.Table, +) AnalysisJob { + if !globalTblStats.IsEligibleForAnalysis() { + return nil + } + + // TODO: figure out how to check the table stats version correctly for partitioned tables. + tableStatsVer := f.sctx.GetSessionVars().AnalyzeVersion + statistics.CheckAnalyzeVerOnTable(globalTblStats, &tableStatsVer) + + avgChange, avgSize, minLastAnalyzeDuration, partitionNames := f.CalculateIndicatorsForPartitions(globalTblStats, partitionStats) + partitionIndexes := f.CheckNewlyAddedIndexesNeedAnalyzeForPartitionedTable(globalTblInfo, partitionStats) + + // No need to analyze. + // We perform a separate check because users may set the auto analyze ratio to 0, + // yet still wish to analyze newly added indexes and tables that have not been analyzed. + if len(partitionNames) == 0 && len(partitionIndexes) == 0 { + return nil + } + + return NewDynamicPartitionedTableAnalysisJob( + tableSchema, + globalTblInfo.Name.O, + globalTblInfo.ID, + partitionNames, + partitionIndexes, + tableStatsVer, + avgChange, + avgSize, + minLastAnalyzeDuration, + ) +} + +// CalculateChangePercentage calculates the change percentage of the table +// based on the change count and the analysis count. +func (f *AnalysisJobFactory) CalculateChangePercentage(tblStats *statistics.Table) float64 { + if !tblStats.IsAnalyzed() { + return unanalyzedTableDefaultChangePercentage + } + + // Auto analyze based on the change percentage is disabled. + // However, this check should not affect the analysis of indexes, + // as index analysis is still needed for query performance. + if f.autoAnalyzeRatio == 0 { + return 0 + } + + tblCnt := float64(tblStats.RealtimeCount) + if histCnt := tblStats.GetAnalyzeRowCount(); histCnt > 0 { + tblCnt = histCnt + } + res := float64(tblStats.ModifyCount) / tblCnt + if res > f.autoAnalyzeRatio { + return res + } + + return 0 +} + +// CalculateTableSize calculates the size of the table. +func (*AnalysisJobFactory) CalculateTableSize(tblStats *statistics.Table) float64 { + tblCnt := float64(tblStats.RealtimeCount) + colCnt := float64(tblStats.ColAndIdxExistenceMap.ColNum()) + intest.Assert(colCnt != 0, "Column count should not be 0") + + return tblCnt * colCnt +} + +// GetTableLastAnalyzeDuration gets the last analyze duration of the table. +func (f *AnalysisJobFactory) GetTableLastAnalyzeDuration(tblStats *statistics.Table) time.Duration { + lastTime := f.FindLastAnalyzeTime(tblStats) + currentTime := oracle.GetTimeFromTS(f.currentTs) + + // Calculate the duration since last analyze. + return currentTime.Sub(lastTime) +} + +// FindLastAnalyzeTime finds the last analyze time of the table. +// It uses `LastUpdateVersion` to find the last analyze time. +// The `LastUpdateVersion` is the version of the transaction that updates the statistics. +// It always not null(default 0), so we can use it to find the last analyze time. +func (f *AnalysisJobFactory) FindLastAnalyzeTime(tblStats *statistics.Table) time.Time { + if !tblStats.IsAnalyzed() { + phy := oracle.GetTimeFromTS(f.currentTs) + return phy.Add(unanalyzedTableDefaultLastUpdateDuration) + } + return oracle.GetTimeFromTS(tblStats.LastAnalyzeVersion) +} + +// CheckIndexesNeedAnalyze checks if the indexes need to be analyzed. +func (*AnalysisJobFactory) CheckIndexesNeedAnalyze(tblInfo *model.TableInfo, tblStats *statistics.Table) []string { + // If table is not analyzed, we need to analyze whole table. + // So we don't need to check indexes. + if !tblStats.IsAnalyzed() { + return nil + } + + indexes := make([]string, 0, len(tblInfo.Indices)) + // Check if missing index stats. + for _, idx := range tblInfo.Indices { + if idxStats := tblStats.GetIdx(idx.ID); idxStats == nil && !tblStats.ColAndIdxExistenceMap.HasAnalyzed(idx.ID, true) && idx.State == model.StatePublic { + indexes = append(indexes, idx.Name.O) + } + } + + return indexes +} + +// CalculateIndicatorsForPartitions calculates the average change percentage, +// average size and average last analyze duration for the partitions that meet the threshold. +// Change percentage is the ratio of the number of modified rows to the total number of rows. +// Size is the product of the number of rows and the number of columns. +// Last analyze duration is the duration since the last analyze. +func (f *AnalysisJobFactory) CalculateIndicatorsForPartitions( + globalStats *statistics.Table, + partitionStats map[PartitionIDAndName]*statistics.Table, +) ( + avgChange float64, + avgSize float64, + avgLastAnalyzeDuration time.Duration, + partitionNames []string, +) { + totalChangePercent := 0.0 + totalSize := 0.0 + count := 0.0 + partitionNames = make([]string, 0, len(partitionStats)) + cols := float64(globalStats.ColAndIdxExistenceMap.ColNum()) + intest.Assert(cols != 0, "Column count should not be 0") + totalLastAnalyzeDuration := time.Duration(0) + + for pIDAndName, tblStats := range partitionStats { + // Skip partition analysis if it doesn't meet the threshold, stats are not yet loaded, + // or the auto analyze ratio is set to 0 by the user. + changePercent := f.CalculateChangePercentage(tblStats) + if changePercent == 0 { + continue + } + + totalChangePercent += changePercent + // size = count * cols + totalSize += float64(tblStats.RealtimeCount) * cols + lastAnalyzeDuration := f.GetTableLastAnalyzeDuration(tblStats) + totalLastAnalyzeDuration += lastAnalyzeDuration + partitionNames = append(partitionNames, pIDAndName.Name) + count++ + } + if len(partitionNames) == 0 { + return 0, 0, 0, partitionNames + } + + avgChange = totalChangePercent / count + avgSize = totalSize / count + avgLastAnalyzeDuration = totalLastAnalyzeDuration / time.Duration(count) + + return avgChange, avgSize, avgLastAnalyzeDuration, partitionNames +} + +// CheckNewlyAddedIndexesNeedAnalyzeForPartitionedTable checks if the indexes of the partitioned table need to be analyzed. +// It returns a map from index name to the names of the partitions that need to be analyzed. +// NOTE: This is only for newly added indexes. +func (*AnalysisJobFactory) CheckNewlyAddedIndexesNeedAnalyzeForPartitionedTable( + tblInfo *model.TableInfo, + partitionStats map[PartitionIDAndName]*statistics.Table, +) map[string][]string { + partitionIndexes := make(map[string][]string, len(tblInfo.Indices)) + + for _, idx := range tblInfo.Indices { + // No need to analyze the index if it's not public. + if idx.State != model.StatePublic { + continue + } + + // Find all the partitions that need to analyze this index. + names := make([]string, 0, len(partitionStats)) + for pIDAndName, tblStats := range partitionStats { + if idxStats := tblStats.GetIdx(idx.ID); idxStats == nil && !tblStats.ColAndIdxExistenceMap.HasAnalyzed(idx.ID, true) { + names = append(names, pIDAndName.Name) + } + } + + if len(names) > 0 { + partitionIndexes[idx.Name.O] = names + } + } + + return partitionIndexes +} + +// PartitionIDAndName is a struct that contains the ID and name of a partition. +// Exported for testing purposes. Do not use it in other packages. +type PartitionIDAndName struct { + Name string + ID int64 +} + +// NewPartitionIDAndName creates a new PartitionIDAndName. +func NewPartitionIDAndName(name string, id int64) PartitionIDAndName { + return PartitionIDAndName{ + Name: name, + ID: id, + } +} + +// GetPartitionStats gets the partition stats. +func GetPartitionStats( + statsHandle statstypes.StatsHandle, + tblInfo *model.TableInfo, + defs []model.PartitionDefinition, +) map[PartitionIDAndName]*statistics.Table { + partitionStats := make(map[PartitionIDAndName]*statistics.Table, len(defs)) + + for _, def := range defs { + stats := statsHandle.GetPartitionStatsForAutoAnalyze(tblInfo, def.ID) + // Ignore the partition if it's not ready to analyze. + if !stats.IsEligibleForAnalysis() { + continue + } + d := NewPartitionIDAndName(def.Name.O, def.ID) + partitionStats[d] = stats + } + + return partitionStats +} + +// AutoAnalysisTimeWindow is a struct that contains the start and end time of the auto analyze time window. +type AutoAnalysisTimeWindow struct { + start time.Time + end time.Time +} + +// NewAutoAnalysisTimeWindow creates a new AutoAnalysisTimeWindow. +func NewAutoAnalysisTimeWindow(start, end time.Time) AutoAnalysisTimeWindow { + return AutoAnalysisTimeWindow{ + start: start, + end: end, + } +} + +// IsWithinTimeWindow checks if the current time is within the time window. +// If the auto analyze time window is not set or the current time is not in the window, return false. +func (a AutoAnalysisTimeWindow) IsWithinTimeWindow(currentTime time.Time) bool { + if a.start.IsZero() || a.end.IsZero() { + return false + } + return timeutil.WithinDayTimePeriod(a.start, a.end, currentTime) +} diff --git a/pkg/statistics/handle/autoanalyze/priorityqueue/analysis_job_factory_test.go b/pkg/statistics/handle/autoanalyze/priorityqueue/analysis_job_factory_test.go new file mode 100644 index 0000000000000..cce1fe5f3e63a --- /dev/null +++ b/pkg/statistics/handle/autoanalyze/priorityqueue/analysis_job_factory_test.go @@ -0,0 +1,479 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package priorityqueue_test + +import ( + "sort" + "testing" + "time" + + "github.com/pingcap/tidb/pkg/meta/model" + pmodel "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/statistics" + "github.com/pingcap/tidb/pkg/statistics/handle/autoanalyze/priorityqueue" + "github.com/stretchr/testify/require" + "github.com/tikv/client-go/v2/oracle" +) + +func TestCalculateChangePercentage(t *testing.T) { + tests := []struct { + name string + tblStats *statistics.Table + autoAnalyzeRatio float64 + want float64 + }{ + { + name: "Unanalyzed table", + tblStats: &statistics.Table{ + HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, statistics.AutoAnalyzeMinCnt+1, 0, nil, nil), + ColAndIdxExistenceMap: statistics.NewColAndIndexExistenceMap(0, 0), + }, + autoAnalyzeRatio: 0.5, + want: 1, + }, + { + name: "Analyzed table with change percentage above threshold", + tblStats: &statistics.Table{ + HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, 100, 60, nil, nil), + ColAndIdxExistenceMap: statistics.NewColAndIndexExistenceMap(1, 1), + LastAnalyzeVersion: 1, + }, + autoAnalyzeRatio: 0.5, + want: 0.6, + }, + { + name: "Analyzed table with change percentage below threshold", + tblStats: &statistics.Table{ + HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, 100, 40, nil, nil), + ColAndIdxExistenceMap: statistics.NewColAndIndexExistenceMap(1, 1), + LastAnalyzeVersion: 1, + }, + autoAnalyzeRatio: 0.5, + want: 0, + }, + { + name: "Auto analyze ratio set to 0", + tblStats: &statistics.Table{ + HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, 100, 60, nil, nil), + ColAndIdxExistenceMap: statistics.NewColAndIndexExistenceMap(1, 1), + LastAnalyzeVersion: 1, + }, + autoAnalyzeRatio: 0, + want: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + factory := priorityqueue.NewAnalysisJobFactory(nil, tt.autoAnalyzeRatio, 0) + got := factory.CalculateChangePercentage(tt.tblStats) + require.InDelta(t, tt.want, got, 0.001) + }) + } +} + +func TestGetTableLastAnalyzeDuration(t *testing.T) { + tests := []struct { + name string + tblStats *statistics.Table + currentTs uint64 + wantDuration time.Duration + }{ + { + name: "Analyzed table", + tblStats: &statistics.Table{ + LastAnalyzeVersion: oracle.GoTimeToTS(time.Now().Add(-24 * time.Hour)), + }, + currentTs: oracle.GoTimeToTS(time.Now()), + wantDuration: 24 * time.Hour, + }, + { + name: "Unanalyzed table", + tblStats: &statistics.Table{ + HistColl: statistics.HistColl{}, + }, + currentTs: oracle.GoTimeToTS(time.Now()), + wantDuration: 30 * time.Minute, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + factory := priorityqueue.NewAnalysisJobFactory(nil, 0, tt.currentTs) + got := factory.GetTableLastAnalyzeDuration(tt.tblStats) + require.InDelta(t, tt.wantDuration, got, float64(time.Second)) + }) + } +} + +func TestCheckIndexesNeedAnalyze(t *testing.T) { + analyzedMap := statistics.NewColAndIndexExistenceMap(1, 0) + analyzedMap.InsertCol(1, nil, true) + analyzedMap.InsertIndex(1, nil, false) + tests := []struct { + name string + tblInfo *model.TableInfo + tblStats *statistics.Table + want []string + }{ + { + name: "Test Table not analyzed", + tblInfo: &model.TableInfo{ + Indices: []*model.IndexInfo{ + { + ID: 1, + Name: pmodel.NewCIStr("index1"), + State: model.StatePublic, + }, + }, + }, + tblStats: &statistics.Table{ColAndIdxExistenceMap: statistics.NewColAndIndexExistenceMap(0, 0)}, + want: nil, + }, + { + name: "Test Index not analyzed", + tblInfo: &model.TableInfo{ + Indices: []*model.IndexInfo{ + { + ID: 1, + Name: pmodel.NewCIStr("index1"), + State: model.StatePublic, + }, + }, + }, + tblStats: &statistics.Table{ + HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, 0, 0, map[int64]*statistics.Column{ + 1: { + StatsVer: 2, + }, + }, nil), + ColAndIdxExistenceMap: analyzedMap, + LastAnalyzeVersion: 1, + }, + want: []string{"index1"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + factory := priorityqueue.NewAnalysisJobFactory(nil, 0, 0) + got := factory.CheckIndexesNeedAnalyze(tt.tblInfo, tt.tblStats) + require.Equal(t, tt.want, got) + }) + } +} + +func TestCalculateIndicatorsForPartitions(t *testing.T) { + // 2024-01-01 10:00:00 + currentTime := time.Date(2024, 1, 1, 10, 0, 0, 0, time.UTC) + currentTs := oracle.GoTimeToTS(currentTime) + // 2023-12-31 10:00:00 + lastUpdateTime := time.Date(2023, 12, 31, 10, 0, 0, 0, time.UTC) + lastUpdateTs := oracle.GoTimeToTS(lastUpdateTime) + unanalyzedMap := statistics.NewColAndIndexExistenceMap(0, 0) + analyzedMap := statistics.NewColAndIndexExistenceMap(2, 1) + analyzedMap.InsertCol(1, nil, true) + analyzedMap.InsertCol(2, nil, true) + analyzedMap.InsertIndex(1, nil, true) + tests := []struct { + name string + globalStats *statistics.Table + partitionStats map[priorityqueue.PartitionIDAndName]*statistics.Table + defs []model.PartitionDefinition + autoAnalyzeRatio float64 + currentTs uint64 + wantAvgChangePercentage float64 + wantAvgSize float64 + wantAvgLastAnalyzeDuration time.Duration + wantPartitions []string + }{ + { + name: "Test Table not analyzed", + globalStats: &statistics.Table{ + ColAndIdxExistenceMap: analyzedMap, + }, + partitionStats: map[priorityqueue.PartitionIDAndName]*statistics.Table{ + priorityqueue.NewPartitionIDAndName("p0", 1): { + HistColl: statistics.HistColl{ + Pseudo: false, + RealtimeCount: statistics.AutoAnalyzeMinCnt + 1, + }, + ColAndIdxExistenceMap: unanalyzedMap, + }, + priorityqueue.NewPartitionIDAndName("p1", 2): { + HistColl: statistics.HistColl{ + Pseudo: false, + RealtimeCount: statistics.AutoAnalyzeMinCnt + 1, + }, + ColAndIdxExistenceMap: unanalyzedMap, + }, + }, + defs: []model.PartitionDefinition{ + { + ID: 1, + Name: pmodel.NewCIStr("p0"), + }, + { + ID: 2, + Name: pmodel.NewCIStr("p1"), + }, + }, + autoAnalyzeRatio: 0.5, + currentTs: currentTs, + wantAvgChangePercentage: 1, + wantAvgSize: 2002, + wantAvgLastAnalyzeDuration: 1800 * time.Second, + wantPartitions: []string{"p0", "p1"}, + }, + { + name: "Test Table analyzed and only one partition meets the threshold", + globalStats: &statistics.Table{ + ColAndIdxExistenceMap: analyzedMap, + }, + partitionStats: map[priorityqueue.PartitionIDAndName]*statistics.Table{ + priorityqueue.NewPartitionIDAndName("p0", 1): { + HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, statistics.AutoAnalyzeMinCnt+1, (statistics.AutoAnalyzeMinCnt+1)*2, map[int64]*statistics.Column{ + 1: { + StatsVer: 2, + Histogram: statistics.Histogram{ + LastUpdateVersion: lastUpdateTs, + }, + }, + 2: { + StatsVer: 2, + Histogram: statistics.Histogram{ + LastUpdateVersion: lastUpdateTs, + }, + }, + }, nil), + Version: currentTs, + ColAndIdxExistenceMap: analyzedMap, + LastAnalyzeVersion: lastUpdateTs, + }, + priorityqueue.NewPartitionIDAndName("p1", 2): { + HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, statistics.AutoAnalyzeMinCnt+1, 0, map[int64]*statistics.Column{ + 1: { + StatsVer: 2, + Histogram: statistics.Histogram{ + LastUpdateVersion: lastUpdateTs, + }, + }, + 2: { + StatsVer: 2, + Histogram: statistics.Histogram{ + LastUpdateVersion: lastUpdateTs, + }, + }, + }, nil), + Version: currentTs, + ColAndIdxExistenceMap: analyzedMap, + LastAnalyzeVersion: lastUpdateTs, + }, + }, + defs: []model.PartitionDefinition{ + { + ID: 1, + Name: pmodel.NewCIStr("p0"), + }, + { + ID: 2, + Name: pmodel.NewCIStr("p1"), + }, + }, + autoAnalyzeRatio: 0.5, + currentTs: currentTs, + wantAvgChangePercentage: 2, + wantAvgSize: 2002, + wantAvgLastAnalyzeDuration: 24 * time.Hour, + wantPartitions: []string{"p0"}, + }, + { + name: "No partition meets the threshold", + globalStats: &statistics.Table{ + ColAndIdxExistenceMap: analyzedMap, + }, + partitionStats: map[priorityqueue.PartitionIDAndName]*statistics.Table{ + priorityqueue.NewPartitionIDAndName("p0", 1): { + HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, statistics.AutoAnalyzeMinCnt+1, 0, map[int64]*statistics.Column{ + 1: { + StatsVer: 2, + Histogram: statistics.Histogram{ + LastUpdateVersion: lastUpdateTs, + }, + }, + 2: { + StatsVer: 2, + Histogram: statistics.Histogram{ + LastUpdateVersion: lastUpdateTs, + }, + }, + }, nil), + Version: currentTs, + ColAndIdxExistenceMap: analyzedMap, + LastAnalyzeVersion: lastUpdateTs, + }, + priorityqueue.NewPartitionIDAndName("p1", 2): { + HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, statistics.AutoAnalyzeMinCnt+1, 0, map[int64]*statistics.Column{ + 1: { + StatsVer: 2, + Histogram: statistics.Histogram{ + LastUpdateVersion: lastUpdateTs, + }, + }, + 2: { + StatsVer: 2, + Histogram: statistics.Histogram{ + LastUpdateVersion: lastUpdateTs, + }, + }, + }, nil), + Version: currentTs, + ColAndIdxExistenceMap: analyzedMap, + LastAnalyzeVersion: lastUpdateTs, + }, + }, + defs: []model.PartitionDefinition{ + { + ID: 1, + Name: pmodel.NewCIStr("p0"), + }, + { + ID: 2, + Name: pmodel.NewCIStr("p1"), + }, + }, + autoAnalyzeRatio: 0.5, + currentTs: currentTs, + wantAvgChangePercentage: 0, + wantAvgSize: 0, + wantAvgLastAnalyzeDuration: 0, + wantPartitions: []string{}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + factory := priorityqueue.NewAnalysisJobFactory(nil, tt.autoAnalyzeRatio, tt.currentTs) + gotAvgChangePercentage, + gotAvgSize, + gotAvgLastAnalyzeDuration, + gotPartitions := + factory.CalculateIndicatorsForPartitions( + tt.globalStats, + tt.partitionStats, + ) + require.Equal(t, tt.wantAvgChangePercentage, gotAvgChangePercentage) + require.Equal(t, tt.wantAvgSize, gotAvgSize) + require.Equal(t, tt.wantAvgLastAnalyzeDuration, gotAvgLastAnalyzeDuration) + // Sort the partitions. + sort.Strings(tt.wantPartitions) + sort.Strings(gotPartitions) + require.Equal(t, tt.wantPartitions, gotPartitions) + }) + } +} + +func TestCheckNewlyAddedIndexesNeedAnalyzeForPartitionedTable(t *testing.T) { + tblInfo := model.TableInfo{ + Indices: []*model.IndexInfo{ + { + ID: 1, + Name: pmodel.NewCIStr("index1"), + State: model.StatePublic, + }, + { + ID: 2, + Name: pmodel.NewCIStr("index2"), + State: model.StatePublic, + }, + }, + Columns: []*model.ColumnInfo{ + { + ID: 1, + }, + { + ID: 2, + }, + }, + } + partitionStats := map[priorityqueue.PartitionIDAndName]*statistics.Table{ + priorityqueue.NewPartitionIDAndName("p0", 1): { + HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, statistics.AutoAnalyzeMinCnt+1, 0, nil, map[int64]*statistics.Index{}), + ColAndIdxExistenceMap: statistics.NewColAndIndexExistenceMap(0, 0), + }, + priorityqueue.NewPartitionIDAndName("p1", 2): { + HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, statistics.AutoAnalyzeMinCnt+1, 0, nil, map[int64]*statistics.Index{ + 2: { + StatsVer: 2, + }, + }), + ColAndIdxExistenceMap: statistics.NewColAndIndexExistenceMap(0, 1), + }, + } + + factory := priorityqueue.NewAnalysisJobFactory(nil, 0, 0) + partitionIndexes := factory.CheckNewlyAddedIndexesNeedAnalyzeForPartitionedTable(&tblInfo, partitionStats) + expected := map[string][]string{"index1": {"p0", "p1"}, "index2": {"p0"}} + require.Equal(t, len(expected), len(partitionIndexes)) + + for k, v := range expected { + sort.Strings(v) + if val, ok := partitionIndexes[k]; ok { + sort.Strings(val) + require.Equal(t, v, val) + } else { + require.Fail(t, "key not found in partitionIndexes: "+k) + } + } +} + +func TestAutoAnalysisTimeWindow(t *testing.T) { + tests := []struct { + name string + start time.Time + end time.Time + current time.Time + wantWithin bool + }{ + { + name: "Within time window", + start: time.Date(2024, 1, 1, 1, 0, 0, 0, time.UTC), + end: time.Date(2024, 1, 1, 5, 0, 0, 0, time.UTC), + current: time.Date(2024, 1, 1, 3, 0, 0, 0, time.UTC), + wantWithin: true, + }, + { + name: "Outside time window", + start: time.Date(2024, 1, 1, 1, 0, 0, 0, time.UTC), + end: time.Date(2024, 1, 1, 5, 0, 0, 0, time.UTC), + current: time.Date(2024, 1, 1, 6, 0, 0, 0, time.UTC), + wantWithin: false, + }, + { + name: "Empty time window", + start: time.Time{}, + end: time.Time{}, + current: time.Now(), + wantWithin: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + window := priorityqueue.NewAutoAnalysisTimeWindow(tt.start, tt.end) + got := window.IsWithinTimeWindow(tt.current) + require.Equal(t, tt.wantWithin, got) + }) + } +} diff --git a/pkg/statistics/handle/autoanalyze/refresher/BUILD.bazel b/pkg/statistics/handle/autoanalyze/refresher/BUILD.bazel index 1274b67db2a4c..97c8f62b369f7 100644 --- a/pkg/statistics/handle/autoanalyze/refresher/BUILD.bazel +++ b/pkg/statistics/handle/autoanalyze/refresher/BUILD.bazel @@ -14,7 +14,6 @@ go_library( "//pkg/sessionctx", "//pkg/sessionctx/sysproctrack", "//pkg/sessionctx/variable", - "//pkg/statistics", "//pkg/statistics/handle/autoanalyze/exec", "//pkg/statistics/handle/autoanalyze/priorityqueue", "//pkg/statistics/handle/lockstats", @@ -23,8 +22,6 @@ go_library( "//pkg/statistics/handle/util", "//pkg/util", "//pkg/util/intest", - "//pkg/util/timeutil", - "@com_github_tikv_client_go_v2//oracle", "@org_uber_go_zap//:zap", ], ) @@ -38,10 +35,9 @@ go_test( "worker_test.go", ], flaky = True, - shard_count = 15, + shard_count = 9, deps = [ ":refresher", - "//pkg/meta/model", "//pkg/parser/model", "//pkg/sessionctx", "//pkg/sessionctx/sysproctrack", @@ -51,7 +47,6 @@ go_test( "//pkg/testkit", "//pkg/testkit/testsetup", "@com_github_stretchr_testify//require", - "@com_github_tikv_client_go_v2//oracle", "@org_uber_go_goleak//:goleak", ], ) diff --git a/pkg/statistics/handle/autoanalyze/refresher/refresher.go b/pkg/statistics/handle/autoanalyze/refresher/refresher.go index 626e609cdc644..5a991cb354924 100644 --- a/pkg/statistics/handle/autoanalyze/refresher/refresher.go +++ b/pkg/statistics/handle/autoanalyze/refresher/refresher.go @@ -23,7 +23,6 @@ import ( "github.com/pingcap/tidb/pkg/sessionctx" "github.com/pingcap/tidb/pkg/sessionctx/sysproctrack" "github.com/pingcap/tidb/pkg/sessionctx/variable" - "github.com/pingcap/tidb/pkg/statistics" "github.com/pingcap/tidb/pkg/statistics/handle/autoanalyze/exec" "github.com/pingcap/tidb/pkg/statistics/handle/autoanalyze/priorityqueue" "github.com/pingcap/tidb/pkg/statistics/handle/lockstats" @@ -32,25 +31,16 @@ import ( statsutil "github.com/pingcap/tidb/pkg/statistics/handle/util" "github.com/pingcap/tidb/pkg/util" "github.com/pingcap/tidb/pkg/util/intest" - "github.com/pingcap/tidb/pkg/util/timeutil" - "github.com/tikv/client-go/v2/oracle" "go.uber.org/zap" ) -const ( - // unanalyzedTableDefaultChangePercentage is the default change percentage of unanalyzed table. - unanalyzedTableDefaultChangePercentage = 1 - // unanalyzedTableDefaultLastUpdateDuration is the default last update duration of unanalyzed table. - unanalyzedTableDefaultLastUpdateDuration = -30 * time.Minute -) - // Refresher provides methods to refresh stats info. // NOTE: Refresher is not thread-safe. type Refresher struct { statsHandle statstypes.StatsHandle sysProcTracker sysproctrack.Tracker // This will be refreshed every time we rebuild the priority queue. - autoAnalysisTimeWindow + autoAnalysisTimeWindow priorityqueue.AutoAnalysisTimeWindow // Jobs is the priority queue of analysis jobs. // Exported for testing purposes. @@ -84,7 +74,7 @@ func (r *Refresher) UpdateConcurrency() { // AnalyzeHighestPriorityTables picks tables with the highest priority and analyzes them. func (r *Refresher) AnalyzeHighestPriorityTables() bool { - if !r.autoAnalysisTimeWindow.isWithinTimeWindow(time.Now()) { + if !r.autoAnalysisTimeWindow.IsWithinTimeWindow(time.Now()) { return false } @@ -182,11 +172,8 @@ func (r *Refresher) RebuildTableAnalysisJobQueue() error { } // We will check it again when we try to execute the job. // So store the time window for later use. - r.autoAnalysisTimeWindow = autoAnalysisTimeWindow{ - start: start, - end: end, - } - if !r.autoAnalysisTimeWindow.isWithinTimeWindow(time.Now()) { + r.autoAnalysisTimeWindow = priorityqueue.NewAutoAnalysisTimeWindow(start, end) + if !r.autoAnalysisTimeWindow.IsWithinTimeWindow(time.Now()) { return nil } calculator := priorityqueue.NewPriorityCalculator() @@ -204,11 +191,13 @@ func (r *Refresher) RebuildTableAnalysisJobQueue() error { return err } + jobFactory := priorityqueue.NewAnalysisJobFactory(sctx, autoAnalyzeRatio, currentTs) + dbs := is.AllSchemaNames() for _, db := range dbs { // Sometimes the tables are too many. Auto-analyze will take too much time on it. // so we need to check the available time. - if !r.autoAnalysisTimeWindow.isWithinTimeWindow(time.Now()) { + if !r.autoAnalysisTimeWindow.IsWithinTimeWindow(time.Now()) { return nil } // Ignore the memory and system database. @@ -231,37 +220,13 @@ func (r *Refresher) RebuildTableAnalysisJobQueue() error { continue } pi := tblInfo.GetPartitionInfo() - pushJobFunc := func(job priorityqueue.AnalysisJob) { - if job == nil { - return - } - // Calculate the weight of the job. - weight := calculator.CalculateWeight(job) - // We apply a penalty to larger tables, which can potentially result in a negative weight. - // To prevent this, we filter out any negative weights. Under normal circumstances, table sizes should not be negative. - if weight <= 0 { - statslogutil.SingletonStatsSamplerLogger().Warn( - "Table gets a negative weight", - zap.Float64("weight", weight), - zap.Stringer("job", job), - ) - } - job.SetWeight(weight) - // Push the job onto the queue. - r.Jobs.Push(job) - } - // No partitions, analyze the whole table. if pi == nil { - job := CreateTableAnalysisJob( - sctx, + job := jobFactory.CreateNonPartitionedTableAnalysisJob( db.O, tblInfo, r.statsHandle.GetTableStatsForAutoAnalyze(tblInfo), - autoAnalyzeRatio, - currentTs, ) - pushJobFunc(job) - // Skip the rest of the loop. + r.pushJob(job, calculator) continue } @@ -272,33 +237,27 @@ func (r *Refresher) RebuildTableAnalysisJobQueue() error { partitionDefs = append(partitionDefs, def) } } - partitionStats := getPartitionStats(r.statsHandle, tblInfo, partitionDefs) + partitionStats := priorityqueue.GetPartitionStats(r.statsHandle, tblInfo, partitionDefs) // If the prune mode is static, we need to analyze every partition as a separate table. if pruneMode == variable.Static { for pIDAndName, stats := range partitionStats { - job := CreateStaticPartitionAnalysisJob( - sctx, + job := jobFactory.CreateStaticPartitionAnalysisJob( db.O, tblInfo, pIDAndName.ID, pIDAndName.Name, stats, - autoAnalyzeRatio, - currentTs, ) - pushJobFunc(job) + r.pushJob(job, calculator) } } else { - job := createTableAnalysisJobForPartitions( - sctx, + job := jobFactory.CreateDynamicPartitionedTableAnalysisJob( db.O, tblInfo, r.statsHandle.GetPartitionStatsForAutoAnalyze(tblInfo, tblInfo.ID), partitionStats, - autoAnalyzeRatio, - currentTs, ) - pushJobFunc(job) + r.pushJob(job, calculator) } } } @@ -313,6 +272,25 @@ func (r *Refresher) RebuildTableAnalysisJobQueue() error { return nil } +func (r *Refresher) pushJob(job priorityqueue.AnalysisJob, calculator *priorityqueue.PriorityCalculator) { + if job == nil { + return + } + // We apply a penalty to larger tables, which can potentially result in a negative weight. + // To prevent this, we filter out any negative weights. Under normal circumstances, table sizes should not be negative. + weight := calculator.CalculateWeight(job) + if weight <= 0 { + statslogutil.SingletonStatsSamplerLogger().Warn( + "Table gets a negative weight", + zap.Float64("weight", weight), + zap.Stringer("job", job), + ) + } + job.SetWeight(weight) + // Push the job onto the queue. + r.Jobs.Push(job) +} + // WaitAutoAnalyzeFinishedForTest waits for the auto analyze job to be finished. // Only used in the test. func (r *Refresher) WaitAutoAnalyzeFinishedForTest() { @@ -330,314 +308,6 @@ func (r *Refresher) Close() { r.worker.Stop() } -// CreateTableAnalysisJob creates a TableAnalysisJob for the physical table. -func CreateTableAnalysisJob( - sctx sessionctx.Context, - tableSchema string, - tblInfo *model.TableInfo, - tblStats *statistics.Table, - autoAnalyzeRatio float64, - currentTs uint64, -) priorityqueue.AnalysisJob { - if !tblStats.IsEligibleForAnalysis() { - return nil - } - - tableStatsVer := sctx.GetSessionVars().AnalyzeVersion - statistics.CheckAnalyzeVerOnTable(tblStats, &tableStatsVer) - - changePercentage := CalculateChangePercentage(tblStats, autoAnalyzeRatio) - tableSize := calculateTableSize(tblStats) - lastAnalysisDuration := GetTableLastAnalyzeDuration(tblStats, currentTs) - indexes := CheckIndexesNeedAnalyze(tblInfo, tblStats) - - // No need to analyze. - // We perform a separate check because users may set the auto analyze ratio to 0, - // yet still wish to analyze newly added indexes and tables that have not been analyzed. - if changePercentage == 0 && len(indexes) == 0 { - return nil - } - - job := priorityqueue.NewNonPartitionedTableAnalysisJob( - tableSchema, - tblInfo.Name.O, - tblInfo.ID, - indexes, - tableStatsVer, - changePercentage, - tableSize, - lastAnalysisDuration, - ) - - return job -} - -// CreateStaticPartitionAnalysisJob creates a TableAnalysisJob for the static partition. -func CreateStaticPartitionAnalysisJob( - sctx sessionctx.Context, - tableSchema string, - globalTblInfo *model.TableInfo, - partitionID int64, - partitionName string, - partitionStats *statistics.Table, - autoAnalyzeRatio float64, - currentTs uint64, -) priorityqueue.AnalysisJob { - if !partitionStats.IsEligibleForAnalysis() { - return nil - } - - tableStatsVer := sctx.GetSessionVars().AnalyzeVersion - statistics.CheckAnalyzeVerOnTable(partitionStats, &tableStatsVer) - - changePercentage := CalculateChangePercentage(partitionStats, autoAnalyzeRatio) - tableSize := calculateTableSize(partitionStats) - lastAnalysisDuration := GetTableLastAnalyzeDuration(partitionStats, currentTs) - indexes := CheckIndexesNeedAnalyze(globalTblInfo, partitionStats) - - // No need to analyze. - // We perform a separate check because users may set the auto analyze ratio to 0, - // yet still wish to analyze newly added indexes and tables that have not been analyzed. - if changePercentage == 0 && len(indexes) == 0 { - return nil - } - - job := priorityqueue.NewStaticPartitionTableAnalysisJob( - tableSchema, - globalTblInfo.Name.O, - globalTblInfo.ID, - partitionName, - partitionID, - indexes, - tableStatsVer, - changePercentage, - tableSize, - lastAnalysisDuration, - ) - - return job -} - -// CalculateChangePercentage calculates the change percentage of the table -// based on the change count and the analysis count. -func CalculateChangePercentage( - tblStats *statistics.Table, - autoAnalyzeRatio float64, -) float64 { - if !tblStats.IsAnalyzed() { - return unanalyzedTableDefaultChangePercentage - } - - // Auto analyze based on the change percentage is disabled. - // However, this check should not affect the analysis of indexes, - // as index analysis is still needed for query performance. - if autoAnalyzeRatio == 0 { - return 0 - } - - tblCnt := float64(tblStats.RealtimeCount) - if histCnt := tblStats.GetAnalyzeRowCount(); histCnt > 0 { - tblCnt = histCnt - } - res := float64(tblStats.ModifyCount) / tblCnt - if res > autoAnalyzeRatio { - return res - } - - return 0 -} - -func calculateTableSize( - tblStats *statistics.Table, -) float64 { - tblCnt := float64(tblStats.RealtimeCount) - colCnt := float64(tblStats.ColAndIdxExistenceMap.ColNum()) - intest.Assert(colCnt != 0, "Column count should not be 0") - - return tblCnt * colCnt -} - -// GetTableLastAnalyzeDuration gets the duration since the last analysis of the table. -func GetTableLastAnalyzeDuration( - tblStats *statistics.Table, - currentTs uint64, -) time.Duration { - lastTime := findLastAnalyzeTime(tblStats, currentTs) - currentTime := oracle.GetTimeFromTS(currentTs) - - // Calculate the duration since last analyze. - return currentTime.Sub(lastTime) -} - -// findLastAnalyzeTime finds the last analyze time of the table. -// It uses `LastUpdateVersion` to find the last analyze time. -// The `LastUpdateVersion` is the version of the transaction that updates the statistics. -// It always not null(default 0), so we can use it to find the last analyze time. -func findLastAnalyzeTime( - tblStats *statistics.Table, - currentTs uint64, -) time.Time { - // Table is not analyzed, compose a fake version. - if !tblStats.IsAnalyzed() { - phy := oracle.GetTimeFromTS(currentTs) - return phy.Add(unanalyzedTableDefaultLastUpdateDuration) - } - return oracle.GetTimeFromTS(tblStats.LastAnalyzeVersion) -} - -// CheckIndexesNeedAnalyze checks if the indexes of the table need to be analyzed. -func CheckIndexesNeedAnalyze( - tblInfo *model.TableInfo, - tblStats *statistics.Table, -) []string { - // If table is not analyzed, we need to analyze whole table. - // So we don't need to check indexes. - if !tblStats.IsAnalyzed() { - return nil - } - - indexes := make([]string, 0, len(tblInfo.Indices)) - // Check if missing index stats. - for _, idx := range tblInfo.Indices { - if idxStats := tblStats.GetIdx(idx.ID); idxStats == nil && !tblStats.ColAndIdxExistenceMap.HasAnalyzed(idx.ID, true) && idx.State == model.StatePublic { - indexes = append(indexes, idx.Name.O) - } - } - - return indexes -} - -func createTableAnalysisJobForPartitions( - sctx sessionctx.Context, - tableSchema string, - tblInfo *model.TableInfo, - tblStats *statistics.Table, - partitionStats map[PartitionIDAndName]*statistics.Table, - autoAnalyzeRatio float64, - currentTs uint64, -) priorityqueue.AnalysisJob { - if !tblStats.IsEligibleForAnalysis() { - return nil - } - - // TODO: figure out how to check the table stats version correctly for partitioned tables. - tableStatsVer := sctx.GetSessionVars().AnalyzeVersion - statistics.CheckAnalyzeVerOnTable(tblStats, &tableStatsVer) - - averageChangePercentage, avgSize, minLastAnalyzeDuration, partitionNames := CalculateIndicatorsForPartitions( - tblStats, - partitionStats, - autoAnalyzeRatio, - currentTs, - ) - partitionIndexes := CheckNewlyAddedIndexesNeedAnalyzeForPartitionedTable( - tblInfo, - partitionStats, - ) - // No need to analyze. - // We perform a separate check because users may set the auto analyze ratio to 0, - // yet still wish to analyze newly added indexes and tables that have not been analyzed. - if len(partitionNames) == 0 && len(partitionIndexes) == 0 { - return nil - } - - job := priorityqueue.NewDynamicPartitionedTableAnalysisJob( - tableSchema, - tblInfo.Name.O, - tblInfo.ID, - partitionNames, - partitionIndexes, - tableStatsVer, - averageChangePercentage, - avgSize, - minLastAnalyzeDuration, - ) - - return job -} - -// CalculateIndicatorsForPartitions calculates the average change percentage, -// average size and average last analyze duration for the partitions that meet the threshold. -// Change percentage is the ratio of the number of modified rows to the total number of rows. -// Size is the product of the number of rows and the number of columns. -// Last analyze duration is the duration since the last analyze. -func CalculateIndicatorsForPartitions( - globalStats *statistics.Table, - partitionStats map[PartitionIDAndName]*statistics.Table, - autoAnalyzeRatio float64, - currentTs uint64, -) ( - avgChange float64, - avgSize float64, - avgLastAnalyzeDuration time.Duration, - partitionNames []string, -) { - totalChangePercent := 0.0 - totalSize := 0.0 - count := 0.0 - partitionNames = make([]string, 0, len(partitionStats)) - cols := float64(globalStats.ColAndIdxExistenceMap.ColNum()) - intest.Assert(cols != 0, "Column count should not be 0") - totalLastAnalyzeDuration := time.Duration(0) - - for pIDAndName, tblStats := range partitionStats { - changePercent := CalculateChangePercentage(tblStats, autoAnalyzeRatio) - // Skip partition analysis if it doesn't meet the threshold, stats are not yet loaded, - // or the auto analyze ratio is set to 0 by the user. - if changePercent == 0 { - continue - } - - totalChangePercent += changePercent - // size = count * cols - totalSize += float64(tblStats.RealtimeCount) * cols - lastAnalyzeDuration := GetTableLastAnalyzeDuration(tblStats, currentTs) - totalLastAnalyzeDuration += lastAnalyzeDuration - partitionNames = append(partitionNames, pIDAndName.Name) - count++ - } - if len(partitionNames) == 0 { - return 0, 0, 0, partitionNames - } - - avgChange = totalChangePercent / count - avgSize = totalSize / count - avgLastAnalyzeDuration = totalLastAnalyzeDuration / time.Duration(count) - - return avgChange, avgSize, avgLastAnalyzeDuration, partitionNames -} - -// CheckNewlyAddedIndexesNeedAnalyzeForPartitionedTable checks if the indexes of the partitioned table need to be analyzed. -// It returns a map from index name to the names of the partitions that need to be analyzed. -// NOTE: This is only for newly added indexes. -func CheckNewlyAddedIndexesNeedAnalyzeForPartitionedTable( - tblInfo *model.TableInfo, - partitionStats map[PartitionIDAndName]*statistics.Table, -) map[string][]string { - partitionIndexes := make(map[string][]string, len(tblInfo.Indices)) - - for _, idx := range tblInfo.Indices { - // No need to analyze the index if it's not public. - if idx.State != model.StatePublic { - continue - } - - // Find all the partitions that need to analyze this index. - names := make([]string, 0, len(partitionStats)) - for pIDAndName, tblStats := range partitionStats { - if idxStats := tblStats.GetIdx(idx.ID); idxStats == nil && !tblStats.ColAndIdxExistenceMap.HasAnalyzed(idx.ID, true) { - names = append(names, pIDAndName.Name) - } - } - - if len(names) > 0 { - partitionIndexes[idx.Name.O] = names - } - } - - return partitionIndexes -} - func getStartTs(sctx sessionctx.Context) (uint64, error) { txn, err := sctx.Txn(true) if err != nil { @@ -645,48 +315,3 @@ func getStartTs(sctx sessionctx.Context) (uint64, error) { } return txn.StartTS(), nil } - -// PartitionIDAndName is a struct that contains the ID and name of a partition. -// Exported for testing purposes. Do not use it in other packages. -type PartitionIDAndName struct { - Name string - ID int64 -} - -func getPartitionStats( - statsHandle statstypes.StatsHandle, - tblInfo *model.TableInfo, - defs []model.PartitionDefinition, -) map[PartitionIDAndName]*statistics.Table { - partitionStats := make(map[PartitionIDAndName]*statistics.Table, len(defs)) - - for _, def := range defs { - stats := statsHandle.GetPartitionStatsForAutoAnalyze(tblInfo, def.ID) - // Ignore the partition if it's not ready to analyze. - if !stats.IsEligibleForAnalysis() { - continue - } - d := PartitionIDAndName{ - ID: def.ID, - Name: def.Name.O, - } - partitionStats[d] = stats - } - - return partitionStats -} - -// autoAnalysisTimeWindow is a struct that contains the start and end time of the auto analyze time window. -type autoAnalysisTimeWindow struct { - start time.Time - end time.Time -} - -// isWithinTimeWindow checks if the current time is within the time window. -// If the auto analyze time window is not set or the current time is not in the window, return false. -func (a autoAnalysisTimeWindow) isWithinTimeWindow(currentTime time.Time) bool { - if a.start == (time.Time{}) || a.end == (time.Time{}) { - return false - } - return timeutil.WithinDayTimePeriod(a.start, a.end, currentTime) -} diff --git a/pkg/statistics/handle/autoanalyze/refresher/refresher_test.go b/pkg/statistics/handle/autoanalyze/refresher/refresher_test.go index d1c64c6489456..0ebbe5f8e8a19 100644 --- a/pkg/statistics/handle/autoanalyze/refresher/refresher_test.go +++ b/pkg/statistics/handle/autoanalyze/refresher/refresher_test.go @@ -17,18 +17,15 @@ package refresher_test import ( "context" "math" - "sort" "testing" "time" - "github.com/pingcap/tidb/pkg/meta/model" pmodel "github.com/pingcap/tidb/pkg/parser/model" "github.com/pingcap/tidb/pkg/statistics" "github.com/pingcap/tidb/pkg/statistics/handle/autoanalyze/priorityqueue" "github.com/pingcap/tidb/pkg/statistics/handle/autoanalyze/refresher" "github.com/pingcap/tidb/pkg/testkit" "github.com/stretchr/testify/require" - "github.com/tikv/client-go/v2/oracle" ) func TestSkipAnalyzeTableWhenAutoAnalyzeRatioIsZero(t *testing.T) { @@ -440,450 +437,3 @@ func TestRebuildTableAnalysisJobQueue(t *testing.T) { require.Equal(t, float64(6*2), indicators.TableSize) require.GreaterOrEqual(t, indicators.LastAnalysisDuration, time.Duration(0)) } - -func TestCalculateChangePercentage(t *testing.T) { - unanalyzedColumns := map[int64]*statistics.Column{ - 1: {}, - 2: {}, - } - unanalyzedIndices := map[int64]*statistics.Index{ - 1: {}, - 2: {}, - } - analyzedColumns := map[int64]*statistics.Column{ - 1: { - StatsVer: 2, - }, - 2: { - StatsVer: 2, - }, - } - analyzedIndices := map[int64]*statistics.Index{ - 1: { - StatsVer: 2, - }, - 2: { - StatsVer: 2, - }, - } - bothUnanalyzedMap := statistics.NewColAndIndexExistenceMap(0, 0) - bothAnalyzedMap := statistics.NewColAndIndexExistenceMap(2, 2) - bothAnalyzedMap.InsertCol(1, nil, true) - bothAnalyzedMap.InsertCol(2, nil, true) - bothAnalyzedMap.InsertIndex(1, nil, true) - bothAnalyzedMap.InsertIndex(2, nil, true) - tests := []struct { - name string - tblStats *statistics.Table - autoAnalyzeRatio float64 - want float64 - }{ - { - name: "Test Table not analyzed", - tblStats: &statistics.Table{ - HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, statistics.AutoAnalyzeMinCnt+1, 0, unanalyzedColumns, unanalyzedIndices), - ColAndIdxExistenceMap: bothUnanalyzedMap, - }, - autoAnalyzeRatio: 0.5, - want: 1, - }, - { - name: "Based on change percentage", - tblStats: &statistics.Table{ - HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, statistics.AutoAnalyzeMinCnt+1, (statistics.AutoAnalyzeMinCnt+1)*2, analyzedColumns, analyzedIndices), - ColAndIdxExistenceMap: bothAnalyzedMap, - LastAnalyzeVersion: 1, - }, - autoAnalyzeRatio: 0.5, - want: 2, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := refresher.CalculateChangePercentage(tt.tblStats, tt.autoAnalyzeRatio) - require.Equal(t, tt.want, got) - }) - } -} - -func TestGetTableLastAnalyzeDuration(t *testing.T) { - // 2023-12-31 10:00:00 - lastUpdateTime := time.Date(2023, 12, 31, 10, 0, 0, 0, time.UTC) - lastUpdateTs := oracle.GoTimeToTS(lastUpdateTime) - tblStats := &statistics.Table{ - HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, 0, 0, map[int64]*statistics.Column{ - 1: { - StatsVer: 2, - Histogram: statistics.Histogram{ - LastUpdateVersion: lastUpdateTs, - }, - }, - }, nil), - LastAnalyzeVersion: lastUpdateTs, - } - // 2024-01-01 10:00:00 - currentTime := time.Date(2024, 1, 1, 10, 0, 0, 0, time.UTC) - currentTs := oracle.GoTimeToTS(currentTime) - want := 24 * time.Hour - - got := refresher.GetTableLastAnalyzeDuration(tblStats, currentTs) - require.Equal(t, want, got) -} - -func TestGetTableLastAnalyzeDurationForUnanalyzedTable(t *testing.T) { - tblStats := &statistics.Table{ - HistColl: statistics.HistColl{}, - } - // 2024-01-01 10:00:00 - currentTime := time.Date(2024, 1, 1, 10, 0, 0, 0, time.UTC) - currentTs := oracle.GoTimeToTS(currentTime) - want := 1800 * time.Second - - got := refresher.GetTableLastAnalyzeDuration(tblStats, currentTs) - require.Equal(t, want, got) -} - -func TestCheckIndexesNeedAnalyze(t *testing.T) { - analyzedMap := statistics.NewColAndIndexExistenceMap(1, 0) - analyzedMap.InsertCol(1, nil, true) - analyzedMap.InsertIndex(1, nil, false) - tests := []struct { - name string - tblInfo *model.TableInfo - tblStats *statistics.Table - want []string - }{ - { - name: "Test Table not analyzed", - tblInfo: &model.TableInfo{ - Indices: []*model.IndexInfo{ - { - ID: 1, - Name: pmodel.NewCIStr("index1"), - State: model.StatePublic, - }, - }, - }, - tblStats: &statistics.Table{ColAndIdxExistenceMap: statistics.NewColAndIndexExistenceMap(0, 0)}, - want: nil, - }, - { - name: "Test Index not analyzed", - tblInfo: &model.TableInfo{ - Indices: []*model.IndexInfo{ - { - ID: 1, - Name: pmodel.NewCIStr("index1"), - State: model.StatePublic, - }, - }, - }, - tblStats: &statistics.Table{ - HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, 0, 0, map[int64]*statistics.Column{ - 1: { - StatsVer: 2, - }, - }, map[int64]*statistics.Index{}), - ColAndIdxExistenceMap: analyzedMap, - LastAnalyzeVersion: 1, - }, - want: []string{"index1"}, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := refresher.CheckIndexesNeedAnalyze(tt.tblInfo, tt.tblStats) - require.Equal(t, tt.want, got) - }) - } -} - -func TestCalculateIndicatorsForPartitions(t *testing.T) { - // 2024-01-01 10:00:00 - currentTime := time.Date(2024, 1, 1, 10, 0, 0, 0, time.UTC) - currentTs := oracle.GoTimeToTS(currentTime) - // 2023-12-31 10:00:00 - lastUpdateTime := time.Date(2023, 12, 31, 10, 0, 0, 0, time.UTC) - lastUpdateTs := oracle.GoTimeToTS(lastUpdateTime) - unanalyzedMap := statistics.NewColAndIndexExistenceMap(0, 0) - analyzedMap := statistics.NewColAndIndexExistenceMap(2, 1) - analyzedMap.InsertCol(1, nil, true) - analyzedMap.InsertCol(2, nil, true) - analyzedMap.InsertIndex(1, nil, true) - tests := []struct { - name string - globalStats *statistics.Table - partitionStats map[refresher.PartitionIDAndName]*statistics.Table - defs []model.PartitionDefinition - autoAnalyzeRatio float64 - currentTs uint64 - wantAvgChangePercentage float64 - wantAvgSize float64 - wantAvgLastAnalyzeDuration time.Duration - wantPartitions []string - }{ - { - name: "Test Table not analyzed", - globalStats: &statistics.Table{ - ColAndIdxExistenceMap: analyzedMap, - }, - partitionStats: map[refresher.PartitionIDAndName]*statistics.Table{ - { - ID: 1, - Name: "p0", - }: { - HistColl: statistics.HistColl{ - Pseudo: false, - RealtimeCount: statistics.AutoAnalyzeMinCnt + 1, - }, - ColAndIdxExistenceMap: unanalyzedMap, - }, - { - ID: 2, - Name: "p1", - }: { - HistColl: statistics.HistColl{ - Pseudo: false, - RealtimeCount: statistics.AutoAnalyzeMinCnt + 1, - }, - ColAndIdxExistenceMap: unanalyzedMap, - }, - }, - defs: []model.PartitionDefinition{ - { - ID: 1, - Name: pmodel.NewCIStr("p0"), - }, - { - ID: 2, - Name: pmodel.NewCIStr("p1"), - }, - }, - autoAnalyzeRatio: 0.5, - currentTs: currentTs, - wantAvgChangePercentage: 1, - wantAvgSize: 2002, - wantAvgLastAnalyzeDuration: 1800 * time.Second, - wantPartitions: []string{"p0", "p1"}, - }, - { - name: "Test Table analyzed and only one partition meets the threshold", - globalStats: &statistics.Table{ - ColAndIdxExistenceMap: analyzedMap, - }, - partitionStats: map[refresher.PartitionIDAndName]*statistics.Table{ - { - ID: 1, - Name: "p0", - }: { - HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, statistics.AutoAnalyzeMinCnt+1, (statistics.AutoAnalyzeMinCnt+1)*2, map[int64]*statistics.Column{ - 1: { - StatsVer: 2, - Histogram: statistics.Histogram{ - LastUpdateVersion: lastUpdateTs, - }, - }, - 2: { - StatsVer: 2, - Histogram: statistics.Histogram{ - LastUpdateVersion: lastUpdateTs, - }, - }, - }, nil), - Version: currentTs, - ColAndIdxExistenceMap: analyzedMap, - LastAnalyzeVersion: lastUpdateTs, - }, - { - ID: 2, - Name: "p1", - }: { - HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, statistics.AutoAnalyzeMinCnt+1, 0, map[int64]*statistics.Column{ - 1: { - StatsVer: 2, - Histogram: statistics.Histogram{ - LastUpdateVersion: lastUpdateTs, - }, - }, - 2: { - StatsVer: 2, - Histogram: statistics.Histogram{ - LastUpdateVersion: lastUpdateTs, - }, - }, - }, nil), - Version: currentTs, - ColAndIdxExistenceMap: analyzedMap, - LastAnalyzeVersion: lastUpdateTs, - }, - }, - defs: []model.PartitionDefinition{ - { - ID: 1, - Name: pmodel.NewCIStr("p0"), - }, - { - ID: 2, - Name: pmodel.NewCIStr("p1"), - }, - }, - autoAnalyzeRatio: 0.5, - currentTs: currentTs, - wantAvgChangePercentage: 2, - wantAvgSize: 2002, - wantAvgLastAnalyzeDuration: 24 * time.Hour, - wantPartitions: []string{"p0"}, - }, - { - name: "No partition meets the threshold", - globalStats: &statistics.Table{ - ColAndIdxExistenceMap: analyzedMap, - }, - partitionStats: map[refresher.PartitionIDAndName]*statistics.Table{ - { - ID: 1, - Name: "p0", - }: { - HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, statistics.AutoAnalyzeMinCnt+1, 0, map[int64]*statistics.Column{ - 1: { - StatsVer: 2, - Histogram: statistics.Histogram{ - LastUpdateVersion: lastUpdateTs, - }, - }, - 2: { - StatsVer: 2, - Histogram: statistics.Histogram{ - LastUpdateVersion: lastUpdateTs, - }, - }, - }, nil), - Version: currentTs, - ColAndIdxExistenceMap: analyzedMap, - LastAnalyzeVersion: lastUpdateTs, - }, - { - ID: 2, - Name: "p1", - }: { - HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, statistics.AutoAnalyzeMinCnt+1, 0, map[int64]*statistics.Column{ - 1: { - StatsVer: 2, - Histogram: statistics.Histogram{ - LastUpdateVersion: lastUpdateTs, - }, - }, - 2: { - StatsVer: 2, - Histogram: statistics.Histogram{ - LastUpdateVersion: lastUpdateTs, - }, - }, - }, nil), - Version: currentTs, - ColAndIdxExistenceMap: analyzedMap, - LastAnalyzeVersion: lastUpdateTs, - }, - }, - defs: []model.PartitionDefinition{ - { - ID: 1, - Name: pmodel.NewCIStr("p0"), - }, - { - ID: 2, - Name: pmodel.NewCIStr("p1"), - }, - }, - autoAnalyzeRatio: 0.5, - currentTs: currentTs, - wantAvgChangePercentage: 0, - wantAvgSize: 0, - wantAvgLastAnalyzeDuration: 0, - wantPartitions: []string{}, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - gotAvgChangePercentage, - gotAvgSize, - gotAvgLastAnalyzeDuration, - gotPartitions := - refresher.CalculateIndicatorsForPartitions( - tt.globalStats, - tt.partitionStats, - tt.autoAnalyzeRatio, - tt.currentTs, - ) - require.Equal(t, tt.wantAvgChangePercentage, gotAvgChangePercentage) - require.Equal(t, tt.wantAvgSize, gotAvgSize) - require.Equal(t, tt.wantAvgLastAnalyzeDuration, gotAvgLastAnalyzeDuration) - // Sort the partitions. - sort.Strings(tt.wantPartitions) - sort.Strings(gotPartitions) - require.Equal(t, tt.wantPartitions, gotPartitions) - }) - } -} - -func TestCheckNewlyAddedIndexesNeedAnalyzeForPartitionedTable(t *testing.T) { - tblInfo := model.TableInfo{ - Indices: []*model.IndexInfo{ - { - ID: 1, - Name: pmodel.NewCIStr("index1"), - State: model.StatePublic, - }, - { - ID: 2, - Name: pmodel.NewCIStr("index2"), - State: model.StatePublic, - }, - }, - Columns: []*model.ColumnInfo{ - { - ID: 1, - }, - { - ID: 2, - }, - }, - } - partitionStats := map[refresher.PartitionIDAndName]*statistics.Table{ - { - ID: 1, - Name: "p0", - }: { - HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, statistics.AutoAnalyzeMinCnt+1, 0, nil, map[int64]*statistics.Index{}), - ColAndIdxExistenceMap: statistics.NewColAndIndexExistenceMap(0, 0), - }, - { - ID: 2, - Name: "p1", - }: { - HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, statistics.AutoAnalyzeMinCnt+1, 0, nil, map[int64]*statistics.Index{ - 2: { - StatsVer: 2, - }, - }), - ColAndIdxExistenceMap: statistics.NewColAndIndexExistenceMap(0, 1), - }, - } - - partitionIndexes := refresher.CheckNewlyAddedIndexesNeedAnalyzeForPartitionedTable(&tblInfo, partitionStats) - expected := map[string][]string{"index1": {"p0", "p1"}, "index2": {"p0"}} - require.Equal(t, len(expected), len(partitionIndexes)) - - for k, v := range expected { - sort.Strings(v) - if val, ok := partitionIndexes[k]; ok { - sort.Strings(val) - require.Equal(t, v, val) - } else { - require.Fail(t, "key not found in partitionIndexes: "+k) - } - } -}