From 4a7262cbab1385fcd91a87e093871dab8f332726 Mon Sep 17 00:00:00 2001 From: Rustin170506 <29879298+Rustin170506@users.noreply.github.com> Date: Thu, 12 Sep 2024 15:20:06 +0800 Subject: [PATCH 1/6] statistics: do not depend on table information when calculating the table size --- pkg/statistics/handle/autoanalyze/refresher/refresher.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pkg/statistics/handle/autoanalyze/refresher/refresher.go b/pkg/statistics/handle/autoanalyze/refresher/refresher.go index 4372ab0254bcd..b9ccab6c044cc 100644 --- a/pkg/statistics/handle/autoanalyze/refresher/refresher.go +++ b/pkg/statistics/handle/autoanalyze/refresher/refresher.go @@ -347,7 +347,7 @@ func CreateTableAnalysisJob( statistics.CheckAnalyzeVerOnTable(tblStats, &tableStatsVer) changePercentage := CalculateChangePercentage(tblStats, autoAnalyzeRatio) - tableSize := calculateTableSize(tblInfo, tblStats) + tableSize := calculateTableSize(tblStats) lastAnalysisDuration := GetTableLastAnalyzeDuration(tblStats, currentTs) indexes := CheckIndexesNeedAnalyze(tblInfo, tblStats) @@ -391,7 +391,7 @@ func CreateStaticPartitionAnalysisJob( statistics.CheckAnalyzeVerOnTable(partitionStats, &tableStatsVer) changePercentage := CalculateChangePercentage(partitionStats, autoAnalyzeRatio) - tableSize := calculateTableSize(globalTblInfo, partitionStats) + tableSize := calculateTableSize(partitionStats) lastAnalysisDuration := GetTableLastAnalyzeDuration(partitionStats, currentTs) indexes := CheckIndexesNeedAnalyze(globalTblInfo, partitionStats) @@ -448,12 +448,10 @@ func CalculateChangePercentage( } func calculateTableSize( - tblInfo *model.TableInfo, tblStats *statistics.Table, ) float64 { tblCnt := float64(tblStats.RealtimeCount) - // TODO: Ignore unanalyzable columns. - colCnt := float64(len(tblInfo.Columns)) + colCnt := float64(tblStats.ColNum()) return tblCnt * colCnt } From 27bfc7196df5455aaa43414b5e5aeafeaee610a2 Mon Sep 17 00:00:00 2001 From: Rustin170506 <29879298+Rustin170506@users.noreply.github.com> Date: Thu, 12 Sep 2024 16:49:42 +0800 Subject: [PATCH 2/6] fix: update the partitioned table as well Signed-off-by: Rustin170506 <29879298+Rustin170506@users.noreply.github.com> --- .../handle/autoanalyze/refresher/refresher.go | 6 +- .../autoanalyze/refresher/refresher_test.go | 61 ++++--------------- 2 files changed, 14 insertions(+), 53 deletions(-) diff --git a/pkg/statistics/handle/autoanalyze/refresher/refresher.go b/pkg/statistics/handle/autoanalyze/refresher/refresher.go index b9ccab6c044cc..6677819f043fc 100644 --- a/pkg/statistics/handle/autoanalyze/refresher/refresher.go +++ b/pkg/statistics/handle/autoanalyze/refresher/refresher.go @@ -524,7 +524,7 @@ func createTableAnalysisJobForPartitions( statistics.CheckAnalyzeVerOnTable(tblStats, &tableStatsVer) averageChangePercentage, avgSize, minLastAnalyzeDuration, partitionNames := CalculateIndicatorsForPartitions( - tblInfo, + tblStats, partitionStats, autoAnalyzeRatio, currentTs, @@ -561,7 +561,7 @@ func createTableAnalysisJobForPartitions( // Size is the product of the number of rows and the number of columns. // Last analyze duration is the duration since the last analyze. func CalculateIndicatorsForPartitions( - tblInfo *model.TableInfo, + globalStats *statistics.Table, partitionStats map[PartitionIDAndName]*statistics.Table, autoAnalyzeRatio float64, currentTs uint64, @@ -575,7 +575,7 @@ func CalculateIndicatorsForPartitions( totalSize := 0.0 count := 0.0 partitionNames = make([]string, 0, len(partitionStats)) - cols := float64(len(tblInfo.Columns)) + cols := float64(globalStats.ColNum()) totalLastAnalyzeDuration := time.Duration(0) for pIDAndName, tblStats := range partitionStats { diff --git a/pkg/statistics/handle/autoanalyze/refresher/refresher_test.go b/pkg/statistics/handle/autoanalyze/refresher/refresher_test.go index 4ac0971081b3c..9fd870b6e1669 100644 --- a/pkg/statistics/handle/autoanalyze/refresher/refresher_test.go +++ b/pkg/statistics/handle/autoanalyze/refresher/refresher_test.go @@ -612,9 +612,12 @@ func TestCalculateIndicatorsForPartitions(t *testing.T) { analyzedMap.InsertCol(1, nil, true) analyzedMap.InsertCol(2, nil, true) analyzedMap.InsertIndex(1, nil, true) + histColl := statistics.NewHistColl(1, false, 0, 0, 2, 1) + histColl.SetCol(1, &statistics.Column{}) + histColl.SetCol(2, &statistics.Column{}) tests := []struct { name string - tblInfo *model.TableInfo + globalStats *statistics.Table partitionStats map[refresher.PartitionIDAndName]*statistics.Table defs []model.PartitionDefinition autoAnalyzeRatio float64 @@ -626,22 +629,8 @@ func TestCalculateIndicatorsForPartitions(t *testing.T) { }{ { name: "Test Table not analyzed", - tblInfo: &model.TableInfo{ - Indices: []*model.IndexInfo{ - { - ID: 1, - Name: pmodel.NewCIStr("index1"), - State: model.StatePublic, - }, - }, - Columns: []*model.ColumnInfo{ - { - ID: 1, - }, - { - ID: 2, - }, - }, + globalStats: &statistics.Table{ + HistColl: *histColl, }, partitionStats: map[refresher.PartitionIDAndName]*statistics.Table{ { @@ -684,22 +673,8 @@ func TestCalculateIndicatorsForPartitions(t *testing.T) { }, { name: "Test Table analyzed and only one partition meets the threshold", - tblInfo: &model.TableInfo{ - Indices: []*model.IndexInfo{ - { - ID: 1, - Name: pmodel.NewCIStr("index1"), - State: model.StatePublic, - }, - }, - Columns: []*model.ColumnInfo{ - { - ID: 1, - }, - { - ID: 2, - }, - }, + globalStats: &statistics.Table{ + HistColl: *histColl, }, partitionStats: map[refresher.PartitionIDAndName]*statistics.Table{ { @@ -766,22 +741,8 @@ func TestCalculateIndicatorsForPartitions(t *testing.T) { }, { name: "No partition meets the threshold", - tblInfo: &model.TableInfo{ - Indices: []*model.IndexInfo{ - { - ID: 1, - Name: pmodel.NewCIStr("index1"), - State: model.StatePublic, - }, - }, - Columns: []*model.ColumnInfo{ - { - ID: 1, - }, - { - ID: 2, - }, - }, + globalStats: &statistics.Table{ + HistColl: *histColl, }, partitionStats: map[refresher.PartitionIDAndName]*statistics.Table{ { @@ -855,7 +816,7 @@ func TestCalculateIndicatorsForPartitions(t *testing.T) { gotAvgLastAnalyzeDuration, gotPartitions := refresher.CalculateIndicatorsForPartitions( - tt.tblInfo, + tt.globalStats, tt.partitionStats, tt.autoAnalyzeRatio, tt.currentTs, From 4f9a8f34182509720ae3e226575c30f0ea511e15 Mon Sep 17 00:00:00 2001 From: Rustin170506 <29879298+Rustin170506@users.noreply.github.com> Date: Thu, 12 Sep 2024 17:41:08 +0800 Subject: [PATCH 3/6] chore: add intest assert --- pkg/statistics/handle/autoanalyze/refresher/refresher.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/statistics/handle/autoanalyze/refresher/refresher.go b/pkg/statistics/handle/autoanalyze/refresher/refresher.go index 6677819f043fc..e3fe15fef3025 100644 --- a/pkg/statistics/handle/autoanalyze/refresher/refresher.go +++ b/pkg/statistics/handle/autoanalyze/refresher/refresher.go @@ -452,6 +452,7 @@ func calculateTableSize( ) float64 { tblCnt := float64(tblStats.RealtimeCount) colCnt := float64(tblStats.ColNum()) + intest.Assert(colCnt != 0, "Column count should not be 0") return tblCnt * colCnt } @@ -576,6 +577,7 @@ func CalculateIndicatorsForPartitions( count := 0.0 partitionNames = make([]string, 0, len(partitionStats)) cols := float64(globalStats.ColNum()) + intest.Assert(cols != 0, "Column count should not be 0") totalLastAnalyzeDuration := time.Duration(0) for pIDAndName, tblStats := range partitionStats { From 6561228771a6bfaba5d20d0aeff60095c1277160 Mon Sep 17 00:00:00 2001 From: Rustin170506 <29879298+Rustin170506@users.noreply.github.com> Date: Thu, 12 Sep 2024 17:42:52 +0800 Subject: [PATCH 4/6] test: better args --- pkg/statistics/handle/autoanalyze/refresher/refresher_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/statistics/handle/autoanalyze/refresher/refresher_test.go b/pkg/statistics/handle/autoanalyze/refresher/refresher_test.go index 9fd870b6e1669..8f4dbdb29de74 100644 --- a/pkg/statistics/handle/autoanalyze/refresher/refresher_test.go +++ b/pkg/statistics/handle/autoanalyze/refresher/refresher_test.go @@ -612,7 +612,7 @@ func TestCalculateIndicatorsForPartitions(t *testing.T) { analyzedMap.InsertCol(1, nil, true) analyzedMap.InsertCol(2, nil, true) analyzedMap.InsertIndex(1, nil, true) - histColl := statistics.NewHistColl(1, false, 0, 0, 2, 1) + histColl := statistics.NewHistColl(1, false, 1000, 10000, 2, 0) histColl.SetCol(1, &statistics.Column{}) histColl.SetCol(2, &statistics.Column{}) tests := []struct { From 82952c711ac7e421fa04e3cbd5dfaa005a855201 Mon Sep 17 00:00:00 2001 From: Rustin170506 <29879298+Rustin170506@users.noreply.github.com> Date: Sat, 14 Sep 2024 11:05:43 +0800 Subject: [PATCH 5/6] fix: use ColAndIdxExistenceMap's column number Signed-off-by: Rustin170506 <29879298+Rustin170506@users.noreply.github.com> --- pkg/statistics/handle/autoanalyze/refresher/refresher.go | 4 ++-- pkg/statistics/table.go | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/pkg/statistics/handle/autoanalyze/refresher/refresher.go b/pkg/statistics/handle/autoanalyze/refresher/refresher.go index e3fe15fef3025..626e609cdc644 100644 --- a/pkg/statistics/handle/autoanalyze/refresher/refresher.go +++ b/pkg/statistics/handle/autoanalyze/refresher/refresher.go @@ -451,7 +451,7 @@ func calculateTableSize( tblStats *statistics.Table, ) float64 { tblCnt := float64(tblStats.RealtimeCount) - colCnt := float64(tblStats.ColNum()) + colCnt := float64(tblStats.ColAndIdxExistenceMap.ColNum()) intest.Assert(colCnt != 0, "Column count should not be 0") return tblCnt * colCnt @@ -576,7 +576,7 @@ func CalculateIndicatorsForPartitions( totalSize := 0.0 count := 0.0 partitionNames = make([]string, 0, len(partitionStats)) - cols := float64(globalStats.ColNum()) + cols := float64(globalStats.ColAndIdxExistenceMap.ColNum()) intest.Assert(cols != 0, "Column count should not be 0") totalLastAnalyzeDuration := time.Duration(0) diff --git a/pkg/statistics/table.go b/pkg/statistics/table.go index e7f6510c63ba8..c59cddca90473 100644 --- a/pkg/statistics/table.go +++ b/pkg/statistics/table.go @@ -148,6 +148,11 @@ func (m *ColAndIdxExistenceMap) IsEmpty() bool { return len(m.colInfoMap)+len(m.idxInfoMap) == 0 } +// ColNum returns the number of columns in the map. +func (m *ColAndIdxExistenceMap) ColNum() int { + return len(m.colInfoMap) +} + // Clone deeply copies the map. func (m *ColAndIdxExistenceMap) Clone() *ColAndIdxExistenceMap { mm := NewColAndIndexExistenceMap(len(m.colInfoMap), len(m.idxInfoMap)) From 88b71bbe89d4209b1097c61f873f44be19e29aa1 Mon Sep 17 00:00:00 2001 From: Rustin170506 <29879298+Rustin170506@users.noreply.github.com> Date: Sat, 14 Sep 2024 14:17:31 +0800 Subject: [PATCH 6/6] test: fix broken tests Signed-off-by: Rustin170506 <29879298+Rustin170506@users.noreply.github.com> --- .../handle/autoanalyze/refresher/refresher_test.go | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pkg/statistics/handle/autoanalyze/refresher/refresher_test.go b/pkg/statistics/handle/autoanalyze/refresher/refresher_test.go index 8f4dbdb29de74..d1c64c6489456 100644 --- a/pkg/statistics/handle/autoanalyze/refresher/refresher_test.go +++ b/pkg/statistics/handle/autoanalyze/refresher/refresher_test.go @@ -612,9 +612,6 @@ func TestCalculateIndicatorsForPartitions(t *testing.T) { analyzedMap.InsertCol(1, nil, true) analyzedMap.InsertCol(2, nil, true) analyzedMap.InsertIndex(1, nil, true) - histColl := statistics.NewHistColl(1, false, 1000, 10000, 2, 0) - histColl.SetCol(1, &statistics.Column{}) - histColl.SetCol(2, &statistics.Column{}) tests := []struct { name string globalStats *statistics.Table @@ -630,7 +627,7 @@ func TestCalculateIndicatorsForPartitions(t *testing.T) { { name: "Test Table not analyzed", globalStats: &statistics.Table{ - HistColl: *histColl, + ColAndIdxExistenceMap: analyzedMap, }, partitionStats: map[refresher.PartitionIDAndName]*statistics.Table{ { @@ -674,7 +671,7 @@ func TestCalculateIndicatorsForPartitions(t *testing.T) { { name: "Test Table analyzed and only one partition meets the threshold", globalStats: &statistics.Table{ - HistColl: *histColl, + ColAndIdxExistenceMap: analyzedMap, }, partitionStats: map[refresher.PartitionIDAndName]*statistics.Table{ { @@ -742,7 +739,7 @@ func TestCalculateIndicatorsForPartitions(t *testing.T) { { name: "No partition meets the threshold", globalStats: &statistics.Table{ - HistColl: *histColl, + ColAndIdxExistenceMap: analyzedMap, }, partitionStats: map[refresher.PartitionIDAndName]*statistics.Table{ {