Skip to content

Commit

Permalink
statistics: do not depend on table information when calculating the table size (#56036)
Browse files Browse the repository at this point in the history

ref #55906
  • Loading branch information
Rustin170506 authored Sep 14, 2024
1 parent ae86e04 commit 3688a2b
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 58 deletions.
16 changes: 8 additions & 8 deletions pkg/statistics/handle/autoanalyze/refresher/refresher.go
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ func CreateTableAnalysisJob(
statistics.CheckAnalyzeVerOnTable(tblStats, &tableStatsVer)

changePercentage := CalculateChangePercentage(tblStats, autoAnalyzeRatio)
tableSize := calculateTableSize(tblInfo, tblStats)
tableSize := calculateTableSize(tblStats)
lastAnalysisDuration := GetTableLastAnalyzeDuration(tblStats, currentTs)
indexes := CheckIndexesNeedAnalyze(tblInfo, tblStats)

Expand Down Expand Up @@ -391,7 +391,7 @@ func CreateStaticPartitionAnalysisJob(
statistics.CheckAnalyzeVerOnTable(partitionStats, &tableStatsVer)

changePercentage := CalculateChangePercentage(partitionStats, autoAnalyzeRatio)
tableSize := calculateTableSize(globalTblInfo, partitionStats)
tableSize := calculateTableSize(partitionStats)
lastAnalysisDuration := GetTableLastAnalyzeDuration(partitionStats, currentTs)
indexes := CheckIndexesNeedAnalyze(globalTblInfo, partitionStats)

Expand Down Expand Up @@ -448,12 +448,11 @@ func CalculateChangePercentage(
}

// calculateTableSize estimates a table's size as the product of its realtime
// row count and its column count, returned as a float64.
// NOTE(review): this span is a rendered diff, so the pre-change lines (the
// tblInfo parameter and len(tblInfo.Columns)) appear next to the lines that
// replace them — confirm against the repository before reusing this text.
func calculateTableSize(
tblInfo *model.TableInfo,
tblStats *statistics.Table,
) float64 {
tblCnt := float64(tblStats.RealtimeCount)
// TODO: Ignore unanalyzable columns.
colCnt := float64(len(tblInfo.Columns))
// The column count is read from the statistics' own existence map via ColNum.
colCnt := float64(tblStats.ColAndIdxExistenceMap.ColNum())
// A zero column count would make the computed size 0 regardless of row count.
// NOTE(review): intest.Assert presumably only fires in test builds — confirm.
intest.Assert(colCnt != 0, "Column count should not be 0")

return tblCnt * colCnt
}
Expand Down Expand Up @@ -526,7 +525,7 @@ func createTableAnalysisJobForPartitions(
statistics.CheckAnalyzeVerOnTable(tblStats, &tableStatsVer)

averageChangePercentage, avgSize, minLastAnalyzeDuration, partitionNames := CalculateIndicatorsForPartitions(
tblInfo,
tblStats,
partitionStats,
autoAnalyzeRatio,
currentTs,
Expand Down Expand Up @@ -563,7 +562,7 @@ func createTableAnalysisJobForPartitions(
// Size is the product of the number of rows and the number of columns.
// Last analyze duration is the duration since the last analyze.
func CalculateIndicatorsForPartitions(
tblInfo *model.TableInfo,
globalStats *statistics.Table,
partitionStats map[PartitionIDAndName]*statistics.Table,
autoAnalyzeRatio float64,
currentTs uint64,
Expand All @@ -577,7 +576,8 @@ func CalculateIndicatorsForPartitions(
totalSize := 0.0
count := 0.0
partitionNames = make([]string, 0, len(partitionStats))
cols := float64(len(tblInfo.Columns))
cols := float64(globalStats.ColAndIdxExistenceMap.ColNum())
intest.Assert(cols != 0, "Column count should not be 0")
totalLastAnalyzeDuration := time.Duration(0)

for pIDAndName, tblStats := range partitionStats {
Expand Down
58 changes: 8 additions & 50 deletions pkg/statistics/handle/autoanalyze/refresher/refresher_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,7 @@ func TestCalculateIndicatorsForPartitions(t *testing.T) {
analyzedMap.InsertIndex(1, nil, true)
tests := []struct {
name string
tblInfo *model.TableInfo
globalStats *statistics.Table
partitionStats map[refresher.PartitionIDAndName]*statistics.Table
defs []model.PartitionDefinition
autoAnalyzeRatio float64
Expand All @@ -626,22 +626,8 @@ func TestCalculateIndicatorsForPartitions(t *testing.T) {
}{
{
name: "Test Table not analyzed",
tblInfo: &model.TableInfo{
Indices: []*model.IndexInfo{
{
ID: 1,
Name: pmodel.NewCIStr("index1"),
State: model.StatePublic,
},
},
Columns: []*model.ColumnInfo{
{
ID: 1,
},
{
ID: 2,
},
},
globalStats: &statistics.Table{
ColAndIdxExistenceMap: analyzedMap,
},
partitionStats: map[refresher.PartitionIDAndName]*statistics.Table{
{
Expand Down Expand Up @@ -684,22 +670,8 @@ func TestCalculateIndicatorsForPartitions(t *testing.T) {
},
{
name: "Test Table analyzed and only one partition meets the threshold",
tblInfo: &model.TableInfo{
Indices: []*model.IndexInfo{
{
ID: 1,
Name: pmodel.NewCIStr("index1"),
State: model.StatePublic,
},
},
Columns: []*model.ColumnInfo{
{
ID: 1,
},
{
ID: 2,
},
},
globalStats: &statistics.Table{
ColAndIdxExistenceMap: analyzedMap,
},
partitionStats: map[refresher.PartitionIDAndName]*statistics.Table{
{
Expand Down Expand Up @@ -766,22 +738,8 @@ func TestCalculateIndicatorsForPartitions(t *testing.T) {
},
{
name: "No partition meets the threshold",
tblInfo: &model.TableInfo{
Indices: []*model.IndexInfo{
{
ID: 1,
Name: pmodel.NewCIStr("index1"),
State: model.StatePublic,
},
},
Columns: []*model.ColumnInfo{
{
ID: 1,
},
{
ID: 2,
},
},
globalStats: &statistics.Table{
ColAndIdxExistenceMap: analyzedMap,
},
partitionStats: map[refresher.PartitionIDAndName]*statistics.Table{
{
Expand Down Expand Up @@ -855,7 +813,7 @@ func TestCalculateIndicatorsForPartitions(t *testing.T) {
gotAvgLastAnalyzeDuration,
gotPartitions :=
refresher.CalculateIndicatorsForPartitions(
tt.tblInfo,
tt.globalStats,
tt.partitionStats,
tt.autoAnalyzeRatio,
tt.currentTs,
Expand Down
5 changes: 5 additions & 0 deletions pkg/statistics/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,11 @@ func (m *ColAndIdxExistenceMap) IsEmpty() bool {
return len(m.colInfoMap)+len(m.idxInfoMap) == 0
}

// ColNum returns the number of columns recorded in the map, i.e. the number
// of entries in colInfoMap. Entries in idxInfoMap are not counted.
// NOTE(review): the receiver is dereferenced without a nil check — presumably
// callers always hold a non-nil map; confirm.
func (m *ColAndIdxExistenceMap) ColNum() int {
return len(m.colInfoMap)
}

// Clone deeply copies the map.
func (m *ColAndIdxExistenceMap) Clone() *ColAndIdxExistenceMap {
mm := NewColAndIndexExistenceMap(len(m.colInfoMap), len(m.idxInfoMap))
Expand Down

0 comments on commit 3688a2b

Please sign in to comment.