diff --git a/pkg/domain/domain.go b/pkg/domain/domain.go index 5aba64258e9d7..0a5eea3bf701b 100644 --- a/pkg/domain/domain.go +++ b/pkg/domain/domain.go @@ -2266,7 +2266,7 @@ func (do *Domain) StatsHandle() *handle.Handle { // CreateStatsHandle is used only for test. func (do *Domain) CreateStatsHandle(ctx, initStatsCtx sessionctx.Context) error { - h, err := handle.NewHandle(ctx, initStatsCtx, do.statsLease, do.sysSessionPool, &do.sysProcesses, do.NextConnID, do.ReleaseConnID) + h, err := handle.NewHandle(ctx, initStatsCtx, do.statsLease, do.InfoSchema(), do.sysSessionPool, &do.sysProcesses, do.NextConnID, do.ReleaseConnID) if err != nil { return err } @@ -2303,7 +2303,7 @@ func (do *Domain) LoadAndUpdateStatsLoop(ctxs []sessionctx.Context, initStatsCtx // It should be called only once in BootstrapSession. func (do *Domain) UpdateTableStatsLoop(ctx, initStatsCtx sessionctx.Context) error { ctx.GetSessionVars().InRestrictedSQL = true - statsHandle, err := handle.NewHandle(ctx, initStatsCtx, do.statsLease, do.sysSessionPool, &do.sysProcesses, do.NextConnID, do.ReleaseConnID) + statsHandle, err := handle.NewHandle(ctx, initStatsCtx, do.statsLease, do.InfoSchema(), do.sysSessionPool, &do.sysProcesses, do.NextConnID, do.ReleaseConnID) if err != nil { return err } @@ -2449,7 +2449,7 @@ func (do *Domain) initStats(ctx context.Context) { initstats.InitStatsPercentage.Store(0) var err error if liteInitStats { - err = statsHandle.InitStatsLite(ctx, do.InfoSchema()) + err = statsHandle.InitStatsLite(ctx) } else { err = statsHandle.InitStats(ctx, do.InfoSchema()) } @@ -2488,7 +2488,7 @@ func (do *Domain) loadStatsWorker() { if err != nil { logutil.BgLogger().Debug("update stats info failed", zap.Error(err)) } - err = statsHandle.LoadNeededHistograms() + err = statsHandle.LoadNeededHistograms(do.InfoSchema()) if err != nil { logutil.BgLogger().Debug("load histograms failed", zap.Error(err)) } diff --git a/pkg/executor/infoschema_reader_test.go b/pkg/executor/infoschema_reader_test.go index 8449487ad11cd..4cf16a45a1de7 100644 --- a/pkg/executor/infoschema_reader_test.go +++ b/pkg/executor/infoschema_reader_test.go @@ -331,7 +331,7 @@ func TestForAnalyzeStatus(t *testing.T) { tk.MustExec("insert into t1 values (1,2),(3,4)") tk.MustExec("analyze table t1 all columns") tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t1, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"")) // 1 note. - require.NoError(t, dom.StatsHandle().LoadNeededHistograms()) + require.NoError(t, dom.StatsHandle().LoadNeededHistograms(dom.InfoSchema())) tk.MustExec("CREATE ROLE r_t1 ;") tk.MustExec("GRANT ALL PRIVILEGES ON test.t1 TO r_t1;") tk.MustExec("GRANT r_t1 TO analyze_tester;") diff --git a/pkg/executor/test/analyzetest/analyze_test.go b/pkg/executor/test/analyzetest/analyze_test.go index 2073703e4a699..6e0d1d96f1acc 100644 --- a/pkg/executor/test/analyzetest/analyze_test.go +++ b/pkg/executor/test/analyzetest/analyze_test.go @@ -724,7 +724,7 @@ func TestSavedAnalyzeOptions(t *testing.T) { tk.MustExec("analyze table t with 1 topn, 2 buckets") is := dom.InfoSchema() tk.MustQuery("select * from t where b > 1 and c > 1") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) table, err := is.TableByName(context.Background(), model.NewCIStr("test"), model.NewCIStr("t")) require.NoError(t, err) tableInfo := table.Meta() @@ -761,7 +761,7 @@ func TestSavedAnalyzeOptions(t *testing.T) { col0 = tbl.GetCol(tableInfo.Columns[0].ID) require.Equal(t, 3, len(col0.Buckets)) tk.MustQuery("select * from t where b > 1 and c > 1") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) col1 = tbl.GetCol(tableInfo.Columns[1].ID) require.Equal(t, 1, len(col1.TopN.TopN)) col2 = tbl.GetCol(tableInfo.Columns[2].ID) @@ -1073,7 +1073,7 @@ func TestSavedAnalyzeColumnOptions(t *testing.T) { tk.MustExec("select * from t where b > 1") require.NoError(t, h.DumpColStatsUsageToKV()) tk.MustExec("analyze table t predicate columns") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) tblStats := h.GetTableStats(tblInfo) lastVersion := tblStats.Version // column b is analyzed @@ -1086,7 +1086,7 @@ func TestSavedAnalyzeColumnOptions(t *testing.T) { require.NoError(t, h.DumpColStatsUsageToKV()) // manually analyze uses the saved option(predicate columns). tk.MustExec("analyze table t") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) tblStats = h.GetTableStats(tblInfo) require.Less(t, lastVersion, tblStats.Version) lastVersion = tblStats.Version @@ -2218,7 +2218,7 @@ PARTITION BY RANGE ( a ) ( // analyze table only sets table options and gen globalStats tk.MustExec("analyze table t columns a,c with 1 topn, 3 buckets") tk.MustQuery("select * from t where b > 1 and c > 1") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) tbl := h.GetTableStats(tableInfo) lastVersion := tbl.Version // both globalStats and partition stats generated and options saved for column a,c @@ -2238,7 +2238,7 @@ PARTITION BY RANGE ( a ) ( // analyze table with persisted table-level options tk.MustExec("analyze table t") tk.MustQuery("select * from t where b > 1 and c > 1") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) tbl = h.GetTableStats(tableInfo) require.Greater(t, tbl.Version, lastVersion) lastVersion = tbl.Version @@ -2258,7 +2258,7 @@ PARTITION BY RANGE ( a ) ( // analyze table with merged table-level options tk.MustExec("analyze table t with 2 topn, 2 buckets") tk.MustQuery("select * from t where b > 1 and c > 1") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) tbl = h.GetTableStats(tableInfo) require.Greater(t, tbl.Version, lastVersion) require.Equal(t, 2, len(tbl.GetCol(tableInfo.Columns[0].ID).Buckets)) @@ -2312,7 +2312,7 @@ PARTITION BY RANGE ( a ) ( // analyze partition under static mode with options tk.MustExec("analyze table t partition p0 columns a,c with 1 topn, 3 buckets") tk.MustQuery("select * from t where b > 1 and c > 1") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) tbl := h.GetTableStats(tableInfo) p0 := h.GetPartitionStats(tableInfo, pi.Definitions[0].ID) p1 := h.GetPartitionStats(tableInfo, pi.Definitions[1].ID) @@ -2337,7 +2337,7 @@ PARTITION BY RANGE ( a ) ( // analyze table in dynamic mode will ignore partition-level options and use default tk.MustExec("analyze table t") tk.MustQuery("select * from t where b > 1 and c > 1") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) tbl = h.GetTableStats(tableInfo) require.Greater(t, tbl.Version, lastVersion) lastVersion = tbl.Version @@ -2361,7 +2361,7 @@ PARTITION BY RANGE ( a ) ( // analyze table under dynamic mode with specified options with old partition-level options tk.MustExec("analyze table t columns b,d with 2 topn, 2 buckets") tk.MustQuery("select * from t where b > 1 and d > 1") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) tbl = h.GetTableStats(tableInfo) require.Greater(t, tbl.Version, lastVersion) lastVersion = tbl.Version @@ -2381,7 +2381,7 @@ PARTITION BY RANGE ( a ) ( // analyze table under dynamic mode without options with old table-level & partition-level options tk.MustExec("analyze table t") tk.MustQuery("select * from t where b > 1 and d > 1") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) tbl = h.GetTableStats(tableInfo) require.Greater(t, tbl.Version, lastVersion) lastVersion = tbl.Version @@ -2391,7 +2391,7 @@ PARTITION BY RANGE ( a ) ( // analyze table under dynamic mode with specified options with old table-level & partition-level options tk.MustExec("analyze table t with 1 topn") tk.MustQuery("select * from t where b > 1 and d > 1") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) tbl = h.GetTableStats(tableInfo) require.Greater(t, tbl.Version, lastVersion) require.Equal(t, 2, len(tbl.GetCol(tableInfo.Columns[1].ID).Buckets)) @@ -2451,7 +2451,7 @@ PARTITION BY RANGE ( a ) ( "Warning 1105 Ignore columns and options when analyze partition in dynamic mode", )) tk.MustQuery("select * from t where a > 1 and b > 1 and c > 1 and d > 1") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) tbl := h.GetTableStats(tableInfo) lastVersion := tbl.Version require.NotEqual(t, 3, len(tbl.GetCol(tableInfo.Columns[2].ID).Buckets)) @@ -2506,7 +2506,7 @@ PARTITION BY RANGE ( a ) ( tk.MustExec("set @@session.tidb_partition_prune_mode = 'static'") tk.MustExec("analyze table t partition p0 columns a,c with 1 topn, 3 buckets") tk.MustQuery("select * from t where a > 1 and b > 1 and c > 1 and d > 1") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) p0 := h.GetPartitionStats(tableInfo, pi.Definitions[0].ID) require.Equal(t, 3, len(p0.GetCol(tableInfo.Columns[2].ID).Buckets)) @@ -2538,14 +2538,14 @@ PARTITION BY RANGE ( a ) ( )) // flaky test, fix it later //tk.MustQuery("select * from t where a > 1 and b > 1 and c > 1 and d > 1") - //require.NoError(t, h.LoadNeededHistograms()) + //require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) //tbl := h.GetTableStats(tableInfo) //require.Equal(t, 0, len(tbl.Columns)) // ignore both p0's 3 buckets, persisted-partition-options' 1 bucket, just use table-level 2 buckets tk.MustExec("analyze table t partition p0") tk.MustQuery("select * from t where a > 1 and b > 1 and c > 1 and d > 1") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) tbl := h.GetTableStats(tableInfo) require.Equal(t, 2, len(tbl.GetCol(tableInfo.Columns[2].ID).Buckets)) } @@ -2590,7 +2590,7 @@ PARTITION BY RANGE ( a ) ( tk.MustExec("analyze table t partition p1 with 1 topn, 3 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows()) tk.MustQuery("select * from t where a > 1 and b > 1 and c > 1 and d > 1") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) tbl := h.GetTableStats(tableInfo) lastVersion := tbl.Version require.Equal(t, 3, len(tbl.GetCol(tableInfo.Columns[2].ID).Buckets)) @@ -2962,7 +2962,7 @@ func TestAnalyzeMVIndex(t *testing.T) { "└─TableRowIDScan(Probe) 0.03 cop[tikv] table:t keep order:false, stats:partial[ia:allEvicted, ij_char:allEvicted, j:unInitialized]", )) // 3.2. emulate the background async loading - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) // 3.3. now, stats on all indexes should be loaded tk.MustQuery("explain format = brief select /*+ use_index_merge(t, ij_signed) */ * from t where 1 member of (j->'$.signed')").Check(testkit.Rows( "IndexMerge 27.00 root type: union", @@ -3017,7 +3017,7 @@ func TestAnalyzeMVIndex(t *testing.T) { )) // 4. check stats content in the memory - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) tk.MustQuery("show stats_meta").CheckAt([]int{0, 1, 4, 5}, testkit.Rows("test t 0 27")) tk.MustQuery("show stats_histograms").Sort().CheckAt([]int{0, 1, 3, 4, 6, 7, 8, 9, 10}, testkit.Rows( // db_name, table_name, column_name, is_index, distinct_count, null_count, avg_col_size, correlation, load_status diff --git a/pkg/meta/model/table.go b/pkg/meta/model/table.go index 970a03e4d5164..2aa22b94eb93d 100644 --- a/pkg/meta/model/table.go +++ b/pkg/meta/model/table.go @@ -322,6 +322,26 @@ func (t *TableInfo) FindIndexByName(idxName string) *IndexInfo { return nil } +// FindColumnByID finds ColumnInfo by id. +func (t *TableInfo) FindColumnByID(id int64) *ColumnInfo { + for _, col := range t.Columns { + if col.ID == id { + return col + } + } + return nil +} + +// FindIndexByID finds index by id. +func (t *TableInfo) FindIndexByID(id int64) *IndexInfo { + for _, idx := range t.Indices { + if idx.ID == id { + return idx + } + } + return nil +} + // FindPublicColumnByName finds the public column by name. func (t *TableInfo) FindPublicColumnByName(colNameL string) *ColumnInfo { for _, col := range t.Cols() { diff --git a/pkg/planner/cardinality/selectivity_test.go b/pkg/planner/cardinality/selectivity_test.go index 8805f7d69ea39..7f065f89d75ab 100644 --- a/pkg/planner/cardinality/selectivity_test.go +++ b/pkg/planner/cardinality/selectivity_test.go @@ -63,7 +63,7 @@ func TestCollationColumnEstimate(t *testing.T) { require.Nil(t, h.DumpStatsDeltaToKV(true)) tk.MustExec("analyze table t all columns") tk.MustExec("explain select * from t where a = 'aaa'") - require.Nil(t, h.LoadNeededHistograms()) + require.Nil(t, h.LoadNeededHistograms(dom.InfoSchema())) var ( input []string output [][]string @@ -345,7 +345,7 @@ func TestColumnIndexNullEstimation(t *testing.T) { } // Make sure column stats has been loaded. testKit.MustExec(`explain select * from t where a is null`) - require.Nil(t, h.LoadNeededHistograms()) + require.Nil(t, h.LoadNeededHistograms(dom.InfoSchema())) for i := 5; i < len(input); i++ { testdata.OnRecord(func() { output[i] = testdata.ConvertRowsToStrings(testKit.MustQuery(input[i]).Rows()) @@ -582,7 +582,7 @@ func TestRangeStepOverflow(t *testing.T) { tk.MustExec("analyze table t") // Trigger the loading of column stats. tk.MustQuery("select * from t where col between '8499-1-23 2:14:38' and '9961-7-23 18:35:26'").Check(testkit.Rows()) - require.Nil(t, h.LoadNeededHistograms()) + require.Nil(t, h.LoadNeededHistograms(dom.InfoSchema())) // Must execute successfully after loading the column stats. tk.MustQuery("select * from t where col between '8499-1-23 2:14:38' and '9961-7-23 18:35:26'").Check(testkit.Rows()) } @@ -1341,7 +1341,7 @@ func TestBuiltinInEstWithoutStats(t *testing.T) { tk.MustQuery("explain format='brief' select * from t where b in (1, 2, 3, 4, 5, 6, 7, 8)").Check(expectedB) h.Clear() - require.NoError(t, h.InitStatsLite(context.Background(), is)) + require.NoError(t, h.InitStatsLite(context.Background())) tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(expectedA) tk.MustQuery("explain format='brief' select * from t where b in (1, 2, 3, 4, 5, 6, 7, 8)").Check(expectedB) @@ -1356,7 +1356,6 @@ func TestBuiltinInEstWithoutStats(t *testing.T) { require.True(t, found) require.False(t, statsTbl.ColAndIdxExistenceMap.IsEmpty()) for _, col := range tbl.Cols() { - require.True(t, statsTbl.ColAndIdxExistenceMap.Has(col.ID, false)) require.False(t, statsTbl.ColAndIdxExistenceMap.HasAnalyzed(col.ID, false)) } } diff --git a/pkg/planner/cardinality/trace_test.go b/pkg/planner/cardinality/trace_test.go index a42b981163a15..f61fe7932e43e 100644 --- a/pkg/planner/cardinality/trace_test.go +++ b/pkg/planner/cardinality/trace_test.go @@ -70,7 +70,7 @@ func TestTraceCE(t *testing.T) { tk.MustExec(sql) } statsHandle := dom.StatsHandle() - err := statsHandle.LoadNeededHistograms() + err := statsHandle.LoadNeededHistograms(dom.InfoSchema()) require.NoError(t, err) sctx := tk.Session().(sessionctx.Context) @@ -188,7 +188,7 @@ func TestTraceDebugSelectivity(t *testing.T) { sql := "explain " + tt tk.MustExec(sql) } - err := statsHandle.LoadNeededHistograms() + err := statsHandle.LoadNeededHistograms(dom.InfoSchema()) require.NoError(t, err) sctx := tk.Session().(sessionctx.Context) diff --git a/pkg/planner/core/integration_test.go b/pkg/planner/core/integration_test.go index 9c7faed244a92..9f11f6eed7c73 100644 --- a/pkg/planner/core/integration_test.go +++ b/pkg/planner/core/integration_test.go @@ -2211,7 +2211,7 @@ func TestIssue48257(t *testing.T) { "TableReader 10000.00 root data:TableFullScan", "└─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", )) - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) tk.MustQuery("explain format = brief select * from t1").Check(testkit.Rows( "TableReader 1.00 root data:TableFullScan", "└─TableFullScan 1.00 cop[tikv] table:t1 keep order:false", diff --git a/pkg/statistics/BUILD.bazel b/pkg/statistics/BUILD.bazel index cad48db7936fe..48146fd029700 100644 --- a/pkg/statistics/BUILD.bazel +++ b/pkg/statistics/BUILD.bazel @@ -82,7 +82,7 @@ go_test( data = glob(["testdata/**"]), embed = [":statistics"], flaky = True, - shard_count = 38, + shard_count = 37, deps = [ "//pkg/config", "//pkg/meta/model", diff --git a/pkg/statistics/handle/autoanalyze/priorityqueue/analysis_job_factory_test.go b/pkg/statistics/handle/autoanalyze/priorityqueue/analysis_job_factory_test.go index cce1fe5f3e63a..61fd3fdec22d5 100644 --- a/pkg/statistics/handle/autoanalyze/priorityqueue/analysis_job_factory_test.go +++ b/pkg/statistics/handle/autoanalyze/priorityqueue/analysis_job_factory_test.go @@ -120,8 +120,8 @@ func TestGetTableLastAnalyzeDuration(t *testing.T) { func TestCheckIndexesNeedAnalyze(t *testing.T) { analyzedMap := statistics.NewColAndIndexExistenceMap(1, 0) - analyzedMap.InsertCol(1, nil, true) - analyzedMap.InsertIndex(1, nil, false) + analyzedMap.InsertCol(1, true) + analyzedMap.InsertIndex(1, false) tests := []struct { name string tblInfo *model.TableInfo @@ -184,9 +184,9 @@ func TestCalculateIndicatorsForPartitions(t *testing.T) { lastUpdateTs := oracle.GoTimeToTS(lastUpdateTime) unanalyzedMap := statistics.NewColAndIndexExistenceMap(0, 0) analyzedMap := statistics.NewColAndIndexExistenceMap(2, 1) - analyzedMap.InsertCol(1, nil, true) - analyzedMap.InsertCol(2, nil, true) - analyzedMap.InsertIndex(1, nil, true) + analyzedMap.InsertCol(1, true) + analyzedMap.InsertCol(2, true) + analyzedMap.InsertIndex(1, true) tests := []struct { name string globalStats *statistics.Table diff --git a/pkg/statistics/handle/bootstrap.go b/pkg/statistics/handle/bootstrap.go index 7671c4a2877fa..8504e18f3e53a 100644 --- a/pkg/statistics/handle/bootstrap.go +++ b/pkg/statistics/handle/bootstrap.go @@ -57,32 +57,17 @@ type MaxTidRecord struct { tid atomic.Int64 } -func (h *Handle) initStatsMeta4Chunk(ctx context.Context, is infoschema.InfoSchema, cache statstypes.StatsCache, iter *chunk.Iterator4Chunk) { +func (*Handle) initStatsMeta4Chunk(cache statstypes.StatsCache, iter *chunk.Iterator4Chunk) { var physicalID, maxPhysicalID int64 for row := iter.Begin(); row != iter.End(); row = iter.Next() { physicalID = row.GetInt64(1) - - // Detect the context cancel signal, since it may take a long time for the loop. - // TODO: add context to TableInfoByID and remove this code block? - if ctx.Err() != nil { - return - } - - // The table is read-only. Please do not modify it. - table, ok := h.TableInfoByID(is, physicalID) - if !ok { - logutil.BgLogger().Debug("unknown physical ID in stats meta table, maybe it has been dropped", zap.Int64("ID", physicalID)) - continue - } maxPhysicalID = max(physicalID, maxPhysicalID) - tableInfo := table.Meta() newHistColl := *statistics.NewHistColl(physicalID, true, row.GetInt64(3), row.GetInt64(2), 4, 4) snapshot := row.GetUint64(4) tbl := &statistics.Table{ HistColl: newHistColl, Version: row.GetUint64(0), - ColAndIdxExistenceMap: statistics.NewColAndIndexExistenceMap(len(tableInfo.Columns), len(tableInfo.Indices)), - IsPkIsHandle: tableInfo.PKIsHandle, + ColAndIdxExistenceMap: statistics.NewColAndIndexExistenceMapWithoutSize(), // During the initialization phase, we need to initialize LastAnalyzeVersion with the snapshot, // which ensures that we don't duplicate the auto-analyze of a particular type of table. // When the predicate columns feature is turned on, if a table has neither predicate columns nor indexes, @@ -103,7 +88,7 @@ func (h *Handle) initStatsMeta4Chunk(ctx context.Context, is infoschema.InfoSche } } -func (h *Handle) initStatsMeta(ctx context.Context, is infoschema.InfoSchema) (statstypes.StatsCache, error) { +func (h *Handle) initStatsMeta(ctx context.Context) (statstypes.StatsCache, error) { ctx = kv.WithInternalSourceType(ctx, kv.InternalTxnStats) sql := "select HIGH_PRIORITY version, table_id, modify_count, count, snapshot from mysql.stats_meta" rc, err := util.Exec(h.initStatsCtx, sql) @@ -125,12 +110,12 @@ func (h *Handle) initStatsMeta(ctx context.Context, is infoschema.InfoSchema) (s if req.NumRows() == 0 { break } - h.initStatsMeta4Chunk(ctx, is, tables, iter) + h.initStatsMeta4Chunk(tables, iter) } return tables, nil } -func (h *Handle) initStatsHistograms4ChunkLite(is infoschema.InfoSchema, cache statstypes.StatsCache, iter *chunk.Iterator4Chunk) { +func (*Handle) initStatsHistograms4ChunkLite(cache statstypes.StatsCache, iter *chunk.Iterator4Chunk) { var table *statistics.Table for row := iter.Begin(); row != iter.End(); row = iter.Next() { tblID := row.GetInt64(0) @@ -150,39 +135,18 @@ func (h *Handle) initStatsHistograms4ChunkLite(is infoschema.InfoSchema, cache s ndv := row.GetInt64(3) nullCount := row.GetInt64(5) statsVer := row.GetInt64(7) - tbl, _ := h.TableInfoByID(is, table.PhysicalID) // All the objects in the table share the same stats version. if statsVer != statistics.Version0 { table.StatsVer = int(statsVer) } if isIndex > 0 { - var idxInfo *model.IndexInfo - for _, idx := range tbl.Meta().Indices { - if idx.ID == id { - idxInfo = idx - break - } - } - if idxInfo == nil { - continue - } - table.ColAndIdxExistenceMap.InsertIndex(idxInfo.ID, idxInfo, statsVer != statistics.Version0) + table.ColAndIdxExistenceMap.InsertIndex(id, statsVer != statistics.Version0) if statsVer != statistics.Version0 { // The LastAnalyzeVersion is added by ALTER table so its value might be 0. table.LastAnalyzeVersion = max(table.LastAnalyzeVersion, row.GetUint64(4)) } } else { - var colInfo *model.ColumnInfo - for _, col := range tbl.Meta().Columns { - if col.ID == id { - colInfo = col - break - } - } - if colInfo == nil { - continue - } - table.ColAndIdxExistenceMap.InsertCol(colInfo.ID, colInfo, statsVer != statistics.Version0 || ndv > 0 || nullCount > 0) + table.ColAndIdxExistenceMap.InsertCol(id, statsVer != statistics.Version0 || ndv > 0 || nullCount > 0) if statsVer != statistics.Version0 { // The LastAnalyzeVersion is added by ALTER table so its value might be 0. table.LastAnalyzeVersion = max(table.LastAnalyzeVersion, row.GetUint64(4)) @@ -200,6 +164,7 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, cache stats tblID, statsVer := row.GetInt64(0), row.GetInt64(8) if table == nil || table.PhysicalID != tblID { if table != nil { + table.ColAndIdxExistenceMap.SetChecked() cache.Put(table.PhysicalID, table) // put this table in the cache because all statstics of the table have been read. } var ok bool @@ -257,7 +222,7 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, cache stats } lastAnalyzePos.Copy(&index.LastAnalyzePos) table.SetIdx(idxInfo.ID, index) - table.ColAndIdxExistenceMap.InsertIndex(idxInfo.ID, idxInfo, statsVer != statistics.Version0) + table.ColAndIdxExistenceMap.InsertIndex(idxInfo.ID, statsVer != statistics.Version0) } else { var colInfo *model.ColumnInfo for _, col := range tbl.Meta().Columns { @@ -283,7 +248,7 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, cache stats col.StatsLoadedStatus = statistics.NewStatsAllEvictedStatus() lastAnalyzePos.Copy(&col.LastAnalyzePos) table.SetCol(hist.ID, col) - table.ColAndIdxExistenceMap.InsertCol(colInfo.ID, colInfo, statsVer != statistics.Version0 || ndv > 0 || nullCount > 0) + table.ColAndIdxExistenceMap.InsertCol(colInfo.ID, statsVer != statistics.Version0 || ndv > 0 || nullCount > 0) if statsVer != statistics.Version0 { // The LastAnalyzeVersion is added by ALTER table so its value might be 0. table.LastAnalyzeVersion = max(table.LastAnalyzeVersion, version) @@ -291,11 +256,12 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, cache stats } } if table != nil { + table.ColAndIdxExistenceMap.SetChecked() cache.Put(table.PhysicalID, table) // put this table in the cache because all statstics of the table have been read. } } -func (h *Handle) initStatsHistogramsLite(ctx context.Context, is infoschema.InfoSchema, cache statstypes.StatsCache) error { +func (h *Handle) initStatsHistogramsLite(ctx context.Context, cache statstypes.StatsCache) error { sql := "select /*+ ORDER_INDEX(mysql.stats_histograms,tbl)*/ HIGH_PRIORITY table_id, is_index, hist_id, distinct_count, version, null_count, tot_col_size, stats_ver, correlation, flag, last_analyze_pos from mysql.stats_histograms order by table_id" rc, err := util.Exec(h.initStatsCtx, sql) if err != nil { @@ -313,7 +279,7 @@ func (h *Handle) initStatsHistogramsLite(ctx context.Context, is infoschema.Info if req.NumRows() == 0 { break } - h.initStatsHistograms4ChunkLite(is, cache, iter) + h.initStatsHistograms4ChunkLite(cache, iter) } return nil } @@ -755,7 +721,7 @@ func (h *Handle) initStatsBucketsConcurrency(cache statstypes.StatsCache, totalM // 1. Basic stats meta data is loaded.(count, modify count, etc.) // 2. Column/index stats are loaded. (only histogram) // 3. TopN, Bucket, FMSketch are not loaded. -func (h *Handle) InitStatsLite(ctx context.Context, is infoschema.InfoSchema) (err error) { +func (h *Handle) InitStatsLite(ctx context.Context) (err error) { defer func() { _, err1 := util.Exec(h.initStatsCtx, "commit") if err == nil && err1 != nil { @@ -767,12 +733,12 @@ func (h *Handle) InitStatsLite(ctx context.Context, is infoschema.InfoSchema) (e return err } failpoint.Inject("beforeInitStatsLite", func() {}) - cache, err := h.initStatsMeta(ctx, is) + cache, err := h.initStatsMeta(ctx) if err != nil { return errors.Trace(err) } statslogutil.StatsLogger().Info("complete to load the meta in the lite mode") - err = h.initStatsHistogramsLite(ctx, is, cache) + err = h.initStatsHistogramsLite(ctx, cache) if err != nil { cache.Close() return errors.Trace(err) @@ -802,7 +768,7 @@ func (h *Handle) InitStats(ctx context.Context, is infoschema.InfoSchema) (err e return err } failpoint.Inject("beforeInitStats", func() {}) - cache, err := h.initStatsMeta(ctx, is) + cache, err := h.initStatsMeta(ctx) if err != nil { return errors.Trace(err) } diff --git a/pkg/statistics/handle/handle.go b/pkg/statistics/handle/handle.go index 0976692444824..cadc15e3c2e39 100644 --- a/pkg/statistics/handle/handle.go +++ b/pkg/statistics/handle/handle.go @@ -17,6 +17,7 @@ package handle import ( "time" + "github.com/pingcap/tidb/pkg/infoschema" "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/sessionctx" "github.com/pingcap/tidb/pkg/sessionctx/sysproctrack" @@ -111,6 +112,7 @@ func NewHandle( _, /* ctx, keep it for feature usage */ initStatsCtx sessionctx.Context, lease time.Duration, + is infoschema.InfoSchema, pool pkgutil.SessionPool, tracker sysproctrack.Tracker, autoAnalyzeProcIDGetter func() uint64, @@ -136,7 +138,7 @@ func NewHandle( handle.StatsHistory = history.NewStatsHistory(handle) handle.StatsUsage = usage.NewStatsUsageImpl(handle) handle.StatsAnalyze = autoanalyze.NewStatsAnalyze(handle, tracker) - handle.StatsSyncLoad = syncload.NewStatsSyncLoad(handle) + handle.StatsSyncLoad = syncload.NewStatsSyncLoad(is, handle) handle.StatsGlobal = globalstats.NewStatsGlobal(handle) handle.DDL = ddl.NewDDLHandler( handle.StatsReadWriter, diff --git a/pkg/statistics/handle/handletest/handle_test.go b/pkg/statistics/handle/handletest/handle_test.go index 5c83c9e23a8c5..e330c797f75d0 100644 --- a/pkg/statistics/handle/handletest/handle_test.go +++ b/pkg/statistics/handle/handletest/handle_test.go @@ -118,7 +118,7 @@ func TestVersion(t *testing.T) { tbl1, err := is.TableByName(context.Background(), model.NewCIStr("test"), model.NewCIStr("t1")) require.NoError(t, err) tableInfo1 := tbl1.Meta() - h, err := handle.NewHandle(testKit.Session(), testKit2.Session(), time.Millisecond, do.SysSessionPool(), do.SysProcTracker(), do.NextConnID, do.ReleaseConnID) + h, err := handle.NewHandle(testKit.Session(), testKit2.Session(), time.Millisecond, is, do.SysSessionPool(), do.SysProcTracker(), do.NextConnID, do.ReleaseConnID) defer func() { h.Close() }() @@ -1401,7 +1401,7 @@ func TestInitStatsLite(t *testing.T) { checkAllEvicted(t, statsTbl0) h.Clear() - require.NoError(t, h.InitStatsLite(context.Background(), is)) + require.NoError(t, h.InitStatsLite(context.Background())) statsTbl1 := h.GetTableStats(tblInfo) checkAllEvicted(t, statsTbl1) { @@ -1417,7 +1417,7 @@ func TestInitStatsLite(t *testing.T) { // async stats load tk.MustExec("set @@tidb_stats_load_sync_wait = 0") tk.MustExec("explain select * from t where b > 1") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(is)) statsTbl2 := h.GetTableStats(tblInfo) colBStats1 := statsTbl2.GetCol(colBID) colCStats := statsTbl2.GetCol(colCID) diff --git a/pkg/statistics/handle/storage/json.go b/pkg/statistics/handle/storage/json.go index b7d4eb1f96816..39ceb3a37c296 100644 --- a/pkg/statistics/handle/storage/json.go +++ b/pkg/statistics/handle/storage/json.go @@ -203,7 +203,7 @@ func TableStatsFromJSON(tableInfo *model.TableInfo, physicalID int64, jsonTbl *u tbl.StatsVer = int(statsVer) } tbl.SetIdx(idx.ID, idx) - tbl.ColAndIdxExistenceMap.InsertIndex(idxInfo.ID, idxInfo, true) + tbl.ColAndIdxExistenceMap.InsertIndex(idxInfo.ID, true) } } @@ -255,7 +255,7 @@ func TableStatsFromJSON(tableInfo *model.TableInfo, physicalID int64, jsonTbl *u tbl.StatsVer = int(statsVer) } tbl.SetCol(col.ID, col) - tbl.ColAndIdxExistenceMap.InsertCol(colInfo.ID, colInfo, true) + tbl.ColAndIdxExistenceMap.InsertCol(colInfo.ID, true) } } tbl.ExtendedStats = extendedStatsFromJSON(jsonTbl.ExtStats) diff --git a/pkg/statistics/handle/storage/read.go b/pkg/statistics/handle/storage/read.go index eb6114834392e..5ca9b8d1f1787 100644 --- a/pkg/statistics/handle/storage/read.go +++ b/pkg/statistics/handle/storage/read.go @@ -312,7 +312,7 @@ func indexStatsFromStorage(sctx sessionctx.Context, row chunk.Row, table *statis if histID != idxInfo.ID { continue } - table.ColAndIdxExistenceMap.InsertIndex(idxInfo.ID, idxInfo, statsVer != statistics.Version0) + table.ColAndIdxExistenceMap.InsertIndex(idxInfo.ID, statsVer != statistics.Version0) // All the objects in the table shares the same stats version. // Update here. if statsVer != statistics.Version0 { @@ -408,7 +408,7 @@ func columnStatsFromStorage(sctx sessionctx.Context, row chunk.Row, table *stati if histID != colInfo.ID { continue } - table.ColAndIdxExistenceMap.InsertCol(histID, colInfo, statsVer != statistics.Version0 || distinct > 0 || nullCount > 0) + table.ColAndIdxExistenceMap.InsertCol(histID, statsVer != statistics.Version0 || distinct > 0 || nullCount > 0) // All the objects in the table shares the same stats version. // Update here. if statsVer != statistics.Version0 { @@ -587,14 +587,14 @@ func LoadHistogram(sctx sessionctx.Context, tableID int64, isIndex int, histID i } // LoadNeededHistograms will load histograms for those needed columns/indices. -func LoadNeededHistograms(sctx sessionctx.Context, statsHandle statstypes.StatsHandle, loadFMSketch bool) (err error) { +func LoadNeededHistograms(sctx sessionctx.Context, is infoschema.InfoSchema, statsHandle statstypes.StatsHandle, loadFMSketch bool) (err error) { items := asyncload.AsyncLoadHistogramNeededItems.AllItems() for _, item := range items { if !item.IsIndex { err = loadNeededColumnHistograms(sctx, statsHandle, item.TableItemID, loadFMSketch, item.FullLoad) } else { // Index is always full load. - err = loadNeededIndexHistograms(sctx, statsHandle, item.TableItemID, loadFMSketch) + err = loadNeededIndexHistograms(sctx, is, statsHandle, item.TableItemID, loadFMSketch) } if err != nil { return err @@ -635,28 +635,25 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsHandle statstypes. if !ok { return nil } + var colInfo *model.ColumnInfo _, loadNeeded, analyzed := tbl.ColumnIsLoadNeeded(col.ID, true) if !loadNeeded || !analyzed { asyncload.AsyncLoadHistogramNeededItems.Delete(col) return nil } - isUpdateColAndIdxExistenceMap := false - colInfo = tbl.ColAndIdxExistenceMap.GetCol(col.ID) + + // Now, we cannot init the column info in the ColAndIdxExistenceMap when to disable lite-init-stats. + // so we have to get the column info from the domain. + is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema) + tblInfo, ok := statsHandle.TableInfoByID(is, col.TableID) + if !ok { + return nil + } + colInfo = tblInfo.Meta().GetColumnByID(col.ID) if colInfo == nil { - // Now, we cannot init the column info in the ColAndIdxExistenceMap when to disable lite-init-stats. - // so we have to get the column info from the domain. - is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema) - tblInfo, ok := statsHandle.TableInfoByID(is, col.TableID) - if !ok { - return nil - } - colInfo = tblInfo.Meta().GetColumnByID(col.ID) - if colInfo == nil { - asyncload.AsyncLoadHistogramNeededItems.Delete(col) - return nil - } - isUpdateColAndIdxExistenceMap = true + asyncload.AsyncLoadHistogramNeededItems.Delete(col) + return nil } hg, _, statsVer, _, err := HistMetaFromStorageWithHighPriority(sctx, &col, colInfo) if hg == nil || err != nil { @@ -684,6 +681,7 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsHandle statstypes. } } } + colHist := &statistics.Column{ PhysicalID: col.TableID, Histogram: *hg, @@ -691,7 +689,7 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsHandle statstypes. CMSketch: cms, TopN: topN, FMSketch: fms, - IsHandle: tbl.IsPkIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()), + IsHandle: tblInfo.Meta().PKIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()), StatsVer: statsVer, } // Reload the latest stats cache, otherwise the `updateStatsCache` may fail with high probability, because functions @@ -711,11 +709,6 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsHandle statstypes. if statsVer != statistics.Version0 { tbl.StatsVer = int(statsVer) } - if isUpdateColAndIdxExistenceMap { - tbl.ColAndIdxExistenceMap.InsertCol(col.ID, colInfo, true) - } - } else if isUpdateColAndIdxExistenceMap { - tbl.ColAndIdxExistenceMap.InsertCol(col.ID, colInfo, false) } tbl.SetCol(col.ID, colHist) statsHandle.UpdateStatsCache([]*statistics.Table{tbl}, nil) @@ -729,8 +722,8 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsHandle statstypes. return nil } -func loadNeededIndexHistograms(sctx sessionctx.Context, statsCache statstypes.StatsCache, idx model.TableItemID, loadFMSketch bool) (err error) { - tbl, ok := statsCache.Get(idx.TableID) +func loadNeededIndexHistograms(sctx sessionctx.Context, is infoschema.InfoSchema, statsHandle statstypes.StatsHandle, idx model.TableItemID, loadFMSketch bool) (err error) { + tbl, ok := statsHandle.Get(idx.TableID) if !ok { return nil } @@ -744,7 +737,11 @@ func loadNeededIndexHistograms(sctx sessionctx.Context, statsCache statstypes.St asyncload.AsyncLoadHistogramNeededItems.Delete(idx) return err } - idxInfo := tbl.ColAndIdxExistenceMap.GetIndex(idx.ID) + tblInfo, ok := statsHandle.TableInfoByID(is, idx.TableID) + if !ok { + return nil + } + idxInfo := tblInfo.Meta().FindIndexByID(idx.ID) hg, err := HistogramFromStorageWithPriority(sctx, idx.TableID, idx.ID, types.NewFieldType(mysql.TypeBlob), hgMeta.NDV, 1, hgMeta.LastUpdateVersion, hgMeta.NullCount, hgMeta.TotColSize, hgMeta.Correlation, kv.PriorityHigh) if err != nil { return errors.Trace(err) @@ -766,7 +763,7 @@ func loadNeededIndexHistograms(sctx sessionctx.Context, statsCache statstypes.St StatsLoadedStatus: statistics.NewStatsFullLoadStatus()} lastAnalyzePos.Copy(&idxHist.LastAnalyzePos) - tbl, ok = statsCache.Get(idx.TableID) + tbl, ok = statsHandle.Get(idx.TableID) if !ok { return nil } @@ -776,7 +773,7 @@ func loadNeededIndexHistograms(sctx sessionctx.Context, statsCache statstypes.St } tbl.SetIdx(idx.ID, idxHist) tbl.LastAnalyzeVersion = max(tbl.LastAnalyzeVersion, idxHist.LastUpdateVersion) - statsCache.UpdateStatsCache([]*statistics.Table{tbl}, nil) + statsHandle.UpdateStatsCache([]*statistics.Table{tbl}, nil) if idx.IsSyncLoadFailed { logutil.BgLogger().Warn("Hist for index should already be loaded as sync but not found.", zap.Int64("table_id", idx.TableID), diff --git a/pkg/statistics/handle/storage/read_test.go b/pkg/statistics/handle/storage/read_test.go index 073b04dbbd248..1e67edb990127 100644 --- a/pkg/statistics/handle/storage/read_test.go +++ b/pkg/statistics/handle/storage/read_test.go @@ -71,7 +71,7 @@ func TestLoadStats(t *testing.T) { require.NoError(t, err) _, err = cardinality.ColumnEqualRowCount(testKit.Session().GetPlanCtx(), stat, types.NewIntDatum(1), colCID) require.NoError(t, err) - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) stat = h.GetTableStats(tableInfo) require.True(t, stat.GetCol(colAID).IsFullLoad()) hg := stat.GetCol(colAID).Histogram @@ -91,7 +91,7 @@ func TestLoadStats(t *testing.T) { require.False(t, idx != nil && idx.IsEssentialStatsLoaded()) // IsInvalid adds the index to AsyncLoadHistogramNeededItems. statistics.IndexStatsIsInvalid(testKit.Session().GetPlanCtx(), idx, &stat.HistColl, idxBID) - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) stat = h.GetTableStats(tableInfo) idx = stat.GetIdx(tableInfo.Indices[0].ID) hg = idx.Histogram diff --git a/pkg/statistics/handle/storage/stats_read_writer.go b/pkg/statistics/handle/storage/stats_read_writer.go index 98a58df885fc1..96d16c18c04db 100644 --- a/pkg/statistics/handle/storage/stats_read_writer.go +++ b/pkg/statistics/handle/storage/stats_read_writer.go @@ -258,10 +258,10 @@ func (s *statsReadWriter) LoadTablePartitionStats(tableInfo *model.TableInfo, pa } // LoadNeededHistograms will load histograms for those needed columns/indices. -func (s *statsReadWriter) LoadNeededHistograms() (err error) { +func (s *statsReadWriter) LoadNeededHistograms(is infoschema.InfoSchema) (err error) { err = util.CallWithSCtx(s.statsHandler.SPool(), func(sctx sessionctx.Context) error { loadFMSketch := config.GetGlobalConfig().Performance.EnableLoadFMSketch - return LoadNeededHistograms(sctx, s.statsHandler, loadFMSketch) + return LoadNeededHistograms(sctx, is, s.statsHandler, loadFMSketch) }, util.FlagWrapTxn) return err } diff --git a/pkg/statistics/handle/syncload/stats_syncload.go b/pkg/statistics/handle/syncload/stats_syncload.go index a49e5b707bb8a..d21d3a76c5596 100644 --- a/pkg/statistics/handle/syncload/stats_syncload.go +++ b/pkg/statistics/handle/syncload/stats_syncload.go @@ -60,14 +60,15 @@ func GetSyncLoadConcurrencyByCPU() int { type statsSyncLoad struct { statsHandle statstypes.StatsHandle + is infoschema.InfoSchema StatsLoad statstypes.StatsLoad } var globalStatsSyncLoadSingleFlight singleflight.Group // NewStatsSyncLoad creates a new StatsSyncLoad. -func NewStatsSyncLoad(statsHandle statstypes.StatsHandle) statstypes.StatsSyncLoad { - s := &statsSyncLoad{statsHandle: statsHandle} +func NewStatsSyncLoad(is infoschema.InfoSchema, statsHandle statstypes.StatsHandle) statstypes.StatsSyncLoad { + s := &statsSyncLoad{statsHandle: statsHandle, is: is} cfg := config.GetGlobalConfig() s.StatsLoad.NeededItemsCh = make(chan *statstypes.NeededItemTask, cfg.Performance.StatsLoadQueueSize) s.StatsLoad.TimeoutItemsCh = make(chan *statstypes.NeededItemTask, cfg.Performance.StatsLoadQueueSize) @@ -302,10 +303,16 @@ func (s *statsSyncLoad) handleOneItemTask(task *statstypes.NeededItemTask) (err }() item := task.Item.TableItemID tbl, ok := s.statsHandle.Get(item.TableID) + + if !ok { + return nil + } + is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema) + tblInfo, ok := s.statsHandle.TableInfoByID(is, item.TableID) if !ok { return nil } - var tblInfo table.Table + isPkIsHandle := tblInfo.Meta().PKIsHandle wrapper := &statsWrapper{} if item.IsIndex { index, loadNeeded := tbl.IndexIsLoadNeeded(item.ID) @@ -315,7 +322,7 @@ func (s *statsSyncLoad) handleOneItemTask(task *statstypes.NeededItemTask) (err if index != nil { wrapper.idxInfo = index.Info } else { - wrapper.idxInfo = tbl.ColAndIdxExistenceMap.GetIndex(item.ID) + wrapper.idxInfo = tblInfo.Meta().FindIndexByID(item.ID) } } else { col, loadNeeded, analyzed := tbl.ColumnIsLoadNeeded(item.ID, task.Item.FullLoad) @@ -324,16 +331,9 @@ func (s *statsSyncLoad) handleOneItemTask(task *statstypes.NeededItemTask) (err } if col != nil { wrapper.colInfo = col.Info - } else if colInfo := tbl.ColAndIdxExistenceMap.GetCol(item.ID); colInfo != nil { - wrapper.colInfo = colInfo } else { // Now, we cannot init the column info in the ColAndIdxExistenceMap when to disable lite-init-stats. // so we have to get the column info from the domain. - is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema) - tblInfo, ok = s.statsHandle.TableInfoByID(is, item.TableID) - if !ok { - return nil - } wrapper.colInfo = tblInfo.Meta().GetColumnByID(item.ID) } // If this column is not analyzed yet and we don't have it in memory. @@ -343,15 +343,15 @@ func (s *statsSyncLoad) handleOneItemTask(task *statstypes.NeededItemTask) (err PhysicalID: item.TableID, Info: wrapper.colInfo, Histogram: *statistics.NewHistogram(item.ID, 0, 0, 0, &wrapper.colInfo.FieldType, 0, 0), - IsHandle: tbl.IsPkIsHandle && mysql.HasPriKeyFlag(wrapper.colInfo.GetFlag()), + IsHandle: isPkIsHandle && mysql.HasPriKeyFlag(wrapper.colInfo.GetFlag()), } - s.updateCachedItem(item, wrapper.col, wrapper.idx, task.Item.FullLoad) + s.updateCachedItem(tblInfo, item, wrapper.col, wrapper.idx, task.Item.FullLoad) return nil } } t := time.Now() needUpdate := false - wrapper, err = s.readStatsForOneItem(sctx, item, wrapper, tbl.IsPkIsHandle, task.Item.FullLoad) + wrapper, err = s.readStatsForOneItem(sctx, item, wrapper, isPkIsHandle, task.Item.FullLoad) if err != nil { return err } @@ -366,7 +366,7 @@ func (s *statsSyncLoad) handleOneItemTask(task *statstypes.NeededItemTask) (err } metrics.ReadStatsHistogram.Observe(float64(time.Since(t).Milliseconds())) if needUpdate { - s.updateCachedItem(item, wrapper.col, wrapper.idx, task.Item.FullLoad) + s.updateCachedItem(tblInfo, item, wrapper.col, wrapper.idx, task.Item.FullLoad) } return nil } @@ -539,7 +539,7 @@ func (*statsSyncLoad) writeToResultChan(resultCh chan stmtctx.StatsLoadResult, r } // updateCachedItem updates the column/index hist to global statsCache. -func (s *statsSyncLoad) updateCachedItem(item model.TableItemID, colHist *statistics.Column, idxHist *statistics.Index, fullLoaded bool) (updated bool) { +func (s *statsSyncLoad) updateCachedItem(tblInfo table.Table, item model.TableItemID, colHist *statistics.Column, idxHist *statistics.Index, fullLoaded bool) (updated bool) { s.StatsLoad.Lock() defer s.StatsLoad.Unlock() // Reload the latest stats cache, otherwise the `updateStatsCache` may fail with high probability, because functions @@ -548,6 +548,22 @@ func (s *statsSyncLoad) updateCachedItem(item model.TableItemID, colHist *statis if !ok { return false } + if !tbl.ColAndIdxExistenceMap.Checked() { + tbl = tbl.Copy() + for _, col := range tbl.HistColl.GetColSlice() { + if tblInfo.Meta().FindColumnByID(col.ID) == nil { + tbl.HistColl.DelCol(col.ID) + tbl.ColAndIdxExistenceMap.DeleteColAnalyzed(col.ID) + } + } + for _, idx := range tbl.HistColl.GetIdxSlice() { + if tblInfo.Meta().FindIndexByID(idx.ID) == nil { + tbl.HistColl.DelIdx(idx.ID) + tbl.ColAndIdxExistenceMap.DeleteIdxAnalyzed(idx.ID) + } + } + tbl.ColAndIdxExistenceMap.SetChecked() + } if !item.IsIndex && colHist != nil { c := tbl.GetCol(item.ID) // - If the stats is fully loaded, @@ -558,12 +574,16 @@ func (s *statsSyncLoad) updateCachedItem(item model.TableItemID, colHist *statis tbl = tbl.Copy() tbl.SetCol(item.ID, colHist) + // If the column is analyzed we refresh the map for the possible change. + if colHist.StatsAvailable() { + tbl.ColAndIdxExistenceMap.InsertCol(item.ID, true) + } // All the objects shares the same stats version. Update it here. if colHist.StatsVer != statistics.Version0 { tbl.StatsVer = statistics.Version0 } // we have to refresh the map for the possible change to ensure that the map information is not missing. - tbl.ColAndIdxExistenceMap.InsertCol(item.ID, colHist.Info, colHist.StatsAvailable()) + tbl.ColAndIdxExistenceMap.InsertCol(item.ID, colHist.StatsAvailable()) } else if item.IsIndex && idxHist != nil { index := tbl.GetIdx(item.ID) // - If the stats is fully loaded, @@ -575,7 +595,7 @@ func (s *statsSyncLoad) updateCachedItem(item model.TableItemID, colHist *statis tbl.SetIdx(item.ID, idxHist) // If the index is analyzed we refresh the map for the possible change. if idxHist.IsAnalyzed() { - tbl.ColAndIdxExistenceMap.InsertIndex(item.ID, idxHist.Info, true) + tbl.ColAndIdxExistenceMap.InsertIndex(item.ID, true) // All the objects shares the same stats version. Update it here. tbl.StatsVer = statistics.Version0 } diff --git a/pkg/statistics/handle/types/interfaces.go b/pkg/statistics/handle/types/interfaces.go index 75de80fc3973e..32ec809fe1771 100644 --- a/pkg/statistics/handle/types/interfaces.go +++ b/pkg/statistics/handle/types/interfaces.go @@ -278,7 +278,7 @@ type StatsReadWriter interface { StatsMetaCountAndModifyCount(tableID int64) (count, modifyCount int64, err error) // LoadNeededHistograms will load histograms for those needed columns/indices and put them into the cache. - LoadNeededHistograms() (err error) + LoadNeededHistograms(is infoschema.InfoSchema) (err error) // ReloadExtendedStatistics drops the cache for extended statistics and reload data from mysql.stats_extended. ReloadExtendedStatistics() error diff --git a/pkg/statistics/handle/updatetest/update_test.go b/pkg/statistics/handle/updatetest/update_test.go index 0e4ac3a82c7d8..88616fbddbe20 100644 --- a/pkg/statistics/handle/updatetest/update_test.go +++ b/pkg/statistics/handle/updatetest/update_test.go @@ -450,7 +450,7 @@ func TestAutoUpdate(t *testing.T) { h.HandleAutoAnalyze() require.NoError(t, h.Update(context.Background(), is)) testKit.MustExec("explain select * from t where a > 'a'") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) stats = h.GetTableStats(tableInfo) require.Equal(t, int64(8), stats.RealtimeCount) require.Equal(t, int64(0), stats.ModifyCount) @@ -637,7 +637,7 @@ func TestLoadHistCorrelation(t *testing.T) { result := testKit.MustQuery("show stats_histograms where Table_name = 't'") require.Len(t, result.Rows(), 0) testKit.MustExec("explain select * from t where c = 1") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) result = testKit.MustQuery("show stats_histograms where Table_name = 't'") require.Len(t, result.Rows(), 2) require.Equal(t, "1", result.Rows()[0][9]) @@ -868,7 +868,7 @@ func TestAutoAnalyzeRatio(t *testing.T) { // To pass the stats.Pseudo check in autoAnalyzeTable tk.MustExec("analyze table t") tk.MustExec("explain select * from t where a = 1") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) tk.MustExec("set global tidb_auto_analyze_start_time='00:00 +0000'") tk.MustExec("set global tidb_auto_analyze_end_time='23:59 +0000'") @@ -1070,7 +1070,7 @@ func TestStatsLockUnlockForAutoAnalyze(t *testing.T) { // To pass the stats.Pseudo check in autoAnalyzeTable tk.MustExec("analyze table t") tk.MustExec("explain select * from t where a = 1") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) tk.MustExec("set global tidb_auto_analyze_start_time='00:00 +0000'") tk.MustExec("set global tidb_auto_analyze_end_time='23:59 +0000'") diff --git a/pkg/statistics/integration_test.go b/pkg/statistics/integration_test.go index 5cac4a39e0a83..9b94091ca93e7 100644 --- a/pkg/statistics/integration_test.go +++ b/pkg/statistics/integration_test.go @@ -352,7 +352,7 @@ func TestOutdatedStatsCheck(t *testing.T) { // To pass the stats.Pseudo check in autoAnalyzeTable tk.MustExec("analyze table t") tk.MustExec("explain select * from t where a = 1") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) getStatsHealthy := func() int { rows := tk.MustQuery("show stats_healthy where db_name = 'test' and table_name = 't'").Rows() @@ -508,46 +508,6 @@ func TestIssue44369(t *testing.T) { tk.MustExec("select * from t where a = 10 and bb > 20;") } -// Test the case that after ALTER TABLE happens, the pointer to the column info/index info should be refreshed. -func TestColAndIdxExistenceMapChangedAfterAlterTable(t *testing.T) { - store, dom := testkit.CreateMockStoreAndDomain(t) - h := dom.StatsHandle() - tk := testkit.NewTestKit(t, store) - tk.MustExec("use test") - tk.MustExec("create table t(a int, b int, index iab(a,b));") - require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh())) - tk.MustExec("insert into t value(1,1);") - require.NoError(t, h.DumpStatsDeltaToKV(true)) - tk.MustExec("analyze table t;") - is := dom.InfoSchema() - require.NoError(t, h.Update(context.Background(), is)) - tbl, err := dom.InfoSchema().TableByName(context.Background(), model.NewCIStr("test"), model.NewCIStr("t")) - require.NoError(t, err) - tblInfo := tbl.Meta() - statsTbl := h.GetTableStats(tblInfo) - colA := tblInfo.Columns[0] - colInfo := statsTbl.ColAndIdxExistenceMap.GetCol(colA.ID) - require.Equal(t, colA, colInfo) - - tk.MustExec("alter table t modify column a double") - require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh())) - is = dom.InfoSchema() - require.NoError(t, h.Update(context.Background(), is)) - tbl, err = dom.InfoSchema().TableByName(context.Background(), model.NewCIStr("test"), model.NewCIStr("t")) - require.NoError(t, err) - tblInfo = tbl.Meta() - newColA := tblInfo.Columns[0] - require.NotEqual(t, colA.ID, newColA.ID) - statsTbl = h.GetTableStats(tblInfo) - colInfo = statsTbl.ColAndIdxExistenceMap.GetCol(newColA.ID) - require.Equal(t, newColA, colInfo) - tk.MustExec("analyze table t;") - require.NoError(t, h.Update(context.Background(), is)) - statsTbl = h.GetTableStats(tblInfo) - colInfo = statsTbl.ColAndIdxExistenceMap.GetCol(newColA.ID) - require.Equal(t, newColA, colInfo) -} - func TestTableLastAnalyzeVersion(t *testing.T) { store, dom := testkit.CreateMockStoreAndDomain(t) h := dom.StatsHandle() diff --git a/pkg/statistics/table.go b/pkg/statistics/table.go index bee7a9b1c26dc..02fc3b8f6b30b 100644 --- a/pkg/statistics/table.go +++ b/pkg/statistics/table.go @@ -17,7 +17,6 @@ package statistics import ( "cmp" "fmt" - stdmaps "maps" "slices" "strings" @@ -86,22 +85,29 @@ type Table struct { // ColAndIdxExistenceMap is the meta map for statistics.Table. // It can tell whether a column/index really has its statistics. So we won't send useless kv request when we do online stats loading. type ColAndIdxExistenceMap struct { - colInfoMap map[int64]*model.ColumnInfo + checked bool colAnalyzed map[int64]bool - idxInfoMap map[int64]*model.IndexInfo idxAnalyzed map[int64]bool } -// Has checks whether a column/index stats exists. -// This method only checks whether the given item exists or not. -// Don't check whether it has statistics or not. -func (m *ColAndIdxExistenceMap) Has(id int64, isIndex bool) bool { - if isIndex { - _, ok := m.idxInfoMap[id] - return ok - } - _, ok := m.colInfoMap[id] - return ok +// DeleteColAnalyzed deletes the column with the given id. +func (m *ColAndIdxExistenceMap) DeleteColAnalyzed(id int64) { + delete(m.colAnalyzed, id) +} + +// DeleteIdxAnalyzed deletes the index with the given id. +func (m *ColAndIdxExistenceMap) DeleteIdxAnalyzed(id int64) { + delete(m.idxAnalyzed, id) +} + +// Checked returns whether the map has been checked. +func (m *ColAndIdxExistenceMap) Checked() bool { + return m.checked +} + +// SetChecked set the map as checked. +func (m *ColAndIdxExistenceMap) SetChecked() { + m.checked = true } // HasAnalyzed checks whether a column/index stats exists and it has stats. @@ -122,53 +128,50 @@ func (m *ColAndIdxExistenceMap) HasAnalyzed(id int64, isIndex bool) bool { } // InsertCol inserts a column with its meta into the map. -func (m *ColAndIdxExistenceMap) InsertCol(id int64, info *model.ColumnInfo, analyzed bool) { - m.colInfoMap[id] = info +func (m *ColAndIdxExistenceMap) InsertCol(id int64, analyzed bool) { m.colAnalyzed[id] = analyzed } -// GetCol gets the meta data of the given column. -func (m *ColAndIdxExistenceMap) GetCol(id int64) *model.ColumnInfo { - return m.colInfoMap[id] -} - // InsertIndex inserts an index with its meta into the map. -func (m *ColAndIdxExistenceMap) InsertIndex(id int64, info *model.IndexInfo, analyzed bool) { - m.idxInfoMap[id] = info +func (m *ColAndIdxExistenceMap) InsertIndex(id int64, analyzed bool) { m.idxAnalyzed[id] = analyzed } -// GetIndex gets the meta data of the given index. -func (m *ColAndIdxExistenceMap) GetIndex(id int64) *model.IndexInfo { - return m.idxInfoMap[id] -} - // IsEmpty checks whether the map is empty. func (m *ColAndIdxExistenceMap) IsEmpty() bool { - return len(m.colInfoMap)+len(m.idxInfoMap) == 0 + return len(m.colAnalyzed)+len(m.idxAnalyzed) == 0 } // ColNum returns the number of columns in the map. func (m *ColAndIdxExistenceMap) ColNum() int { - return len(m.colInfoMap) + return len(m.colAnalyzed) } // Clone deeply copies the map. func (m *ColAndIdxExistenceMap) Clone() *ColAndIdxExistenceMap { - mm := NewColAndIndexExistenceMap(len(m.colInfoMap), len(m.idxInfoMap)) - mm.colInfoMap = stdmaps.Clone(m.colInfoMap) - mm.colAnalyzed = stdmaps.Clone(m.colAnalyzed) - mm.idxAnalyzed = stdmaps.Clone(m.idxAnalyzed) - mm.idxInfoMap = stdmaps.Clone(m.idxInfoMap) + mm := NewColAndIndexExistenceMap(len(m.colAnalyzed), len(m.idxAnalyzed)) + mm.colAnalyzed = maps.Clone(m.colAnalyzed) + mm.idxAnalyzed = maps.Clone(m.idxAnalyzed) return mm } +const ( + defaultColCap = 16 + defaultIdxCap = 4 +) + +// NewColAndIndexExistenceMapWithoutSize return a new object with default capacity. +func NewColAndIndexExistenceMapWithoutSize() *ColAndIdxExistenceMap { + return &ColAndIdxExistenceMap{ + colAnalyzed: make(map[int64]bool, defaultColCap), + idxAnalyzed: make(map[int64]bool, defaultIdxCap), + } +} + // NewColAndIndexExistenceMap return a new object with the given capcity. func NewColAndIndexExistenceMap(colCap, idxCap int) *ColAndIdxExistenceMap { return &ColAndIdxExistenceMap{ - colInfoMap: make(map[int64]*model.ColumnInfo, colCap), colAnalyzed: make(map[int64]bool, colCap), - idxInfoMap: make(map[int64]*model.IndexInfo, idxCap), idxAnalyzed: make(map[int64]bool, idxCap), } } @@ -349,6 +352,15 @@ func (coll *HistColl) StableOrderColSlice() []*Column { return cols } +// GetColSlice returns a slice of columns without order. +func (coll *HistColl) GetColSlice() []*Column { + cols := make([]*Column, 0, len(coll.columns)) + for _, col := range coll.columns { + cols = append(cols, col) + } + return cols +} + // StableOrderIdxSlice returns a slice of indices in stable order. func (coll *HistColl) StableOrderIdxSlice() []*Index { idxs := make([]*Index, 0, len(coll.indices)) @@ -361,6 +373,15 @@ func (coll *HistColl) StableOrderIdxSlice() []*Index { return idxs } +// GetIdxSlice returns a slice of indices without order. +func (coll *HistColl) GetIdxSlice() []*Index { + idxs := make([]*Index, 0, len(coll.indices)) + for _, idx := range coll.indices { + idxs = append(idxs, idx) + } + return idxs +} + // SetAllIndexFullLoadForBootstrap sets all indices' stats loaded status to full load for bootstrap. func (coll *HistColl) SetAllIndexFullLoadForBootstrap() { for _, idx := range coll.indices { @@ -578,7 +599,6 @@ func (t *Table) Copy() *Table { HistColl: newHistColl, Version: t.Version, TblInfoUpdateTS: t.TblInfoUpdateTS, - IsPkIsHandle: t.IsPkIsHandle, LastAnalyzeVersion: t.LastAnalyzeVersion, } if t.ExtendedStats != nil { @@ -805,17 +825,14 @@ func (t *Table) ColumnIsLoadNeeded(id int64, fullLoad bool) (*Column, bool, bool if !ok { return nil, true, true } + if t.ColAndIdxExistenceMap.Checked() { + return nil, true, true + } hasAnalyzed := t.ColAndIdxExistenceMap.HasAnalyzed(id, false) // If it's not analyzed yet. if !hasAnalyzed { - // If we don't have it in memory, we create a fake hist for pseudo estimation (see handleOneItemTask()). - // It's something ridiculous. But it's possible that the stats don't have some ColumnInfo. - // We need to find a way to maintain it more correctly. - // Otherwise we don't need to load it. - result := t.ColAndIdxExistenceMap.Has(id, false) - // If the column is not in the ColAndIdxExistenceMap, we need to load it. - return nil, !result, !result + return nil, false, false } // Restore the condition from the simplified form: @@ -837,7 +854,7 @@ func (t *Table) ColumnIsLoadNeeded(id int64, fullLoad bool) (*Column, bool, bool func (t *Table) IndexIsLoadNeeded(id int64) (*Index, bool) { idx, ok := t.indices[id] // If the index is not in the memory, and we have its stats in the storage. We need to trigger the load. - if !ok && t.ColAndIdxExistenceMap.HasAnalyzed(id, true) { + if !ok && (t.ColAndIdxExistenceMap.HasAnalyzed(id, true) || !t.ColAndIdxExistenceMap.Checked()) { return nil, true } // If the index is in the memory, we check its embedded func. @@ -1003,7 +1020,7 @@ func PseudoTable(tblInfo *model.TableInfo, allowTriggerLoading bool, allowFillHi // We would not collect stats for the hidden column and we won't use the hidden column to estimate. // Thus we don't create pseudo stats for it. if col.State == model.StatePublic && !col.Hidden { - t.ColAndIdxExistenceMap.InsertCol(col.ID, col, false) + t.ColAndIdxExistenceMap.InsertCol(col.ID, false) if allowFillHistMeta { t.columns[col.ID] = &Column{ PhysicalID: tblInfo.ID, @@ -1016,7 +1033,7 @@ func PseudoTable(tblInfo *model.TableInfo, allowTriggerLoading bool, allowFillHi } for _, idx := range tblInfo.Indices { if idx.State == model.StatePublic { - t.ColAndIdxExistenceMap.InsertIndex(idx.ID, idx, false) + t.ColAndIdxExistenceMap.InsertIndex(idx.ID, false) if allowFillHistMeta { t.indices[idx.ID] = &Index{ PhysicalID: tblInfo.ID, diff --git a/tests/realtikvtest/statisticstest/statistics_test.go b/tests/realtikvtest/statisticstest/statistics_test.go index c4b9ed93c9539..db4ae6afa8906 100644 --- a/tests/realtikvtest/statisticstest/statistics_test.go +++ b/tests/realtikvtest/statisticstest/statistics_test.go @@ -55,7 +55,7 @@ func TestNewCollationStatsWithPrefixIndex(t *testing.T) { tk.MustExec("analyze table t") tk.MustExec("explain select * from t where a = 'aaa'") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't'").Sort().Check(testkit.Rows( "test t a 0 0 1 1 \x00A \x00A 0", @@ -126,7 +126,7 @@ func TestNewCollationStatsWithPrefixIndex(t *testing.T) { tk.MustExec("analyze table t") tk.MustExec("explain select * from t where a = 'aaa'") - require.NoError(t, h.LoadNeededHistograms()) + require.NoError(t, h.LoadNeededHistograms(dom.InfoSchema())) tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't'").Sort().Check(testkit.Rows()) tk.MustQuery("show stats_topn where db_name = 'test' and table_name = 't'").Sort().Check(testkit.Rows(