Skip to content

Commit

Permalink
statistics: do not load unnecessary index statistics (#54060)
Browse files Browse the repository at this point in the history
close #54022
  • Loading branch information
Rustin170506 committed Jun 18, 2024
1 parent d179476 commit 3c599cd
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 7 deletions.
4 changes: 2 additions & 2 deletions pkg/statistics/handle/handle_hist.go
Original file line number Diff line number Diff line change
Expand Up @@ -368,9 +368,9 @@ func (*Handle) readStatsForOneItem(sctx sessionctx.Context, item model.TableItem
return nil, errors.Trace(err)
}
if len(rows) == 0 {
logutil.BgLogger().Error("fail to get stats version for this histogram", zap.Int64("table_id", item.TableID),
logutil.BgLogger().Error("fail to get stats version for this histogram, normally this wouldn't happen, please check if this column or index has a histogram record in `mysql.stats_histogram`", zap.Int64("table_id", item.TableID),
zap.Int64("hist_id", item.ID), zap.Bool("is_index", item.IsIndex))
return nil, errors.Trace(fmt.Errorf("fail to get stats version for this histogram, table_id:%v, hist_id:%v, is_index:%v", item.TableID, item.ID, item.IsIndex))
return nil, errors.Trace(fmt.Errorf("fail to get stats version for this histogram, normally this wouldn't happen, please check if this column or index has a histogram record in `mysql.stats_histogram`, table_id:%v, hist_id:%v, is_index:%v", item.TableID, item.ID, item.IsIndex))
}
statsVer := rows[0].GetInt64(0)
if item.IsIndex {
Expand Down
19 changes: 14 additions & 5 deletions pkg/statistics/handle/storage/read.go
Original file line number Diff line number Diff line change
Expand Up @@ -540,8 +540,8 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsCache util.StatsCa
return errors.Trace(err)
}
if len(rows) == 0 {
logutil.BgLogger().Error("fail to get stats version for this histogram", zap.Int64("table_id", col.TableID), zap.Int64("hist_id", col.ID))
return errors.Trace(fmt.Errorf("fail to get stats version for this histogram, table_id:%v, hist_id:%v", col.TableID, col.ID))
logutil.BgLogger().Error("fail to get stats version for this histogram, normally this wouldn't happen, please check if this column or index has a histogram record in `mysql.stats_histogram`", zap.Int64("table_id", col.TableID), zap.Int64("column_id", col.ID))
return errors.Trace(fmt.Errorf("fail to get stats version for this histogram, normally this wouldn't happen, please check if this column or index has a histogram record in `mysql.stats_histogram`, table_id:%v, column_id:%v", col.TableID, col.ID))
}
statsVer := rows[0].GetInt64(0)
colHist := &statistics.Column{
Expand Down Expand Up @@ -576,7 +576,16 @@ func loadNeededIndexHistograms(sctx sessionctx.Context, statsCache util.StatsCac
return nil
}
index, ok := tbl.Indices[idx.ID]
if !ok {
// Double check if the index is really needed to load.
// If we don't do this it might cause a memory leak.
// See: https://github.com/pingcap/tidb/issues/54022
if !ok || !index.IsLoadNeeded() {
if !index.IsLoadNeeded() {
logutil.BgLogger().Warn(
"Although the index stats is not required to load, an attempt is still made to load it, skip it",
zap.Int64("table_id", idx.TableID), zap.Int64("hist_id", idx.ID),
)
}
statistics.HistogramNeededItems.Delete(idx)
return nil
}
Expand All @@ -600,8 +609,8 @@ func loadNeededIndexHistograms(sctx sessionctx.Context, statsCache util.StatsCac
return errors.Trace(err)
}
if len(rows) == 0 {
logutil.BgLogger().Error("fail to get stats version for this histogram", zap.Int64("table_id", idx.TableID), zap.Int64("hist_id", idx.ID))
return errors.Trace(fmt.Errorf("fail to get stats version for this histogram, table_id:%v, hist_id:%v", idx.TableID, idx.ID))
logutil.BgLogger().Error("fail to get stats version for this histogram, normally this wouldn't happen, please check if this column or index has a histogram record in `mysql.stats_histogram`", zap.Int64("table_id", idx.TableID), zap.Int64("index_id", idx.ID))
return errors.Trace(fmt.Errorf("fail to get stats version for this histogram, normally this wouldn't happen, please check if this column or index has a histogram record in `mysql.stats_histogram`, table_id:%v, index_id:%v", idx.TableID, idx.ID))
}
idxHist := &statistics.Index{Histogram: *hg, CMSketch: cms, TopN: topN, FMSketch: fms,
Info: index.Info, StatsVer: rows[0].GetInt64(0),
Expand Down
2 changes: 2 additions & 0 deletions tests/realtikvtest/statisticstest/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ go_test(
flaky = True,
race = "on",
deps = [
"//pkg/parser/model",
"//pkg/statistics",
"//pkg/testkit",
"//tests/realtikvtest",
"@com_github_stretchr_testify//require",
Expand Down
75 changes: 75 additions & 0 deletions tests/realtikvtest/statisticstest/statistics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,13 @@
package statisticstest

import (
"context"
"fmt"
"testing"
"time"

"github.com/pingcap/tidb/pkg/parser/model"
"github.com/pingcap/tidb/pkg/statistics"
"github.com/pingcap/tidb/pkg/testkit"
"github.com/pingcap/tidb/tests/realtikvtest"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -232,3 +236,74 @@ func checkFMSketch(tk *testkit.TestKit) {
tk.MustQuery(`SHOW STATS_HISTOGRAMS WHERE TABLE_NAME='employees' and partition_name="global" and column_name="id"`).CheckAt([]int{6}, [][]any{
{"14"}})
}

func TestNoNeedIndexStatsLoading(t *testing.T) {
store, dom := realtikvtest.CreateMockStoreAndDomainAndSetup(t)
tk := testkit.NewTestKit(t, store)
tk.MustExec("use test;")
tk.MustExec("drop table if exists t;")
// 1. Create a table and the statsHandle.Update(do.InfoSchema()) will load this table into the stats cache.
tk.MustExec("create table if not exists t(a int, b int, index ia(a));")
// 2. Drop the stats of the stats, it will clean up all system table records for this table.
tk.MustExec("drop stats t;")
// 3. Insert some data and wait for the modify_count and the count is not null in the mysql.stats_meta.
tk.MustExec("insert into t value(1,1), (2,2);")
h := dom.StatsHandle()
require.NoError(t, h.DumpStatsDeltaToKV(true))
require.Eventually(t, func() bool {
rows := tk.MustQuery("show stats_meta").Rows()
return len(rows) > 0
}, 1*time.Minute, 2*time.Millisecond)
require.NoError(t, h.Update(dom.InfoSchema()))
// 4. Try to select some data from this table by ID, it would trigger an async load.
tk.MustExec("set tidb_opt_objective='determinate';")
tk.MustQuery("select * from t where a = 1 and b = 1;").Check(testkit.Rows("1 1"))
table, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
require.NoError(t, err)
checkTableIDInItems(t, table.Meta().ID)
}

func checkTableIDInItems(t *testing.T, tableID int64) {
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
defer cancel()

ticker := time.NewTicker(2 * time.Millisecond)
defer ticker.Stop()

done := make(chan bool)

// First, confirm that the table ID is in the items.
items := statistics.HistogramNeededItems.AllItems()
for _, item := range items {
if item.TableID == tableID {
// Then, continuously check until it no longer exists or timeout.
go func() {
for {
select {
case <-ticker.C:
items := statistics.HistogramNeededItems.AllItems()
found := false
for _, item := range items {
if item.TableID == tableID {
found = true
}
}
if !found {
done <- true
}
case <-ctx.Done():
return
}
}
}()
break
}
}

select {
case <-done:
t.Log("Table ID has been removed from items")
case <-ctx.Done():
t.Fatal("Timeout: Table ID was not removed from items within the time limit")
}
}

0 comments on commit 3c599cd

Please sign in to comment.