Skip to content

Commit

Permalink
statistics: add default value of CMSketch for Analyze (#19455) (#19927)
Browse files Browse the repository at this point in the history

Signed-off-by: ti-srebot <[email protected]>
  • Loading branch information
ti-srebot authored Sep 21, 2020
1 parent 902347c commit 0fbe796
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 4 deletions.
5 changes: 5 additions & 0 deletions executor/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,9 @@ func (e *AnalyzeIndexExec) buildStatsFromResult(result distsql.SelectResult, nee
}
}
err := hist.ExtractTopN(cms, len(e.idxInfo.Columns), uint32(e.opts[ast.AnalyzeOptNumTopN]))
if needCMS && cms != nil {
cms.CalcDefaultValForAnalyze(uint64(hist.NDV))
}
return hist, cms, err
}

Expand Down Expand Up @@ -525,6 +528,7 @@ func (e *AnalyzeColumnsExec) buildStats(ranges []*ranger.Range) (hists []*statis
return nil, nil, err
}
hists = append(hists, hg)
collectors[i].CMSketch.CalcDefaultValForAnalyze(uint64(hg.NDV))
cms = append(cms, collectors[i].CMSketch)
}
return hists, cms, nil
Expand Down Expand Up @@ -1236,6 +1240,7 @@ func analyzeIndexIncremental(idxExec *analyzeIndexIncrementalExec) analyzeResult
if err != nil {
return analyzeResult{Err: err, job: idxExec.job}
}
cms.CalcDefaultValForAnalyze(uint64(hist.NDV))
}
result := analyzeResult{
PhysicalTableID: idxExec.physicalTableID,
Expand Down
32 changes: 32 additions & 0 deletions executor/analyze_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -592,3 +592,35 @@ func (s *testSuite1) TestHashInTopN(c *C) {
}
}
}

// TestDefaultValForAnalyze checks the row counts reported by EXPLAIN for
// point lookups on an indexed column after `analyze table t with 0 topn`.
//
// Two data sets are loaded in turn, each starting with 2048 rows of value 0:
//   - plus the values 1..3: `a = 1` is expected to report 512.00 rows and the
//     absent value `a = 999` is expected to report 0.00 rows;
//   - plus the values 1..2048: `a = 1` is expected to report 1.00 rows.
//
// NOTE(review): 512 and 1 look like rowCount/NDV (2051/4 and 4096/2049 with
// integer division) — confirm against the CMSketch default-value logic.
func (s *testSuite1) TestDefaultValForAnalyze(c *C) {
	tk := testkit.NewTestKit(c, s.store)
	for _, stmt := range []string{
		"drop database if exists test_default_val_for_analyze;",
		"create database test_default_val_for_analyze;",
		"use test_default_val_for_analyze",
	} {
		tk.MustExec(stmt)
	}

	// loadAndAnalyze creates table t, inserts 2048 zeros followed by the
	// values 1..upper-1 (one row each), then analyzes the table without TopN.
	loadAndAnalyze := func(upper int) {
		tk.MustExec("create table t (a int, key(a));")
		for n := 0; n < 2048; n++ {
			tk.MustExec("insert into t values (0)")
		}
		for v := 1; v < upper; v++ {
			tk.MustExec("insert into t values (?)", v)
		}
		tk.MustExec("analyze table t with 0 topn;")
	}

	// Few distinct values.
	loadAndAnalyze(4)
	presentFew := testkit.Rows(
		"IndexReader_6 512.00 root index:IndexRangeScan_5",
		"└─IndexRangeScan_5 512.00 cop[tikv] table:t, index:a(a) range:[1,1], keep order:false",
	)
	tk.MustQuery("explain select * from t where a = 1").Check(presentFew)
	absent := testkit.Rows(
		"IndexReader_6 0.00 root index:IndexRangeScan_5",
		"└─IndexRangeScan_5 0.00 cop[tikv] table:t, index:a(a) range:[999,999], keep order:false",
	)
	tk.MustQuery("explain select * from t where a = 999").Check(absent)

	// Many distinct values.
	tk.MustExec("drop table t;")
	loadAndAnalyze(2049)
	presentMany := testkit.Rows(
		"IndexReader_6 1.00 root index:IndexRangeScan_5",
		"└─IndexRangeScan_5 1.00 cop[tikv] table:t, index:a(a) range:[1,1], keep order:false",
	)
	tk.MustQuery("explain select * from t where a = 1").Check(presentMany)
}
30 changes: 26 additions & 4 deletions statistics/cmsketch.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,23 +288,32 @@ func (c *CMSketch) QueryBytes(d []byte) uint64 {
func (c *CMSketch) queryHashValue(h1, h2 uint64) uint64 {
vals := make([]uint32, c.depth)
min := uint32(math.MaxUint32)
// When res is 0 before the noise is eliminated, the default value must not be used.
// So we add a temp offset to distinguish a counter that was 0 before noise elimination
// from one that only dropped to 0 after it.
temp := uint32(1)
for i := range c.table {
j := (h1 + h2*uint64(i)) % uint64(c.width)
if min > c.table[i][j] {
min = c.table[i][j]
}
noise := (c.count - uint64(c.table[i][j])) / (uint64(c.width) - 1)
if uint64(c.table[i][j]) < noise {
if uint64(c.table[i][j]) == 0 {
vals[i] = 0
} else if uint64(c.table[i][j]) < noise {
vals[i] = temp
} else {
vals[i] = c.table[i][j] - uint32(noise)
vals[i] = c.table[i][j] - uint32(noise) + temp
}
}
sort.Sort(sortutil.Uint32Slice(vals))
res := vals[(c.depth-1)/2] + (vals[c.depth/2]-vals[(c.depth-1)/2])/2
if res > min {
res = min
if res > min+temp {
res = min + temp
}
if res == 0 {
return uint64(0)
}
res = res - temp
if c.considerDefVal(uint64(res)) {
return c.defaultValue
}
Expand Down Expand Up @@ -538,3 +547,16 @@ func (c *CMSketch) AppendTopN(data []byte, count uint64) {
func (c *CMSketch) GetWidthAndDepth() (int32, int32) {
return c.width, c.depth
}

// CalcDefaultValForAnalyze calculates the default value used for Analyze and
// stores it in c.defaultValue.
// The value is count / NDV within the CMSketch, which means that neither count
// nor NDV includes the values held in topN.
//
// NDV is treated as the total number of distinct values; the number of topN
// entries is subtracted from it before dividing. Calling this method on a nil
// *CMSketch is a no-op.
func (c *CMSketch) CalcDefaultValForAnalyze(NDV uint64) {
	// Some callers may hold a nil sketch; there is nothing to update then.
	if c == nil {
		return
	}
	topNCount := uint64(len(c.topN))
	// If NDV <= len(topN), all values should be in topN.
	// So we set c.defaultValue to 0 and return immediately.
	if NDV <= topNCount {
		c.defaultValue = 0
		return
	}
	// NDV > topNCount here, so the divisor is always at least 1.
	c.defaultValue = c.count / (NDV - topNCount)
}

0 comments on commit 0fbe796

Please sign in to comment.