From 15b02765586edf2e631e70df3dd72ad091b2d0fc Mon Sep 17 00:00:00 2001 From: Haibin Xie Date: Thu, 1 Nov 2018 20:38:08 +0800 Subject: [PATCH] stats: fix selectivity estimation for primary key (#8134) --- statistics/bootstrap.go | 2 +- statistics/dump.go | 1 + statistics/handle.go | 2 +- statistics/histogram.go | 5 +++-- statistics/selectivity.go | 2 +- statistics/selectivity_test.go | 17 +++++++++++++++++ statistics/table.go | 4 +++- 7 files changed, 27 insertions(+), 6 deletions(-) diff --git a/statistics/bootstrap.go b/statistics/bootstrap.go index 850ad1a043a71..c4322a03acc56 100644 --- a/statistics/bootstrap.go +++ b/statistics/bootstrap.go @@ -120,7 +120,7 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, tables stat continue } hist := NewHistogram(id, ndv, nullCount, version, &colInfo.FieldType, 0, totColSize) - table.Columns[hist.ID] = &Column{Histogram: *hist, Info: colInfo, Count: nullCount} + table.Columns[hist.ID] = &Column{Histogram: *hist, Info: colInfo, Count: nullCount, isHandle: tbl.Meta().PKIsHandle && mysql.HasPriKeyFlag(colInfo.Flag)} } } } diff --git a/statistics/dump.go b/statistics/dump.go index 70a04e87b21c8..7e1fedd83b93d 100644 --- a/statistics/dump.go +++ b/statistics/dump.go @@ -216,6 +216,7 @@ func TableStatsFromJSON(tableInfo *model.TableInfo, physicalID int64, jsonTbl *J CMSketch: CMSketchFromProto(jsonCol.CMSketch), Info: colInfo, Count: count, + isHandle: tableInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.Flag), } tbl.Columns[col.ID] = col } diff --git a/statistics/handle.go b/statistics/handle.go index b014d863c97e5..0a612500d1e0b 100644 --- a/statistics/handle.go +++ b/statistics/handle.go @@ -258,7 +258,7 @@ func (h *Handle) LoadNeededHistograms() error { if err != nil { return errors.Trace(err) } - tbl.Columns[c.ID] = &Column{Histogram: *hg, Info: c.Info, CMSketch: cms, Count: int64(hg.totalRowCount())} + tbl.Columns[c.ID] = &Column{Histogram: *hg, Info: c.Info, CMSketch: cms, Count: int64(hg.totalRowCount()), isHandle: c.isHandle} h.UpdateTableStats([]*Table{tbl}, nil) histogramNeededColumns.delete(col) } diff --git a/statistics/histogram.go b/statistics/histogram.go index 56ae7c104bf35..f0c304d4da365 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -720,8 +720,9 @@ func (e *ErrorRate) merge(rate *ErrorRate) { type Column struct { Histogram *CMSketch - Count int64 - Info *model.ColumnInfo + Count int64 + Info *model.ColumnInfo + isHandle bool ErrorRate } diff --git a/statistics/selectivity.go b/statistics/selectivity.go index ef474455d98ea..f7e87a4913308 100644 --- a/statistics/selectivity.go +++ b/statistics/selectivity.go @@ -180,7 +180,7 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp return 0, errors.Trace(err) } sets = append(sets, &exprSet{tp: colType, ID: id, mask: maskCovered, ranges: ranges, numCols: 1}) - if mysql.HasPriKeyFlag(colInfo.Info.Flag) { + if colInfo.isHandle { sets[len(sets)-1].tp = pkType } } diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go index 0a2a4e13c07a9..128f87ec55c92 100644 --- a/statistics/selectivity_test.go +++ b/statistics/selectivity_test.go @@ -324,6 +324,23 @@ func (s *testSelectivitySuite) TestEstimationForUnknownValues(c *C) { c.Assert(count, Equals, 0.0) } +func (s *testSelectivitySuite) TestPrimaryKeySelectivity(c *C) { + testKit := testkit.NewTestKit(c, s.store) + testKit.MustExec("use test") + testKit.MustExec("drop table if exists t") + testKit.MustExec("create table t(a char(10) primary key, b int)") + testKit.MustQuery(`explain select * from t where a > "t"`).Check(testkit.Rows( + "IndexLookUp_10 3333.33 root ", + "├─IndexScan_8 3333.33 cop table:t, index:a, range:(\"t\",+inf], keep order:false, stats:pseudo", + "└─TableScan_9 3333.33 cop table:t, keep order:false, stats:pseudo")) + + testKit.MustExec("drop table t") + testKit.MustExec("create table t(a int primary key, b int)") + testKit.MustQuery(`explain select * from t where a > 1`).Check(testkit.Rows( + "TableReader_6 3333.33 root data:TableScan_5", + "└─TableScan_5 3333.33 cop table:t, range:(1,+inf], keep order:false, stats:pseudo")) +} + func BenchmarkSelectivity(b *testing.B) { c := &C{} s := &testSelectivitySuite{} diff --git a/statistics/table.go b/statistics/table.go index 1d05d1b284e6d..65c84a06499c6 100644 --- a/statistics/table.go +++ b/statistics/table.go @@ -188,6 +188,7 @@ func (h *Handle) columnStatsFromStorage(row chunk.Row, table *Table, tableInfo * Info: colInfo, Count: count + nullCount, ErrorRate: errorRate, + isHandle: tableInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.Flag), } break } @@ -206,6 +207,7 @@ func (h *Handle) columnStatsFromStorage(row chunk.Row, table *Table, tableInfo * CMSketch: cms, Count: int64(hg.totalRowCount()), ErrorRate: errorRate, + isHandle: tableInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.Flag), } break } @@ -610,7 +612,7 @@ func PseudoTable(tblInfo *model.TableInfo) *Table { } for _, col := range tblInfo.Columns { if col.State == model.StatePublic { - t.Columns[col.ID] = &Column{Info: col} + t.Columns[col.ID] = &Column{Info: col, isHandle: tblInfo.PKIsHandle && mysql.HasPriKeyFlag(col.Flag)} } } for _, idx := range tblInfo.Indices {