Skip to content

Commit

Permalink
statistics: improve out-of-range estimation strategy (#26502)
Browse files Browse the repository at this point in the history
  • Loading branch information
time-and-fate authored Aug 2, 2021
1 parent d7bddb8 commit cb65b7a
Show file tree
Hide file tree
Showing 12 changed files with 594 additions and 231 deletions.
28 changes: 14 additions & 14 deletions cmd/explaintest/r/explain_complex_stats.result

Large diffs are not rendered by default.

18 changes: 9 additions & 9 deletions planner/core/testdata/analyze_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@
"SQL": "explain format = 'brief' select * from t where a <= 5 and b <= 5",
"RatioOfPseudoEstimate": 10,
"Plan": [
"TableReader 29.77 root data:Selection",
"└─Selection 29.77 cop[tikv] le(test.t.a, 5), le(test.t.b, 5)",
"TableReader 28.80 root data:Selection",
"└─Selection 28.80 cop[tikv] le(test.t.a, 5), le(test.t.b, 5)",
" └─TableFullScan 80.00 cop[tikv] table:t keep order:false"
]
},
Expand Down Expand Up @@ -454,18 +454,18 @@
{
"SQL": "explain format = 'brief' select * from t where a = 7639902",
"Plan": [
"IndexReader 6.68 root index:IndexRangeScan",
"└─IndexRangeScan 6.68 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false"
"IndexReader 5.95 root index:IndexRangeScan",
"└─IndexRangeScan 5.95 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false"
]
},
{
"SQL": "explain format = 'brief' select c, b from t where a = 7639902 order by b asc limit 6",
"Plan": [
"Projection 6.00 root test.t.c, test.t.b",
"└─TopN 6.00 root test.t.b, offset:0, count:6",
" └─IndexReader 6.00 root index:TopN",
" └─TopN 6.00 cop[tikv] test.t.b, offset:0, count:6",
" └─IndexRangeScan 6.68 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false"
"Projection 5.95 root test.t.c, test.t.b",
"└─TopN 5.95 root test.t.b, offset:0, count:6",
" └─IndexReader 5.95 root index:TopN",
" └─TopN 5.95 cop[tikv] test.t.b, offset:0, count:6",
" └─IndexRangeScan 5.95 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false"
]
}
]
Expand Down
10 changes: 1 addition & 9 deletions statistics/cmsketch.go
Original file line number Diff line number Diff line change
Expand Up @@ -530,14 +530,6 @@ func (c *TopN) Num() int {
return len(c.TopN)
}

// outOfRange reports whether val lies outside the closed interval bounded by
// the first and the last TopN entries, comparing the encoded bytes with
// bytes.Compare. It returns true when c is nil or holds no entries.
// NOTE(review): treating c.TopN[0] and c.TopN[c.Num()-1] as the lowest and
// highest values assumes c.TopN is sorted by Encoded — confirm against the
// code that builds the TopN.
func (c *TopN) outOfRange(val []byte) bool {
if c == nil || len(c.TopN) == 0 {
// With no entries there is no range, so every value is reported as out of range.
return true
}
// Out of range means: strictly below the first entry, or strictly above the last one.
return bytes.Compare(c.TopN[0].Encoded, val) > 0 || bytes.Compare(val, c.TopN[c.Num()-1].Encoded) > 0
}

// DecodedString returns the value with decoded result.
func (c *TopN) DecodedString(ctx sessionctx.Context, colTypes []byte) (string, error) {
builder := &strings.Builder{}
Expand Down Expand Up @@ -775,7 +767,7 @@ func MergePartTopN2GlobalTopN(sc *stmtctx.StatementContext, version int, topNs [
datum = d
}
// Get the number of rows in the histogram whose value equals encodedVal.
count := hists[j].equalRowCount(datum, isIndex)
count, _ := hists[j].equalRowCount(datum, isIndex)
if count != 0 {
counter[encodedVal] += count
// Remove the value corresponding to encodedVal from the histogram.
Expand Down
6 changes: 2 additions & 4 deletions statistics/handle/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -1254,12 +1254,10 @@ func (h *Handle) RecalculateExpectCount(q *statistics.QueryFeedback) error {
expected := 0.0
if isIndex {
idx := t.Indices[id]
expected, err = idx.GetRowCount(sc, nil, ranges, t.ModifyCount)
expected *= idx.GetIncreaseFactor(t.Count)
expected, err = idx.GetRowCount(sc, nil, ranges, t.Count)
} else {
c := t.Columns[id]
expected, err = c.GetColumnRowCount(sc, ranges, t.ModifyCount, true)
expected *= c.GetIncreaseFactor(t.Count)
expected, err = c.GetColumnRowCount(sc, ranges, t.Count, true)
}
q.Expected = int64(expected)
return err
Expand Down
4 changes: 2 additions & 2 deletions statistics/handle/update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1815,8 +1815,8 @@ func (s *testStatsSuite) TestAbnormalIndexFeedback(c *C) {
sql: "select * from t where a = 2 and b > 10",
hist: "column:2 ndv:20 totColSize:20\n" +
"num: 5 lower_bound: -9223372036854775808 upper_bound: 7 repeats: 0 ndv: 0\n" +
"num: 4 lower_bound: 7 upper_bound: 14 repeats: 0 ndv: 0\n" +
"num: 5 lower_bound: 14 upper_bound: 9223372036854775807 repeats: 0 ndv: 0",
"num: 6 lower_bound: 7 upper_bound: 14 repeats: 0 ndv: 0\n" +
"num: 8 lower_bound: 14 upper_bound: 9223372036854775807 repeats: 0 ndv: 0",
rangeID: tblInfo.Columns[1].ID,
idxID: tblInfo.Indices[0].ID,
eqCount: 3,
Expand Down
Loading

0 comments on commit cb65b7a

Please sign in to comment.