diff --git a/planner/core/exhaust_physical_plans.go b/planner/core/exhaust_physical_plans.go index d96a29a84e722..6eaa43d5a192b 100644 --- a/planner/core/exhaust_physical_plans.go +++ b/planner/core/exhaust_physical_plans.go @@ -1251,7 +1251,7 @@ func (ijHelper *indexJoinBuildHelper) findUsefulEqAndInFilters(innerPlan *DataSo var remainedEqOrIn []expression.Expression // Extract the eq/in functions of possible join key. // you can see the comment of ExtractEqAndInCondition to get the meaning of the second return value. - usefulEqOrInFilters, remainedEqOrIn, remainingRangeCandidates, _ = ranger.ExtractEqAndInCondition( + usefulEqOrInFilters, remainedEqOrIn, remainingRangeCandidates, _, _ = ranger.ExtractEqAndInCondition( innerPlan.ctx, innerPlan.pushedDownConds, ijHelper.curNotUsedIndexCols, ijHelper.curNotUsedColLens, diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 9f2999d9b6958..0d14a0b9c83be 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -414,10 +414,11 @@ func (ds *DataSource) tryToGetDualTask() (task, error) { // candidatePath is used to maintain required info for skyline pruning. type candidatePath struct { - path *util.AccessPath - columnSet *intsets.Sparse // columnSet is the set of columns that occurred in the access conditions. - isSingleScan bool - isMatchProp bool + path *util.AccessPath + accessCondsColSet *intsets.Sparse // accessCondsColSet is the set of columns that occurred in the access conditions. + indexFiltersColSet *intsets.Sparse // indexFiltersColSet is the set of columns that occurred in the index filters. + isSingleScan bool + isMatchProp bool } // compareColumnSet will compares the two set. 
The last return value is used to indicate @@ -450,6 +451,16 @@ func compareBool(l, r bool) int { return 1 } +func compareIndexBack(lhs, rhs *candidatePath) (int, bool) { + result := compareBool(lhs.isSingleScan, rhs.isSingleScan) + if result == 0 && !lhs.isSingleScan { + // if both lhs and rhs need to access table after IndexScan, we use the set of columns that occurred in IndexFilters + // to compare how many table rows will be accessed. + return compareColumnSet(lhs.indexFiltersColSet, rhs.indexFiltersColSet) + } + return result, true +} + // compareCandidates is the core of skyline pruning. It compares the two candidate paths on three dimensions: // (1): the set of columns that occurred in the access condition, // (2): whether or not it matches the physical property @@ -457,11 +468,14 @@ func compareBool(l, r bool) int { // If `x` is not worse than `y` at all factors, // and there exists one factor that `x` is better than `y`, then `x` is better than `y`. func compareCandidates(lhs, rhs *candidatePath) int { - setsResult, comparable := compareColumnSet(lhs.columnSet, rhs.columnSet) + setsResult, comparable := compareColumnSet(lhs.accessCondsColSet, rhs.accessCondsColSet) + if !comparable { + return 0 + } + scanResult, comparable := compareIndexBack(lhs, rhs) if !comparable { return 0 } - scanResult := compareBool(lhs.isSingleScan, rhs.isSingleScan) matchResult := compareBool(lhs.isMatchProp, rhs.isMatchProp) sum := setsResult + scanResult + matchResult if setsResult >= 0 && scanResult >= 0 && matchResult >= 0 && sum > 0 { @@ -473,52 +487,70 @@ func compareCandidates(lhs, rhs *candidatePath) int { return 0 } -func (ds *DataSource) getTableCandidate(path *util.AccessPath, prop *property.PhysicalProperty) *candidatePath { - candidate := &candidatePath{path: path} +func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.PhysicalProperty) bool { + var isMatchProp bool if path.IsIntHandlePath { pkCol := ds.getPKIsHandleCol() if len(prop.SortItems) == 1 
&& pkCol != nil { - candidate.isMatchProp = prop.SortItems[0].Col.Equal(nil, pkCol) + isMatchProp = prop.SortItems[0].Col.Equal(nil, pkCol) if path.StoreType == kv.TiFlash { - candidate.isMatchProp = candidate.isMatchProp && !prop.SortItems[0].Desc + isMatchProp = isMatchProp && !prop.SortItems[0].Desc } } - } else { - all, _ := prop.AllSameOrder() - // When the prop is empty or `all` is false, `isMatchProp` is better to be `false` because - // it needs not to keep order for index scan. - if !prop.IsEmpty() && all { - for i, col := range path.IdxCols { - if col.Equal(nil, prop.SortItems[0].Col) { - candidate.isMatchProp = matchIndicesProp(path.IdxCols[i:], path.IdxColLens[i:], prop.SortItems) + return isMatchProp + } + all, _ := prop.AllSameOrder() + // When the prop is empty or `all` is false, `isMatchProp` is better to be `false` because + // it needs not to keep order for index scan. + + // Basically, if `prop.SortItems` is the prefix of `path.IdxCols`, then `isMatchProp` is true. However, we need to consider + // the situations when some columns of `path.IdxCols` are evaluated as constant. For example: + // ``` + // create table t(a int, b int, c int, d int, index idx_a_b_c(a, b, c), index idx_d_c_b_a(d, c, b, a)); + // select * from t where a = 1 order by b, c; + // select * from t where b = 1 order by a, c; + // select * from t where d = 1 and b = 2 order by c, a; + // select * from t where d = 1 and b = 2 order by c, b, a; + // ``` + // In the first two `SELECT` statements, `idx_a_b_c` matches the sort order. In the last two `SELECT` statements, `idx_d_c_b_a` + // matches the sort order. Hence, we use `path.ConstCols` to deal with the above situations. 
+ if !prop.IsEmpty() && all && len(path.IdxCols) >= len(prop.SortItems) { + isMatchProp = true + i := 0 + for _, sortItem := range prop.SortItems { + found := false + for ; i < len(path.IdxCols); i++ { + if path.IdxColLens[i] == types.UnspecifiedLength && sortItem.Col.Equal(nil, path.IdxCols[i]) { + found = true + i++ break - } else if i >= path.EqCondCount { + } + if path.ConstCols == nil || i >= len(path.ConstCols) || !path.ConstCols[i] { break } } + if !found { + isMatchProp = false + break + } } } - candidate.columnSet = expression.ExtractColumnSet(path.AccessConds) + return isMatchProp +} + +func (ds *DataSource) getTableCandidate(path *util.AccessPath, prop *property.PhysicalProperty) *candidatePath { + candidate := &candidatePath{path: path} + candidate.isMatchProp = ds.isMatchProp(path, prop) + candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds) candidate.isSingleScan = true return candidate } func (ds *DataSource) getIndexCandidate(path *util.AccessPath, prop *property.PhysicalProperty, isSingleScan bool) *candidatePath { candidate := &candidatePath{path: path} - all, _ := prop.AllSameOrder() - // When the prop is empty or `all` is false, `isMatchProp` is better to be `false` because - // it needs not to keep order for index scan. 
- if !prop.IsEmpty() && all { - for i, col := range path.IdxCols { - if col.Equal(nil, prop.SortItems[0].Col) { - candidate.isMatchProp = matchIndicesProp(path.IdxCols[i:], path.IdxColLens[i:], prop.SortItems) - break - } else if i >= path.EqCondCount { - break - } - } - } - candidate.columnSet = expression.ExtractColumnSet(path.AccessConds) + candidate.isMatchProp = ds.isMatchProp(path, prop) + candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds) + candidate.indexFiltersColSet = expression.ExtractColumnSet(path.IndexFilters) candidate.isSingleScan = isSingleScan return candidate } diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go index b50ef06abb8d5..56927eb747b0a 100644 --- a/planner/core/integration_test.go +++ b/planner/core/integration_test.go @@ -3973,6 +3973,30 @@ func (s *testIntegrationSerialSuite) TestSelectIgnoreTemporaryTableInView(c *C) } +// TestIsMatchProp is used to test https://github.com/pingcap/tidb/issues/26017. +func (s *testIntegrationSuite) TestIsMatchProp(c *C) { + tk := testkit.NewTestKit(c, s.store) + + tk.MustExec("use test") + tk.MustExec("drop table if exists t1, t2") + tk.MustExec("create table t1(a int, b int, c int, d int, index idx_a_b_c(a, b, c))") + tk.MustExec("create table t2(a int, b int, c int, d int, index idx_a_b_c_d(a, b, c, d))") + + var input []string + var output []struct { + SQL string + Plan []string + } + s.testData.GetTestCases(c, &input, &output) + for i, tt := range input { + s.testData.OnRecord(func() { + output[i].SQL = tt + output[i].Plan = s.testData.ConvertRowsToStrings(tk.MustQuery("explain format = 'brief' " + tt).Rows()) + }) + tk.MustQuery("explain format = 'brief' " + tt).Check(testkit.Rows(output[i].Plan...)) + } +} + func (s *testIntegrationSerialSuite) TestIssue26250(c *C) { tk := testkit.NewTestKit(c, s.store) tk.MustExec("use test") diff --git a/planner/core/logical_plan_test.go b/planner/core/logical_plan_test.go index 
599026a84d74f..6259bc73fdc0f 100644 --- a/planner/core/logical_plan_test.go +++ b/planner/core/logical_plan_test.go @@ -1694,12 +1694,28 @@ func (s *testPlanSuite) TestSkylinePruning(c *C) { }, { sql: "select * from t where f > 1 and g > 1", - result: "PRIMARY_KEY,f,g,f_g", + result: "PRIMARY_KEY,g,f_g", }, { sql: "select count(1) from t", result: "PRIMARY_KEY,c_d_e,f,g,f_g,c_d_e_str,e_d_c_str_prefix", }, + { + sql: "select * from t where f > 3 and g = 5", + result: "PRIMARY_KEY,g,f_g", + }, + { + sql: "select * from t where g = 5 order by f", + result: "PRIMARY_KEY,g,f_g", + }, + { + sql: "select * from t where d = 3 order by c, e", + result: "PRIMARY_KEY,c_d_e", + }, + { + sql: "select * from t where d = 1 and f > 1 and g > 1 order by c, e", + result: "PRIMARY_KEY,c_d_e,g,f_g", + }, } ctx := context.TODO() for i, tt := range tests { diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go index 7c2a7b8b2ad8f..45703caa4a828 100644 --- a/planner/core/logical_plans.go +++ b/planner/core/logical_plans.go @@ -675,6 +675,12 @@ func (ds *DataSource) deriveCommonHandleTablePathStats(path *util.AccessPath, co path.EqCondCount = res.EqCondCount path.EqOrInCondCount = res.EqOrInCount path.IsDNFCond = res.IsDNFCond + path.ConstCols = make([]bool, len(path.IdxCols)) + if res.ColumnValues != nil { + for i := range path.ConstCols { + path.ConstCols[i] = res.ColumnValues[i] != nil + } + } path.CountAfterAccess, err = ds.tableStats.HistColl.GetRowCountByIndexRanges(sc, path.Index.ID, path.Ranges) if err != nil { return false, err @@ -854,6 +860,12 @@ func (ds *DataSource) fillIndexPath(path *util.AccessPath, conds []expression.Ex path.EqCondCount = res.EqCondCount path.EqOrInCondCount = res.EqOrInCount path.IsDNFCond = res.IsDNFCond + path.ConstCols = make([]bool, len(path.IdxCols)) + if res.ColumnValues != nil { + for i := range path.ConstCols { + path.ConstCols[i] = res.ColumnValues[i] != nil + } + } path.CountAfterAccess, err = 
ds.tableStats.HistColl.GetRowCountByIndexRanges(sc, path.Index.ID, path.Ranges) if err != nil { return err diff --git a/planner/core/mock.go b/planner/core/mock.go index 42e6141980e90..eac3315fcdeac 100644 --- a/planner/core/mock.go +++ b/planner/core/mock.go @@ -43,9 +43,9 @@ func newDateType() types.FieldType { // MockSignedTable is only used for plan related tests. func MockSignedTable() *model.TableInfo { - // column: a, b, c, d, e, c_str, d_str, e_str, f, g + // column: a, b, c, d, e, c_str, d_str, e_str, f, g, h, i_date // PK: a - // indices: c_d_e, e, f, g, f_g, c_d_e_str, c_d_e_str_prefix + // indices: c_d_e, e, f, g, f_g, c_d_e_str, e_d_c_str_prefix indices := []*model.IndexInfo{ { Name: model.NewCIStr("c_d_e"), diff --git a/planner/core/testdata/integration_suite_in.json b/planner/core/testdata/integration_suite_in.json index bf2391065a86a..e7e9bb12e6001 100644 --- a/planner/core/testdata/integration_suite_in.json +++ b/planner/core/testdata/integration_suite_in.json @@ -310,5 +310,15 @@ "select sum(1) from s1", "select count(1) as cnt from s1 union select count(1) as cnt from s2" ] + }, + { + "name": "TestIsMatchProp", + "cases": [ + "select a, b, c from t1 where a > 3 and b = 4 order by a, c", + "select * from t2 where a = 1 and c = 2 order by b, d", + "select a, b, c from t1 where (a = 1 and b = 1 and c = 1) or (a = 1 and b = 1 and c = 2) order by c", + "select a, b, c from t1 where (a = 1 and b = 1 and c < 3) or (a = 1 and b = 1 and c > 6) order by c", + "select * from t2 where ((a = 1 and b = 1 and d < 3) or (a = 1 and b = 1 and d > 6)) and c = 3 order by d" + ] } ] diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json index 37330e65673c9..b93d2ef382647 100644 --- a/planner/core/testdata/integration_suite_out.json +++ b/planner/core/testdata/integration_suite_out.json @@ -1636,5 +1636,48 @@ ] } ] + }, + { + "Name": "TestIsMatchProp", + "Cases": [ + { + "SQL": "select a, b, c from t1 where a > 
3 and b = 4 order by a, c", + "Plan": [ + "IndexReader 3.33 root index:Selection", + "└─Selection 3.33 cop[tikv] eq(test.t1.b, 4)", + " └─IndexRangeScan 3333.33 cop[tikv] table:t1, index:idx_a_b_c(a, b, c) range:(3,+inf], keep order:true, stats:pseudo" + ] + }, + { + "SQL": "select * from t2 where a = 1 and c = 2 order by b, d", + "Plan": [ + "IndexReader 0.01 root index:Selection", + "└─Selection 0.01 cop[tikv] eq(test.t2.c, 2)", + " └─IndexRangeScan 10.00 cop[tikv] table:t2, index:idx_a_b_c_d(a, b, c, d) range:[1,1], keep order:true, stats:pseudo" + ] + }, + { + "SQL": "select a, b, c from t1 where (a = 1 and b = 1 and c = 1) or (a = 1 and b = 1 and c = 2) order by c", + "Plan": [ + "IndexReader 0.03 root index:IndexRangeScan", + "└─IndexRangeScan 0.03 cop[tikv] table:t1, index:idx_a_b_c(a, b, c) range:[1 1 1,1 1 2], keep order:true, stats:pseudo" + ] + }, + { + "SQL": "select a, b, c from t1 where (a = 1 and b = 1 and c < 3) or (a = 1 and b = 1 and c > 6) order by c", + "Plan": [ + "IndexReader 0.67 root index:IndexRangeScan", + "└─IndexRangeScan 0.67 cop[tikv] table:t1, index:idx_a_b_c(a, b, c) range:[1 1 -inf,1 1 3), (1 1 6,1 1 +inf], keep order:true, stats:pseudo" + ] + }, + { + "SQL": "select * from t2 where ((a = 1 and b = 1 and d < 3) or (a = 1 and b = 1 and d > 6)) and c = 3 order by d", + "Plan": [ + "IndexReader 0.00 root index:Selection", + "└─Selection 0.00 cop[tikv] eq(test.t2.c, 3), or(and(eq(test.t2.a, 1), and(eq(test.t2.b, 1), lt(test.t2.d, 3))), and(eq(test.t2.a, 1), and(eq(test.t2.b, 1), gt(test.t2.d, 6))))", + " └─IndexRangeScan 10.00 cop[tikv] table:t2, index:idx_a_b_c_d(a, b, c, d) range:[1,1], keep order:true, stats:pseudo" + ] + } + ] } ] diff --git a/planner/util/path.go b/planner/util/path.go index f6fa0b47e0f51..10e994e998a22 100644 --- a/planner/util/path.go +++ b/planner/util/path.go @@ -32,7 +32,9 @@ type AccessPath struct { FullIdxColLens []int IdxCols []*expression.Column IdxColLens []int - Ranges []*ranger.Range + // ConstCols 
indicates whether the column is constant under the given conditions for all index columns. + ConstCols []bool + Ranges []*ranger.Range // CountAfterAccess is the row count after we apply range seek and before we use other filter to filter data. // For index merge path, CountAfterAccess is the row count after partial paths and before we apply table filters. CountAfterAccess float64 diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index f26e96c42d7f8..e3566a8119afa 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -185,43 +185,40 @@ func getPotentialEqOrInColOffset(expr expression.Expression, cols []*expression. // is totally composed of point range filters. // e.g, for input CNF expressions ((a,b) in ((1,1),(2,2))) and a > 1 and ((a,b,c) in (1,1,1),(2,2,2)) // ((a,b,c) in (1,1,1),(2,2,2)) would be extracted. -func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.Expression, cols []*expression.Column, lengths []int) (*DetachRangeResult, int, error) { +func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.Expression, cols []*expression.Column, lengths []int) (*DetachRangeResult, int, []*valueInfo, error) { if len(conds) < 2 { - return nil, -1, nil + return nil, -1, nil, nil } var r *DetachRangeResult + columnValues := make([]*valueInfo, len(cols)) maxNumCols := int(0) offset := int(-1) for i, cond := range conds { tmpConds := []expression.Expression{cond} colSets := expression.ExtractColumnSet(tmpConds) - origColNum := colSets.Len() - if origColNum == 0 { + if colSets.Len() == 0 { continue } - if l := len(cols); origColNum > l { - origColNum = l - } - currCols := cols[:origColNum] - currLengths := lengths[:origColNum] - res, err := DetachCondAndBuildRangeForIndex(sctx, tmpConds, currCols, currLengths) + res, err := DetachCondAndBuildRangeForIndex(sctx, tmpConds, cols, lengths) if err != nil { - return nil, -1, err + return nil, -1, nil, err } if len(res.Ranges) == 0 { - return 
&DetachRangeResult{}, -1, nil + return &DetachRangeResult{}, -1, nil, nil } + // take the union of the two columnValues + columnValues = unionColumnValues(columnValues, res.ColumnValues) if len(res.AccessConds) == 0 || len(res.RemainedConds) > 0 { continue } sameLens, allPoints := true, true numCols := int(0) - for i, ran := range res.Ranges { + for j, ran := range res.Ranges { if !ran.IsPoint(sctx.GetSessionVars().StmtCtx) { allPoints = false break } - if i == 0 { + if j == 0 { numCols = len(ran.LowVal) } else if numCols != len(ran.LowVal) { sameLens = false @@ -240,7 +237,24 @@ func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.E if r != nil { r.IsDNFCond = false } - return r, offset, nil + return r, offset, columnValues, nil +} + +func unionColumnValues(lhs, rhs []*valueInfo) []*valueInfo { + if lhs == nil { + return rhs + } + if rhs != nil { + for i, valInfo := range lhs { + if i >= len(rhs) { + break + } + if valInfo == nil && rhs[i] != nil { + lhs[i] = rhs[i] + } + } + } + return lhs } // detachCNFCondAndBuildRangeForIndex will detach the index filters from table filters. These conditions are connected with `and` @@ -254,7 +268,7 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi ) res := &DetachRangeResult{} - accessConds, filterConds, newConditions, emptyRange := ExtractEqAndInCondition(d.sctx, conditions, d.cols, d.lengths) + accessConds, filterConds, newConditions, columnValues, emptyRange := ExtractEqAndInCondition(d.sctx, conditions, d.cols, d.lengths) if emptyRange { return res, nil } @@ -286,6 +300,7 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi res.Ranges = ranges res.AccessConds = accessConds res.RemainedConds = filterConds + res.ColumnValues = columnValues if eqOrInCount == len(d.cols) || len(newConditions) == 0 { res.RemainedConds = append(res.RemainedConds, newConditions...) 
return res, nil @@ -296,15 +311,17 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi shouldReserve: d.lengths[eqOrInCount] != types.UnspecifiedLength, } if considerDNF { - pointRes, offset, err := extractIndexPointRangesForCNF(d.sctx, conditions, d.cols, d.lengths) + pointRes, offset, columnValues, err := extractIndexPointRangesForCNF(d.sctx, conditions, d.cols, d.lengths) if err != nil { return nil, err } + res.ColumnValues = unionColumnValues(res.ColumnValues, columnValues) if pointRes != nil { if len(pointRes.Ranges) == 0 { return &DetachRangeResult{}, nil } if len(pointRes.Ranges[0].LowVal) > eqOrInCount { + pointRes.ColumnValues = res.ColumnValues res = pointRes pointRanges = pointRes.Ranges eqOrInCount = len(res.Ranges[0].LowVal) @@ -460,20 +477,42 @@ func allEqOrIn(expr expression.Expression) bool { return false } +func extractValueInfo(expr expression.Expression) *valueInfo { + if f, ok := expr.(*expression.ScalarFunction); ok && (f.FuncName.L == ast.EQ || f.FuncName.L == ast.NullEQ) { + getValueInfo := func(c *expression.Constant) *valueInfo { + mutable := c.ParamMarker != nil || c.DeferredExpr != nil + var value *types.Datum + if !mutable { + value = &c.Value + } + return &valueInfo{mutable, value} + } + if c, ok := f.GetArgs()[0].(*expression.Constant); ok { + return getValueInfo(c) + } + if c, ok := f.GetArgs()[1].(*expression.Constant); ok { + return getValueInfo(c) + } + } + return nil +} + // ExtractEqAndInCondition will split the given condition into three parts by the information of index columns and their lengths. // accesses: The condition will be used to build range. // filters: filters is the part that some access conditions need to be evaluate again since it's only the prefix part of char column. // newConditions: We'll simplify the given conditions if there're multiple in conditions or eq conditions on the same column. // e.g. if there're a in (1, 2, 3) and a in (2, 3, 4). 
This two will be combined to a in (2, 3) and pushed to newConditions. +// columnValues: the constant column values for all index columns. columnValues[i] is nil if cols[i] is not constant. // bool: indicate whether there's nil range when merging eq and in conditions. -func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Expression, - cols []*expression.Column, lengths []int) ([]expression.Expression, []expression.Expression, []expression.Expression, bool) { +func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column, + lengths []int) ([]expression.Expression, []expression.Expression, []expression.Expression, []*valueInfo, bool) { var filters []expression.Expression rb := builder{sc: sctx.GetSessionVars().StmtCtx} accesses := make([]expression.Expression, len(cols)) points := make([][]*point, len(cols)) mergedAccesses := make([]expression.Expression, len(cols)) newConditions := make([]expression.Expression, 0, len(conditions)) + columnValues := make([]*valueInfo, len(cols)) offsets := make([]int, len(conditions)) for i, cond := range conditions { offset := getPotentialEqOrInColOffset(cond, cols) @@ -494,7 +533,7 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex points[offset] = rb.intersection(points[offset], rb.build(cond)) // Early termination if false expression found if len(points[offset]) == 0 { - return nil, nil, nil, true + return nil, nil, nil, nil, true } } for i, ma := range mergedAccesses { @@ -502,6 +541,7 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex if accesses[i] != nil { if allEqOrIn(accesses[i]) { newConditions = append(newConditions, accesses[i]) + columnValues[i] = extractValueInfo(accesses[i]) } else { accesses[i] = nil } @@ -514,11 +554,16 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex accesses[i] = nil } else if len(points[i]) == 0 { // Early termination if 
false expression found - return nil, nil, nil, true + return nil, nil, nil, nil, true } else { // All Intervals are single points accesses[i] = points2EqOrInCond(sctx, points[i], cols[i]) newConditions = append(newConditions, accesses[i]) + if f, ok := accesses[i].(*expression.ScalarFunction); ok && f.FuncName.L == ast.EQ { + // Actually the constant column value may not be mutable. Here we assume it is mutable to keep it simple. + // Maybe we can improve it later. + columnValues[i] = &valueInfo{mutable: true} + } sctx.GetSessionVars().StmtCtx.OptimDependOnMutableConst = true } } @@ -546,12 +591,12 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex } // We should remove all accessConds, so that they will not be added to filter conditions. newConditions = removeAccessConditions(newConditions, accesses) - return accesses, filters, newConditions, false + return accesses, filters, newConditions, columnValues, false } // detachDNFCondAndBuildRangeForIndex will detach the index filters from table filters when it's a DNF. // We will detach the conditions of every DNF items, then compose them to a DNF. 
-func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression.ScalarFunction, newTpSlice []*types.FieldType) ([]*Range, []expression.Expression, bool, error) {
+func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression.ScalarFunction, newTpSlice []*types.FieldType) ([]*Range, []expression.Expression, []*valueInfo, bool, error) {
 	sc := d.sctx.GetSessionVars().StmtCtx
 	firstColumnChecker := &conditionChecker{
 		colUniqueID:   d.cols[0].UniqueID,
@@ -562,26 +607,46 @@ func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression
 	dnfItems := expression.FlattenDNFConditions(condition)
 	newAccessItems := make([]expression.Expression, 0, len(dnfItems))
 	var totalRanges []*Range
+	columnValues := make([]*valueInfo, len(d.cols))
 	hasResidual := false
-	for _, item := range dnfItems {
+	for i, item := range dnfItems {
 		if sf, ok := item.(*expression.ScalarFunction); ok && sf.FuncName.L == ast.LogicAnd {
 			cnfItems := expression.FlattenCNFConditions(sf)
 			var accesses, filters []expression.Expression
 			res, err := d.detachCNFCondAndBuildRangeForIndex(cnfItems, newTpSlice, true)
 			if err != nil {
-				return nil, nil, false, nil
+				return nil, nil, nil, false, errors.Trace(err)
 			}
 			ranges := res.Ranges
 			accesses = res.AccessConds
 			filters = res.RemainedConds
 			if len(accesses) == 0 {
-				return FullRange(), nil, true, nil
+				return FullRange(), nil, nil, true, nil
 			}
 			if len(filters) > 0 {
 				hasResidual = true
 			}
 			totalRanges = append(totalRanges, ranges...)
 			newAccessItems = append(newAccessItems, expression.ComposeCNFCondition(d.sctx, accesses...))
+			if res.ColumnValues != nil {
+				if i == 0 {
+					columnValues = res.ColumnValues
+				} else {
+					// take the intersection of the two columnValues
+					for j, valInfo := range columnValues {
+						if valInfo == nil {
+							continue
+						}
+						sameValue, err := isSameValue(d.sctx.GetSessionVars().StmtCtx, valInfo, res.ColumnValues[j])
+						if err != nil {
+							return nil, nil, nil, false, errors.Trace(err)
+						}
+						if !sameValue {
+							columnValues[j] = nil
+						}
+					}
+				}
+			}
 		} else if firstColumnChecker.check(item) {
 			if firstColumnChecker.shouldReserve {
 				hasResidual = true
@@ -590,12 +655,31 @@ func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression
 			points := rb.build(item)
 			ranges, err := points2Ranges(sc, points, newTpSlice[0])
 			if err != nil {
-				return nil, nil, false, errors.Trace(err)
+				return nil, nil, nil, false, errors.Trace(err)
 			}
 			totalRanges = append(totalRanges, ranges...)
 			newAccessItems = append(newAccessItems, item)
+			if i == 0 {
+				columnValues[0] = extractValueInfo(item)
+			} else {
+				// This item restricts only the first index column, so every other column is
+				// unconstrained in this branch and cannot stay constant across the whole DNF.
+				for j := 1; j < len(columnValues); j++ {
+					columnValues[j] = nil
+				}
+				if columnValues[0] != nil {
+					valInfo := extractValueInfo(item)
+					sameValue, err := isSameValue(d.sctx.GetSessionVars().StmtCtx, columnValues[0], valInfo)
+					if err != nil {
+						return nil, nil, nil, false, errors.Trace(err)
+					}
+					if !sameValue {
+						columnValues[0] = nil
+					}
+				}
+			}
 		} else {
-			return FullRange(), nil, true, nil
+			return FullRange(), nil, nil, true, nil
 		}
 	}
 
@@ -605,10 +689,34 @@ func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression
 	}
 	totalRanges, err := UnionRanges(sc, totalRanges, d.mergeConsecutive)
 	if err != nil {
-		return nil, nil, false, errors.Trace(err)
+		return nil, nil, nil, false, errors.Trace(err)
 	}
 
-	return totalRanges, []expression.Expression{expression.ComposeDNFCondition(d.sctx, newAccessItems...)}, hasResidual, nil
+	return totalRanges, []expression.Expression{expression.ComposeDNFCondition(d.sctx, newAccessItems...)}, columnValues, hasResidual, nil
+}
+
+// valueInfo is used for recording the constant column value in DetachCondAndBuildRangeForIndex.
+type valueInfo struct {
+	mutable bool         // If true, the constant column value depends on mutable constant.
+	value   *types.Datum // If not mutable, value is the constant column value. Otherwise value is nil.
+}
+
+// isSameValue reports whether lhs and rhs hold the same immutable constant value.
+// It returns false when either side is nil or depends on a mutable constant.
+func isSameValue(sc *stmtctx.StatementContext, lhs, rhs *valueInfo) (bool, error) {
+	// We assume `lhs` and `rhs` are not the same when either `lhs` or `rhs` is mutable to keep it simple. If we consider
+	// mutable valueInfo, we need to set `sc.OptimDependOnMutableConst = true`, which makes the plan not able to be cached.
+	// On the other hand, the equal condition may not be used for optimization. Hence we simply regard mutable valueInfos different
+	// from others. Maybe we can improve it later.
+	// TODO: is `lhs.value.Kind() != rhs.value.Kind()` necessary?
+	if lhs == nil || rhs == nil || lhs.mutable || rhs.mutable || lhs.value.Kind() != rhs.value.Kind() {
+		return false, nil
+	}
+	cmp, err := lhs.value.CompareDatum(sc, rhs.value)
+	if err != nil {
+		return false, err
+	}
+	return cmp == 0, nil
 }
 
 // DetachRangeResult wraps up results when detaching conditions and builing ranges.
@@ -619,6 +727,9 @@ type DetachRangeResult struct {
 	AccessConds []expression.Expression
 	// RemainedConds is the filter conditions which should be kept after access.
 	RemainedConds []expression.Expression
+	// ColumnValues records the constant column values for all index columns.
+	// For the ith column, if it is evaluated as constant, ColumnValues[i] is its value. Otherwise ColumnValues[i] is nil.
+	ColumnValues []*valueInfo
 	// EqCondCount is the number of equal conditions extracted.
 	EqCondCount int
 	// EqOrInCount is the number of equal/in conditions extracted.
@@ -657,12 +768,13 @@ func (d *rangeDetacher) detachCondAndBuildRangeForCols() (*DetachRangeResult, er
 	}
 	if len(d.allConds) == 1 {
 		if sf, ok := d.allConds[0].(*expression.ScalarFunction); ok && sf.FuncName.L == ast.LogicOr {
-			ranges, accesses, hasResidual, err := d.detachDNFCondAndBuildRangeForIndex(sf, newTpSlice)
+			ranges, accesses, columnValues, hasResidual, err := d.detachDNFCondAndBuildRangeForIndex(sf, newTpSlice)
 			if err != nil {
 				return res, errors.Trace(err)
 			}
 			res.Ranges = ranges
 			res.AccessConds = accesses
+			res.ColumnValues = columnValues
 			res.IsDNFCond = true
 			// If this DNF have something cannot be to calculate range, then all this DNF should be pushed as filter condition.
 			if hasResidual {