pingcap · ti-chi-bot · Aug 2, 2021 · Jul 14, 2021 · Jul 14, 2021 · Jul 14, 2021
diff --git a/planner/core/exhaust_physical_plans.go b/planner/core/exhaust_physical_plans.go
@@ -1251,7 +1251,7 @@ func (ijHelper *indexJoinBuildHelper) findUsefulEqAndInFilters(innerPlan *DataSo
 	var remainedEqOrIn []expression.Expression
 	// Extract the eq/in functions of possible join key.
 	// you can see the comment of ExtractEqAndInCondition to get the meaning of the second return value.
-	usefulEqOrInFilters, remainedEqOrIn, remainingRangeCandidates, _ = ranger.ExtractEqAndInCondition(
+	usefulEqOrInFilters, remainedEqOrIn, remainingRangeCandidates, _, _ = ranger.ExtractEqAndInCondition(
 		innerPlan.ctx, innerPlan.pushedDownConds,
 		ijHelper.curNotUsedIndexCols,
 		ijHelper.curNotUsedColLens,

diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go
@@ -415,10 +415,11 @@ func (ds *DataSource) tryToGetDualTask() (task, error) {
 
 // candidatePath is used to maintain required info for skyline pruning.
 type candidatePath struct {
-	path         *util.AccessPath
-	columnSet    *intsets.Sparse // columnSet is the set of columns that occurred in the access conditions.
-	isSingleScan bool
-	isMatchProp  bool
+	path               *util.AccessPath
+	accessCondsColSet  *intsets.Sparse // accessCondsColSet is the set of columns that occurred in the access conditions.
+	indexFiltersColSet *intsets.Sparse // indexFiltersColSet is the set of columns that occurred in the index filters.
+	isSingleScan       bool
+	isMatchProp        bool
 }
 
 // compareColumnSet will compares the two set. The last return value is used to indicate
@@ -451,18 +452,31 @@ func compareBool(l, r bool) int {
 	return 1
 }
 
+// compareIndexBack compares two candidates on the table-access dimension; the bool result reports comparability.
+func compareIndexBack(lhs, rhs *candidatePath) (int, bool) {
+	result := compareBool(lhs.isSingleScan, rhs.isSingleScan)
+	if result == 0 && !lhs.isSingleScan {
+		// Both need to access the table after IndexScan, so compare the columns used by their index filters as a proxy for how many table rows will be accessed.
+		return compareColumnSet(lhs.indexFiltersColSet, rhs.indexFiltersColSet)
+	}
+	return result, true
+}
+
 // compareCandidates is the core of skyline pruning. It compares the two candidate paths on three dimensions:
 // (1): the set of columns that occurred in the access condition,
 // (2): whether or not it matches the physical property
 // (3): does it require a double scan.
 // If `x` is not worse than `y` at all factors,
 // and there exists one factor that `x` is better than `y`, then `x` is better than `y`.
 func compareCandidates(lhs, rhs *candidatePath) int {
-	setsResult, comparable := compareColumnSet(lhs.columnSet, rhs.columnSet)
+	setsResult, comparable := compareColumnSet(lhs.accessCondsColSet, rhs.accessCondsColSet)
+	if !comparable {
+		return 0
+	}
+	scanResult, comparable := compareIndexBack(lhs, rhs)
 	if !comparable {
 		return 0
 	}
-	scanResult := compareBool(lhs.isSingleScan, rhs.isSingleScan)
 	matchResult := compareBool(lhs.isMatchProp, rhs.isMatchProp)
 	sum := setsResult + scanResult + matchResult
 	if setsResult >= 0 && scanResult >= 0 && matchResult >= 0 && sum > 0 {
@@ -474,52 +488,60 @@ func compareCandidates(lhs, rhs *candidatePath) int {
 	return 0
 }
 
-func (ds *DataSource) getTableCandidate(path *util.AccessPath, prop *property.PhysicalProperty) *candidatePath {
-	candidate := &candidatePath{path: path}
+// isMatchProp reports whether path can provide the order required by prop.SortItems.
+// For non-int-handle paths, each sort item must match an index column of types.UnspecifiedLength; only columns flagged in path.EqualCols may be skipped.
+func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.PhysicalProperty) bool {
+	var isMatchProp bool
 	if path.IsIntHandlePath {
 		pkCol := ds.getPKIsHandleCol()
 		if len(prop.SortItems) == 1 && pkCol != nil {
-			candidate.isMatchProp = prop.SortItems[0].Col.Equal(nil, pkCol)
+			isMatchProp = prop.SortItems[0].Col.Equal(nil, pkCol)
 			if path.StoreType == kv.TiFlash {
-				candidate.isMatchProp = candidate.isMatchProp && !prop.SortItems[0].Desc
+				isMatchProp = isMatchProp && !prop.SortItems[0].Desc
 			}
 		}
-	} else {
-		all, _ := prop.AllSameOrder()
-		// When the prop is empty or `all` is false, `isMatchProp` is better to be `false` because
-		// it needs not to keep order for index scan.
-		if !prop.IsEmpty() && all {
-			for i, col := range path.IdxCols {
-				if col.Equal(nil, prop.SortItems[0].Col) {
-					candidate.isMatchProp = matchIndicesProp(path.IdxCols[i:], path.IdxColLens[i:], prop.SortItems)
+		return isMatchProp
+	}
+	// TODO: do we need to consider TiFlash here? Also check whether it is ok to cache the optimization.
+	all, _ := prop.AllSameOrder()
+	// When the prop is empty or `all` is false, `isMatchProp` is better to be `false` because it needs not to keep order for index scan.
+	if !prop.IsEmpty() && all && len(path.IdxCols) >= len(prop.SortItems) {
+		isMatchProp = true
+		i := 0
+		for _, sortItem := range prop.SortItems {
+			found := false
+			for ; i < len(path.IdxCols); i++ {
+				if path.IdxColLens[i] == types.UnspecifiedLength && sortItem.Col.Equal(nil, path.IdxCols[i]) {
+					found = true
+					i++
 					break
-				} else if i >= path.EqCondCount {
+				}
+				if i >= len(path.EqualCols) || !path.EqualCols[i] { // bounds guard (covers nil): EqualCols is sized from the ranger's cols, not necessarily len(IdxCols)
 					break
 				}
 			}
+			if !found {
+				isMatchProp = false
+				break
+			}
 		}
 	}
-	candidate.columnSet = expression.ExtractColumnSet(path.AccessConds)
+	return isMatchProp
+}
+
+func (ds *DataSource) getTableCandidate(path *util.AccessPath, prop *property.PhysicalProperty) *candidatePath {
+	candidate := &candidatePath{path: path}
+	candidate.isMatchProp = ds.isMatchProp(path, prop)
+	candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds)
 	candidate.isSingleScan = true
 	return candidate
 }
 
 func (ds *DataSource) getIndexCandidate(path *util.AccessPath, prop *property.PhysicalProperty, isSingleScan bool) *candidatePath {
 	candidate := &candidatePath{path: path}
-	all, _ := prop.AllSameOrder()
-	// When the prop is empty or `all` is false, `isMatchProp` is better to be `false` because
-	// it needs not to keep order for index scan.
-	if !prop.IsEmpty() && all {
-		for i, col := range path.IdxCols {
-			if col.Equal(nil, prop.SortItems[0].Col) {
-				candidate.isMatchProp = matchIndicesProp(path.IdxCols[i:], path.IdxColLens[i:], prop.SortItems)
-				break
-			} else if i >= path.EqCondCount {
-				break
-			}
-		}
-	}
-	candidate.columnSet = expression.ExtractColumnSet(path.AccessConds)
+	candidate.isMatchProp = ds.isMatchProp(path, prop)
+	candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds)
+	candidate.indexFiltersColSet = expression.ExtractColumnSet(path.IndexFilters)
 	candidate.isSingleScan = isSingleScan
 	return candidate
 }

diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go
@@ -3952,3 +3952,26 @@ func (s *testIntegrationSerialSuite) TestSelectIgnoreTemporaryTableInView(c *C)
 	tk.MustQuery("select * from v5").Check(testkit.Rows("1 2", "3 4"))
 
 }
+
+// TestIsMatchProp is used to test https://github.com/pingcap/tidb/issues/26017.
+func (s *testIntegrationSuite) TestIsMatchProp(c *C) {
+	tk := testkit.NewTestKit(c, s.store)
+
+	tk.MustExec("use test")
+	tk.MustExec("drop table if exists t")
+	tk.MustExec("create table t(a int, b int, c int, d int, index idx_a_b_c(a, b, c), index idx_d_c_b_a(d, c, b, a))")
+
+	var input []string
+	var output []struct {
+		SQL  string
+		Plan []string
+	}
+	s.testData.GetTestCases(c, &input, &output)
+	for i, tt := range input {
+		s.testData.OnRecord(func() {
+			output[i].SQL = tt
+			output[i].Plan = s.testData.ConvertRowsToStrings(tk.MustQuery("explain format = 'brief' " + tt).Rows())
+		})
+		tk.MustQuery("explain format = 'brief' " + tt).Check(testkit.Rows(output[i].Plan...))
+	}
+}
diff --git a/planner/core/logical_plan_test.go b/planner/core/logical_plan_test.go
@@ -1694,12 +1694,28 @@ func (s *testPlanSuite) TestSkylinePruning(c *C) {
 		},
 		{
 			sql:    "select * from t where f > 1 and g > 1",
-			result: "PRIMARY_KEY,f,g,f_g",
+			result: "PRIMARY_KEY,g,f_g",
 		},
 		{
 			sql:    "select count(1) from t",
 			result: "PRIMARY_KEY,c_d_e,f,g,f_g,c_d_e_str,e_d_c_str_prefix",
 		},
+		{
+			sql:    "select * from t where f > 3 and g = 5",
+			result: "PRIMARY_KEY,g,f_g",
+		},
+		{
+			sql:    "select * from t where g = 5 order by f",
+			result: "PRIMARY_KEY,g,f_g",
+		},
+		{
+			sql:    "select * from t where d = 3 order by c, e",
+			result: "PRIMARY_KEY,c_d_e",
+		},
+		{
+			sql:    "select * from t where d = 1 and f > 1 and g > 1 order by c, e",
+			result: "PRIMARY_KEY,c_d_e,g,f_g",
+		},
 	}
 	ctx := context.TODO()
 	for i, tt := range tests {

diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go
@@ -675,6 +675,7 @@ func (ds *DataSource) deriveCommonHandleTablePathStats(path *util.AccessPath, co
 		path.EqCondCount = res.EqCondCount
 		path.EqOrInCondCount = res.EqOrInCount
 		path.IsDNFCond = res.IsDNFCond
+		path.EqualCols = res.EqualCols
 		path.CountAfterAccess, err = ds.tableStats.HistColl.GetRowCountByIndexRanges(sc, path.Index.ID, path.Ranges)
 		if err != nil {
 			return false, err
@@ -854,6 +855,7 @@ func (ds *DataSource) fillIndexPath(path *util.AccessPath, conds []expression.Ex
 		path.EqCondCount = res.EqCondCount
 		path.EqOrInCondCount = res.EqOrInCount
 		path.IsDNFCond = res.IsDNFCond
+		path.EqualCols = res.EqualCols
 		path.CountAfterAccess, err = ds.tableStats.HistColl.GetRowCountByIndexRanges(sc, path.Index.ID, path.Ranges)
 		if err != nil {
 			return err

diff --git a/planner/core/mock.go b/planner/core/mock.go
@@ -43,9 +43,9 @@ func newDateType() types.FieldType {
 
 // MockSignedTable is only used for plan related tests.
 func MockSignedTable() *model.TableInfo {
-	// column: a, b, c, d, e, c_str, d_str, e_str, f, g
+	// column: a, b, c, d, e, c_str, d_str, e_str, f, g, h, i_date
 	// PK: a
-	// indices: c_d_e, e, f, g, f_g, c_d_e_str, c_d_e_str_prefix
+	// indices: c_d_e, e, f, g, f_g, c_d_e_str, e_d_c_str_prefix
 	indices := []*model.IndexInfo{
 		{
 			Name: model.NewCIStr("c_d_e"),

diff --git a/planner/core/testdata/integration_suite_in.json b/planner/core/testdata/integration_suite_in.json
@@ -310,5 +310,12 @@
       "select sum(1) from s1",
       "select count(1) as cnt from s1 union select count(1) as cnt from s2"
     ]
+  },
+  {
+    "name": "TestIsMatchProp",
+    "cases": [
+      "select a, b, c from t where a > 3 and b = 4 order by a, c",
+      "select * from t where d = 1 and b = 2 order by c, a"
+    ]
   }
 ]
diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json
@@ -1636,5 +1636,26 @@
         ]
       }
     ]
+  },
+  {
+    "Name": "TestIsMatchProp",
+    "Cases": [
+      {
+        "SQL": "select a, b, c from t where a > 3 and b = 4 order by a, c",
+        "Plan": [
+          "IndexReader 3.33 root  index:Selection",
+          "└─Selection 3.33 cop[tikv]  eq(test.t.b, 4)",
+          "  └─IndexRangeScan 3333.33 cop[tikv] table:t, index:idx_a_b_c(a, b, c) range:(3,+inf], keep order:true, stats:pseudo"
+        ]
+      },
+      {
+        "SQL": "select * from t where d = 1 and b = 2 order by c, a",
+        "Plan": [
+          "IndexReader 0.01 root  index:Selection",
+          "└─Selection 0.01 cop[tikv]  eq(test.t.b, 2)",
+          "  └─IndexRangeScan 10.00 cop[tikv] table:t, index:idx_d_c_b_a(d, c, b, a) range:[1,1], keep order:true, stats:pseudo"
+        ]
+      }
+    ]
   }
 ]
diff --git a/planner/util/path.go b/planner/util/path.go
@@ -32,7 +32,9 @@ type AccessPath struct {
 	FullIdxColLens []int
 	IdxCols        []*expression.Column
 	IdxColLens     []int
-	Ranges         []*ranger.Range
+	// EqualCols[i] indicates whether the i-th index column is constant under the given conditions.
+	EqualCols []bool
+	Ranges    []*ranger.Range
 	// CountAfterAccess is the row count after we apply range seek and before we use other filter to filter data.
 	// For index merge path, CountAfterAccess is the row count after partial paths and before we apply table filters.
 	CountAfterAccess float64

diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go
@@ -254,7 +254,7 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi
 	)
 	res := &DetachRangeResult{}
 
-	accessConds, filterConds, newConditions, emptyRange := ExtractEqAndInCondition(d.sctx, conditions, d.cols, d.lengths)
+	accessConds, filterConds, newConditions, equalCols, emptyRange := ExtractEqAndInCondition(d.sctx, conditions, d.cols, d.lengths)
 	if emptyRange {
 		return res, nil
 	}
@@ -286,6 +286,7 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi
 	res.Ranges = ranges
 	res.AccessConds = accessConds
 	res.RemainedConds = filterConds
+	res.EqualCols = equalCols
 	if eqOrInCount == len(d.cols) || len(newConditions) == 0 {
 		res.RemainedConds = append(res.RemainedConds, newConditions...)
 		return res, nil
@@ -465,15 +466,17 @@ func allEqOrIn(expr expression.Expression) bool {
 // filters: filters is the part that some access conditions need to be evaluate again since it's only the prefix part of char column.
 // newConditions: We'll simplify the given conditions if there're multiple in conditions or eq conditions on the same column.
 //   e.g. if there're a in (1, 2, 3) and a in (2, 3, 4). This two will be combined to a in (2, 3) and pushed to newConditions.
+// equalCols: equalCols[i] indicates whether cols[i] is constant under the given conditions.
 // bool: indicate whether there's nil range when merging eq and in conditions.
-func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Expression,
-	cols []*expression.Column, lengths []int) ([]expression.Expression, []expression.Expression, []expression.Expression, bool) {
+func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column,
+	lengths []int) ([]expression.Expression, []expression.Expression, []expression.Expression, []bool, bool) {
 	var filters []expression.Expression
 	rb := builder{sc: sctx.GetSessionVars().StmtCtx}
 	accesses := make([]expression.Expression, len(cols))
 	points := make([][]*point, len(cols))
 	mergedAccesses := make([]expression.Expression, len(cols))
 	newConditions := make([]expression.Expression, 0, len(conditions))
+	equalCols := make([]bool, len(cols))
 	offsets := make([]int, len(conditions))
 	for i, cond := range conditions {
 		offset := getPotentialEqOrInColOffset(cond, cols)
@@ -494,7 +497,7 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex
 		points[offset] = rb.intersection(points[offset], rb.build(cond))
 		// Early termination if false expression found
 		if len(points[offset]) == 0 {
-			return nil, nil, nil, true
+			return nil, nil, nil, nil, true
 		}
 	}
 	for i, ma := range mergedAccesses {
@@ -514,7 +517,7 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex
 			accesses[i] = nil
 		} else if len(points[i]) == 0 {
 			// Early termination if false expression found
-			return nil, nil, nil, true
+			return nil, nil, nil, nil, true
 		} else {
 			// All Intervals are single points
 			accesses[i] = points2EqOrInCond(sctx, points[i], cols[i])
@@ -527,6 +530,15 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex
 			newConditions = append(newConditions, conditions[i])
 		}
 	}
+	for i, cond := range accesses {
+		if f, ok := cond.(*expression.ScalarFunction); ok && (f.FuncName.L == ast.EQ || f.FuncName.L == ast.NullEQ) {
+			if _, ok := f.GetArgs()[0].(*expression.Column); ok {
+				equalCols[i] = true
+			} else if _, ok := f.GetArgs()[1].(*expression.Column); ok {
+				equalCols[i] = true
+			}
+		}
+	}
 	for i, cond := range accesses {
 		if cond == nil {
 			accesses = accesses[:i]
@@ -546,7 +558,7 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex
 	}
 	// We should remove all accessConds, so that they will not be added to filter conditions.
 	newConditions = removeAccessConditions(newConditions, accesses)
-	return accesses, filters, newConditions, false
+	return accesses, filters, newConditions, equalCols, false
 }
 
 // detachDNFCondAndBuildRangeForIndex will detach the index filters from table filters when it's a DNF.
@@ -619,6 +631,8 @@ type DetachRangeResult struct {
 	AccessConds []expression.Expression
 	// RemainedConds is the filter conditions which should be kept after access.
 	RemainedConds []expression.Expression
+	// EqualCols[i] indicates whether the i-th index column is constant under the given conditions.
+	EqualCols []bool
 	// EqCondCount is the number of equal conditions extracted.
 	EqCondCount int
 	// EqOrInCount is the number of equal/in conditions extracted.