diff --git a/planner/core/indexmerge_path.go b/planner/core/indexmerge_path.go index caa6b209aab4b..cf2b83c3ebe20 100644 --- a/planner/core/indexmerge_path.go +++ b/planner/core/indexmerge_path.go @@ -491,6 +491,75 @@ func (ds *DataSource) generateAndPruneIndexMergePath(indexMergeConds []expressio return nil } +// generateIndexMergeOnDNF4MVIndex generates IndexMerge paths for MVIndex upon DNF filters. +/* + select * from t where ((1 member of (a) and b=1) or (2 member of (a) and b=2)) and (c > 10) + IndexMerge(OR) + IndexRangeScan(a, b, [1 1, 1 1]) + IndexRangeScan(a, b, [2 2, 2 2]) + Selection(c > 10) + TableRowIdScan(t) + Two limitations now: + 1). all filters in the DNF have to be used as access-filters: ((1 member of (a)) or (2 member of (a)) or b > 10) cannot be used to access the MVIndex. + 2). cannot support json_contains: (json_contains(a, '[1, 2]') or json_contains(a, '[3, 4]')) is not supported since a single IndexMerge cannot represent this SQL. +*/ +func (ds *DataSource) generateIndexMergeOnDNF4MVIndex(normalPathCnt int, filters []expression.Expression) (mvIndexPaths []*util.AccessPath, err error) { + for idx := 0; idx < normalPathCnt; idx++ { + if ds.possibleAccessPaths[idx].IsTablePath() || ds.possibleAccessPaths[idx].Index == nil || !ds.possibleAccessPaths[idx].Index.MVIndex { + continue // not a MVIndex path + } + + idxCols, ok := ds.prepareCols4MVIndex(ds.possibleAccessPaths[idx].Index) + if !ok { + continue + } + + for current, filter := range filters { + sf, ok := filter.(*expression.ScalarFunction) + if !ok || sf.FuncName.L != ast.LogicOr { + continue + } + dnfFilters := expression.FlattenDNFConditions(sf) // [(1 member of (a) and b=1), (2 member of (a) and b=2)] + + // build partial paths for each dnf filter + cannotFit := false + var partialPaths []*util.AccessPath + for _, dnfFilter := range dnfFilters { + mvIndexFilters := []expression.Expression{dnfFilter} + if sf, ok := dnfFilter.(*expression.ScalarFunction); ok && sf.FuncName.L == ast.LogicAnd { + mvIndexFilters = expression.FlattenCNFConditions(sf) // (1 member of (a) and b=1) --> [(1 member of (a)), b=1] + } + + accessFilters, remainingFilters := ds.collectFilters4MVIndex(mvIndexFilters, idxCols) + if len(accessFilters) == 0 || len(remainingFilters) > 0 { // limitation 1 + cannotFit = true + break + } + paths, isIntersection, ok, err := ds.buildPartialPaths4MVIndex(accessFilters, idxCols, ds.possibleAccessPaths[idx].Index) + if err != nil { + return nil, err + } + if isIntersection || !ok { // limitation 2 + cannotFit = true + break + } + partialPaths = append(partialPaths, paths...) + } + if cannotFit { + continue + } + + var remainingFilters []expression.Expression + remainingFilters = append(remainingFilters, filters[:current]...) + remainingFilters = append(remainingFilters, filters[current+1:]...) + + indexMergePath := ds.buildPartialPathUp4MVIndex(partialPaths, false, remainingFilters) + mvIndexPaths = append(mvIndexPaths, indexMergePath) + } + } + return +} + // generateIndexMergeJSONMVIndexPath generates paths for (json_member_of / json_overlaps / json_contains) on multi-valued index. /* 1. select * from t where 1 member of (a) @@ -511,6 +580,12 @@ func (ds *DataSource) generateAndPruneIndexMergePath(indexMergeConds []expressio TableRowIdScan(t) */ func (ds *DataSource) generateIndexMerge4MVIndex(normalPathCnt int, filters []expression.Expression) (mvIndexPaths []*util.AccessPath, err error) { + dnfMVIndexPaths, err := ds.generateIndexMergeOnDNF4MVIndex(normalPathCnt, filters) + if err != nil { + return nil, err + } + mvIndexPaths = append(mvIndexPaths, dnfMVIndexPaths...) + for idx := 0; idx < normalPathCnt; idx++ { if ds.possibleAccessPaths[idx].IsTablePath() || ds.possibleAccessPaths[idx].Index == nil || !ds.possibleAccessPaths[idx].Index.MVIndex { continue // not a MVIndex path @@ -526,34 +601,45 @@ func (ds *DataSource) generateIndexMerge4MVIndex(normalPathCnt int, filters []ex continue } - partialPaths, isIntersection, err := ds.buildPartialPaths4MVIndex(accessFilters, idxCols, ds.possibleAccessPaths[idx].Index) + partialPaths, isIntersection, ok, err := ds.buildPartialPaths4MVIndex(accessFilters, idxCols, ds.possibleAccessPaths[idx].Index) if err != nil { return nil, err } - - indexMergePath := &util.AccessPath{PartialIndexPaths: partialPaths} - indexMergePath.IndexMergeIsIntersection = isIntersection - indexMergePath.TableFilters = remainingFilters - - // TODO: use a naive estimation strategy here now for simplicity, make it more accurate. - minEstRows, maxEstRows := math.MaxFloat64, -1.0 - for _, p := range indexMergePath.PartialIndexPaths { - minEstRows = math.Min(minEstRows, p.CountAfterAccess) - maxEstRows = math.Max(maxEstRows, p.CountAfterAccess) - } - if indexMergePath.IndexMergeIsIntersection { - indexMergePath.CountAfterAccess = minEstRows - } else { - indexMergePath.CountAfterAccess = maxEstRows + if !ok { + continue } - mvIndexPaths = append(mvIndexPaths, indexMergePath) + mvIndexPaths = append(mvIndexPaths, ds.buildPartialPathUp4MVIndex(partialPaths, isIntersection, remainingFilters)) } return } +// buildPartialPathUp4MVIndex builds these partial paths up to a complete index merge path. +func (ds *DataSource) buildPartialPathUp4MVIndex(partialPaths []*util.AccessPath, isIntersection bool, remainingFilters []expression.Expression) *util.AccessPath { + indexMergePath := &util.AccessPath{PartialIndexPaths: partialPaths} + indexMergePath.IndexMergeIsIntersection = isIntersection + indexMergePath.TableFilters = remainingFilters + + // TODO: use a naive estimation strategy here now for simplicity, make it more accurate. + minEstRows, maxEstRows := math.MaxFloat64, -1.0 + for _, p := range indexMergePath.PartialIndexPaths { + minEstRows = math.Min(minEstRows, p.CountAfterAccess) + maxEstRows = math.Max(maxEstRows, p.CountAfterAccess) + } + if indexMergePath.IndexMergeIsIntersection { + indexMergePath.CountAfterAccess = minEstRows + } else { + indexMergePath.CountAfterAccess = maxEstRows + } + return indexMergePath +} + +// buildPartialPaths4MVIndex builds partial paths by using these accessFilters upon this MVIndex. +// The accessFilters must be corresponding to these idxCols. +// OK indicates whether it builds successfully. These partial paths should be ignored if ok==false. func (ds *DataSource) buildPartialPaths4MVIndex(accessFilters []expression.Expression, - idxCols []*expression.Column, mvIndex *model.IndexInfo) ([]*util.AccessPath, bool, error) { + idxCols []*expression.Column, mvIndex *model.IndexInfo) ( + partialPaths []*util.AccessPath, isIntersection bool, ok bool, err error) { var virColID = -1 for i := range idxCols { if idxCols[i].VirtualExpr != nil { @@ -562,39 +648,38 @@ func (ds *DataSource) buildPartialPaths4MVIndex(accessFilters []expression.Expre } } if virColID == -1 { // unexpected, no vir-col on this MVIndex - return nil, false, nil + return nil, false, false, nil } if len(accessFilters) <= virColID { // no filter related to the vir-col, build a partial path directly. partialPath, ok, err := ds.buildPartialPath4MVIndex(accessFilters, idxCols, mvIndex) - return []*util.AccessPath{partialPath}, ok, err + return []*util.AccessPath{partialPath}, false, ok, err } virCol := idxCols[virColID] jsonType := virCol.GetType().ArrayType() targetJSONPath, ok := unwrapJSONCast(virCol.VirtualExpr) if !ok { - return nil, false, nil + return nil, false, false, nil } // extract values related to this vir-col, for example, extract [1, 2] from `json_contains(j, '[1, 2]')` var virColVals []expression.Expression - var isIntersection bool sf, ok := accessFilters[virColID].(*expression.ScalarFunction) if !ok { - return nil, false, nil + return nil, false, false, nil } switch sf.FuncName.L { case ast.JSONMemberOf: // (1 member of a->'$.zip') v, ok := unwrapJSONCast(sf.GetArgs()[0]) // cast(1 as json) --> 1 if !ok { - return nil, false, nil + return nil, false, false, nil } virColVals = append(virColVals, v) case ast.JSONContains: // (json_contains(a->'$.zip', '[1, 2, 3]') isIntersection = true virColVals, ok = jsonArrayExpr2Exprs(ds.ctx, sf.GetArgs()[1], jsonType) if !ok { - return nil, false, nil + return nil, false, false, nil } case ast.JSONOverlaps: // (json_overlaps(a->'$.zip', '[1, 2, 3]') var jsonPathIdx int @@ -603,33 +688,32 @@ func (ds *DataSource) buildPartialPaths4MVIndex(accessFilters []expression.Expre } else if sf.GetArgs()[1].Equal(ds.ctx, targetJSONPath) { jsonPathIdx = 1 // (json_overlaps('[1, 2, 3]', a->'$.zip') } else { - return nil, false, nil + return nil, false, false, nil } var ok bool virColVals, ok = jsonArrayExpr2Exprs(ds.ctx, sf.GetArgs()[1-jsonPathIdx], jsonType) if !ok { - return nil, false, nil + return nil, false, false, nil } default: - return nil, false, nil + return nil, false, false, nil } - partialPaths := make([]*util.AccessPath, 0, len(virColVals)) for _, v := range virColVals { // rewrite json functions to EQ to calculate range, `(1 member of j)` -> `j=1`. eq, err := expression.NewFunction(ds.ctx, ast.EQ, types.NewFieldType(mysql.TypeTiny), virCol, v) if err != nil { - return nil, false, err + return nil, false, false, err } accessFilters[virColID] = eq partialPath, ok, err := ds.buildPartialPath4MVIndex(accessFilters, idxCols, mvIndex) if !ok || err != nil { - return nil, ok, err + return nil, false, ok, err } partialPaths = append(partialPaths, partialPath) } - return partialPaths, isIntersection, nil + return partialPaths, isIntersection, true, nil } // buildPartialPath4MVIndex builds a partial path on this MVIndex with these accessFilters. diff --git a/planner/core/indexmerge_path_test.go b/planner/core/indexmerge_path_test.go index b825104d9fdb8..841a94f093d4a 100644 --- a/planner/core/indexmerge_path_test.go +++ b/planner/core/indexmerge_path_test.go @@ -56,6 +56,34 @@ index j1((cast(j1 as signed array))))`) } } +func TestDNFOnMVIndex(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec(`create table t(a int, b int, c int, j json, +index idx1((cast(j as signed array))), +index idx2(a, b, (cast(j as signed array)), c))`) + + var input []string + var output []struct { + SQL string + Plan []string + } + planSuiteData := core.GetIndexMergeSuiteData() + planSuiteData.LoadTestCases(t, &input, &output) + + for i, query := range input { + testdata.OnRecord(func() { + output[i].SQL = query + }) + result := tk.MustQuery("explain format = 'brief' " + query) + testdata.OnRecord(func() { + output[i].Plan = testdata.ConvertRowsToStrings(result.Rows()) + }) + result.Check(testkit.Rows(output[i].Plan...)) + } +} + func TestCompositeMVIndex(t *testing.T) { store := testkit.CreateMockStore(t) tk := testkit.NewTestKit(t, store) diff --git a/planner/core/testdata/index_merge_suite_in.json b/planner/core/testdata/index_merge_suite_in.json index c0f63bdd7bd3f..2883e51c29736 100644 --- a/planner/core/testdata/index_merge_suite_in.json +++ b/planner/core/testdata/index_merge_suite_in.json @@ -58,6 +58,19 @@ "select /*+ use_index_merge(t, idx2) */ * from t where a=1" ] }, + { + "name": "TestDNFOnMVIndex", + "cases": [ + "select /*+ use_index_merge(t, idx1) */ * from t where (1 member of (j)) or (2 member of (j))", + "select /*+ use_index_merge(t, idx1) */ * from t where ((1 member of (j)) or (2 member of (j))) and (a > 10)", + "select /*+ use_index_merge(t, idx1) */ * from t where (json_overlaps(j, '[1, 2]')) or (json_overlaps(j, '[3, 4]'))", + "select /*+ use_index_merge(t, idx1) */ * from t where ((json_overlaps(j, '[1, 2]')) or (json_overlaps(j, '[3, 4]'))) and (a > 10)", + "select /*+ use_index_merge(t, idx1) */ * from t where (json_contains(j, '[1, 2]')) or (json_contains(j, '[3, 4]'))", + "select /*+ use_index_merge(t, idx2) */ * from t where (a=1 and b=2 and (3 member of (j))) or (a=11 and b=12 and (13 member of (j)))", + "select /*+ use_index_merge(t, idx2) */ * from t where (a=1 and b=2 and (3 member of (j))) or (a=11 and b=12 and (13 member of (j)) and c=14)", + "select /*+ use_index_merge(t, idx2) */ * from t where ((a=1 and b=2 and (3 member of (j))) or (a=11 and b=12 and (13 member of (j)))) and (c > 10)" + ] + }, { "name": "TestMVIndexSelection", "cases": [ diff --git a/planner/core/testdata/index_merge_suite_out.json b/planner/core/testdata/index_merge_suite_out.json index 3988d8323f9c5..ffabf6f66b7d3 100644 --- a/planner/core/testdata/index_merge_suite_out.json +++ b/planner/core/testdata/index_merge_suite_out.json @@ -450,7 +450,7 @@ { "SQL": "select /*+ use_index_merge(t, idx) */ * from t where a=1 and b=2", "Plan": [ - "IndexMerge 0.10 root type: intersection", + "IndexMerge 0.10 root type: union", "├─IndexRangeScan(Build) 0.10 cop[tikv] table:t, index:idx(a, b, cast(`j` as signed array), c) range:[1 2,1 2], keep order:false, stats:pseudo", "└─TableRowIDScan(Probe) 0.10 cop[tikv] table:t keep order:false, stats:pseudo" ] @@ -458,7 +458,7 @@ { "SQL": "select /*+ use_index_merge(t, idx) */ * from t where a=1", "Plan": [ - "IndexMerge 10.00 root type: intersection", + "IndexMerge 10.00 root type: union", "├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx(a, b, cast(`j` as signed array), c) range:[1,1], keep order:false, stats:pseudo", "└─TableRowIDScan(Probe) 10.00 cop[tikv] table:t keep order:false, stats:pseudo" ] @@ -484,7 +484,7 @@ { "SQL": "select /*+ use_index_merge(t, idx2) */ * from t where a=1 and b=2", "Plan": [ - "IndexMerge 0.10 root type: intersection", + "IndexMerge 0.10 root type: union", "├─IndexRangeScan(Build) 0.10 cop[tikv] table:t, index:idx(a, b, cast(`j` as signed array), c) range:[1 2,1 2], keep order:false, stats:pseudo", "└─TableRowIDScan(Probe) 0.10 cop[tikv] table:t keep order:false, stats:pseudo" ] @@ -492,13 +492,103 @@ { "SQL": "select /*+ use_index_merge(t, idx2) */ * from t where a=1", "Plan": [ - "IndexMerge 10.00 root type: intersection", + "IndexMerge 10.00 root type: union", "├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx(a, b, cast(`j` as signed array), c) range:[1,1], keep order:false, stats:pseudo", "└─TableRowIDScan(Probe) 10.00 cop[tikv] table:t keep order:false, stats:pseudo" ] } ] }, + { + "Name": "TestDNFOnMVIndex", + "Cases": [ + { + "SQL": "select /*+ use_index_merge(t, idx1) */ * from t where (1 member of (j)) or (2 member of (j))", + "Plan": [ + "Selection 8.00 root or(json_memberof(cast(1, json BINARY), test.t.j), json_memberof(cast(2, json BINARY), test.t.j))", + "└─IndexMerge 10.00 root type: union", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[1,1], keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[2,2], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select /*+ use_index_merge(t, idx1) */ * from t where ((1 member of (j)) or (2 member of (j))) and (a > 10)", + "Plan": [ + "Selection 8.00 root or(json_memberof(cast(1, json BINARY), test.t.j), json_memberof(cast(2, json BINARY), test.t.j))", + "└─IndexMerge 3.33 root type: union", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[1,1], keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[2,2], keep order:false, stats:pseudo", + " └─Selection(Probe) 3.33 cop[tikv] gt(test.t.a, 10)", + " └─TableRowIDScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select /*+ use_index_merge(t, idx1) */ * from t where (json_overlaps(j, '[1, 2]')) or (json_overlaps(j, '[3, 4]'))", + "Plan": [ + "Selection 8.00 root or(json_overlaps(test.t.j, cast(\"[1, 2]\", json BINARY)), json_overlaps(test.t.j, cast(\"[3, 4]\", json BINARY)))", + "└─IndexMerge 10.00 root type: union", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[1,1], keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[2,2], keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[3,3], keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[4,4], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select /*+ use_index_merge(t, idx1) */ * from t where ((json_overlaps(j, '[1, 2]')) or (json_overlaps(j, '[3, 4]'))) and (a > 10)", + "Plan": [ + "Selection 8.00 root or(json_overlaps(test.t.j, cast(\"[1, 2]\", json BINARY)), json_overlaps(test.t.j, cast(\"[3, 4]\", json BINARY)))", + "└─IndexMerge 3.33 root type: union", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[1,1], keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[2,2], keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[3,3], keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[4,4], keep order:false, stats:pseudo", + " └─Selection(Probe) 3.33 cop[tikv] gt(test.t.a, 10)", + " └─TableRowIDScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select /*+ use_index_merge(t, idx1) */ * from t where (json_contains(j, '[1, 2]')) or (json_contains(j, '[3, 4]'))", + "Plan": [ + "TableReader 9600.00 root data:Selection", + "└─Selection 9600.00 cop[tikv] or(json_contains(test.t.j, cast(\"[1, 2]\", json BINARY)), json_contains(test.t.j, cast(\"[3, 4]\", json BINARY)))", + " └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select /*+ use_index_merge(t, idx2) */ * from t where (a=1 and b=2 and (3 member of (j))) or (a=11 and b=12 and (13 member of (j)))", + "Plan": [ + "Selection 0.00 root or(and(eq(test.t.a, 1), and(eq(test.t.b, 2), json_memberof(cast(3, json BINARY), test.t.j))), and(eq(test.t.a, 11), and(eq(test.t.b, 12), json_memberof(cast(13, json BINARY), test.t.j))))", + "└─IndexMerge 0.00 root type: union", + " ├─IndexRangeScan(Build) 0.00 cop[tikv] table:t, index:idx2(a, b, cast(`j` as signed array), c) range:[1 2 3,1 2 3], keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 0.00 cop[tikv] table:t, index:idx2(a, b, cast(`j` as signed array), c) range:[11 12 13,11 12 13], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 0.00 cop[tikv] table:t keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select /*+ use_index_merge(t, idx2) */ * from t where (a=1 and b=2 and (3 member of (j))) or (a=11 and b=12 and (13 member of (j)) and c=14)", + "Plan": [ + "Selection 0.00 root or(and(eq(test.t.a, 1), and(eq(test.t.b, 2), json_memberof(cast(3, json BINARY), test.t.j))), and(and(eq(test.t.a, 11), eq(test.t.b, 12)), and(json_memberof(cast(13, json BINARY), test.t.j), eq(test.t.c, 14))))", + "└─IndexMerge 0.00 root type: union", + " ├─IndexRangeScan(Build) 0.00 cop[tikv] table:t, index:idx2(a, b, cast(`j` as signed array), c) range:[1 2 3,1 2 3], keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 0.00 cop[tikv] table:t, index:idx2(a, b, cast(`j` as signed array), c) range:[11 12 13 14,11 12 13 14], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 0.00 cop[tikv] table:t keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select /*+ use_index_merge(t, idx2) */ * from t where ((a=1 and b=2 and (3 member of (j))) or (a=11 and b=12 and (13 member of (j)))) and (c > 10)", + "Plan": [ + "Selection 0.00 root or(and(eq(test.t.a, 1), and(eq(test.t.b, 2), json_memberof(cast(3, json BINARY), test.t.j))), and(eq(test.t.a, 11), and(eq(test.t.b, 12), json_memberof(cast(13, json BINARY), test.t.j))))", + "└─IndexMerge 0.00 root type: union", + " ├─IndexRangeScan(Build) 0.00 cop[tikv] table:t, index:idx2(a, b, cast(`j` as signed array), c) range:[1 2 3,1 2 3], keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 0.00 cop[tikv] table:t, index:idx2(a, b, cast(`j` as signed array), c) range:[11 12 13,11 12 13], keep order:false, stats:pseudo", + " └─Selection(Probe) 0.00 cop[tikv] gt(test.t.c, 10)", + " └─TableRowIDScan 0.00 cop[tikv] table:t keep order:false, stats:pseudo" + ] + } + ] + }, { "Name": "TestMVIndexSelection", "Cases": [