Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: support using DNF to construct IndexMerge to access MVIndex #40471

Merged
merged 5 commits into from
Jan 11, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 113 additions & 32 deletions planner/core/indexmerge_path.go
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,75 @@ func (ds *DataSource) generateAndPruneIndexMergePath(indexMergeConds []expressio
return nil
}

// generateIndexMergeOnDNF4MVIndex builds union-type IndexMerge paths that access an MVIndex through DNF (OR) filters.
/*
	For every MVIndex among the first normalPathCnt possible access paths, each top-level filter
	that is an OR expression is tried in turn. A path is produced only when every item of the OR
	can be turned into partial index paths; all the other top-level filters are handed over as the
	remaining filters of the resulting IndexMerge path.

	Example:
		select * from t where ((1 member of (a) and b=1) or (2 member of (a) and b=2)) and (c > 10)
		IndexMerge(OR)
			IndexRangeScan(a, b, [1 1, 1 1])
			IndexRangeScan(a, b, [2 2, 2 2])
			Selection(c > 10)
				TableRowIdScan(t)

	Two limitations for now:
		1). every condition inside the DNF has to be usable as an access filter:
		    ((1 member of (a)) or (2 member of (a)) or b > 10) cannot be used to access the MVIndex.
		2). json_contains is not supported: (json_contains(a, '[1, 2]') or json_contains(a, '[3, 4]'))
		    cannot be represented by a single IndexMerge.

	If building partial paths reports an error, nil and that error are returned immediately.
*/
func (ds *DataSource) generateIndexMergeOnDNF4MVIndex(normalPathCnt int, filters []expression.Expression) (mvIndexPaths []*util.AccessPath, err error) {
	for idx := 0; idx < normalPathCnt; idx++ {
		path := ds.possibleAccessPaths[idx]
		if path.IsTablePath() || path.Index == nil || !path.Index.MVIndex {
			continue // only MVIndex paths are of interest here
		}

		idxCols, ok := ds.prepareCols4MVIndex(path.Index)
		if !ok {
			continue
		}

	nextFilter:
		for pos, cond := range filters {
			orExpr, isFunc := cond.(*expression.ScalarFunction)
			if !isFunc || orExpr.FuncName.L != ast.LogicOr {
				continue
			}

			// Every DNF item must contribute its own partial paths, e.g.
			// [(1 member of (a) and b=1), (2 member of (a) and b=2)].
			var partialPaths []*util.AccessPath
			for _, item := range expression.FlattenDNFConditions(orExpr) {
				// (1 member of (a) and b=1) --> [(1 member of (a)), b=1]
				conjuncts := []expression.Expression{item}
				if andExpr, isAnd := item.(*expression.ScalarFunction); isAnd && andExpr.FuncName.L == ast.LogicAnd {
					conjuncts = expression.FlattenCNFConditions(andExpr)
				}

				access, leftover := ds.collectFilters4MVIndex(conjuncts, idxCols)
				if len(access) == 0 || len(leftover) > 0 {
					continue nextFilter // limitation 1: the whole item must be access filters
				}

				paths, isIntersection, built, buildErr := ds.buildPartialPaths4MVIndex(access, idxCols, path.Index)
				if buildErr != nil {
					return nil, buildErr
				}
				if isIntersection || !built {
					continue nextFilter // limitation 2: an intersection cannot live under the union
				}
				partialPaths = append(partialPaths, paths...)
			}

			// Everything except the consumed OR filter stays behind as remaining filters.
			// Starting from a nil slice keeps the result nil when there is nothing left.
			others := append([]expression.Expression(nil), filters[:pos]...)
			others = append(others, filters[pos+1:]...)

			mvIndexPaths = append(mvIndexPaths, ds.buildPartialPathUp4MVIndex(partialPaths, false, others))
		}
	}
	return mvIndexPaths, nil
}

// generateIndexMerge4MVIndex generates paths for (json_member_of / json_overlaps / json_contains) on multi-valued index.
/*
1. select * from t where 1 member of (a)
Expand All @@ -511,6 +580,12 @@ func (ds *DataSource) generateAndPruneIndexMergePath(indexMergeConds []expressio
TableRowIdScan(t)
*/
func (ds *DataSource) generateIndexMerge4MVIndex(normalPathCnt int, filters []expression.Expression) (mvIndexPaths []*util.AccessPath, err error) {
dnfMVIndexPaths, err := ds.generateIndexMergeOnDNF4MVIndex(normalPathCnt, filters)
if err != nil {
return nil, err
}
mvIndexPaths = append(mvIndexPaths, dnfMVIndexPaths...)

for idx := 0; idx < normalPathCnt; idx++ {
if ds.possibleAccessPaths[idx].IsTablePath() || ds.possibleAccessPaths[idx].Index == nil || !ds.possibleAccessPaths[idx].Index.MVIndex {
continue // not a MVIndex path
Expand All @@ -526,34 +601,42 @@ func (ds *DataSource) generateIndexMerge4MVIndex(normalPathCnt int, filters []ex
continue
}

partialPaths, isIntersection, err := ds.buildPartialPaths4MVIndex(accessFilters, idxCols, ds.possibleAccessPaths[idx].Index)
partialPaths, isIntersection, ok, err := ds.buildPartialPaths4MVIndex(accessFilters, idxCols, ds.possibleAccessPaths[idx].Index)
if err != nil {
return nil, err
}

indexMergePath := &util.AccessPath{PartialIndexPaths: partialPaths}
indexMergePath.IndexMergeIsIntersection = isIntersection
indexMergePath.TableFilters = remainingFilters

// TODO: use a naive estimation strategy here now for simplicity, make it more accurate.
minEstRows, maxEstRows := math.MaxFloat64, -1.0
for _, p := range indexMergePath.PartialIndexPaths {
minEstRows = math.Min(minEstRows, p.CountAfterAccess)
maxEstRows = math.Max(maxEstRows, p.CountAfterAccess)
}
if indexMergePath.IndexMergeIsIntersection {
indexMergePath.CountAfterAccess = minEstRows
} else {
indexMergePath.CountAfterAccess = maxEstRows
if !ok {
continue
}

mvIndexPaths = append(mvIndexPaths, indexMergePath)
mvIndexPaths = append(mvIndexPaths, ds.buildPartialPathUp4MVIndex(partialPaths, isIntersection, remainingFilters))
}
return
}

// buildPartialPathUp4MVIndex wraps the given partial paths into one complete IndexMerge path.
// isIntersection selects the merge type stored on the path and remainingFilters become its
// table filters.
//
// The row estimate is deliberately naive for now (TODO: make it more accurate): the smallest
// CountAfterAccess among the partial paths for an intersection, the largest one otherwise.
// When partialPaths is empty the estimate keeps its seed value, i.e. math.MaxFloat64 for an
// intersection and -1.0 for a union.
func (ds *DataSource) buildPartialPathUp4MVIndex(partialPaths []*util.AccessPath, isIntersection bool, remainingFilters []expression.Expression) *util.AccessPath {
	lowest, highest := math.MaxFloat64, -1.0
	for _, partial := range partialPaths {
		lowest = math.Min(lowest, partial.CountAfterAccess)
		highest = math.Max(highest, partial.CountAfterAccess)
	}

	estRows := highest
	if isIntersection {
		estRows = lowest
	}

	return &util.AccessPath{
		PartialIndexPaths:        partialPaths,
		IndexMergeIsIntersection: isIntersection,
		TableFilters:             remainingFilters,
		CountAfterAccess:         estRows,
	}
}

func (ds *DataSource) buildPartialPaths4MVIndex(accessFilters []expression.Expression,
idxCols []*expression.Column, mvIndex *model.IndexInfo) ([]*util.AccessPath, bool, error) {
idxCols []*expression.Column, mvIndex *model.IndexInfo) (
partialPaths []*util.AccessPath, isIntersection bool, ok bool, err error) {
qw4990 marked this conversation as resolved.
Show resolved Hide resolved
var virColID = -1
for i := range idxCols {
if idxCols[i].VirtualExpr != nil {
Expand All @@ -562,39 +645,38 @@ func (ds *DataSource) buildPartialPaths4MVIndex(accessFilters []expression.Expre
}
}
if virColID == -1 { // unexpected, no vir-col on this MVIndex
return nil, false, nil
return nil, false, false, nil
}
if len(accessFilters) <= virColID { // no filter related to the vir-col, build a partial path directly.
partialPath, ok, err := ds.buildPartialPath4MVIndex(accessFilters, idxCols, mvIndex)
return []*util.AccessPath{partialPath}, ok, err
return []*util.AccessPath{partialPath}, false, ok, err
}

virCol := idxCols[virColID]
jsonType := virCol.GetType().ArrayType()
targetJSONPath, ok := unwrapJSONCast(virCol.VirtualExpr)
if !ok {
return nil, false, nil
return nil, false, false, nil
}

// extract values related to this vir-col, for example, extract [1, 2] from `json_contains(j, '[1, 2]')`
var virColVals []expression.Expression
var isIntersection bool
sf, ok := accessFilters[virColID].(*expression.ScalarFunction)
if !ok {
return nil, false, nil
return nil, false, false, nil
}
switch sf.FuncName.L {
case ast.JSONMemberOf: // (1 member of a->'$.zip')
v, ok := unwrapJSONCast(sf.GetArgs()[0]) // cast(1 as json) --> 1
if !ok {
return nil, false, nil
return nil, false, false, nil
}
virColVals = append(virColVals, v)
case ast.JSONContains: // (json_contains(a->'$.zip', '[1, 2, 3]')
isIntersection = true
virColVals, ok = jsonArrayExpr2Exprs(ds.ctx, sf.GetArgs()[1], jsonType)
if !ok {
return nil, false, nil
return nil, false, false, nil
}
case ast.JSONOverlaps: // (json_overlaps(a->'$.zip', '[1, 2, 3]')
var jsonPathIdx int
Expand All @@ -603,33 +685,32 @@ func (ds *DataSource) buildPartialPaths4MVIndex(accessFilters []expression.Expre
} else if sf.GetArgs()[1].Equal(ds.ctx, targetJSONPath) {
jsonPathIdx = 1 // (json_overlaps('[1, 2, 3]', a->'$.zip')
} else {
return nil, false, nil
return nil, false, false, nil
}
var ok bool
virColVals, ok = jsonArrayExpr2Exprs(ds.ctx, sf.GetArgs()[1-jsonPathIdx], jsonType)
if !ok {
return nil, false, nil
return nil, false, false, nil
}
default:
return nil, false, nil
return nil, false, false, nil
}

partialPaths := make([]*util.AccessPath, 0, len(virColVals))
for _, v := range virColVals {
// rewrite json functions to EQ to calculate range, `(1 member of j)` -> `j=1`.
eq, err := expression.NewFunction(ds.ctx, ast.EQ, types.NewFieldType(mysql.TypeTiny), virCol, v)
if err != nil {
return nil, false, err
return nil, false, false, err
}
accessFilters[virColID] = eq

partialPath, ok, err := ds.buildPartialPath4MVIndex(accessFilters, idxCols, mvIndex)
if !ok || err != nil {
return nil, ok, err
return nil, false, ok, err
}
partialPaths = append(partialPaths, partialPath)
}
return partialPaths, isIntersection, nil
return partialPaths, isIntersection, true, nil
}

// buildPartialPath4MVIndex builds a partial path on this MVIndex with these accessFilters.
Expand Down
28 changes: 28 additions & 0 deletions planner/core/indexmerge_path_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,34 @@ index j1((cast(j1 as signed array))))`)
}
}

// TestDNFOnMVIndex checks the plans chosen for DNF (OR) filters over multi-valued indexes.
// The queries and their expected `explain` output come from the index-merge suite data;
// in record mode the expected output is refreshed from the actual plans instead.
func TestDNFOnMVIndex(t *testing.T) {
	const explainPrefix = "explain format = 'brief' "

	store := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, store)
	tk.MustExec("use test")
	tk.MustExec(`create table t(a int, b int, c int, j json,
index idx1((cast(j as signed array))),
index idx2(a, b, (cast(j as signed array)), c))`)

	var (
		queries  []string
		expected []struct {
			SQL  string
			Plan []string
		}
	)
	suiteData := core.GetIndexMergeSuiteData()
	suiteData.LoadTestCases(t, &queries, &expected)

	for i, sql := range queries {
		// Record the statement before running it, then the plan it produced.
		testdata.OnRecord(func() {
			expected[i].SQL = sql
		})
		plan := tk.MustQuery(explainPrefix + sql)
		testdata.OnRecord(func() {
			expected[i].Plan = testdata.ConvertRowsToStrings(plan.Rows())
		})
		plan.Check(testkit.Rows(expected[i].Plan...))
	}
}

func TestCompositeMVIndex(t *testing.T) {
store := testkit.CreateMockStore(t)
tk := testkit.NewTestKit(t, store)
Expand Down
13 changes: 13 additions & 0 deletions planner/core/testdata/index_merge_suite_in.json
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,19 @@
"select /*+ use_index_merge(t, idx2) */ * from t where a=1"
]
},
{
"name": "TestDNFOnMVIndex",
"cases": [
"select /*+ use_index_merge(t, idx1) */ * from t where (1 member of (j)) or (2 member of (j))",
"select /*+ use_index_merge(t, idx1) */ * from t where ((1 member of (j)) or (2 member of (j))) and (a > 10)",
"select /*+ use_index_merge(t, idx1) */ * from t where (json_overlaps(j, '[1, 2]')) or (json_overlaps(j, '[3, 4]'))",
"select /*+ use_index_merge(t, idx1) */ * from t where ((json_overlaps(j, '[1, 2]')) or (json_overlaps(j, '[3, 4]'))) and (a > 10)",
"select /*+ use_index_merge(t, idx1) */ * from t where (json_contains(j, '[1, 2]')) or (json_contains(j, '[3, 4]'))",
"select /*+ use_index_merge(t, idx2) */ * from t where (a=1 and b=2 and (3 member of (j))) or (a=11 and b=12 and (13 member of (j)))",
"select /*+ use_index_merge(t, idx2) */ * from t where (a=1 and b=2 and (3 member of (j))) or (a=11 and b=12 and (13 member of (j)) and c=14)",
"select /*+ use_index_merge(t, idx2) */ * from t where ((a=1 and b=2 and (3 member of (j))) or (a=11 and b=12 and (13 member of (j)))) and (c > 10)"
]
},
{
"name": "TestMVIndexSelection",
"cases": [
Expand Down
90 changes: 90 additions & 0 deletions planner/core/testdata/index_merge_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,96 @@
}
]
},
{
"Name": "TestDNFOnMVIndex",
"Cases": [
{
"SQL": "select /*+ use_index_merge(t, idx1) */ * from t where (1 member of (j)) or (2 member of (j))",
"Plan": [
"Selection 8.00 root or(json_memberof(cast(1, json BINARY), test.t.j), json_memberof(cast(2, json BINARY), test.t.j))",
"└─IndexMerge 10.00 root type: union",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[1,1], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[2,2], keep order:false, stats:pseudo",
" └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, idx1) */ * from t where ((1 member of (j)) or (2 member of (j))) and (a > 10)",
"Plan": [
"Selection 8.00 root or(json_memberof(cast(1, json BINARY), test.t.j), json_memberof(cast(2, json BINARY), test.t.j))",
"└─IndexMerge 3.33 root type: union",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[1,1], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[2,2], keep order:false, stats:pseudo",
" └─Selection(Probe) 3.33 cop[tikv] gt(test.t.a, 10)",
" └─TableRowIDScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, idx1) */ * from t where (json_overlaps(j, '[1, 2]')) or (json_overlaps(j, '[3, 4]'))",
"Plan": [
"Selection 8.00 root or(json_overlaps(test.t.j, cast(\"[1, 2]\", json BINARY)), json_overlaps(test.t.j, cast(\"[3, 4]\", json BINARY)))",
"└─IndexMerge 10.00 root type: union",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[1,1], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[2,2], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[3,3], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[4,4], keep order:false, stats:pseudo",
" └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, idx1) */ * from t where ((json_overlaps(j, '[1, 2]')) or (json_overlaps(j, '[3, 4]'))) and (a > 10)",
"Plan": [
"Selection 8.00 root or(json_overlaps(test.t.j, cast(\"[1, 2]\", json BINARY)), json_overlaps(test.t.j, cast(\"[3, 4]\", json BINARY)))",
"└─IndexMerge 3.33 root type: union",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[1,1], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[2,2], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[3,3], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:idx1(cast(`j` as signed array)) range:[4,4], keep order:false, stats:pseudo",
" └─Selection(Probe) 3.33 cop[tikv] gt(test.t.a, 10)",
" └─TableRowIDScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, idx1) */ * from t where (json_contains(j, '[1, 2]')) or (json_contains(j, '[3, 4]'))",
"Plan": [
"TableReader 9600.00 root data:Selection",
Copy link
Contributor Author

@qw4990 qw4990 Jan 10, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is expected: `json_contains(...) or json_contains(...)` cannot be supported with a single IndexMerge operator, because each json_contains needs an intersection of its partial paths, which conflicts with the union required by the outer `or`.

"└─Selection 9600.00 cop[tikv] or(json_contains(test.t.j, cast(\"[1, 2]\", json BINARY)), json_contains(test.t.j, cast(\"[3, 4]\", json BINARY)))",
" └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, idx2) */ * from t where (a=1 and b=2 and (3 member of (j))) or (a=11 and b=12 and (13 member of (j)))",
"Plan": [
"Selection 0.00 root or(and(eq(test.t.a, 1), and(eq(test.t.b, 2), json_memberof(cast(3, json BINARY), test.t.j))), and(eq(test.t.a, 11), and(eq(test.t.b, 12), json_memberof(cast(13, json BINARY), test.t.j))))",
"└─IndexMerge 0.00 root type: union",
" ├─IndexRangeScan(Build) 0.00 cop[tikv] table:t, index:idx2(a, b, cast(`j` as signed array), c) range:[1 2 3,1 2 3], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 0.00 cop[tikv] table:t, index:idx2(a, b, cast(`j` as signed array), c) range:[11 12 13,11 12 13], keep order:false, stats:pseudo",
" └─TableRowIDScan(Probe) 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, idx2) */ * from t where (a=1 and b=2 and (3 member of (j))) or (a=11 and b=12 and (13 member of (j)) and c=14)",
"Plan": [
"Selection 0.00 root or(and(eq(test.t.a, 1), and(eq(test.t.b, 2), json_memberof(cast(3, json BINARY), test.t.j))), and(and(eq(test.t.a, 11), eq(test.t.b, 12)), and(json_memberof(cast(13, json BINARY), test.t.j), eq(test.t.c, 14))))",
"└─IndexMerge 0.00 root type: union",
" ├─IndexRangeScan(Build) 0.00 cop[tikv] table:t, index:idx2(a, b, cast(`j` as signed array), c) range:[1 2 3,1 2 3], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 0.00 cop[tikv] table:t, index:idx2(a, b, cast(`j` as signed array), c) range:[11 12 13 14,11 12 13 14], keep order:false, stats:pseudo",
" └─TableRowIDScan(Probe) 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, idx2) */ * from t where ((a=1 and b=2 and (3 member of (j))) or (a=11 and b=12 and (13 member of (j)))) and (c > 10)",
"Plan": [
"Selection 0.00 root or(and(eq(test.t.a, 1), and(eq(test.t.b, 2), json_memberof(cast(3, json BINARY), test.t.j))), and(eq(test.t.a, 11), and(eq(test.t.b, 12), json_memberof(cast(13, json BINARY), test.t.j))))",
"└─IndexMerge 0.00 root type: union",
" ├─IndexRangeScan(Build) 0.00 cop[tikv] table:t, index:idx2(a, b, cast(`j` as signed array), c) range:[1 2 3,1 2 3], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 0.00 cop[tikv] table:t, index:idx2(a, b, cast(`j` as signed array), c) range:[11 12 13,11 12 13], keep order:false, stats:pseudo",
" └─Selection(Probe) 0.00 cop[tikv] gt(test.t.c, 10)",
" └─TableRowIDScan 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
}
]
},
{
"Name": "TestMVIndexSelection",
"Cases": [
Expand Down