diff --git a/cmd/explaintest/r/explain_indexmerge.result b/cmd/explaintest/r/explain_indexmerge.result index a7728759f5d11..ef7f6cdf80088 100644 --- a/cmd/explaintest/r/explain_indexmerge.result +++ b/cmd/explaintest/r/explain_indexmerge.result @@ -97,19 +97,26 @@ label = "cop" } set session tidb_enable_index_merge = off; +explain select /*+ use_index_merge(t, primary, tb, tc) */ * from t where a <= 500000 or b <= 1000000 or c <= 3000000; +id estRows task access object operator info +IndexMerge_9 3560000.00 root +├─TableRangeScan_5(Build) 500000.00 cop[tikv] table:t range:[-inf,500000], keep order:false +├─IndexRangeScan_6(Build) 1000000.00 cop[tikv] table:t, index:tb(b) range:[-inf,1000000], keep order:false +├─IndexRangeScan_7(Build) 3000000.00 cop[tikv] table:t, index:tc(c) range:[-inf,3000000], keep order:false +└─TableRowIDScan_8(Probe) 3560000.00 cop[tikv] table:t keep order:false explain select /*+ use_index_merge(t, tb, tc) */ * from t where b < 50 or c < 5000000; id estRows task access object operator info -IndexMerge_8 5000000.00 root +IndexMerge_8 4999999.00 root ├─IndexRangeScan_5(Build) 49.00 cop[tikv] table:t, index:tb(b) range:[-inf,50), keep order:false ├─IndexRangeScan_6(Build) 4999999.00 cop[tikv] table:t, index:tc(c) range:[-inf,5000000), keep order:false -└─TableRowIDScan_7(Probe) 5000000.00 cop[tikv] table:t keep order:false +└─TableRowIDScan_7(Probe) 4999999.00 cop[tikv] table:t keep order:false explain select /*+ use_index_merge(t, tb, tc) */ * from t where (b < 10000 or c < 10000) and (a < 10 or d < 10) and f < 10; id estRows task access object operator info IndexMerge_9 0.00 root ├─IndexRangeScan_5(Build) 9999.00 cop[tikv] table:t, index:tb(b) range:[-inf,10000), keep order:false ├─IndexRangeScan_6(Build) 9999.00 cop[tikv] table:t, index:tc(c) range:[-inf,10000), keep order:false └─Selection_8(Probe) 0.00 cop[tikv] lt(test.t.f, 10), or(lt(test.t.a, 10), lt(test.t.d, 10)) - └─TableRowIDScan_7 19998.00 cop[tikv] table:t keep order:false + 
└─TableRowIDScan_7 19978.00 cop[tikv] table:t keep order:false explain select /*+ use_index_merge(t, tb) */ * from t where b < 50 or c < 5000000; id estRows task access object operator info TableReader_7 4999999.00 root data:Selection_6 @@ -122,7 +129,7 @@ TableReader_7 4999999.00 root data:Selection_6 └─TableFullScan_5 5000000.00 cop[tikv] table:t keep order:false explain select /*+ use_index_merge(t, primary, tb) */ * from t where a < 50 or b < 5000000; id estRows task access object operator info -IndexMerge_8 5000000.00 root +IndexMerge_8 4999999.00 root ├─TableRangeScan_5(Build) 49.00 cop[tikv] table:t range:[-inf,50), keep order:false ├─IndexRangeScan_6(Build) 4999999.00 cop[tikv] table:t, index:tb(b) range:[-inf,5000000), keep order:false -└─TableRowIDScan_7(Probe) 5000000.00 cop[tikv] table:t keep order:false +└─TableRowIDScan_7(Probe) 4999999.00 cop[tikv] table:t keep order:false diff --git a/cmd/explaintest/t/explain_indexmerge.test b/cmd/explaintest/t/explain_indexmerge.test index 8a7089936f0fe..372d23b5c1392 100644 --- a/cmd/explaintest/t/explain_indexmerge.test +++ b/cmd/explaintest/t/explain_indexmerge.test @@ -20,6 +20,7 @@ explain select * from t where (b < 10000 or c < 10000) and (a < 10 or d < 10) an explain format="dot" select * from t where (a < 50 or b < 50) and f > 100; set session tidb_enable_index_merge = off; # be forced to use IndexMerge +explain select /*+ use_index_merge(t, primary, tb, tc) */ * from t where a <= 500000 or b <= 1000000 or c <= 3000000; explain select /*+ use_index_merge(t, tb, tc) */ * from t where b < 50 or c < 5000000; explain select /*+ use_index_merge(t, tb, tc) */ * from t where (b < 10000 or c < 10000) and (a < 10 or d < 10) and f < 10; explain select /*+ use_index_merge(t, tb) */ * from t where b < 50 or c < 5000000; diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 2cb6483328955..ec5e5192cfbfd 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ 
-767,7 +767,7 @@ func (ds *DataSource) convertToIndexMergeScan(prop *property.PhysicalProperty, c return invalidTask, nil } path := candidate.path - var totalCost, totalRowCount float64 + var totalCost float64 scans := make([]PhysicalPlan, 0, len(path.PartialIndexPaths)) cop := &copTask{ indexPlanFinished: true, @@ -781,17 +781,19 @@ func (ds *DataSource) convertToIndexMergeScan(prop *property.PhysicalProperty, c } for _, partPath := range path.PartialIndexPaths { var scan PhysicalPlan - var partialCost, rowCount float64 + var partialCost float64 if partPath.IsTablePath() { - scan, partialCost, rowCount = ds.convertToPartialTableScan(prop, partPath) + scan, partialCost = ds.convertToPartialTableScan(prop, partPath) } else { - scan, partialCost, rowCount = ds.convertToPartialIndexScan(prop, partPath) + scan, partialCost = ds.convertToPartialIndexScan(prop, partPath) } scans = append(scans, scan) totalCost += partialCost - totalRowCount += rowCount } - + totalRowCount := path.CountAfterAccess + if prop.ExpectedCnt < ds.stats.RowCount { + totalRowCount *= prop.ExpectedCnt / ds.stats.RowCount + } ts, partialCost, err := ds.buildIndexMergeTableScan(prop, path.TableFilters, totalRowCount) if err != nil { return nil, err @@ -806,8 +808,7 @@ func (ds *DataSource) convertToIndexMergeScan(prop *property.PhysicalProperty, c func (ds *DataSource) convertToPartialIndexScan(prop *property.PhysicalProperty, path *util.AccessPath) ( indexPlan PhysicalPlan, - partialCost float64, - rowCount float64) { + partialCost float64) { idx := path.Index is, partialCost, rowCount := ds.getOriginalPhysicalIndexScan(prop, path, false, false) rowSize := is.indexScanRowSize(idx, ds, false) @@ -829,17 +830,16 @@ func (ds *DataSource) convertToPartialIndexScan(prop *property.PhysicalProperty, indexPlan := PhysicalSelection{Conditions: indexConds}.Init(is.ctx, stats, ds.blockOffset) indexPlan.SetChildren(is) partialCost += rowCount * rowSize * sessVars.NetworkFactor - return indexPlan, partialCost, 
rowCount + return indexPlan, partialCost } partialCost += rowCount * rowSize * sessVars.NetworkFactor indexPlan = is - return indexPlan, partialCost, rowCount + return indexPlan, partialCost } func (ds *DataSource) convertToPartialTableScan(prop *property.PhysicalProperty, path *util.AccessPath) ( tablePlan PhysicalPlan, - partialCost float64, - rowCount float64) { + partialCost float64) { ts, partialCost, rowCount := ds.getOriginalPhysicalTableScan(prop, path, false) rowSize := ds.TblColHists.GetAvgRowSize(ds.ctx, ds.TblCols, false, false) sessVars := ds.ctx.GetSessionVars() @@ -853,11 +853,11 @@ func (ds *DataSource) convertToPartialTableScan(prop *property.PhysicalProperty, tablePlan.SetChildren(ts) partialCost += rowCount * sessVars.CopCPUFactor partialCost += selectivity * rowCount * rowSize * sessVars.NetworkFactor - return tablePlan, partialCost, rowCount + return tablePlan, partialCost } partialCost += rowCount * rowSize * sessVars.NetworkFactor tablePlan = ts - return tablePlan, partialCost, rowCount + return tablePlan, partialCost } func (ds *DataSource) buildIndexMergeTableScan(prop *property.PhysicalProperty, tableFilters []expression.Expression, totalRowCount float64) (PhysicalPlan, float64, error) { diff --git a/planner/core/stats.go b/planner/core/stats.go index 02fdee518b2bb..e33a87bf41291 100644 --- a/planner/core/stats.go +++ b/planner/core/stats.go @@ -419,9 +419,13 @@ func (ds *DataSource) generateIndexMergeOrPaths() { } if len(partialPaths) > 1 { possiblePath := ds.buildIndexMergeOrPath(partialPaths, i) - if possiblePath != nil { - ds.possibleAccessPaths = append(ds.possibleAccessPaths, possiblePath) + sel, _, err := ds.tableStats.HistColl.Selectivity(ds.ctx, []expression.Expression{sf}, nil) + if err != nil { + logutil.BgLogger().Debug("something wrong happened, use the default selectivity", zap.Error(err)) + sel = SelectionFactor } + possiblePath.CountAfterAccess = sel * ds.tableStats.RowCount + ds.possibleAccessPaths = 
append(ds.possibleAccessPaths, possiblePath) } } } diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json index ef5c8c7c0a16c..fcbd1959212e1 100644 --- a/planner/core/testdata/integration_suite_out.json +++ b/planner/core/testdata/integration_suite_out.json @@ -769,10 +769,10 @@ "SQL": "select /*+ use_index_merge(t partition(p0)) */ * from t where t.b = 1 or t.c = \"8\"", "Plan": [ "PartitionUnion_9 59.97 root ", - "├─IndexMerge_13 20.00 root ", + "├─IndexMerge_13 19.99 root ", "│ ├─IndexRangeScan_10(Build) 10.00 cop[tikv] table:t, partition:p0, index:b(b) range:[1,1], keep order:false, stats:pseudo", "│ ├─IndexRangeScan_11(Build) 10.00 cop[tikv] table:t, partition:p0, index:c(c) range:[\"8\",\"8\"], keep order:false, stats:pseudo", - "│ └─TableRowIDScan_12(Probe) 20.00 cop[tikv] table:t, partition:p0 keep order:false, stats:pseudo", + "│ └─TableRowIDScan_12(Probe) 19.99 cop[tikv] table:t, partition:p0 keep order:false, stats:pseudo", "├─TableReader_19 19.99 root data:Selection_18", "│ └─Selection_18 19.99 cop[tiflash] or(eq(test.t.b, 1), eq(test.t.c, \"8\"))", "│ └─TableFullScan_17 10000.00 cop[tiflash] table:t, partition:p1 keep order:false, stats:pseudo", @@ -949,10 +949,10 @@ { "SQL": "select /*+ use_index_merge(t1 primary, c) */ * from t1 where t1.a = 1 and t1.b = '111' or t1.c = 3.3", "Plan": [ - "IndexMerge_8 2.00 root ", + "IndexMerge_8 1.67 root ", "├─TableRangeScan_5(Build) 1.00 cop[tikv] table:t1 range:[1 \"111\",1 \"111\"], keep order:false", "├─IndexRangeScan_6(Build) 1.00 cop[tikv] table:t1, index:c(c) range:[3.3000000000,3.3000000000], keep order:false", - "└─TableRowIDScan_7(Probe) 2.00 cop[tikv] table:t1 keep order:false" + "└─TableRowIDScan_7(Probe) 1.67 cop[tikv] table:t1 keep order:false" ], "Res": [ "1 111 1.1000000000 11", @@ -1162,20 +1162,20 @@ "SQL": "select * from pt where id = 4 or c < 7", "Plan": [ "Projection_4 3330.01 root test.pt.id, test.pt.c", - "└─IndexMerge_11 3333.33 root 
partition:all ", + "└─IndexMerge_11 3330.01 root partition:all ", " ├─IndexRangeScan_8(Build) 10.00 cop[tikv] table:pt, index:i_id(id) range:[4,4], keep order:false, stats:pseudo", " ├─IndexRangeScan_9(Build) 3323.33 cop[tikv] table:pt, index:i_c(c) range:[-inf,7), keep order:false, stats:pseudo", - " └─TableRowIDScan_10(Probe) 3333.33 cop[tikv] table:pt keep order:false, stats:pseudo" + " └─TableRowIDScan_10(Probe) 3330.01 cop[tikv] table:pt keep order:false, stats:pseudo" ] }, { "SQL": "select * from pt where id > 4 or c = 7", "Plan": [ "Projection_4 3340.00 root test.pt.id, test.pt.c", - "└─IndexMerge_11 3343.33 root partition:all ", + "└─IndexMerge_11 3340.00 root partition:all ", " ├─IndexRangeScan_8(Build) 3333.33 cop[tikv] table:pt, index:i_id(id) range:(4,+inf], keep order:false, stats:pseudo", " ├─IndexRangeScan_9(Build) 10.00 cop[tikv] table:pt, index:i_c(c) range:[7,7], keep order:false, stats:pseudo", - " └─TableRowIDScan_10(Probe) 3343.33 cop[tikv] table:pt keep order:false, stats:pseudo" + " └─TableRowIDScan_10(Probe) 3340.00 cop[tikv] table:pt keep order:false, stats:pseudo" ] } ] diff --git a/planner/util/path.go b/planner/util/path.go index eccf642677fc4..f6fa0b47e0f51 100644 --- a/planner/util/path.go +++ b/planner/util/path.go @@ -34,6 +34,7 @@ type AccessPath struct { IdxColLens []int Ranges []*ranger.Range // CountAfterAccess is the row count after we apply range seek and before we use other filter to filter data. + // For an index merge path, CountAfterAccess is the row count after the partial paths are merged and before the table filters are applied. CountAfterAccess float64 // CountAfterIndex is the row count after we apply filters on index and before we apply the table filters. CountAfterIndex float64