From ee9cb204f9e1ab8b7b8587ad223d0889f2b03bcd Mon Sep 17 00:00:00 2001 From: Haibin Xie Date: Fri, 15 Feb 2019 18:43:23 +0800 Subject: [PATCH 1/8] planner: implement skyline pruning --- cmd/explaintest/main.go | 5 - cmd/explaintest/r/explain_complex.result | 14 +- .../r/explain_complex_stats.result | 14 +- cmd/explaintest/r/explain_easy.result | 102 +++++----- cmd/explaintest/r/explain_easy_stats.result | 12 +- cmd/explaintest/r/select.result | 40 ++-- cmd/explaintest/r/topn_push_down.result | 20 +- cmd/explaintest/r/tpch.result | 14 +- cmd/explaintest/r/window_function.result | 34 ++-- expression/util.go | 20 ++ planner/core/find_best_task.go | 182 ++++++++++++++---- planner/core/logical_plan_test.go | 100 ++++++++++ statistics/selectivity_test.go | 8 +- util/ranger/ranger_test.go | 8 +- 14 files changed, 400 insertions(+), 173 deletions(-) diff --git a/cmd/explaintest/main.go b/cmd/explaintest/main.go index fc7a6f043c742..8ff9d2b00cf6d 100644 --- a/cmd/explaintest/main.go +++ b/cmd/explaintest/main.go @@ -567,11 +567,6 @@ func loadAllTests() ([]string, error) { if strings.HasSuffix(name, ".test") { name = strings.TrimSuffix(name, ".test") - // if we use record and the result file exists, skip generating - if record && resultExists(name) { - continue - } - if create && !strings.HasSuffix(name, "_stats") { continue } diff --git a/cmd/explaintest/r/explain_complex.result b/cmd/explaintest/r/explain_complex.result index 1b93d600fa4aa..66d38378b0f33 100644 --- a/cmd/explaintest/r/explain_complex.result +++ b/cmd/explaintest/r/explain_complex.result @@ -153,9 +153,9 @@ id count task operator info Projection_10 0.00 root dt.id, dt.aid, dt.pt, dt.dic, dt.cm, rr.gid, rr.acd, rr.t, dt.p1, dt.p2, dt.p3, dt.p4, dt.p5, dt.p6_md5, dt.p7_md5 └─Limit_13 0.00 root offset:0, count:2000 └─IndexJoin_19 0.00 root inner join, inner:IndexLookUp_18, outer key:dt.aid, dt.dic, inner key:rr.aid, rr.dic - ├─TableReader_47 0.00 root data:Selection_46 - │ └─Selection_46 0.00 cop eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592), not(isnull(dt.dic)) - │ └─TableScan_45 10000.00 cop table:dt, range:[0,+inf], keep order:false, stats:pseudo + ├─TableReader_43 0.00 root data:Selection_42 + │ └─Selection_42 0.00 cop eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592), not(isnull(dt.dic)) + │ └─TableScan_41 10000.00 cop table:dt, range:[0,+inf], keep order:false, stats:pseudo └─IndexLookUp_18 3.33 root ├─IndexScan_15 10.00 cop table:rr, index:aid, dic, range: decided by [dt.aid dt.dic], keep order:false, stats:pseudo └─Selection_17 3.33 cop eq(rr.pt, "ios"), gt(rr.t, 1478185592) @@ -164,10 +164,10 @@ explain select pc,cr,count(DISTINCT uid) as pay_users,count(oid) as pay_times,su id count task operator info Projection_5 1.00 root test.pp.pc, test.pp.cr, 3_col_0, 3_col_1, 3_col_2 └─HashAgg_7 1.00 root group by:test.pp.cr, test.pp.pc, funcs:count(distinct test.pp.uid), count(test.pp.oid), sum(test.pp.am), firstrow(test.pp.pc), firstrow(test.pp.cr) - └─IndexLookUp_28 0.00 root - ├─IndexScan_25 0.40 cop table:pp, index:uid, pi, range:[18089709 510017,18089709 510017], [18089709 520017,18089709 520017], [18090780 510017,18090780 510017], [18090780 520017,18090780 520017], keep order:false, stats:pseudo - └─Selection_27 0.00 cop eq(test.pp.ps, 2), ge(test.pp.ppt, 1478188800), lt(test.pp.ppt, 1478275200) - └─TableScan_26 0.40 cop table:pp, keep order:false, stats:pseudo + └─IndexLookUp_24 0.00 root + ├─IndexScan_21 0.40 cop table:pp, index:uid, pi, range:[18089709 510017,18089709 510017], [18089709 520017,18089709 520017], [18090780 510017,18090780 510017], [18090780 520017,18090780 520017], keep order:false, stats:pseudo + └─Selection_23 0.00 cop eq(test.pp.ps, 2), ge(test.pp.ppt, 1478188800), lt(test.pp.ppt, 1478275200) + └─TableScan_22 0.40 cop table:pp, keep order:false, stats:pseudo CREATE TABLE `tbl_001` (`a` int, `b` int); CREATE TABLE `tbl_002` (`a` int, `b` int); CREATE TABLE `tbl_003` (`a` int, `b` int); diff --git a/cmd/explaintest/r/explain_complex_stats.result b/cmd/explaintest/r/explain_complex_stats.result index f64fe4e0b30af..805695e77132c 100644 --- a/cmd/explaintest/r/explain_complex_stats.result +++ b/cmd/explaintest/r/explain_complex_stats.result @@ -161,9 +161,9 @@ id count task operator info Projection_10 428.32 root dt.id, dt.aid, dt.pt, dt.dic, dt.cm, rr.gid, rr.acd, rr.t, dt.p1, dt.p2, dt.p3, dt.p4, dt.p5, dt.p6_md5, dt.p7_md5 └─Limit_13 428.32 root offset:0, count:2000 └─IndexJoin_19 428.32 root inner join, inner:IndexLookUp_18, outer key:dt.aid, dt.dic, inner key:rr.aid, rr.dic - ├─TableReader_47 428.32 root data:Selection_46 - │ └─Selection_46 428.32 cop eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592), not(isnull(dt.dic)) - │ └─TableScan_45 2000.00 cop table:dt, range:[0,+inf], keep order:false + ├─TableReader_43 428.32 root data:Selection_42 + │ └─Selection_42 428.32 cop eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592), not(isnull(dt.dic)) + │ └─TableScan_41 2000.00 cop table:dt, range:[0,+inf], keep order:false └─IndexLookUp_18 970.00 root ├─IndexScan_15 1.00 cop table:rr, index:aid, dic, range: decided by [dt.aid dt.dic], keep order:false └─Selection_17 970.00 cop eq(rr.pt, "ios"), gt(rr.t, 1478185592) @@ -172,10 +172,10 @@ explain select pc,cr,count(DISTINCT uid) as pay_users,count(oid) as pay_times,su id count task operator info Projection_5 207.86 root test.pp.pc, test.pp.cr, 3_col_0, 3_col_1, 3_col_2 └─HashAgg_7 207.86 root group by:test.pp.cr, test.pp.pc, funcs:count(distinct test.pp.uid), count(test.pp.oid), sum(test.pp.am), firstrow(test.pp.pc), firstrow(test.pp.cr) - └─IndexLookUp_28 207.86 root - ├─IndexScan_22 627.00 cop table:pp, index:ps, range:[2,2], keep order:false - └─Selection_24 207.86 cop ge(test.pp.ppt, 1478188800), in(test.pp.pi, 510017, 520017), in(test.pp.uid, 18089709, 18090780), lt(test.pp.ppt, 1478275200) - └─TableScan_23 627.00 cop table:pp, keep order:false + └─IndexLookUp_24 207.86 root + ├─IndexScan_18 627.00 cop table:pp, index:ps, range:[2,2], keep order:false + └─Selection_20 207.86 cop ge(test.pp.ppt, 1478188800), in(test.pp.pi, 510017, 520017), in(test.pp.uid, 18089709, 18090780), lt(test.pp.ppt, 1478275200) + └─TableScan_19 627.00 cop table:pp, keep order:false drop table if exists tbl_001; CREATE TABLE tbl_001 (a int, b int); load stats 's/explain_complex_stats_tbl_001.json'; diff --git a/cmd/explaintest/r/explain_easy.result b/cmd/explaintest/r/explain_easy.result index 39d8baf9c85a6..8a610f05d0f18 100644 --- a/cmd/explaintest/r/explain_easy.result +++ b/cmd/explaintest/r/explain_easy.result @@ -38,8 +38,8 @@ TableReader_6 3333.33 root data:TableScan_5 └─TableScan_5 3333.33 cop table:t1, range:(0,+inf], keep order:false, stats:pseudo explain select t1.c1, t1.c2 from t1 where t1.c2 = 1; id count task operator info -IndexReader_9 10.00 root index:IndexScan_8 -└─IndexScan_8 10.00 cop table:t1, index:c2, range:[1,1], keep order:false, stats:pseudo +IndexReader_6 10.00 root index:IndexScan_5 +└─IndexScan_5 10.00 cop table:t1, index:c2, range:[1,1], keep order:false, stats:pseudo explain select * from t1 left join t2 on t1.c2 = t2.c1 where t1.c1 > 1; id count task operator info IndexJoin_12 4166.67 root left outer join, inner:IndexLookUp_11, outer key:test.t1.c2, inner key:test.t2.c1 @@ -89,12 +89,12 @@ TableReader_7 0.33 root data:Selection_6 explain select sum(t1.c1 in (select c1 from t2)) from t1; id count task operator info StreamAgg_12 1.00 root funcs:sum(col_0) -└─Projection_35 10000.00 root cast(5_aux_0) - └─MergeJoin_28 10000.00 root left outer semi join, left key:test.t1.c1, right key:test.t2.c1 +└─Projection_33 10000.00 root cast(5_aux_0) + └─MergeJoin_26 10000.00 root left outer semi join, left key:test.t1.c1, right key:test.t2.c1 ├─TableReader_19 10000.00 root data:TableScan_18 │ └─TableScan_18 10000.00 cop table:t1, range:[-inf,+inf], keep order:true, stats:pseudo - └─IndexReader_23 10000.00 root index:IndexScan_22 - └─IndexScan_22 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo + └─IndexReader_21 10000.00 root index:IndexScan_20 + └─IndexScan_20 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo explain select c1 from t1 where c1 in (select c2 from t2); id count task operator info Projection_9 9990.00 root test.t1.c1 @@ -113,9 +113,9 @@ Projection_12 10000.00 root k └─MergeJoin_14 10000.00 root left outer join, left key:test.t1.c1, right key:s.c1 ├─TableReader_17 10000.00 root data:TableScan_16 │ └─TableScan_16 10000.00 cop table:t1, range:[-inf,+inf], keep order:true, stats:pseudo - └─Projection_19 8000.00 root 1, s.c1 - └─TableReader_21 10000.00 root data:TableScan_20 - └─TableScan_20 10000.00 cop table:s, range:[-inf,+inf], keep order:true, stats:pseudo + └─Projection_18 8000.00 root 1, s.c1 + └─TableReader_20 10000.00 root data:TableScan_19 + └─TableScan_19 10000.00 cop table:s, range:[-inf,+inf], keep order:true, stats:pseudo explain select * from information_schema.columns; id count task operator info MemTableScan_4 10000.00 root @@ -134,8 +134,8 @@ Projection_12 10000.00 root eq(test.t1.c2, test.t2.c2) explain select * from t1 order by c1 desc limit 1; id count task operator info Limit_10 1.00 root offset:0, count:1 -└─TableReader_21 1.00 root data:Limit_20 - └─Limit_20 1.00 cop offset:0, count:1 +└─TableReader_20 1.00 root data:Limit_19 + └─Limit_19 1.00 cop offset:0, count:1 └─TableScan_18 1.00 cop table:t1, range:[-inf,+inf], keep order:true, desc, stats:pseudo explain select * from t4 use index(idx) where a > 1 and b > 1 and c > 1 limit 1; id count task operator info @@ -149,8 +149,8 @@ Limit_9 1.00 root offset:0, count:1 explain select * from t4 where a > 1 and c > 1 limit 1; id count task operator info Limit_8 1.00 root offset:0, count:1 -└─TableReader_15 1.00 root data:Limit_14 - └─Limit_14 1.00 cop offset:0, count:1 +└─TableReader_14 1.00 root data:Limit_13 + └─Limit_13 1.00 cop offset:0, count:1 └─Selection_12 1.00 cop gt(test.t4.c, 1) └─TableScan_11 3.00 cop table:t4, range:(1,+inf], keep order:false, stats:pseudo explain select ifnull(null, t1.c1) from t1; @@ -166,42 +166,42 @@ id count task operator info Union_17 26000.00 root ├─HashAgg_21 16000.00 root group by:c1, funcs:firstrow(join_agg_0) │ └─Union_22 16000.00 root -│ ├─StreamAgg_35 8000.00 root group by:col_2, funcs:firstrow(col_0), firstrow(col_1) -│ │ └─IndexReader_36 8000.00 root index:StreamAgg_26 +│ ├─StreamAgg_34 8000.00 root group by:col_2, funcs:firstrow(col_0), firstrow(col_1) +│ │ └─IndexReader_35 8000.00 root index:StreamAgg_26 │ │ └─StreamAgg_26 8000.00 cop group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) -│ │ └─IndexScan_34 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo -│ └─StreamAgg_52 8000.00 root group by:col_2, funcs:firstrow(col_0), firstrow(col_1) -│ └─IndexReader_53 8000.00 root index:StreamAgg_43 -│ └─StreamAgg_43 8000.00 cop group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) -│ └─IndexScan_51 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo -└─TableReader_59 10000.00 root data:TableScan_58 - └─TableScan_58 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo +│ │ └─IndexScan_33 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo +│ └─StreamAgg_49 8000.00 root group by:col_2, funcs:firstrow(col_0), firstrow(col_1) +│ └─IndexReader_50 8000.00 root index:StreamAgg_41 +│ └─StreamAgg_41 8000.00 cop group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) +│ └─IndexScan_48 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo +└─TableReader_55 10000.00 root data:TableScan_54 + └─TableScan_54 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo explain select c1 from t2 union all select c1 from t2 union select c1 from t2; id count task operator info HashAgg_18 24000.00 root group by:c1, funcs:firstrow(join_agg_0) └─Union_19 24000.00 root - ├─StreamAgg_32 8000.00 root group by:col_2, funcs:firstrow(col_0), firstrow(col_1) - │ └─IndexReader_33 8000.00 root index:StreamAgg_23 + ├─StreamAgg_31 8000.00 root group by:col_2, funcs:firstrow(col_0), firstrow(col_1) + │ └─IndexReader_32 8000.00 root index:StreamAgg_23 │ └─StreamAgg_23 8000.00 cop group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) - │ └─IndexScan_31 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo - ├─StreamAgg_49 8000.00 root group by:col_2, funcs:firstrow(col_0), firstrow(col_1) - │ └─IndexReader_50 8000.00 root index:StreamAgg_40 - │ └─StreamAgg_40 8000.00 cop group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) - │ └─IndexScan_48 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo - └─StreamAgg_66 8000.00 root group by:col_2, funcs:firstrow(col_0), firstrow(col_1) - └─IndexReader_67 8000.00 root index:StreamAgg_57 - └─StreamAgg_57 8000.00 cop group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) - └─IndexScan_65 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo + │ └─IndexScan_30 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo + ├─StreamAgg_46 8000.00 root group by:col_2, funcs:firstrow(col_0), firstrow(col_1) + │ └─IndexReader_47 8000.00 root index:StreamAgg_38 + │ └─StreamAgg_38 8000.00 cop group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) + │ └─IndexScan_45 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo + └─StreamAgg_61 8000.00 root group by:col_2, funcs:firstrow(col_0), firstrow(col_1) + └─IndexReader_62 8000.00 root index:StreamAgg_53 + └─StreamAgg_53 8000.00 cop group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) + └─IndexScan_60 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo set @@session.tidb_opt_insubq_to_join_and_agg=0; explain select sum(t1.c1 in (select c1 from t2)) from t1; id count task operator info StreamAgg_12 1.00 root funcs:sum(col_0) -└─Projection_35 10000.00 root cast(5_aux_0) - └─MergeJoin_28 10000.00 root left outer semi join, left key:test.t1.c1, right key:test.t2.c1 +└─Projection_33 10000.00 root cast(5_aux_0) + └─MergeJoin_26 10000.00 root left outer semi join, left key:test.t1.c1, right key:test.t2.c1 ├─TableReader_19 10000.00 root data:TableScan_18 │ └─TableScan_18 10000.00 cop table:t1, range:[-inf,+inf], keep order:true, stats:pseudo - └─IndexReader_23 10000.00 root index:IndexScan_22 - └─IndexScan_22 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo + └─IndexReader_21 10000.00 root index:IndexScan_20 + └─IndexScan_20 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo explain select 1 in (select c2 from t2) from t1; id count task operator info Projection_6 10000.00 root 5_aux_0 @@ -229,10 +229,10 @@ subgraph cluster12{ node [style=filled, color=lightgrey] color=black label = "root" -"StreamAgg_12" -> "Projection_35" -"Projection_35" -> "MergeJoin_28" -"MergeJoin_28" -> "TableReader_19" -"MergeJoin_28" -> "IndexReader_23" +"StreamAgg_12" -> "Projection_33" +"Projection_33" -> "MergeJoin_26" +"MergeJoin_26" -> "TableReader_19" +"MergeJoin_26" -> "IndexReader_21" } subgraph cluster18{ node [style=filled, color=lightgrey] @@ -240,14 +240,14 @@ color=black label = "cop" "TableScan_18" } -subgraph cluster22{ +subgraph cluster20{ node [style=filled, color=lightgrey] color=black label = "cop" -"IndexScan_22" +"IndexScan_20" } "TableReader_19" -> "TableScan_18" -"IndexReader_23" -> "IndexScan_22" +"IndexReader_21" -> "IndexScan_20" } explain format="dot" select 1 in (select c2 from t2) from t1; @@ -346,8 +346,8 @@ drop table if exists t; create table t(a bigint, b bigint, index idx(a, b)); explain select * from t where a in (1, 2) and a in (1, 3); id count task operator info -IndexReader_9 10.00 root index:IndexScan_8 -└─IndexScan_8 10.00 cop table:t, index:a, b, range:[1,1], keep order:false, stats:pseudo +IndexReader_6 10.00 root index:IndexScan_5 +└─IndexScan_5 10.00 cop table:t, index:a, b, range:[1,1], keep order:false, stats:pseudo explain select * from t where b in (1, 2) and b in (1, 3); id count task operator info TableReader_7 10.00 root data:Selection_6 @@ -355,8 +355,8 @@ TableReader_7 10.00 root data:Selection_6 └─TableScan_5 10000.00 cop table:t, range:[-inf,+inf], keep order:false, stats:pseudo explain select * from t where a = 1 and a = 1; id count task operator info -IndexReader_9 10.00 root index:IndexScan_8 -└─IndexScan_8 10.00 cop table:t, index:a, b, range:[1,1], keep order:false, stats:pseudo +IndexReader_6 10.00 root index:IndexScan_5 +└─IndexScan_5 10.00 cop table:t, index:a, b, range:[1,1], keep order:false, stats:pseudo explain select * from t where a = 1 and a = 2; id count task operator info TableDual_5 0.00 root rows:0 @@ -412,10 +412,10 @@ TableReader_7 10000.00 root data:TableScan_6 └─TableScan_6 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo explain select distinct t1.a, t1.b from t1 left outer join t2 on t1.a = t2.a; id count task operator info -StreamAgg_19 8000.00 root group by:col_2, col_3, funcs:firstrow(col_0), firstrow(col_1) -└─IndexReader_20 8000.00 root index:StreamAgg_10 +StreamAgg_18 8000.00 root group by:col_2, col_3, funcs:firstrow(col_0), firstrow(col_1) +└─IndexReader_19 8000.00 root index:StreamAgg_10 └─StreamAgg_10 8000.00 cop group by:test.t1.a, test.t1.b, funcs:firstrow(test.t1.a), firstrow(test.t1.b) - └─IndexScan_18 10000.00 cop table:t1, index:a, b, range:[NULL,+inf], keep order:true, stats:pseudo + └─IndexScan_17 10000.00 cop table:t1, index:a, b, range:[NULL,+inf], keep order:true, stats:pseudo drop table if exists t; create table t(a int, nb int not null, nc int not null); explain select ifnull(a, 0) from t; diff --git a/cmd/explaintest/r/explain_easy_stats.result b/cmd/explaintest/r/explain_easy_stats.result index e25c9b6336b11..f3e9f0a25ce83 100644 --- a/cmd/explaintest/r/explain_easy_stats.result +++ b/cmd/explaintest/r/explain_easy_stats.result @@ -41,8 +41,8 @@ TableReader_6 1999.00 root data:TableScan_5 └─TableScan_5 1999.00 cop table:t1, range:(0,+inf], keep order:false explain select t1.c1, t1.c2 from t1 where t1.c2 = 1; id count task operator info -IndexReader_9 0.00 root index:IndexScan_8 -└─IndexScan_8 0.00 cop table:t1, index:c2, range:[1,1], keep order:false +IndexReader_6 0.00 root index:IndexScan_5 +└─IndexScan_5 0.00 cop table:t1, index:c2, range:[1,1], keep order:false explain select * from t1 left join t2 on t1.c2 = t2.c1 where t1.c1 > 1; id count task operator info MergeJoin_7 2481.25 root left outer join, left key:test.t1.c2, right key:test.t2.c1 @@ -120,8 +120,8 @@ Projection_12 1999.00 root eq(test.t1.c2, test.t2.c2) explain select * from t1 order by c1 desc limit 1; id count task operator info Limit_10 1.00 root offset:0, count:1 -└─TableReader_21 1.00 root data:Limit_20 - └─Limit_20 1.00 cop offset:0, count:1 +└─TableReader_20 1.00 root data:Limit_19 + └─Limit_19 1.00 cop offset:0, count:1 └─TableScan_18 1.00 cop table:t1, range:[-inf,+inf], keep order:true, desc set @@session.tidb_opt_insubq_to_join_and_agg=0; explain select 1 in (select c2 from t2) from t1; @@ -200,5 +200,5 @@ create table tbl(column1 int, column2 int, index idx(column1, column2)); load stats 's/explain_easy_stats_tbl_dnf.json'; explain select * from tbl where (column1=0 and column2=1) or (column1=1 and column2=3) or (column1=2 and column2=5); id count task operator info -IndexReader_9 3.00 root index:IndexScan_8 -└─IndexScan_8 3.00 cop table:tbl, index:column1, column2, range:[0 1,0 1], [1 3,1 3], [2 5,2 5], keep order:false +IndexReader_6 3.00 root index:IndexScan_5 +└─IndexScan_5 3.00 cop table:tbl, index:column1, column2, range:[0 1,0 1], [1 3,1 3], [2 5,2 5], keep order:false diff --git a/cmd/explaintest/r/select.result b/cmd/explaintest/r/select.result index 8a78bcb99d2bd..14a457adbfd85 100644 --- a/cmd/explaintest/r/select.result +++ b/cmd/explaintest/r/select.result @@ -249,30 +249,30 @@ insert t values(0,0,0); explain select distinct b from t group by a; id count task operator info HashAgg_7 8000.00 root group by:test.t.b, funcs:firstrow(test.t.b) -└─StreamAgg_20 8000.00 root group by:col_1, funcs:firstrow(col_0) - └─IndexReader_21 8000.00 root index:StreamAgg_11 +└─StreamAgg_19 8000.00 root group by:col_1, funcs:firstrow(col_0) + └─IndexReader_20 8000.00 root index:StreamAgg_11 └─StreamAgg_11 8000.00 cop group by:test.t.a, funcs:firstrow(test.t.b) - └─IndexScan_19 10000.00 cop table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo + └─IndexScan_18 10000.00 cop table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo select distinct b from t group by a; b 0 explain select count(b) from t group by a; id count task operator info -StreamAgg_17 8000.00 root group by:col_1, funcs:count(col_0) -└─IndexReader_18 8000.00 root index:StreamAgg_8 +StreamAgg_16 8000.00 root group by:col_1, funcs:count(col_0) +└─IndexReader_17 8000.00 root index:StreamAgg_8 └─StreamAgg_8 8000.00 cop group by:test.t.a, funcs:count(test.t.b) - └─IndexScan_16 10000.00 cop table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo + └─IndexScan_15 10000.00 cop table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo select count(b) from t group by a; count(b) 1 insert t values(1,1,1),(3,3,6),(3,2,5),(2,1,4),(1,1,3),(1,1,2); explain select count(a) from t where b>0 group by a, b; id count task operator info -StreamAgg_21 2666.67 root group by:col_1, col_2, funcs:count(col_0) -└─IndexReader_22 2666.67 root index:StreamAgg_9 +StreamAgg_20 2666.67 root group by:col_1, col_2, funcs:count(col_0) +└─IndexReader_21 2666.67 root index:StreamAgg_9 └─StreamAgg_9 2666.67 cop group by:test.t.a, test.t.b, funcs:count(test.t.a) - └─Selection_20 3333.33 cop gt(test.t.b, 0) - └─IndexScan_19 10000.00 cop table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo + └─Selection_19 3333.33 cop gt(test.t.b, 0) + └─IndexScan_18 10000.00 cop table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo select count(a) from t where b>0 group by a, b; count(a) 3 @@ -282,11 +282,11 @@ count(a) explain select count(a) from t where b>0 group by a, b order by a; id count task operator info Projection_7 2666.67 root count(a) -└─StreamAgg_33 2666.67 root group by:col_2, col_3, funcs:count(col_0), firstrow(col_1) - └─IndexReader_34 2666.67 root index:StreamAgg_31 - └─StreamAgg_31 2666.67 cop group by:test.t.a, test.t.b, funcs:count(test.t.a), firstrow(test.t.a) - └─Selection_24 3333.33 cop gt(test.t.b, 0) - └─IndexScan_23 10000.00 cop table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo +└─StreamAgg_31 2666.67 root group by:col_2, col_3, funcs:count(col_0), firstrow(col_1) + └─IndexReader_32 2666.67 root index:StreamAgg_29 + └─StreamAgg_29 2666.67 cop group by:test.t.a, test.t.b, funcs:count(test.t.a), firstrow(test.t.a) + └─Selection_23 3333.33 cop gt(test.t.b, 0) + └─IndexScan_22 10000.00 cop table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo select count(a) from t where b>0 group by a, b order by a; count(a) 3 @@ -297,11 +297,11 @@ explain select count(a) from t where b>0 group by a, b order by a limit 1; id count task operator info Projection_9 1.00 root count(a) └─Limit_15 1.00 root offset:0, count:1 - └─StreamAgg_42 1.00 root group by:col_2, col_3, funcs:count(col_0), firstrow(col_1) - └─IndexReader_43 1.00 root index:StreamAgg_37 - └─StreamAgg_37 1.00 cop group by:test.t.a, test.t.b, funcs:count(test.t.a), firstrow(test.t.a) - └─Selection_41 1.25 cop gt(test.t.b, 0) - └─IndexScan_40 3.75 cop table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo + └─StreamAgg_39 1.00 root group by:col_2, col_3, funcs:count(col_0), firstrow(col_1) + └─IndexReader_40 1.00 root index:StreamAgg_35 + └─StreamAgg_35 1.00 cop group by:test.t.a, test.t.b, funcs:count(test.t.a), firstrow(test.t.a) + └─Selection_38 1.25 cop gt(test.t.b, 0) + └─IndexScan_37 3.75 cop table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo select count(a) from t where b>0 group by a, b order by a limit 1; count(a) 3 diff --git a/cmd/explaintest/r/topn_push_down.result b/cmd/explaintest/r/topn_push_down.result index 244943aecec90..e7700f7c17ea2 100644 --- a/cmd/explaintest/r/topn_push_down.result +++ b/cmd/explaintest/r/topn_push_down.result @@ -169,18 +169,18 @@ LIMIT 0, 5; id count task operator info Projection_13 0.00 root te.expect_time └─Limit_19 0.00 root offset:0, count:5 - └─IndexJoin_143 0.00 root left outer join, inner:IndexReader_142, outer key:tr.id, inner key:p.relate_id - ├─TopN_146 0.00 root te.expect_time:asc, offset:0, count:5 + └─IndexJoin_104 0.00 root left outer join, inner:IndexReader_103, outer key:tr.id, inner key:p.relate_id + ├─TopN_107 0.00 root te.expect_time:asc, offset:0, count:5 │ └─IndexJoin_36 0.00 root inner join, inner:IndexLookUp_35, outer key:tr.id, inner key:te.trade_id - │ ├─IndexLookUp_107 0.00 root - │ │ ├─Selection_105 0.00 cop eq(tr.business_type, 18), in(tr.trade_type, 1) - │ │ │ └─IndexScan_103 10.00 cop table:tr, index:shop_identy, trade_status, business_type, trade_pay_status, trade_type, delivery_type, source, biz_date, range:[810094178,810094178], keep order:false, stats:pseudo - │ │ └─Selection_106 0.00 cop eq(tr.brand_identy, 32314), eq(tr.domain_type, 2) - │ │ └─TableScan_104 0.00 cop table:tr, keep order:false + │ ├─IndexLookUp_85 0.00 root + │ │ ├─Selection_83 0.00 cop eq(tr.business_type, 18), in(tr.trade_type, 1) + │ │ │ └─IndexScan_81 10.00 cop table:tr, index:shop_identy, trade_status, business_type, trade_pay_status, trade_type, delivery_type, source, biz_date, range:[810094178,810094178], keep order:false, stats:pseudo + │ │ └─Selection_84 0.00 cop eq(tr.brand_identy, 32314), eq(tr.domain_type, 2) + │ │ └─TableScan_82 0.00 cop table:tr, keep order:false │ └─IndexLookUp_35 250.00 root │ ├─IndexScan_32 10.00 cop table:te, index:trade_id, range: decided by [tr.id], keep order:false, stats:pseudo │ └─Selection_34 250.00 cop ge(te.expect_time, 2018-04-23 00:00:00.000000), le(te.expect_time, 2018-04-23 23:59:59.000000) │ └─TableScan_33 10.00 cop table:te, keep order:false, stats:pseudo - └─IndexReader_142 0.00 root index:Selection_141 - └─Selection_141 0.00 cop not(isnull(p.relate_id)) - └─IndexScan_140 10.00 cop table:p, index:relate_id, range: decided by [tr.id], keep order:false, stats:pseudo + └─IndexReader_103 0.00 root index:Selection_102 + └─Selection_102 0.00 cop not(isnull(p.relate_id)) + └─IndexScan_101 10.00 cop table:p, index:relate_id, range: decided by [tr.id], keep order:false, stats:pseudo diff --git a/cmd/explaintest/r/tpch.result b/cmd/explaintest/r/tpch.result index c32849cbd666d..d3c7dc4a3eb0c 100644 --- a/cmd/explaintest/r/tpch.result +++ b/cmd/explaintest/r/tpch.result @@ -930,16 +930,16 @@ id count task operator info Sort_13 3863988.24 root supplier_cnt:desc, tpch.part.p_brand:asc, tpch.part.p_type:asc, tpch.part.p_size:asc └─Projection_15 3863988.24 root tpch.part.p_brand, tpch.part.p_type, tpch.part.p_size, 9_col_0 └─HashAgg_18 3863988.24 root group by:tpch.part.p_brand, tpch.part.p_size, tpch.part.p_type, funcs:count(distinct tpch.partsupp.ps_suppkey), firstrow(tpch.part.p_brand), firstrow(tpch.part.p_type), firstrow(tpch.part.p_size) - └─HashLeftJoin_23 3863988.24 root anti semi join, inner:TableReader_47, equal:[eq(tpch.partsupp.ps_suppkey, tpch.supplier.s_suppkey)] + └─HashLeftJoin_23 3863988.24 root anti semi join, inner:TableReader_46, equal:[eq(tpch.partsupp.ps_suppkey, tpch.supplier.s_suppkey)] ├─IndexJoin_27 4829985.30 root inner join, inner:IndexReader_26, outer key:tpch.part.p_partkey, inner key:tpch.partsupp.ps_partkey - │ ├─TableReader_42 1200618.43 root data:Selection_41 - │ │ └─Selection_41 1200618.43 cop in(tpch.part.p_size, 48, 19, 12, 4, 41, 7, 21, 39), ne(tpch.part.p_brand, "Brand#34"), not(like(tpch.part.p_type, "LARGE BRUSHED%", 92)) - │ │ └─TableScan_40 10000000.00 cop table:part, range:[-inf,+inf], keep order:false + │ ├─TableReader_41 1200618.43 root data:Selection_40 + │ │ └─Selection_40 1200618.43 cop in(tpch.part.p_size, 48, 19, 12, 4, 41, 7, 21, 39), ne(tpch.part.p_brand, "Brand#34"), not(like(tpch.part.p_type, "LARGE BRUSHED%", 92)) + │ │ └─TableScan_39 10000000.00 cop table:part, range:[-inf,+inf], keep order:false │ └─IndexReader_26 1.00 root index:IndexScan_25 │ └─IndexScan_25 1.00 cop table:partsupp, index:PS_PARTKEY, PS_SUPPKEY, range: decided by [tpch.part.p_partkey], keep order:false - └─TableReader_47 400000.00 root data:Selection_46 - └─Selection_46 400000.00 cop like(tpch.supplier.s_comment, "%Customer%Complaints%", 92) - └─TableScan_45 500000.00 cop table:supplier, range:[-inf,+inf], keep order:false + └─TableReader_46 400000.00 root data:Selection_45 + └─Selection_45 400000.00 cop like(tpch.supplier.s_comment, "%Customer%Complaints%", 92) + └─TableScan_44 500000.00 cop table:supplier, range:[-inf,+inf], keep order:false /* Q17 Small-Quantity-Order Revenue Query This query determines how much average yearly revenue would be lost if orders were no longer filled for small diff --git a/cmd/explaintest/r/window_function.result b/cmd/explaintest/r/window_function.result index c1da15629e80b..73fcf3a905e9e 100644 --- a/cmd/explaintest/r/window_function.result +++ b/cmd/explaintest/r/window_function.result @@ -12,40 +12,40 @@ explain select sum(a) over(partition by a) from t; id count task operator info Projection_7 10000.00 root sum(a) over(partition by a) └─Window_8 10000.00 root sum(cast(test.t.a)) over(partition by test.t.a) - └─IndexReader_11 10000.00 root index:IndexScan_10 - └─IndexScan_10 10000.00 cop table:t, index:a, range:[NULL,+inf], keep order:true, stats:pseudo + └─IndexReader_10 10000.00 root index:IndexScan_9 + └─IndexScan_9 10000.00 cop table:t, index:a, range:[NULL,+inf], keep order:true, stats:pseudo explain select sum(a) over(partition by a order by b) from t; id count task operator info Projection_7 10000.00 root sum(a) over(partition by a order by b) └─Window_8 10000.00 root sum(cast(test.t.a)) over(partition by test.t.a order by test.t.b asc) - └─Sort_14 10000.00 root test.t.a:asc, test.t.b:asc - └─TableReader_13 10000.00 root data:TableScan_12 - └─TableScan_12 10000.00 cop table:t, range:[-inf,+inf], keep order:false, stats:pseudo + └─Sort_12 10000.00 root test.t.a:asc, test.t.b:asc + └─TableReader_11 10000.00 root data:TableScan_10 + └─TableScan_10 10000.00 cop table:t, range:[-inf,+inf], keep order:false, stats:pseudo explain select sum(a) over(partition by a order by b rows unbounded preceding) from t; id count task operator info Projection_7 10000.00 root sum(a) over(partition by a order by b rows unbounded preceding) └─Window_8 10000.00 root sum(cast(test.t.a)) over(partition by test.t.a order by test.t.b asc rows between unbounded preceding and current row) - └─Sort_14 10000.00 root test.t.a:asc, test.t.b:asc - └─TableReader_13 10000.00 root data:TableScan_12 - └─TableScan_12 10000.00 cop table:t, range:[-inf,+inf], keep order:false, stats:pseudo + └─Sort_12 10000.00 root test.t.a:asc, test.t.b:asc + └─TableReader_11 10000.00 root data:TableScan_10 + └─TableScan_10 10000.00 cop table:t, range:[-inf,+inf], keep order:false, stats:pseudo explain select sum(a) over(partition by a order by b rows between 1 preceding and 1 following) from t; id count task operator info Projection_7 10000.00 root sum(a) over(partition by a order by b rows between 1 preceding and 1 following) └─Window_8 10000.00 root sum(cast(test.t.a)) over(partition by test.t.a order by test.t.b asc rows between 1 preceding and 1 following) - └─Sort_14 10000.00 root test.t.a:asc, test.t.b:asc - └─TableReader_13 10000.00 root data:TableScan_12 - └─TableScan_12 10000.00 cop table:t, range:[-inf,+inf], keep order:false, stats:pseudo + └─Sort_12 10000.00 root test.t.a:asc, test.t.b:asc + └─TableReader_11 10000.00 root data:TableScan_10 + └─TableScan_10 10000.00 cop table:t, range:[-inf,+inf], keep order:false, stats:pseudo explain select sum(a) over(partition by a order by b range between 1 preceding and 1 following) from t; id count task operator info Projection_7 10000.00 root sum(a) over(partition by a order by b range between 1 preceding and 1 following) └─Window_8 10000.00 root sum(cast(test.t.a)) over(partition by test.t.a order by test.t.b asc range between 1 preceding and 1 following) - └─Sort_14 10000.00 root test.t.a:asc, test.t.b:asc - └─TableReader_13 10000.00 root data:TableScan_12 - └─TableScan_12 10000.00 cop table:t, range:[-inf,+inf], keep order:false, stats:pseudo + └─Sort_12 10000.00 root test.t.a:asc, test.t.b:asc + └─TableReader_11 10000.00 root data:TableScan_10 + └─TableScan_10 10000.00 cop table:t, range:[-inf,+inf], keep order:false, stats:pseudo explain select sum(a) over(partition by a order by c range between interval '2:30' minute_second preceding and interval '2:30' minute_second following) from t; id count task operator info Projection_7 10000.00 root sum(a) over(partition by a order by c range between interval '2:30' minute_second preceding and interval '2:30' minute_second following) └─Window_8 10000.00 root sum(cast(test.t.a)) over(partition by test.t.a order by test.t.c asc range between interval "2:30" "MINUTE_SECOND" preceding and interval "2:30" "MINUTE_SECOND" following) - └─Sort_14 10000.00 root test.t.a:asc, test.t.c:asc - └─TableReader_13 10000.00 root data:TableScan_12 - └─TableScan_12 10000.00 cop table:t, range:[-inf,+inf], keep order:false, stats:pseudo + └─Sort_12 10000.00 root test.t.a:asc, test.t.c:asc + └─TableReader_11 10000.00 root data:TableScan_10 + └─TableScan_10 10000.00 cop table:t, range:[-inf,+inf], keep order:false, stats:pseudo diff --git a/expression/util.go b/expression/util.go index 01ee88c094c9b..e6a2bec438ce3 100644 --- a/expression/util.go +++ b/expression/util.go @@ -92,6 +92,26 @@ func extractColumns(result []*Column, expr Expression, filter func(*Column) bool return result } +// ExtractColumnSet extract columns that occurred in the exprs. +func ExtractColumnSet(exprs []Expression) map[int64]struct{} { + set := make(map[int64]struct{}) + for _, expr := range exprs { + extractColumnSet(expr, set) + } + return set +} + +func extractColumnSet(expr Expression, set map[int64]struct{}) { + switch v := expr.(type) { + case *Column: + set[v.UniqueID] = struct{}{} + case *ScalarFunction: + for _, arg := range v.GetArgs() { + extractColumnSet(arg, set) + } + } +} + // ColumnSubstitute substitutes the columns in filter to expressions in select fields. // e.g. select * from (select b as a from t) k where a < 10 => select * from (select b as a from t where b < 10) k. func ColumnSubstitute(expr Expression, schema *Schema, newExprs []Expression) Expression { diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index bfefc57b32f71..8e3c7c5ae4076 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -196,6 +196,139 @@ func (ds *DataSource) tryToGetDualTask() (task, error) { return nil, nil } +// candidatePath is used to maintain required info for skyline pruning. +type candidatePath struct { + path *accessPath + columnSet map[int64]struct{} // columnSet is the set of columns that occurred in the access conditions. + singleScan bool + matchProp bool +} + +// compareColumnSet will compares the two set. The last return value is used to indicate +// if they are comparable, it is false when both two sets have columns that do not occur in the other. +func compareColumnSet(l, r map[int64]struct{}) (int, bool) { + if len(l) <= len(r) { + for key := range l { + if _, ok := r[key]; !ok { + return 0, false + } + } + if len(l) == len(r) { + return 0, true + } + return -1, true + } + for key := range r { + if _, ok := l[key]; !ok { + return 0, false + } + } + return 1, true +} + +func compareBool(l, r bool) int { + if l == r { + return 0 + } + if l == false { + return -1 + } + return 1 +} + +// compareCandidates is the core of skyline pruning. It compares the two candidate paths on three dimensions: +// the set of columns that occurred in the access condition, whether or not it matches the physical property +// and does it require a double scan. If `x` is not worse than `y` at all factors, +// and there exists one factor that `x` is better than `y`, then we `x` is better than `y`. +func compareCandidates(lhs, rhs *candidatePath) int { + setsResult, comparable := compareColumnSet(lhs.columnSet, rhs.columnSet) + if !comparable { + return 0 + } + scanResult := compareBool(lhs.singleScan, rhs.singleScan) + matchResult := compareBool(lhs.matchProp, rhs.matchProp) + sum := setsResult + scanResult + matchResult + if setsResult >= 0 && scanResult >= 0 && matchResult >= 0 && sum > 0 { + return 1 + } + if setsResult <= 0 && scanResult <= 0 && matchResult <= 0 && sum < 0 { + return -1 + } + return 0 +} + +func (ds *DataSource) getTableCandidate(path *accessPath, prop *property.PhysicalProperty) *candidatePath { + point := &candidatePath{path: path} + if ds.tableInfo.PKIsHandle { + if pkColInfo := ds.tableInfo.GetPkColInfo(); pkColInfo != nil { + pkCol := expression.ColInfo2Col(ds.schema.Columns, pkColInfo) + point.matchProp = len(prop.Items) == 1 && pkCol != nil && prop.Items[0].Col.Equal(nil, pkCol) + } + } + point.columnSet = expression.ExtractColumnSet(path.accessConds) + point.singleScan = true + return point +} + +func (ds *DataSource) getIndexCandidate(path *accessPath, prop *property.PhysicalProperty) *candidatePath { + point := &candidatePath{path: path} + all, _ := prop.AllSameOrder() + if !prop.IsEmpty() && all { + for i, col := range path.index.Columns { + // not matched + if col.Name.L == prop.Items[0].Col.ColName.L { + point.matchProp = matchIndicesProp(path.index.Columns[i:], prop.Items) + break + } else if i >= path.eqCondCount { + break + } + } + } + point.columnSet = expression.ExtractColumnSet(path.accessConds) + point.singleScan = isCoveringIndex(ds.schema.Columns, path.index.Columns, ds.tableInfo.PKIsHandle) + return point +} + +func (ds *DataSource) skylinePruning(prop *property.PhysicalProperty) []*candidatePath { + candidates := make([]*candidatePath, 0, 4) + for _, path := range ds.possibleAccessPaths { + // if we already know the range of the scan is empty, just return a TableDual + if len(path.ranges) == 0 && !ds.ctx.GetSessionVars().StmtCtx.UseCache { + return []*candidatePath{{path: path}} + } + var currentCandidate *candidatePath + if path.isTablePath { + currentCandidate = ds.getTableCandidate(path, prop) + } else { + // We will use index to generate physical plan if: + // this path's access cond is not nil or + // we have prop to match or + // this index is forced to choose. + if len(path.accessConds) > 0 || len(prop.Items) > 0 || path.forced { + currentCandidate = ds.getIndexCandidate(path, prop) + } + } + if currentCandidate == nil { + continue + } + pruned := false + for i := len(candidates) - 1; i >= 0; i-- { + result := compareCandidates(candidates[i], currentCandidate) + if result == 1 { + pruned = true + // We can break here because the current candidate cannot prune others anymore. + break + } else if result == -1 { + candidates = append(candidates[:i], candidates[i+1:]...) + } + } + if !pruned { + candidates = append(candidates, currentCandidate) + } + } + return candidates +} + // findBestTask implements the PhysicalPlan interface. // It will enumerate all the available indices and choose a plan with least cost. func (ds *DataSource) findBestTask(prop *property.PhysicalProperty) (t task, err error) { @@ -250,7 +383,9 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty) (t task, err t = invalidTask - for _, path := range ds.possibleAccessPaths { + candidates := ds.skylinePruning(prop) + for _, candidate := range candidates { + path := candidate.path // if we already know the range of the scan is empty, just return a TableDual if len(path.ranges) == 0 && !ds.ctx.GetSessionVars().StmtCtx.UseCache { dual := PhysicalTableDual{}.Init(ds.ctx, ds.stats) @@ -260,7 +395,7 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty) (t task, err }, nil } if path.isTablePath { - tblTask, err := ds.convertToTableScan(prop, path) + tblTask, err := ds.convertToTableScan(prop, path, candidate.matchProp) if err != nil { return nil, errors.Trace(err) } @@ -269,18 +404,12 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty) (t task, err } continue } - // We will use index to generate physical plan if: - // this path's access cond is not nil or - // we have prop to match or - // this index is forced to choose. - if len(path.accessConds) > 0 || len(prop.Items) > 0 || path.forced { - idxTask, err := ds.convertToIndexScan(prop, path) - if err != nil { - return nil, errors.Trace(err) - } - if idxTask.cost() < t.cost() { - t = idxTask - } + idxTask, err := ds.convertToIndexScan(prop, path, candidate.matchProp, !candidate.singleScan) + if err != nil { + return nil, errors.Trace(err) + } + if idxTask.cost() < t.cost() { + t = idxTask } } return @@ -322,7 +451,7 @@ func (ts *PhysicalTableScan) appendExtraHandleCol(ds *DataSource) { } // convertToIndexScan converts the DataSource to index scan with idx. -func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, path *accessPath) (task task, err error) { +func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, path *accessPath, matchProperty, needDoubleScan bool) (task task, err error) { idx := path.index is := PhysicalIndexScan{ Table: ds.tableInfo, @@ -345,7 +474,7 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, path * } rowCount := path.countAfterAccess cop := &copTask{indexPlan: is} - if !isCoveringIndex(ds.schema.Columns, is.Index.Columns, is.Table.PKIsHandle) { + if needDoubleScan { // If it's parent requires single read task, return max cost. if prop.TaskTp == property.CopSingleReadTaskType { return invalidTask, nil @@ -364,20 +493,6 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, path * return invalidTask, nil } is.initSchema(ds.id, idx, cop.tablePlan != nil) - // Check if this plan matches the property. - matchProperty := false - all, desc := prop.AllSameOrder() - if !prop.IsEmpty() && all { - for i, col := range idx.Columns { - // not matched - if col.Name.L == prop.Items[0].Col.ColName.L { - matchProperty = matchIndicesProp(idx.Columns[i:], prop.Items) - break - } else if i >= path.eqCondCount { - break - } - } - } // Only use expectedCnt when it's smaller than the count we calculated. // e.g. IndexScan(count1)->After Filter(count2). The `ds.stats.RowCount` is count2. count1 is the one we need to calculate // If expectedCnt and count2 are both zero and we go into the below `if` block, the count1 will be set to zero though it's shouldn't be. @@ -390,7 +505,7 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, path * cop.cst = rowCount * scanFactor task = cop if matchProperty { - if desc { + if prop.Items[0].Desc { is.Desc = true cop.cst = rowCount * descScanFactor } @@ -500,7 +615,7 @@ func splitIndexFilterConditions(conditions []expression.Expression, indexColumns } // convertToTableScan converts the DataSource to table scan. -func (ds *DataSource) convertToTableScan(prop *property.PhysicalProperty, path *accessPath) (task task, err error) { +func (ds *DataSource) convertToTableScan(prop *property.PhysicalProperty, path *accessPath, matchProperty bool) (task task, err error) { // It will be handled in convertToIndexScan. if prop.TaskTp == property.CopDoubleReadTaskType { return invalidTask, nil @@ -515,10 +630,8 @@ func (ds *DataSource) convertToTableScan(prop *property.PhysicalProperty, path * physicalTableID: ds.physicalTableID, }.Init(ds.ctx) ts.SetSchema(ds.schema) - var pkCol *expression.Column if ts.Table.PKIsHandle { if pkColInfo := ts.Table.GetPkColInfo(); pkColInfo != nil { - pkCol = expression.ColInfo2Col(ts.schema.Columns, pkColInfo) if ds.statisticTable.Columns[pkColInfo.ID] != nil { ts.Hist = &ds.statisticTable.Columns[pkColInfo.ID].Histogram } @@ -532,7 +645,6 @@ func (ds *DataSource) convertToTableScan(prop *property.PhysicalProperty, path * indexPlanFinished: true, } task = copTask - matchProperty := len(prop.Items) == 1 && pkCol != nil && prop.Items[0].Col.Equal(nil, pkCol) // Only use expectedCnt when it's smaller than the count we calculated. // e.g. IndexScan(count1)->After Filter(count2). The `ds.stats.RowCount` is count2. count1 is the one we need to calculate // If expectedCnt and count2 are both zero and we go into the below `if` block, the count1 will be set to zero though it's shouldn't be. diff --git a/planner/core/logical_plan_test.go b/planner/core/logical_plan_test.go index 649335dd0ab2c..7fad6bb5c8dac 100644 --- a/planner/core/logical_plan_test.go +++ b/planner/core/logical_plan_test.go @@ -16,6 +16,7 @@ package core import ( "fmt" "sort" + "strings" "testing" . "github.com/pingcap/check" @@ -26,6 +27,7 @@ import ( "github.com/pingcap/parser/terror" "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/infoschema" + "github.com/pingcap/tidb/planner/property" "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/util/testleak" ) @@ -2220,3 +2222,101 @@ func (s *testPlanSuite) TestWindowFunction(c *C) { c.Assert(ToString(p), Equals, tt.result, comment) } } + +func byItemsToProperty(byItems []*ByItems) *property.PhysicalProperty { + pp := &property.PhysicalProperty{} + for _, item := range byItems { + pp.Items = append(pp.Items, property.Item{Col: item.Expr.(*expression.Column), Desc: item.Desc}) + } + return pp +} + +func pathsName(paths []*candidatePath) string { + var names []string + for _, path := range paths { + if path.path.isTablePath { + names = append(names, "PRIMARY_KEY") + } else { + names = append(names, path.path.index.Name.O) + } + } + return strings.Join(names, ",") +} + +func (s *testPlanSuite) TestSkylinePruning(c *C) { + defer testleak.AfterTest(c)() + tests := []struct { + sql string + result string + }{ + { + sql: "select * from t", + result: "PRIMARY_KEY", + }, + { + sql: "select * from t order by f", + result: "PRIMARY_KEY,f,f_g", + }, + { + sql: "select * from t where a > 1", + result: "PRIMARY_KEY", + }, + { + sql: "select * from t where a > 1 order by f", + result: "PRIMARY_KEY,f,f_g", + }, + { + sql: "select * from t where f > 1", + result: "PRIMARY_KEY,f,f_g", + }, + { + sql: "select f from t where f > 1", + result: "f,f_g", + }, + { + sql: "select f from t where f > 1 order by a", + result: "PRIMARY_KEY,f,f_g", + }, + { + sql: "select * from t where f > 1 and g > 1", + result: "PRIMARY_KEY,f,g,f_g", + }, + } + for i, tt := range tests { + comment := Commentf("case:%v sql:%s", i, tt.sql) + stmt, err := s.ParseOneStmt(tt.sql, "", "") + c.Assert(err, IsNil, comment) + Preprocess(s.ctx, stmt, s.is, false) + builder := &PlanBuilder{ + ctx: MockContext(), + is: s.is, + colMapper: make(map[*ast.ColumnNameExpr]int), + } + p, err := builder.Build(stmt) + if err != nil { + c.Assert(err.Error(), Equals, tt.result, comment) + continue + } + c.Assert(err, IsNil) + p, err = logicalOptimize(builder.optFlag, p.(LogicalPlan)) + c.Assert(err, IsNil) + lp := p.(LogicalPlan) + _, err = lp.recursiveDeriveStats() + c.Assert(err, IsNil) + var ds *DataSource + var byItems []*ByItems + for ds == nil { + switch v := lp.(type) { + case *DataSource: + ds = v + case *LogicalSort: + byItems = v.ByItems + lp = lp.Children()[0] + default: + lp = lp.Children()[0] + } + } + paths := ds.skylinePruning(byItemsToProperty(byItems)) + c.Assert(pathsName(paths), Equals, tt.result) + } +} diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go index 7956dff6741b5..bda22ff528829 100644 --- a/statistics/selectivity_test.go +++ b/statistics/selectivity_test.go @@ -206,8 +206,8 @@ func (s *testStatsSuite) TestDiscreteDistribution(c *C) { } testKit.MustExec("analyze table t") testKit.MustQuery("explain select * from t where a = 'tw' and b < 0").Check(testkit.Rows( - "IndexReader_9 0.00 root index:IndexScan_8", - "└─IndexScan_8 0.00 cop table:t, index:a, b, range:[\"tw\" -inf,\"tw\" 0), keep order:false")) + "IndexReader_6 0.00 root index:IndexScan_5", + "└─IndexScan_5 0.00 cop table:t, index:a, b, range:[\"tw\" -inf,\"tw\" 0), keep order:false")) } func (s *testStatsSuite) TestSelectCombinedLowBound(c *C) { @@ -219,8 +219,8 @@ func (s *testStatsSuite) TestSelectCombinedLowBound(c *C) { testKit.MustExec("insert into t (kid, pid) values (1,2), (1,3), (1,4),(1, 11), (1, 12), (1, 13), (1, 14), (2, 2), (2, 3), (2, 4)") testKit.MustExec("analyze table t") testKit.MustQuery("explain select * from t where kid = 1").Check(testkit.Rows( - "IndexReader_9 7.00 root index:IndexScan_8", - "└─IndexScan_8 7.00 cop table:t, index:kid, pid, range:[1,1], keep order:false")) + "IndexReader_6 7.00 root index:IndexScan_5", + "└─IndexScan_5 7.00 cop table:t, index:kid, pid, range:[1,1], keep order:false")) } func getRange(start, end int64) []*ranger.Range { diff --git a/util/ranger/ranger_test.go b/util/ranger/ranger_test.go index 79c29b0b68866..49a29e6a1d677 100644 --- a/util/ranger/ranger_test.go +++ b/util/ranger/ranger_test.go @@ -979,10 +979,10 @@ func (s *testRangerSuite) TestIndexRangeElimininatedProjection(c *C) { testKit.MustExec("analyze table t") testKit.MustQuery("explain select * from (select * from t union all select ifnull(a,b), b from t) sub where a > 0").Check(testkit.Rows( "Union_11 2.00 root ", - "├─IndexReader_17 1.00 root index:IndexScan_16", - "│ └─IndexScan_16 1.00 cop table:t, index:a, b, range:(0,+inf], keep order:false", - "└─IndexReader_23 1.00 root index:IndexScan_22", - " └─IndexScan_22 1.00 cop table:t, index:a, b, range:(0,+inf], keep order:false", + "├─IndexReader_14 1.00 root index:IndexScan_13", + "│ └─IndexScan_13 1.00 cop table:t, index:a, b, range:(0,+inf], keep order:false", + "└─IndexReader_17 1.00 root index:IndexScan_16", + " └─IndexScan_16 1.00 cop table:t, index:a, b, range:(0,+inf], keep order:false", )) testKit.MustQuery("select * from (select * from t union all select ifnull(a,b), b from t) sub where a > 0").Check(testkit.Rows( "1 2", From 71afdde23c72a3f3648edbb3d1cd785e101594d6 Mon Sep 17 00:00:00 2001 From: Haibin Xie Date: Mon, 18 Feb 2019 14:21:44 +0800 Subject: [PATCH 2/8] address comments --- docs/design/README.md | 2 +- expression/util.go | 9 +++++---- go.mod | 2 +- planner/core/find_best_task.go | 24 +++++++++++------------- 4 files changed, 18 insertions(+), 19 deletions(-) diff --git a/docs/design/README.md b/docs/design/README.md index 2ec807b104d45..6106cc3a5de1c 100644 --- a/docs/design/README.md +++ b/docs/design/README.md @@ -22,7 +22,6 @@ Writing a design document can promote us to think deliberately and gather knowle - [Proposal: A new command to restore dropped table](./2018-08-10-restore-dropped-table.md) - [Proposal: Support SQL Plan Management](./2018-12-11-sql-plan-management.md) -- [Proposal: Support Skyline Pruning](./2019-01-25-skyline-pruning.md) ### In Progress @@ -39,3 +38,4 @@ Writing a design document can promote us to think deliberately and gather knowle - [Proposal: A new aggregate function execution framework](./2018-07-01-refactor-aggregate-framework.md) - [Proposal: Infer the System Timezone of a TiDB cluster via TZ environment variable](./2018-09-10-adding-tz-env.md) +- [Proposal: Support Skyline Pruning](./2019-01-25-skyline-pruning.md) diff --git a/expression/util.go b/expression/util.go index e6a2bec438ce3..a30d63bf968f2 100644 --- a/expression/util.go +++ b/expression/util.go @@ -28,6 +28,7 @@ import ( "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/types/parser_driver" "github.com/pingcap/tidb/util/chunk" + "golang.org/x/tools/container/intsets" ) // Filter the input expressions, append the results to result. @@ -93,18 +94,18 @@ func extractColumns(result []*Column, expr Expression, filter func(*Column) bool } // ExtractColumnSet extract columns that occurred in the exprs. -func ExtractColumnSet(exprs []Expression) map[int64]struct{} { - set := make(map[int64]struct{}) +func ExtractColumnSet(exprs []Expression) *intsets.Sparse { + set := &intsets.Sparse{} for _, expr := range exprs { extractColumnSet(expr, set) } return set } -func extractColumnSet(expr Expression, set map[int64]struct{}) { +func extractColumnSet(expr Expression, set *intsets.Sparse) { switch v := expr.(type) { case *Column: - set[v.UniqueID] = struct{}{} + set.Insert(int(v.UniqueID)) case *ScalarFunction: for _, arg := range v.GetArgs() { extractColumnSet(arg, set) diff --git a/go.mod b/go.mod index 272a87440b86d..69900eeef2768 100644 --- a/go.mod +++ b/go.mod @@ -79,7 +79,7 @@ require ( golang.org/x/sys v0.0.0-20190109145017-48ac38b7c8cb // indirect golang.org/x/text v0.3.0 golang.org/x/time v0.0.0-20181108054448-85acf8d2951c // indirect - golang.org/x/tools v0.0.0-20190130214255-bb1329dc71a0 // indirect + golang.org/x/tools v0.0.0-20190130214255-bb1329dc71a0 google.golang.org/genproto v0.0.0-20190108161440-ae2f86662275 // indirect google.golang.org/grpc v1.17.0 gopkg.in/natefinch/lumberjack.v2 v2.0.0 diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 8e3c7c5ae4076..2a0e78bf4e533 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -24,6 +24,7 @@ import ( "github.com/pingcap/tidb/planner/property" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/chunk" + "golang.org/x/tools/container/intsets" ) const ( @@ -199,29 +200,26 @@ func (ds *DataSource) tryToGetDualTask() (task, error) { // candidatePath is used to maintain required info for skyline pruning. type candidatePath struct { path *accessPath - columnSet map[int64]struct{} // columnSet is the set of columns that occurred in the access conditions. + columnSet *intsets.Sparse // columnSet is the set of columns that occurred in the access conditions. singleScan bool matchProp bool } // compareColumnSet will compares the two set. The last return value is used to indicate // if they are comparable, it is false when both two sets have columns that do not occur in the other. -func compareColumnSet(l, r map[int64]struct{}) (int, bool) { - if len(l) <= len(r) { - for key := range l { - if _, ok := r[key]; !ok { - return 0, false - } +func compareColumnSet(l, r *intsets.Sparse) (int, bool) { + lLen, rLen := l.Len(), r.Len() + if lLen <= rLen { + if isSubset := l.SubsetOf(r); !isSubset { + return 0, false } - if len(l) == len(r) { + if lLen == rLen { return 0, true } return -1, true } - for key := range r { - if _, ok := l[key]; !ok { - return 0, false - } + if isSubset := r.SubsetOf(l); !isSubset { + return 0, false } return 1, true } @@ -239,7 +237,7 @@ func compareBool(l, r bool) int { // compareCandidates is the core of skyline pruning. It compares the two candidate paths on three dimensions: // the set of columns that occurred in the access condition, whether or not it matches the physical property // and does it require a double scan. If `x` is not worse than `y` at all factors, -// and there exists one factor that `x` is better than `y`, then we `x` is better than `y`. +// and there exists one factor that `x` is better than `y`, then `x` is better than `y`. func compareCandidates(lhs, rhs *candidatePath) int { setsResult, comparable := compareColumnSet(lhs.columnSet, rhs.columnSet) if !comparable { From 6db6f1e64ca1e084b907b31550437582ef7ad741 Mon Sep 17 00:00:00 2001 From: Haibin Xie Date: Mon, 18 Feb 2019 16:22:48 +0800 Subject: [PATCH 3/8] address comments --- expression/util.go | 2 +- planner/core/find_best_task.go | 67 +++++++++++++++++----------------- 2 files changed, 34 insertions(+), 35 deletions(-) diff --git a/expression/util.go b/expression/util.go index a30d63bf968f2..e7285d132761c 100644 --- a/expression/util.go +++ b/expression/util.go @@ -93,7 +93,7 @@ func extractColumns(result []*Column, expr Expression, filter func(*Column) bool return result } -// ExtractColumnSet extract columns that occurred in the exprs. +// ExtractColumnSet extract the different unique id of columns that occurred in the exprs. func ExtractColumnSet(exprs []Expression) *intsets.Sparse { set := &intsets.Sparse{} for _, expr := range exprs { diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 2a0e78bf4e533..cb2382113e98e 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -199,10 +199,10 @@ func (ds *DataSource) tryToGetDualTask() (task, error) { // candidatePath is used to maintain required info for skyline pruning. type candidatePath struct { - path *accessPath - columnSet *intsets.Sparse // columnSet is the set of columns that occurred in the access conditions. - singleScan bool - matchProp bool + path *accessPath + columnSet *intsets.Sparse // columnSet is the set of columns that occurred in the access conditions. + isSingleScan bool + isMatchProp bool } // compareColumnSet will compares the two set. The last return value is used to indicate @@ -235,16 +235,18 @@ func compareBool(l, r bool) int { } // compareCandidates is the core of skyline pruning. It compares the two candidate paths on three dimensions: -// the set of columns that occurred in the access condition, whether or not it matches the physical property -// and does it require a double scan. If `x` is not worse than `y` at all factors, +// (1): the set of columns that occurred in the access condition, +// (2): whether or not it matches the physical property +// (3): does it require a double scan. +// If `x` is not worse than `y` at all factors, // and there exists one factor that `x` is better than `y`, then `x` is better than `y`. func compareCandidates(lhs, rhs *candidatePath) int { setsResult, comparable := compareColumnSet(lhs.columnSet, rhs.columnSet) if !comparable { return 0 } - scanResult := compareBool(lhs.singleScan, rhs.singleScan) - matchResult := compareBool(lhs.matchProp, rhs.matchProp) + scanResult := compareBool(lhs.isSingleScan, rhs.isSingleScan) + matchResult := compareBool(lhs.isMatchProp, rhs.isMatchProp) sum := setsResult + scanResult + matchResult if setsResult >= 0 && scanResult >= 0 && matchResult >= 0 && sum > 0 { return 1 @@ -256,35 +258,30 @@ func compareCandidates(lhs, rhs *candidatePath) int { } func (ds *DataSource) getTableCandidate(path *accessPath, prop *property.PhysicalProperty) *candidatePath { - point := &candidatePath{path: path} - if ds.tableInfo.PKIsHandle { - if pkColInfo := ds.tableInfo.GetPkColInfo(); pkColInfo != nil { - pkCol := expression.ColInfo2Col(ds.schema.Columns, pkColInfo) - point.matchProp = len(prop.Items) == 1 && pkCol != nil && prop.Items[0].Col.Equal(nil, pkCol) - } - } - point.columnSet = expression.ExtractColumnSet(path.accessConds) - point.singleScan = true - return point + candidate := &candidatePath{path: path} + pkCol := ds.getPKIsHandleCol() + candidate.isMatchProp = len(prop.Items) == 1 && pkCol != nil && prop.Items[0].Col.Equal(nil, pkCol) + candidate.columnSet = expression.ExtractColumnSet(path.accessConds) + candidate.isSingleScan = true + return candidate } func (ds *DataSource) getIndexCandidate(path *accessPath, prop *property.PhysicalProperty) *candidatePath { - point := &candidatePath{path: path} + candidate := &candidatePath{path: path} all, _ := prop.AllSameOrder() if !prop.IsEmpty() && all { for i, col := range path.index.Columns { - // not matched if col.Name.L == prop.Items[0].Col.ColName.L { - point.matchProp = matchIndicesProp(path.index.Columns[i:], prop.Items) + candidate.isMatchProp = matchIndicesProp(path.index.Columns[i:], prop.Items) break } else if i >= path.eqCondCount { break } } } - point.columnSet = expression.ExtractColumnSet(path.accessConds) - point.singleScan = isCoveringIndex(ds.schema.Columns, path.index.Columns, ds.tableInfo.PKIsHandle) - return point + candidate.columnSet = expression.ExtractColumnSet(path.accessConds) + candidate.isSingleScan = isCoveringIndex(ds.schema.Columns, path.index.Columns, ds.tableInfo.PKIsHandle) + return candidate } func (ds *DataSource) skylinePruning(prop *property.PhysicalProperty) []*candidatePath { @@ -302,7 +299,7 @@ func (ds *DataSource) skylinePruning(prop *property.PhysicalProperty) []*candida // this path's access cond is not nil or // we have prop to match or // this index is forced to choose. - if len(path.accessConds) > 0 || len(prop.Items) > 0 || path.forced { + if len(path.accessConds) > 0 || !prop.IsEmpty() || path.forced { currentCandidate = ds.getIndexCandidate(path, prop) } } @@ -393,7 +390,7 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty) (t task, err }, nil } if path.isTablePath { - tblTask, err := ds.convertToTableScan(prop, path, candidate.matchProp) + tblTask, err := ds.convertToTableScan(prop, candidate) if err != nil { return nil, errors.Trace(err) } @@ -402,7 +399,7 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty) (t task, err } continue } - idxTask, err := ds.convertToIndexScan(prop, path, candidate.matchProp, !candidate.singleScan) + idxTask, err := ds.convertToIndexScan(prop, candidate) if err != nil { return nil, errors.Trace(err) } @@ -449,7 +446,8 @@ func (ts *PhysicalTableScan) appendExtraHandleCol(ds *DataSource) { } // convertToIndexScan converts the DataSource to index scan with idx. -func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, path *accessPath, matchProperty, needDoubleScan bool) (task task, err error) { +func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, candidate *candidatePath) (task task, err error) { + path := candidate.path idx := path.index is := PhysicalIndexScan{ Table: ds.tableInfo, @@ -472,7 +470,7 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, path * } rowCount := path.countAfterAccess cop := &copTask{indexPlan: is} - if needDoubleScan { + if !candidate.isSingleScan { // If it's parent requires single read task, return max cost. if prop.TaskTp == property.CopSingleReadTaskType { return invalidTask, nil @@ -494,7 +492,7 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, path * // Only use expectedCnt when it's smaller than the count we calculated. // e.g. IndexScan(count1)->After Filter(count2). The `ds.stats.RowCount` is count2. count1 is the one we need to calculate // If expectedCnt and count2 are both zero and we go into the below `if` block, the count1 will be set to zero though it's shouldn't be. - if (matchProperty || prop.IsEmpty()) && prop.ExpectedCnt < ds.stats.RowCount { + if (candidate.isMatchProp || prop.IsEmpty()) && prop.ExpectedCnt < ds.stats.RowCount { selectivity := ds.stats.RowCount / path.countAfterAccess rowCount = math.Min(prop.ExpectedCnt/selectivity, rowCount) } @@ -502,7 +500,7 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, path * is.stats.UsePseudoStats = ds.statisticTable.Pseudo cop.cst = rowCount * scanFactor task = cop - if matchProperty { + if candidate.isMatchProp { if prop.Items[0].Desc { is.Desc = true cop.cst = rowCount * descScanFactor @@ -613,7 +611,7 @@ func splitIndexFilterConditions(conditions []expression.Expression, indexColumns } // convertToTableScan converts the DataSource to table scan. -func (ds *DataSource) convertToTableScan(prop *property.PhysicalProperty, path *accessPath, matchProperty bool) (task task, err error) { +func (ds *DataSource) convertToTableScan(prop *property.PhysicalProperty, candidate *candidatePath) (task task, err error) { // It will be handled in convertToIndexScan. if prop.TaskTp == property.CopDoubleReadTaskType { return invalidTask, nil @@ -635,6 +633,7 @@ func (ds *DataSource) convertToTableScan(prop *property.PhysicalProperty, path * } } } + path := candidate.path ts.Ranges = path.ranges ts.AccessCondition, ts.filterCondition = path.accessConds, path.tableFilters rowCount := path.countAfterAccess @@ -646,14 +645,14 @@ func (ds *DataSource) convertToTableScan(prop *property.PhysicalProperty, path * // Only use expectedCnt when it's smaller than the count we calculated. // e.g. IndexScan(count1)->After Filter(count2). The `ds.stats.RowCount` is count2. count1 is the one we need to calculate // If expectedCnt and count2 are both zero and we go into the below `if` block, the count1 will be set to zero though it's shouldn't be. - if (matchProperty || prop.IsEmpty()) && prop.ExpectedCnt < ds.stats.RowCount { + if (candidate.isMatchProp || prop.IsEmpty()) && prop.ExpectedCnt < ds.stats.RowCount { selectivity := ds.stats.RowCount / rowCount rowCount = math.Min(prop.ExpectedCnt/selectivity, rowCount) } ts.stats = property.NewSimpleStats(rowCount) ts.stats.UsePseudoStats = ds.statisticTable.Pseudo copTask.cst = rowCount * scanFactor - if matchProperty { + if candidate.isMatchProp { if prop.Items[0].Desc { ts.Desc = true copTask.cst = rowCount * descScanFactor From edcf4a4664df87aff84095b16e564eefeb8b37d3 Mon Sep 17 00:00:00 2001 From: Haibin Xie Date: Mon, 18 Feb 2019 16:37:24 +0800 Subject: [PATCH 4/8] address comments --- planner/core/find_best_task.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index cb2382113e98e..06d86c0d4abd8 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -207,6 +207,10 @@ type candidatePath struct { // compareColumnSet will compares the two set. The last return value is used to indicate // if they are comparable, it is false when both two sets have columns that do not occur in the other. +// When the second return value is true, the value of first: +// (1) -1 means that `l` is a strict subset of `r`; +// (2) 0 means that `l` equals to `r`; +// (3) 1 means that `l` is a strict superset of `r`. func compareColumnSet(l, r *intsets.Sparse) (int, bool) { lLen, rLen := l.Len(), r.Len() if lLen <= rLen { @@ -294,14 +298,12 @@ func (ds *DataSource) skylinePruning(prop *property.PhysicalProperty) []*candida var currentCandidate *candidatePath if path.isTablePath { currentCandidate = ds.getTableCandidate(path, prop) - } else { + } else if len(path.accessConds) > 0 || !prop.IsEmpty() || path.forced { // We will use index to generate physical plan if: // this path's access cond is not nil or // we have prop to match or // this index is forced to choose. - if len(path.accessConds) > 0 || !prop.IsEmpty() || path.forced { - currentCandidate = ds.getIndexCandidate(path, prop) - } + currentCandidate = ds.getIndexCandidate(path, prop) } if currentCandidate == nil { continue From aa72d5730cab5f388ff43210577a112c7ece3f39 Mon Sep 17 00:00:00 2001 From: Haibin Xie Date: Mon, 18 Feb 2019 17:19:54 +0800 Subject: [PATCH 5/8] address comments --- expression/util.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/expression/util.go b/expression/util.go index e7285d132761c..6f7b6b8aba6fd 100644 --- a/expression/util.go +++ b/expression/util.go @@ -93,7 +93,7 @@ func extractColumns(result []*Column, expr Expression, filter func(*Column) bool return result } -// ExtractColumnSet extract the different unique id of columns that occurred in the exprs. +// ExtractColumnSet extracts the different values of `UniqueId` for columns in expressions. func ExtractColumnSet(exprs []Expression) *intsets.Sparse { set := &intsets.Sparse{} for _, expr := range exprs { From 358f04e56de729b3f318a70c07ceb0be358cdc1d Mon Sep 17 00:00:00 2001 From: Haibin Xie Date: Mon, 18 Feb 2019 19:38:19 +0800 Subject: [PATCH 6/8] address comments --- planner/core/find_best_task.go | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 06d86c0d4abd8..2024f20e6ee18 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -213,19 +213,16 @@ type candidatePath struct { // (3) 1 means that `l` is a strict superset of `r`. func compareColumnSet(l, r *intsets.Sparse) (int, bool) { lLen, rLen := l.Len(), r.Len() - if lLen <= rLen { - if isSubset := l.SubsetOf(r); !isSubset { - return 0, false - } - if lLen == rLen { - return 0, true - } - return -1, true + if lLen < rLen { + // -1 is meaningful only when l.SubsetOf(r) is true. + return -1, l.SubsetOf(r) } - if isSubset := r.SubsetOf(l); !isSubset { - return 0, false + if lLen == rLen { + // 0 is meaningful only when l.SubsetOf(r) is true. + return 0, l.SubsetOf(r) } - return 1, true + // 1 is meaningful only when r.SubsetOf(l) is true. + return 1, r.SubsetOf(l) } func compareBool(l, r bool) int { @@ -273,6 +270,8 @@ func (ds *DataSource) getTableCandidate(path *accessPath, prop *property.Physica func (ds *DataSource) getIndexCandidate(path *accessPath, prop *property.PhysicalProperty) *candidatePath { candidate := &candidatePath{path: path} all, _ := prop.AllSameOrder() + // When the prop is empty or `all` is false, `isMatchProp` is better to be `false` because + // it needs not to keep order for index scan. if !prop.IsEmpty() && all { for i, col := range path.index.Columns { if col.Name.L == prop.Items[0].Col.ColName.L { From a141f86c0c10e93cf6481d850d008f197131fd20 Mon Sep 17 00:00:00 2001 From: Haibin Xie Date: Mon, 18 Feb 2019 19:58:59 +0800 Subject: [PATCH 7/8] update explain test --- .../r/access_path_selection.result | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/cmd/explaintest/r/access_path_selection.result b/cmd/explaintest/r/access_path_selection.result index 1c2db46bf4d3b..ec0309e75dee7 100644 --- a/cmd/explaintest/r/access_path_selection.result +++ b/cmd/explaintest/r/access_path_selection.result @@ -7,12 +7,12 @@ KEY `IDX_ab` (`a`, `b`) ); explain select a from access_path_selection where a < 3; id count task operator info -IndexReader_9 3323.33 root index:IndexScan_8 -└─IndexScan_8 3323.33 cop table:access_path_selection, index:a, range:[-inf,3), keep order:false, stats:pseudo +IndexReader_6 3323.33 root index:IndexScan_5 +└─IndexScan_5 3323.33 cop table:access_path_selection, index:a, range:[-inf,3), keep order:false, stats:pseudo explain select a, b from access_path_selection where a < 3; id count task operator info -IndexReader_12 3323.33 root index:IndexScan_11 -└─IndexScan_11 3323.33 cop table:access_path_selection, index:a, b, range:[-inf,3), keep order:false, stats:pseudo +IndexReader_6 3323.33 root index:IndexScan_5 +└─IndexScan_5 3323.33 cop table:access_path_selection, index:a, b, range:[-inf,3), keep order:false, stats:pseudo explain select a, b from access_path_selection where b < 3; id count task operator info IndexLookUp_10 3323.33 root @@ -20,9 +20,9 @@ IndexLookUp_10 3323.33 root └─TableScan_9 3323.33 cop table:access_path_selection, keep order:false, stats:pseudo explain select a, b from access_path_selection where a < 3 and b < 3; id count task operator info -IndexReader_18 1104.45 root index:Selection_17 -└─Selection_17 1104.45 cop lt(test.access_path_selection.b, 3) - └─IndexScan_16 3323.33 cop table:access_path_selection, index:a, b, range:[-inf,3), keep order:false, stats:pseudo +IndexReader_11 1104.45 root index:Selection_10 +└─Selection_10 1104.45 cop lt(test.access_path_selection.b, 3) + └─IndexScan_9 3323.33 cop table:access_path_selection, index:a, b, range:[-inf,3), keep order:false, stats:pseudo CREATE TABLE `outdated_statistics` ( `a` int, `b` int, @@ -41,9 +41,9 @@ analyze table outdated_statistics index idx_ab; explain select * from outdated_statistics where a=1 and b=1 and c=1; id count task operator info IndexLookUp_11 0.00 root -├─IndexScan_8 0.00 cop table:outdated_statistics, index:a, range:[1,1], keep order:false -└─Selection_10 0.00 cop eq(test.outdated_statistics.b, 1), eq(test.outdated_statistics.c, 1) - └─TableScan_9 0.00 cop table:outdated_statistics, keep order:false +├─IndexScan_8 1.00 cop table:outdated_statistics, index:a, b, range:[1 1,1 1], keep order:false +└─Selection_10 0.00 cop eq(test.outdated_statistics.c, 1) + └─TableScan_9 1.00 cop table:outdated_statistics, keep order:false CREATE TABLE `unknown_correlation` ( id int, a int, @@ -55,7 +55,7 @@ ANALYZE TABLE unknown_correlation; EXPLAIN SELECT * FROM unknown_correlation WHERE a = 2 ORDER BY id limit 1; id count task operator info Limit_11 1.00 root offset:0, count:1 -└─TableReader_29 1.00 root data:Limit_28 - └─Limit_28 1.00 cop offset:0, count:1 - └─Selection_26 1.00 cop eq(test.unknown_correlation.a, 2) - └─TableScan_25 4.17 cop table:unknown_correlation, range:[-inf,+inf], keep order:true +└─TableReader_24 1.00 root data:Limit_23 + └─Limit_23 1.00 cop offset:0, count:1 + └─Selection_21 1.00 cop eq(test.unknown_correlation.a, 2) + └─TableScan_20 4.17 cop table:unknown_correlation, range:[-inf,+inf], keep order:true From 0578678dacf7503287e1df6d47c6bb8b84db619a Mon Sep 17 00:00:00 2001 From: Haibin Xie Date: Tue, 19 Feb 2019 10:50:51 +0800 Subject: [PATCH 8/8] address comment --- planner/core/find_best_task.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 2024f20e6ee18..3526929acf1a0 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -287,6 +287,8 @@ func (ds *DataSource) getIndexCandidate(path *accessPath, prop *property.Physica return candidate } +// skylinePruning prunes access paths according to different factors. An access path can be pruned only if +// there exists a path that is not worse than it at all factors and there is at least one better factor. func (ds *DataSource) skylinePruning(prop *property.PhysicalProperty) []*candidatePath { candidates := make([]*candidatePath, 0, 4) for _, path := range ds.possibleAccessPaths { @@ -303,8 +305,7 @@ func (ds *DataSource) skylinePruning(prop *property.PhysicalProperty) []*candida // we have prop to match or // this index is forced to choose. currentCandidate = ds.getIndexCandidate(path, prop) - } - if currentCandidate == nil { + } else { continue } pruned := false