From ee9cb204f9e1ab8b7b8587ad223d0889f2b03bcd Mon Sep 17 00:00:00 2001
From: Haibin Xie <lambdax.tyler@gmail.com>
Date: Fri, 15 Feb 2019 18:43:23 +0800
Subject: [PATCH 1/8] planner: implement skyline pruning

---
 cmd/explaintest/main.go                       |   5 -
 cmd/explaintest/r/explain_complex.result      |  14 +-
 .../r/explain_complex_stats.result            |  14 +-
 cmd/explaintest/r/explain_easy.result         | 102 +++++-----
 cmd/explaintest/r/explain_easy_stats.result   |  12 +-
 cmd/explaintest/r/select.result               |  40 ++--
 cmd/explaintest/r/topn_push_down.result       |  20 +-
 cmd/explaintest/r/tpch.result                 |  14 +-
 cmd/explaintest/r/window_function.result      |  34 ++--
 expression/util.go                            |  20 ++
 planner/core/find_best_task.go                | 182 ++++++++++++++----
 planner/core/logical_plan_test.go             | 100 ++++++++++
 statistics/selectivity_test.go                |   8 +-
 util/ranger/ranger_test.go                    |   8 +-
 14 files changed, 400 insertions(+), 173 deletions(-)

diff --git a/cmd/explaintest/main.go b/cmd/explaintest/main.go
index fc7a6f043c742..8ff9d2b00cf6d 100644
--- a/cmd/explaintest/main.go
+++ b/cmd/explaintest/main.go
@@ -567,11 +567,6 @@ func loadAllTests() ([]string, error) {
 		if strings.HasSuffix(name, ".test") {
 			name = strings.TrimSuffix(name, ".test")
 
-			// if we use record and the result file exists, skip generating
-			if record && resultExists(name) {
-				continue
-			}
-
 			if create && !strings.HasSuffix(name, "_stats") {
 				continue
 			}
diff --git a/cmd/explaintest/r/explain_complex.result b/cmd/explaintest/r/explain_complex.result
index 1b93d600fa4aa..66d38378b0f33 100644
--- a/cmd/explaintest/r/explain_complex.result
+++ b/cmd/explaintest/r/explain_complex.result
@@ -153,9 +153,9 @@ id	count	task	operator info
 Projection_10	0.00	root	dt.id, dt.aid, dt.pt, dt.dic, dt.cm, rr.gid, rr.acd, rr.t, dt.p1, dt.p2, dt.p3, dt.p4, dt.p5, dt.p6_md5, dt.p7_md5
 └─Limit_13	0.00	root	offset:0, count:2000
   └─IndexJoin_19	0.00	root	inner join, inner:IndexLookUp_18, outer key:dt.aid, dt.dic, inner key:rr.aid, rr.dic
-    ├─TableReader_47	0.00	root	data:Selection_46
-    │ └─Selection_46	0.00	cop	eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592), not(isnull(dt.dic))
-    │   └─TableScan_45	10000.00	cop	table:dt, range:[0,+inf], keep order:false, stats:pseudo
+    ├─TableReader_43	0.00	root	data:Selection_42
+    │ └─Selection_42	0.00	cop	eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592), not(isnull(dt.dic))
+    │   └─TableScan_41	10000.00	cop	table:dt, range:[0,+inf], keep order:false, stats:pseudo
     └─IndexLookUp_18	3.33	root	
       ├─IndexScan_15	10.00	cop	table:rr, index:aid, dic, range: decided by [dt.aid dt.dic], keep order:false, stats:pseudo
       └─Selection_17	3.33	cop	eq(rr.pt, "ios"), gt(rr.t, 1478185592)
@@ -164,10 +164,10 @@ explain select pc,cr,count(DISTINCT uid) as pay_users,count(oid) as pay_times,su
 id	count	task	operator info
 Projection_5	1.00	root	test.pp.pc, test.pp.cr, 3_col_0, 3_col_1, 3_col_2
 └─HashAgg_7	1.00	root	group by:test.pp.cr, test.pp.pc, funcs:count(distinct test.pp.uid), count(test.pp.oid), sum(test.pp.am), firstrow(test.pp.pc), firstrow(test.pp.cr)
-  └─IndexLookUp_28	0.00	root	
-    ├─IndexScan_25	0.40	cop	table:pp, index:uid, pi, range:[18089709 510017,18089709 510017], [18089709 520017,18089709 520017], [18090780 510017,18090780 510017], [18090780 520017,18090780 520017], keep order:false, stats:pseudo
-    └─Selection_27	0.00	cop	eq(test.pp.ps, 2), ge(test.pp.ppt, 1478188800), lt(test.pp.ppt, 1478275200)
-      └─TableScan_26	0.40	cop	table:pp, keep order:false, stats:pseudo
+  └─IndexLookUp_24	0.00	root	
+    ├─IndexScan_21	0.40	cop	table:pp, index:uid, pi, range:[18089709 510017,18089709 510017], [18089709 520017,18089709 520017], [18090780 510017,18090780 510017], [18090780 520017,18090780 520017], keep order:false, stats:pseudo
+    └─Selection_23	0.00	cop	eq(test.pp.ps, 2), ge(test.pp.ppt, 1478188800), lt(test.pp.ppt, 1478275200)
+      └─TableScan_22	0.40	cop	table:pp, keep order:false, stats:pseudo
 CREATE TABLE `tbl_001` (`a` int, `b` int);
 CREATE TABLE `tbl_002` (`a` int, `b` int);
 CREATE TABLE `tbl_003` (`a` int, `b` int);
diff --git a/cmd/explaintest/r/explain_complex_stats.result b/cmd/explaintest/r/explain_complex_stats.result
index f64fe4e0b30af..805695e77132c 100644
--- a/cmd/explaintest/r/explain_complex_stats.result
+++ b/cmd/explaintest/r/explain_complex_stats.result
@@ -161,9 +161,9 @@ id	count	task	operator info
 Projection_10	428.32	root	dt.id, dt.aid, dt.pt, dt.dic, dt.cm, rr.gid, rr.acd, rr.t, dt.p1, dt.p2, dt.p3, dt.p4, dt.p5, dt.p6_md5, dt.p7_md5
 └─Limit_13	428.32	root	offset:0, count:2000
   └─IndexJoin_19	428.32	root	inner join, inner:IndexLookUp_18, outer key:dt.aid, dt.dic, inner key:rr.aid, rr.dic
-    ├─TableReader_47	428.32	root	data:Selection_46
-    │ └─Selection_46	428.32	cop	eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592), not(isnull(dt.dic))
-    │   └─TableScan_45	2000.00	cop	table:dt, range:[0,+inf], keep order:false
+    ├─TableReader_43	428.32	root	data:Selection_42
+    │ └─Selection_42	428.32	cop	eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592), not(isnull(dt.dic))
+    │   └─TableScan_41	2000.00	cop	table:dt, range:[0,+inf], keep order:false
     └─IndexLookUp_18	970.00	root	
       ├─IndexScan_15	1.00	cop	table:rr, index:aid, dic, range: decided by [dt.aid dt.dic], keep order:false
       └─Selection_17	970.00	cop	eq(rr.pt, "ios"), gt(rr.t, 1478185592)
@@ -172,10 +172,10 @@ explain select pc,cr,count(DISTINCT uid) as pay_users,count(oid) as pay_times,su
 id	count	task	operator info
 Projection_5	207.86	root	test.pp.pc, test.pp.cr, 3_col_0, 3_col_1, 3_col_2
 └─HashAgg_7	207.86	root	group by:test.pp.cr, test.pp.pc, funcs:count(distinct test.pp.uid), count(test.pp.oid), sum(test.pp.am), firstrow(test.pp.pc), firstrow(test.pp.cr)
-  └─IndexLookUp_28	207.86	root	
-    ├─IndexScan_22	627.00	cop	table:pp, index:ps, range:[2,2], keep order:false
-    └─Selection_24	207.86	cop	ge(test.pp.ppt, 1478188800), in(test.pp.pi, 510017, 520017), in(test.pp.uid, 18089709, 18090780), lt(test.pp.ppt, 1478275200)
-      └─TableScan_23	627.00	cop	table:pp, keep order:false
+  └─IndexLookUp_24	207.86	root	
+    ├─IndexScan_18	627.00	cop	table:pp, index:ps, range:[2,2], keep order:false
+    └─Selection_20	207.86	cop	ge(test.pp.ppt, 1478188800), in(test.pp.pi, 510017, 520017), in(test.pp.uid, 18089709, 18090780), lt(test.pp.ppt, 1478275200)
+      └─TableScan_19	627.00	cop	table:pp, keep order:false
 drop table if exists tbl_001;
 CREATE TABLE tbl_001 (a int, b int);
 load stats 's/explain_complex_stats_tbl_001.json';
diff --git a/cmd/explaintest/r/explain_easy.result b/cmd/explaintest/r/explain_easy.result
index 39d8baf9c85a6..8a610f05d0f18 100644
--- a/cmd/explaintest/r/explain_easy.result
+++ b/cmd/explaintest/r/explain_easy.result
@@ -38,8 +38,8 @@ TableReader_6	3333.33	root	data:TableScan_5
 └─TableScan_5	3333.33	cop	table:t1, range:(0,+inf], keep order:false, stats:pseudo
 explain select t1.c1, t1.c2 from t1 where t1.c2 = 1;
 id	count	task	operator info
-IndexReader_9	10.00	root	index:IndexScan_8
-└─IndexScan_8	10.00	cop	table:t1, index:c2, range:[1,1], keep order:false, stats:pseudo
+IndexReader_6	10.00	root	index:IndexScan_5
+└─IndexScan_5	10.00	cop	table:t1, index:c2, range:[1,1], keep order:false, stats:pseudo
 explain select * from t1 left join t2 on t1.c2 = t2.c1 where t1.c1 > 1;
 id	count	task	operator info
 IndexJoin_12	4166.67	root	left outer join, inner:IndexLookUp_11, outer key:test.t1.c2, inner key:test.t2.c1
@@ -89,12 +89,12 @@ TableReader_7	0.33	root	data:Selection_6
 explain select sum(t1.c1 in (select c1 from t2)) from t1;
 id	count	task	operator info
 StreamAgg_12	1.00	root	funcs:sum(col_0)
-└─Projection_35	10000.00	root	cast(5_aux_0)
-  └─MergeJoin_28	10000.00	root	left outer semi join, left key:test.t1.c1, right key:test.t2.c1
+└─Projection_33	10000.00	root	cast(5_aux_0)
+  └─MergeJoin_26	10000.00	root	left outer semi join, left key:test.t1.c1, right key:test.t2.c1
     ├─TableReader_19	10000.00	root	data:TableScan_18
     │ └─TableScan_18	10000.00	cop	table:t1, range:[-inf,+inf], keep order:true, stats:pseudo
-    └─IndexReader_23	10000.00	root	index:IndexScan_22
-      └─IndexScan_22	10000.00	cop	table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo
+    └─IndexReader_21	10000.00	root	index:IndexScan_20
+      └─IndexScan_20	10000.00	cop	table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo
 explain select c1 from t1 where c1 in (select c2 from t2);
 id	count	task	operator info
 Projection_9	9990.00	root	test.t1.c1
@@ -113,9 +113,9 @@ Projection_12	10000.00	root	k
   └─MergeJoin_14	10000.00	root	left outer join, left key:test.t1.c1, right key:s.c1
     ├─TableReader_17	10000.00	root	data:TableScan_16
     │ └─TableScan_16	10000.00	cop	table:t1, range:[-inf,+inf], keep order:true, stats:pseudo
-    └─Projection_19	8000.00	root	1, s.c1
-      └─TableReader_21	10000.00	root	data:TableScan_20
-        └─TableScan_20	10000.00	cop	table:s, range:[-inf,+inf], keep order:true, stats:pseudo
+    └─Projection_18	8000.00	root	1, s.c1
+      └─TableReader_20	10000.00	root	data:TableScan_19
+        └─TableScan_19	10000.00	cop	table:s, range:[-inf,+inf], keep order:true, stats:pseudo
 explain select * from information_schema.columns;
 id	count	task	operator info
 MemTableScan_4	10000.00	root	
@@ -134,8 +134,8 @@ Projection_12	10000.00	root	eq(test.t1.c2, test.t2.c2)
 explain select * from t1 order by c1 desc limit 1;
 id	count	task	operator info
 Limit_10	1.00	root	offset:0, count:1
-└─TableReader_21	1.00	root	data:Limit_20
-  └─Limit_20	1.00	cop	offset:0, count:1
+└─TableReader_20	1.00	root	data:Limit_19
+  └─Limit_19	1.00	cop	offset:0, count:1
     └─TableScan_18	1.00	cop	table:t1, range:[-inf,+inf], keep order:true, desc, stats:pseudo
 explain select * from t4 use index(idx) where a > 1 and b > 1 and c > 1 limit 1;
 id	count	task	operator info
@@ -149,8 +149,8 @@ Limit_9	1.00	root	offset:0, count:1
 explain select * from t4 where a > 1 and c > 1 limit 1;
 id	count	task	operator info
 Limit_8	1.00	root	offset:0, count:1
-└─TableReader_15	1.00	root	data:Limit_14
-  └─Limit_14	1.00	cop	offset:0, count:1
+└─TableReader_14	1.00	root	data:Limit_13
+  └─Limit_13	1.00	cop	offset:0, count:1
     └─Selection_12	1.00	cop	gt(test.t4.c, 1)
       └─TableScan_11	3.00	cop	table:t4, range:(1,+inf], keep order:false, stats:pseudo
 explain select ifnull(null, t1.c1) from t1;
@@ -166,42 +166,42 @@ id	count	task	operator info
 Union_17	26000.00	root	
 ├─HashAgg_21	16000.00	root	group by:c1, funcs:firstrow(join_agg_0)
 │ └─Union_22	16000.00	root	
-│   ├─StreamAgg_35	8000.00	root	group by:col_2, funcs:firstrow(col_0), firstrow(col_1)
-│   │ └─IndexReader_36	8000.00	root	index:StreamAgg_26
+│   ├─StreamAgg_34	8000.00	root	group by:col_2, funcs:firstrow(col_0), firstrow(col_1)
+│   │ └─IndexReader_35	8000.00	root	index:StreamAgg_26
 │   │   └─StreamAgg_26	8000.00	cop	group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1)
-│   │     └─IndexScan_34	10000.00	cop	table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo
-│   └─StreamAgg_52	8000.00	root	group by:col_2, funcs:firstrow(col_0), firstrow(col_1)
-│     └─IndexReader_53	8000.00	root	index:StreamAgg_43
-│       └─StreamAgg_43	8000.00	cop	group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1)
-│         └─IndexScan_51	10000.00	cop	table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo
-└─TableReader_59	10000.00	root	data:TableScan_58
-  └─TableScan_58	10000.00	cop	table:t2, range:[-inf,+inf], keep order:false, stats:pseudo
+│   │     └─IndexScan_33	10000.00	cop	table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo
+│   └─StreamAgg_49	8000.00	root	group by:col_2, funcs:firstrow(col_0), firstrow(col_1)
+│     └─IndexReader_50	8000.00	root	index:StreamAgg_41
+│       └─StreamAgg_41	8000.00	cop	group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1)
+│         └─IndexScan_48	10000.00	cop	table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo
+└─TableReader_55	10000.00	root	data:TableScan_54
+  └─TableScan_54	10000.00	cop	table:t2, range:[-inf,+inf], keep order:false, stats:pseudo
 explain select c1 from t2 union all select c1 from t2 union select c1 from t2;
 id	count	task	operator info
 HashAgg_18	24000.00	root	group by:c1, funcs:firstrow(join_agg_0)
 └─Union_19	24000.00	root	
-  ├─StreamAgg_32	8000.00	root	group by:col_2, funcs:firstrow(col_0), firstrow(col_1)
-  │ └─IndexReader_33	8000.00	root	index:StreamAgg_23
+  ├─StreamAgg_31	8000.00	root	group by:col_2, funcs:firstrow(col_0), firstrow(col_1)
+  │ └─IndexReader_32	8000.00	root	index:StreamAgg_23
   │   └─StreamAgg_23	8000.00	cop	group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1)
-  │     └─IndexScan_31	10000.00	cop	table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo
-  ├─StreamAgg_49	8000.00	root	group by:col_2, funcs:firstrow(col_0), firstrow(col_1)
-  │ └─IndexReader_50	8000.00	root	index:StreamAgg_40
-  │   └─StreamAgg_40	8000.00	cop	group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1)
-  │     └─IndexScan_48	10000.00	cop	table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo
-  └─StreamAgg_66	8000.00	root	group by:col_2, funcs:firstrow(col_0), firstrow(col_1)
-    └─IndexReader_67	8000.00	root	index:StreamAgg_57
-      └─StreamAgg_57	8000.00	cop	group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1)
-        └─IndexScan_65	10000.00	cop	table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo
+  │     └─IndexScan_30	10000.00	cop	table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo
+  ├─StreamAgg_46	8000.00	root	group by:col_2, funcs:firstrow(col_0), firstrow(col_1)
+  │ └─IndexReader_47	8000.00	root	index:StreamAgg_38
+  │   └─StreamAgg_38	8000.00	cop	group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1)
+  │     └─IndexScan_45	10000.00	cop	table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo
+  └─StreamAgg_61	8000.00	root	group by:col_2, funcs:firstrow(col_0), firstrow(col_1)
+    └─IndexReader_62	8000.00	root	index:StreamAgg_53
+      └─StreamAgg_53	8000.00	cop	group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1)
+        └─IndexScan_60	10000.00	cop	table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo
 set @@session.tidb_opt_insubq_to_join_and_agg=0;
 explain select sum(t1.c1 in (select c1 from t2)) from t1;
 id	count	task	operator info
 StreamAgg_12	1.00	root	funcs:sum(col_0)
-└─Projection_35	10000.00	root	cast(5_aux_0)
-  └─MergeJoin_28	10000.00	root	left outer semi join, left key:test.t1.c1, right key:test.t2.c1
+└─Projection_33	10000.00	root	cast(5_aux_0)
+  └─MergeJoin_26	10000.00	root	left outer semi join, left key:test.t1.c1, right key:test.t2.c1
     ├─TableReader_19	10000.00	root	data:TableScan_18
     │ └─TableScan_18	10000.00	cop	table:t1, range:[-inf,+inf], keep order:true, stats:pseudo
-    └─IndexReader_23	10000.00	root	index:IndexScan_22
-      └─IndexScan_22	10000.00	cop	table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo
+    └─IndexReader_21	10000.00	root	index:IndexScan_20
+      └─IndexScan_20	10000.00	cop	table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo
 explain select 1 in (select c2 from t2) from t1;
 id	count	task	operator info
 Projection_6	10000.00	root	5_aux_0
@@ -229,10 +229,10 @@ subgraph cluster12{
 node [style=filled, color=lightgrey]
 color=black
 label = "root"
-"StreamAgg_12" -> "Projection_35"
-"Projection_35" -> "MergeJoin_28"
-"MergeJoin_28" -> "TableReader_19"
-"MergeJoin_28" -> "IndexReader_23"
+"StreamAgg_12" -> "Projection_33"
+"Projection_33" -> "MergeJoin_26"
+"MergeJoin_26" -> "TableReader_19"
+"MergeJoin_26" -> "IndexReader_21"
 }
 subgraph cluster18{
 node [style=filled, color=lightgrey]
@@ -240,14 +240,14 @@ color=black
 label = "cop"
 "TableScan_18"
 }
-subgraph cluster22{
+subgraph cluster20{
 node [style=filled, color=lightgrey]
 color=black
 label = "cop"
-"IndexScan_22"
+"IndexScan_20"
 }
 "TableReader_19" -> "TableScan_18"
-"IndexReader_23" -> "IndexScan_22"
+"IndexReader_21" -> "IndexScan_20"
 }
 
 explain format="dot" select 1 in (select c2 from t2) from t1;
@@ -346,8 +346,8 @@ drop table if exists t;
 create table t(a bigint, b bigint, index idx(a, b));
 explain select * from t where a in (1, 2) and a in (1, 3);
 id	count	task	operator info
-IndexReader_9	10.00	root	index:IndexScan_8
-└─IndexScan_8	10.00	cop	table:t, index:a, b, range:[1,1], keep order:false, stats:pseudo
+IndexReader_6	10.00	root	index:IndexScan_5
+└─IndexScan_5	10.00	cop	table:t, index:a, b, range:[1,1], keep order:false, stats:pseudo
 explain select * from t where b in (1, 2) and b in (1, 3);
 id	count	task	operator info
 TableReader_7	10.00	root	data:Selection_6
@@ -355,8 +355,8 @@ TableReader_7	10.00	root	data:Selection_6
   └─TableScan_5	10000.00	cop	table:t, range:[-inf,+inf], keep order:false, stats:pseudo
 explain select * from t where a = 1 and a = 1;
 id	count	task	operator info
-IndexReader_9	10.00	root	index:IndexScan_8
-└─IndexScan_8	10.00	cop	table:t, index:a, b, range:[1,1], keep order:false, stats:pseudo
+IndexReader_6	10.00	root	index:IndexScan_5
+└─IndexScan_5	10.00	cop	table:t, index:a, b, range:[1,1], keep order:false, stats:pseudo
 explain select * from t where a = 1 and a = 2;
 id	count	task	operator info
 TableDual_5	0.00	root	rows:0
@@ -412,10 +412,10 @@ TableReader_7	10000.00	root	data:TableScan_6
 └─TableScan_6	10000.00	cop	table:t1, range:[-inf,+inf], keep order:false, stats:pseudo
 explain select distinct t1.a, t1.b from t1 left outer join t2 on t1.a = t2.a;
 id	count	task	operator info
-StreamAgg_19	8000.00	root	group by:col_2, col_3, funcs:firstrow(col_0), firstrow(col_1)
-└─IndexReader_20	8000.00	root	index:StreamAgg_10
+StreamAgg_18	8000.00	root	group by:col_2, col_3, funcs:firstrow(col_0), firstrow(col_1)
+└─IndexReader_19	8000.00	root	index:StreamAgg_10
   └─StreamAgg_10	8000.00	cop	group by:test.t1.a, test.t1.b, funcs:firstrow(test.t1.a), firstrow(test.t1.b)
-    └─IndexScan_18	10000.00	cop	table:t1, index:a, b, range:[NULL,+inf], keep order:true, stats:pseudo
+    └─IndexScan_17	10000.00	cop	table:t1, index:a, b, range:[NULL,+inf], keep order:true, stats:pseudo
 drop table if exists t;
 create table t(a int, nb int not null, nc int not null);
 explain select ifnull(a, 0) from t;
diff --git a/cmd/explaintest/r/explain_easy_stats.result b/cmd/explaintest/r/explain_easy_stats.result
index e25c9b6336b11..f3e9f0a25ce83 100644
--- a/cmd/explaintest/r/explain_easy_stats.result
+++ b/cmd/explaintest/r/explain_easy_stats.result
@@ -41,8 +41,8 @@ TableReader_6	1999.00	root	data:TableScan_5
 └─TableScan_5	1999.00	cop	table:t1, range:(0,+inf], keep order:false
 explain select t1.c1, t1.c2 from t1 where t1.c2 = 1;
 id	count	task	operator info
-IndexReader_9	0.00	root	index:IndexScan_8
-└─IndexScan_8	0.00	cop	table:t1, index:c2, range:[1,1], keep order:false
+IndexReader_6	0.00	root	index:IndexScan_5
+└─IndexScan_5	0.00	cop	table:t1, index:c2, range:[1,1], keep order:false
 explain select * from t1 left join t2 on t1.c2 = t2.c1 where t1.c1 > 1;
 id	count	task	operator info
 MergeJoin_7	2481.25	root	left outer join, left key:test.t1.c2, right key:test.t2.c1
@@ -120,8 +120,8 @@ Projection_12	1999.00	root	eq(test.t1.c2, test.t2.c2)
 explain select * from t1 order by c1 desc limit 1;
 id	count	task	operator info
 Limit_10	1.00	root	offset:0, count:1
-└─TableReader_21	1.00	root	data:Limit_20
-  └─Limit_20	1.00	cop	offset:0, count:1
+└─TableReader_20	1.00	root	data:Limit_19
+  └─Limit_19	1.00	cop	offset:0, count:1
     └─TableScan_18	1.00	cop	table:t1, range:[-inf,+inf], keep order:true, desc
 set @@session.tidb_opt_insubq_to_join_and_agg=0;
 explain select 1 in (select c2 from t2) from t1;
@@ -200,5 +200,5 @@ create table tbl(column1 int, column2 int, index idx(column1, column2));
 load stats 's/explain_easy_stats_tbl_dnf.json';
 explain select * from tbl where (column1=0 and column2=1) or (column1=1 and column2=3) or (column1=2 and column2=5);
 id	count	task	operator info
-IndexReader_9	3.00	root	index:IndexScan_8
-└─IndexScan_8	3.00	cop	table:tbl, index:column1, column2, range:[0 1,0 1], [1 3,1 3], [2 5,2 5], keep order:false
+IndexReader_6	3.00	root	index:IndexScan_5
+└─IndexScan_5	3.00	cop	table:tbl, index:column1, column2, range:[0 1,0 1], [1 3,1 3], [2 5,2 5], keep order:false
diff --git a/cmd/explaintest/r/select.result b/cmd/explaintest/r/select.result
index 8a78bcb99d2bd..14a457adbfd85 100644
--- a/cmd/explaintest/r/select.result
+++ b/cmd/explaintest/r/select.result
@@ -249,30 +249,30 @@ insert t values(0,0,0);
 explain select distinct b from t group by a;
 id	count	task	operator info
 HashAgg_7	8000.00	root	group by:test.t.b, funcs:firstrow(test.t.b)
-└─StreamAgg_20	8000.00	root	group by:col_1, funcs:firstrow(col_0)
-  └─IndexReader_21	8000.00	root	index:StreamAgg_11
+└─StreamAgg_19	8000.00	root	group by:col_1, funcs:firstrow(col_0)
+  └─IndexReader_20	8000.00	root	index:StreamAgg_11
     └─StreamAgg_11	8000.00	cop	group by:test.t.a, funcs:firstrow(test.t.b)
-      └─IndexScan_19	10000.00	cop	table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo
+      └─IndexScan_18	10000.00	cop	table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo
 select distinct b from t group by a;
 b
 0
 explain select count(b) from t group by a;
 id	count	task	operator info
-StreamAgg_17	8000.00	root	group by:col_1, funcs:count(col_0)
-└─IndexReader_18	8000.00	root	index:StreamAgg_8
+StreamAgg_16	8000.00	root	group by:col_1, funcs:count(col_0)
+└─IndexReader_17	8000.00	root	index:StreamAgg_8
   └─StreamAgg_8	8000.00	cop	group by:test.t.a, funcs:count(test.t.b)
-    └─IndexScan_16	10000.00	cop	table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo
+    └─IndexScan_15	10000.00	cop	table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo
 select count(b) from t group by a;
 count(b)
 1
 insert t values(1,1,1),(3,3,6),(3,2,5),(2,1,4),(1,1,3),(1,1,2);
 explain select count(a) from t where b>0 group by a, b;
 id	count	task	operator info
-StreamAgg_21	2666.67	root	group by:col_1, col_2, funcs:count(col_0)
-└─IndexReader_22	2666.67	root	index:StreamAgg_9
+StreamAgg_20	2666.67	root	group by:col_1, col_2, funcs:count(col_0)
+└─IndexReader_21	2666.67	root	index:StreamAgg_9
   └─StreamAgg_9	2666.67	cop	group by:test.t.a, test.t.b, funcs:count(test.t.a)
-    └─Selection_20	3333.33	cop	gt(test.t.b, 0)
-      └─IndexScan_19	10000.00	cop	table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo
+    └─Selection_19	3333.33	cop	gt(test.t.b, 0)
+      └─IndexScan_18	10000.00	cop	table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo
 select count(a) from t where b>0 group by a, b;
 count(a)
 3
@@ -282,11 +282,11 @@ count(a)
 explain select count(a) from t where b>0 group by a, b order by a;
 id	count	task	operator info
 Projection_7	2666.67	root	count(a)
-└─StreamAgg_33	2666.67	root	group by:col_2, col_3, funcs:count(col_0), firstrow(col_1)
-  └─IndexReader_34	2666.67	root	index:StreamAgg_31
-    └─StreamAgg_31	2666.67	cop	group by:test.t.a, test.t.b, funcs:count(test.t.a), firstrow(test.t.a)
-      └─Selection_24	3333.33	cop	gt(test.t.b, 0)
-        └─IndexScan_23	10000.00	cop	table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo
+└─StreamAgg_31	2666.67	root	group by:col_2, col_3, funcs:count(col_0), firstrow(col_1)
+  └─IndexReader_32	2666.67	root	index:StreamAgg_29
+    └─StreamAgg_29	2666.67	cop	group by:test.t.a, test.t.b, funcs:count(test.t.a), firstrow(test.t.a)
+      └─Selection_23	3333.33	cop	gt(test.t.b, 0)
+        └─IndexScan_22	10000.00	cop	table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo
 select count(a) from t where b>0 group by a, b order by a;
 count(a)
 3
@@ -297,11 +297,11 @@ explain select count(a) from t where b>0 group by a, b order by a limit 1;
 id	count	task	operator info
 Projection_9	1.00	root	count(a)
 └─Limit_15	1.00	root	offset:0, count:1
-  └─StreamAgg_42	1.00	root	group by:col_2, col_3, funcs:count(col_0), firstrow(col_1)
-    └─IndexReader_43	1.00	root	index:StreamAgg_37
-      └─StreamAgg_37	1.00	cop	group by:test.t.a, test.t.b, funcs:count(test.t.a), firstrow(test.t.a)
-        └─Selection_41	1.25	cop	gt(test.t.b, 0)
-          └─IndexScan_40	3.75	cop	table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo
+  └─StreamAgg_39	1.00	root	group by:col_2, col_3, funcs:count(col_0), firstrow(col_1)
+    └─IndexReader_40	1.00	root	index:StreamAgg_35
+      └─StreamAgg_35	1.00	cop	group by:test.t.a, test.t.b, funcs:count(test.t.a), firstrow(test.t.a)
+        └─Selection_38	1.25	cop	gt(test.t.b, 0)
+          └─IndexScan_37	3.75	cop	table:t, index:a, b, c, range:[NULL,+inf], keep order:true, stats:pseudo
 select count(a) from t where b>0 group by a, b order by a limit 1;
 count(a)
 3
diff --git a/cmd/explaintest/r/topn_push_down.result b/cmd/explaintest/r/topn_push_down.result
index 244943aecec90..e7700f7c17ea2 100644
--- a/cmd/explaintest/r/topn_push_down.result
+++ b/cmd/explaintest/r/topn_push_down.result
@@ -169,18 +169,18 @@ LIMIT 0, 5;
 id	count	task	operator info
 Projection_13	0.00	root	te.expect_time
 └─Limit_19	0.00	root	offset:0, count:5
-  └─IndexJoin_143	0.00	root	left outer join, inner:IndexReader_142, outer key:tr.id, inner key:p.relate_id
-    ├─TopN_146	0.00	root	te.expect_time:asc, offset:0, count:5
+  └─IndexJoin_104	0.00	root	left outer join, inner:IndexReader_103, outer key:tr.id, inner key:p.relate_id
+    ├─TopN_107	0.00	root	te.expect_time:asc, offset:0, count:5
     │ └─IndexJoin_36	0.00	root	inner join, inner:IndexLookUp_35, outer key:tr.id, inner key:te.trade_id
-    │   ├─IndexLookUp_107	0.00	root	
-    │   │ ├─Selection_105	0.00	cop	eq(tr.business_type, 18), in(tr.trade_type, 1)
-    │   │ │ └─IndexScan_103	10.00	cop	table:tr, index:shop_identy, trade_status, business_type, trade_pay_status, trade_type, delivery_type, source, biz_date, range:[810094178,810094178], keep order:false, stats:pseudo
-    │   │ └─Selection_106	0.00	cop	eq(tr.brand_identy, 32314), eq(tr.domain_type, 2)
-    │   │   └─TableScan_104	0.00	cop	table:tr, keep order:false
+    │   ├─IndexLookUp_85	0.00	root	
+    │   │ ├─Selection_83	0.00	cop	eq(tr.business_type, 18), in(tr.trade_type, 1)
+    │   │ │ └─IndexScan_81	10.00	cop	table:tr, index:shop_identy, trade_status, business_type, trade_pay_status, trade_type, delivery_type, source, biz_date, range:[810094178,810094178], keep order:false, stats:pseudo
+    │   │ └─Selection_84	0.00	cop	eq(tr.brand_identy, 32314), eq(tr.domain_type, 2)
+    │   │   └─TableScan_82	0.00	cop	table:tr, keep order:false
     │   └─IndexLookUp_35	250.00	root	
     │     ├─IndexScan_32	10.00	cop	table:te, index:trade_id, range: decided by [tr.id], keep order:false, stats:pseudo
     │     └─Selection_34	250.00	cop	ge(te.expect_time, 2018-04-23 00:00:00.000000), le(te.expect_time, 2018-04-23 23:59:59.000000)
     │       └─TableScan_33	10.00	cop	table:te, keep order:false, stats:pseudo
-    └─IndexReader_142	0.00	root	index:Selection_141
-      └─Selection_141	0.00	cop	not(isnull(p.relate_id))
-        └─IndexScan_140	10.00	cop	table:p, index:relate_id, range: decided by [tr.id], keep order:false, stats:pseudo
+    └─IndexReader_103	0.00	root	index:Selection_102
+      └─Selection_102	0.00	cop	not(isnull(p.relate_id))
+        └─IndexScan_101	10.00	cop	table:p, index:relate_id, range: decided by [tr.id], keep order:false, stats:pseudo
diff --git a/cmd/explaintest/r/tpch.result b/cmd/explaintest/r/tpch.result
index c32849cbd666d..d3c7dc4a3eb0c 100644
--- a/cmd/explaintest/r/tpch.result
+++ b/cmd/explaintest/r/tpch.result
@@ -930,16 +930,16 @@ id	count	task	operator info
 Sort_13	3863988.24	root	supplier_cnt:desc, tpch.part.p_brand:asc, tpch.part.p_type:asc, tpch.part.p_size:asc
 └─Projection_15	3863988.24	root	tpch.part.p_brand, tpch.part.p_type, tpch.part.p_size, 9_col_0
   └─HashAgg_18	3863988.24	root	group by:tpch.part.p_brand, tpch.part.p_size, tpch.part.p_type, funcs:count(distinct tpch.partsupp.ps_suppkey), firstrow(tpch.part.p_brand), firstrow(tpch.part.p_type), firstrow(tpch.part.p_size)
-    └─HashLeftJoin_23	3863988.24	root	anti semi join, inner:TableReader_47, equal:[eq(tpch.partsupp.ps_suppkey, tpch.supplier.s_suppkey)]
+    └─HashLeftJoin_23	3863988.24	root	anti semi join, inner:TableReader_46, equal:[eq(tpch.partsupp.ps_suppkey, tpch.supplier.s_suppkey)]
       ├─IndexJoin_27	4829985.30	root	inner join, inner:IndexReader_26, outer key:tpch.part.p_partkey, inner key:tpch.partsupp.ps_partkey
-      │ ├─TableReader_42	1200618.43	root	data:Selection_41
-      │ │ └─Selection_41	1200618.43	cop	in(tpch.part.p_size, 48, 19, 12, 4, 41, 7, 21, 39), ne(tpch.part.p_brand, "Brand#34"), not(like(tpch.part.p_type, "LARGE BRUSHED%", 92))
-      │ │   └─TableScan_40	10000000.00	cop	table:part, range:[-inf,+inf], keep order:false
+      │ ├─TableReader_41	1200618.43	root	data:Selection_40
+      │ │ └─Selection_40	1200618.43	cop	in(tpch.part.p_size, 48, 19, 12, 4, 41, 7, 21, 39), ne(tpch.part.p_brand, "Brand#34"), not(like(tpch.part.p_type, "LARGE BRUSHED%", 92))
+      │ │   └─TableScan_39	10000000.00	cop	table:part, range:[-inf,+inf], keep order:false
       │ └─IndexReader_26	1.00	root	index:IndexScan_25
       │   └─IndexScan_25	1.00	cop	table:partsupp, index:PS_PARTKEY, PS_SUPPKEY, range: decided by [tpch.part.p_partkey], keep order:false
-      └─TableReader_47	400000.00	root	data:Selection_46
-        └─Selection_46	400000.00	cop	like(tpch.supplier.s_comment, "%Customer%Complaints%", 92)
-          └─TableScan_45	500000.00	cop	table:supplier, range:[-inf,+inf], keep order:false
+      └─TableReader_46	400000.00	root	data:Selection_45
+        └─Selection_45	400000.00	cop	like(tpch.supplier.s_comment, "%Customer%Complaints%", 92)
+          └─TableScan_44	500000.00	cop	table:supplier, range:[-inf,+inf], keep order:false
 /*
 Q17 Small-Quantity-Order Revenue Query
 This query determines how much average yearly revenue would be lost if orders were no longer filled for small
diff --git a/cmd/explaintest/r/window_function.result b/cmd/explaintest/r/window_function.result
index c1da15629e80b..73fcf3a905e9e 100644
--- a/cmd/explaintest/r/window_function.result
+++ b/cmd/explaintest/r/window_function.result
@@ -12,40 +12,40 @@ explain select sum(a) over(partition by a) from t;
 id	count	task	operator info
 Projection_7	10000.00	root	sum(a) over(partition by a)
 └─Window_8	10000.00	root	sum(cast(test.t.a)) over(partition by test.t.a)
-  └─IndexReader_11	10000.00	root	index:IndexScan_10
-    └─IndexScan_10	10000.00	cop	table:t, index:a, range:[NULL,+inf], keep order:true, stats:pseudo
+  └─IndexReader_10	10000.00	root	index:IndexScan_9
+    └─IndexScan_9	10000.00	cop	table:t, index:a, range:[NULL,+inf], keep order:true, stats:pseudo
 explain select sum(a) over(partition by a order by b) from t;
 id	count	task	operator info
 Projection_7	10000.00	root	sum(a) over(partition by a order by b)
 └─Window_8	10000.00	root	sum(cast(test.t.a)) over(partition by test.t.a order by test.t.b asc)
-  └─Sort_14	10000.00	root	test.t.a:asc, test.t.b:asc
-    └─TableReader_13	10000.00	root	data:TableScan_12
-      └─TableScan_12	10000.00	cop	table:t, range:[-inf,+inf], keep order:false, stats:pseudo
+  └─Sort_12	10000.00	root	test.t.a:asc, test.t.b:asc
+    └─TableReader_11	10000.00	root	data:TableScan_10
+      └─TableScan_10	10000.00	cop	table:t, range:[-inf,+inf], keep order:false, stats:pseudo
 explain select sum(a) over(partition by a order by b rows unbounded preceding) from t;
 id	count	task	operator info
 Projection_7	10000.00	root	sum(a) over(partition by a order by b rows unbounded preceding)
 └─Window_8	10000.00	root	sum(cast(test.t.a)) over(partition by test.t.a order by test.t.b asc rows between unbounded preceding and current row)
-  └─Sort_14	10000.00	root	test.t.a:asc, test.t.b:asc
-    └─TableReader_13	10000.00	root	data:TableScan_12
-      └─TableScan_12	10000.00	cop	table:t, range:[-inf,+inf], keep order:false, stats:pseudo
+  └─Sort_12	10000.00	root	test.t.a:asc, test.t.b:asc
+    └─TableReader_11	10000.00	root	data:TableScan_10
+      └─TableScan_10	10000.00	cop	table:t, range:[-inf,+inf], keep order:false, stats:pseudo
 explain select sum(a) over(partition by a order by b rows between 1 preceding and 1 following) from t;
 id	count	task	operator info
 Projection_7	10000.00	root	sum(a) over(partition by a order by b rows between 1 preceding and 1 following)
 └─Window_8	10000.00	root	sum(cast(test.t.a)) over(partition by test.t.a order by test.t.b asc rows between 1 preceding and 1 following)
-  └─Sort_14	10000.00	root	test.t.a:asc, test.t.b:asc
-    └─TableReader_13	10000.00	root	data:TableScan_12
-      └─TableScan_12	10000.00	cop	table:t, range:[-inf,+inf], keep order:false, stats:pseudo
+  └─Sort_12	10000.00	root	test.t.a:asc, test.t.b:asc
+    └─TableReader_11	10000.00	root	data:TableScan_10
+      └─TableScan_10	10000.00	cop	table:t, range:[-inf,+inf], keep order:false, stats:pseudo
 explain select sum(a) over(partition by a order by b range between 1 preceding and 1 following) from t;
 id	count	task	operator info
 Projection_7	10000.00	root	sum(a) over(partition by a order by b range between 1 preceding and 1 following)
 └─Window_8	10000.00	root	sum(cast(test.t.a)) over(partition by test.t.a order by test.t.b asc range between 1 preceding and 1 following)
-  └─Sort_14	10000.00	root	test.t.a:asc, test.t.b:asc
-    └─TableReader_13	10000.00	root	data:TableScan_12
-      └─TableScan_12	10000.00	cop	table:t, range:[-inf,+inf], keep order:false, stats:pseudo
+  └─Sort_12	10000.00	root	test.t.a:asc, test.t.b:asc
+    └─TableReader_11	10000.00	root	data:TableScan_10
+      └─TableScan_10	10000.00	cop	table:t, range:[-inf,+inf], keep order:false, stats:pseudo
 explain select sum(a) over(partition by a order by c range between interval '2:30' minute_second preceding and interval '2:30' minute_second following) from t;
 id	count	task	operator info
 Projection_7	10000.00	root	sum(a) over(partition by a order by c range between interval '2:30' minute_second preceding and interval '2:30' minute_second following)
 └─Window_8	10000.00	root	sum(cast(test.t.a)) over(partition by test.t.a order by test.t.c asc range between interval "2:30" "MINUTE_SECOND" preceding and interval "2:30" "MINUTE_SECOND" following)
-  └─Sort_14	10000.00	root	test.t.a:asc, test.t.c:asc
-    └─TableReader_13	10000.00	root	data:TableScan_12
-      └─TableScan_12	10000.00	cop	table:t, range:[-inf,+inf], keep order:false, stats:pseudo
+  └─Sort_12	10000.00	root	test.t.a:asc, test.t.c:asc
+    └─TableReader_11	10000.00	root	data:TableScan_10
+      └─TableScan_10	10000.00	cop	table:t, range:[-inf,+inf], keep order:false, stats:pseudo
diff --git a/expression/util.go b/expression/util.go
index 01ee88c094c9b..e6a2bec438ce3 100644
--- a/expression/util.go
+++ b/expression/util.go
@@ -92,6 +92,26 @@ func extractColumns(result []*Column, expr Expression, filter func(*Column) bool
 	return result
 }
 
+// ExtractColumnSet extract columns that occurred in the exprs.
+func ExtractColumnSet(exprs []Expression) map[int64]struct{} {
+	set := make(map[int64]struct{})
+	for _, expr := range exprs {
+		extractColumnSet(expr, set)
+	}
+	return set
+}
+
+func extractColumnSet(expr Expression, set map[int64]struct{}) {
+	switch v := expr.(type) {
+	case *Column:
+		set[v.UniqueID] = struct{}{}
+	case *ScalarFunction:
+		for _, arg := range v.GetArgs() {
+			extractColumnSet(arg, set)
+		}
+	}
+}
+
 // ColumnSubstitute substitutes the columns in filter to expressions in select fields.
 // e.g. select * from (select b as a from t) k where a < 10 => select * from (select b as a from t where b < 10) k.
 func ColumnSubstitute(expr Expression, schema *Schema, newExprs []Expression) Expression {
diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go
index bfefc57b32f71..8e3c7c5ae4076 100644
--- a/planner/core/find_best_task.go
+++ b/planner/core/find_best_task.go
@@ -196,6 +196,139 @@ func (ds *DataSource) tryToGetDualTask() (task, error) {
 	return nil, nil
 }
 
+// candidatePath is used to maintain required info for skyline pruning.
+type candidatePath struct {
+	path       *accessPath
+	columnSet  map[int64]struct{} // columnSet is the set of columns that occurred in the access conditions.
+	singleScan bool
+	matchProp  bool
+}
+
+// compareColumnSet will compares the two set. The last return value is used to indicate
+// if they are comparable, it is false when both two sets have columns that do not occur in the other.
+func compareColumnSet(l, r map[int64]struct{}) (int, bool) {
+	if len(l) <= len(r) {
+		for key := range l {
+			if _, ok := r[key]; !ok {
+				return 0, false
+			}
+		}
+		if len(l) == len(r) {
+			return 0, true
+		}
+		return -1, true
+	}
+	for key := range r {
+		if _, ok := l[key]; !ok {
+			return 0, false
+		}
+	}
+	return 1, true
+}
+
+func compareBool(l, r bool) int {
+	if l == r {
+		return 0
+	}
+	if l == false {
+		return -1
+	}
+	return 1
+}
+
+// compareCandidates is the core of skyline pruning. It compares the two candidate paths on three dimensions:
+// the set of columns that occurred in the access condition, whether or not it matches the physical property
+// and does it require a double scan.  If `x` is not worse than `y` at all factors,
+// and there exists one factor that `x` is better than `y`, then we `x` is better than `y`.
+func compareCandidates(lhs, rhs *candidatePath) int {
+	setsResult, comparable := compareColumnSet(lhs.columnSet, rhs.columnSet)
+	if !comparable {
+		return 0
+	}
+	scanResult := compareBool(lhs.singleScan, rhs.singleScan)
+	matchResult := compareBool(lhs.matchProp, rhs.matchProp)
+	sum := setsResult + scanResult + matchResult
+	if setsResult >= 0 && scanResult >= 0 && matchResult >= 0 && sum > 0 {
+		return 1
+	}
+	if setsResult <= 0 && scanResult <= 0 && matchResult <= 0 && sum < 0 {
+		return -1
+	}
+	return 0
+}
+
+func (ds *DataSource) getTableCandidate(path *accessPath, prop *property.PhysicalProperty) *candidatePath {
+	point := &candidatePath{path: path}
+	if ds.tableInfo.PKIsHandle {
+		if pkColInfo := ds.tableInfo.GetPkColInfo(); pkColInfo != nil {
+			pkCol := expression.ColInfo2Col(ds.schema.Columns, pkColInfo)
+			point.matchProp = len(prop.Items) == 1 && pkCol != nil && prop.Items[0].Col.Equal(nil, pkCol)
+		}
+	}
+	point.columnSet = expression.ExtractColumnSet(path.accessConds)
+	point.singleScan = true
+	return point
+}
+
+func (ds *DataSource) getIndexCandidate(path *accessPath, prop *property.PhysicalProperty) *candidatePath {
+	point := &candidatePath{path: path}
+	all, _ := prop.AllSameOrder()
+	if !prop.IsEmpty() && all {
+		for i, col := range path.index.Columns {
+			// not matched
+			if col.Name.L == prop.Items[0].Col.ColName.L {
+				point.matchProp = matchIndicesProp(path.index.Columns[i:], prop.Items)
+				break
+			} else if i >= path.eqCondCount {
+				break
+			}
+		}
+	}
+	point.columnSet = expression.ExtractColumnSet(path.accessConds)
+	point.singleScan = isCoveringIndex(ds.schema.Columns, path.index.Columns, ds.tableInfo.PKIsHandle)
+	return point
+}
+
+func (ds *DataSource) skylinePruning(prop *property.PhysicalProperty) []*candidatePath {
+	candidates := make([]*candidatePath, 0, 4)
+	for _, path := range ds.possibleAccessPaths {
+		// If the scan range is already known to be empty, return only this path so that findBestTask builds a TableDual from it.
+		if len(path.ranges) == 0 && !ds.ctx.GetSessionVars().StmtCtx.UseCache {
+			return []*candidatePath{{path: path}}
+		}
+		var currentCandidate *candidatePath
+		if path.isTablePath {
+			currentCandidate = ds.getTableCandidate(path, prop)
+		} else {
+			// We will use index to generate physical plan if:
+			// this path's access cond is not nil or
+			// we have prop to match or
+			// this index is forced to choose.
+			if len(path.accessConds) > 0 || len(prop.Items) > 0 || path.forced {
+				currentCandidate = ds.getIndexCandidate(path, prop)
+			}
+		}
+		if currentCandidate == nil {
+			continue
+		}
+		pruned := false
+		for i := len(candidates) - 1; i >= 0; i-- {
+			result := compareCandidates(candidates[i], currentCandidate)
+			if result == 1 {
+				pruned = true
+				// We can break here because the current candidate cannot prune others anymore.
+				break
+			} else if result == -1 {
+				candidates = append(candidates[:i], candidates[i+1:]...)
+			}
+		}
+		if !pruned {
+			candidates = append(candidates, currentCandidate)
+		}
+	}
+	return candidates
+}
+
 // findBestTask implements the PhysicalPlan interface.
 // It will enumerate all the available indices and choose a plan with least cost.
 func (ds *DataSource) findBestTask(prop *property.PhysicalProperty) (t task, err error) {
@@ -250,7 +383,9 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty) (t task, err
 
 	t = invalidTask
 
-	for _, path := range ds.possibleAccessPaths {
+	candidates := ds.skylinePruning(prop)
+	for _, candidate := range candidates {
+		path := candidate.path
 		// if we already know the range of the scan is empty, just return a TableDual
 		if len(path.ranges) == 0 && !ds.ctx.GetSessionVars().StmtCtx.UseCache {
 			dual := PhysicalTableDual{}.Init(ds.ctx, ds.stats)
@@ -260,7 +395,7 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty) (t task, err
 			}, nil
 		}
 		if path.isTablePath {
-			tblTask, err := ds.convertToTableScan(prop, path)
+			tblTask, err := ds.convertToTableScan(prop, path, candidate.matchProp)
 			if err != nil {
 				return nil, errors.Trace(err)
 			}
@@ -269,18 +404,12 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty) (t task, err
 			}
 			continue
 		}
-		// We will use index to generate physical plan if:
-		// this path's access cond is not nil or
-		// we have prop to match or
-		// this index is forced to choose.
-		if len(path.accessConds) > 0 || len(prop.Items) > 0 || path.forced {
-			idxTask, err := ds.convertToIndexScan(prop, path)
-			if err != nil {
-				return nil, errors.Trace(err)
-			}
-			if idxTask.cost() < t.cost() {
-				t = idxTask
-			}
+		idxTask, err := ds.convertToIndexScan(prop, path, candidate.matchProp, !candidate.singleScan)
+		if err != nil {
+			return nil, errors.Trace(err)
+		}
+		if idxTask.cost() < t.cost() {
+			t = idxTask
 		}
 	}
 	return
@@ -322,7 +451,7 @@ func (ts *PhysicalTableScan) appendExtraHandleCol(ds *DataSource) {
 }
 
 // convertToIndexScan converts the DataSource to index scan with idx.
-func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, path *accessPath) (task task, err error) {
+func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, path *accessPath, matchProperty, needDoubleScan bool) (task task, err error) {
 	idx := path.index
 	is := PhysicalIndexScan{
 		Table:            ds.tableInfo,
@@ -345,7 +474,7 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, path *
 	}
 	rowCount := path.countAfterAccess
 	cop := &copTask{indexPlan: is}
-	if !isCoveringIndex(ds.schema.Columns, is.Index.Columns, is.Table.PKIsHandle) {
+	if needDoubleScan {
 		// If it's parent requires single read task, return max cost.
 		if prop.TaskTp == property.CopSingleReadTaskType {
 			return invalidTask, nil
@@ -364,20 +493,6 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, path *
 		return invalidTask, nil
 	}
 	is.initSchema(ds.id, idx, cop.tablePlan != nil)
-	// Check if this plan matches the property.
-	matchProperty := false
-	all, desc := prop.AllSameOrder()
-	if !prop.IsEmpty() && all {
-		for i, col := range idx.Columns {
-			// not matched
-			if col.Name.L == prop.Items[0].Col.ColName.L {
-				matchProperty = matchIndicesProp(idx.Columns[i:], prop.Items)
-				break
-			} else if i >= path.eqCondCount {
-				break
-			}
-		}
-	}
 	// Only use expectedCnt when it's smaller than the count we calculated.
 	// e.g. IndexScan(count1)->After Filter(count2). The `ds.stats.RowCount` is count2. count1 is the one we need to calculate
 	// If expectedCnt and count2 are both zero and we go into the below `if` block, the count1 will be set to zero though it's shouldn't be.
@@ -390,7 +505,7 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, path *
 	cop.cst = rowCount * scanFactor
 	task = cop
 	if matchProperty {
-		if desc {
+		if prop.Items[0].Desc {
 			is.Desc = true
 			cop.cst = rowCount * descScanFactor
 		}
@@ -500,7 +615,7 @@ func splitIndexFilterConditions(conditions []expression.Expression, indexColumns
 }
 
 // convertToTableScan converts the DataSource to table scan.
-func (ds *DataSource) convertToTableScan(prop *property.PhysicalProperty, path *accessPath) (task task, err error) {
+func (ds *DataSource) convertToTableScan(prop *property.PhysicalProperty, path *accessPath, matchProperty bool) (task task, err error) {
 	// It will be handled in convertToIndexScan.
 	if prop.TaskTp == property.CopDoubleReadTaskType {
 		return invalidTask, nil
@@ -515,10 +630,8 @@ func (ds *DataSource) convertToTableScan(prop *property.PhysicalProperty, path *
 		physicalTableID: ds.physicalTableID,
 	}.Init(ds.ctx)
 	ts.SetSchema(ds.schema)
-	var pkCol *expression.Column
 	if ts.Table.PKIsHandle {
 		if pkColInfo := ts.Table.GetPkColInfo(); pkColInfo != nil {
-			pkCol = expression.ColInfo2Col(ts.schema.Columns, pkColInfo)
 			if ds.statisticTable.Columns[pkColInfo.ID] != nil {
 				ts.Hist = &ds.statisticTable.Columns[pkColInfo.ID].Histogram
 			}
@@ -532,7 +645,6 @@ func (ds *DataSource) convertToTableScan(prop *property.PhysicalProperty, path *
 		indexPlanFinished: true,
 	}
 	task = copTask
-	matchProperty := len(prop.Items) == 1 && pkCol != nil && prop.Items[0].Col.Equal(nil, pkCol)
 	// Only use expectedCnt when it's smaller than the count we calculated.
 	// e.g. IndexScan(count1)->After Filter(count2). The `ds.stats.RowCount` is count2. count1 is the one we need to calculate
 	// If expectedCnt and count2 are both zero and we go into the below `if` block, the count1 will be set to zero though it's shouldn't be.
diff --git a/planner/core/logical_plan_test.go b/planner/core/logical_plan_test.go
index 649335dd0ab2c..7fad6bb5c8dac 100644
--- a/planner/core/logical_plan_test.go
+++ b/planner/core/logical_plan_test.go
@@ -16,6 +16,7 @@ package core
 import (
 	"fmt"
 	"sort"
+	"strings"
 	"testing"
 
 	. "github.com/pingcap/check"
@@ -26,6 +27,7 @@ import (
 	"github.com/pingcap/parser/terror"
 	"github.com/pingcap/tidb/expression"
 	"github.com/pingcap/tidb/infoschema"
+	"github.com/pingcap/tidb/planner/property"
 	"github.com/pingcap/tidb/sessionctx"
 	"github.com/pingcap/tidb/util/testleak"
 )
@@ -2220,3 +2222,101 @@ func (s *testPlanSuite) TestWindowFunction(c *C) {
 		c.Assert(ToString(p), Equals, tt.result, comment)
 	}
 }
+
+func byItemsToProperty(byItems []*ByItems) *property.PhysicalProperty {
+	pp := &property.PhysicalProperty{}
+	for _, item := range byItems {
+		pp.Items = append(pp.Items, property.Item{Col: item.Expr.(*expression.Column), Desc: item.Desc})
+	}
+	return pp
+}
+
+func pathsName(paths []*candidatePath) string {
+	var names []string
+	for _, path := range paths {
+		if path.path.isTablePath {
+			names = append(names, "PRIMARY_KEY")
+		} else {
+			names = append(names, path.path.index.Name.O)
+		}
+	}
+	return strings.Join(names, ",")
+}
+
+func (s *testPlanSuite) TestSkylinePruning(c *C) {
+	defer testleak.AfterTest(c)()
+	tests := []struct {
+		sql    string
+		result string
+	}{
+		{
+			sql:    "select * from t",
+			result: "PRIMARY_KEY",
+		},
+		{
+			sql:    "select * from t order by f",
+			result: "PRIMARY_KEY,f,f_g",
+		},
+		{
+			sql:    "select * from t where a > 1",
+			result: "PRIMARY_KEY",
+		},
+		{
+			sql:    "select * from t where a > 1 order by f",
+			result: "PRIMARY_KEY,f,f_g",
+		},
+		{
+			sql:    "select * from t where f > 1",
+			result: "PRIMARY_KEY,f,f_g",
+		},
+		{
+			sql:    "select f from t where f > 1",
+			result: "f,f_g",
+		},
+		{
+			sql:    "select f from t where f > 1 order by a",
+			result: "PRIMARY_KEY,f,f_g",
+		},
+		{
+			sql:    "select * from t where f > 1 and g > 1",
+			result: "PRIMARY_KEY,f,g,f_g",
+		},
+	}
+	for i, tt := range tests {
+		comment := Commentf("case:%v sql:%s", i, tt.sql)
+		stmt, err := s.ParseOneStmt(tt.sql, "", "")
+		c.Assert(err, IsNil, comment)
+		Preprocess(s.ctx, stmt, s.is, false)
+		builder := &PlanBuilder{
+			ctx:       MockContext(),
+			is:        s.is,
+			colMapper: make(map[*ast.ColumnNameExpr]int),
+		}
+		p, err := builder.Build(stmt)
+		if err != nil {
+			c.Assert(err.Error(), Equals, tt.result, comment)
+			continue
+		}
+		c.Assert(err, IsNil)
+		p, err = logicalOptimize(builder.optFlag, p.(LogicalPlan))
+		c.Assert(err, IsNil)
+		lp := p.(LogicalPlan)
+		_, err = lp.recursiveDeriveStats()
+		c.Assert(err, IsNil)
+		var ds *DataSource
+		var byItems []*ByItems
+		for ds == nil {
+			switch v := lp.(type) {
+			case *DataSource:
+				ds = v
+			case *LogicalSort:
+				byItems = v.ByItems
+				lp = lp.Children()[0]
+			default:
+				lp = lp.Children()[0]
+			}
+		}
+		paths := ds.skylinePruning(byItemsToProperty(byItems))
+		c.Assert(pathsName(paths), Equals, tt.result)
+	}
+}
diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go
index 7956dff6741b5..bda22ff528829 100644
--- a/statistics/selectivity_test.go
+++ b/statistics/selectivity_test.go
@@ -206,8 +206,8 @@ func (s *testStatsSuite) TestDiscreteDistribution(c *C) {
 	}
 	testKit.MustExec("analyze table t")
 	testKit.MustQuery("explain select * from t where a = 'tw' and b < 0").Check(testkit.Rows(
-		"IndexReader_9 0.00 root index:IndexScan_8",
-		"└─IndexScan_8 0.00 cop table:t, index:a, b, range:[\"tw\" -inf,\"tw\" 0), keep order:false"))
+		"IndexReader_6 0.00 root index:IndexScan_5",
+		"└─IndexScan_5 0.00 cop table:t, index:a, b, range:[\"tw\" -inf,\"tw\" 0), keep order:false"))
 }
 
 func (s *testStatsSuite) TestSelectCombinedLowBound(c *C) {
@@ -219,8 +219,8 @@ func (s *testStatsSuite) TestSelectCombinedLowBound(c *C) {
 	testKit.MustExec("insert into t (kid, pid) values (1,2), (1,3), (1,4),(1, 11), (1, 12), (1, 13), (1, 14), (2, 2), (2, 3), (2, 4)")
 	testKit.MustExec("analyze table t")
 	testKit.MustQuery("explain select * from t where kid = 1").Check(testkit.Rows(
-		"IndexReader_9 7.00 root index:IndexScan_8",
-		"└─IndexScan_8 7.00 cop table:t, index:kid, pid, range:[1,1], keep order:false"))
+		"IndexReader_6 7.00 root index:IndexScan_5",
+		"└─IndexScan_5 7.00 cop table:t, index:kid, pid, range:[1,1], keep order:false"))
 }
 
 func getRange(start, end int64) []*ranger.Range {
diff --git a/util/ranger/ranger_test.go b/util/ranger/ranger_test.go
index 79c29b0b68866..49a29e6a1d677 100644
--- a/util/ranger/ranger_test.go
+++ b/util/ranger/ranger_test.go
@@ -979,10 +979,10 @@ func (s *testRangerSuite) TestIndexRangeElimininatedProjection(c *C) {
 	testKit.MustExec("analyze table t")
 	testKit.MustQuery("explain select * from (select * from t union all select ifnull(a,b), b from t) sub where a > 0").Check(testkit.Rows(
 		"Union_11 2.00 root ",
-		"├─IndexReader_17 1.00 root index:IndexScan_16",
-		"│ └─IndexScan_16 1.00 cop table:t, index:a, b, range:(0,+inf], keep order:false",
-		"└─IndexReader_23 1.00 root index:IndexScan_22",
-		"  └─IndexScan_22 1.00 cop table:t, index:a, b, range:(0,+inf], keep order:false",
+		"├─IndexReader_14 1.00 root index:IndexScan_13",
+		"│ └─IndexScan_13 1.00 cop table:t, index:a, b, range:(0,+inf], keep order:false",
+		"└─IndexReader_17 1.00 root index:IndexScan_16",
+		"  └─IndexScan_16 1.00 cop table:t, index:a, b, range:(0,+inf], keep order:false",
 	))
 	testKit.MustQuery("select * from (select * from t union all select ifnull(a,b), b from t) sub where a > 0").Check(testkit.Rows(
 		"1 2",

From 71afdde23c72a3f3648edbb3d1cd785e101594d6 Mon Sep 17 00:00:00 2001
From: Haibin Xie <lambdax.tyler@gmail.com>
Date: Mon, 18 Feb 2019 14:21:44 +0800
Subject: [PATCH 2/8] address comments

---
 docs/design/README.md          |  2 +-
 expression/util.go             |  9 +++++----
 go.mod                         |  2 +-
 planner/core/find_best_task.go | 24 +++++++++++-------------
 4 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/docs/design/README.md b/docs/design/README.md
index 2ec807b104d45..6106cc3a5de1c 100644
--- a/docs/design/README.md
+++ b/docs/design/README.md
@@ -22,7 +22,6 @@ Writing a design document can promote us to think deliberately and gather knowle
 
 - [Proposal: A new command to restore dropped table](./2018-08-10-restore-dropped-table.md)
 - [Proposal: Support SQL Plan Management](./2018-12-11-sql-plan-management.md)
-- [Proposal: Support Skyline Pruning](./2019-01-25-skyline-pruning.md)
 
 ### In Progress
 
@@ -39,3 +38,4 @@ Writing a design document can promote us to think deliberately and gather knowle
 
 - [Proposal: A new aggregate function execution framework](./2018-07-01-refactor-aggregate-framework.md)
 - [Proposal: Infer the System Timezone of a TiDB cluster via TZ environment variable](./2018-09-10-adding-tz-env.md)
+- [Proposal: Support Skyline Pruning](./2019-01-25-skyline-pruning.md)
diff --git a/expression/util.go b/expression/util.go
index e6a2bec438ce3..a30d63bf968f2 100644
--- a/expression/util.go
+++ b/expression/util.go
@@ -28,6 +28,7 @@ import (
 	"github.com/pingcap/tidb/types"
 	"github.com/pingcap/tidb/types/parser_driver"
 	"github.com/pingcap/tidb/util/chunk"
+	"golang.org/x/tools/container/intsets"
 )
 
 // Filter the input expressions, append the results to result.
@@ -93,18 +94,18 @@ func extractColumns(result []*Column, expr Expression, filter func(*Column) bool
 }
 
 // ExtractColumnSet extract columns that occurred in the exprs.
-func ExtractColumnSet(exprs []Expression) map[int64]struct{} {
-	set := make(map[int64]struct{})
+func ExtractColumnSet(exprs []Expression) *intsets.Sparse {
+	set := &intsets.Sparse{}
 	for _, expr := range exprs {
 		extractColumnSet(expr, set)
 	}
 	return set
 }
 
-func extractColumnSet(expr Expression, set map[int64]struct{}) {
+func extractColumnSet(expr Expression, set *intsets.Sparse) {
 	switch v := expr.(type) {
 	case *Column:
-		set[v.UniqueID] = struct{}{}
+		set.Insert(int(v.UniqueID))
 	case *ScalarFunction:
 		for _, arg := range v.GetArgs() {
 			extractColumnSet(arg, set)
diff --git a/go.mod b/go.mod
index 272a87440b86d..69900eeef2768 100644
--- a/go.mod
+++ b/go.mod
@@ -79,7 +79,7 @@ require (
 	golang.org/x/sys v0.0.0-20190109145017-48ac38b7c8cb // indirect
 	golang.org/x/text v0.3.0
 	golang.org/x/time v0.0.0-20181108054448-85acf8d2951c // indirect
-	golang.org/x/tools v0.0.0-20190130214255-bb1329dc71a0 // indirect
+	golang.org/x/tools v0.0.0-20190130214255-bb1329dc71a0
 	google.golang.org/genproto v0.0.0-20190108161440-ae2f86662275 // indirect
 	google.golang.org/grpc v1.17.0
 	gopkg.in/natefinch/lumberjack.v2 v2.0.0
diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go
index 8e3c7c5ae4076..2a0e78bf4e533 100644
--- a/planner/core/find_best_task.go
+++ b/planner/core/find_best_task.go
@@ -24,6 +24,7 @@ import (
 	"github.com/pingcap/tidb/planner/property"
 	"github.com/pingcap/tidb/types"
 	"github.com/pingcap/tidb/util/chunk"
+	"golang.org/x/tools/container/intsets"
 )
 
 const (
@@ -199,29 +200,26 @@ func (ds *DataSource) tryToGetDualTask() (task, error) {
 // candidatePath is used to maintain required info for skyline pruning.
 type candidatePath struct {
 	path       *accessPath
-	columnSet  map[int64]struct{} // columnSet is the set of columns that occurred in the access conditions.
+	columnSet  *intsets.Sparse // columnSet is the set of columns that occurred in the access conditions.
 	singleScan bool
 	matchProp  bool
 }
 
 // compareColumnSet will compares the two set. The last return value is used to indicate
 // if they are comparable, it is false when both two sets have columns that do not occur in the other.
-func compareColumnSet(l, r map[int64]struct{}) (int, bool) {
-	if len(l) <= len(r) {
-		for key := range l {
-			if _, ok := r[key]; !ok {
-				return 0, false
-			}
+func compareColumnSet(l, r *intsets.Sparse) (int, bool) {
+	lLen, rLen := l.Len(), r.Len()
+	if lLen <= rLen {
+		if isSubset := l.SubsetOf(r); !isSubset {
+			return 0, false
 		}
-		if len(l) == len(r) {
+		if lLen == rLen {
 			return 0, true
 		}
 		return -1, true
 	}
-	for key := range r {
-		if _, ok := l[key]; !ok {
-			return 0, false
-		}
+	if isSubset := r.SubsetOf(l); !isSubset {
+		return 0, false
 	}
 	return 1, true
 }
@@ -239,7 +237,7 @@ func compareBool(l, r bool) int {
 // compareCandidates is the core of skyline pruning. It compares the two candidate paths on three dimensions:
 // the set of columns that occurred in the access condition, whether or not it matches the physical property
 // and does it require a double scan.  If `x` is not worse than `y` at all factors,
-// and there exists one factor that `x` is better than `y`, then we `x` is better than `y`.
+// and there exists one factor that `x` is better than `y`, then `x` is better than `y`.
 func compareCandidates(lhs, rhs *candidatePath) int {
 	setsResult, comparable := compareColumnSet(lhs.columnSet, rhs.columnSet)
 	if !comparable {

From 6db6f1e64ca1e084b907b31550437582ef7ad741 Mon Sep 17 00:00:00 2001
From: Haibin Xie <lambdax.tyler@gmail.com>
Date: Mon, 18 Feb 2019 16:22:48 +0800
Subject: [PATCH 3/8] address comments

---
 expression/util.go             |  2 +-
 planner/core/find_best_task.go | 67 +++++++++++++++++-----------------
 2 files changed, 34 insertions(+), 35 deletions(-)

diff --git a/expression/util.go b/expression/util.go
index a30d63bf968f2..e7285d132761c 100644
--- a/expression/util.go
+++ b/expression/util.go
@@ -93,7 +93,7 @@ func extractColumns(result []*Column, expr Expression, filter func(*Column) bool
 	return result
 }
 
-// ExtractColumnSet extract columns that occurred in the exprs.
+// ExtractColumnSet extract the different unique id of columns that occurred in the exprs.
 func ExtractColumnSet(exprs []Expression) *intsets.Sparse {
 	set := &intsets.Sparse{}
 	for _, expr := range exprs {
diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go
index 2a0e78bf4e533..cb2382113e98e 100644
--- a/planner/core/find_best_task.go
+++ b/planner/core/find_best_task.go
@@ -199,10 +199,10 @@ func (ds *DataSource) tryToGetDualTask() (task, error) {
 
 // candidatePath is used to maintain required info for skyline pruning.
 type candidatePath struct {
-	path       *accessPath
-	columnSet  *intsets.Sparse // columnSet is the set of columns that occurred in the access conditions.
-	singleScan bool
-	matchProp  bool
+	path         *accessPath
+	columnSet    *intsets.Sparse // columnSet is the set of columns that occurred in the access conditions.
+	isSingleScan bool
+	isMatchProp  bool
 }
 
 // compareColumnSet will compares the two set. The last return value is used to indicate
@@ -235,16 +235,18 @@ func compareBool(l, r bool) int {
 }
 
 // compareCandidates is the core of skyline pruning. It compares the two candidate paths on three dimensions:
-// the set of columns that occurred in the access condition, whether or not it matches the physical property
-// and does it require a double scan.  If `x` is not worse than `y` at all factors,
+// (1): the set of columns that occurred in the access condition,
+// (2): whether or not it matches the physical property
+// (3): does it require a double scan.
+// If `x` is not worse than `y` at all factors,
 // and there exists one factor that `x` is better than `y`, then `x` is better than `y`.
 func compareCandidates(lhs, rhs *candidatePath) int {
 	setsResult, comparable := compareColumnSet(lhs.columnSet, rhs.columnSet)
 	if !comparable {
 		return 0
 	}
-	scanResult := compareBool(lhs.singleScan, rhs.singleScan)
-	matchResult := compareBool(lhs.matchProp, rhs.matchProp)
+	scanResult := compareBool(lhs.isSingleScan, rhs.isSingleScan)
+	matchResult := compareBool(lhs.isMatchProp, rhs.isMatchProp)
 	sum := setsResult + scanResult + matchResult
 	if setsResult >= 0 && scanResult >= 0 && matchResult >= 0 && sum > 0 {
 		return 1
@@ -256,35 +258,30 @@ func compareCandidates(lhs, rhs *candidatePath) int {
 }
 
 func (ds *DataSource) getTableCandidate(path *accessPath, prop *property.PhysicalProperty) *candidatePath {
-	point := &candidatePath{path: path}
-	if ds.tableInfo.PKIsHandle {
-		if pkColInfo := ds.tableInfo.GetPkColInfo(); pkColInfo != nil {
-			pkCol := expression.ColInfo2Col(ds.schema.Columns, pkColInfo)
-			point.matchProp = len(prop.Items) == 1 && pkCol != nil && prop.Items[0].Col.Equal(nil, pkCol)
-		}
-	}
-	point.columnSet = expression.ExtractColumnSet(path.accessConds)
-	point.singleScan = true
-	return point
+	candidate := &candidatePath{path: path}
+	pkCol := ds.getPKIsHandleCol()
+	candidate.isMatchProp = len(prop.Items) == 1 && pkCol != nil && prop.Items[0].Col.Equal(nil, pkCol)
+	candidate.columnSet = expression.ExtractColumnSet(path.accessConds)
+	candidate.isSingleScan = true
+	return candidate
 }
 
 func (ds *DataSource) getIndexCandidate(path *accessPath, prop *property.PhysicalProperty) *candidatePath {
-	point := &candidatePath{path: path}
+	candidate := &candidatePath{path: path}
 	all, _ := prop.AllSameOrder()
 	if !prop.IsEmpty() && all {
 		for i, col := range path.index.Columns {
-			// not matched
 			if col.Name.L == prop.Items[0].Col.ColName.L {
-				point.matchProp = matchIndicesProp(path.index.Columns[i:], prop.Items)
+				candidate.isMatchProp = matchIndicesProp(path.index.Columns[i:], prop.Items)
 				break
 			} else if i >= path.eqCondCount {
 				break
 			}
 		}
 	}
-	point.columnSet = expression.ExtractColumnSet(path.accessConds)
-	point.singleScan = isCoveringIndex(ds.schema.Columns, path.index.Columns, ds.tableInfo.PKIsHandle)
-	return point
+	candidate.columnSet = expression.ExtractColumnSet(path.accessConds)
+	candidate.isSingleScan = isCoveringIndex(ds.schema.Columns, path.index.Columns, ds.tableInfo.PKIsHandle)
+	return candidate
 }
 
 func (ds *DataSource) skylinePruning(prop *property.PhysicalProperty) []*candidatePath {
@@ -302,7 +299,7 @@ func (ds *DataSource) skylinePruning(prop *property.PhysicalProperty) []*candida
 			// this path's access cond is not nil or
 			// we have prop to match or
 			// this index is forced to choose.
-			if len(path.accessConds) > 0 || len(prop.Items) > 0 || path.forced {
+			if len(path.accessConds) > 0 || !prop.IsEmpty() || path.forced {
 				currentCandidate = ds.getIndexCandidate(path, prop)
 			}
 		}
@@ -393,7 +390,7 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty) (t task, err
 			}, nil
 		}
 		if path.isTablePath {
-			tblTask, err := ds.convertToTableScan(prop, path, candidate.matchProp)
+			tblTask, err := ds.convertToTableScan(prop, candidate)
 			if err != nil {
 				return nil, errors.Trace(err)
 			}
@@ -402,7 +399,7 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty) (t task, err
 			}
 			continue
 		}
-		idxTask, err := ds.convertToIndexScan(prop, path, candidate.matchProp, !candidate.singleScan)
+		idxTask, err := ds.convertToIndexScan(prop, candidate)
 		if err != nil {
 			return nil, errors.Trace(err)
 		}
@@ -449,7 +446,8 @@ func (ts *PhysicalTableScan) appendExtraHandleCol(ds *DataSource) {
 }
 
 // convertToIndexScan converts the DataSource to index scan with idx.
-func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, path *accessPath, matchProperty, needDoubleScan bool) (task task, err error) {
+func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, candidate *candidatePath) (task task, err error) {
+	path := candidate.path
 	idx := path.index
 	is := PhysicalIndexScan{
 		Table:            ds.tableInfo,
@@ -472,7 +470,7 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, path *
 	}
 	rowCount := path.countAfterAccess
 	cop := &copTask{indexPlan: is}
-	if needDoubleScan {
+	if !candidate.isSingleScan {
 		// If it's parent requires single read task, return max cost.
 		if prop.TaskTp == property.CopSingleReadTaskType {
 			return invalidTask, nil
@@ -494,7 +492,7 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, path *
 	// Only use expectedCnt when it's smaller than the count we calculated.
 	// e.g. IndexScan(count1)->After Filter(count2). The `ds.stats.RowCount` is count2. count1 is the one we need to calculate
 	// If expectedCnt and count2 are both zero and we go into the below `if` block, the count1 will be set to zero though it's shouldn't be.
-	if (matchProperty || prop.IsEmpty()) && prop.ExpectedCnt < ds.stats.RowCount {
+	if (candidate.isMatchProp || prop.IsEmpty()) && prop.ExpectedCnt < ds.stats.RowCount {
 		selectivity := ds.stats.RowCount / path.countAfterAccess
 		rowCount = math.Min(prop.ExpectedCnt/selectivity, rowCount)
 	}
@@ -502,7 +500,7 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, path *
 	is.stats.UsePseudoStats = ds.statisticTable.Pseudo
 	cop.cst = rowCount * scanFactor
 	task = cop
-	if matchProperty {
+	if candidate.isMatchProp {
 		if prop.Items[0].Desc {
 			is.Desc = true
 			cop.cst = rowCount * descScanFactor
@@ -613,7 +611,7 @@ func splitIndexFilterConditions(conditions []expression.Expression, indexColumns
 }
 
 // convertToTableScan converts the DataSource to table scan.
-func (ds *DataSource) convertToTableScan(prop *property.PhysicalProperty, path *accessPath, matchProperty bool) (task task, err error) {
+func (ds *DataSource) convertToTableScan(prop *property.PhysicalProperty, candidate *candidatePath) (task task, err error) {
 	// It will be handled in convertToIndexScan.
 	if prop.TaskTp == property.CopDoubleReadTaskType {
 		return invalidTask, nil
@@ -635,6 +633,7 @@ func (ds *DataSource) convertToTableScan(prop *property.PhysicalProperty, path *
 			}
 		}
 	}
+	path := candidate.path
 	ts.Ranges = path.ranges
 	ts.AccessCondition, ts.filterCondition = path.accessConds, path.tableFilters
 	rowCount := path.countAfterAccess
@@ -646,14 +645,14 @@ func (ds *DataSource) convertToTableScan(prop *property.PhysicalProperty, path *
 	// Only use expectedCnt when it's smaller than the count we calculated.
 	// e.g. IndexScan(count1)->After Filter(count2). The `ds.stats.RowCount` is count2. count1 is the one we need to calculate
 	// If expectedCnt and count2 are both zero and we go into the below `if` block, the count1 will be set to zero though it's shouldn't be.
-	if (matchProperty || prop.IsEmpty()) && prop.ExpectedCnt < ds.stats.RowCount {
+	if (candidate.isMatchProp || prop.IsEmpty()) && prop.ExpectedCnt < ds.stats.RowCount {
 		selectivity := ds.stats.RowCount / rowCount
 		rowCount = math.Min(prop.ExpectedCnt/selectivity, rowCount)
 	}
 	ts.stats = property.NewSimpleStats(rowCount)
 	ts.stats.UsePseudoStats = ds.statisticTable.Pseudo
 	copTask.cst = rowCount * scanFactor
-	if matchProperty {
+	if candidate.isMatchProp {
 		if prop.Items[0].Desc {
 			ts.Desc = true
 			copTask.cst = rowCount * descScanFactor

From edcf4a4664df87aff84095b16e564eefeb8b37d3 Mon Sep 17 00:00:00 2001
From: Haibin Xie <lambdax.tyler@gmail.com>
Date: Mon, 18 Feb 2019 16:37:24 +0800
Subject: [PATCH 4/8] address comments

---
 planner/core/find_best_task.go | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go
index cb2382113e98e..06d86c0d4abd8 100644
--- a/planner/core/find_best_task.go
+++ b/planner/core/find_best_task.go
@@ -207,6 +207,10 @@ type candidatePath struct {
 
 // compareColumnSet will compares the two set. The last return value is used to indicate
 // if they are comparable, it is false when both two sets have columns that do not occur in the other.
+// When the second return value is true, the first return value is interpreted as follows:
+// (1) -1 means that `l` is a strict subset of `r`;
+// (2) 0 means that `l` equals `r`;
+// (3) 1 means that `l` is a strict superset of `r`.
 func compareColumnSet(l, r *intsets.Sparse) (int, bool) {
 	lLen, rLen := l.Len(), r.Len()
 	if lLen <= rLen {
@@ -294,14 +298,12 @@ func (ds *DataSource) skylinePruning(prop *property.PhysicalProperty) []*candida
 		var currentCandidate *candidatePath
 		if path.isTablePath {
 			currentCandidate = ds.getTableCandidate(path, prop)
-		} else {
+		} else if len(path.accessConds) > 0 || !prop.IsEmpty() || path.forced {
 			// We will use index to generate physical plan if:
 			// this path's access cond is not nil or
 			// we have prop to match or
 			// this index is forced to choose.
-			if len(path.accessConds) > 0 || !prop.IsEmpty() || path.forced {
-				currentCandidate = ds.getIndexCandidate(path, prop)
-			}
+			currentCandidate = ds.getIndexCandidate(path, prop)
 		}
 		if currentCandidate == nil {
 			continue

From aa72d5730cab5f388ff43210577a112c7ece3f39 Mon Sep 17 00:00:00 2001
From: Haibin Xie <lambdax.tyler@gmail.com>
Date: Mon, 18 Feb 2019 17:19:54 +0800
Subject: [PATCH 5/8] address comments

---
 expression/util.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/expression/util.go b/expression/util.go
index e7285d132761c..6f7b6b8aba6fd 100644
--- a/expression/util.go
+++ b/expression/util.go
@@ -93,7 +93,7 @@ func extractColumns(result []*Column, expr Expression, filter func(*Column) bool
 	return result
 }
 
-// ExtractColumnSet extract the different unique id of columns that occurred in the exprs.
+// ExtractColumnSet extracts the different values of `UniqueId` for columns in expressions.
 func ExtractColumnSet(exprs []Expression) *intsets.Sparse {
 	set := &intsets.Sparse{}
 	for _, expr := range exprs {

From 358f04e56de729b3f318a70c07ceb0be358cdc1d Mon Sep 17 00:00:00 2001
From: Haibin Xie <lambdax.tyler@gmail.com>
Date: Mon, 18 Feb 2019 19:38:19 +0800
Subject: [PATCH 6/8] address comments

---
 planner/core/find_best_task.go | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go
index 06d86c0d4abd8..2024f20e6ee18 100644
--- a/planner/core/find_best_task.go
+++ b/planner/core/find_best_task.go
@@ -213,19 +213,16 @@ type candidatePath struct {
 // (3) 1 means that `l` is a strict superset of `r`.
 func compareColumnSet(l, r *intsets.Sparse) (int, bool) {
 	lLen, rLen := l.Len(), r.Len()
-	if lLen <= rLen {
-		if isSubset := l.SubsetOf(r); !isSubset {
-			return 0, false
-		}
-		if lLen == rLen {
-			return 0, true
-		}
-		return -1, true
+	if lLen < rLen {
+		// -1 is meaningful only when l.SubsetOf(r) is true.
+		return -1, l.SubsetOf(r)
 	}
-	if isSubset := r.SubsetOf(l); !isSubset {
-		return 0, false
+	if lLen == rLen {
+		// 0 is meaningful only when l.SubsetOf(r) is true.
+		return 0, l.SubsetOf(r)
 	}
-	return 1, true
+	// 1 is meaningful only when r.SubsetOf(l) is true.
+	return 1, r.SubsetOf(l)
 }
 
 func compareBool(l, r bool) int {
@@ -273,6 +270,8 @@ func (ds *DataSource) getTableCandidate(path *accessPath, prop *property.Physica
 func (ds *DataSource) getIndexCandidate(path *accessPath, prop *property.PhysicalProperty) *candidatePath {
 	candidate := &candidatePath{path: path}
 	all, _ := prop.AllSameOrder()
+	// When the prop is empty or `all` is false, `isMatchProp` should stay `false` because
+	// the index scan does not need to keep order in that case.
 	if !prop.IsEmpty() && all {
 		for i, col := range path.index.Columns {
 			if col.Name.L == prop.Items[0].Col.ColName.L {

From a141f86c0c10e93cf6481d850d008f197131fd20 Mon Sep 17 00:00:00 2001
From: Haibin Xie <lambdax.tyler@gmail.com>
Date: Mon, 18 Feb 2019 19:58:59 +0800
Subject: [PATCH 7/8] update explain test

---
 .../r/access_path_selection.result            | 28 +++++++++----------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/cmd/explaintest/r/access_path_selection.result b/cmd/explaintest/r/access_path_selection.result
index 1c2db46bf4d3b..ec0309e75dee7 100644
--- a/cmd/explaintest/r/access_path_selection.result
+++ b/cmd/explaintest/r/access_path_selection.result
@@ -7,12 +7,12 @@ KEY `IDX_ab` (`a`, `b`)
 );
 explain select a from access_path_selection where a < 3;
 id	count	task	operator info
-IndexReader_9	3323.33	root	index:IndexScan_8
-└─IndexScan_8	3323.33	cop	table:access_path_selection, index:a, range:[-inf,3), keep order:false, stats:pseudo
+IndexReader_6	3323.33	root	index:IndexScan_5
+└─IndexScan_5	3323.33	cop	table:access_path_selection, index:a, range:[-inf,3), keep order:false, stats:pseudo
 explain select a, b from access_path_selection where a < 3;
 id	count	task	operator info
-IndexReader_12	3323.33	root	index:IndexScan_11
-└─IndexScan_11	3323.33	cop	table:access_path_selection, index:a, b, range:[-inf,3), keep order:false, stats:pseudo
+IndexReader_6	3323.33	root	index:IndexScan_5
+└─IndexScan_5	3323.33	cop	table:access_path_selection, index:a, b, range:[-inf,3), keep order:false, stats:pseudo
 explain select a, b from access_path_selection where b < 3;
 id	count	task	operator info
 IndexLookUp_10	3323.33	root	
@@ -20,9 +20,9 @@ IndexLookUp_10	3323.33	root
 └─TableScan_9	3323.33	cop	table:access_path_selection, keep order:false, stats:pseudo
 explain select a, b from access_path_selection where a < 3 and b < 3;
 id	count	task	operator info
-IndexReader_18	1104.45	root	index:Selection_17
-└─Selection_17	1104.45	cop	lt(test.access_path_selection.b, 3)
-  └─IndexScan_16	3323.33	cop	table:access_path_selection, index:a, b, range:[-inf,3), keep order:false, stats:pseudo
+IndexReader_11	1104.45	root	index:Selection_10
+└─Selection_10	1104.45	cop	lt(test.access_path_selection.b, 3)
+  └─IndexScan_9	3323.33	cop	table:access_path_selection, index:a, b, range:[-inf,3), keep order:false, stats:pseudo
 CREATE TABLE `outdated_statistics` (
 `a` int,
 `b` int,
@@ -41,9 +41,9 @@ analyze table outdated_statistics index idx_ab;
 explain select * from outdated_statistics where a=1 and b=1 and c=1;
 id	count	task	operator info
 IndexLookUp_11	0.00	root	
-├─IndexScan_8	0.00	cop	table:outdated_statistics, index:a, range:[1,1], keep order:false
-└─Selection_10	0.00	cop	eq(test.outdated_statistics.b, 1), eq(test.outdated_statistics.c, 1)
-  └─TableScan_9	0.00	cop	table:outdated_statistics, keep order:false
+├─IndexScan_8	1.00	cop	table:outdated_statistics, index:a, b, range:[1 1,1 1], keep order:false
+└─Selection_10	0.00	cop	eq(test.outdated_statistics.c, 1)
+  └─TableScan_9	1.00	cop	table:outdated_statistics, keep order:false
 CREATE TABLE `unknown_correlation` (
 id int,
 a int,
@@ -55,7 +55,7 @@ ANALYZE TABLE unknown_correlation;
 EXPLAIN SELECT * FROM unknown_correlation WHERE a = 2 ORDER BY id limit 1;
 id	count	task	operator info
 Limit_11	1.00	root	offset:0, count:1
-└─TableReader_29	1.00	root	data:Limit_28
-  └─Limit_28	1.00	cop	offset:0, count:1
-    └─Selection_26	1.00	cop	eq(test.unknown_correlation.a, 2)
-      └─TableScan_25	4.17	cop	table:unknown_correlation, range:[-inf,+inf], keep order:true
+└─TableReader_24	1.00	root	data:Limit_23
+  └─Limit_23	1.00	cop	offset:0, count:1
+    └─Selection_21	1.00	cop	eq(test.unknown_correlation.a, 2)
+      └─TableScan_20	4.17	cop	table:unknown_correlation, range:[-inf,+inf], keep order:true

From 0578678dacf7503287e1df6d47c6bb8b84db619a Mon Sep 17 00:00:00 2001
From: Haibin Xie <lambdax.tyler@gmail.com>
Date: Tue, 19 Feb 2019 10:50:51 +0800
Subject: [PATCH 8/8] address comment

---
 planner/core/find_best_task.go | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go
index 2024f20e6ee18..3526929acf1a0 100644
--- a/planner/core/find_best_task.go
+++ b/planner/core/find_best_task.go
@@ -287,6 +287,8 @@ func (ds *DataSource) getIndexCandidate(path *accessPath, prop *property.Physica
 	return candidate
 }
 
+// skylinePruning prunes access paths according to different factors. An access path can be pruned only if
+// there exists another path that is not worse than it on any factor and is strictly better on at least one.
 func (ds *DataSource) skylinePruning(prop *property.PhysicalProperty) []*candidatePath {
 	candidates := make([]*candidatePath, 0, 4)
 	for _, path := range ds.possibleAccessPaths {
@@ -303,8 +305,7 @@ func (ds *DataSource) skylinePruning(prop *property.PhysicalProperty) []*candida
 			// we have prop to match or
 			// this index is forced to choose.
 			currentCandidate = ds.getIndexCandidate(path, prop)
-		}
-		if currentCandidate == nil {
+		} else {
 			continue
 		}
 		pruned := false