From 77cc25fb7473d8a06b727d2ba5ee62db1c651cf8 Mon Sep 17 00:00:00 2001
From: Zhongshuai Pei <799203320@qq.com>
Date: Thu, 30 Apr 2015 15:22:13 -0700
Subject: [PATCH] [SPARK-7267][SQL] Push down Project when its child is Limit

SQL:
```
select key from (select key,value from t1 limit 100) t2 limit 10
```
Optimized Logical Plan before the change:
```
== Optimized Logical Plan ==
Limit 10
 Project key#228
  Limit 100
   MetastoreRelation default, t1, None
```
Optimized Logical Plan after the change:
```
== Optimized Logical Plan ==
Limit 10
 Limit 100
  Project key#228
   MetastoreRelation default, t1, None
```
After this change, the two adjacent Limit operators can be combined.

Author: Zhongshuai Pei <799203320@qq.com>
Author: DoingDone9 <799203320@qq.com>

Closes #5797 from DoingDone9/ProjectLimit and squashes the following commits:

70d0fca [Zhongshuai Pei] Update FilterPushdownSuite.scala
dc83ae9 [Zhongshuai Pei] Update FilterPushdownSuite.scala
485c61c [Zhongshuai Pei] Update Optimizer.scala
f03fe7f [Zhongshuai Pei] Merge pull request #12 from apache/master
f12fa50 [Zhongshuai Pei] Merge pull request #10 from apache/master
f61210c [Zhongshuai Pei] Merge pull request #9 from apache/master
34b1a9a [Zhongshuai Pei] Merge pull request #8 from apache/master
802261c [DoingDone9] Merge pull request #7 from apache/master
d00303b [DoingDone9] Merge pull request #6 from apache/master
98b134f [DoingDone9] Merge pull request #5 from apache/master
161cae3 [DoingDone9] Merge pull request #4 from apache/master
c87e8b6 [DoingDone9] Merge pull request #3 from apache/master
cb1852d [DoingDone9] Merge pull request #2 from apache/master
c3f046f [DoingDone9] Merge pull request #1 from apache/master
---
 .../spark/sql/catalyst/optimizer/Optimizer.scala   |  3 +++
 .../catalyst/optimizer/FilterPushdownSuite.scala   | 16 ++++++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 485761dd8538b..63669e970b1ec 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -170,6 +170,9 @@ object ColumnPruning extends Rule[LogicalPlan] {
 
       Project(substitutedProjection, child)
 
+    case Project(projectList, Limit(exp, child)) =>
+      Limit(exp, Project(projectList, child))
+
     // Eliminate no-op Projects
     case Project(projectList, child) if child.output == projectList => child
   }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala
index aa9708b164efa..2ad73941ab978 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala
@@ -90,7 +90,23 @@ class FilterPushdownSuite extends PlanTest {
 
     comparePlans(optimized, correctAnswer)
   }
+
+  test("column pruning for Project(ne, Limit)") {
+    val originalQuery =
+      testRelation
+        .select('a,'b)
+        .limit(2)
+        .select('a)
+
+    val optimized = Optimize.execute(originalQuery.analyze)
+    val correctAnswer =
+      testRelation
+        .select('a)
+        .limit(2).analyze
+
+    comparePlans(optimized, correctAnswer)
+  }
 
   test("simple push down") {
     val originalQuery =
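
The rule itself is only two lines, so it may help to see the reordering end to end. Below is a minimal, standalone Scala sketch, assuming a toy `Plan` ADT and a hand-rolled bottom-up `transform` helper rather than Spark's real Catalyst `LogicalPlan`/`Rule` machinery; the simplified `combineLimits` here compares plain Ints, whereas the real CombineLimits rule compares limit expressions. Everything named in the sketch is hypothetical and for illustration only.

```
// Standalone sketch of the rewrite this patch adds (hypothetical types, not Catalyst).
object ProjectLimitSketch {

  sealed trait Plan
  case class Relation(name: String) extends Plan
  case class Project(columns: Seq[String], child: Plan) extends Plan
  case class Limit(n: Int, child: Plan) extends Plan

  // Bottom-up rewrite: transform children first, then try the rule on the node itself.
  def transform(plan: Plan)(rule: PartialFunction[Plan, Plan]): Plan = {
    val withNewChildren = plan match {
      case Project(cols, child) => Project(cols, transform(child)(rule))
      case Limit(n, child)      => Limit(n, transform(child)(rule))
      case leaf                 => leaf
    }
    rule.applyOrElse(withNewChildren, (p: Plan) => p)
  }

  // The rewrite added by this patch: a Project sitting directly on a Limit is
  // pushed beneath it, so the Limit floats up toward any Limit above it.
  val pushProjectUnderLimit: PartialFunction[Plan, Plan] = {
    case Project(cols, Limit(n, child)) => Limit(n, Project(cols, child))
  }

  // The follow-up the commit message alludes to ("we can combine limits"):
  // two adjacent Limits collapse into one that keeps the smaller row count.
  val combineLimits: PartialFunction[Plan, Plan] = {
    case Limit(outer, Limit(inner, child)) => Limit(math.min(outer, inner), child)
  }

  def main(args: Array[String]): Unit = {
    // Mirrors the query from the commit message:
    //   select key from (select key,value from t1 limit 100) t2 limit 10
    val plan = Limit(10, Project(Seq("key"), Limit(100, Relation("t1"))))

    val reordered = transform(plan)(pushProjectUnderLimit)
    println(reordered) // Limit(10,Limit(100,Project(List(key),Relation(t1))))

    val combined = transform(reordered)(combineLimits)
    println(combined)  // Limit(10,Project(List(key),Relation(t1)))
  }
}
```

Running `main` prints the reordered plan and then the single collapsed Limit, mirroring the before/after optimized plans shown in the commit message.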