From 6ba418619dcfd139b6b3fa81cf8a29d54ca62681 Mon Sep 17 00:00:00 2001
From: Marco Gaido <marcogaido91@gmail.com>
Date: Sat, 14 Apr 2018 12:11:27 +0200
Subject: [PATCH 1/6] [SPARK-23973][SQL] Remove consecutive Sorts

---
 .../apache/spark/sql/catalyst/optimizer/Optimizer.scala    | 5 ++++-
 .../sql/catalyst/optimizer/RemoveRedundantSortsSuite.scala | 7 +++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 5fb59ef350b8b..b68f736051dd6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -736,12 +736,15 @@ object EliminateSorts extends Rule[LogicalPlan] {
 }
 
 /**
- * Removes Sort operation if the child is already sorted
+ * Removes redundant Sort operation. This can happen:
+ * 1) if the child is already sorted
+ * 2) if the next operator is a Sort itself
  */
 object RemoveRedundantSorts extends Rule[LogicalPlan] {
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
     case Sort(orders, true, child) if SortOrder.orderingSatisfies(child.outputOrdering, orders) =>
       child
+    case s @ Sort(_, _, Sort(_, _, child)) => s.copy(child = child)
   }
 }
 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantSortsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantSortsSuite.scala
index 2319ab8046e56..c0b0c2ac57caf 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantSortsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantSortsSuite.scala
@@ -98,4 +98,11 @@ class RemoveRedundantSortsSuite extends PlanTest {
     val correctAnswer = groupedAndResorted.analyze
     comparePlans(optimized, correctAnswer)
   }
+
+  test("remove two consecutive sorts") {
+    val orderedTwice = testRelation.orderBy('a.asc).orderBy('b.desc)
+    val optimized = Optimize.execute(orderedTwice.analyze)
+    val correctAnswer = testRelation.orderBy('b.desc).analyze
+    comparePlans(optimized, correctAnswer)
+  }
 }

From ff7d4123eef99fc9543a277552ed6019853bbeaa Mon Sep 17 00:00:00 2001
From: Marco Gaido <marcogaido91@gmail.com>
Date: Mon, 16 Apr 2018 10:45:47 +0200
Subject: [PATCH 2/6] remove Sort operator separated by 0...n Project/Filter
 operators

---
 .../sql/catalyst/optimizer/Optimizer.scala    | 11 ++++++++--
 .../optimizer/RemoveRedundantSortsSuite.scala | 20 +++++++++++++++++++
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index b68f736051dd6..b4068dfacf64c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -738,13 +738,20 @@ object EliminateSorts extends Rule[LogicalPlan] {
 /**
  * Removes redundant Sort operation. This can happen:
  * 1) if the child is already sorted
- * 2) if the next operator is a Sort itself
+ * 2) if the there is another Sort operator separated by 0...n Project/Filter operators
  */
 object RemoveRedundantSorts extends Rule[LogicalPlan] {
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
     case Sort(orders, true, child) if SortOrder.orderingSatisfies(child.outputOrdering, orders) =>
       child
-    case s @ Sort(_, _, Sort(_, _, child)) => s.copy(child = child)
+    case s @ Sort(_, _, child) => s.copy(child = recursiveRemoveSort(child))
+  }
+
+  def recursiveRemoveSort(plan: LogicalPlan): LogicalPlan = plan match {
+    case Project(fields, child) => Project(fields, recursiveRemoveSort(child))
+    case Filter(condition, child) => Filter(condition, recursiveRemoveSort(child))
+    case Sort(_, _, child) => recursiveRemoveSort(child)
+    case _ => plan
   }
 }
 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantSortsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantSortsSuite.scala
index c0b0c2ac57caf..a1ac9e3f3339a 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantSortsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantSortsSuite.scala
@@ -105,4 +105,24 @@ class RemoveRedundantSortsSuite extends PlanTest {
     val correctAnswer = testRelation.orderBy('b.desc).analyze
     comparePlans(optimized, correctAnswer)
   }
+
+  test("remove sorts separated by Filter/Project operators") {
+    val orderedTwiceWithProject = testRelation.orderBy('a.asc).select('b).orderBy('b.desc)
+    val optimizedWithProject = Optimize.execute(orderedTwiceWithProject.analyze)
+    val correctAnswerWithProject = testRelation.select('b).orderBy('b.desc).analyze
+    comparePlans(optimizedWithProject, correctAnswerWithProject)
+
+    val orderedTwiceWithFilter =
+      testRelation.orderBy('a.asc).where('b > Literal(0)).orderBy('b.desc)
+    val optimizedWithFilter = Optimize.execute(orderedTwiceWithFilter.analyze)
+    val correctAnswerWithFilter = testRelation.where('b > Literal(0)).orderBy('b.desc).analyze
+    comparePlans(optimizedWithFilter, correctAnswerWithFilter)
+
+    val orderedTwiceWithBoth =
+      testRelation.orderBy('a.asc).select('b).where('b > Literal(0)).orderBy('b.desc)
+    val optimizedWithBoth = Optimize.execute(orderedTwiceWithBoth.analyze)
+    val correctAnswerWithBoth =
+      testRelation.select('b).where('b > Literal(0)).orderBy('b.desc).analyze
+    comparePlans(optimizedWithBoth, correctAnswerWithBoth)
+  }
 }

From ac03bedc4b74616a164f2ab1cc9ea7ecb901155b Mon Sep 17 00:00:00 2001
From: Marco Gaido <marcogaido91@gmail.com>
Date: Mon, 16 Apr 2018 14:12:12 +0200
Subject: [PATCH 3/6] fix test

---
 .../sql/catalyst/optimizer/RemoveRedundantSortsSuite.scala  | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantSortsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantSortsSuite.scala
index a1ac9e3f3339a..3e238566e9653 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantSortsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantSortsSuite.scala
@@ -42,15 +42,15 @@ class RemoveRedundantSortsSuite extends PlanTest {
 
   test("remove redundant order by") {
     val orderedPlan = testRelation.select('a, 'b).orderBy('a.asc, 'b.desc_nullsFirst)
-    val unnecessaryReordered = orderedPlan.select('a).orderBy('a.asc, 'b.desc_nullsFirst)
+    val unnecessaryReordered = orderedPlan.limit(2).select('a).orderBy('a.asc, 'b.desc_nullsFirst)
     val optimized = Optimize.execute(unnecessaryReordered.analyze)
-    val correctAnswer = orderedPlan.select('a).analyze
+    val correctAnswer = orderedPlan.limit(2).select('a).analyze
     comparePlans(Optimize.execute(optimized), correctAnswer)
   }
 
   test("do not remove sort if the order is different") {
     val orderedPlan = testRelation.select('a, 'b).orderBy('a.asc, 'b.desc_nullsFirst)
-    val reorderedDifferently = orderedPlan.select('a).orderBy('a.asc, 'b.desc)
+    val reorderedDifferently = orderedPlan.limit(2).select('a).orderBy('a.asc, 'b.desc)
     val optimized = Optimize.execute(reorderedDifferently.analyze)
     val correctAnswer = reorderedDifferently.analyze
     comparePlans(optimized, correctAnswer)

From 1d6ca1e05bfd8d8cfcb7db08af1b27a9029f8921 Mon Sep 17 00:00:00 2001
From: Marco Gaido <marcogaido91@gmail.com>
Date: Tue, 17 Apr 2018 11:36:50 +0200
Subject: [PATCH 4/6] add test cases + fix typo

---
 .../sql/catalyst/optimizer/Optimizer.scala     |  2 +-
 .../optimizer/RemoveRedundantSortsSuite.scala  | 18 ++++++++++++++----
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index b4068dfacf64c..3e60c392b1caf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -738,7 +738,7 @@ object EliminateSorts extends Rule[LogicalPlan] {
 /**
  * Removes redundant Sort operation. This can happen:
  * 1) if the child is already sorted
- * 2) if the there is another Sort operator separated by 0...n Project/Filter operators
+ * 2) if there is another Sort operator separated by 0...n Project/Filter operators
  */
 object RemoveRedundantSorts extends Rule[LogicalPlan] {
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantSortsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantSortsSuite.scala
index 3e238566e9653..dae5e6f3ee3dd 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantSortsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantSortsSuite.scala
@@ -17,16 +17,12 @@
 
 package org.apache.spark.sql.catalyst.optimizer
 
-import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
-import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.dsl.plans._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules._
-import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.internal.SQLConf.{CASE_SENSITIVE, ORDER_BY_ORDINAL}
 
 class RemoveRedundantSortsSuite extends PlanTest {
 
@@ -72,6 +68,14 @@ class RemoveRedundantSortsSuite extends PlanTest {
     comparePlans(optimized, correctAnswer)
   }
 
+  test("different sorts are not simplified if limit is in between") {
+    val orderedPlan = testRelation.select('a, 'b).orderBy('b.desc).limit(Literal(10))
+      .orderBy('a.asc)
+    val optimized = Optimize.execute(orderedPlan.analyze)
+    val correctAnswer = orderedPlan.analyze
+    comparePlans(optimized, correctAnswer)
+  }
+
   test("range is already sorted") {
     val inputPlan = Range(1L, 1000L, 1, 10)
     val orderedPlan = inputPlan.orderBy('id.asc)
@@ -124,5 +128,11 @@ class RemoveRedundantSortsSuite extends PlanTest {
     val correctAnswerWithBoth =
       testRelation.select('b).where('b > Literal(0)).orderBy('b.desc).analyze
     comparePlans(optimizedWithBoth, correctAnswerWithBoth)
+
+    val orderedThrice = orderedTwiceWithBoth.select(('b + 1).as('c)).orderBy('c.asc)
+    val optimizedThrice = Optimize.execute(orderedThrice.analyze)
+    val correctAnswerThrice = testRelation.select('b).where('b > Literal(0))
+      .select(('b + 1).as('c)).orderBy('c.asc).analyze
+    comparePlans(optimizedThrice, correctAnswerThrice)
   }
 }

From e7391f3990e7d944ca0248e5088b8936652580e9 Mon Sep 17 00:00:00 2001
From: Marco Gaido <marcogaido91@gmail.com>
Date: Mon, 23 Apr 2018 14:41:38 +0200
Subject: [PATCH 5/6] address comments

---
 .../spark/sql/catalyst/optimizer/Optimizer.scala      | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 3e60c392b1caf..fe9a03bf6b689 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -748,11 +748,18 @@ object RemoveRedundantSorts extends Rule[LogicalPlan] {
   }
 
   def recursiveRemoveSort(plan: LogicalPlan): LogicalPlan = plan match {
-    case Project(fields, child) => Project(fields, recursiveRemoveSort(child))
-    case Filter(condition, child) => Filter(condition, recursiveRemoveSort(child))
     case Sort(_, _, child) => recursiveRemoveSort(child)
+    case other if canEliminateSort(other) =>
+      other.withNewChildren(other.children.map(recursiveRemoveSort))
     case _ => plan
   }
+
+  def canEliminateSort(plan: LogicalPlan): Boolean = plan match {
+    case p: Project => p.projectList.forall(_.deterministic)
+    case f: Filter => f.condition.deterministic
+    case _: ResolvedHint => true
+    case _ => false
+  }
 }
 
 /**

From e2f4d4dc2c5741475ae5d085d0c804f2a60a74fb Mon Sep 17 00:00:00 2001
From: Marco Gaido <marcogaido91@gmail.com>
Date: Mon, 23 Apr 2018 15:05:19 +0200
Subject: [PATCH 6/6] address comment

---
 .../org/apache/spark/sql/catalyst/optimizer/Optimizer.scala     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index fe9a03bf6b689..88c396d3bb50f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -741,7 +741,7 @@ object EliminateSorts extends Rule[LogicalPlan] {
  * 2) if there is another Sort operator separated by 0...n Project/Filter operators
  */
 object RemoveRedundantSorts extends Rule[LogicalPlan] {
-  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
+  def apply(plan: LogicalPlan): LogicalPlan = plan transformDown {
     case Sort(orders, true, child) if SortOrder.orderingSatisfies(child.outputOrdering, orders) =>
       child
     case s @ Sort(_, _, child) => s.copy(child = recursiveRemoveSort(child))