From 423baea953996a66dde671ff6db2fb1f32fbe8cb Mon Sep 17 00:00:00 2001 From: Cheng Lian Date: Tue, 18 Nov 2014 17:41:54 -0800 Subject: [PATCH] [SPARK-4468][SQL] Fixes Parquet filter creation for inequality predicates with literals on the left hand side For expressions like `10 < someVar`, we should create an `Operators.Gt` filter, but right now an `Operators.Lt` is created. This issue affects all inequality predicates with literals on the left hand side. (This bug existed before #3317 and affects branch-1.1. #3338 was opened to backport this to branch-1.1.) [Review on Reviewable](https://reviewable.io/reviews/apache/spark/3334) Author: Cheng Lian Closes #3334 from liancheng/fix-parquet-comp-filter and squashes the following commits: 0130897 [Cheng Lian] Fixes Parquet comparison filter generation --- .../apache/spark/sql/parquet/ParquetFilters.scala | 8 ++++---- .../apache/spark/sql/parquet/ParquetQuerySuite.scala | 12 ++++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala index 3a9e1499e2dc4..6fb5f49b13668 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala @@ -135,22 +135,22 @@ private[sql] object ParquetFilters { case LessThan(NamedExpression(name, _), Literal(value, dataType)) => makeLt.lift(dataType).map(_(name, value)) case LessThan(Literal(value, dataType), NamedExpression(name, _)) => - makeLt.lift(dataType).map(_(name, value)) + makeGt.lift(dataType).map(_(name, value)) case LessThanOrEqual(NamedExpression(name, _), Literal(value, dataType)) => makeLtEq.lift(dataType).map(_(name, value)) case LessThanOrEqual(Literal(value, dataType), NamedExpression(name, _)) => - makeLtEq.lift(dataType).map(_(name, value)) + makeGtEq.lift(dataType).map(_(name, value)) case GreaterThan(NamedExpression(name, _), Literal(value, dataType)) => makeGt.lift(dataType).map(_(name, value)) case GreaterThan(Literal(value, dataType), NamedExpression(name, _)) => - makeGt.lift(dataType).map(_(name, value)) + makeLt.lift(dataType).map(_(name, value)) case GreaterThanOrEqual(NamedExpression(name, _), Literal(value, dataType)) => makeGtEq.lift(dataType).map(_(name, value)) case GreaterThanOrEqual(Literal(value, dataType), NamedExpression(name, _)) => - makeGtEq.lift(dataType).map(_(name, value)) + makeLtEq.lift(dataType).map(_(name, value)) case And(lhs, rhs) => (createFilter(lhs) ++ createFilter(rhs)).reduceOption(FilterApi.and) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala index d31a9d8418dee..7ee4f3c1e93eb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala @@ -461,9 +461,21 @@ class ParquetQuerySuite extends QueryTest with FunSuiteLike with BeforeAndAfterA } checkFilter[Operators.Eq[Integer]]('a.int === 1) + checkFilter[Operators.Eq[Integer]](Literal(1) === 'a.int) + checkFilter[Operators.Lt[Integer]]('a.int < 4) + checkFilter[Operators.Lt[Integer]](Literal(4) > 'a.int) + checkFilter[Operators.LtEq[Integer]]('a.int <= 4) + checkFilter[Operators.LtEq[Integer]](Literal(4) >= 'a.int) + + checkFilter[Operators.Gt[Integer]]('a.int > 4) + checkFilter[Operators.Gt[Integer]](Literal(4) < 'a.int) + checkFilter[Operators.GtEq[Integer]]('a.int >= 4) + checkFilter[Operators.GtEq[Integer]](Literal(4) <= 'a.int) + checkFilter[Operators.And]('a.int === 1 && 'a.int < 4) checkFilter[Operators.Or]('a.int === 1 || 'a.int < 4) + checkFilter[Operators.Not](!('a.int === 1)) checkFilter('a.int > 'b.int, defined = false) checkFilter(('a.int > 'b.int) && ('a.int > 'b.int), defined = false)