Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-30724][SQL] Support 'LIKE ANY' and 'LIKE ALL' operators #27477

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,7 @@ predicate
| NOT? kind=IN '(' expression (',' expression)* ')'
| NOT? kind=IN '(' query ')'
| NOT? kind=RLIKE pattern=valueExpression
| NOT? kind=LIKE quantifier=(ANY | SOME | ALL) ('('')' | '(' expression (',' expression)* ')')
| NOT? kind=LIKE pattern=valueExpression (ESCAPE escapeChar=STRING)?
| IS NOT? kind=NULL
| IS NOT? kind=(TRUE | FALSE | UNKNOWN)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1373,7 +1373,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
* Add a predicate to the given expression. Supported expressions are:
* - (NOT) BETWEEN
* - (NOT) IN
* - (NOT) LIKE
* - (NOT) LIKE (ANY | SOME | ALL)
* - (NOT) RLIKE
* - IS (NOT) NULL.
* - IS (NOT) (TRUE | FALSE | UNKNOWN)
Expand All @@ -1391,6 +1391,14 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
case other => Seq(other)
}

def getLikeQuantifierExprs(expressions: java.util.List[ExpressionContext]): Seq[Expression] = {
if (expressions.isEmpty) {
throw new ParseException("Expected something between '(' and ')'.", ctx)
} else {
expressions.asScala.map(expression).map(p => invertIfNotDefined(new Like(e, p)))
}
}

// Create the predicate.
ctx.kind.getType match {
case SqlBaseParser.BETWEEN =>
Expand All @@ -1403,14 +1411,21 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
case SqlBaseParser.IN =>
invertIfNotDefined(In(e, ctx.expression.asScala.map(expression)))
case SqlBaseParser.LIKE =>
val escapeChar = Option(ctx.escapeChar).map(string).map { str =>
if (str.length != 1) {
throw new ParseException("Invalid escape string." +
"Escape string must contains only one character.", ctx)
}
str.charAt(0)
}.getOrElse('\\')
invertIfNotDefined(Like(e, expression(ctx.pattern), escapeChar))
Option(ctx.quantifier).map(_.getType) match {
case Some(SqlBaseParser.ANY) | Some(SqlBaseParser.SOME) =>
getLikeQuantifierExprs(ctx.expression).reduceLeft(Or)
case Some(SqlBaseParser.ALL) =>
getLikeQuantifierExprs(ctx.expression).reduceLeft(And)
case _ =>
val escapeChar = Option(ctx.escapeChar).map(string).map { str =>
if (str.length != 1) {
throw new ParseException("Invalid escape string." +
"Escape string must contain only one character.", ctx)
}
str.charAt(0)
}.getOrElse('\\')
invertIfNotDefined(Like(e, expression(ctx.pattern), escapeChar))
}
case SqlBaseParser.RLIKE =>
invertIfNotDefined(RLike(e, expression(ctx.pattern)))
case SqlBaseParser.NULL if ctx.NOT != null =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ class ExpressionParserSuite extends AnalysisTest {
}

test("like escape expressions") {
val message = "Escape string must contains only one character."
val message = "Escape string must contain only one character."
assertEqual("a like 'pattern%' escape '#'", 'a.like("pattern%", '#'))
assertEqual("a like 'pattern%' escape '\"'", 'a.like("pattern%", '\"'))
intercept("a like 'pattern%' escape '##'", message)
Expand All @@ -208,6 +208,21 @@ class ExpressionParserSuite extends AnalysisTest {
assertEqual("a rlike 'pattern\\t\\n'", 'a rlike "pattern\\t\\n", parser)
}

test("(NOT) LIKE (ANY | SOME | ALL) expressions") {
Seq("any", "some").foreach { quantifier =>
assertEqual(s"a like $quantifier ('foo%', 'b%')", ('a like "foo%") || ('a like "b%"))
assertEqual(s"a not like $quantifier ('foo%', 'b%')", !('a like "foo%") || !('a like "b%"))
assertEqual(s"not (a like $quantifier ('foo%', 'b%'))", !(('a like "foo%") || ('a like "b%")))
}
assertEqual("a like all ('foo%', 'b%')", ('a like "foo%") && ('a like "b%"))
assertEqual("a not like all ('foo%', 'b%')", !('a like "foo%") && !('a like "b%"))
assertEqual("not (a like all ('foo%', 'b%'))", !(('a like "foo%") && ('a like "b%")))

Seq("any", "some", "all").foreach { quantifier =>
intercept(s"a like $quantifier()", "Expected something between '(' and ')'")
}
}

test("is null expressions") {
assertEqual("a is null", 'a.isNull)
assertEqual("a is not null", 'a.isNotNull)
Expand Down
39 changes: 39 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/like-all.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
CREATE OR REPLACE TEMPORARY VIEW like_all_table AS SELECT * FROM (VALUES
('google', '%oo%'),
('facebook', '%oo%'),
('linkedin', '%in'))
as t1(company, pat);

SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', '%go%');

SELECT company FROM like_all_table WHERE company LIKE ALL ('microsoft', '%yoo%');

SELECT
company,
CASE
WHEN company LIKE ALL ('%oo%', '%go%') THEN 'Y'
ELSE 'N'
END AS is_available,
CASE
WHEN company LIKE ALL ('%oo%', 'go%') OR company LIKE ALL ('%in', 'ms%') THEN 'Y'
ELSE 'N'
END AS mix
FROM like_all_table ;

-- Mix test with constant pattern and column value
SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', pat);

-- not like all test
SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', '%in', 'fa%');
SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('microsoft', '%yoo%');
SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', 'fa%');
SELECT company FROM like_all_table WHERE NOT company LIKE ALL ('%oo%', 'fa%');

-- null test
SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', NULL);
SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', NULL);
SELECT company FROM like_all_table WHERE company LIKE ALL (NULL, NULL);
SELECT company FROM like_all_table WHERE company NOT LIKE ALL (NULL, NULL);

-- negative case
SELECT company FROM like_any_table WHERE company LIKE ALL ();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is using of non-existing table intentional? I guess the purpose was to check LIKE ALL ().

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's a typo

39 changes: 39 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/like-any.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
CREATE OR REPLACE TEMPORARY VIEW like_any_table AS SELECT * FROM (VALUES
('google', '%oo%'),
('facebook', '%oo%'),
('linkedin', '%in'))
as t1(company, pat);

SELECT company FROM like_any_table WHERE company LIKE ANY ('%oo%', '%in', 'fa%');

SELECT company FROM like_any_table WHERE company LIKE ANY ('microsoft', '%yoo%');

select
company,
CASE
WHEN company LIKE ANY ('%oo%', '%in', 'fa%') THEN 'Y'
ELSE 'N'
END AS is_available,
CASE
WHEN company LIKE ANY ('%oo%', 'fa%') OR company LIKE ANY ('%in', 'ms%') THEN 'Y'
ELSE 'N'
END AS mix
FROM like_any_table;

-- Mix test with constant pattern and column value
SELECT company FROM like_any_table WHERE company LIKE ANY ('%zz%', pat);

-- not like any test
SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', '%in', 'fa%');
SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('microsoft', '%yoo%');
SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', 'fa%');
SELECT company FROM like_any_table WHERE NOT company LIKE ANY ('%oo%', 'fa%');

-- null test
SELECT company FROM like_any_table WHERE company LIKE ANY ('%oo%', NULL);
SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', NULL);
SELECT company FROM like_any_table WHERE company LIKE ANY (NULL, NULL);
SELECT company FROM like_any_table WHERE company NOT LIKE ANY (NULL, NULL);

-- negative case
SELECT company FROM like_any_table WHERE company LIKE ANY ();
140 changes: 140 additions & 0 deletions sql/core/src/test/resources/sql-tests/results/like-all.sql.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 14


Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note: I've checked that the output is the same with PostgreSQL output: https://gist.github.com/maropu/fa4bd6491e21751d6bbc44c545390b0c

-- !query
CREATE OR REPLACE TEMPORARY VIEW like_all_table AS SELECT * FROM (VALUES
('google', '%oo%'),
('facebook', '%oo%'),
('linkedin', '%in'))
as t1(company, pat)
-- !query schema
struct<>
-- !query output



-- !query
SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', '%go%')
-- !query schema
struct<company:string>
-- !query output
google


-- !query
SELECT company FROM like_all_table WHERE company LIKE ALL ('microsoft', '%yoo%')
-- !query schema
struct<company:string>
-- !query output



-- !query
SELECT
company,
CASE
WHEN company LIKE ALL ('%oo%', '%go%') THEN 'Y'
ELSE 'N'
END AS is_available,
CASE
WHEN company LIKE ALL ('%oo%', 'go%') OR company LIKE ALL ('%in', 'ms%') THEN 'Y'
ELSE 'N'
END AS mix
FROM like_all_table
-- !query schema
struct<company:string,is_available:string,mix:string>
-- !query output
facebook N N
google Y Y
linkedin N N


-- !query
SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', pat)
-- !query schema
struct<company:string>
-- !query output
facebook
google


-- !query
SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', '%in', 'fa%')
-- !query schema
struct<company:string>
-- !query output



-- !query
SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('microsoft', '%yoo%')
-- !query schema
struct<company:string>
-- !query output
facebook
google
linkedin


-- !query
SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', 'fa%')
-- !query schema
struct<company:string>
-- !query output
linkedin


-- !query
SELECT company FROM like_all_table WHERE NOT company LIKE ALL ('%oo%', 'fa%')
-- !query schema
struct<company:string>
-- !query output
google
linkedin


-- !query
SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', NULL)
-- !query schema
struct<company:string>
-- !query output



-- !query
SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', NULL)
-- !query schema
struct<company:string>
-- !query output



-- !query
SELECT company FROM like_all_table WHERE company LIKE ALL (NULL, NULL)
-- !query schema
struct<company:string>
-- !query output



-- !query
SELECT company FROM like_all_table WHERE company NOT LIKE ALL (NULL, NULL)
-- !query schema
struct<company:string>
-- !query output



-- !query
SELECT company FROM like_any_table WHERE company LIKE ALL ()
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

Expected something between '(' and ')'.(line 1, pos 49)

== SQL ==
SELECT company FROM like_any_table WHERE company LIKE ALL ()
-------------------------------------------------^^^
Loading