-
Notifications
You must be signed in to change notification settings - Fork 28.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-30724][SQL] Support 'LIKE ANY' and 'LIKE ALL' operators #27477
Changes from 5 commits
4548b0f
5475675
de7c398
caea82a
88ca4c2
1c3d98c
2b59cb0
0656b05
cf4666f
f9af131
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -748,6 +748,7 @@ predicate | |
| NOT? kind=IN '(' expression (',' expression)* ')' | ||
| NOT? kind=IN '(' query ')' | ||
| NOT? kind=RLIKE pattern=valueExpression | ||
| NOT? kind=LIKE quantifier=(ANY | ALL) ('('')' | '(' expression (',' expression)* ')') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. shall we support |
||
| NOT? kind=LIKE pattern=valueExpression (ESCAPE escapeChar=STRING)? | ||
| IS NOT? kind=NULL | ||
| IS NOT? kind=(TRUE | FALSE | UNKNOWN) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1357,7 +1357,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging | |
* Add a predicate to the given expression. Supported expressions are: | ||
* - (NOT) BETWEEN | ||
* - (NOT) IN | ||
* - (NOT) LIKE | ||
* - (NOT) LIKE (ANY | ALL) | ||
* - (NOT) RLIKE | ||
* - IS (NOT) NULL. | ||
* - IS (NOT) (TRUE | FALSE | UNKNOWN) | ||
|
@@ -1375,6 +1375,14 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging | |
case other => Seq(other) | ||
} | ||
|
||
def getLikeQuantifierExps(expressions: java.util.List[ExpressionContext]): Seq[Expression] = { | ||
if (expressions.isEmpty) { | ||
throw new ParseException("Syntax error: expected something between '(' and ')'.", ctx) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should remove |
||
} else { | ||
expressions.asScala.map(expression).map(p => invertIfNotDefined(new Like(e, p))) | ||
} | ||
} | ||
|
||
// Create the predicate. | ||
ctx.kind.getType match { | ||
case SqlBaseParser.BETWEEN => | ||
|
@@ -1387,14 +1395,21 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging | |
case SqlBaseParser.IN => | ||
invertIfNotDefined(In(e, ctx.expression.asScala.map(expression))) | ||
case SqlBaseParser.LIKE => | ||
val escapeChar = Option(ctx.escapeChar).map(string).map { str => | ||
if (str.length != 1) { | ||
throw new ParseException("Invalid escape string." + | ||
"Escape string must contains only one character.", ctx) | ||
} | ||
str | ||
}.getOrElse('\\') | ||
invertIfNotDefined(Like(e, expression(ctx.pattern), Literal(escapeChar))) | ||
Option(ctx.quantifier).map(_.getType) match { | ||
case Some(SqlBaseParser.ANY) => | ||
getLikeQuantifierExps(ctx.expression).reduceLeft(Or) | ||
case Some(SqlBaseParser.ALL) => | ||
getLikeQuantifierExps(ctx.expression).reduceLeft(And) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: getLikeQuantifierExps -> getLikeQuantifierExprs ? |
||
case _ => | ||
val escapeChar = Option(ctx.escapeChar).map(string).map { str => | ||
if (str.length != 1) { | ||
throw new ParseException("Invalid escape string." + | ||
"Escape string must contains only one character.", ctx) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: contains -> contain ? |
||
} | ||
str | ||
}.getOrElse('\\') | ||
invertIfNotDefined(Like(e, expression(ctx.pattern), Literal(escapeChar))) | ||
} | ||
case SqlBaseParser.RLIKE => | ||
invertIfNotDefined(RLike(e, expression(ctx.pattern))) | ||
case SqlBaseParser.NULL if ctx.NOT != null => | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
CREATE OR REPLACE TEMPORARY VIEW like_all_table AS SELECT * FROM (VALUES | ||
('google', '%oo%'), | ||
('facebook', '%oo%'), | ||
('linkedin', '%in')) | ||
as t1(company, pat); | ||
|
||
SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', '%go%'); | ||
|
||
SELECT company FROM like_all_table WHERE company LIKE ALL ('microsoft', '%yoo%'); | ||
|
||
SELECT | ||
company, | ||
CASE | ||
WHEN company LIKE ALL ('%oo%', '%go%') THEN 'Y' | ||
ELSE 'N' | ||
END AS is_available, | ||
CASE | ||
WHEN company LIKE ALL ('%oo%', 'go%') OR company LIKE ALL ('%in', 'ms%') THEN 'Y' | ||
ELSE 'N' | ||
END AS mix | ||
FROM like_all_table ; | ||
|
||
-- Mix test with constant pattern and column value | ||
SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', pat); | ||
|
||
-- not like all test | ||
SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', '%in', 'fa%'); | ||
SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('microsoft', '%yoo%'); | ||
|
||
-- null test | ||
SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', NULL); | ||
SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', NULL); | ||
SELECT company FROM like_all_table WHERE company LIKE ALL (NULL, NULL); | ||
SELECT company FROM like_all_table WHERE company NOT LIKE ALL (NULL, NULL); | ||
|
||
-- negative case | ||
SELECT company FROM like_any_table WHERE company LIKE ALL (); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the use of a non-existent table intentional? I guess the purpose was to check There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's a typo. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
CREATE OR REPLACE TEMPORARY VIEW like_any_table AS SELECT * FROM (VALUES | ||
('google', '%oo%'), | ||
('facebook', '%oo%'), | ||
('linkedin', '%in')) | ||
as t1(company, pat); | ||
|
||
SELECT company FROM like_any_table WHERE company LIKE ANY ('%oo%', '%in', 'fa%'); | ||
|
||
SELECT company FROM like_any_table WHERE company LIKE ANY ('microsoft', '%yoo%'); | ||
|
||
select | ||
company, | ||
CASE | ||
WHEN company LIKE ANY ('%oo%', '%in', 'fa%') THEN 'Y' | ||
ELSE 'N' | ||
END AS is_available, | ||
CASE | ||
WHEN company LIKE ANY ('%oo%', 'fa%') OR company LIKE ANY ('%in', 'ms%') THEN 'Y' | ||
ELSE 'N' | ||
END AS mix | ||
FROM like_any_table; | ||
|
||
-- Mix test with constant pattern and column value | ||
SELECT company FROM like_any_table WHERE company LIKE ANY ('%zz%', pat); | ||
|
||
-- not like any test | ||
SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', '%in', 'fa%'); | ||
SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('microsoft', '%yoo%'); | ||
|
||
-- null test | ||
SELECT company FROM like_any_table WHERE company LIKE ANY ('%oo%', NULL); | ||
SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', NULL); | ||
SELECT company FROM like_any_table WHERE company LIKE ANY (NULL, NULL); | ||
SELECT company FROM like_any_table WHERE company NOT LIKE ANY (NULL, NULL); | ||
|
||
-- negative case | ||
SELECT company FROM like_any_table WHERE company LIKE ANY (); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
-- Automatically generated by SQLQueryTestSuite | ||
-- Number of queries: 12 | ||
|
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. note: I've checked that the output is the same with PostgreSQL output: https://gist.github.com/maropu/fa4bd6491e21751d6bbc44c545390b0c |
||
-- !query | ||
CREATE OR REPLACE TEMPORARY VIEW like_all_table AS SELECT * FROM (VALUES | ||
('google', '%oo%'), | ||
('facebook', '%oo%'), | ||
('linkedin', '%in')) | ||
as t1(company, pat) | ||
-- !query schema | ||
struct<> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', '%go%') | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
-- !query | ||
SELECT company FROM like_all_table WHERE company LIKE ALL ('microsoft', '%yoo%') | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
SELECT | ||
company, | ||
CASE | ||
WHEN company LIKE ALL ('%oo%', '%go%') THEN 'Y' | ||
ELSE 'N' | ||
END AS is_available, | ||
CASE | ||
WHEN company LIKE ALL ('%oo%', 'go%') OR company LIKE ALL ('%in', 'ms%') THEN 'Y' | ||
ELSE 'N' | ||
END AS mix | ||
FROM like_all_table | ||
-- !query schema | ||
struct<company:string,is_available:string,mix:string> | ||
-- !query output | ||
facebook N N | ||
google Y Y | ||
linkedin N N | ||
|
||
|
||
-- !query | ||
SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', pat) | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
-- !query | ||
SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', '%in', 'fa%') | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('microsoft', '%yoo%') | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
-- !query | ||
SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', NULL) | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', NULL) | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
SELECT company FROM like_all_table WHERE company LIKE ALL (NULL, NULL) | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
SELECT company FROM like_all_table WHERE company NOT LIKE ALL (NULL, NULL) | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
SELECT company FROM like_any_table WHERE company LIKE ALL () | ||
-- !query schema | ||
struct<> | ||
-- !query output | ||
org.apache.spark.sql.catalyst.parser.ParseException | ||
|
||
Syntax error: expected something between '(' and ')'.(line 1, pos 49) | ||
|
||
== SQL == | ||
SELECT company FROM like_any_table WHERE company LIKE ALL () | ||
-------------------------------------------------^^^ |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
-- Automatically generated by SQLQueryTestSuite | ||
-- Number of queries: 12 | ||
|
||
|
||
-- !query | ||
CREATE OR REPLACE TEMPORARY VIEW like_any_table AS SELECT * FROM (VALUES | ||
('google', '%oo%'), | ||
('facebook', '%oo%'), | ||
('linkedin', '%in')) | ||
as t1(company, pat) | ||
-- !query schema | ||
struct<> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
SELECT company FROM like_any_table WHERE company LIKE ANY ('%oo%', '%in', 'fa%') | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
-- !query | ||
SELECT company FROM like_any_table WHERE company LIKE ANY ('microsoft', '%yoo%') | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
select | ||
company, | ||
CASE | ||
WHEN company LIKE ANY ('%oo%', '%in', 'fa%') THEN 'Y' | ||
ELSE 'N' | ||
END AS is_available, | ||
CASE | ||
WHEN company LIKE ANY ('%oo%', 'fa%') OR company LIKE ANY ('%in', 'ms%') THEN 'Y' | ||
ELSE 'N' | ||
END AS mix | ||
FROM like_any_table | ||
-- !query schema | ||
struct<company:string,is_available:string,mix:string> | ||
-- !query output | ||
facebook Y Y | ||
google Y Y | ||
linkedin Y Y | ||
|
||
|
||
-- !query | ||
SELECT company FROM like_any_table WHERE company LIKE ANY ('%zz%', pat) | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
-- !query | ||
SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', '%in', 'fa%') | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
-- !query | ||
SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('microsoft', '%yoo%') | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
-- !query | ||
SELECT company FROM like_any_table WHERE company LIKE ANY ('%oo%', NULL) | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
-- !query | ||
SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', NULL) | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
-- !query | ||
SELECT company FROM like_any_table WHERE company LIKE ANY (NULL, NULL) | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
SELECT company FROM like_any_table WHERE company NOT LIKE ANY (NULL, NULL) | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
SELECT company FROM like_any_table WHERE company LIKE ANY () | ||
-- !query schema | ||
struct<> | ||
-- !query output | ||
org.apache.spark.sql.catalyst.parser.ParseException | ||
|
||
Syntax error: expected something between '(' and ')'.(line 1, pos 49) | ||
|
||
== SQL == | ||
SELECT company FROM like_any_table WHERE company LIKE ANY () | ||
-------------------------------------------------^^^ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What happened previously when we didn't have
'('')' |
here? I guessed that it was also a ParseException.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Otherwise it will throw an
AnalysisException:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, it's treated as a
function.
I got it.