Skip to content

Commit

Permalink
[SPARK-47072][SQL] Fix supported interval formats in error messages
Browse files Browse the repository at this point in the history
In the PR, I propose to add one more field to keys of `supportedFormat` in `IntervalUtils` because current implementation has duplicate keys that overwrites each other. For instance, the following keys are the same:
```
(YM.YEAR, YM.MONTH)
...
(DT.DAY, DT.HOUR)
```
because `YM.YEAR = DT.DAY = 0` and `YM.MONTH = DT.HOUR = 1`.

To fix the incorrect error message when Spark cannot parse ANSI interval string. For example, the expected format should be some year-month format but Spark outputs day-time one:
```sql
spark-sql (default)> select interval '-\t2-2\t' year to month;

Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -	2-2	. (line 1, pos 16)

== SQL ==
select interval '-\t2-2\t' year to month
----------------^^^
```

Yes.

By running the existing test suite:
```
$ build/sbt "test:testOnly *IntervalUtilsSuite"
```
and regenerating the golden files:
```
$ SPARK_GENERATE_GOLDEN_FILES=1 PYSPARK_PYTHON=python3 build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite"
```

No.

Closes apache#45127 from MaxGekk/fix-supportedFormat.

Authored-by: Max Gekk <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
(cherry picked from commit 074fcf2)
Signed-off-by: Max Gekk <[email protected]>
  • Loading branch information
MaxGekk committed Feb 16, 2024
1 parent 1c1c5fa commit f1cbe30
Show file tree
Hide file tree
Showing 6 changed files with 27 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -107,25 +107,30 @@ object IntervalUtils extends SparkIntervalUtils {
fallBackNotice: Option[String] = None) = {
throw new IllegalArgumentException(
s"Interval string does not match $intervalStr format of " +
s"${supportedFormat((startFiled, endField)).map(format => s"`$format`").mkString(", ")} " +
s"${supportedFormat((intervalStr, startFiled, endField))
.map(format => s"`$format`").mkString(", ")} " +
s"when cast to $typeName: ${input.toString}" +
s"${fallBackNotice.map(s => s", $s").getOrElse("")}")
}

val supportedFormat = Map(
(YM.YEAR, YM.MONTH) -> Seq("[+|-]y-m", "INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH"),
(YM.YEAR, YM.YEAR) -> Seq("[+|-]y", "INTERVAL [+|-]'[+|-]y' YEAR"),
(YM.MONTH, YM.MONTH) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MONTH"),
(DT.DAY, DT.DAY) -> Seq("[+|-]d", "INTERVAL [+|-]'[+|-]d' DAY"),
(DT.DAY, DT.HOUR) -> Seq("[+|-]d h", "INTERVAL [+|-]'[+|-]d h' DAY TO HOUR"),
(DT.DAY, DT.MINUTE) -> Seq("[+|-]d h:m", "INTERVAL [+|-]'[+|-]d h:m' DAY TO MINUTE"),
(DT.DAY, DT.SECOND) -> Seq("[+|-]d h:m:s.n", "INTERVAL [+|-]'[+|-]d h:m:s.n' DAY TO SECOND"),
(DT.HOUR, DT.HOUR) -> Seq("[+|-]h", "INTERVAL [+|-]'[+|-]h' HOUR"),
(DT.HOUR, DT.MINUTE) -> Seq("[+|-]h:m", "INTERVAL [+|-]'[+|-]h:m' HOUR TO MINUTE"),
(DT.HOUR, DT.SECOND) -> Seq("[+|-]h:m:s.n", "INTERVAL [+|-]'[+|-]h:m:s.n' HOUR TO SECOND"),
(DT.MINUTE, DT.MINUTE) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MINUTE"),
(DT.MINUTE, DT.SECOND) -> Seq("[+|-]m:s.n", "INTERVAL [+|-]'[+|-]m:s.n' MINUTE TO SECOND"),
(DT.SECOND, DT.SECOND) -> Seq("[+|-]s.n", "INTERVAL [+|-]'[+|-]s.n' SECOND")
("year-month", YM.YEAR, YM.MONTH) -> Seq("[+|-]y-m", "INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH"),
("year-month", YM.YEAR, YM.YEAR) -> Seq("[+|-]y", "INTERVAL [+|-]'[+|-]y' YEAR"),
("year-month", YM.MONTH, YM.MONTH) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MONTH"),
("day-time", DT.DAY, DT.DAY) -> Seq("[+|-]d", "INTERVAL [+|-]'[+|-]d' DAY"),
("day-time", DT.DAY, DT.HOUR) -> Seq("[+|-]d h", "INTERVAL [+|-]'[+|-]d h' DAY TO HOUR"),
("day-time", DT.DAY, DT.MINUTE) ->
Seq("[+|-]d h:m", "INTERVAL [+|-]'[+|-]d h:m' DAY TO MINUTE"),
("day-time", DT.DAY, DT.SECOND) ->
Seq("[+|-]d h:m:s.n", "INTERVAL [+|-]'[+|-]d h:m:s.n' DAY TO SECOND"),
("day-time", DT.HOUR, DT.HOUR) -> Seq("[+|-]h", "INTERVAL [+|-]'[+|-]h' HOUR"),
("day-time", DT.HOUR, DT.MINUTE) -> Seq("[+|-]h:m", "INTERVAL [+|-]'[+|-]h:m' HOUR TO MINUTE"),
("day-time", DT.HOUR, DT.SECOND) ->
Seq("[+|-]h:m:s.n", "INTERVAL [+|-]'[+|-]h:m:s.n' HOUR TO SECOND"),
("day-time", DT.MINUTE, DT.MINUTE) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MINUTE"),
("day-time", DT.MINUTE, DT.SECOND) ->
Seq("[+|-]m:s.n", "INTERVAL [+|-]'[+|-]m:s.n' MINUTE TO SECOND"),
("day-time", DT.SECOND, DT.SECOND) -> Seq("[+|-]s.n", "INTERVAL [+|-]'[+|-]s.n' SECOND")
)

def castStringToYMInterval(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1174,7 +1174,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
Seq("INTERVAL '1-1' YEAR", "INTERVAL '1-1' MONTH").foreach { interval =>
val dataType = YearMonthIntervalType()
val expectedMsg = s"Interval string does not match year-month format of " +
s"${IntervalUtils.supportedFormat((dataType.startField, dataType.endField))
s"${IntervalUtils.supportedFormat(("year-month", dataType.startField, dataType.endField))
.map(format => s"`$format`").mkString(", ")} " +
s"when cast to ${dataType.typeName}: $interval"
checkExceptionInExpression[IllegalArgumentException](
Expand All @@ -1194,7 +1194,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
("INTERVAL '1' MONTH", YearMonthIntervalType(YEAR, MONTH)))
.foreach { case (interval, dataType) =>
val expectedMsg = s"Interval string does not match year-month format of " +
s"${IntervalUtils.supportedFormat((dataType.startField, dataType.endField))
s"${IntervalUtils.supportedFormat(("year-month", dataType.startField, dataType.endField))
.map(format => s"`$format`").mkString(", ")} " +
s"when cast to ${dataType.typeName}: $interval"
checkExceptionInExpression[IllegalArgumentException](
Expand Down Expand Up @@ -1314,7 +1314,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
("1.23", DayTimeIntervalType(MINUTE)))
.foreach { case (interval, dataType) =>
val expectedMsg = s"Interval string does not match day-time format of " +
s"${IntervalUtils.supportedFormat((dataType.startField, dataType.endField))
s"${IntervalUtils.supportedFormat(("day-time", dataType.startField, dataType.endField))
.map(format => s"`$format`").mkString(", ")} " +
s"when cast to ${dataType.typeName}: $interval, " +
s"set ${SQLConf.LEGACY_FROM_DAYTIME_STRING.key} to true " +
Expand All @@ -1338,7 +1338,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
("INTERVAL '92233720368541.775807' SECOND", DayTimeIntervalType(SECOND)))
.foreach { case (interval, dataType) =>
val expectedMsg = "Interval string does not match day-time format of " +
s"${IntervalUtils.supportedFormat((dataType.startField, dataType.endField))
s"${IntervalUtils.supportedFormat(("day-time", dataType.startField, dataType.endField))
.map(format => s"`$format`").mkString(", ")} " +
s"when cast to ${dataType.typeName}: $interval, " +
s"set ${SQLConf.LEGACY_FROM_DAYTIME_STRING.key} to true " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1916,7 +1916,7 @@ org.apache.spark.sql.catalyst.parser.ParseException
{
"errorClass" : "_LEGACY_ERROR_TEMP_0063",
"messageParameters" : {
"msg" : "Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t"
"msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t"
},
"queryContext" : [ {
"objectType" : "",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1916,7 +1916,7 @@ org.apache.spark.sql.catalyst.parser.ParseException
{
"errorClass" : "_LEGACY_ERROR_TEMP_0063",
"messageParameters" : {
"msg" : "Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t"
"msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t"
},
"queryContext" : [ {
"objectType" : "",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2355,7 +2355,7 @@ org.apache.spark.sql.catalyst.parser.ParseException
{
"errorClass" : "_LEGACY_ERROR_TEMP_0063",
"messageParameters" : {
"msg" : "Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t"
"msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t"
},
"queryContext" : [ {
"objectType" : "",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2168,7 +2168,7 @@ org.apache.spark.sql.catalyst.parser.ParseException
{
"errorClass" : "_LEGACY_ERROR_TEMP_0063",
"messageParameters" : {
"msg" : "Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t"
"msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t"
},
"queryContext" : [ {
"objectType" : "",
Expand Down

0 comments on commit f1cbe30

Please sign in to comment.