-
Notifications
You must be signed in to change notification settings - Fork 28.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-35111][SQL] Support Cast string to year-month interval #32266
Changes from 25 commits
9546a0f
691c1f4
f5b02ee
15424a7
879817b
62d175b
2c75bba
5b134fa
6d14414
d19bbc8
ff904a1
d0e30e4
3b84baa
b05f7e6
25c08e0
f636d41
ce69004
092d01a
f088f64
80499b8
ca19c09
3df92b6
3adde87
0f82987
2c8785b
253c70e
9c70b88
5ca83ab
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.util | |||||
|
||||||
import java.time.{Duration, Period} | ||||||
import java.time.temporal.ChronoUnit | ||||||
import java.util.Locale | ||||||
import java.util.concurrent.TimeUnit | ||||||
|
||||||
import scala.util.control.NonFatal | ||||||
|
@@ -92,6 +93,26 @@ object IntervalUtils { | |||||
} | ||||||
|
||||||
// Optional sign followed by `<years>-<months>`, e.g. `-10-1`.
// The sign class is `[+-]`: inside a character class `|` is a literal character, so the
// earlier `[+|-]` also accepted inputs such as `|1-0`.
private val yearMonthPattern = "^([+-])?(\\d+)-(\\d+)$".r
// ANSI-style literal `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH`; a sign may appear outside
// and/or inside the quotes. The eight capture groups are kept so existing extractors match.
private val yearMonthStringPattern =
  "^(INTERVAL\\s+)([+-])?(')([+-])?(\\d+)-(\\d+)(')(\\s+YEAR\\s+TO\\s+MONTH)$".r
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
The (?i) is placed at the beginning of the pattern to enable case-insensitivity. There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Thanks for sharing |
||||||
|
||||||
/**
 * Casts a string to a year-month interval expressed as a signed number of months.
 *
 * The input is passed through `trimAll()` and upper-cased before matching, so the
 * `INTERVAL` / `YEAR TO MONTH` keywords are accepted in any letter case. Two layouts are
 * recognised: `[+|-]y-m` and `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH`.
 *
 * @param input the string to parse
 * @return the value produced by `toYMInterval` for the parsed years, months and sign
 * @throws IllegalArgumentException if the input matches neither layout, or if
 *                                  `toYMInterval` rejects the year/month values
 */
def castStringToYMInterval(input: UTF8String): Int = {
  input.trimAll().toString.toUpperCase(Locale.ROOT) match {
    case yearMonthPattern("-", year, month) => toYMInterval(year, month, -1)
    case yearMonthPattern(_, year, month) => toYMInterval(year, month, 1)
    case yearMonthStringPattern(_, firstSign, _, secondSign, year, month, _, _) =>
      // An unmatched optional sign group is null, which simply compares unequal to "-".
      // A "-" outside the quotes and a "-" inside them cancel each other out.
      val negative = (firstSign == "-") != (secondSign == "-")
      toYMInterval(year, month, if (negative) -1 else 1)
    case _ =>
      // Catch-all for anything that matched neither pattern. Previously this arm repeated
      // the yearMonthStringPattern extractor, so it could never fire and malformed input
      // surfaced as a scala.MatchError instead of this message.
      throw new IllegalArgumentException(
        s"Interval string does not match year-month format of `[+|-]y-m` " +
          s"or `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH`: ${input.toString}")
  }
}
|
||||||
/** | ||||||
* Parse YearMonth string in form: [+|-]YYYY-MM | ||||||
|
@@ -100,28 +121,30 @@ object IntervalUtils { | |||||
*/ | ||||||
def fromYearMonthString(input: String): CalendarInterval = {
  require(input != null, "Interval year-month string must be not null")
  // The month total, its bounds checks and the error wrapping all come from toYMInterval;
  // the former nested toInterval helper duplicated that logic and its results were
  // discarded, so it has been removed. Only the sign is decided here.
  input.trim match {
    case yearMonthPattern("-", yearStr, monthStr) =>
      // Explicit leading "-": negate the total.
      new CalendarInterval(toYMInterval(yearStr, monthStr, -1), 0, 0)
    case yearMonthPattern(_, yearStr, monthStr) =>
      // Any other accepted sign, or no sign at all: positive total.
      new CalendarInterval(toYMInterval(yearStr, monthStr, 1), 0, 0)
    case _ =>
      throw new IllegalArgumentException(
        s"Interval string does not match year-month format of 'y-m': $input")
  }
}
|
||||||
/**
 * Converts separately captured year and month strings into a signed number of months.
 *
 * @param yearStr  the years component; parsed by `toLongWithRange` with bounds
 *                 0 to `Integer.MAX_VALUE / MONTHS_PER_YEAR`
 * @param monthStr the months component; parsed by `toLongWithRange` with bounds 0 to 11
 * @param sign     multiplier applied to the total (callers in this file pass 1 or -1)
 * @return `sign * (years * MONTHS_PER_YEAR + months)` narrowed to an `Int`
 * @throws IllegalArgumentException wrapping any non-fatal failure raised while parsing or
 *                                  while narrowing the total with `Math.toIntExact`
 */
def toYMInterval(yearStr: String, monthStr: String, sign: Int): Int = {
  try {
    val yearCount = toLongWithRange(YEAR, yearStr, 0, Integer.MAX_VALUE / MONTHS_PER_YEAR)
    val monthCount = toLongWithRange(MONTH, monthStr, 0, 11)
    val signedMonths = sign * (yearCount * MONTHS_PER_YEAR + monthCount)
    // toIntExact throws rather than silently wrapping when the total does not fit in an Int.
    Math.toIntExact(signedMonths)
  } catch {
    case NonFatal(cause) =>
      throw new IllegalArgumentException(
        s"Error parsing interval year-month string: ${cause.getMessage}", cause)
  }
}
|
||||||
/** | ||||||
* Parse dayTime string in form: [-]d HH:mm:ss.nnnnnnnnn and [-]HH:mm:ss.nnnnnnnnn | ||||||
* | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1774,6 +1774,51 @@ class CastSuite extends CastSuiteBase { | |
assert(e3.contains("Casting 2147483648 to int causes overflow")) | ||
} | ||
} | ||
|
||
// Checks casting strings to YearMonthIntervalType; expected values are total months.
test("SPARK-35111: Cast string to year-month interval") { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add round trip tests: string -> year-month interval -> string, and year-month interval -> string -> year-month interval There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you test corner cases when arithmetic overflow happens. Also test upper and lower cases. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please, check the input in lower case. For example: checkEvaluation(cast(Literal.create(" interval '1-0' YEAR TO MONTH "),
YearMonthIntervalType), 12) |
||
// ANSI-style literals and bare `y-m` strings, covering the sign inside and/or outside the
// quotes, mixed-case keywords, and a null input.
checkEvaluation(cast(Literal.create("INTERVAL '1-0' YEAR TO MONTH"), | ||
YearMonthIntervalType), 12) | ||
checkEvaluation(cast(Literal.create("0-0"), YearMonthIntervalType), 0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is duplicated There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
checkEvaluation(cast(Literal.create("INTERVAL '-1-0' YEAR TO MONTH"), | ||
YearMonthIntervalType), -12) | ||
checkEvaluation(cast(Literal.create("INTERVAL -'-1-0' YEAR TO MONTH"), | ||
YearMonthIntervalType), 12) | ||
checkEvaluation(cast(Literal.create("INTERVAL +'-1-0' YEAR TO MONTH"), | ||
YearMonthIntervalType), -12) | ||
checkEvaluation(cast(Literal.create("INTERVAL +'+1-0' YEAR TO MONTH"), | ||
YearMonthIntervalType), 12) | ||
checkEvaluation(cast(Literal.create("INTERVAL +'1-0' YEAR TO MONTH"), | ||
YearMonthIntervalType), 12) | ||
checkEvaluation(cast(Literal.create("-1-0"), YearMonthIntervalType), -12) | ||
checkEvaluation(cast(Literal.create("INTERVAL '10-1' YEAR TO MONTH"), | ||
AngersZhuuuu marked this conversation as resolved.
Show resolved
Hide resolved
|
||
YearMonthIntervalType), 121) | ||
checkEvaluation(cast(Literal.create("10-1"), YearMonthIntervalType), 121) | ||
AngersZhuuuu marked this conversation as resolved.
Show resolved
Hide resolved
|
||
checkEvaluation(cast(Literal.create("interval '10-1' year TO MONTH"), | ||
AngersZhuuuu marked this conversation as resolved.
Show resolved
Hide resolved
|
||
YearMonthIntervalType), 121) | ||
checkEvaluation(cast(Literal.create(null, StringType), YearMonthIntervalType), null) | ||
|
||
// Round trip string -> year-month interval -> string. 178956970-7 is Int.MaxValue months
// and -178956970-8 is Int.MinValue months, so both ends of the Int range are covered.
Seq("0-0", "10-1", "-178956970-7", "178956970-7", "-178956970-8").foreach { interval => | ||
val ansiInterval = s"INTERVAL '$interval' YEAR TO MONTH" | ||
checkEvaluation( | ||
cast(cast(Literal.create(interval), YearMonthIntervalType), StringType), ansiInterval) | ||
checkEvaluation(cast(cast(Literal.create(ansiInterval), | ||
YearMonthIntervalType), StringType), ansiInterval) | ||
} | ||
|
||
// One month beyond either end of the Int range must be reported as an integer overflow.
Seq("INTERVAL '-178956970-9' YEAR TO MONTH", "INTERVAL '178956970-8' YEAR TO MONTH") | ||
.foreach { interval => | ||
val e = intercept[IllegalArgumentException] { | ||
cast(Literal.create(interval), YearMonthIntervalType).eval() | ||
}.getMessage | ||
assert(e.contains("Error parsing interval year-month string: integer overflow")) | ||
} | ||
|
||
// Round trip year-month interval -> string -> year-month interval.
Seq(Byte.MaxValue, Short.MaxValue, Int.MaxValue, Int.MinValue + 1, Int.MinValue) | ||
.foreach { period => | ||
val interval = Literal.create(Period.ofMonths(period), YearMonthIntervalType) | ||
checkEvaluation(cast(cast(interval, StringType), YearMonthIntervalType), period) | ||
} | ||
} | ||
} | ||
|
||
/** | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: evNull is not used
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yea