Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-35112][SQL] Support Cast string to day-second interval #32271

Closed
wants to merge 15 commits into from
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ object Cast {
case (TimestampType, DateType) => true

case (StringType, CalendarIntervalType) => true
case (StringType, DayTimeIntervalType) => true
case (StringType, YearMonthIntervalType) => true

case (StringType, _: NumericType) => true
Expand Down Expand Up @@ -535,9 +536,12 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
buildCast[UTF8String](_, s => IntervalUtils.safeStringToInterval(s))
}

/**
 * Builds the interpreted cast function that produces a day-time interval value.
 * Only string input is handled by this match; the string is parsed by
 * `IntervalUtils.castStringToDTInterval`.
 */
private[this] def castToDayTimeInterval(from: DataType): Any => Any = from match {
  case StringType =>
    buildCast[UTF8String](_, str => IntervalUtils.castStringToDTInterval(str))
}

/**
 * Builds the interpreted cast function that produces a year-month interval value.
 * Only string input is handled by this match; the string is parsed by
 * `IntervalUtils.castStringToYMInterval`.
 */
private[this] def castToYearMonthInterval(from: DataType): Any => Any = from match {
  // A single StringType arm: a second, identical arm would be unreachable.
  case StringType => buildCast[UTF8String](_, s => IntervalUtils.castStringToYMInterval(s))
}

// LongConverter
Expand Down Expand Up @@ -844,6 +848,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
case decimal: DecimalType => castToDecimal(from, decimal)
case TimestampType => castToTimestamp(from)
case CalendarIntervalType => castToInterval(from)
case DayTimeIntervalType => castToDayTimeInterval(from)
case YearMonthIntervalType => castToYearMonthInterval(from)
case BooleanType => castToBoolean(from)
case ByteType => castToByte(from)
Expand Down Expand Up @@ -903,6 +908,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
case decimal: DecimalType => castToDecimalCode(from, decimal, ctx)
case TimestampType => castToTimestampCode(from, ctx)
case CalendarIntervalType => castToIntervalCode(from)
case DayTimeIntervalType => castToDayTimeIntervalCode(from)
case YearMonthIntervalType => castToYearMonthIntervalCode(from)
case BooleanType => castToBooleanCode(from)
case ByteType => castToByteCode(from, ctx)
Expand Down Expand Up @@ -1362,6 +1368,12 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit

}

/**
 * Builds the code-generating cast function that produces a day-time interval value.
 * Only string input is handled by this match; the generated statement assigns the
 * result of `castStringToDTInterval` (called on the IntervalUtils class) to the
 * output variable.
 */
private[this] def castToDayTimeIntervalCode(from: DataType): CastFunction = from match {
  case StringType =>
    // The Scala object's class name ends with "$"; strip it to get the name used in the call.
    val intervalUtils = IntervalUtils.getClass.getCanonicalName.stripSuffix("$")
    (input, result, _) => code"$result = $intervalUtils.castStringToDTInterval($input);"
}

private[this] def castToYearMonthIntervalCode(from: DataType): CastFunction = from match {
case StringType =>
val util = IntervalUtils.getClass.getCanonicalName.stripSuffix("$")
Expand Down Expand Up @@ -1929,6 +1941,7 @@ object AnsiCast {
case (DateType, TimestampType) => true

case (StringType, _: CalendarIntervalType) => true
case (StringType, DayTimeIntervalType) => true
case (StringType, YearMonthIntervalType) => true

case (StringType, DateType) => true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,58 @@ object IntervalUtils {
}
}

// Body of a day-time interval: optional sign, days, then `h:m:s` with an optional
// fraction of up to nine digits. The sign class is `[+-]`; writing it as `[+|-]` would
// also accept a literal `|`, because `|` is not an alternation operator inside a
// character class.
private val unquotedDaySecondPattern =
  "([+-])?(\\d+) (\\d{1,2}):(\\d{1,2}):(\\d{1,2})(\\.\\d{1,9})?"
// Anchored form of the body above, used to match a whole input string.
private val quotedDaySecondPattern = (s"^$unquotedDaySecondPattern$$").r
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unquotedDaySecondRegex?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unquotedDaySecondRegex?

I will change this in #32444.

// Case-insensitive `INTERVAL [sign]'<body>' DAY TO SECOND` literal, where <body> is
// `unquotedDaySecondPattern`. The sign outside the quotes uses `[+-]`; `[+|-]` would also
// accept a literal `|`, because `|` is an ordinary character inside a character class.
private val daySecondLiteralPattern =
  (s"(?i)^INTERVAL\\s+([+-])?\\'$unquotedDaySecondPattern\\'\\s+DAY\\s+TO\\s+SECOND$$").r

/**
 * Parses a string into a day-time interval expressed as a `Long` (built by `toDTInterval`).
 *
 * The trimmed input must match either `quotedDaySecondPattern` or
 * `daySecondLiteralPattern`. For the literal form, a sign outside the quotes and a sign
 * inside the quotes are combined, so two minus signs yield a positive interval.
 *
 * @param input the string to parse
 * @return the value produced by `toDTInterval` for the parsed fields
 * @throws IllegalArgumentException if the input matches neither pattern
 */
def castStringToDTInterval(input: UTF8String): Long = {
  // "-" means negative; anything else (including an absent, i.e. null, group) is positive.
  def signOf(symbol: String): Int = if (symbol == "-") -1 else 1

  // Re-attaches the optional fraction group (null when absent) to the seconds text.
  def withFraction(second: String, fraction: String): String =
    Option(fraction).fold(second)(second + _)

  input.trimAll().toString match {
    case quotedDaySecondPattern(sign, day, hour, minute, second, micro) =>
      toDTInterval(day, hour, minute, withFraction(second, micro), signOf(sign))
    case daySecondLiteralPattern(outerSign, innerSign, day, hour, minute, second, micro) =>
      // Multiplying the two signs reproduces the table: (-,-) => +, (-,_) / (_,-) => -.
      val combinedSign = signOf(outerSign) * signOf(innerSign)
      toDTInterval(day, hour, minute, withFraction(second, micro), combinedSign)
    case _ =>
      val expected = "Interval string must match day-time format of `d h:m:s.n` " +
        "or `INTERVAL [+|-]'[+|-]d h:m:s.n' DAY TO SECOND`"
      throw new IllegalArgumentException(s"$expected: ${input.toString}, $fallbackNotice")
  }
}

/**
 * Combines textual day, hour, minute and second fields into a single signed `Long`
 * total, scaling the fields by `MICROS_PER_DAY`, `MICROS_PER_HOUR` and
 * `MICROS_PER_MINUTE` and adding the value returned by `parseSecondNano`.
 *
 * Each field is checked with `toLongWithRange` (days 0..Int.MaxValue, hours 0..23,
 * minutes 0..59) before it is used.
 *
 * @param dayStr    number of days, as text
 * @param hourStr   number of hours, as text
 * @param minuteStr number of minutes, as text
 * @param secondStr seconds with an optional fraction, as text
 * @param sign      multiplier applied to every field (callers pass 1 or -1)
 * @return the signed sum of all fields
 * @throws ArithmeticException if the total does not fit in a `Long`
 */
def toDTInterval(
    dayStr: String,
    hourStr: String,
    minuteStr: String,
    secondStr: String,
    sign: Int): Long = {
  val days = toLongWithRange(DAY, dayStr, 0, Int.MaxValue).toInt
  // Days may be as large as Int.MaxValue, so the scaled value can exceed Long range.
  // A plain `*` would wrap silently and the later addExact calls would not notice;
  // multiplyExact raises ArithmeticException instead.
  val dayMicros = Math.multiplyExact(sign.toLong * days, MICROS_PER_DAY)
  val hours = toLongWithRange(HOUR, hourStr, 0, 23)
  val upToHours = Math.addExact(dayMicros, sign * hours * MICROS_PER_HOUR)
  val minutes = toLongWithRange(MINUTE, minuteStr, 0, 59)
  val upToMinutes = Math.addExact(upToHours, sign * minutes * MICROS_PER_MINUTE)
  Math.addExact(upToMinutes, sign * parseSecondNano(secondStr))
}

/**
* Parse dayTime string in form: [-]d HH:mm:ss.nnnnnnnnn and [-]HH:mm:ss.nnnnnnnnn
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1775,6 +1775,48 @@ class CastSuite extends CastSuiteBase {
}
}

test("SPARK-35112: Cast string to day-time interval") {
  // Plain and ANSI-literal forms, with surrounding whitespace and mixed case.
  checkEvaluation(cast(Literal.create("0 0:0:0"), DayTimeIntervalType), 0L)
  checkEvaluation(cast(Literal.create(" interval '0 0:0:0' Day TO second   "),
    DayTimeIntervalType), 0L)
  checkEvaluation(cast(Literal.create("INTERVAL '1 2:03:04' DAY TO SECOND"),
    DayTimeIntervalType), 93784000000L)
  checkEvaluation(cast(Literal.create("INTERVAL '1 03:04:00' DAY TO SECOND"),
    DayTimeIntervalType), 97440000000L)
  checkEvaluation(cast(Literal.create("INTERVAL '1 03:04:00.0000' DAY TO SECOND"),
    DayTimeIntervalType), 97440000000L)
  checkEvaluation(cast(Literal.create("1 2:03:04"), DayTimeIntervalType), 93784000000L)
  checkEvaluation(cast(Literal.create("INTERVAL '-10 2:03:04' DAY TO SECOND"),
    DayTimeIntervalType), -871384000000L)
  checkEvaluation(cast(Literal.create("-10 2:03:04"), DayTimeIntervalType), -871384000000L)

  // The extreme representable values.
  checkEvaluation(cast(Literal.create("-106751991 04:00:54.775808"), DayTimeIntervalType),
    Long.MinValue)
  checkEvaluation(cast(Literal.create("106751991 04:00:54.775807"), DayTimeIntervalType),
    Long.MaxValue)

  // String -> interval -> string round trip at the extremes.
  Seq("-106751991 04:00:54.775808", "106751991 04:00:54.775807").foreach { interval =>
    val ansiInterval = s"INTERVAL '$interval' DAY TO SECOND"
    checkEvaluation(
      cast(cast(Literal.create(interval), DayTimeIntervalType), StringType), ansiInterval)
    checkEvaluation(cast(cast(Literal.create(ansiInterval),
      DayTimeIntervalType), StringType), ansiInterval)
  }

  // A day-time body with YEAR TO MONTH units is rejected as a format mismatch.
  Seq("INTERVAL '-106751991 04:00:54.775809' YEAR TO MONTH",
    "INTERVAL '106751991 04:00:54.775808' YEAR TO MONTH").foreach { interval =>
    val e = intercept[IllegalArgumentException] {
      cast(Literal.create(interval), DayTimeIntervalType).eval()
    }.getMessage
    assert(e.contains("Interval string must match day-time format of"))
  }

  // One microsecond past either extreme, with the correct DAY TO SECOND units, must
  // overflow. The YEAR TO MONTH inputs above never reach the arithmetic at all.
  Seq("INTERVAL '-106751991 04:00:54.775809' DAY TO SECOND",
    "INTERVAL '106751991 04:00:54.775808' DAY TO SECOND").foreach { interval =>
    intercept[ArithmeticException] {
      cast(Literal.create(interval), DayTimeIntervalType).eval()
    }
  }

  // Interval -> string -> interval round trip over a spread of durations.
  Seq(Byte.MaxValue, Short.MaxValue, Int.MaxValue, Long.MaxValue, Long.MinValue + 1,
    Long.MinValue).foreach { duration =>
    val interval = Literal.create(Duration.of(duration, ChronoUnit.MICROS), DayTimeIntervalType)
    checkEvaluation(cast(cast(interval, StringType), DayTimeIntervalType), duration)
  }
}

test("SPARK-35111: Cast string to year-month interval") {
checkEvaluation(cast(Literal.create("INTERVAL '1-0' YEAR TO MONTH"),
YearMonthIntervalType), 12)
Expand Down