Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-35112][SQL] Support Cast string to day-second interval #32271

Closed
wants to merge 15 commits into from
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ object Cast {
case (TimestampType, DateType) => true

case (StringType, CalendarIntervalType) => true
case (StringType, DayTimeIntervalType) => true
case (StringType, YearMonthIntervalType) => true

case (StringType, _: NumericType) => true
Expand Down Expand Up @@ -535,9 +536,12 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
buildCast[UTF8String](_, s => IntervalUtils.safeStringToInterval(s))
}

// Selects the interpreted-mode conversion into a day-time interval for the given source type.
// Only string input has a conversion here; it is delegated to IntervalUtils.castStringToDTInterval.
private[this] def castToDayTimeInterval(from: DataType): Any => Any = from match {
  case StringType =>
    value => buildCast[UTF8String](value, str => IntervalUtils.castStringToDTInterval(str))
}

// Selects the interpreted-mode conversion into a year-month interval for the given source type.
// Only string input has a conversion here; it is delegated to IntervalUtils.castStringToYMInterval.
// A single StringType arm is kept: a second identical arm could never be reached.
private[this] def castToYearMonthInterval(from: DataType): Any => Any = from match {
  case StringType => buildCast[UTF8String](_, s => IntervalUtils.castStringToYMInterval(s))
}

// LongConverter
Expand Down Expand Up @@ -844,6 +848,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
case decimal: DecimalType => castToDecimal(from, decimal)
case TimestampType => castToTimestamp(from)
case CalendarIntervalType => castToInterval(from)
case DayTimeIntervalType => castToDayTimeInterval(from)
case YearMonthIntervalType => castToYearMonthInterval(from)
case BooleanType => castToBoolean(from)
case ByteType => castToByte(from)
Expand Down Expand Up @@ -903,6 +908,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
case decimal: DecimalType => castToDecimalCode(from, decimal, ctx)
case TimestampType => castToTimestampCode(from, ctx)
case CalendarIntervalType => castToIntervalCode(from)
case DayTimeIntervalType => castToDayTimeIntervalCode(from)
case YearMonthIntervalType => castToYearMonthIntervalCode(from)
case BooleanType => castToBooleanCode(from)
case ByteType => castToByteCode(from, ctx)
Expand Down Expand Up @@ -1362,6 +1368,12 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit

}

// Selects the code-generation snippet that converts a value of the given source type into a
// day-time interval. For strings, the generated statement calls the static forwarder of
// IntervalUtils.castStringToDTInterval and assigns its result to the output variable.
private[this] def castToDayTimeIntervalCode(from: DataType): CastFunction = from match {
  case StringType =>
    // The Scala object's class name ends with "$"; stripping it yields the name used in the
    // generated code.
    val intervalUtils = IntervalUtils.getClass.getCanonicalName.stripSuffix("$")
    (input, result, _) => code"$result = $intervalUtils.castStringToDTInterval($input);"
}

private[this] def castToYearMonthIntervalCode(from: DataType): CastFunction = from match {
case StringType =>
val util = IntervalUtils.getClass.getCanonicalName.stripSuffix("$")
Expand Down Expand Up @@ -1929,6 +1941,7 @@ object AnsiCast {
case (DateType, TimestampType) => true

case (StringType, _: CalendarIntervalType) => true
case (StringType, DayTimeIntervalType) => true
case (StringType, YearMonthIntervalType) => true

case (StringType, DateType) => true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,57 @@ object IntervalUtils {
}
}

// ANSI-style literal: INTERVAL [sign]'[sign]d h:m:s[.fraction]' DAY TO SECOND (case-insensitive).
// Capture groups, in order: INTERVAL keyword, outer sign, opening quote, inner sign, day, hour,
// minute, second, fraction (with leading dot), closing quote, DAY TO SECOND suffix.
// The sign class is `[+-]`: inside a character class `|` is a literal, so `[+|-]` would also
// accept a stray '|' as a sign.
private val daySecondStringPattern = ("(?i)^(INTERVAL\\s+)([+-])?(')" +
  "([+-])?(\\d+) (\\d{1,2}):(\\d{1,2}):(\\d{1,2})(\\.\\d{1,9})?(')(\\s+DAY\\s+TO\\s+SECOND)$").r
// Bare form: [sign]d h:m:s[.fraction]
// Capture groups, in order: sign, day, hour, minute, second, fraction (with leading dot).
private val daySecondPattern = "^([+-])?(\\d+) (\\d{1,2}):(\\d{1,2}):(\\d{1,2})(\\.\\d{1,9})?$".r
AngersZhuuuu marked this conversation as resolved.
Show resolved Hide resolved

/**
 * Parses a day-time interval string into a number of microseconds.
 *
 * The input is trimmed and then matched against `daySecondPattern` (bare `d h:m:s.n` form) and
 * `daySecondStringPattern` (`INTERVAL '...' DAY TO SECOND` form). In the second form a sign may
 * appear both outside and inside the quotes; two minus signs cancel each other.
 *
 * @param input the interval string to parse
 * @return the value computed by `toDTInterval` for the captured fields and the resolved sign
 * @throws IllegalArgumentException if the trimmed input matches neither pattern
 */
def castStringToDTInterval(input: UTF8String): Long = {
  // Re-attaches the optional fraction (captured together with its leading dot) to the seconds.
  def withFraction(seconds: String, fraction: String): String =
    Option(fraction).fold(seconds)(seconds + _)

  def signOf(negative: Boolean): Int = if (negative) -1 else 1

  input.trimAll().toString match {
    case daySecondPattern(sign, d, h, m, s, frac) =>
      // An absent sign is captured as null, which compares unequal to "-".
      toDTInterval(d, h, m, withFraction(s, frac), signOf(sign == "-"))
    case daySecondStringPattern(_, outerSign, _, innerSign, d, h, m, s, frac, _, _) =>
      // Negative only when exactly one of the two signs is a minus.
      val negative = (outerSign == "-") != (innerSign == "-")
      toDTInterval(d, h, m, withFraction(s, frac), signOf(negative))
    case _ =>
      throw new IllegalArgumentException(
        s"Interval string must match day-time format of `d h:m:s.n` " +
        s"or `INTERVAL [+|-]'[+|-]d h:m:s.n' DAY TO SECOND`: ${input.toString}, " +
        s"$fallbackNotice")
  }
}

/**
 * Converts the textual fields of a day-time interval into a signed number of microseconds.
 *
 * Days, hours and minutes are parsed by `toLongWithRange` with the bounds 0..Int.MaxValue,
 * 0..23 and 0..59 respectively; the seconds string is parsed by `parseSecondNano`.
 * The sign is applied to every field before it is accumulated, so each partial sum stays on
 * the same side of zero as the final result.
 *
 * @param dayStr    the day field
 * @param hourStr   the hour field
 * @param minuteStr the minute field
 * @param secondStr the second field, optionally with a fractional part
 * @param sign      multiplier applied to every field (callers in this file pass 1 or -1)
 * @return the sum of all signed fields
 * @throws ArithmeticException if the day field or the running total overflows a Long
 */
def toDTInterval(
    dayStr: String,
    hourStr: String,
    minuteStr: String,
    secondStr: String,
    sign: Int): Long = {
  val days = toLongWithRange(DAY, dayStr, 0, Int.MaxValue).toInt
  // The accepted day range is large enough for days * MICROS_PER_DAY to exceed a Long. A plain
  // `*` would wrap around silently and addExact would then accept the wrapped value, so the
  // product itself has to be overflow-checked.
  val dayMicros = Math.multiplyExact(sign.toLong * days, MICROS_PER_DAY)
  val hours = toLongWithRange(HOUR, hourStr, 0, 23)
  val upToHours = Math.addExact(dayMicros, sign * hours * MICROS_PER_HOUR)
  val minutes = toLongWithRange(MINUTE, minuteStr, 0, 59)
  val upToMinutes = Math.addExact(upToHours, sign * minutes * MICROS_PER_MINUTE)
  Math.addExact(upToMinutes, sign * parseSecondNano(secondStr))
}

/**
* Parse dayTime string in form: [-]d HH:mm:ss.nnnnnnnnn and [-]HH:mm:ss.nnnnnnnnn
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1775,6 +1775,48 @@ class CastSuite extends CastSuiteBase {
}
}

// Checks casting strings to DayTimeIntervalType, for both the bare `d h:m:s[.n]` form and the
// `INTERVAL '...' DAY TO SECOND` form. Expected values are microsecond counts
// (e.g. "1 2:03:04" -> 93784000000L).
test("SPARK-35112: Cast string to day-time interval") {
checkEvaluation(cast(Literal.create("0 0:0:0"), DayTimeIntervalType), 0L)
checkEvaluation(cast(Literal.create("INTERVAL '0 0:0:0' DAY TO SECOND"),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you check INTERVAL in lower case + spaces:

Suggested change
checkEvaluation(cast(Literal.create("INTERVAL '0 0:0:0' DAY TO SECOND"),
checkEvaluation(cast(Literal.create(" interval '0 0:0:0' Day TO second "),

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@AngersZhuuuu Your force push reverted the changes I approved. Please, revert them back.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@AngersZhuuuu Your force push reverted the changes I approved. Please, revert them back.

Updated

DayTimeIntervalType), 0L)
checkEvaluation(cast(Literal.create("INTERVAL '1 2:03:04' DAY TO SECOND"),
DayTimeIntervalType), 93784000000L)
checkEvaluation(cast(Literal.create("INTERVAL '1 03:04:00' DAY TO SECOND"),
DayTimeIntervalType), 97440000000L)
checkEvaluation(cast(Literal.create("INTERVAL '1 03:04:00.0000' DAY TO SECOND"),
DayTimeIntervalType), 97440000000L)
checkEvaluation(cast(Literal.create("1 2:03:04"), DayTimeIntervalType), 93784000000L)
checkEvaluation(cast(Literal.create("INTERVAL '-10 2:03:04' DAY TO SECOND"),
DayTimeIntervalType), -871384000000L)
checkEvaluation(cast(Literal.create("-10 2:03:04"), DayTimeIntervalType), -871384000000L)
// Boundary values: these two strings are expected to map exactly to Long.MinValue and
// Long.MaxValue.
checkEvaluation(cast(Literal.create("-106751991 04:00:54.775808"), DayTimeIntervalType),
Long.MinValue)
checkEvaluation(cast(Literal.create("106751991 04:00:54.775807"), DayTimeIntervalType),
Long.MaxValue)

// Round trip at the boundaries: string -> interval -> string is expected to yield the
// `INTERVAL '...' DAY TO SECOND` form, whichever of the two input forms was used.
Seq("-106751991 04:00:54.775808", "106751991 04:00:54.775807").foreach { interval =>
val ansiInterval = s"INTERVAL '$interval' DAY TO SECOND"
checkEvaluation(
cast(cast(Literal.create(interval), DayTimeIntervalType), StringType), ansiInterval)
checkEvaluation(cast(cast(Literal.create(ansiInterval),
DayTimeIntervalType), StringType), ansiInterval)
}

// NOTE(review): these literals end in `YEAR TO MONTH`, and the asserted message is the
// format-mismatch error, so they presumably fail on the suffix rather than on the values
// being one microsecond out of range. Confirm whether `DAY TO SECOND` was intended here.
Seq("INTERVAL '-106751991 04:00:54.775809' YEAR TO MONTH",
"INTERVAL '106751991 04:00:54.775808' YEAR TO MONTH").foreach { interval =>
val e = intercept[IllegalArgumentException] {
cast(Literal.create(interval), DayTimeIntervalType).eval()
}.getMessage
assert(e.contains("Interval string must match day-time format of"))
}

// Round trip in the other direction: a Duration-backed interval literal cast to string and
// back is expected to give the original microsecond count.
Seq(Byte.MaxValue, Short.MaxValue, Int.MaxValue, Long.MaxValue, Long.MinValue + 1,
Long.MinValue).foreach { duration =>
val interval = Literal.create(Duration.of(duration, ChronoUnit.MICROS), DayTimeIntervalType)
checkEvaluation(cast(cast(interval, StringType), DayTimeIntervalType), duration)
}
}

test("SPARK-35111: Cast string to year-month interval") {
checkEvaluation(cast(Literal.create("INTERVAL '1-0' YEAR TO MONTH"),
YearMonthIntervalType), 12)
Expand Down