Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-35111][SQL] Support Cast string to year-month interval #32266

Closed
wants to merge 28 commits into from
Closed
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
9546a0f
[SPARK-35111][SQL] Support Cast string to year-month interval
AngersZhuuuu Apr 21, 2021
691c1f4
Update CastSuite.scala
AngersZhuuuu Apr 21, 2021
f5b02ee
Update CastSuite.scala
AngersZhuuuu Apr 22, 2021
15424a7
Update IntervalUtils.scala
AngersZhuuuu Apr 22, 2021
879817b
Update CastSuite.scala
AngersZhuuuu Apr 22, 2021
62d175b
Update CastSuite.scala
AngersZhuuuu Apr 22, 2021
2c75bba
save
AngersZhuuuu Apr 22, 2021
5b134fa
Merge branch 'master' into SPARK-SPARK-35111
AngersZhuuuu Apr 22, 2021
6d14414
Update CastSuite.scala
AngersZhuuuu Apr 22, 2021
d19bbc8
Update Cast.scala
AngersZhuuuu Apr 23, 2021
ff904a1
save
AngersZhuuuu Apr 25, 2021
d0e30e4
Merge branch 'master' into SPARK-SPARK-35111
AngersZhuuuu Apr 25, 2021
3b84baa
follow comment
AngersZhuuuu Apr 25, 2021
b05f7e6
update
AngersZhuuuu Apr 28, 2021
25c08e0
Update IntervalUtils.scala
AngersZhuuuu Apr 28, 2021
f636d41
update
AngersZhuuuu Apr 28, 2021
ce69004
Update CastSuite.scala
AngersZhuuuu Apr 28, 2021
092d01a
Update IntervalUtils.scala
AngersZhuuuu Apr 28, 2021
f088f64
Update IntervalUtils.scala
AngersZhuuuu Apr 28, 2021
80499b8
Update IntervalUtils.scala
AngersZhuuuu Apr 29, 2021
ca19c09
Update IntervalUtils.scala
AngersZhuuuu Apr 29, 2021
3df92b6
Update Cast.scala
AngersZhuuuu Apr 29, 2021
3adde87
Update IntervalUtils.scala
AngersZhuuuu Apr 29, 2021
0f82987
follow comment
AngersZhuuuu Apr 29, 2021
2c8785b
Update CastSuite.scala
AngersZhuuuu Apr 29, 2021
253c70e
follow comment
AngersZhuuuu Apr 29, 2021
9c70b88
Update IntervalUtils.scala
AngersZhuuuu Apr 29, 2021
5ca83ab
update
AngersZhuuuu Apr 30, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ object Cast {
case (TimestampType, DateType) => true

case (StringType, CalendarIntervalType) => true
case (StringType, YearMonthIntervalType) => true

case (StringType, _: NumericType) => true
case (BooleanType, _: NumericType) => true
Expand Down Expand Up @@ -533,6 +534,11 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
buildCast[UTF8String](_, s => IntervalUtils.safeStringToInterval(s))
}

private[this] def castToYearMonthInterval(from: DataType): Any => Any = from match {
case StringType =>
buildCast[UTF8String](_, s => IntervalUtils.fromYearMonthString(s).months)
}

// LongConverter
private[this] def castToLong(from: DataType): Any => Any = from match {
case StringType if ansiEnabled =>
Expand Down Expand Up @@ -837,6 +843,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
case decimal: DecimalType => castToDecimal(from, decimal)
case TimestampType => castToTimestamp(from)
case CalendarIntervalType => castToInterval(from)
case YearMonthIntervalType => castToYearMonthInterval(from)
case BooleanType => castToBoolean(from)
case ByteType => castToByte(from)
case ShortType => castToShort(from)
Expand Down Expand Up @@ -895,6 +902,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
case decimal: DecimalType => castToDecimalCode(from, decimal, ctx)
case TimestampType => castToTimestampCode(from, ctx)
case CalendarIntervalType => castToIntervalCode(from)
case YearMonthIntervalType => castToYearMonthIntervalCode(from)
case BooleanType => castToBooleanCode(from)
case ByteType => castToByteCode(from, ctx)
case ShortType => castToShortCode(from, ctx)
Expand Down Expand Up @@ -1353,6 +1361,13 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit

}

private[this] def castToYearMonthIntervalCode(from: DataType): CastFunction = from match {
case StringType =>
val util = IntervalUtils.getClass.getCanonicalName.stripSuffix("$")
(c, evPrim, evNull) =>
code"""$evPrim = (Integer)$util.fromYearMonthString($c).months;""".stripMargin
AngersZhuuuu marked this conversation as resolved.
Show resolved Hide resolved
}

private[this] def decimalToTimestampCode(d: ExprValue): Block = {
val block = inline"new java.math.BigDecimal($MICROS_PER_SECOND)"
code"($d.toBigDecimal().bigDecimal().multiply($block)).longValue()"
Expand Down Expand Up @@ -1912,6 +1927,7 @@ object AnsiCast {
case (DateType, TimestampType) => true

case (StringType, _: CalendarIntervalType) => true
case (StringType, YearMonthIntervalType) => true

case (StringType, DateType) => true
case (TimestampType, DateType) => true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,23 @@ object IntervalUtils {

private val yearMonthPattern = "^([+|-])?(\\d+)-(\\d+)$".r

private val yearMonthFuzzyPattern = "[^-|+]*?([+|-]?[\\d]+-[\\d]+).*".r
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we need a new regex?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we need a new regex?

Since yearMonthPattern only support [+|-]yy-mmmm

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what do we want to support here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am discussing this here #32266 (comment) with @MaxGekk ,
Do you have any other supplementary suggestions?


def fromYearMonthString(input: UTF8String): CalendarInterval = {
if (input == null || input.toString == null) {
throw new IllegalArgumentException("Interval year-month string must be not null")
AngersZhuuuu marked this conversation as resolved.
Show resolved Hide resolved
} else {
val intervalString = input.trimAll().toString
intervalString match {
case yearMonthFuzzyPattern(payLoad) =>
fromYearMonthString(payLoad)
case _ =>
throw new IllegalArgumentException(
s"Interval string does not match year-month format of 'y-m': $input")
}
}
}

/**
* Parse YearMonth string in form: [+|-]YYYY-MM
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1774,6 +1774,45 @@ class CastSuite extends CastSuiteBase {
assert(e3.contains("Casting 2147483648 to int causes overflow"))
}
}

test("SPARK-35111: Cast string to year-month interval") {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add round trip tests: string -> year-month interval -> string, and year-month interval -> string -> year-month interval

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you test corner cases when arithmetic overflow happens. Also test upper and lower cases.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you test corner cases when arithmetic overflow happens. Also test upper and lower cases.

Current test is not correct according to #32281, will update after #32281 merged

Copy link
Member

@MaxGekk MaxGekk Apr 29, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please, check the input in lower case. For example:

    checkEvaluation(cast(Literal.create("  interval '1-0' YEAR TO MONTH  "),
      YearMonthIntervalType), 12)

checkEvaluation(cast(Literal.create("INTERVAL '0-0' YEAR TO MONTH"),
AngersZhuuuu marked this conversation as resolved.
Show resolved Hide resolved
YearMonthIntervalType), 0)
checkEvaluation(cast(Literal.create("0-0"), YearMonthIntervalType), 0)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is duplicated

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

checkEvaluation(cast(Literal.create("INTERVAL '-1-0' YEAR TO MONTH"),
YearMonthIntervalType), -12)
checkEvaluation(cast(Literal.create("-1-0"), YearMonthIntervalType), -12)
checkEvaluation(cast(Literal.create("INTERVAL '10-1' YEAR TO MONTH"),
AngersZhuuuu marked this conversation as resolved.
Show resolved Hide resolved
YearMonthIntervalType), 121)
checkEvaluation(cast(Literal.create("10-1"), YearMonthIntervalType), 121)
AngersZhuuuu marked this conversation as resolved.
Show resolved Hide resolved
checkEvaluation(cast(Literal.create("abc10-1"), YearMonthIntervalType), 121)
checkEvaluation(cast(Literal.create("abca10-1-da-"), YearMonthIntervalType), 121)
checkEvaluation(cast(Literal.create(null, StringType), YearMonthIntervalType), null)

Seq("INTERVAL '0-0' YEAR TO MONTH" -> "INTERVAL '0-0' YEAR TO MONTH",
"INTERVAL '10-1' YEAR TO MONTH" -> "INTERVAL '10-1' YEAR TO MONTH",
"INTERVAL '-178956970-7' YEAR TO MONTH" -> "INTERVAL '-178956970-7' YEAR TO MONTH",
"INTERVAL '178956970-7' YEAR TO MONTH" -> "INTERVAL '178956970-7' YEAR TO MONTH",
"INTERVAL '-178956970-8' YEAR TO MONTH" -> "INTERVAL '-178956970-8' YEAR TO MONTH"
AngersZhuuuu marked this conversation as resolved.
Show resolved Hide resolved
).foreach { case (interval, result) =>
checkEvaluation(
cast(cast(Literal.create(interval), YearMonthIntervalType), StringType), result)
}

Seq("INTERVAL '-178956970-9' YEAR TO MONTH", "INTERVAL '178956970-8' YEAR TO MONTH")
.foreach { interval =>
val e = intercept[IllegalArgumentException] {
cast(Literal.create(interval), YearMonthIntervalType).eval()
}.getMessage
assert(e.contains("Error parsing interval year-month string: integer overflow"))
}

Seq(Byte.MaxValue, Short.MaxValue, Int.MaxValue, Int.MinValue + 1, Int.MinValue)
.foreach { period =>
val interval = Literal.create(Period.ofMonths(period), YearMonthIntervalType)
checkEvaluation(cast(cast(interval, StringType), YearMonthIntervalType), period)
}
}
}

/**
Expand Down