Skip to content

Commit

Permalink
[SPARK-24468][SQL] Handle negative scale when adjusting precision for…
Browse files Browse the repository at this point in the history
… decimal operations

## What changes were proposed in this pull request?

In SPARK-22036 we introduced the possibility to allow precision loss in arithmetic operations (according to the SQL standard). The implementation was drawn from Hive's one, where Decimals with a negative scale are not allowed in the operations.

The PR handles the case when the scale is negative, removing the assertion that it is not.

## How was this patch tested?

added UTs

Author: Marco Gaido <[email protected]>

Closes #21499 from mgaido91/SPARK-24468.

(cherry picked from commit f07c506)
Signed-off-by: Wenchen Fan <[email protected]>
  • Loading branch information
mgaido91 authored and cloud-fan committed Jun 9, 2018
1 parent 36f1d5e commit 1582945
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 68 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -161,13 +161,17 @@ object DecimalType extends AbstractDataType {
* This method is used only when `spark.sql.decimalOperations.allowPrecisionLoss` is set to true.
*/
private[sql] def adjustPrecisionScale(precision: Int, scale: Int): DecimalType = {
// Assumptions:
// Assumption:
assert(precision >= scale)
assert(scale >= 0)

if (precision <= MAX_PRECISION) {
// Adjustment only needed when we exceed max precision
DecimalType(precision, scale)
} else if (scale < 0) {
// Decimal can have negative scale (SPARK-24468). In this case, we cannot allow a precision
// loss since we would cause a loss of digits in the integer part.
// In this case, we are likely to meet an overflow.
DecimalType(MAX_PRECISION, scale)
} else {
// Precision/scale exceed maximum precision. Result must be adjusted to MAX_PRECISION.
val intDigits = precision - scale
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,15 @@ class DecimalPrecisionSuite extends AnalysisTest with BeforeAndAfter {
}
}

test("SPARK-24468: operations on decimals with negative scale") {
val a = AttributeReference("a", DecimalType(3, -10))()
val b = AttributeReference("b", DecimalType(1, -1))()
val c = AttributeReference("c", DecimalType(35, 1))()
checkType(Multiply(a, b), DecimalType(5, -11))
checkType(Multiply(a, c), DecimalType(38, -9))
checkType(Multiply(b, c), DecimalType(37, 0))
}

/** strength reduction for integer/decimal comparisons */
def ruleTest(initial: Expression, transformed: Expression): Unit = {
val testRelation = LocalRelation(AttributeReference("a", IntegerType)())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,14 @@ select 10.3000 * 3.0;
select 10.30000 * 30.0;
select 10.300000000000000000 * 3.000000000000000000;
select 10.300000000000000000 * 3.0000000000000000000;
select 2.35E10 * 1.0;

-- arithmetic operations causing an overflow return NULL
select (5e36 + 0.1) + 5e36;
select (-4e36 - 0.1) - 7e36;
select 12345678901234567890.0 * 12345678901234567890.0;
select 1e35 / 0.1;
select 1.2345678901234567890E30 * 1.2345678901234567890E25;

-- arithmetic operations causing a precision loss are truncated
select 12345678912345678912345678912.1234567 + 9999999999999999999999999999999.12345;
Expand All @@ -67,12 +69,14 @@ select 10.3000 * 3.0;
select 10.30000 * 30.0;
select 10.300000000000000000 * 3.000000000000000000;
select 10.300000000000000000 * 3.0000000000000000000;
select 2.35E10 * 1.0;

-- arithmetic operations causing an overflow return NULL
select (5e36 + 0.1) + 5e36;
select (-4e36 - 0.1) - 7e36;
select 12345678901234567890.0 * 12345678901234567890.0;
select 1e35 / 0.1;
select 1.2345678901234567890E30 * 1.2345678901234567890E25;

-- arithmetic operations causing a precision loss return NULL
select 12345678912345678912345678912.1234567 + 9999999999999999999999999999999.12345;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 36
-- Number of queries: 40


-- !query 0
Expand Down Expand Up @@ -114,190 +114,222 @@ struct<(CAST(10.300000000000000000 AS DECIMAL(21,19)) * CAST(3.00000000000000000


-- !query 13
select (5e36 + 0.1) + 5e36
select 2.35E10 * 1.0
-- !query 13 schema
struct<(CAST((CAST(5E+36 AS DECIMAL(38,1)) + CAST(0.1 AS DECIMAL(38,1))) AS DECIMAL(38,1)) + CAST(5E+36 AS DECIMAL(38,1))):decimal(38,1)>
struct<(CAST(2.35E+10 AS DECIMAL(12,1)) * CAST(1.0 AS DECIMAL(12,1))):decimal(6,-7)>
-- !query 13 output
NULL
23500000000


-- !query 14
select (-4e36 - 0.1) - 7e36
select (5e36 + 0.1) + 5e36
-- !query 14 schema
struct<(CAST((CAST(-4E+36 AS DECIMAL(38,1)) - CAST(0.1 AS DECIMAL(38,1))) AS DECIMAL(38,1)) - CAST(7E+36 AS DECIMAL(38,1))):decimal(38,1)>
struct<(CAST((CAST(5E+36 AS DECIMAL(38,1)) + CAST(0.1 AS DECIMAL(38,1))) AS DECIMAL(38,1)) + CAST(5E+36 AS DECIMAL(38,1))):decimal(38,1)>
-- !query 14 output
NULL


-- !query 15
select 12345678901234567890.0 * 12345678901234567890.0
select (-4e36 - 0.1) - 7e36
-- !query 15 schema
struct<(12345678901234567890.0 * 12345678901234567890.0):decimal(38,2)>
struct<(CAST((CAST(-4E+36 AS DECIMAL(38,1)) - CAST(0.1 AS DECIMAL(38,1))) AS DECIMAL(38,1)) - CAST(7E+36 AS DECIMAL(38,1))):decimal(38,1)>
-- !query 15 output
NULL


-- !query 16
select 1e35 / 0.1
select 12345678901234567890.0 * 12345678901234567890.0
-- !query 16 schema
struct<(CAST(1E+35 AS DECIMAL(37,1)) / CAST(0.1 AS DECIMAL(37,1))):decimal(38,6)>
struct<(12345678901234567890.0 * 12345678901234567890.0):decimal(38,2)>
-- !query 16 output
NULL


-- !query 17
select 12345678912345678912345678912.1234567 + 9999999999999999999999999999999.12345
select 1e35 / 0.1
-- !query 17 schema
struct<(CAST(12345678912345678912345678912.1234567 AS DECIMAL(38,6)) + CAST(9999999999999999999999999999999.12345 AS DECIMAL(38,6))):decimal(38,6)>
struct<(CAST(1E+35 AS DECIMAL(37,1)) / CAST(0.1 AS DECIMAL(37,1))):decimal(38,6)>
-- !query 17 output
10012345678912345678912345678911.246907
NULL


-- !query 18
select 123456789123456789.1234567890 * 1.123456789123456789
select 1.2345678901234567890E30 * 1.2345678901234567890E25
-- !query 18 schema
struct<(CAST(123456789123456789.1234567890 AS DECIMAL(36,18)) * CAST(1.123456789123456789 AS DECIMAL(36,18))):decimal(38,18)>
struct<(CAST(1.2345678901234567890E+30 AS DECIMAL(25,-6)) * CAST(1.2345678901234567890E+25 AS DECIMAL(25,-6))):decimal(38,-17)>
-- !query 18 output
138698367904130467.654320988515622621
NULL


-- !query 19
select 12345678912345.123456789123 / 0.000000012345678
select 12345678912345678912345678912.1234567 + 9999999999999999999999999999999.12345
-- !query 19 schema
struct<(CAST(12345678912345.123456789123 AS DECIMAL(29,15)) / CAST(1.2345678E-8 AS DECIMAL(29,15))):decimal(38,9)>
struct<(CAST(12345678912345678912345678912.1234567 AS DECIMAL(38,6)) + CAST(9999999999999999999999999999999.12345 AS DECIMAL(38,6))):decimal(38,6)>
-- !query 19 output
1000000073899961059796.725866332
10012345678912345678912345678911.246907


-- !query 20
set spark.sql.decimalOperations.allowPrecisionLoss=false
select 123456789123456789.1234567890 * 1.123456789123456789
-- !query 20 schema
struct<key:string,value:string>
struct<(CAST(123456789123456789.1234567890 AS DECIMAL(36,18)) * CAST(1.123456789123456789 AS DECIMAL(36,18))):decimal(38,18)>
-- !query 20 output
spark.sql.decimalOperations.allowPrecisionLoss false
138698367904130467.654320988515622621


-- !query 21
select id, a+b, a-b, a*b, a/b from decimals_test order by id
select 12345678912345.123456789123 / 0.000000012345678
-- !query 21 schema
struct<id:int,(a + b):decimal(38,18),(a - b):decimal(38,18),(a * b):decimal(38,36),(a / b):decimal(38,18)>
struct<(CAST(12345678912345.123456789123 AS DECIMAL(29,15)) / CAST(1.2345678E-8 AS DECIMAL(29,15))):decimal(38,9)>
-- !query 21 output
1 1099 -899 NULL 0.1001001001001001
2 24690.246 0 NULL 1
3 1234.2234567891011 -1233.9765432108989 NULL 0.000100037913541123
4 123456789123456790.123456789123456789 123456789123456787.876543210876543211 NULL 109890109097814272.043109406191131436
1000000073899961059796.725866332


-- !query 22
select id, a*10, b/10 from decimals_test order by id
set spark.sql.decimalOperations.allowPrecisionLoss=false
-- !query 22 schema
struct<id:int,(CAST(a AS DECIMAL(38,18)) * CAST(CAST(10 AS DECIMAL(2,0)) AS DECIMAL(38,18))):decimal(38,18),(CAST(b AS DECIMAL(38,18)) / CAST(CAST(10 AS DECIMAL(2,0)) AS DECIMAL(38,18))):decimal(38,19)>
struct<key:string,value:string>
-- !query 22 output
1 1000 99.9
2 123451.23 1234.5123
3 1.234567891011 123.41
4 1234567891234567890 0.1123456789123456789
spark.sql.decimalOperations.allowPrecisionLoss false


-- !query 23
select 10.3 * 3.0
select id, a+b, a-b, a*b, a/b from decimals_test order by id
-- !query 23 schema
struct<(CAST(10.3 AS DECIMAL(3,1)) * CAST(3.0 AS DECIMAL(3,1))):decimal(6,2)>
struct<id:int,(a + b):decimal(38,18),(a - b):decimal(38,18),(a * b):decimal(38,36),(a / b):decimal(38,18)>
-- !query 23 output
30.9
1 1099 -899 NULL 0.1001001001001001
2 24690.246 0 NULL 1
3 1234.2234567891011 -1233.9765432108989 NULL 0.000100037913541123
4 123456789123456790.123456789123456789 123456789123456787.876543210876543211 NULL 109890109097814272.043109406191131436


-- !query 24
select 10.3000 * 3.0
select id, a*10, b/10 from decimals_test order by id
-- !query 24 schema
struct<(CAST(10.3000 AS DECIMAL(6,4)) * CAST(3.0 AS DECIMAL(6,4))):decimal(9,5)>
struct<id:int,(CAST(a AS DECIMAL(38,18)) * CAST(CAST(10 AS DECIMAL(2,0)) AS DECIMAL(38,18))):decimal(38,18),(CAST(b AS DECIMAL(38,18)) / CAST(CAST(10 AS DECIMAL(2,0)) AS DECIMAL(38,18))):decimal(38,19)>
-- !query 24 output
30.9
1 1000 99.9
2 123451.23 1234.5123
3 1.234567891011 123.41
4 1234567891234567890 0.1123456789123456789


-- !query 25
select 10.30000 * 30.0
select 10.3 * 3.0
-- !query 25 schema
struct<(CAST(10.30000 AS DECIMAL(7,5)) * CAST(30.0 AS DECIMAL(7,5))):decimal(11,6)>
struct<(CAST(10.3 AS DECIMAL(3,1)) * CAST(3.0 AS DECIMAL(3,1))):decimal(6,2)>
-- !query 25 output
309
30.9


-- !query 26
select 10.300000000000000000 * 3.000000000000000000
select 10.3000 * 3.0
-- !query 26 schema
struct<(CAST(10.300000000000000000 AS DECIMAL(20,18)) * CAST(3.000000000000000000 AS DECIMAL(20,18))):decimal(38,36)>
struct<(CAST(10.3000 AS DECIMAL(6,4)) * CAST(3.0 AS DECIMAL(6,4))):decimal(9,5)>
-- !query 26 output
30.9


-- !query 27
select 10.300000000000000000 * 3.0000000000000000000
select 10.30000 * 30.0
-- !query 27 schema
struct<(CAST(10.300000000000000000 AS DECIMAL(21,19)) * CAST(3.0000000000000000000 AS DECIMAL(21,19))):decimal(38,37)>
struct<(CAST(10.30000 AS DECIMAL(7,5)) * CAST(30.0 AS DECIMAL(7,5))):decimal(11,6)>
-- !query 27 output
NULL
309


-- !query 28
select (5e36 + 0.1) + 5e36
select 10.300000000000000000 * 3.000000000000000000
-- !query 28 schema
struct<(CAST((CAST(5E+36 AS DECIMAL(38,1)) + CAST(0.1 AS DECIMAL(38,1))) AS DECIMAL(38,1)) + CAST(5E+36 AS DECIMAL(38,1))):decimal(38,1)>
struct<(CAST(10.300000000000000000 AS DECIMAL(20,18)) * CAST(3.000000000000000000 AS DECIMAL(20,18))):decimal(38,36)>
-- !query 28 output
NULL
30.9


-- !query 29
select (-4e36 - 0.1) - 7e36
select 10.300000000000000000 * 3.0000000000000000000
-- !query 29 schema
struct<(CAST((CAST(-4E+36 AS DECIMAL(38,1)) - CAST(0.1 AS DECIMAL(38,1))) AS DECIMAL(38,1)) - CAST(7E+36 AS DECIMAL(38,1))):decimal(38,1)>
struct<(CAST(10.300000000000000000 AS DECIMAL(21,19)) * CAST(3.0000000000000000000 AS DECIMAL(21,19))):decimal(38,37)>
-- !query 29 output
NULL


-- !query 30
select 12345678901234567890.0 * 12345678901234567890.0
select 2.35E10 * 1.0
-- !query 30 schema
struct<(12345678901234567890.0 * 12345678901234567890.0):decimal(38,2)>
struct<(CAST(2.35E+10 AS DECIMAL(12,1)) * CAST(1.0 AS DECIMAL(12,1))):decimal(6,-7)>
-- !query 30 output
NULL
23500000000


-- !query 31
select 1e35 / 0.1
select (5e36 + 0.1) + 5e36
-- !query 31 schema
struct<(CAST(1E+35 AS DECIMAL(37,1)) / CAST(0.1 AS DECIMAL(37,1))):decimal(38,3)>
struct<(CAST((CAST(5E+36 AS DECIMAL(38,1)) + CAST(0.1 AS DECIMAL(38,1))) AS DECIMAL(38,1)) + CAST(5E+36 AS DECIMAL(38,1))):decimal(38,1)>
-- !query 31 output
NULL


-- !query 32
select 12345678912345678912345678912.1234567 + 9999999999999999999999999999999.12345
select (-4e36 - 0.1) - 7e36
-- !query 32 schema
struct<(CAST(12345678912345678912345678912.1234567 AS DECIMAL(38,7)) + CAST(9999999999999999999999999999999.12345 AS DECIMAL(38,7))):decimal(38,7)>
struct<(CAST((CAST(-4E+36 AS DECIMAL(38,1)) - CAST(0.1 AS DECIMAL(38,1))) AS DECIMAL(38,1)) - CAST(7E+36 AS DECIMAL(38,1))):decimal(38,1)>
-- !query 32 output
NULL


-- !query 33
select 123456789123456789.1234567890 * 1.123456789123456789
select 12345678901234567890.0 * 12345678901234567890.0
-- !query 33 schema
struct<(CAST(123456789123456789.1234567890 AS DECIMAL(36,18)) * CAST(1.123456789123456789 AS DECIMAL(36,18))):decimal(38,28)>
struct<(12345678901234567890.0 * 12345678901234567890.0):decimal(38,2)>
-- !query 33 output
NULL


-- !query 34
select 12345678912345.123456789123 / 0.000000012345678
select 1e35 / 0.1
-- !query 34 schema
struct<(CAST(12345678912345.123456789123 AS DECIMAL(29,15)) / CAST(1.2345678E-8 AS DECIMAL(29,15))):decimal(38,18)>
struct<(CAST(1E+35 AS DECIMAL(37,1)) / CAST(0.1 AS DECIMAL(37,1))):decimal(38,3)>
-- !query 34 output
NULL


-- !query 35
drop table decimals_test
select 1.2345678901234567890E30 * 1.2345678901234567890E25
-- !query 35 schema
struct<>
struct<(CAST(1.2345678901234567890E+30 AS DECIMAL(25,-6)) * CAST(1.2345678901234567890E+25 AS DECIMAL(25,-6))):decimal(38,-17)>
-- !query 35 output
NULL


-- !query 36
select 12345678912345678912345678912.1234567 + 9999999999999999999999999999999.12345
-- !query 36 schema
struct<(CAST(12345678912345678912345678912.1234567 AS DECIMAL(38,7)) + CAST(9999999999999999999999999999999.12345 AS DECIMAL(38,7))):decimal(38,7)>
-- !query 36 output
NULL


-- !query 37
select 123456789123456789.1234567890 * 1.123456789123456789
-- !query 37 schema
struct<(CAST(123456789123456789.1234567890 AS DECIMAL(36,18)) * CAST(1.123456789123456789 AS DECIMAL(36,18))):decimal(38,28)>
-- !query 37 output
NULL


-- !query 38
select 12345678912345.123456789123 / 0.000000012345678
-- !query 38 schema
struct<(CAST(12345678912345.123456789123 AS DECIMAL(29,15)) / CAST(1.2345678E-8 AS DECIMAL(29,15))):decimal(38,18)>
-- !query 38 output
NULL


-- !query 39
drop table decimals_test
-- !query 39 schema
struct<>
-- !query 39 output

0 comments on commit 1582945

Please sign in to comment.