Skip to content

Commit

Permalink
[SPARK-42307][SQL] Assign name for error _LEGACY_ERROR_TEMP_2232
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?

In this PR, I propose to replace the legacy error name `_LEGACY_ERROR_TEMP_2232` with `ROW_VALUE_IS_NULL`, and add a test case for it.

### Why are the changes needed?

A proper error-condition name improves the user experience with Spark SQL.

### Does this PR introduce _any_ user-facing change?

Yes.

### How was this patch tested?

Ran all the tests in the suite:
```
build/sbt "testOnly *org.apache.spark.sql.RowSuite"
```

### Was this patch authored or co-authored using generative AI tooling?

No

Closes apache#47354 from junyuc25/SPARK-42307.

Lead-authored-by: junyuc25 <[email protected]>
Co-authored-by: junyuc25 <=>
Signed-off-by: Max Gekk <[email protected]>
  • Loading branch information
junyuc25 authored and MaxGekk committed Aug 13, 2024
1 parent 3f3d024 commit f081650
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 19 deletions.
11 changes: 6 additions & 5 deletions common/utils/src/main/resources/error/error-conditions.json
Original file line number Diff line number Diff line change
Expand Up @@ -3772,6 +3772,12 @@
],
"sqlState" : "21000"
},
"ROW_VALUE_IS_NULL" : {
"message" : [
"Found NULL in a row at the index <index>, expected a non-NULL value."
],
"sqlState" : "22023"
},
"RULE_ID_NOT_FOUND" : {
"message" : [
"Not found an id for the rule name \"<ruleName>\". Please modify RuleIdCollection.scala if you are adding a new rule."
Expand Down Expand Up @@ -7285,11 +7291,6 @@
"Primitive types are not supported."
]
},
"_LEGACY_ERROR_TEMP_2232" : {
"message" : [
"Value at index <index> is null."
]
},
"_LEGACY_ERROR_TEMP_2233" : {
"message" : [
"Only Data Sources providing FileFormat are supported: <providingClass>."
Expand Down
16 changes: 8 additions & 8 deletions sql/api/src/main/scala/org/apache/spark/sql/Row.scala
Original file line number Diff line number Diff line change
Expand Up @@ -219,39 +219,39 @@ trait Row extends Serializable {
* Returns the value at position i as a primitive boolean.
*
* @throws ClassCastException when data type does not match.
* @throws NullPointerException when value is null.
* @throws org.apache.spark.SparkRuntimeException when value is null.
*/
def getBoolean(i: Int): Boolean = getAnyValAs[Boolean](i)

/**
* Returns the value at position i as a primitive byte.
*
* @throws ClassCastException when data type does not match.
* @throws NullPointerException when value is null.
* @throws org.apache.spark.SparkRuntimeException when value is null.
*/
def getByte(i: Int): Byte = getAnyValAs[Byte](i)

/**
* Returns the value at position i as a primitive short.
*
* @throws ClassCastException when data type does not match.
* @throws NullPointerException when value is null.
* @throws org.apache.spark.SparkRuntimeException when value is null.
*/
def getShort(i: Int): Short = getAnyValAs[Short](i)

/**
* Returns the value at position i as a primitive int.
*
* @throws ClassCastException when data type does not match.
* @throws NullPointerException when value is null.
* @throws org.apache.spark.SparkRuntimeException when value is null.
*/
def getInt(i: Int): Int = getAnyValAs[Int](i)

/**
* Returns the value at position i as a primitive long.
*
* @throws ClassCastException when data type does not match.
* @throws NullPointerException when value is null.
* @throws org.apache.spark.SparkRuntimeException when value is null.
*/
def getLong(i: Int): Long = getAnyValAs[Long](i)

Expand All @@ -260,15 +260,15 @@ trait Row extends Serializable {
* Throws an exception if the type mismatches or if the value is null.
*
* @throws ClassCastException when data type does not match.
* @throws NullPointerException when value is null.
* @throws org.apache.spark.SparkRuntimeException when value is null.
*/
def getFloat(i: Int): Float = getAnyValAs[Float](i)

/**
* Returns the value at position i as a primitive double.
*
* @throws ClassCastException when data type does not match.
* @throws NullPointerException when value is null.
* @throws org.apache.spark.SparkRuntimeException when value is null.
*/
def getDouble(i: Int): Double = getAnyValAs[Double](i)

Expand Down Expand Up @@ -530,7 +530,7 @@ trait Row extends Serializable {
*
* @throws UnsupportedOperationException when schema is not defined.
* @throws ClassCastException when data type does not match.
* @throws NullPointerException when value is null.
* @throws org.apache.spark.SparkRuntimeException when value is null.
*/
private def getAnyValAs[T <: AnyVal](i: Int): T =
if (isNullAt(i)) throw DataTypeErrors.valueIsNullError(i)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,8 +272,8 @@ private[sql] object DataTypeErrors extends DataTypeErrorsBase {
}

def valueIsNullError(index: Int): Throwable = {
new SparkException(
errorClass = "_LEGACY_ERROR_TEMP_2232",
new SparkRuntimeException(
errorClass = "ROW_VALUE_IS_NULL",
messageParameters = Map(
"index" -> index.toString),
cause = null)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import org.scalatest.funspec.AnyFunSpec
import org.scalatest.matchers.must.Matchers
import org.scalatest.matchers.should.Matchers._

import org.apache.spark.{SparkException, SparkIllegalArgumentException, SparkUnsupportedOperationException}
import org.apache.spark.{SparkIllegalArgumentException, SparkRuntimeException, SparkUnsupportedOperationException}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{GenericRow, GenericRowWithSchema}
import org.apache.spark.sql.types._
Expand Down Expand Up @@ -87,8 +87,9 @@ class RowTest extends AnyFunSpec with Matchers {
sampleRowWithoutCol3.getValuesMap[String](List("col1", "col2")) shouldBe expected
}

it("getAs() on type extending AnyVal throws an exception when accessing field that is null") {
intercept[SparkException] {
it("getAnyValAs() on type extending AnyVal throws an exception when accessing " +
"field that is null") {
intercept[SparkRuntimeException] {
sampleRowWithoutCol3.getInt(sampleRowWithoutCol3.fieldIndex("col3"))
}
}
Expand Down
15 changes: 14 additions & 1 deletion sql/core/src/test/scala/org/apache/spark/sql/RowSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.spark.sql

import org.apache.spark.{SparkFunSuite, SparkUnsupportedOperationException}
import org.apache.spark.{SparkFunSuite, SparkRuntimeException, SparkUnsupportedOperationException}
import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, SpecificInternalRow}
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.sql.types._
Expand Down Expand Up @@ -123,4 +123,17 @@ class RowSuite extends SparkFunSuite with SharedSparkSession {
parameters = Map("methodName" -> "fieldIndex", "className" -> "Row", "fieldName" -> "`foo`")
)
}

// SPARK-42307: reading a primitive value (getLong) from a null column must raise
// SparkRuntimeException with error class ROW_VALUE_IS_NULL and the column index
// as the "index" message parameter.
test("SPARK-42307: get a value from a null column should result in error") {
val position = 0
// A single-column row whose only value is null.
val rowWithNullValue = Row.fromSeq(Seq(null))

checkError(
exception = intercept[SparkRuntimeException] {
rowWithNullValue.getLong(position)
},
errorClass = "ROW_VALUE_IS_NULL",
parameters = Map("index" -> position.toString)
)
}
}

0 comments on commit f081650

Please sign in to comment.