diff --git a/python/docs/source/migration_guide/pyspark_3.3_to_3.4.rst b/python/docs/source/migration_guide/pyspark_3.3_to_3.4.rst index e6f89b72c63dd..9f8cf545e28c2 100644 --- a/python/docs/source/migration_guide/pyspark_3.3_to_3.4.rst +++ b/python/docs/source/migration_guide/pyspark_3.3_to_3.4.rst @@ -20,4 +20,4 @@ Upgrading from PySpark 3.3 to 3.4 ================================= -* In Spark 3.4, the schema of an array column is inferred by merging the schemas of all elements in the array. To restore the previous behavior where the schema is only inferred from the first element, set the spark configuration as follows: ``{"spark.sql.pyspark.legacy.inferArrayTypeFromFirstElement.enabled": True}``. \ No newline at end of file +* In Spark 3.4, the schema of an array column is inferred by merging the schemas of all elements in the array. To restore the previous behavior where the schema is only inferred from the first element, you can set ``spark.sql.pyspark.legacy.inferArrayTypeFromFirstElement.enabled`` to ``true``. \ No newline at end of file diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py index 4307dd4992844..ef0ad82dbb97a 100644 --- a/python/pyspark/sql/tests/test_types.py +++ b/python/pyspark/sql/tests/test_types.py @@ -291,8 +291,8 @@ def test_infer_array_merge_element_types(self): data = [ArrayRow([1, None], [None, 2])] - nestedRdd = self.sc.parallelize(data) - df = self.spark.createDataFrame(nestedRdd) + rdd = self.sc.parallelize(data) + df = self.spark.createDataFrame(rdd) self.assertEqual(Row(f1=[1, None], f2=[None, 2]), df.first()) df = self.spark.createDataFrame(data) @@ -323,8 +323,8 @@ def test_infer_array_element_type_empty(self): data = [ArrayRow([]), ArrayRow([None]), ArrayRow([1])] - nestedRdd = self.sc.parallelize(data) - df = self.spark.createDataFrame(nestedRdd) + rdd = self.sc.parallelize(data) + df = self.spark.createDataFrame(rdd) rows = df.collect() self.assertEqual(Row(f1=[]), rows[0]) self.assertEqual(Row(f1=[None]), rows[1]) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 3b6d21da9e49a..6dfc46f6a3fd4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -3763,7 +3763,7 @@ object SQLConf { .doc("PySpark's SparkSession.createDataFrame infers the element type of an array from all " + "values in the array by default. If this config is set to true, it restores the legacy " + "behavior of only inferring the type from the first array element.") - .version("3.3.0") + .version("3.4.0") .booleanConf .createWithDefault(false)