Skip to content

Commit

Permalink
PR feedback
Browse files (browse the repository at this point in the history)
  • Loading branch information
physinet committed May 17, 2022
1 parent a6b207e commit 7cde786
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 6 deletions.
2 changes: 1 addition & 1 deletion python/docs/source/migration_guide/pyspark_3.3_to_3.4.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@
Upgrading from PySpark 3.3 to 3.4
=================================

* In Spark 3.4, the schema of an array column is inferred by merging the schemas of all elements in the array. To restore the previous behavior where the schema is only inferred from the first element, set the Spark configuration as follows: ``{"spark.sql.pyspark.legacy.inferArrayTypeFromFirstElement.enabled": True}``.
* In Spark 3.4, the schema of an array column is inferred by merging the schemas of all elements in the array. To restore the previous behavior where the schema is only inferred from the first element, you can set ``spark.sql.pyspark.legacy.inferArrayTypeFromFirstElement.enabled`` to ``true``.
8 changes: 4 additions & 4 deletions python/pyspark/sql/tests/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,8 +291,8 @@ def test_infer_array_merge_element_types(self):

data = [ArrayRow([1, None], [None, 2])]

nestedRdd = self.sc.parallelize(data)
df = self.spark.createDataFrame(nestedRdd)
rdd = self.sc.parallelize(data)
df = self.spark.createDataFrame(rdd)
self.assertEqual(Row(f1=[1, None], f2=[None, 2]), df.first())

df = self.spark.createDataFrame(data)
Expand Down Expand Up @@ -323,8 +323,8 @@ def test_infer_array_element_type_empty(self):

data = [ArrayRow([]), ArrayRow([None]), ArrayRow([1])]

nestedRdd = self.sc.parallelize(data)
df = self.spark.createDataFrame(nestedRdd)
rdd = self.sc.parallelize(data)
df = self.spark.createDataFrame(rdd)
rows = df.collect()
self.assertEqual(Row(f1=[]), rows[0])
self.assertEqual(Row(f1=[None]), rows[1])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3763,7 +3763,7 @@ object SQLConf {
.doc("PySpark's SparkSession.createDataFrame infers the element type of an array from all " +
"values in the array by default. If this config is set to true, it restores the legacy " +
"behavior of only inferring the type from the first array element.")
.version("3.3.0")
.version("3.4.0")
.booleanConf
.createWithDefault(false)

Expand Down

0 comments on commit 7cde786

Please sign in to comment.