diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py
index 00bd1d9a6f834..ad458988d4fd1 100644
--- a/python/pyspark/sql/tests/test_types.py
+++ b/python/pyspark/sql/tests/test_types.py
@@ -1275,6 +1275,13 @@ def test_yearmonth_interval_type(self):
         schema3 = self.spark.sql("SELECT INTERVAL '8' MONTH AS interval").schema
         self.assertEqual(schema3.fields[0].dataType, YearMonthIntervalType(1, 1))
 
+    def test_infer_array_element_type_with_struct(self):
+        # SPARK-48248: Nested array to respect legacy conf of inferArrayTypeFromFirstElement
+        with self.sql_conf(
+            {"spark.sql.pyspark.legacy.inferArrayTypeFromFirstElement.enabled": True}
+        ):
+            self.assertEqual([[1, None]], self.spark.createDataFrame([[[[1, "a"]]]]).first()[0])
+
 
 class DataTypeTests(unittest.TestCase):
     # regression test for SPARK-6055
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 24964c56e2e89..a2a8796957623 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -1606,13 +1606,27 @@ def _infer_type(
         if len(obj) > 0:
             if infer_array_from_first_element:
                 return ArrayType(
-                    _infer_type(obj[0], infer_dict_as_struct, prefer_timestamp_ntz), True
+                    _infer_type(
+                        obj[0],
+                        infer_dict_as_struct,
+                        infer_array_from_first_element,
+                        prefer_timestamp_ntz,
+                    ),
+                    True,
                 )
             else:
                 return ArrayType(
                     reduce(
                         _merge_type,
-                        (_infer_type(v, infer_dict_as_struct, prefer_timestamp_ntz) for v in obj),
+                        (
+                            _infer_type(
+                                v,
+                                infer_dict_as_struct,
+                                infer_array_from_first_element,
+                                prefer_timestamp_ntz,
+                            )
+                            for v in obj
+                        ),
                     ),
                     True,
                 )