[SPARK-47454][PYTHON][CONNECT][TESTS] Split `pyspark.sql.tests.test_dataframe`

### What changes were proposed in this pull request?
Split `pyspark.sql.tests.test_dataframe` into several smaller test modules.

### Why are the changes needed?
To improve test parallelism in CI.

### Does this PR introduce _any_ user-facing change?
No, this is a test-only change.

### How was this patch tested?
Updated CI.

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes apache#45580 from zhengruifeng/break_test_df.

Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
zhengruifeng authored and HyukjinKwon committed Mar 19, 2024
1 parent ef94f70 commit b6a8369
Showing 8 changed files with 1,091 additions and 863 deletions.
5 changes: 5 additions & 0 deletions dev/sparktestsupport/modules.py
@@ -509,6 +509,9 @@ def __hash__(self):
"pyspark.sql.tests.test_conf",
"pyspark.sql.tests.test_context",
"pyspark.sql.tests.test_dataframe",
"pyspark.sql.tests.test_listener",
"pyspark.sql.tests.test_observation",
"pyspark.sql.tests.test_stat",
"pyspark.sql.tests.test_datasources",
"pyspark.sql.tests.test_errors",
"pyspark.sql.tests.test_functions",
@@ -1019,6 +1022,8 @@ def __hash__(self):
"pyspark.sql.tests.connect.test_parity_functions",
"pyspark.sql.tests.connect.test_parity_group",
"pyspark.sql.tests.connect.test_parity_dataframe",
"pyspark.sql.tests.connect.test_parity_observation",
"pyspark.sql.tests.connect.test_parity_stat",
"pyspark.sql.tests.connect.test_parity_types",
"pyspark.sql.tests.connect.test_parity_column",
"pyspark.sql.tests.connect.test_parity_readwriter",
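The two hunks above only register the new modules in `python_test_goals`; the payoff is that the Python test runner can schedule each module as an independent goal. Below is a minimal illustration of why more, smaller goals parallelize better. It is not Spark's actual `dev/run-tests` implementation, and it assumes a working PySpark dev environment on `PYTHONPATH`:

```python
# Illustration only: a toy scheduler that runs each registered test goal in
# its own process, roughly what splitting a large module buys in practice.
import subprocess
import sys
from concurrent.futures import ThreadPoolExecutor

python_test_goals = [
    "pyspark.sql.tests.test_dataframe",
    "pyspark.sql.tests.test_listener",
    "pyspark.sql.tests.test_observation",
    "pyspark.sql.tests.test_stat",
]


def run_goal(goal: str) -> int:
    # Each goal is a self-contained unittest module, so it can run in isolation.
    return subprocess.call([sys.executable, "-m", "unittest", goal])


with ThreadPoolExecutor(max_workers=4) as pool:
    exit_codes = list(pool.map(run_goal, python_test_goals))

sys.exit(0 if all(code == 0 for code in exit_codes) else 1)
```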
5 changes: 0 additions & 5 deletions python/pyspark/sql/tests/connect/test_parity_dataframe.py
@@ -26,11 +26,6 @@ class DataFrameParityTests(DataFrameTestsMixin, ReusedConnectTestCase):
    def test_help_command(self):
        super().test_help_command()

    # TODO(SPARK-41625): Support Structured Streaming
    @unittest.skip("Fails in Spark Connect, should enable.")
    def test_observe_str(self):
        super().test_observe_str()

    # Spark Connect throws `IllegalArgumentException` when calling `collect` instead of `sample`.
    def test_sample(self):
        super().test_sample()
44 changes: 44 additions & 0 deletions python/pyspark/sql/tests/connect/test_parity_observation.py
@@ -0,0 +1,44 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import unittest

from pyspark.sql.tests.test_observation import DataFrameObservationTestsMixin
from pyspark.testing.connectutils import ReusedConnectTestCase


class DataFrameObservationParityTests(
    DataFrameObservationTestsMixin,
    ReusedConnectTestCase,
):
    # TODO(SPARK-41625): Support Structured Streaming
    @unittest.skip("Fails in Spark Connect, should enable.")
    def test_observe_str(self):
        super().test_observe_str()


if __name__ == "__main__":
    import unittest
    from pyspark.sql.tests.connect.test_parity_observation import *  # noqa: F401

    try:
        import xmlrunner  # type: ignore[import]

        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
    except ImportError:
        testRunner = None
    unittest.main(testRunner=testRunner, verbosity=2)
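For context, the tests moved into `test_observation` exercise `DataFrame.observe` and the `Observation` helper. A minimal usage sketch of that API follows; it is hand-written here, not excerpted from the test file, and the exact assertions in `DataFrameObservationTestsMixin` may differ:

```python
from pyspark.sql import Observation, SparkSession
from pyspark.sql import functions as sf

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["id", "name"])

# Attach named metrics to the DataFrame; they are computed alongside the query.
observation = Observation("metrics")
observed = df.observe(
    observation,
    sf.count(sf.lit(1)).alias("rows"),
    sf.max("id").alias("max_id"),
)

observed.collect()      # metrics are only populated once an action runs
print(observation.get)  # e.g. {'rows': 3, 'max_id': 3}
```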
39 changes: 39 additions & 0 deletions python/pyspark/sql/tests/connect/test_parity_stat.py
@@ -0,0 +1,39 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from pyspark.sql.tests.test_stat import DataFrameStatTestsMixin
from pyspark.testing.connectutils import ReusedConnectTestCase


class DataFrameStatParityTests(
    DataFrameStatTestsMixin,
    ReusedConnectTestCase,
):
    pass


if __name__ == "__main__":
    import unittest
    from pyspark.sql.tests.connect.test_parity_stat import *  # noqa: F401

    try:
        import xmlrunner  # type: ignore[import]

        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
    except ImportError:
        testRunner = None
    unittest.main(testRunner=testRunner, verbosity=2)
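Both new parity suites reuse test bodies through mixin classes (`DataFrameObservationTestsMixin`, `DataFrameStatTestsMixin`) defined in the corresponding non-Connect modules split out of `test_dataframe`. Those modules' diffs are not shown above; the sketch below only illustrates the assumed structure, with `test_crosstab` as a hypothetical example method and `pyspark.testing.sqlutils.ReusedSQLTestCase` assumed as the classic-session base class:

```python
import unittest

from pyspark.testing.sqlutils import ReusedSQLTestCase


class DataFrameStatTestsMixin:
    # Test bodies live in a plain mixin so both the classic suite and the
    # Spark Connect parity suite can inherit them unchanged.
    def test_crosstab(self):  # hypothetical example body
        df = self.spark.createDataFrame([(1, 1), (1, 2), (2, 1)], ["a", "b"])
        # One output row per distinct value of column "a".
        self.assertEqual(len(df.stat.crosstab("a", "b").collect()), 2)


class DataFrameStatTests(DataFrameStatTestsMixin, ReusedSQLTestCase):
    pass


if __name__ == "__main__":
    unittest.main()
```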