[SPARK-47454][PYTHON][CONNECT][TESTS] Split `pyspark.sql.tests.test_dataframe`

### What changes were proposed in this pull request?
Split `pyspark.sql.tests.test_dataframe` into several smaller test modules.

### Why are the changes needed?
To improve test parallelism in CI.

### Does this PR introduce _any_ user-facing change?
No, this is a test-only change.

### How was this patch tested?
Updated CI.

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes apache#45580 from zhengruifeng/break_test_df.

Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
zhengruifeng authored and HyukjinKwon committed Mar 19, 2024
1 parent ef94f70 commit b6a8369
Showing 8 changed files with 1,091 additions and 863 deletions.
5 changes: 5 additions & 0 deletions dev/sparktestsupport/modules.py
@@ -509,6 +509,9 @@ def __hash__(self):
"pyspark.sql.tests.test_conf",
"pyspark.sql.tests.test_context",
"pyspark.sql.tests.test_dataframe",
"pyspark.sql.tests.test_listener",
"pyspark.sql.tests.test_observation",
"pyspark.sql.tests.test_stat",
"pyspark.sql.tests.test_datasources",
"pyspark.sql.tests.test_errors",
"pyspark.sql.tests.test_functions",
@@ -1019,6 +1022,8 @@ def __hash__(self):
"pyspark.sql.tests.connect.test_parity_functions",
"pyspark.sql.tests.connect.test_parity_group",
"pyspark.sql.tests.connect.test_parity_dataframe",
"pyspark.sql.tests.connect.test_parity_observation",
"pyspark.sql.tests.connect.test_parity_stat",
"pyspark.sql.tests.connect.test_parity_types",
"pyspark.sql.tests.connect.test_parity_column",
"pyspark.sql.tests.connect.test_parity_readwriter",
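The two hunks above only register the new modules in `python_test_goals`; the payoff is that the Python test runner can schedule each module as an independent goal. Below is a minimal illustration of why more, smaller goals parallelize better. It is not Spark's actual `dev/run-tests` implementation, and it assumes a working PySpark dev environment on `PYTHONPATH`:

```python
# Illustration only: a toy scheduler that runs each registered test goal in
# its own process, roughly what splitting a large module buys in practice.
import subprocess
import sys
from concurrent.futures import ThreadPoolExecutor

python_test_goals = [
    "pyspark.sql.tests.test_dataframe",
    "pyspark.sql.tests.test_listener",
    "pyspark.sql.tests.test_observation",
    "pyspark.sql.tests.test_stat",
]


def run_goal(goal: str) -> int:
    # Each goal is a self-contained unittest module, so it can run in isolation.
    return subprocess.call([sys.executable, "-m", "unittest", goal])


with ThreadPoolExecutor(max_workers=4) as pool:
    exit_codes = list(pool.map(run_goal, python_test_goals))

sys.exit(0 if all(code == 0 for code in exit_codes) else 1)
```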
5 changes: 0 additions & 5 deletions python/pyspark/sql/tests/connect/test_parity_dataframe.py
@@ -26,11 +26,6 @@ class DataFrameParityTests(DataFrameTestsMixin, ReusedConnectTestCase):
    def test_help_command(self):
        super().test_help_command()

    # TODO(SPARK-41625): Support Structured Streaming
    @unittest.skip("Fails in Spark Connect, should enable.")
    def test_observe_str(self):
        super().test_observe_str()

    # Spark Connect throws `IllegalArgumentException` when calling `collect` instead of `sample`.
    def test_sample(self):
        super().test_sample()
44 changes: 44 additions & 0 deletions python/pyspark/sql/tests/connect/test_parity_observation.py
@@ -0,0 +1,44 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import unittest

from pyspark.sql.tests.test_observation import DataFrameObservationTestsMixin
from pyspark.testing.connectutils import ReusedConnectTestCase


class DataFrameObservationParityTests(
    DataFrameObservationTestsMixin,
    ReusedConnectTestCase,
):
    # TODO(SPARK-41625): Support Structured Streaming
    @unittest.skip("Fails in Spark Connect, should enable.")
    def test_observe_str(self):
        super().test_observe_str()


if __name__ == "__main__":
    import unittest
    from pyspark.sql.tests.connect.test_parity_observation import *  # noqa: F401

    try:
        import xmlrunner  # type: ignore[import]

        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
    except ImportError:
        testRunner = None
    unittest.main(testRunner=testRunner, verbosity=2)
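For context, the tests moved into `test_observation` exercise `DataFrame.observe` and the `Observation` helper. A minimal usage sketch of that API follows; it is hand-written here, not excerpted from the test file, and the exact assertions in `DataFrameObservationTestsMixin` may differ:

```python
from pyspark.sql import Observation, SparkSession
from pyspark.sql import functions as sf

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["id", "name"])

# Attach named metrics to the DataFrame; they are computed alongside the query.
observation = Observation("metrics")
observed = df.observe(
    observation,
    sf.count(sf.lit(1)).alias("rows"),
    sf.max("id").alias("max_id"),
)

observed.collect()      # metrics are only populated once an action runs
print(observation.get)  # e.g. {'rows': 3, 'max_id': 3}
```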
39 changes: 39 additions & 0 deletions python/pyspark/sql/tests/connect/test_parity_stat.py
@@ -0,0 +1,39 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from pyspark.sql.tests.test_stat import DataFrameStatTestsMixin
from pyspark.testing.connectutils import ReusedConnectTestCase


class DataFrameStatParityTests(
    DataFrameStatTestsMixin,
    ReusedConnectTestCase,
):
    pass


if __name__ == "__main__":
    import unittest
    from pyspark.sql.tests.connect.test_parity_stat import *  # noqa: F401

    try:
        import xmlrunner  # type: ignore[import]

        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
    except ImportError:
        testRunner = None
    unittest.main(testRunner=testRunner, verbosity=2)
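Both new parity suites reuse test bodies through mixin classes (`DataFrameObservationTestsMixin`, `DataFrameStatTestsMixin`) defined in the corresponding non-Connect modules split out of `test_dataframe`. Those modules' diffs are not shown above; the sketch below only illustrates the assumed structure, with `test_crosstab` as a hypothetical example method and `pyspark.testing.sqlutils.ReusedSQLTestCase` assumed as the classic-session base class:

```python
import unittest

from pyspark.testing.sqlutils import ReusedSQLTestCase


class DataFrameStatTestsMixin:
    # Test bodies live in a plain mixin so both the classic suite and the
    # Spark Connect parity suite can inherit them unchanged.
    def test_crosstab(self):  # hypothetical example body
        df = self.spark.createDataFrame([(1, 1), (1, 2), (2, 1)], ["a", "b"])
        # One output row per distinct value of column "a".
        self.assertEqual(len(df.stat.crosstab("a", "b").collect()), 2)


class DataFrameStatTests(DataFrameStatTestsMixin, ReusedSQLTestCase):
    pass


if __name__ == "__main__":
    unittest.main()
```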