Skip to content

Commit

Permalink
[SPARK-35098][PYTHON] Re-enable pandas-on-Spark test cases
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?

Re-enable some pandas-on-Spark test cases.

### Why are the changes needed?

The pandas version used in GitHub Actions has been upgraded, so we can re-enable some pandas-on-Spark test cases.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Unit tests.

Closes #32682 from xinrong-databricks/enable_tests.

Authored-by: Xinrong Meng <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
  • Loading branch information
xinrong-meng authored and HyukjinKwon committed May 27, 2021
1 parent d6d3209 commit 79a2a46
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 36 deletions.
67 changes: 33 additions & 34 deletions python/pyspark/pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1253,40 +1253,39 @@ def test_monotonic(self):
self.assert_eq(psmidx.is_monotonic_increasing, False)
self.assert_eq(psmidx.is_monotonic_decreasing, False)

# Disable the test cases below because pandas returns `True` or `False` randomly.
# else:
# [(-5, None), (-4, None), (-3, None), (-2, None), (-1, None)]
# psdf = ps.DataFrame({"a": [-5, -4, -3, -2, -1], "b": [1, 1, 1, 1, 1]})
# psdf["b"] = None
# psmidx = psdf.set_index(["a", "b"]).index
# pmidx = psmidx.to_pandas()
# self.assert_eq(psmidx.is_monotonic_increasing, pmidx.is_monotonic_increasing)
# self.assert_eq(psmidx.is_monotonic_decreasing, pmidx.is_monotonic_decreasing)

# [(None, "e"), (None, "c"), (None, "b"), (None, "d"), (None, "a")]
# psdf = ps.DataFrame({"a": [1, 1, 1, 1, 1], "b": ["e", "c", "b", "d", "a"]})
# psdf["a"] = None
# psmidx = psdf.set_index(["a", "b"]).index
# pmidx = psmidx.to_pandas()
# self.assert_eq(psmidx.is_monotonic_increasing, pmidx.is_monotonic_increasing)
# self.assert_eq(psmidx.is_monotonic_decreasing, pmidx.is_monotonic_decreasing)

# [(None, None), (None, None), (None, None), (None, None), (None, None)]
# psdf = ps.DataFrame({"a": [1, 1, 1, 1, 1], "b": [1, 1, 1, 1, 1]})
# psdf["a"] = None
# psdf["b"] = None
# psmidx = psdf.set_index(["a", "b"]).index
# pmidx = psmidx.to_pandas()
# self.assert_eq(psmidx.is_monotonic_increasing, pmidx.is_monotonic_increasing)
# self.assert_eq(psmidx.is_monotonic_decreasing, pmidx.is_monotonic_decreasing)
# [(None, None)]
# psdf = ps.DataFrame({"a": [1], "b": [1]})
# psdf["a"] = None
# psdf["b"] = None
# psmidx = psdf.set_index(["a", "b"]).index
# pmidx = psmidx.to_pandas()
# self.assert_eq(psmidx.is_monotonic_increasing, pmidx.is_monotonic_increasing)
# self.assert_eq(psmidx.is_monotonic_decreasing, pmidx.is_monotonic_decreasing)
else:
[(-5, None), (-4, None), (-3, None), (-2, None), (-1, None)]
psdf = ps.DataFrame({"a": [-5, -4, -3, -2, -1], "b": [1, 1, 1, 1, 1]})
psdf["b"] = None
psmidx = psdf.set_index(["a", "b"]).index
pmidx = psmidx.to_pandas()
self.assert_eq(psmidx.is_monotonic_increasing, pmidx.is_monotonic_increasing)
self.assert_eq(psmidx.is_monotonic_decreasing, pmidx.is_monotonic_decreasing)

[(None, "e"), (None, "c"), (None, "b"), (None, "d"), (None, "a")]
psdf = ps.DataFrame({"a": [1, 1, 1, 1, 1], "b": ["e", "c", "b", "d", "a"]})
psdf["a"] = None
psmidx = psdf.set_index(["a", "b"]).index
pmidx = psmidx.to_pandas()
self.assert_eq(psmidx.is_monotonic_increasing, pmidx.is_monotonic_increasing)
self.assert_eq(psmidx.is_monotonic_decreasing, pmidx.is_monotonic_decreasing)

[(None, None), (None, None), (None, None), (None, None), (None, None)]
psdf = ps.DataFrame({"a": [1, 1, 1, 1, 1], "b": [1, 1, 1, 1, 1]})
psdf["a"] = None
psdf["b"] = None
psmidx = psdf.set_index(["a", "b"]).index
pmidx = psmidx.to_pandas()
self.assert_eq(psmidx.is_monotonic_increasing, pmidx.is_monotonic_increasing)
self.assert_eq(psmidx.is_monotonic_decreasing, pmidx.is_monotonic_decreasing)
[(None, None)]
psdf = ps.DataFrame({"a": [1], "b": [1]})
psdf["a"] = None
psdf["b"] = None
psmidx = psdf.set_index(["a", "b"]).index
pmidx = psmidx.to_pandas()
self.assert_eq(psmidx.is_monotonic_increasing, pmidx.is_monotonic_increasing)
self.assert_eq(psmidx.is_monotonic_decreasing, pmidx.is_monotonic_decreasing)

def test_difference(self):
# Index
Expand Down
3 changes: 1 addition & 2 deletions python/pyspark/pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1561,8 +1561,7 @@ def test_astype(self):
psser = ps.Series(pser)

self.assert_eq(psser.astype(bool), pser.astype(bool))
# Comment out the test case below because pandas returns `None` or `nan` randomly
# self.assert_eq(psser.astype(str), pser.astype(str))
self.assert_eq(psser.astype(str), pser.astype(str))

if extension_object_dtypes_available:
from pandas import BooleanDtype, StringDtype
Expand Down

0 comments on commit 79a2a46

Please sign in to comment.