[SPARK-35098][PYTHON] Re-enable pandas-on-Spark test cases

### What changes were proposed in this pull request?

Re-enable some pandas-on-Spark test cases.

### Why are the changes needed?

The pandas version used in GitHub Actions has been upgraded, so we can re-enable some pandas-on-Spark test cases.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Unit tests.

Closes #32682 from xinrong-databricks/enable_tests.

Authored-by: Xinrong Meng <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
apache · May 27, 2021 · 79a2a46 · 79a2a46
1 parent d6d3209
commit 79a2a46
Show file tree

Hide file tree

Showing 2 changed files with 34 additions and 36 deletions.
diff --git a/python/pyspark/pandas/tests/indexes/test_base.py b/python/pyspark/pandas/tests/indexes/test_base.py
@@ -1253,40 +1253,39 @@ def test_monotonic(self):
             self.assert_eq(psmidx.is_monotonic_increasing, False)
             self.assert_eq(psmidx.is_monotonic_decreasing, False)
 
-        # Disable the test cases below because pandas returns `True` or `False` randomly.
-        # else:
-        #     [(-5, None), (-4, None), (-3, None), (-2, None), (-1, None)]
-        #     psdf = ps.DataFrame({"a": [-5, -4, -3, -2, -1], "b": [1, 1, 1, 1, 1]})
-        #     psdf["b"] = None
-        #     psmidx = psdf.set_index(["a", "b"]).index
-        #     pmidx = psmidx.to_pandas()
-        #     self.assert_eq(psmidx.is_monotonic_increasing, pmidx.is_monotonic_increasing)
-        #     self.assert_eq(psmidx.is_monotonic_decreasing, pmidx.is_monotonic_decreasing)
-
-        #     [(None, "e"), (None, "c"), (None, "b"), (None, "d"), (None, "a")]
-        #     psdf = ps.DataFrame({"a": [1, 1, 1, 1, 1], "b": ["e", "c", "b", "d", "a"]})
-        #     psdf["a"] = None
-        #     psmidx = psdf.set_index(["a", "b"]).index
-        #     pmidx = psmidx.to_pandas()
-        #     self.assert_eq(psmidx.is_monotonic_increasing, pmidx.is_monotonic_increasing)
-        #     self.assert_eq(psmidx.is_monotonic_decreasing, pmidx.is_monotonic_decreasing)
-
-        #     [(None, None), (None, None), (None, None), (None, None), (None, None)]
-        #     psdf = ps.DataFrame({"a": [1, 1, 1, 1, 1], "b": [1, 1, 1, 1, 1]})
-        #     psdf["a"] = None
-        #     psdf["b"] = None
-        #     psmidx = psdf.set_index(["a", "b"]).index
-        #     pmidx = psmidx.to_pandas()
-        #     self.assert_eq(psmidx.is_monotonic_increasing, pmidx.is_monotonic_increasing)
-        #     self.assert_eq(psmidx.is_monotonic_decreasing, pmidx.is_monotonic_decreasing)
-        #     [(None, None)]
-        #     psdf = ps.DataFrame({"a": [1], "b": [1]})
-        #     psdf["a"] = None
-        #     psdf["b"] = None
-        #     psmidx = psdf.set_index(["a", "b"]).index
-        #     pmidx = psmidx.to_pandas()
-        #     self.assert_eq(psmidx.is_monotonic_increasing, pmidx.is_monotonic_increasing)
-        #     self.assert_eq(psmidx.is_monotonic_decreasing, pmidx.is_monotonic_decreasing)
+        else:
+            [(-5, None), (-4, None), (-3, None), (-2, None), (-1, None)]
+            psdf = ps.DataFrame({"a": [-5, -4, -3, -2, -1], "b": [1, 1, 1, 1, 1]})
+            psdf["b"] = None
+            psmidx = psdf.set_index(["a", "b"]).index
+            pmidx = psmidx.to_pandas()
+            self.assert_eq(psmidx.is_monotonic_increasing, pmidx.is_monotonic_increasing)
+            self.assert_eq(psmidx.is_monotonic_decreasing, pmidx.is_monotonic_decreasing)
+
+            [(None, "e"), (None, "c"), (None, "b"), (None, "d"), (None, "a")]
+            psdf = ps.DataFrame({"a": [1, 1, 1, 1, 1], "b": ["e", "c", "b", "d", "a"]})
+            psdf["a"] = None
+            psmidx = psdf.set_index(["a", "b"]).index
+            pmidx = psmidx.to_pandas()
+            self.assert_eq(psmidx.is_monotonic_increasing, pmidx.is_monotonic_increasing)
+            self.assert_eq(psmidx.is_monotonic_decreasing, pmidx.is_monotonic_decreasing)
+
+            [(None, None), (None, None), (None, None), (None, None), (None, None)]
+            psdf = ps.DataFrame({"a": [1, 1, 1, 1, 1], "b": [1, 1, 1, 1, 1]})
+            psdf["a"] = None
+            psdf["b"] = None
+            psmidx = psdf.set_index(["a", "b"]).index
+            pmidx = psmidx.to_pandas()
+            self.assert_eq(psmidx.is_monotonic_increasing, pmidx.is_monotonic_increasing)
+            self.assert_eq(psmidx.is_monotonic_decreasing, pmidx.is_monotonic_decreasing)
+            [(None, None)]
+            psdf = ps.DataFrame({"a": [1], "b": [1]})
+            psdf["a"] = None
+            psdf["b"] = None
+            psmidx = psdf.set_index(["a", "b"]).index
+            pmidx = psmidx.to_pandas()
+            self.assert_eq(psmidx.is_monotonic_increasing, pmidx.is_monotonic_increasing)
+            self.assert_eq(psmidx.is_monotonic_decreasing, pmidx.is_monotonic_decreasing)
 
     def test_difference(self):
         # Index

diff --git a/python/pyspark/pandas/tests/test_series.py b/python/pyspark/pandas/tests/test_series.py
@@ -1561,8 +1561,7 @@ def test_astype(self):
         psser = ps.Series(pser)
 
         self.assert_eq(psser.astype(bool), pser.astype(bool))
-        # Comment out the below test cause because pandas returns `None` or `nan` randomly
-        # self.assert_eq(psser.astype(str), pser.astype(str))
+        self.assert_eq(psser.astype(str), pser.astype(str))
 
         if extension_object_dtypes_available:
             from pandas import BooleanDtype, StringDtype