Re-enable pandas-on-Spark categorical tests that were skipped for pandas >= 2.0.0
(SPARK-43568, SPARK-43633, SPARK-43564, SPARK-43813).

The tests stop using the in-place `categories` setter (now `rename_categories`)
and the `na_sentinel` keyword of `factorize` (now `use_na_sentinel`, which pandas
documents as a bool, so the test passes `True` rather than an integer sentinel).

diff --git a/python/pyspark/pandas/tests/indexes/test_category.py b/python/pyspark/pandas/tests/indexes/test_category.py
index 6aa92b7e1e390..d2405f6adb301 100644
--- a/python/pyspark/pandas/tests/indexes/test_category.py
+++ b/python/pyspark/pandas/tests/indexes/test_category.py
@@ -75,10 +75,6 @@ def test_categorical_index(self):
         ):
             ps.CategoricalIndex([1, 2, 3]).all()
 
-    @unittest.skipIf(
-        LooseVersion(pd.__version__) >= LooseVersion("2.0.0"),
-        "TODO(SPARK-43568): Enable CategoricalIndexTests.test_categories_setter for pandas 2.0.0.",
-    )
     def test_categories_setter(self):
         pdf = pd.DataFrame(
             {
@@ -92,20 +88,10 @@ def test_categories_setter(self):
         pidx = pdf.index
         psidx = psdf.index
 
-        pidx.categories = ["z", "y", "x"]
-        psidx.categories = ["z", "y", "x"]
-        # Pandas deprecated all the in-place category-setting behaviors, dtypes also not be
-        # refreshed in categories.setter since Pandas 1.4+, we should also consider to clean up
-        # this test when in-place category-setting removed:
-        # https://github.com/pandas-dev/pandas/issues/46820
-        if LooseVersion("1.4") >= LooseVersion(pd.__version__) >= LooseVersion("1.1"):
-            self.assert_eq(pidx, psidx)
-            self.assert_eq(pdf, psdf)
-        else:
-            pidx = pidx.set_categories(pidx.categories)
-            pdf.index = pidx
-            self.assert_eq(pidx, psidx)
-            self.assert_eq(pdf, psdf)
+        pidx = pidx.rename_categories(["z", "y", "x"])
+        psidx = psidx.rename_categories(["z", "y", "x"])
+        self.assert_eq(pidx, psidx)
+        self.assert_eq(pdf, psdf)
 
         with self.assertRaises(ValueError):
             psidx.categories = [1, 2, 3, 4]
@@ -122,10 +108,6 @@ def test_add_categories(self):
         self.assertRaises(ValueError, lambda: psidx.add_categories(3))
         self.assertRaises(ValueError, lambda: psidx.add_categories([4, 4]))
 
-    @unittest.skipIf(
-        LooseVersion(pd.__version__) >= LooseVersion("2.0.0"),
-        "TODO(SPARK-43633): Enable CategoricalIndexTests.test_remove_categories for pandas 2.0.0.",
-    )
     def test_remove_categories(self):
         pidx = pd.CategoricalIndex([1, 2, 3], categories=[3, 2, 1])
         psidx = ps.from_pandas(pidx)
diff --git a/python/pyspark/pandas/tests/test_categorical.py b/python/pyspark/pandas/tests/test_categorical.py
index c45e063d6f466..ba361ab565c77 100644
--- a/python/pyspark/pandas/tests/test_categorical.py
+++ b/python/pyspark/pandas/tests/test_categorical.py
@@ -198,10 +198,6 @@ def test_astype(self):
 
         self.assert_eq(pscser.astype(str), pcser.astype(str))
 
-    @unittest.skipIf(
-        LooseVersion(pd.__version__) >= LooseVersion("2.0.0"),
-        "TODO(SPARK-43564): Enable CategoricalTests.test_factorize for pandas 2.0.0.",
-    )
     def test_factorize(self):
         pser = pd.Series(["a", "b", "c", None], dtype=CategoricalDtype(["c", "a", "d", "b"]))
         psser = ps.from_pandas(pser)
@@ -212,8 +208,8 @@ def test_factorize(self):
         self.assert_eq(kcodes.tolist(), pcodes.tolist())
         self.assert_eq(kuniques, puniques)
 
-        pcodes, puniques = pser.factorize(na_sentinel=-2)
-        kcodes, kuniques = psser.factorize(na_sentinel=-2)
+        pcodes, puniques = pser.factorize(use_na_sentinel=True)
+        kcodes, kuniques = psser.factorize(use_na_sentinel=True)
 
         self.assert_eq(kcodes.tolist(), pcodes.tolist())
         self.assert_eq(kuniques, puniques)
@@ -345,11 +341,6 @@ def test_groupby_apply(self):
         #     psdf.groupby("a").apply(len).sort_index(), pdf.groupby("a").apply(len).sort_index(),
         # )
 
-    @unittest.skipIf(
-        LooseVersion(pd.__version__) >= LooseVersion("2.0.0"),
-        "TODO(SPARK-43813): Enable CategoricalTests.test_groupby_apply_without_shortcut "
-        "for pandas 2.0.0.",
-    )
     def test_groupby_apply_without_shortcut(self):
         with ps.option_context("compute.shortcut_limit", 0):
             self.test_groupby_apply()
@@ -360,8 +351,8 @@ def identity(df) -> ps.DataFrame[zip(psdf.columns, psdf.dtypes)]:
             return df
 
         self.assert_eq(
-            psdf.groupby("a").apply(identity).sort_values(["a", "b"]).reset_index(drop=True),
-            pdf.groupby("a").apply(identity).sort_values(["a", "b"]).reset_index(drop=True),
+            psdf.groupby("a").apply(identity).sort_values(["b"]).reset_index(drop=True),
+            pdf.groupby("a").apply(identity).sort_values(["b"]).reset_index(drop=True),
         )
 
     def test_groupby_transform(self):