[FEA] Add an environment variable to fail on fallback in cudf.pandas (

#16562) This PR makes more progress on #14975 by adding an environment variable that makes cudf.pandas fail when fallback occurs. It also adds some tests that do __not__ fall back. Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: #16562
rapidsai · Sep 25, 2024 · dbe5528 · dbe5528
1 parent 03c77c2
commit dbe5528
Show file tree

Hide file tree

Showing 3 changed files with 125 additions and 1 deletion.
diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py
@@ -881,6 +881,12 @@ def _assert_fast_slow_eq(left, right):
         assert_eq(left, right)
 
 
+class ProxyFallbackError(Exception):
+    """Signal that a fast-path failure was not allowed to fall back.
+
+    Raised from the fast/slow dispatch when the fast call raised and the
+    ``CUDF_PANDAS_FAIL_ON_FALLBACK`` environment variable is enabled; the
+    triggering exception is chained as the cause.
+    """
+
+
 def _fast_function_call():
     """
     Placeholder fast function for pytest profiling purposes.
@@ -957,6 +963,10 @@ def _fast_slow_function_call(
                             f"The exception was {e}."
                         )
     except Exception as err:
+        if _env_get_bool("CUDF_PANDAS_FAIL_ON_FALLBACK", False):
+            raise ProxyFallbackError(
+                f"The operation failed with cuDF, the reason was {type(err)}: {err}."
+            ) from err
         with nvtx.annotate(
             "EXECUTE_SLOW",
             color=_CUDF_PANDAS_NVTX_COLORS["EXECUTE_SLOW"],

diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
@@ -26,7 +26,11 @@
 
 from cudf.core._compat import PANDAS_GE_220
 from cudf.pandas import LOADED, Profiler
-from cudf.pandas.fast_slow_proxy import _Unusable, is_proxy_object
+from cudf.pandas.fast_slow_proxy import (
+    ProxyFallbackError,
+    _Unusable,
+    is_proxy_object,
+)
 from cudf.testing import assert_eq
 
 if not LOADED:
@@ -1738,3 +1742,13 @@ def add_one_ufunc(a):
         return a + 1
 
     assert_eq(cp.asarray(add_one_ufunc(arr1)), cp.asarray(add_one_ufunc(arr2)))
+
+
+# NOTE(review): this test is marked xfail but also asserts the error with
+# pytest.raises. If ProxyFallbackError is raised the body passes, which a
+# non-strict xfail reports as XPASS rather than a failure -- confirm that
+# the xfail marker is intended.
+@pytest.mark.xfail(
+    reason="Fallback expected because casting to object is not supported",
+)
+def test_fallback_raises_error(monkeypatch):
+    """Expect ``ProxyFallbackError`` when fail-on-fallback is enabled.
+
+    Sets ``CUDF_PANDAS_FAIL_ON_FALLBACK`` and casts an integer series to
+    ``object``, which the xfail reason describes as unsupported.
+    """
+    # monkeypatch.context() limits the environment change to this block.
+    with monkeypatch.context() as monkeycontext:
+        monkeycontext.setenv("CUDF_PANDAS_FAIL_ON_FALLBACK", "True")
+        with pytest.raises(ProxyFallbackError):
+            pd.Series(range(2)).astype(object)
diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas_no_fallback.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas_no_fallback.py
@@ -0,0 +1,100 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import pytest
+
+from cudf.pandas import LOADED
+
+if not LOADED:
+    raise ImportError("These tests must be run with cudf.pandas loaded")
+
+import numpy as np
+import pandas as pd
+
+
+@pytest.fixture(autouse=True)
+def fail_on_fallback(monkeypatch):
+    """Set ``CUDF_PANDAS_FAIL_ON_FALLBACK`` for every test in this module."""
+    monkeypatch.setenv(name="CUDF_PANDAS_FAIL_ON_FALLBACK", value="True")
+
+
+@pytest.fixture
+def dataframe():
+    """Build a five-row frame with integer columns a, b and float column c."""
+    columns = {
+        "a": [1, 1, 1, 2, 3],
+        "b": [1, 2, 3, 4, 5],
+        "c": [1.2, 1.3, 1.5, 1.7, 1.11],
+    }
+    return pd.DataFrame(columns)
+
+
+@pytest.fixture
+def series(dataframe):
+    """Return column ``a`` of the ``dataframe`` fixture."""
+    column_a = dataframe["a"]
+    return column_a
+
+
+@pytest.fixture
+def array(series):
+    """Return the ``.values`` of the ``series`` fixture."""
+    values = series.values
+    return values
+
+
+@pytest.mark.parametrize(
+    "op",
+    ["sum", "min", "max", "mean", "std", "var", "prod", "median"],
+)
+def test_no_fallback_in_reduction_ops(series, op):
+    """Invoke the named reduction on the series; any fallback raises."""
+    reduction = getattr(series, op)
+    reduction()
+
+
+def test_groupby(dataframe):
+    """Run a sorted groupby on column ``a`` followed by ``max``."""
+    grouped = dataframe.groupby("a", sort=True)
+    grouped.max()
+
+
+def test_no_fallback_in_binops(dataframe):
+    """Apply arithmetic, bitwise and comparison operators frame-to-frame."""
+    frame = dataframe
+    # Results are discarded on purpose: only the absence of an error matters.
+    frame + frame
+    frame - frame
+    frame * frame
+    frame**frame
+    # Bitwise AND is exercised on the integer columns only.
+    lhs_ints = frame[["a", "b"]]
+    rhs_ints = frame[["a", "b"]]
+    lhs_ints & rhs_ints
+    frame <= frame
+
+
+def test_no_fallback_in_groupby_rolling_sum(dataframe):
+    """Sum a rolling window of size 2 within groups of column ``a``."""
+    windows = dataframe.groupby("a").rolling(2)
+    windows.sum()
+
+
+def test_no_fallback_in_concat(dataframe):
+    """Concatenate the frame with itself."""
+    frames = [dataframe, dataframe]
+    pd.concat(frames)
+
+
+def test_no_fallback_in_get_shape(dataframe):
+    """Read the ``shape`` attribute of the frame."""
+    _ = dataframe.shape
+
+
+def test_no_fallback_in_array_ufunc_op(array):
+    """Apply the ``np.add`` ufunc to the array fixture and itself."""
+    lhs = rhs = array
+    np.add(lhs, rhs)
+
+
+def test_no_fallback_in_merge(dataframe):
+    """Merge ``df * df`` with ``df + df`` using each join type in turn."""
+    for how in ("inner", "outer", "left", "right"):
+        # Operands are rebuilt for every join, one merge per join type.
+        pd.merge(dataframe * dataframe, dataframe + dataframe, how=how)