Skip to content

Commit

Permalink
[FEA] Add an environment variable to fail on fallback in cudf.pandas (
Browse files Browse the repository at this point in the history
#16562)

This PR makes more progress on #14975 by adding an environment variable that raises an error when fallback occurs in cudf.pandas. It also adds some tests for operations that do __not__ fall back.

Authors:
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #16562
  • Loading branch information
Matt711 authored Sep 25, 2024
1 parent 03c77c2 commit dbe5528
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 1 deletion.
10 changes: 10 additions & 0 deletions python/cudf/cudf/pandas/fast_slow_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -881,6 +881,12 @@ def _assert_fast_slow_eq(left, right):
assert_eq(left, right)


class ProxyFallbackError(Exception):
    """Signal that a proxied operation would have fallen back to the slow path.

    Raised instead of executing the slow path when the
    ``CUDF_PANDAS_FAIL_ON_FALLBACK`` environment variable is enabled and the
    fast (cuDF) attempt raised an exception; the original exception is
    chained as the cause.
    """


def _fast_function_call():
"""
Placeholder fast function for pytest profiling purposes.
Expand Down Expand Up @@ -957,6 +963,10 @@ def _fast_slow_function_call(
f"The exception was {e}."
)
except Exception as err:
if _env_get_bool("CUDF_PANDAS_FAIL_ON_FALLBACK", False):
raise ProxyFallbackError(
f"The operation failed with cuDF, the reason was {type(err)}: {err}."
) from err
with nvtx.annotate(
"EXECUTE_SLOW",
color=_CUDF_PANDAS_NVTX_COLORS["EXECUTE_SLOW"],
Expand Down
16 changes: 15 additions & 1 deletion python/cudf/cudf_pandas_tests/test_cudf_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@

from cudf.core._compat import PANDAS_GE_220
from cudf.pandas import LOADED, Profiler
from cudf.pandas.fast_slow_proxy import _Unusable, is_proxy_object
from cudf.pandas.fast_slow_proxy import (
ProxyFallbackError,
_Unusable,
is_proxy_object,
)
from cudf.testing import assert_eq

if not LOADED:
Expand Down Expand Up @@ -1738,3 +1742,13 @@ def add_one_ufunc(a):
return a + 1

assert_eq(cp.asarray(add_one_ufunc(arr1)), cp.asarray(add_one_ufunc(arr2)))


@pytest.mark.xfail(
    reason="Fallback expected because casting to object is not supported",
)
def test_fallback_raises_error(monkeypatch):
    """Check that fallback raises ``ProxyFallbackError`` when the flag is set.

    ``CUDF_PANDAS_FAIL_ON_FALLBACK`` is enabled only inside a scoped
    monkeypatch context, and casting a Series to ``object`` is used as the
    operation that is expected to trigger fallback.
    """
    # NOTE(review): the test is marked xfail yet also asserts the error with
    # pytest.raises, so a raised ProxyFallbackError reports as XPASS --
    # confirm whether the xfail marker is still intended.
    with monkeypatch.context() as scoped_env:
        scoped_env.setenv("CUDF_PANDAS_FAIL_ON_FALLBACK", "True")
        with pytest.raises(ProxyFallbackError):
            pd.Series(range(2)).astype(object)
100 changes: 100 additions & 0 deletions python/cudf/cudf_pandas_tests/test_cudf_pandas_no_fallback.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import pytest

from cudf.pandas import LOADED

if not LOADED:
raise ImportError("These tests must be run with cudf.pandas loaded")

import numpy as np
import pandas as pd


@pytest.fixture(autouse=True)
def fail_on_fallback(monkeypatch):
    """Enable ``CUDF_PANDAS_FAIL_ON_FALLBACK`` for every test in this module.

    The fixture is ``autouse``, so each test runs with the variable set to
    ``"True"`` without requesting the fixture explicitly.
    """
    env_name = "CUDF_PANDAS_FAIL_ON_FALLBACK"
    env_value = "True"
    monkeypatch.setenv(env_name, env_value)


@pytest.fixture
def dataframe():
    """Return a small three-column DataFrame shared by the tests.

    Columns ``a`` and ``b`` hold integers and ``c`` holds floats; ``a``
    contains repeated values so it can serve as a groupby key.
    """
    columns = {
        "a": [1, 1, 1, 2, 3],
        "b": [1, 2, 3, 4, 5],
        "c": [1.2, 1.3, 1.5, 1.7, 1.11],
    }
    return pd.DataFrame(columns)


@pytest.fixture
def series(dataframe):
    """Return column ``a`` of the ``dataframe`` fixture."""
    column_a = dataframe["a"]
    return column_a


@pytest.fixture
def array(series):
    """Return the ``.values`` of the ``series`` fixture."""
    values = series.values
    return values


@pytest.mark.parametrize(
    "op",
    ["sum", "min", "max", "mean", "std", "var", "prod", "median"],
)
def test_no_fallback_in_reduction_ops(series, op):
    """Call the reduction named ``op`` on ``series`` with no arguments.

    The result is discarded; the test only requires that the call completes
    without raising.
    """
    reduction = getattr(series, op)
    reduction()


def test_groupby(dataframe):
    """Run a sorted groupby on column ``a`` followed by ``max``.

    The result is discarded; the test only requires that no exception is
    raised.
    """
    grouped = dataframe.groupby("a", sort=True)
    grouped.max()


def test_no_fallback_in_binops(dataframe):
    """Exercise arithmetic, bitwise and comparison operators on a DataFrame.

    Every result is discarded; the test only requires that none of the
    operations raises.
    """
    _ = dataframe + dataframe
    _ = dataframe - dataframe
    _ = dataframe * dataframe
    _ = dataframe**dataframe
    # Bitwise AND is applied to the two integer columns only.
    _ = dataframe[["a", "b"]] & dataframe[["a", "b"]]
    _ = dataframe <= dataframe


def test_no_fallback_in_groupby_rolling_sum(dataframe):
    """Compute a window-2 rolling sum within groups of column ``a``.

    The result is discarded; the test only requires that no exception is
    raised.
    """
    grouped = dataframe.groupby("a")
    windowed = grouped.rolling(2)
    windowed.sum()


def test_no_fallback_in_concat(dataframe):
    """Concatenate the DataFrame with itself and discard the result."""
    frames = [dataframe, dataframe]
    pd.concat(frames)


def test_no_fallback_in_get_shape(dataframe):
    """Read the ``shape`` attribute of the DataFrame and discard it."""
    _ = dataframe.shape


def test_no_fallback_in_array_ufunc_op(array):
    """Apply the ``np.add`` ufunc to the array with itself; discard the sum."""
    _ = np.add(array, array)


def test_no_fallback_in_merge(dataframe):
    """Merge two derived frames using each of the four join types.

    The operands (``dataframe * dataframe`` and ``dataframe + dataframe``)
    are recomputed for every join, and each merged result is discarded.
    """
    for join_type in ("inner", "outer", "left", "right"):
        pd.merge(
            dataframe * dataframe,
            dataframe + dataframe,
            how=join_type,
        )

0 comments on commit dbe5528

Please sign in to comment.