Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Align TimedeltaIndex APIs with pandas 2.x #16368

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions python/cudf/cudf/core/column/timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,18 @@ def normalize_binop_value(self, other) -> ColumnBinaryOperand:
def time_unit(self) -> str:
    """Return the unit code of this column's dtype.

    The value is the first element of ``np.datetime_data(self.dtype)``.
    """
    unit_code, _count = np.datetime_data(self.dtype)
    return unit_code

def total_seconds(self) -> ColumnBase:
    """Placeholder for ``total_seconds``.

    Raises
    ------
    NotImplementedError
        Always; the operation is not implemented for this column.
    """
    message = "total_seconds is currently not implemented"
    raise NotImplementedError(message)

def ceil(self, freq: str) -> ColumnBase:
    """Placeholder for ``ceil``.

    Parameters
    ----------
    freq : str
        Accepted for signature compatibility; not used.

    Raises
    ------
    NotImplementedError
        Always; the operation is not implemented for this column.
    """
    message = "ceil is currently not implemented"
    raise NotImplementedError(message)

def floor(self, freq: str) -> ColumnBase:
    """Placeholder for ``floor``.

    Parameters
    ----------
    freq : str
        Accepted for signature compatibility; not used.

    Raises
    ------
    NotImplementedError
        Always; the operation is not implemented for this column.
    """
    message = "floor is currently not implemented"
    raise NotImplementedError(message)

def round(self, freq: str) -> ColumnBase:
    """Placeholder for ``round``.

    Parameters
    ----------
    freq : str
        Accepted for signature compatibility; not used.

    Raises
    ------
    NotImplementedError
        Always; the operation is not implemented for this column.
    """
    message = "round is currently not implemented"
    raise NotImplementedError(message)

def as_numerical_column(
self, dtype: Dtype
) -> "cudf.core.column.NumericalColumn":
Expand Down
92 changes: 92 additions & 0 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2538,6 +2538,98 @@ def __getitem__(self, index):
return pd.Timedelta(value)
return value

def as_unit(self, unit: str, round_ok: bool = True) -> Self:
    """
    Convert to a dtype with the given unit resolution.

    Not implemented yet: every call raises.

    Parameters
    ----------
    unit : {'s', 'ms', 'us', 'ns'}
        Target resolution.
    round_ok : bool, default True
        If False and the conversion requires rounding, raise ValueError.

    Raises
    ------
    NotImplementedError
        Always, regardless of the arguments passed.
    """
    message = "as_unit is currently not implemented"
    raise NotImplementedError(message)

@property
def freq(self) -> cudf.DateOffset | None:
    """Placeholder property; accessing it always raises.

    Raises
    ------
    NotImplementedError
        On every access.
    """
    message = "freq is currently not implemented"
    raise NotImplementedError(message)

@property
def freqstr(self) -> str:
    """Placeholder property; accessing it always raises.

    Raises
    ------
    NotImplementedError
        On every access.
    """
    message = "freqstr is currently not implemented"
    raise NotImplementedError(message)

@property
def resolution(self) -> str:
    """
    Returns day, hour, minute, second, millisecond or microsecond

    Currently not implemented: accessing this property always raises
    ``NotImplementedError``.
    """
    message = "resolution is currently not implemented"
    raise NotImplementedError(message)

@property
def unit(self) -> str:
    """Return the ``time_unit`` reported by the underlying column."""
    column = self._column
    return column.time_unit

def to_pytimedelta(self) -> np.ndarray:
    """
    Return an ndarray of ``datetime.timedelta`` objects.

    The index is first converted with ``to_pandas()`` and the result's
    ``to_pytimedelta()`` is returned.

    Returns
    -------
    numpy.ndarray
        An ndarray of ``datetime.timedelta`` objects.
    """
    pandas_index = self.to_pandas()
    return pandas_index.to_pytimedelta()

@property
def asi8(self) -> cupy.ndarray:
    """Return ``.values`` of the underlying column cast to ``"int64"``."""
    as_int64 = self._column.astype("int64")
    return as_int64.values

def sum(self, *, skipna: bool = True, axis: int | None = 0):
    """Return the underlying column's ``sum`` reduction.

    Parameters
    ----------
    skipna : bool, default True
        Forwarded to the column reduction.
    axis : int or None, default 0
        Accepted in the signature but not forwarded to the column.
    """
    column = self._column
    return column.sum(skipna=skipna)

def mean(self, *, skipna: bool = True, axis: int | None = 0):
    """Return the underlying column's ``mean`` reduction.

    Parameters
    ----------
    skipna : bool, default True
        Forwarded to the column reduction.
    axis : int or None, default 0
        Accepted in the signature but not forwarded to the column.
    """
    column = self._column
    return column.mean(skipna=skipna)

def median(self, *, skipna: bool = True, axis: int | None = 0):
    """Return the underlying column's ``median`` reduction.

    Parameters
    ----------
    skipna : bool, default True
        Forwarded to the column reduction.
    axis : int or None, default 0
        Accepted in the signature but not forwarded to the column.
    """
    column = self._column
    return column.median(skipna=skipna)

def std(self, *, skipna: bool = True, axis: int | None = 0, ddof: int = 1):
    """Return the underlying column's ``std`` reduction.

    Parameters
    ----------
    skipna : bool, default True
        Forwarded to the column reduction.
    axis : int or None, default 0
        Accepted in the signature but not forwarded to the column.
    ddof : int, default 1
        Forwarded to the column reduction.
    """
    column = self._column
    return column.std(skipna=skipna, ddof=ddof)

def total_seconds(self) -> cupy.ndarray:
    """
    Return total duration of each element expressed in seconds.

    This method is currently not implemented.

    The call is delegated to the underlying column's ``total_seconds``
    and the ``.values`` of that result is returned.
    """
    seconds_column = self._column.total_seconds()
    return seconds_column.values

def ceil(self, freq: str) -> Self:
    """
    Ceil to the specified resolution.

    This method is currently not implemented.

    The work is delegated to the underlying column's ``ceil``; its result
    is wrapped in a new index of the same type, keyed by ``self.name``.
    """
    ceiled_column = self._column.ceil(freq)
    return type(self)._from_data({self.name: ceiled_column})

def floor(self, freq: str) -> Self:
    """
    Floor to the specified resolution.

    This method is currently not implemented.

    The work is delegated to the underlying column's ``floor``; its result
    is wrapped in a new index of the same type, keyed by ``self.name``.
    """
    floored_column = self._column.floor(freq)
    return type(self)._from_data({self.name: floored_column})

def round(self, freq: str) -> Self:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we have some tests to cover these newly added APIs?

Copy link
Contributor Author

@mroeschke mroeschke Jul 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I added tests for the APIs that don't raise a NotImplementedError.

"""
Round to the specified resolution.

This method is currently not implemented.
"""
return type(self)._from_data({self.name: self._column.round(freq)})

@property # type: ignore
@_performance_tracking
def days(self):
Expand Down
39 changes: 39 additions & 0 deletions python/cudf/cudf/tests/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -1467,3 +1467,42 @@ def test_timedelta_series_cmpops_pandas_compatibility(data1, data2, op):
got = op(gsr1, gsr2)

assert_eq(expect, got)


@pytest.mark.parametrize(
    "method, kwargs",
    [
        ("sum", {}),
        ("mean", {}),
        ("median", {}),
        ("std", {}),
        ("std", {"ddof": 0}),
    ],
)
def test_tdi_reductions(method, kwargs):
    """Each reduction on a cudf TimedeltaIndex equals the pandas result."""
    pandas_index = pd.TimedeltaIndex(["1 day", "2 days", "3 days"])
    gpu_index = cudf.from_pandas(pandas_index)

    # pandas is the reference; cudf is the implementation under test.
    pandas_value = getattr(pandas_index, method)(**kwargs)
    cudf_value = getattr(gpu_index, method)(**kwargs)
    assert pandas_value == cudf_value


def test_tdi_asi8():
    """``asi8`` on a cudf TimedeltaIndex matches pandas' ``asi8``."""
    pandas_index = pd.TimedeltaIndex(["1 day", "2 days", "3 days"])
    gpu_index = cudf.from_pandas(pandas_index)

    # pandas is the reference; cudf is the implementation under test.
    pandas_value = pandas_index.asi8
    cudf_value = gpu_index.asi8
    assert_eq(pandas_value, cudf_value)


def test_tdi_unit():
    """``unit`` on a cudf TimedeltaIndex matches pandas' ``unit``."""
    labels = ["1 day", "2 days", "3 days"]
    pandas_index = pd.TimedeltaIndex(labels, dtype="timedelta64[ns]")
    gpu_index = cudf.from_pandas(pandas_index)

    # pandas is the reference; cudf is the implementation under test.
    pandas_value = pandas_index.unit
    cudf_value = gpu_index.unit
    assert pandas_value == cudf_value
Loading