Skip to content

Commit

Permalink
Align TimedeltaIndex APIs with pandas 2.x (#16368)
Browse files Browse the repository at this point in the history
Mostly exposes, on `TimedeltaIndex`, methods that were already available on `TimedeltaColumn`.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #16368
  • Loading branch information
mroeschke authored Jul 31, 2024
1 parent 5bcd8e0 commit e2d45d6
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 0 deletions.
12 changes: 12 additions & 0 deletions python/cudf/cudf/core/column/timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,18 @@ def normalize_binop_value(self, other) -> ColumnBinaryOperand:
def time_unit(self) -> str:
    """Return the unit code (e.g. ``"ns"``) carried by this column's dtype."""
    # ``np.datetime_data`` yields a ``(unit, count)`` pair; only the unit
    # string is of interest here.
    unit_code, _count = np.datetime_data(self.dtype)
    return unit_code

def total_seconds(self) -> ColumnBase:
    """Placeholder for converting each duration to seconds.

    Raises
    ------
    NotImplementedError
        Always; the operation is not available yet.
    """
    message = "total_seconds is currently not implemented"
    raise NotImplementedError(message)

def ceil(self, freq: str) -> ColumnBase:
    """Placeholder for rounding each duration up to ``freq``.

    Raises
    ------
    NotImplementedError
        Always; ``freq`` is accepted but never inspected.
    """
    message = "ceil is currently not implemented"
    raise NotImplementedError(message)

def floor(self, freq: str) -> ColumnBase:
    """Placeholder for rounding each duration down to ``freq``.

    Raises
    ------
    NotImplementedError
        Always; ``freq`` is accepted but never inspected.
    """
    message = "floor is currently not implemented"
    raise NotImplementedError(message)

def round(self, freq: str) -> ColumnBase:
    """Placeholder for rounding each duration to the nearest ``freq``.

    Raises
    ------
    NotImplementedError
        Always; ``freq`` is accepted but never inspected.
    """
    message = "round is currently not implemented"
    raise NotImplementedError(message)

def as_numerical_column(
self, dtype: Dtype
) -> "cudf.core.column.NumericalColumn":
Expand Down
92 changes: 92 additions & 0 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2759,6 +2759,98 @@ def __getitem__(self, index):
return pd.Timedelta(value)
return value

def as_unit(self, unit: str, round_ok: bool = True) -> Self:
    """
    Convert to a dtype with the given unit resolution.

    Currently not implemented.

    Parameters
    ----------
    unit : {'s', 'ms', 'us', 'ns'}
        Target resolution.
    round_ok : bool, default True
        Intended contract: if False and the conversion requires rounding,
        raise ValueError.

    Raises
    ------
    NotImplementedError
        Always; neither argument is inspected.
    """
    message = "as_unit is currently not implemented"
    raise NotImplementedError(message)

@property
def freq(self) -> cudf.DateOffset | None:
    """Frequency of the index; accessing it always raises for now.

    Raises
    ------
    NotImplementedError
        Always.
    """
    message = "freq is currently not implemented"
    raise NotImplementedError(message)

@property
def freqstr(self) -> str:
    """Frequency of the index as a string; accessing it always raises for now.

    Raises
    ------
    NotImplementedError
        Always.
    """
    message = "freqstr is currently not implemented"
    raise NotImplementedError(message)

@property
def resolution(self) -> str:
    """
    Resolution of the index: one of day, hour, minute, second,
    millisecond or microsecond.

    Raises
    ------
    NotImplementedError
        Always; the property is not available yet.
    """
    message = "resolution is currently not implemented"
    raise NotImplementedError(message)

@property
def unit(self) -> str:
    """Time-unit code of the index, read from the column's ``time_unit``."""
    column = self._column
    return column.time_unit

def to_pytimedelta(self) -> np.ndarray:
    """
    Return the values as ``datetime.timedelta`` objects.

    The index is first converted with ``to_pandas()`` and the conversion
    itself is done by the resulting object's ``to_pytimedelta``.

    Returns
    -------
    numpy.ndarray
        An ndarray of ``datetime.timedelta`` objects.
    """
    pandas_index = self.to_pandas()
    return pandas_index.to_pytimedelta()

@property
def asi8(self) -> cupy.ndarray:
    """Values of the underlying column after casting it to ``"int64"``."""
    as_int64 = self._column.astype("int64")
    return as_int64.values

def sum(self, *, skipna: bool = True, axis: int | None = 0):
return self._column.sum(skipna=skipna)

def mean(self, *, skipna: bool = True, axis: int | None = 0):
return self._column.mean(skipna=skipna)

def median(self, *, skipna: bool = True, axis: int | None = 0):
return self._column.median(skipna=skipna)

def std(self, *, skipna: bool = True, axis: int | None = 0, ddof: int = 1):
return self._column.std(skipna=skipna, ddof=ddof)

def total_seconds(self) -> cupy.ndarray:
    """
    Return total duration of each element expressed in seconds.

    This method is currently not implemented; it returns ``.values`` of
    whatever the underlying column's ``total_seconds`` produces.
    """
    seconds_column = self._column.total_seconds()
    return seconds_column.values

def ceil(self, freq: str) -> Self:
    """
    Ceil to the specified resolution.

    This method is currently not implemented; it wraps the result of the
    underlying column's ``ceil`` in a new index that keeps this index's name.
    """
    ceiled = self._column.ceil(freq)
    return type(self)._from_data({self.name: ceiled})

def floor(self, freq: str) -> Self:
    """
    Floor to the specified resolution.

    This method is currently not implemented; it wraps the result of the
    underlying column's ``floor`` in a new index that keeps this index's name.
    """
    floored = self._column.floor(freq)
    return type(self)._from_data({self.name: floored})

def round(self, freq: str) -> Self:
    """
    Round to the specified resolution.

    This method is currently not implemented; it wraps the result of the
    underlying column's ``round`` in a new index that keeps this index's name.
    """
    rounded = self._column.round(freq)
    return type(self)._from_data({self.name: rounded})

@property # type: ignore
@_performance_tracking
def days(self):
Expand Down
39 changes: 39 additions & 0 deletions python/cudf/cudf/tests/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -1467,3 +1467,42 @@ def test_timedelta_series_cmpops_pandas_compatibility(data1, data2, op):
got = op(gsr1, gsr2)

assert_eq(expect, got)


@pytest.mark.parametrize(
    "method, kwargs",
    [
        ["sum", {}],
        ["mean", {}],
        ["median", {}],
        ["std", {}],
        ["std", {"ddof": 0}],
    ],
)
def test_tdi_reductions(method, kwargs):
    """Each TimedeltaIndex reduction yields the same scalar in pandas and cudf."""
    pandas_index = pd.TimedeltaIndex(["1 day", "2 days", "3 days"])
    cudf_index = cudf.from_pandas(pandas_index)

    # Call the same reduction by name on both libraries.
    pandas_value = getattr(pandas_index, method)(**kwargs)
    cudf_value = getattr(cudf_index, method)(**kwargs)
    assert pandas_value == cudf_value


def test_tdi_asi8():
    """``asi8`` on a cudf TimedeltaIndex matches the pandas values."""
    pandas_index = pd.TimedeltaIndex(["1 day", "2 days", "3 days"])
    cudf_index = cudf.from_pandas(pandas_index)

    pandas_values = pandas_index.asi8
    cudf_values = cudf_index.asi8
    assert_eq(pandas_values, cudf_values)


def test_tdi_unit():
    """``unit`` on a cudf TimedeltaIndex matches pandas for a ``ns`` dtype."""
    durations = ["1 day", "2 days", "3 days"]
    pandas_index = pd.TimedeltaIndex(durations, dtype="timedelta64[ns]")
    cudf_index = cudf.from_pandas(pandas_index)

    pandas_unit = pandas_index.unit
    cudf_unit = cudf_index.unit
    assert pandas_unit == cudf_unit

0 comments on commit e2d45d6

Please sign in to comment.