Skip to content

Commit

Permalink
Align DatetimeIndex APIs with pandas 2.x (#16367)
Browse files Browse the repository at this point in the history
Mostly transfers methods that were defined on `Series.dt` to `DatetimeColumn` so that they can be reused in `DatetimeIndex`.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #16367
  • Loading branch information
mroeschke authored Jul 31, 2024
1 parent 38c15f7 commit 5bcd8e0
Show file tree
Hide file tree
Showing 5 changed files with 385 additions and 34 deletions.
2 changes: 2 additions & 0 deletions docs/cudf/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,8 @@ def on_missing_reference(app, env, node, contnode):
("py:class", "Dtype"),
# The following are erroneously warned due to
# https://github.com/sphinx-doc/sphinx/issues/11225
("py:obj", "cudf.DatetimeIndex.time"),
("py:obj", "cudf.DatetimeIndex.date"),
("py:obj", "cudf.Index.values_host"),
("py:class", "pa.Array"),
("py:class", "ScalarLike"),
Expand Down
56 changes: 56 additions & 0 deletions python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,62 @@ def dayofyear(self) -> ColumnBase:
def day_of_year(self) -> ColumnBase:
return self.get_dt_field("day_of_year")

@property
def is_month_start(self) -> ColumnBase:
    """Flag rows whose day-of-month equals 1.

    The comparison result has its nulls filled with ``False`` before it
    is returned.
    """
    on_first_day = self.day == 1
    return on_first_day.fillna(False)

@property
def is_month_end(self) -> ColumnBase:
    """Flag rows whose day-of-month matches the last day of their month.

    The last-day column comes from ``libcudf.datetime.last_day_of_month``;
    nulls in the comparison are filled with ``False``.
    """
    month_close = libcudf.datetime.last_day_of_month(self)
    matches_close = self.day == month_close.day
    return matches_close.fillna(False)

@property
def is_quarter_end(self) -> ColumnBase:
    """Flag rows that are a month end in March, June, September or December.

    Nulls in the combined mask are filled with ``False``.
    """
    in_closing_month = self.month.isin([3, 6, 9, 12])
    closes_quarter = self.is_month_end & in_closing_month
    return closes_quarter.fillna(False)

@property
def is_quarter_start(self) -> ColumnBase:
    """Flag rows that are a month start in January, April, July or October.

    Nulls in the combined mask are filled with ``False``.
    """
    in_opening_month = self.month.isin([1, 4, 7, 10])
    opens_quarter = self.is_month_start & in_opening_month
    return opens_quarter.fillna(False)

@property
def is_year_end(self) -> ColumnBase:
    """Flag rows that fall on the final day of their year.

    Two candidate masks are built (day-of-year equal to 366 and equal to
    365) and ``libcudf.copying.copy_if_else`` chooses between them using
    the leap-year mask. Nulls in the result are filled with ``False``.
    """
    ordinal_day = self.day_of_year
    in_leap_year = libcudf.datetime.is_leap_year(self)

    on_day_366 = ordinal_day == cudf.Scalar(366)
    on_day_365 = ordinal_day == cudf.Scalar(365)
    # presumably day 366 is selected where the leap-year mask is true and
    # day 365 elsewhere -- confirm against copy_if_else's argument order
    chosen = libcudf.copying.copy_if_else(on_day_366, on_day_365, in_leap_year)
    return chosen.fillna(False)

@property
def is_year_start(self) -> ColumnBase:
    """Flag rows whose day-of-year equals 1.

    Nulls in the comparison are filled with ``False``.
    """
    on_first_day = self.day_of_year == 1
    return on_first_day.fillna(False)

@property
def days_in_month(self) -> ColumnBase:
    """Return the result of ``libcudf.datetime.days_in_month`` for this column."""
    month_lengths = libcudf.datetime.days_in_month(self)
    return month_lengths

@property
def day_of_week(self) -> ColumnBase:
    """Placeholder accessor; always raises ``NotImplementedError``."""
    message = "day_of_week is currently not implemented."
    raise NotImplementedError(message)

@property
def is_normalized(self) -> bool:
    """Placeholder accessor; always raises ``NotImplementedError``."""
    message = "is_normalized is currently not implemented."
    raise NotImplementedError(message)

def to_julian_date(self) -> ColumnBase:
    """Placeholder method; always raises ``NotImplementedError``."""
    message = "to_julian_date is currently not implemented."
    raise NotImplementedError(message)

def normalize(self) -> ColumnBase:
    """Placeholder method; always raises ``NotImplementedError``."""
    message = "normalize is currently not implemented."
    raise NotImplementedError(message)

@property
def values(self):
"""
Expand Down
211 changes: 209 additions & 2 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@

if TYPE_CHECKING:
from collections.abc import Generator, Iterable
from datetime import tzinfo


def ensure_index(index_like: Any) -> BaseIndex:
Expand Down Expand Up @@ -1680,7 +1681,7 @@ class DatetimeIndex(Index):
copy : bool
Make a copy of input.
freq : str, optional
This is not yet supported
Frequency of the DatetimeIndex
tz : pytz.timezone or dateutil.tz.tzfile
This is not yet supported
ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
Expand Down Expand Up @@ -1847,6 +1848,210 @@ def searchsorted(
value, side=side, ascending=ascending, na_position=na_position
)

def as_unit(self, unit: str, round_ok: bool = True) -> Self:
    """
    Convert to a dtype with the given unit resolution.

    Currently not implemented; every call raises ``NotImplementedError``.

    Parameters
    ----------
    unit : {'s', 'ms', 'us', 'ns'}
    round_ok : bool, default True
        If False and the conversion requires rounding, raise ValueError.
    """
    message = "as_unit is currently not implemented"
    raise NotImplementedError(message)

def mean(self, *, skipna: bool = True, axis: int | None = 0):
    """Return the mean computed by the underlying column.

    Only ``skipna`` is forwarded to the column; ``axis`` is accepted but
    not used by this method.
    """
    column = self._column
    return column.mean(skipna=skipna)

def std(self, *, skipna: bool = True, axis: int | None = 0, ddof: int = 1):
    """Return the standard deviation computed by the underlying column.

    ``skipna`` and ``ddof`` are forwarded to the column; ``axis`` is
    accepted but not used by this method.
    """
    column = self._column
    return column.std(skipna=skipna, ddof=ddof)

def strftime(self, date_format: str) -> Index:
    """
    Convert to Index using specified date_format.

    The underlying column's ``strftime`` is called with ``date_format``
    and the result is wrapped in an ``Index`` keyed by this index's name.
    The format string follows the python standard library conventions.

    Parameters
    ----------
    date_format : str
        Date format string (e.g. "%Y-%m-%d").
    """
    formatted = self._column.strftime(date_format)
    return Index._from_data({self.name: formatted})

@property
def asi8(self) -> cupy.ndarray:
    """Return the ``values`` of the underlying column cast to ``"int64"``."""
    as_int64 = self._column.astype("int64")
    return as_int64.values

@property
def inferred_freq(self) -> cudf.DateOffset | None:
    """Placeholder accessor; always raises ``NotImplementedError``."""
    message = "inferred_freq is currently not implemented"
    raise NotImplementedError(message)

@property
def freq(self) -> cudf.DateOffset | None:
    """Return the frequency stored on this index (``self._freq``)."""
    return self._freq

@freq.setter
def freq(self, value) -> None:
    """Reject assignment to ``freq``.

    Raises
    ------
    NotImplementedError
        Always; setting the frequency is not supported.
    """
    # A property setter is invoked as ``setter(instance, value)``. Without
    # the ``value`` parameter, ``index.freq = x`` would fail with a
    # TypeError about argument count instead of this intended error.
    raise NotImplementedError("Setting freq is currently not supported.")

@property
def freqstr(self) -> str:
    """Placeholder accessor; always raises ``NotImplementedError``."""
    message = "freqstr is currently not implemented"
    raise NotImplementedError(message)

@property
def resolution(self) -> str:
    """
    Returns day, hour, minute, second, millisecond or microsecond

    Currently not implemented; accessing it raises ``NotImplementedError``.
    """
    message = "resolution is currently not implemented"
    raise NotImplementedError(message)

@property
def unit(self) -> str:
    """Return the ``time_unit`` reported by the underlying column."""
    column = self._column
    return column.time_unit

@property
def tz(self) -> tzinfo | None:
    """
    Return the timezone.

    Returns
    -------
    datetime.tzinfo or None
        The ``tz`` attribute of this index's dtype, or None when the
        dtype has no such attribute (tz-naive).
    """
    dtype = self.dtype
    try:
        return dtype.tz
    except AttributeError:
        return None

@property
def tzinfo(self) -> tzinfo | None:
    """
    Alias for tz attribute
    """
    zone = self.tz
    return zone

def to_pydatetime(self) -> np.ndarray:
    """
    Return an ndarray of ``datetime.datetime`` objects.

    The index is first converted with ``to_pandas()`` and the pandas
    object's ``to_pydatetime()`` result is returned.

    Returns
    -------
    numpy.ndarray
        An ndarray of ``datetime.datetime`` objects.
    """
    pandas_index = self.to_pandas()
    return pandas_index.to_pydatetime()

def to_julian_date(self) -> Index:
    """Wrap the column's ``to_julian_date()`` result in an ``Index``.

    The new index is keyed by this index's name.
    """
    julian = self._column.to_julian_date()
    return Index._from_data({self.name: julian})

def to_period(self, freq=None) -> pd.PeriodIndex:
    """
    Convert to a pandas ``PeriodIndex`` at the given frequency.

    The conversion is delegated to pandas: the index is converted with
    ``to_pandas()`` and ``to_period`` is called on the result.

    Parameters
    ----------
    freq : str or offset, optional
        Frequency passed through to pandas. Defaults to None, matching
        the pandas signature, in which case pandas chooses the frequency.

    Returns
    -------
    pandas.PeriodIndex
    """
    pandas_index = self.to_pandas()
    return pandas_index.to_period(freq=freq)

def normalize(self) -> Self:
    """
    Convert times to midnight.

    Currently not implemented.
    """
    normalized = self._column.normalize()
    return type(self)._from_data({self.name: normalized})

@property
def time(self) -> np.ndarray:
    """
    Returns numpy array of ``datetime.time`` objects.

    The time part of the Timestamps, read from the ``time`` attribute of
    the ``to_pandas()`` conversion.
    """
    pandas_index = self.to_pandas()
    return pandas_index.time

@property
def timetz(self) -> np.ndarray:
    """
    Returns numpy array of ``datetime.time`` objects with timezones.

    The time part of the Timestamps, read from the ``timetz`` attribute
    of the ``to_pandas()`` conversion.
    """
    pandas_index = self.to_pandas()
    return pandas_index.timetz

@property
def date(self) -> np.ndarray:
    """
    Returns numpy array of python ``datetime.date`` objects.

    Namely, the date part of Timestamps without time and
    timezone information, read from the ``date`` attribute of the
    ``to_pandas()`` conversion.
    """
    pandas_index = self.to_pandas()
    return pandas_index.date

@property
def is_month_start(self) -> cupy.ndarray:
    """
    Booleans indicating if dates are the first day of the month.

    Returned as the ``values`` of the column's ``is_month_start`` flags.
    """
    flags = self._column.is_month_start
    return flags.values

@property
def is_month_end(self) -> cupy.ndarray:
    """
    Booleans indicating if dates are the last day of the month.

    Returned as the ``values`` of the column's ``is_month_end`` flags.
    """
    flags = self._column.is_month_end
    return flags.values

@property
def is_quarter_end(self) -> cupy.ndarray:
    """
    Booleans indicating if dates are the last day of the quarter.

    Returned as the ``values`` of the column's ``is_quarter_end`` flags.
    """
    flags = self._column.is_quarter_end
    return flags.values

@property
def is_quarter_start(self) -> cupy.ndarray:
    """
    Booleans indicating if dates are the start day of the quarter.

    Returned as the ``values`` of the column's ``is_quarter_start`` flags.
    """
    flags = self._column.is_quarter_start
    return flags.values

@property
def is_year_end(self) -> cupy.ndarray:
    """
    Booleans indicating if dates are the last day of the year.

    Returned as the ``values`` of the column's ``is_year_end`` flags.
    """
    flags = self._column.is_year_end
    return flags.values

@property
def is_year_start(self) -> cupy.ndarray:
    """
    Booleans indicating if dates are the first day of the year.

    Returned as the ``values`` of the column's ``is_year_start`` flags.
    """
    flags = self._column.is_year_start
    return flags.values

@property
def is_normalized(self) -> bool:
    """
    Returns True if all of the dates are at midnight ("no time")

    The value is read from the underlying column's ``is_normalized``.
    """
    column = self._column
    return column.is_normalized

@property
def days_in_month(self) -> Index:
    """
    Get the total number of days in the month that the date falls on.

    The column's ``days_in_month`` is wrapped in an ``Index`` keyed by
    this index's name.
    """
    month_lengths = self._column.days_in_month
    return Index._from_data({self.name: month_lengths})

# Second name bound to the same property object.
daysinmonth = days_in_month

@property
def day_of_week(self) -> Index:
    """
    Get the day of week that the date falls on.

    The column's ``day_of_week`` is wrapped in an ``Index`` keyed by
    this index's name.
    """
    weekdays = self._column.day_of_week
    return Index._from_data({self.name: weekdays})

@property # type: ignore
@_performance_tracking
def year(self):
Expand Down Expand Up @@ -3391,9 +3596,11 @@ def _get_nearest_indexer(
return indexer


def _validate_freq(freq: Any) -> cudf.DateOffset:
def _validate_freq(freq: Any) -> cudf.DateOffset | None:
if isinstance(freq, str):
return cudf.DateOffset._from_freqstr(freq)
elif freq is None:
return freq
elif freq is not None and not isinstance(freq, cudf.DateOffset):
raise ValueError(f"Invalid frequency: {freq}")
return cast(cudf.DateOffset, freq)
Loading

0 comments on commit 5bcd8e0

Please sign in to comment.