Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Align DatetimeIndex APIs with pandas 2.x #16367

Merged
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/cudf/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,8 @@ def on_missing_reference(app, env, node, contnode):
("py:class", "Dtype"),
# The following are erroneously warned due to
# https://github.com/sphinx-doc/sphinx/issues/11225
("py:obj", "cudf.DatetimeIndex.time"),
("py:obj", "cudf.DatetimeIndex.date"),
("py:obj", "cudf.Index.values_host"),
("py:class", "pa.Array"),
("py:class", "ScalarLike"),
Expand Down
56 changes: 56 additions & 0 deletions python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,62 @@ def dayofyear(self) -> ColumnBase:
def day_of_year(self) -> ColumnBase:
return self.get_dt_field("day_of_year")

@property
def is_month_start(self) -> ColumnBase:
    """Boolean column: ``True`` where the day-of-month equals 1.

    Null results of the comparison are replaced with ``False``.
    """
    on_first_day = self.day == 1
    return on_first_day.fillna(False)

@property
def is_month_end(self) -> ColumnBase:
    """Boolean column: ``True`` where the day matches the month's last day.

    The reference day is the ``day`` field of the column returned by
    ``libcudf.datetime.last_day_of_month``; null comparison results are
    replaced with ``False``.
    """
    final_day = libcudf.datetime.last_day_of_month(self).day
    matches = self.day == final_day
    return matches.fillna(False)

@property
def is_quarter_end(self) -> ColumnBase:
    """Boolean column: month-end rows whose month is 3, 6, 9 or 12.

    Nulls in the combined mask are replaced with ``False``.
    """
    closing_months = [3, 6, 9, 12]
    in_closing_month = self.month.isin(closing_months)
    combined = self.is_month_end & in_closing_month
    return combined.fillna(False)

@property
def is_quarter_start(self) -> ColumnBase:
    """Boolean column: month-start rows whose month is 1, 4, 7 or 10.

    Nulls in the combined mask are replaced with ``False``.
    """
    opening_months = [1, 4, 7, 10]
    in_opening_month = self.month.isin(opening_months)
    combined = self.is_month_start & in_opening_month
    return combined.fillna(False)

@property
def is_year_end(self) -> ColumnBase:
    """Boolean column flagging the last day of the year.

    Two candidate masks are built (``day_of_year == 366`` and
    ``day_of_year == 365``) and combined through
    ``libcudf.copying.copy_if_else`` using the mask returned by
    ``libcudf.datetime.is_leap_year``. Nulls become ``False``.
    """
    ordinal_day = self.day_of_year
    in_leap_year = libcudf.datetime.is_leap_year(self)

    is_day_366 = ordinal_day == cudf.Scalar(366)
    is_day_365 = ordinal_day == cudf.Scalar(365)
    # NOTE(review): presumably copy_if_else takes the first argument where
    # the mask is true and the second otherwise - confirm against libcudf.
    selected = libcudf.copying.copy_if_else(
        is_day_366, is_day_365, in_leap_year
    )
    return selected.fillna(False)

@property
def is_year_start(self) -> ColumnBase:
    """Boolean column: ``True`` where the day-of-year equals 1.

    Null results of the comparison are replaced with ``False``.
    """
    on_first_ordinal = self.day_of_year == 1
    return on_first_ordinal.fillna(False)

@property
def days_in_month(self) -> ColumnBase:
    """Return the result of ``libcudf.datetime.days_in_month`` for this column."""
    month_lengths = libcudf.datetime.days_in_month(self)
    return month_lengths

@property
def day_of_week(self) -> ColumnBase:
    """Placeholder property; accessing it always raises.

    Raises
    ------
    NotImplementedError
        Unconditionally.
    """
    message = "day_of_week is currently not implemented."
    raise NotImplementedError(message)

@property
def is_normalized(self) -> bool:
    """Placeholder property; accessing it always raises.

    Raises
    ------
    NotImplementedError
        Unconditionally.
    """
    message = "is_normalized is currently not implemented."
    raise NotImplementedError(message)

def to_julian_date(self) -> ColumnBase:
    """Placeholder method; calling it always raises.

    Raises
    ------
    NotImplementedError
        Unconditionally.
    """
    message = "to_julian_date is currently not implemented."
    raise NotImplementedError(message)

def normalize(self) -> ColumnBase:
    """Placeholder method; calling it always raises.

    Raises
    ------
    NotImplementedError
        Unconditionally.
    """
    message = "normalize is currently not implemented."
    raise NotImplementedError(message)

@property
def values(self):
"""
Expand Down
211 changes: 209 additions & 2 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@

if TYPE_CHECKING:
from collections.abc import Generator, Iterable
from datetime import tzinfo


def ensure_index(index_like: Any) -> BaseIndex:
Expand Down Expand Up @@ -1664,7 +1665,7 @@ class DatetimeIndex(Index):
copy : bool
Make a copy of input.
freq : str, optional
This is not yet supported
Frequency of the DatetimeIndex
tz : pytz.timezone or dateutil.tz.tzfile
This is not yet supported
ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
Expand Down Expand Up @@ -1831,6 +1832,210 @@ def searchsorted(
value, side=side, ascending=ascending, na_position=na_position
)

def as_unit(self, unit: str, round_ok: bool = True) -> Self:
    """
    Convert to a dtype with the given unit resolution.

    Not implemented yet: every call raises.

    Parameters
    ----------
    unit : {'s', 'ms', 'us', 'ns'}
        Target resolution.
    round_ok : bool, default True
        If False and the conversion requires rounding, raise ValueError.

    Raises
    ------
    NotImplementedError
        Unconditionally.
    """
    message = "as_unit is currently not implemented"
    raise NotImplementedError(message)

def mean(self, *, skipna: bool = True, axis: int | None = 0):
    """Return the mean computed by the underlying column.

    Parameters
    ----------
    skipna : bool, default True
        Forwarded to the column's ``mean``.
    axis : int or None, default 0
        Accepted in the signature but not used by this method.
    """
    column = self._column
    return column.mean(skipna=skipna)

def std(self, *, skipna: bool = True, axis: int | None = 0, ddof: int = 1):
    """Return the standard deviation computed by the underlying column.

    Parameters
    ----------
    skipna : bool, default True
        Forwarded to the column's ``std``.
    axis : int or None, default 0
        Accepted in the signature but not used by this method.
    ddof : int, default 1
        Forwarded to the column's ``std``.
    """
    column = self._column
    return column.std(skipna=skipna, ddof=ddof)

def strftime(self, date_format: str) -> Index:
    """
    Format the index values as strings and return them as an Index.

    The formatting is done by the underlying column's ``strftime``; the
    result keeps this index's name.

    Parameters
    ----------
    date_format : str
        Date format string (e.g. "%Y-%m-%d"), using the same directives
        as the python standard library.
    """
    formatted = self._column.strftime(date_format)
    return Index._from_data({self.name: formatted})

@property
def asi8(self) -> cupy.ndarray:
    """Return ``.values`` of the underlying column cast to ``"int64"``."""
    as_int64 = self._column.astype("int64")
    return as_int64.values

@property
def inferred_freq(self) -> cudf.DateOffset | None:
    """Placeholder property; accessing it always raises.

    Raises
    ------
    NotImplementedError
        Unconditionally.
    """
    message = "inferred_freq is currently not implemented"
    raise NotImplementedError(message)

@property
def freq(self) -> cudf.DateOffset | None:
    """Return the frequency stored on this index (``self._freq``)."""
    return self._freq

@freq.setter
def freq(self, value) -> None:
    """Reject assignment to ``freq``.

    Raises
    ------
    NotImplementedError
        Unconditionally; setting the frequency is not supported.
    """
    # A property setter is invoked as ``fset(instance, value)``. Without the
    # ``value`` parameter, ``index.freq = x`` failed with a TypeError about
    # the argument count instead of the intended NotImplementedError.
    raise NotImplementedError("Setting freq is currently not supported.")

@property
def freqstr(self) -> str:
    """Placeholder property; accessing it always raises.

    Raises
    ------
    NotImplementedError
        Unconditionally.
    """
    message = "freqstr is currently not implemented"
    raise NotImplementedError(message)

@property
def resolution(self) -> str:
    """
    Returns day, hour, minute, second, millisecond or microsecond

    Not implemented yet: accessing this property always raises
    ``NotImplementedError``.
    """
    message = "resolution is currently not implemented"
    raise NotImplementedError(message)

@property
def unit(self) -> str:
    """Return the ``time_unit`` attribute of the underlying column."""
    column = self._column
    return column.time_unit

@property
def tz(self) -> tzinfo | None:
    """
    Return the timezone.

    The value is read from the ``tz`` attribute of ``self.dtype``.

    Returns
    -------
    datetime.tzinfo or None
        ``None`` when the dtype has no ``tz`` attribute (tz-naive).
    """
    dtype = self.dtype
    return getattr(dtype, "tz", None)

@property
def tzinfo(self) -> tzinfo | None:
    """
    Alias for the ``tz`` attribute; returns whatever ``self.tz`` returns.
    """
    timezone = self.tz
    return timezone

def to_pydatetime(self) -> np.ndarray:
    """
    Return an ndarray of ``datetime.datetime`` objects.

    The index is first converted with ``to_pandas()`` and the pandas
    object's ``to_pydatetime()`` result is returned.

    Returns
    -------
    numpy.ndarray
        An ndarray of ``datetime.datetime`` objects.
    """
    as_pandas = self.to_pandas()
    return as_pandas.to_pydatetime()

def to_julian_date(self) -> Index:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add pytests to cover these newly added APIs?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure added tests in the latest commit

return Index._from_data({self.name: self._column.to_julian_date()})

def to_period(self, freq) -> pd.PeriodIndex:
    """Convert to a pandas ``PeriodIndex`` at the given frequency.

    The index is converted with ``to_pandas()`` first, and ``freq`` is
    forwarded to the pandas object's ``to_period``.
    """
    as_pandas = self.to_pandas()
    return as_pandas.to_period(freq=freq)

def normalize(self) -> Self:
    """
    Convert times to midnight.

    Delegates to the underlying column's ``normalize`` and wraps the
    result in a new index of the same type, keeping this index's name.

    Currently not implemented.
    """
    index_cls = type(self)
    return index_cls._from_data({self.name: self._column.normalize()})

@property
def time(self) -> np.ndarray:
    """
    Returns numpy array of ``datetime.time`` objects.

    The time part of the Timestamps, taken from the ``time`` attribute of
    the ``to_pandas()`` result.
    """
    as_pandas = self.to_pandas()
    return as_pandas.time

@property
def timetz(self) -> np.ndarray:
    """
    Returns numpy array of ``datetime.time`` objects with timezones.

    The time part of the Timestamps, taken from the ``timetz`` attribute
    of the ``to_pandas()`` result.
    """
    as_pandas = self.to_pandas()
    return as_pandas.timetz

@property
def date(self) -> np.ndarray:
    """
    Returns numpy array of python ``datetime.date`` objects.

    Namely, the date part of Timestamps without time and timezone
    information, taken from the ``date`` attribute of the ``to_pandas()``
    result.
    """
    as_pandas = self.to_pandas()
    return as_pandas.date

@property
def is_month_start(self) -> cupy.ndarray:
    """
    Booleans indicating if dates are the first day of the month.

    Returns ``.values`` of the underlying column's ``is_month_start``.
    """
    flags = self._column.is_month_start
    return flags.values

@property
def is_month_end(self) -> cupy.ndarray:
    """
    Booleans indicating if dates are the last day of the month.

    Returns ``.values`` of the underlying column's ``is_month_end``.
    """
    flags = self._column.is_month_end
    return flags.values

@property
def is_quarter_end(self) -> cupy.ndarray:
    """
    Booleans indicating if dates are the last day of the quarter.

    Returns ``.values`` of the underlying column's ``is_quarter_end``.
    """
    flags = self._column.is_quarter_end
    return flags.values

@property
def is_quarter_start(self) -> cupy.ndarray:
    """
    Booleans indicating if dates are the start day of the quarter.

    Returns ``.values`` of the underlying column's ``is_quarter_start``.
    """
    flags = self._column.is_quarter_start
    return flags.values

@property
def is_year_end(self) -> cupy.ndarray:
    """
    Booleans indicating if dates are the last day of the year.

    Returns ``.values`` of the underlying column's ``is_year_end``.
    """
    flags = self._column.is_year_end
    return flags.values

@property
def is_year_start(self) -> cupy.ndarray:
    """
    Booleans indicating if dates are the first day of the year.

    Returns ``.values`` of the underlying column's ``is_year_start``.
    """
    flags = self._column.is_year_start
    return flags.values

@property
def is_normalized(self) -> bool:
    """
    Returns True if all of the dates are at midnight ("no time")

    The value is read from the underlying column's ``is_normalized``.
    """
    column = self._column
    return column.is_normalized

@property
def days_in_month(self) -> Index:
    """
    Get the total number of days in the month that the date falls on.

    The underlying column's ``days_in_month`` is wrapped in an Index that
    keeps this index's name.
    """
    month_lengths = self._column.days_in_month
    return Index._from_data({self.name: month_lengths})

# ``daysinmonth`` is the same property object under a second name.
daysinmonth = days_in_month

@property
def day_of_week(self) -> Index:
    """
    Get the day of week that the date falls on.

    The underlying column's ``day_of_week`` is wrapped in an Index that
    keeps this index's name.
    """
    weekdays = self._column.day_of_week
    return Index._from_data({self.name: weekdays})

@property # type: ignore
@_performance_tracking
def year(self):
Expand Down Expand Up @@ -3259,9 +3464,11 @@ def _get_nearest_indexer(
return indexer


def _validate_freq(freq: Any) -> cudf.DateOffset:
def _validate_freq(freq: Any) -> cudf.DateOffset | None:
    """Normalize a user-supplied frequency.

    Parameters
    ----------
    freq : Any
        ``None``, a frequency string, or a ``cudf.DateOffset``.

    Returns
    -------
    cudf.DateOffset or None
        ``None`` is passed through, a string is converted with
        ``cudf.DateOffset._from_freqstr``, and a ``cudf.DateOffset`` is
        returned as-is.

    Raises
    ------
    ValueError
        If ``freq`` is none of the accepted kinds.
    """
    if freq is None:
        return None
    if isinstance(freq, str):
        return cudf.DateOffset._from_freqstr(freq)
    if isinstance(freq, cudf.DateOffset):
        return cast(cudf.DateOffset, freq)
    raise ValueError(f"Invalid frequency: {freq}")
Loading
Loading