Skip to content

Commit

Permalink
PERF-#5705: Preserve metadata when applying Series.cat.codes (#5706)
Browse files Browse the repository at this point in the history
Signed-off-by: Igoshev, Iaroslav <[email protected]>
  • Loading branch information
YarShev authored Feb 28, 2023
1 parent 0ce9ae6 commit 4db3e70
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 2 deletions.
14 changes: 13 additions & 1 deletion modin/core/dataframe/pandas/dataframe/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1793,7 +1793,7 @@ def window(
pass

@lazy_metadata_decorator(apply_axis="both")
def fold(self, axis, func):
def fold(self, axis, func, new_columns=None):
"""
Perform a function across an entire axis.
Expand All @@ -1803,6 +1803,11 @@ def fold(self, axis, func):
The axis to apply over.
func : callable
The function to apply.
new_columns : list-like, optional
The columns of the result.
Must have the same length as the columns of `self`.
The column labels of `self` may change during an operation so
we may want to pass the new column labels in (e.g., see `cat.codes`).
Returns
-------
Expand All @@ -1813,6 +1818,13 @@ def fold(self, axis, func):
-----
The data shape is not changed (length and width of the table).
"""
if new_columns is not None:
if self._columns_cache is not None:
assert len(self._columns_cache) == len(
new_columns
), "The length of `new_columns` doesn't match the columns' length of `self`"
self._columns_cache = new_columns

new_partitions = self._partition_mgr_cls.map_axis_partitions(
axis, self._partitions, func, keep_partitioning=True
)
Expand Down
2 changes: 1 addition & 1 deletion modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3452,7 +3452,7 @@ def func(df) -> np.ndarray:
ser = ser.astype("category", copy=False)
return ser.cat.codes.to_frame(name=MODIN_UNNAMED_SERIES_LABEL)

res = self._modin_frame.apply_full_axis(
res = self._modin_frame.fold(
axis=0, func=func, new_columns=[MODIN_UNNAMED_SERIES_LABEL]
)
return self.__constructor__(res, shape_hint="column")
Expand Down

0 comments on commit 4db3e70

Please sign in to comment.