Skip to content

Commit

Permalink
PERF-modin-project#5705: Preserve metadata when applying `Series.cat.codes`
Browse files Browse the repository at this point in the history

Signed-off-by: Igoshev, Iaroslav <[email protected]>
  • Loading branch information
YarShev committed Feb 24, 2023
1 parent f4ed1c8 commit adbea76
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 2 deletions.
20 changes: 19 additions & 1 deletion modin/core/dataframe/pandas/dataframe/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2199,6 +2199,8 @@ def apply_full_axis(
func,
new_index=None,
new_columns=None,
new_row_lengths=None,
new_column_widths=None,
apply_indices=None,
enumerate_partitions: bool = False,
dtypes=None,
Expand All @@ -2221,6 +2223,12 @@ def apply_full_axis(
new_columns : list-like, optional
The columns of the result. We may know this in
advance, and if not provided it must be computed.
new_row_lengths : list, optional
The length of each partition in the rows. The "height" of
each of the block partitions. Is computed if not provided.
new_column_widths : list, optional
The width of each partition in the columns. The "width" of
each of the block partitions. Is computed if not provided.
apply_indices : list-like, default: None
Indices of `axis ^ 1` to apply function over.
enumerate_partitions : bool, default: False
Expand Down Expand Up @@ -2255,6 +2263,8 @@ def apply_full_axis(
func=func,
new_index=new_index,
new_columns=new_columns,
new_row_lengths=new_row_lengths,
new_column_widths=new_column_widths,
apply_indices=apply_indices,
enumerate_partitions=enumerate_partitions,
dtypes=dtypes,
Expand Down Expand Up @@ -2649,6 +2659,8 @@ def broadcast_apply_full_axis(
other,
new_index=None,
new_columns=None,
new_row_lengths=None,
new_column_widths=None,
apply_indices=None,
enumerate_partitions=False,
dtypes=None,
Expand All @@ -2673,6 +2685,12 @@ def broadcast_apply_full_axis(
new_columns : list-like, optional
Columns of the result. We may know this in
advance, and if not provided it must be computed.
new_row_lengths : list, optional
The length of each partition in the rows. The "height" of
each of the block partitions. Is computed if not provided.
new_column_widths : list, optional
The width of each partition in the columns. The "width" of
each of the block partitions. Is computed if not provided.
apply_indices : list-like, default: None
Indices of `axis ^ 1` to apply function over.
enumerate_partitions : bool, default: False
Expand Down Expand Up @@ -2736,7 +2754,7 @@ def broadcast_apply_full_axis(
keep_partitioning=keep_partitioning,
apply_func_args=apply_func_args,
)
kw = {"row_lengths": None, "column_widths": None}
kw = {"row_lengths": new_row_lengths, "column_widths": new_column_widths}
if dtypes == "copy":
kw["dtypes"] = self._dtypes
elif dtypes is not None:
Expand Down
7 changes: 6 additions & 1 deletion modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3428,7 +3428,12 @@ def func(df) -> np.ndarray:
return ser.cat.codes.to_frame(name=MODIN_UNNAMED_SERIES_LABEL)

res = self._modin_frame.apply_full_axis(
axis=0, func=func, new_columns=[MODIN_UNNAMED_SERIES_LABEL]
axis=0,
func=func,
new_index=self._modin_frame._index_cache,
new_columns=[MODIN_UNNAMED_SERIES_LABEL],
new_row_lengths=self._modin_frame._row_lengths_cache,
new_column_widths=self._modin_frame._column_widths_cache,
)
return self.__constructor__(res, shape_hint="column")

Expand Down

0 comments on commit adbea76

Please sign in to comment.