Skip to content

Commit

Permalink
PERF-modin-project#6437: preserve dtypes for 'reindex'
Browse files Browse the repository at this point in the history
Signed-off-by: Anatoly Myachev <[email protected]>
  • Loading branch information
anmyachev committed Aug 1, 2023
1 parent 5af3318 commit 0336208
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 0 deletions.
7 changes: 7 additions & 0 deletions modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -642,11 +642,18 @@ def map_func(left, right=right_pandas, kwargs=kwargs): # pragma: no cover
def reindex(self, axis, labels, **kwargs):
    """
    Conform the frame to ``labels`` along ``axis``, reusing dtypes when safe.

    Parameters
    ----------
    axis : {0, 1}
        0 to reindex the rows, 1 to reindex the columns.
    labels : list-like
        Labels the selected axis should carry afterwards.
    **kwargs : dict
        Forwarded unchanged to ``df.reindex`` inside the full-axis function.

    Returns
    -------
    object
        Result of ``self.__constructor__`` applied to the reindexed frame.
    """
    new_index, _ = (self.index, None) if axis else self.index.reindex(labels)
    new_columns, _ = self.columns.reindex(labels) if axis else (self.columns, None)

    # ``None`` lets the frame work the dtypes out from the result instead of
    # trusting a hint that might be wrong.
    new_dtypes = None
    if self._modin_frame.has_materialized_dtypes:
        if axis == 0:
            old_labels, requested = self.index, new_index
        else:
            old_labels, requested = self.columns, new_columns
        # The known dtypes stay valid only if no new label is introduced:
        # a missing row label yields fill values that may change a column's
        # dtype, and a missing column label has no known dtype at all
        # (looking it up in ``self.dtypes`` would raise ``KeyError``).
        if requested.isin(old_labels).all():
            if axis == 0:
                new_dtypes = self.dtypes
            else:
                new_dtypes = self.dtypes.loc[new_columns]

    new_modin_frame = self._modin_frame.apply_full_axis(
        axis,
        lambda df: df.reindex(labels=labels, axis=axis, **kwargs),
        new_index=new_index,
        new_columns=new_columns,
        dtypes=new_dtypes,
    )
    return self.__constructor__(new_modin_frame)

Expand Down
11 changes: 11 additions & 0 deletions modin/test/storage_formats/pandas/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1089,3 +1089,14 @@ def test_setitem_bool_preserve_dtypes():
# scalar as a col_loc
df.loc[indexer, "a"] = 2.0
assert df._query_compiler._modin_frame.has_materialized_dtypes


@pytest.mark.parametrize(
    "kwargs",
    [
        {"axis": 0, "labels": []},
        {"axis": 1, "labels": ["a"]},
        {"axis": 1, "labels": []},
    ],
)
def test_reindex_preserve_dtypes(kwargs):
    """Check that dtypes stay materialized after ``reindex`` with these kwargs."""
    source_df = pd.DataFrame({"a": [1, 1, 2, 2], "b": [3, 4, 5, 6]})

    modin_frame = source_df.reindex(**kwargs)._query_compiler._modin_frame
    assert modin_frame.has_materialized_dtypes

0 comments on commit 0336208

Please sign in to comment.