Skip to content

Commit

Permalink
FEAT: Add initial partial support for groupby.cumcount() (modin-project#54)
Browse files: browse the repository at this point in the history

* FEAT: Add partial support for cumcount

* Remove the set_index_name

* Squeeze the result

* Write cumcount name to None

* Can't set dtype to int64
  • Loading branch information
pyrito authored Feb 2, 2023
1 parent 5b3824d commit 16661e4
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 3 deletions.
1 change: 1 addition & 0 deletions modin/core/execution/client/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -911,6 +911,7 @@ def forwarding_method(self, by, *args, **kwargs):
"any",
"size",
"skew",
"cumcount",
"cumsum",
"cummax",
"cummin",
Expand Down
24 changes: 24 additions & 0 deletions modin/core/storage_formats/base/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2596,6 +2596,30 @@ def groupby_skew(
drop=drop,
)

@doc_utils.doc_groupby_method(
    action="compute cumulative count",
    result="count of all the previous values",
    refer_to="cumcount",
)
def groupby_cumcount(
    self,
    by,
    axis,
    groupby_kwargs,
    agg_args,
    agg_kwargs,
    drop=False,
):
    # NOTE(review): no docstring is written here on purpose; the decorator
    # above presumably generates it from the action/result/refer_to fields --
    # confirm against doc_utils before adding one by hand.
    #
    # Every caller-supplied argument is passed through unchanged; the only
    # thing this method contributes is the fixed aggregation name "cumcount".
    forwarded = {
        "by": by,
        "axis": axis,
        "groupby_kwargs": groupby_kwargs,
        "agg_args": agg_args,
        "agg_kwargs": agg_kwargs,
        "drop": drop,
    }
    return self.groupby_agg(agg_func="cumcount", **forwarded)

@doc_utils.doc_groupby_method(
action="compute cumulative sum",
result="sum of all the previous values",
Expand Down
12 changes: 9 additions & 3 deletions modin/pandas/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -832,9 +832,15 @@ def pipe(self, func, *args, **kwargs):
return com.pipe(self, func, *args, **kwargs)

def cumcount(self, ascending=True):
    # Number the rows of each group via the query compiler's
    # ``groupby_cumcount`` implementation.
    #
    # ``ascending`` is forwarded untouched as an aggregation keyword argument.
    # The previous ``_default_to_pandas`` computation is intentionally gone:
    # its result was overwritten by the call below before ever being used, so
    # it only cost a full pandas round trip.
    result = self._wrap_aggregation(
        type(self._query_compiler).groupby_cumcount,
        numeric_only=False,
        agg_kwargs=dict(ascending=ascending),
    )
    if not isinstance(result, Series):
        # The aggregation may hand back a non-Series object (presumably a
        # single-column frame -- confirm); collapse it along axis 1 so the
        # caller receives a Series.
        result = result.squeeze(axis=1)
    # pandas returns an unnamed Series from cumcount, so clear the name
    # regardless of which branch produced ``result``. The dtype is left as
    # produced by the aggregation; it is not coerced here.
    result.name = None
    return result

def tail(self, n=5):
Expand Down

0 comments on commit 16661e4

Please sign in to comment.