From e738f7df7f53247cde1208a3242d24f4db319cdf Mon Sep 17 00:00:00 2001 From: Karthik Velayutham Date: Mon, 23 Jan 2023 11:49:32 -0600 Subject: [PATCH] Add QC method for groupby.sem (#47) (core) --- .../storage_formats/base/query_compiler.py | 22 +++++++++++++++++++ modin/pandas/groupby.py | 17 +++++++++----- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index 9469cd2e612..47999244345 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -2975,6 +2975,28 @@ def groupby_std( drop=drop, ) + @doc_utils.doc_groupby_method( + action="compute standard error", result="standard error", refer_to="sem" + ) + def groupby_sem( + self, + by, + axis, + groupby_kwargs, + agg_args, + agg_kwargs, + drop=False, + ): + return self.groupby_agg( + by=by, + agg_func="sem", + axis=axis, + groupby_kwargs=groupby_kwargs, + agg_args=agg_args, + agg_kwargs=agg_kwargs, + drop=drop, + ) + @doc_utils.doc_groupby_method( action="compute numerical rank", result="numerical rank", refer_to="rank" ) diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py index 844467ef575..e605637d150 100644 --- a/modin/pandas/groupby.py +++ b/modin/pandas/groupby.py @@ -222,7 +222,11 @@ def ffill(self, limit=None): return self._default_to_pandas(lambda df: df.ffill(limit=limit)) def sem(self, ddof=1): - return self._default_to_pandas(lambda df: df.sem(ddof=ddof)) + return self._wrap_aggregation( + type(self._query_compiler).groupby_sem, + agg_kwargs=dict(ddof=ddof), + numeric_only=True, + ) def sample(self, n=None, frac=None, replace=False, weights=None, random_state=None): return self._default_to_pandas( @@ -690,10 +694,13 @@ def do_relabel(obj_to_relabel): kwargs = {} func = func_dict elif is_list_like(func): - return self._default_to_pandas( - lambda df, *args, **kwargs: df.aggregate(func, *args, **kwargs), - *args, - **kwargs, + return self._wrap_aggregation( + qc_method=type(self._query_compiler).groupby_agg, + numeric_only=False, + agg_func=func, + agg_args=args, + agg_kwargs=kwargs, + how="axis_wise", ) elif callable(func): return self._check_index(