diff --git a/flox/aggregate_flox.py b/flox/aggregate_flox.py
index ef64a371..4bd9c24a 100644
--- a/flox/aggregate_flox.py
+++ b/flox/aggregate_flox.py
@@ -37,7 +37,8 @@ def _lerp(a, b, *, t, dtype, out=None):
     """
     if out is None:
         out = np.empty_like(a, dtype=dtype)
-    diff_b_a = np.subtract(b, a)
+    with np.errstate(invalid="ignore"):
+        diff_b_a = np.subtract(b, a)
     # asanyarray is a stop-gap until gh-13105
     np.add(a, diff_b_a * t, out=out)
     np.subtract(b, diff_b_a * (1 - t), out=out, where=t >= 0.5)
@@ -95,7 +96,8 @@ def quantile_(array, inv_idx, *, q, axis, skipna, group_idx, dtype=None, out=Non
 
     # partition the complex array in-place
     labels_broadcast = np.broadcast_to(group_idx, array.shape)
-    cmplx = labels_broadcast + 1j * array
+    with np.errstate(invalid="ignore"):
+        cmplx = labels_broadcast + 1j * array
     cmplx.partition(kth=kth, axis=-1)
     if is_scalar_q:
         a_ = cmplx.imag
diff --git a/flox/aggregate_npg.py b/flox/aggregate_npg.py
index 6ffbc0b0..91d49cb7 100644
--- a/flox/aggregate_npg.py
+++ b/flox/aggregate_npg.py
@@ -88,6 +88,8 @@ def nanprod(group_idx, array, engine, *, axis=-1, size=None, fill_value=None, dt
 
 
 def _len(group_idx, array, engine, *, func, axis=-1, size=None, fill_value=None, dtype=None):
+    if array.dtype.kind in "US":
+        array = np.broadcast_to(np.array([1]), array.shape)
     result = _get_aggregate(engine).aggregate(
         group_idx,
         array,
diff --git a/flox/aggregate_numbagg.py b/flox/aggregate_numbagg.py
index 1c0edbee..c2b718e8 100644
--- a/flox/aggregate_numbagg.py
+++ b/flox/aggregate_numbagg.py
@@ -105,11 +105,24 @@ def nanstd(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None,
     )
 
 
+def nanlen(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None):
+    if array.dtype.kind in "US":
+        array = np.broadcast_to(np.array([1]), array.shape)
+    return _numbagg_wrapper(
+        group_idx,
+        array,
+        axis=axis,
+        size=size,
+        func="nancount",
+        # fill_value=fill_value,
+        # dtype=dtype,
+    )
+
+
 nansum = partial(_numbagg_wrapper, func="nansum")
 nanmean = partial(_numbagg_wrapper, func="nanmean")
 nanprod = partial(_numbagg_wrapper, func="nanprod")
 nansum_of_squares = partial(_numbagg_wrapper, func="nansum_of_squares")
-nanlen = partial(_numbagg_wrapper, func="nancount")
 nanprod = partial(_numbagg_wrapper, func="nanprod")
 nanfirst = partial(_numbagg_wrapper, func="nanfirst")
 nanlast = partial(_numbagg_wrapper, func="nanlast")
diff --git a/tests/test_core.py b/tests/test_core.py
index 26c75a85..19c96758 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1127,7 +1127,7 @@ def test_group_by_datetime(engine, method):
     edges = pd.date_range("1999-12-31", "2000-12-31", freq="ME").to_series().to_numpy()
 
     actual, _ = groupby_reduce(daskarray, t.to_numpy(), isbin=True, expected_groups=edges, **kwargs)
-    expected = data.resample("M").mean().to_numpy()
+    expected = data.resample("ME").mean().to_numpy()
     assert_equal(expected, actual)
 
     actual, _ = groupby_reduce(
@@ -1688,3 +1688,12 @@ def test_multiple_quantiles(q, chunk, func, by_ndim):
     if by_ndim == 2:
         expected = expected.squeeze(axis=-2)
     assert_equal(expected, actual, tolerance=1e-14)
+
+
+@pytest.mark.parametrize("dtype", ["U3", "S3"])
+def test_nanlen_string(dtype, engine):
+    array = np.array(["ABC", "DEF", "GHI", "JKL", "MNO", "PQR"], dtype=dtype)
+    by = np.array([0, 0, 1, 2, 1, 0])
+    expected = np.array([3, 2, 1], dtype=np.intp)
+    actual, *_ = groupby_reduce(array, by, func="count", engine=engine)
+    assert_equal(expected, actual)
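
Note (not part of the patch): a minimal NumPy-only sketch of the two behaviors the diff handles — suppressing "invalid value" warnings from non-finite arithmetic, and substituting broadcast ones for string arrays in count reductions. The variable names below are illustrative, not flox API.

    import numpy as np

    # inf - inf is an invalid floating-point operation; NumPy emits
    # "RuntimeWarning: invalid value encountered in subtract" unless the
    # call is wrapped in np.errstate(invalid="ignore"), as the patch does
    # in _lerp and around the complex-partition trick in quantile_.
    a = np.array([np.inf, 1.0])
    b = np.array([np.inf, 2.0])
    with np.errstate(invalid="ignore"):
        diff_b_a = np.subtract(b, a)  # -> [nan, 1.0], no warning

    # The complex packing hits the same issue: 1j * inf computes 0 * inf.
    with np.errstate(invalid="ignore"):
        cmplx = np.array([0.0, 1.0]) + 1j * a  # no warning

    # String arrays (dtype kind "U" or "S") cannot be aggregated
    # numerically, so the count path swaps in broadcast ones of the
    # same shape before handing off to the aggregation engine.
    strings = np.array(["ABC", "DEF", "GHI"], dtype="U3")
    if strings.dtype.kind in "US":
        strings_as_ones = np.broadcast_to(np.array([1]), strings.shape)
    print(strings_as_ones.sum())  # 3, one per string element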