Skip to content

Commit

Permalink
Fix nanlen with strings (#344)
Browse files Browse the repository at this point in the history
* Fix nanlen with strings

Closes pydata/xarray#8853

* fix windows

* Silence warnings
  • Loading branch information
dcherian authored Mar 19, 2024
1 parent 20be463 commit 307899a
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 4 deletions.
6 changes: 4 additions & 2 deletions flox/aggregate_flox.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def _lerp(a, b, *, t, dtype, out=None):
"""
if out is None:
out = np.empty_like(a, dtype=dtype)
diff_b_a = np.subtract(b, a)
with np.errstate(invalid="ignore"):
diff_b_a = np.subtract(b, a)
# asanyarray is a stop-gap until gh-13105
np.add(a, diff_b_a * t, out=out)
np.subtract(b, diff_b_a * (1 - t), out=out, where=t >= 0.5)
Expand Down Expand Up @@ -95,7 +96,8 @@ def quantile_(array, inv_idx, *, q, axis, skipna, group_idx, dtype=None, out=Non

# partition the complex array in-place
labels_broadcast = np.broadcast_to(group_idx, array.shape)
cmplx = labels_broadcast + 1j * array
with np.errstate(invalid="ignore"):
cmplx = labels_broadcast + 1j * array
cmplx.partition(kth=kth, axis=-1)
if is_scalar_q:
a_ = cmplx.imag
Expand Down
2 changes: 2 additions & 0 deletions flox/aggregate_npg.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ def nanprod(group_idx, array, engine, *, axis=-1, size=None, fill_value=None, dt


def _len(group_idx, array, engine, *, func, axis=-1, size=None, fill_value=None, dtype=None):
if array.dtype.kind in "US":
array = np.broadcast_to(np.array([1]), array.shape)
result = _get_aggregate(engine).aggregate(
group_idx,
array,
Expand Down
15 changes: 14 additions & 1 deletion flox/aggregate_numbagg.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,24 @@ def nanstd(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None,
)


def nanlen(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None):
    """Per-group count, delegated to ``_numbagg_wrapper`` with ``func="nancount"``.

    When ``array`` has a unicode (``"U"``) or bytes (``"S"``) dtype it is
    replaced by a broadcast array of ones with the same shape before the
    call, so only the shape of the string data reaches the wrapper.

    ``fill_value`` and ``dtype`` are part of the signature but are not
    forwarded to ``_numbagg_wrapper``.
    """
    is_string_like = array.dtype.kind in "US"
    if is_string_like:
        # Stand-in of ones; the string values themselves are not passed on.
        ones = np.array([1])
        array = np.broadcast_to(ones, array.shape)

    return _numbagg_wrapper(group_idx, array, axis=axis, size=size, func="nancount")


nansum = partial(_numbagg_wrapper, func="nansum")
nanmean = partial(_numbagg_wrapper, func="nanmean")
nanprod = partial(_numbagg_wrapper, func="nanprod")
nansum_of_squares = partial(_numbagg_wrapper, func="nansum_of_squares")
nanlen = partial(_numbagg_wrapper, func="nancount")
nanprod = partial(_numbagg_wrapper, func="nanprod")
nanfirst = partial(_numbagg_wrapper, func="nanfirst")
nanlast = partial(_numbagg_wrapper, func="nanlast")
Expand Down
11 changes: 10 additions & 1 deletion tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1127,7 +1127,7 @@ def test_group_by_datetime(engine, method):

edges = pd.date_range("1999-12-31", "2000-12-31", freq="ME").to_series().to_numpy()
actual, _ = groupby_reduce(daskarray, t.to_numpy(), isbin=True, expected_groups=edges, **kwargs)
expected = data.resample("M").mean().to_numpy()
expected = data.resample("ME").mean().to_numpy()
assert_equal(expected, actual)

actual, _ = groupby_reduce(
Expand Down Expand Up @@ -1688,3 +1688,12 @@ def test_multiple_quantiles(q, chunk, func, by_ndim):
if by_ndim == 2:
expected = expected.squeeze(axis=-2)
assert_equal(expected, actual, tolerance=1e-14)


@pytest.mark.parametrize("dtype", ["U3", "S3"])
def test_nanlen_string(dtype, engine):
    """``count`` on fixed-width unicode/bytes arrays yields per-group sizes."""
    words = "ABC DEF GHI JKL MNO PQR".split()
    array = np.array(words, dtype=dtype)
    # Group 0 has three members, group 1 has two, group 2 has one.
    by = np.array([0, 0, 1, 2, 1, 0])
    expected = np.array([3, 2, 1], dtype=np.intp)

    actual, *_ = groupby_reduce(array, by, func="count", engine=engine)

    assert_equal(expected, actual)

0 comments on commit 307899a

Please sign in to comment.