From 82536ce3242415e6eb5e0938134316229f33732c Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Mon, 30 Jan 2023 20:16:37 +0000 Subject: [PATCH] fix: support `mask_identity=True` for `axis=None` in `ptp`, `std`, etc. (#2172) --- src/awkward/operations/ak_mean.py | 28 +++++++--- src/awkward/operations/ak_ptp.py | 87 +++++++++++------------------ src/awkward/operations/ak_std.py | 21 +++++-- src/awkward/operations/ak_var.py | 37 ++++++++---- tests/test_2020_reduce_axis_none.py | 6 +- 5 files changed, 97 insertions(+), 82 deletions(-) diff --git a/src/awkward/operations/ak_mean.py b/src/awkward/operations/ak_mean.py index 8c9e1f1dc5..ea36d935af 100644 --- a/src/awkward/operations/ak_mean.py +++ b/src/awkward/operations/ak_mean.py @@ -186,16 +186,16 @@ def _impl(x, weight, axis, keepdims, mask_identity): sumw = ak.operations.ak_count._impl( x, axis, - keepdims, - mask_identity, + keepdims=True, + mask_identity=True, highlevel=True, behavior=None, ) sumwx = ak.operations.ak_sum._impl( x, axis, - keepdims, - mask_identity, + keepdims=True, + mask_identity=True, highlevel=True, behavior=None, ) @@ -211,12 +211,26 @@ def _impl(x, weight, axis, keepdims, mask_identity): sumwx = ak.operations.ak_sum._impl( x * weight, axis, - keepdims, - mask_identity, + keepdims=True, + mask_identity=True, highlevel=True, behavior=None, ) - return sumwx / sumw + + out = sumwx / sumw + + if not mask_identity: + out = ak.highlevel.Array(ak.operations.fill_none(out, np.nan, axis=-1)) + + if axis is None: + if not keepdims: + out = out[(0,) * out.ndim] + else: + if not keepdims: + posaxis = ak._util.maybe_posaxis(out.layout, axis, 1) + out = out[(slice(None, None),) * posaxis + (0,)] + + return out @ak._connect.numpy.implements("mean") diff --git a/src/awkward/operations/ak_ptp.py b/src/awkward/operations/ak_ptp.py index a7756903c5..9f7510a079 100644 --- a/src/awkward/operations/ak_ptp.py +++ b/src/awkward/operations/ak_ptp.py @@ -79,65 +79,42 @@ def ptp(array, axis=None, *, keepdims=False, mask_identity=True, flatten_records def _impl(array, axis, keepdims, mask_identity): behavior = ak._util.behavior_of(array) - array = ak.highlevel.Array( - ak.operations.to_layout(array, allow_record=False, allow_other=False), - behavior=behavior, - ) + layout = ak.operations.to_layout(array, allow_record=False, allow_other=False) with np.errstate(invalid="ignore", divide="ignore"): - if axis is None: - out = ak.operations.ak_max._impl( - array, - axis, - keepdims, - None, - mask_identity, - highlevel=True, - behavior=None, - ) - ak.operations.ak_min._impl( - array, - axis, - keepdims, - None, - mask_identity, - highlevel=True, - behavior=None, - ) - if not mask_identity and out is None: - out = 0 + maxi = ak.operations.ak_max._impl( + layout, + axis, + True, + None, + mask_identity, + highlevel=True, + behavior=behavior, + ) + mini = ak.operations.ak_min._impl( + layout, + axis, + True, + None, + True, + highlevel=True, + behavior=behavior, + ) + out = maxi - mini + + # Check that removed code was not needed! + assert maxi is not None and mini is not None + + if not mask_identity: + out = ak.highlevel.Array(ak.operations.fill_none(out, 0, axis=-1)) + if axis is None: + if not keepdims: + out = out[(0,) * out.ndim] else: - maxi = ak.operations.ak_max._impl( - array, - axis, - True, - None, - mask_identity, - highlevel=True, - behavior=None, - ) - mini = ak.operations.ak_min._impl( - array, - axis, - True, - None, - True, - highlevel=True, - behavior=None, - ) - - if maxi is None or mini is None: - out = None - - else: - out = maxi - mini - - if not mask_identity: - out = ak.highlevel.Array(ak.operations.fill_none(out, 0, axis=-1)) - - if not keepdims: - posaxis = ak._util.maybe_posaxis(out.layout, axis, 1) - out = out[(slice(None, None),) * posaxis + (0,)] + if not keepdims: + posaxis = ak._util.maybe_posaxis(out.layout, axis, 1) + out = out[(slice(None, None),) * posaxis + (0,)] return out diff --git a/src/awkward/operations/ak_std.py b/src/awkward/operations/ak_std.py index 5efc9c0909..2fd6729092 100644 --- a/src/awkward/operations/ak_std.py +++ b/src/awkward/operations/ak_std.py @@ -2,7 +2,7 @@ import awkward as ak from awkward._connect.numpy import unsupported -from awkward._nplikes import nplike_of +from awkward._nplikes import ufuncs from awkward._nplikes.numpylike import NumpyMetadata from awkward._util import unset @@ -171,17 +171,30 @@ def _impl(x, weight, ddof, axis, keepdims, mask_identity): ) with np.errstate(invalid="ignore", divide="ignore"): - return nplike_of(x, weight).sqrt( + out = ufuncs.sqrt( ak.operations.ak_var._impl( x, weight, ddof, axis, - keepdims, - mask_identity, + keepdims=True, + mask_identity=True, ) ) + if not mask_identity: + out = ak.highlevel.Array(ak.operations.fill_none(out, np.nan, axis=-1)) + + if axis is None: + if not keepdims: + out = out[(0,) * out.ndim] + else: + if not keepdims: + posaxis = ak._util.maybe_posaxis(out.layout, axis, 1) + out = out[(slice(None, None),) * posaxis + (0,)] + + return out + @ak._connect.numpy.implements("std") def _nep_18_impl_std( diff --git a/src/awkward/operations/ak_var.py b/src/awkward/operations/ak_var.py index 935d4fe813..c297f90569 100644 --- a/src/awkward/operations/ak_var.py +++ b/src/awkward/operations/ak_var.py @@ -176,21 +176,23 @@ def _impl(x, weight, ddof, axis, keepdims, mask_identity): ) with np.errstate(invalid="ignore", divide="ignore"): - xmean = ak.operations.ak_mean._impl(x, weight, axis, False, mask_identity) + xmean = ak.operations.ak_mean._impl( + x, weight, axis, keepdims=True, mask_identity=True + ) if weight is None: sumw = ak.operations.ak_count._impl( x, axis, - keepdims, - mask_identity, + keepdims=True, + mask_identity=True, highlevel=True, behavior=None, ) sumwxx = ak.operations.ak_sum._impl( (x - xmean) ** 2, axis, - keepdims, - mask_identity, + keepdims=True, + mask_identity=True, highlevel=True, behavior=None, ) @@ -198,23 +200,36 @@ def _impl(x, weight, ddof, axis, keepdims, mask_identity): sumw = ak.operations.ak_sum._impl( x * 0 + weight, axis, - keepdims, - mask_identity, + keepdims=True, + mask_identity=True, highlevel=True, behavior=None, ) sumwxx = ak.operations.ak_sum._impl( (x - xmean) ** 2 * weight, axis, - keepdims, - mask_identity, + keepdims=True, + mask_identity=True, highlevel=True, behavior=None, ) if ddof != 0: - return (sumwxx / sumw) * (sumw / (sumw - ddof)) + out = (sumwxx / sumw) * (sumw / (sumw - ddof)) else: - return sumwxx / sumw + out = sumwxx / sumw + + if not mask_identity: + out = ak.highlevel.Array(ak.operations.fill_none(out, np.nan, axis=-1)) + + if axis is None: + if not keepdims: + out = out[(0,) * out.ndim] + else: + if not keepdims: + posaxis = ak._util.maybe_posaxis(out.layout, axis, 1) + out = out[(slice(None, None),) * posaxis + (0,)] + + return out @ak._connect.numpy.implements("var") diff --git a/tests/test_2020_reduce_axis_none.py b/tests/test_2020_reduce_axis_none.py index 911dd5d688..9a07491e1a 100644 --- a/tests/test_2020_reduce_axis_none.py +++ b/tests/test_2020_reduce_axis_none.py @@ -128,7 +128,6 @@ def test_std(): assert np.isnan(ak.std(array[2], axis=None, mask_identity=False)) -@pytest.mark.xfail(reason="fix mask_identity=False") def test_std_no_mask_axis_none(): assert ak._util.arrays_approx_equal( ak.std(array[-1:], axis=None, keepdims=True, mask_identity=True), @@ -150,7 +149,6 @@ def test_var(): assert np.isnan(ak.var(array[2], axis=None, mask_identity=False)) -@pytest.mark.xfail(reason="fix mask_identity=False") def test_var_no_mask_axis_none(): assert ak._util.arrays_approx_equal( ak.var(array[-1:], axis=None, keepdims=True, mask_identity=True), @@ -172,7 +170,6 @@ def test_mean(): assert np.isnan(ak.mean(array[2], axis=None, mask_identity=False)) -@pytest.mark.xfail(reason="fix mask_identity=False") def test_mean_no_mask_axis_none(): assert ak._util.arrays_approx_equal( ak.mean(array[-1:], axis=None, keepdims=True, mask_identity=True), @@ -191,10 +188,9 @@ def test_ptp(): ak.ptp(array, axis=None, keepdims=True, mask_identity=True), ak.Array([10.0]).mask[[True]], ) - assert np.isinf(ak.ptp(array[2], axis=None, mask_identity=False)) + assert ak.ptp(array[2], axis=None, mask_identity=False) == pytest.approx(0.0) -@pytest.mark.xfail(reason="fix mask_identity=False") def test_ptp_no_mask_axis_none(): assert ak._util.arrays_approx_equal( ak.ptp(array[-1:], axis=None, keepdims=True, mask_identity=True),