From da57cafe807113dcef1cddd856dd8031ab8ba61f Mon Sep 17 00:00:00 2001 From: Keewis Date: Wed, 13 Nov 2019 23:33:27 +0100 Subject: [PATCH 1/5] move the implementation of DataArrayGroupBy.quantile to GroupBy --- xarray/core/groupby.py | 108 ++++++++++++++++++++--------------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index c73ee3cf7c5..974d4671814 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -557,6 +557,60 @@ def fillna(self, value): out = ops.fillna(self, value) return out + def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): + """Compute the qth quantile over each array in the groups and + concatenate them together into a new array. + + Parameters + ---------- + q : float in range of [0,1] (or sequence of floats) + Quantile to compute, which must be between 0 and 1 + inclusive. + dim : `...`, str or sequence of str, optional + Dimension(s) over which to apply quantile. + Defaults to the grouped dimension. + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + This optional parameter specifies the interpolation method to + use when the desired quantile lies between two data points + ``i < j``: + * linear: ``i + (j - i) * fraction``, where ``fraction`` is + the fractional part of the index surrounded by ``i`` and + ``j``. + * lower: ``i``. + * higher: ``j``. + * nearest: ``i`` or ``j``, whichever is nearest. + * midpoint: ``(i + j) / 2``. + + Returns + ------- + quantiles : Variable + If `q` is a single quantile, then the result + is a scalar. If multiple percentiles are given, first axis of + the result corresponds to the quantile and a quantile dimension + is added to the return array. The other dimensions are the + dimensions that remain after the reduction of the array. + + See Also + -------- + numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile, + DataArray.quantile + """ + if dim is None: + dim = self._group_dim + + out = self.map( + self._obj.__class__.quantile, + shortcut=False, + q=q, + dim=dim, + interpolation=interpolation, + keep_attrs=keep_attrs, + ) + + if np.asarray(q, dtype=np.float64).ndim == 0: + out = out.drop_vars("quantile") + return out + def where(self, cond, other=dtypes.NA): """Return elements from `self` or `other` depending on `cond`. @@ -737,60 +791,6 @@ def _combine(self, applied, restore_coord_dims=False, shortcut=False): combined = self._maybe_unstack(combined) return combined - def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): - """Compute the qth quantile over each array in the groups and - concatenate them together into a new array. - - Parameters - ---------- - q : float in range of [0,1] (or sequence of floats) - Quantile to compute, which must be between 0 and 1 - inclusive. - dim : `...`, str or sequence of str, optional - Dimension(s) over which to apply quantile. - Defaults to the grouped dimension. - interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} - This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points - ``i < j``: - * linear: ``i + (j - i) * fraction``, where ``fraction`` is - the fractional part of the index surrounded by ``i`` and - ``j``. - * lower: ``i``. - * higher: ``j``. - * nearest: ``i`` or ``j``, whichever is nearest. - * midpoint: ``(i + j) / 2``. - - Returns - ------- - quantiles : Variable - If `q` is a single quantile, then the result - is a scalar. If multiple percentiles are given, first axis of - the result corresponds to the quantile and a quantile dimension - is added to the return array. The other dimensions are the - dimensions that remain after the reduction of the array. - - See Also - -------- - numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile, - DataArray.quantile - """ - if dim is None: - dim = self._group_dim - - out = self.map( - self._obj.__class__.quantile, - shortcut=False, - q=q, - dim=dim, - interpolation=interpolation, - keep_attrs=keep_attrs, - ) - - if np.asarray(q, dtype=np.float64).ndim == 0: - out = out.drop_vars("quantile") - return out - def reduce( self, func, dim=None, axis=None, keep_attrs=None, shortcut=True, **kwargs ): From 5b2251421124957898be09f3f8b32dbf1cec1d40 Mon Sep 17 00:00:00 2001 From: Keewis Date: Wed, 13 Nov 2019 23:55:09 +0100 Subject: [PATCH 2/5] add tests for DatasetGroupBy --- xarray/tests/test_groupby.py | 92 ++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 581affa3471..5cd27525023 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -210,6 +210,98 @@ def test_da_groupby_quantile(): assert_identical(expected, actual) +def test_ds_groupby_quantile(): + ds = xr.Dataset( + data_vars={"a": ("x", [1, 2, 3, 4, 5, 6])}, coords={"x": [1, 1, 1, 2, 2, 2]} + ) + + # Scalar quantile + expected = xr.Dataset({"a": ("x", [2, 5])}, coords={"x": [1, 2]}) + actual = ds.groupby("x").quantile(0.5) + assert_identical(expected, actual) + + # Vector quantile + expected = xr.Dataset( + data_vars={"a": (("x", "quantile"), [[1, 3], [4, 6]])}, + coords={"x": [1, 2], "quantile": [0, 1]}, + ) + actual = ds.groupby("x").quantile([0, 1]) + assert_identical(expected, actual) + + # Multiple dimensions + ds = xr.Dataset( + data_vars={ + "a": ( + ("x", "y"), + [[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]], + ) + }, + coords={"x": [1, 1, 1, 2, 2], "y": [0, 0, 1]}, + ) + + actual_x = ds.groupby("x").quantile(0, dim=...) + expected_x = xr.Dataset({"a": ("x", [1, 4])}, coords={"x": [1, 2]}) + assert_identical(expected_x, actual_x) + + actual_y = ds.groupby("y").quantile(0, dim=...) + expected_y = xr.Dataset({"a": ("y", [1, 22])}, coords={"y": [0, 1]}) + assert_identical(expected_y, actual_y) + + actual_xx = ds.groupby("x").quantile(0) + expected_xx = xr.Dataset( + {"a": (("x", "y"), [[1, 11, 22], [4, 15, 24]])}, + coords={"x": [1, 2], "y": [0, 0, 1]}, + ) + assert_identical(expected_xx, actual_xx) + + actual_yy = ds.groupby("y").quantile(0) + expected_yy = xr.Dataset( + {"a": (("x", "y"), [[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]])}, + coords={"x": [1, 1, 1, 2, 2], "y": [0, 1]}, + ).transpose() + assert_identical(expected_yy, actual_yy) + + times = pd.date_range("2000-01-01", periods=365) + x = [0, 1] + foo = xr.Dataset( + {"a": (("time", "x"), np.reshape(np.arange(365 * 2), (365, 2)))}, + coords=dict(time=times, x=x), + ) + g = foo.groupby(foo.time.dt.month) + + actual = g.quantile(0, dim=...) + expected = xr.Dataset( + { + "a": ( + "month", + [ + 0.0, + 62.0, + 120.0, + 182.0, + 242.0, + 304.0, + 364.0, + 426.0, + 488.0, + 548.0, + 610.0, + 670.0, + ], + ) + }, + coords={"month": np.arange(1, 13)}, + ) + assert_identical(expected, actual) + + actual = g.quantile(0, dim="time").isel(month=slice(None, 2)) + expected = xr.Dataset( + data_vars={"a": (("month", "x"), [[0.0, 1], [62.0, 63]])}, + coords={"month": [1, 2], "x": [0, 1]}, + ) + assert_identical(expected, actual) + + def test_da_groupby_assign_coords(): actual = xr.DataArray( [[3, 4, 5], [6, 7, 8]], dims=["y", "x"], coords={"y": range(2), "x": range(3)} From dc4846f2cb1e45f50fd71703ed9576291f8fb569 Mon Sep 17 00:00:00 2001 From: Keewis Date: Thu, 14 Nov 2019 00:09:09 +0100 Subject: [PATCH 3/5] update whats-new.rst --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ea3b012cc98..ad3ee1c9b3c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -96,6 +96,8 @@ Bug fixes (:issue:`3402`). By `Deepak Cherian `_ - Allow appending datetime and bool data variables to zarr stores. (:issue:`3480`). By `Akihiro Matsukawa `_. +- Add the documented-but-missing :py:meth:`xarray.core.groupby.DatasetGroupBy.quantile`. + (:issue:`3525`, :pull:`3527`). By `Justus Magin `_. Documentation ~~~~~~~~~~~~~ From 5c441c9d8239e12bf7d2b833a470032adeae96f0 Mon Sep 17 00:00:00 2001 From: Keewis Date: Thu, 14 Nov 2019 00:22:30 +0100 Subject: [PATCH 4/5] move the item in whats-new.rst into New Features --- doc/whats-new.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ad3ee1c9b3c..ad430b1bcfc 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -76,6 +76,8 @@ New Features invoked. (:issue:`3378`, :pull:`3446`, :pull:`3515`) By `Deepak Cherian `_ and `Guido Imperiale `_. +- Add the documented-but-missing :py:meth:`xarray.core.groupby.DatasetGroupBy.quantile`. + (:issue:`3525`, :pull:`3527`). By `Justus Magin `_. Bug fixes ~~~~~~~~~ @@ -96,8 +98,6 @@ Bug fixes (:issue:`3402`). By `Deepak Cherian `_ - Allow appending datetime and bool data variables to zarr stores. (:issue:`3480`). By `Akihiro Matsukawa `_. -- Add the documented-but-missing :py:meth:`xarray.core.groupby.DatasetGroupBy.quantile`. - (:issue:`3525`, :pull:`3527`). By `Justus Magin `_. Documentation ~~~~~~~~~~~~~ From 5f9ca06ef6bd4fcae3605ad8a55ad9085471bd37 Mon Sep 17 00:00:00 2001 From: Keewis Date: Thu, 14 Nov 2019 17:21:56 +0100 Subject: [PATCH 5/5] don't drop scalar quantile coords --- xarray/core/groupby.py | 13 ++++---- xarray/tests/test_groupby.py | 65 ++++++++++++++++++++++++------------ 2 files changed, 50 insertions(+), 28 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 974d4671814..38ecc04534a 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -584,11 +584,12 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): Returns ------- quantiles : Variable - If `q` is a single quantile, then the result - is a scalar. If multiple percentiles are given, first axis of - the result corresponds to the quantile and a quantile dimension - is added to the return array. The other dimensions are the - dimensions that remain after the reduction of the array. + If `q` is a single quantile, then the result is a + scalar. If multiple percentiles are given, first axis of + the result corresponds to the quantile. In either case a + quantile dimension is added to the return array. The other + dimensions are the dimensions that remain after the + reduction of the array. See Also -------- @@ -607,8 +608,6 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): keep_attrs=keep_attrs, ) - if np.asarray(q, dtype=np.float64).ndim == 0: - out = out.drop_vars("quantile") return out def where(self, cond, other=dtypes.NA): diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 5cd27525023..97bd31ae050 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -137,42 +137,58 @@ def test_da_groupby_empty(): def test_da_groupby_quantile(): - array = xr.DataArray([1, 2, 3, 4, 5, 6], [("x", [1, 1, 1, 2, 2, 2])]) + array = xr.DataArray( + data=[1, 2, 3, 4, 5, 6], coords={"x": [1, 1, 1, 2, 2, 2]}, dims="x" + ) # Scalar quantile - expected = xr.DataArray([2, 5], [("x", [1, 2])]) + expected = xr.DataArray( + data=[2, 5], coords={"x": [1, 2], "quantile": 0.5}, dims="x" + ) actual = array.groupby("x").quantile(0.5) assert_identical(expected, actual) # Vector quantile - expected = xr.DataArray([[1, 3], [4, 6]], [("x", [1, 2]), ("quantile", [0, 1])]) + expected = xr.DataArray( + data=[[1, 3], [4, 6]], + coords={"x": [1, 2], "quantile": [0, 1]}, + dims=("x", "quantile"), + ) actual = array.groupby("x").quantile([0, 1]) assert_identical(expected, actual) # Multiple dimensions array = xr.DataArray( - [[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]], - [("x", [1, 1, 1, 2, 2]), ("y", [0, 0, 1])], + data=[[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]], + coords={"x": [1, 1, 1, 2, 2], "y": [0, 0, 1]}, + dims=("x", "y"), ) actual_x = array.groupby("x").quantile(0, dim=...) - expected_x = xr.DataArray([1, 4], [("x", [1, 2])]) + expected_x = xr.DataArray( + data=[1, 4], coords={"x": [1, 2], "quantile": 0}, dims="x" + ) assert_identical(expected_x, actual_x) actual_y = array.groupby("y").quantile(0, dim=...) - expected_y = xr.DataArray([1, 22], [("y", [0, 1])]) + expected_y = xr.DataArray( + data=[1, 22], coords={"y": [0, 1], "quantile": 0}, dims="y" + ) assert_identical(expected_y, actual_y) actual_xx = array.groupby("x").quantile(0) expected_xx = xr.DataArray( - [[1, 11, 22], [4, 15, 24]], [("x", [1, 2]), ("y", [0, 0, 1])] + data=[[1, 11, 22], [4, 15, 24]], + coords={"x": [1, 2], "y": [0, 0, 1], "quantile": 0}, + dims=("x", "y"), ) assert_identical(expected_xx, actual_xx) actual_yy = array.groupby("y").quantile(0) expected_yy = xr.DataArray( - [[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]], - [("x", [1, 1, 1, 2, 2]), ("y", [0, 1])], + data=[[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]], + coords={"x": [1, 1, 1, 2, 2], "y": [0, 1], "quantile": 0}, + dims=("x", "y"), ) assert_identical(expected_yy, actual_yy) @@ -180,14 +196,14 @@ def test_da_groupby_quantile(): x = [0, 1] foo = xr.DataArray( np.reshape(np.arange(365 * 2), (365, 2)), - coords=dict(time=times, x=x), + coords={"time": times, "x": x}, dims=("time", "x"), ) g = foo.groupby(foo.time.dt.month) actual = g.quantile(0, dim=...) expected = xr.DataArray( - [ + data=[ 0.0, 62.0, 120.0, @@ -201,12 +217,17 @@ def test_da_groupby_quantile(): 610.0, 670.0, ], - [("month", np.arange(1, 13))], + coords={"month": np.arange(1, 13), "quantile": 0}, + dims="month", ) assert_identical(expected, actual) actual = g.quantile(0, dim="time")[:2] - expected = xr.DataArray([[0.0, 1], [62.0, 63]], [("month", [1, 2]), ("x", [0, 1])]) + expected = xr.DataArray( + data=[[0.0, 1], [62.0, 63]], + coords={"month": [1, 2], "x": [0, 1], "quantile": 0}, + dims=("month", "x"), + ) assert_identical(expected, actual) @@ -216,7 +237,9 @@ def test_ds_groupby_quantile(): ) # Scalar quantile - expected = xr.Dataset({"a": ("x", [2, 5])}, coords={"x": [1, 2]}) + expected = xr.Dataset( + data_vars={"a": ("x", [2, 5])}, coords={"quantile": 0.5, "x": [1, 2]} + ) actual = ds.groupby("x").quantile(0.5) assert_identical(expected, actual) @@ -240,24 +263,24 @@ def test_ds_groupby_quantile(): ) actual_x = ds.groupby("x").quantile(0, dim=...) - expected_x = xr.Dataset({"a": ("x", [1, 4])}, coords={"x": [1, 2]}) + expected_x = xr.Dataset({"a": ("x", [1, 4])}, coords={"x": [1, 2], "quantile": 0}) assert_identical(expected_x, actual_x) actual_y = ds.groupby("y").quantile(0, dim=...) - expected_y = xr.Dataset({"a": ("y", [1, 22])}, coords={"y": [0, 1]}) + expected_y = xr.Dataset({"a": ("y", [1, 22])}, coords={"y": [0, 1], "quantile": 0}) assert_identical(expected_y, actual_y) actual_xx = ds.groupby("x").quantile(0) expected_xx = xr.Dataset( {"a": (("x", "y"), [[1, 11, 22], [4, 15, 24]])}, - coords={"x": [1, 2], "y": [0, 0, 1]}, + coords={"x": [1, 2], "y": [0, 0, 1], "quantile": 0}, ) assert_identical(expected_xx, actual_xx) actual_yy = ds.groupby("y").quantile(0) expected_yy = xr.Dataset( {"a": (("x", "y"), [[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]])}, - coords={"x": [1, 1, 1, 2, 2], "y": [0, 1]}, + coords={"x": [1, 1, 1, 2, 2], "y": [0, 1], "quantile": 0}, ).transpose() assert_identical(expected_yy, actual_yy) @@ -290,14 +313,14 @@ def test_ds_groupby_quantile(): ], ) }, - coords={"month": np.arange(1, 13)}, + coords={"month": np.arange(1, 13), "quantile": 0}, ) assert_identical(expected, actual) actual = g.quantile(0, dim="time").isel(month=slice(None, 2)) expected = xr.Dataset( data_vars={"a": (("month", "x"), [[0.0, 1], [62.0, 63]])}, - coords={"month": [1, 2], "x": [0, 1]}, + coords={"month": [1, 2], "x": [0, 1], "quantile": 0}, ) assert_identical(expected, actual)