Skip to content

Commit

Permalink
Add quantile method to GroupBy (#2828)
Browse files Browse the repository at this point in the history
* implement groupby.quantile + tests

* added quantile method in whats-new

* mark additional test as xfail.

* lint fix

* simpler version of groupby.quantile

* added quantile methods to api.rst

* included DEFAULT_DIMS handling in quantile method

* clarified groupby tests

* added test with more typical use case

* pep8

* removed failing test
  • Loading branch information
huard authored and shoyer committed Jun 24, 2019
1 parent cfd8210 commit b054c31
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 3 deletions.
3 changes: 2 additions & 1 deletion doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ Computation
:py:attr:`~core.groupby.DatasetGroupBy.last`
:py:attr:`~core.groupby.DatasetGroupBy.fillna`
:py:attr:`~core.groupby.DatasetGroupBy.where`
:py:attr:`~core.groupby.DatasetGroupBy.quantile`

Reshaping and reorganizing
--------------------------
Expand Down Expand Up @@ -362,7 +363,7 @@ Computation
:py:attr:`~core.groupby.DataArrayGroupBy.last`
:py:attr:`~core.groupby.DataArrayGroupBy.fillna`
:py:attr:`~core.groupby.DataArrayGroupBy.where`

:py:attr:`~core.groupby.DataArrayGroupBy.quantile`

Reshaping and reorganizing
--------------------------
Expand Down
5 changes: 3 additions & 2 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ v0.12.2 (unreleased)
Enhancements
~~~~~~~~~~~~


- New :py:meth:`~xarray.GroupBy.quantile` method. (:issue:`3018`)
By `David Huard <https://github.com/huard>`_.
- Add ``keepdims`` argument for reduce operations (:issue:`2170`)
By `Scott Wales <https://github.com/ScottWales>`_.
- netCDF chunksizes are now only dropped when original_shape is different,
Expand Down Expand Up @@ -90,7 +91,7 @@ Bug fixes
By `Maximilian Roos <https://github.com/max-sixty>`_.
- Fixed performance issues with cftime installed (:issue:`3000`)
By `0x0L <https://github.com/0x0L>`_.
- Replace incorrect usages of `message` in pytest assertions
- Replace incorrect usages of `message` in pytest assertions
with `match` (:issue:`3011`)
By `Maximilian Roos <https://github.com/max-sixty>`_.
- Add explicit pytest markers, now required by pytest
Expand Down
58 changes: 58 additions & 0 deletions xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,64 @@ def _combine(self, applied, restore_coord_dims=False, shortcut=False):
combined = self._maybe_unstack(combined)
return combined

def quantile(self, q, dim=None, interpolation='linear', keep_attrs=None):
    """Compute the qth quantile of every group and combine the results.

    The ``quantile`` method belonging to the class of the grouped object
    is applied to each group through ``self.apply`` (with
    ``shortcut=False``), and the combined result is returned.

    Parameters
    ----------
    q : float in range of [0,1] (or sequence of floats)
        Quantile(s) to compute, each between 0 and 1 inclusive.
    dim : str or sequence of str, optional
        Dimension(s) over which to apply quantile. The value is handed
        to the per-group ``quantile`` call; the ``DEFAULT_DIMS``
        sentinel is translated to ``ALL_DIMS`` first (emitting a
        ``FutureWarning`` when the grouped object has more than one
        dimension).
    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
        Interpolation method used when the desired quantile lies
        between two data points ``i < j``:

            * linear: ``i + (j - i) * fraction``, where ``fraction`` is
              the fractional part of the index surrounded by ``i`` and
              ``j``.
            * lower: ``i``.
            * higher: ``j``.
            * nearest: ``i`` or ``j``, whichever is nearest.
            * midpoint: ``(i + j) / 2``.
    keep_attrs : optional
        Forwarded unchanged to the per-group ``quantile`` call.

    Returns
    -------
    quantiles : object returned by ``self.apply``
        When `q` is a scalar, ``'quantile'`` is dropped from the
        result. When `q` is a sequence, the result is returned as
        produced by the per-group calls, keeping its ``quantile``
        dimension.

    See Also
    --------
    numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile,
    DataArray.quantile
    """
    if dim == DEFAULT_DIMS:
        # TODO change the replacement below to dim = self._group_dim
        # after the deprecation process
        if self._obj.ndim > 1:
            message = (
                "Default reduction dimension will be changed to the "
                "grouped dimension in a future version of xarray. To "
                "silence this warning, pass dim=xarray.ALL_DIMS "
                "explicitly.")
            warnings.warn(message, FutureWarning, stacklevel=2)
        dim = ALL_DIMS

    # Use the unbound ``quantile`` of the grouped object's own class so
    # the same code path serves every kind of grouped object.
    per_group_quantile = type(self._obj).quantile
    combined = self.apply(per_group_quantile, shortcut=False,
                          q=q, dim=dim, interpolation=interpolation,
                          keep_attrs=keep_attrs)

    # A 0-d ``q`` means a single quantile was requested; in that case
    # 'quantile' is removed from the result.
    q_is_scalar = np.asarray(q, dtype=np.float64).ndim == 0
    if q_is_scalar:
        return combined.drop('quantile')
    return combined

def reduce(self, func, dim=None, axis=None, keep_attrs=None,
shortcut=True, **kwargs):
"""Reduce the items in this group by applying `func` along some
Expand Down
60 changes: 60 additions & 0 deletions xarray/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,64 @@ def func(arg1, arg2, arg3=0):
assert_identical(expected, actual)


def test_da_groupby_quantile():
    """Check ``groupby(...).quantile`` on DataArrays.

    Covers a scalar ``q``, a sequence ``q``, a two-dimensional array
    grouped along either dimension (with and without an explicit
    ``dim``), and a daily time series grouped by calendar month.
    """
    # One-dimensional input: two groups of three values each.
    da_1d = xr.DataArray([1, 2, 3, 4, 5, 6],
                         coords=[('x', [1, 1, 1, 2, 2, 2])])

    # Scalar quantile
    median = da_1d.groupby('x').quantile(.5)
    median_expected = xr.DataArray([2, 5], coords=[('x', [1, 2])])
    assert_identical(median_expected, median)

    # Vector quantile
    extremes = da_1d.groupby('x').quantile([0, 1])
    extremes_expected = xr.DataArray(
        [[1, 3], [4, 6]],
        coords=[('x', [1, 2]), ('quantile', [0, 1])])
    assert_identical(extremes_expected, extremes)

    # Multiple dimensions
    da_2d = xr.DataArray(
        [[1, 11, 26],
         [2, 12, 22],
         [3, 13, 23],
         [4, 16, 24],
         [5, 15, 25]],
        coords=[('x', [1, 1, 1, 2, 2]), ('y', [0, 0, 1])])

    # No ``dim`` given: one value per group.
    min_by_x = da_2d.groupby('x').quantile(0)
    min_by_x_expected = xr.DataArray([1, 4], coords=[('x', [1, 2])])
    assert_identical(min_by_x_expected, min_by_x)

    min_by_y = da_2d.groupby('y').quantile(0)
    min_by_y_expected = xr.DataArray([1, 22], coords=[('y', [0, 1])])
    assert_identical(min_by_y_expected, min_by_y)

    # Explicit ``dim`` equal to the grouped dimension: the other
    # dimension is kept.
    min_along_x = da_2d.groupby('x').quantile(0, dim='x')
    min_along_x_expected = xr.DataArray(
        [[1, 11, 22], [4, 15, 24]],
        coords=[('x', [1, 2]), ('y', [0, 0, 1])])
    assert_identical(min_along_x_expected, min_along_x)

    min_along_y = da_2d.groupby('y').quantile(0, dim='y')
    min_along_y_expected = xr.DataArray(
        [[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]],
        coords=[('x', [1, 1, 1, 2, 2]), ('y', [0, 1])])
    assert_identical(min_along_y_expected, min_along_y)

    # Daily series over 365 days with two columns, grouped by month.
    days = pd.date_range('2000-01-01', periods=365)
    columns = [0, 1]
    series = xr.DataArray(np.arange(365 * 2).reshape(365, 2),
                          coords={'time': days, 'x': columns},
                          dims=('time', 'x'))
    by_month = series.groupby(series.time.dt.month)

    monthly_min = by_month.quantile(0)
    monthly_min_expected = xr.DataArray(
        [0., 62., 120., 182., 242., 304.,
         364., 426., 488., 548., 610., 670.],
        coords=[('month', np.arange(1, 13))])
    assert_identical(monthly_min_expected, monthly_min)

    # Only the first two months are compared here.
    first_two_months = by_month.quantile(0, dim='time')[:2]
    first_two_months_expected = xr.DataArray(
        [[0., 1], [62., 63]],
        coords=[('month', [1, 2]), ('x', [0, 1])])
    assert_identical(first_two_months_expected, first_two_months)


# TODO: move other groupby tests from test_dataset and test_dataarray over here

0 comments on commit b054c31

Please sign in to comment.