Skip to content

Commit

Permalink
Coarsen keep attrs 3376 (pydata#3801)
Browse files Browse the repository at this point in the history
* Add test of DataWithCoords.coarsen() for pydata#3376

* Add test of Variable.coarsen() for pydata#3376

* Add keep_attrs kwarg to DataWithCoords.coarsen() for pydata#3376

* Style and spelling fixes (pydata#3376)

* Fix test_coarsen_keep_attrs by removing self from input

* Pass keep_attrs through to _coarsen_cls and _rolling_cls returns (pydata#3376)

* Move keyword from coarsen to mean in test_coarsen_keep_attrs

* Start handling keep_attrs in rolling class constructors (pydata#3376)

* Update Coarsen constructor and DatasetCoarsen class method (GH3376)

Assign keep_attrs keyword value to Coarsen objects in constructor
Add conditional inside _reduce_method.wrapped_func branching on self.keep_attrs and pass back to returned Dataset

* Incorporate code review from @max-sixty

* Fix Dataset.coarsen and Variable.coarsen for GH3376

Handle global keep_attrs setting inside Variable._coarsen_reshape

Pass attrs through consistently inside DatasetCoarsen._reduce_method

Don't pass Variable.coarsen a keyword argument it doesn't expect inside DataArrayCoarsen._reduce_method

* Update tests for GH3376

* Incorporate review changes to test_dataset for GH3376

Remove commented-out test from test_coarsen_keep_attrs

Add test_rolling_keep_attrs

* Change Rolling._dataset_implementation for GH3376

Return a Dataset object that results in test_rolling_keep_attrs Passing

* style fixes

* Remove duplicate variable assignment and document change (GH3776)
  • Loading branch information
amcnicho authored Mar 2, 2020
1 parent b155853 commit 1c5e1cd
Show file tree
Hide file tree
Showing 6 changed files with 165 additions and 17 deletions.
5 changes: 5 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ Bug fixes
- xarray now respects the over, under and bad colors if set on a provided colormap.
(:issue:`3590`, :pull:`3601`)
By `johnomotani <https://github.com/johnomotani>`_.
- :py:func:`coarsen` now respects ``xr.set_options(keep_attrs=True)``
to preserve attributes. :py:meth:`Dataset.coarsen` accepts a keyword
argument ``keep_attrs`` to change this setting. (:issue:`3376`,
:pull:`3801`) By `Andrew Thomas <https://github.com/amcnicho>`_.

- Fix :py:meth:`xarray.core.dataset.Dataset.to_zarr` when using `append_dim` and `group`
simultaneously. (:issue:`3170`). By `Matthias Meyer <https://github.com/niowniow>`_.

Expand Down
29 changes: 26 additions & 3 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,6 +753,7 @@ def rolling(
dim: Mapping[Hashable, int] = None,
min_periods: int = None,
center: bool = False,
keep_attrs: bool = None,
**window_kwargs: int,
):
"""
Expand All @@ -769,6 +770,10 @@ def rolling(
setting min_periods equal to the size of the window.
center : boolean, default False
Set the labels at the center of the window.
keep_attrs : bool, optional
If True, the object's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
**window_kwargs : optional
The keyword arguments form of ``dim``.
One of dim or window_kwargs must be provided.
Expand Down Expand Up @@ -810,8 +815,13 @@ def rolling(
core.rolling.DataArrayRolling
core.rolling.DatasetRolling
"""
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)

dim = either_dict_or_kwargs(dim, window_kwargs, "rolling")
return self._rolling_cls(self, dim, min_periods=min_periods, center=center)
return self._rolling_cls(
self, dim, min_periods=min_periods, center=center, keep_attrs=keep_attrs
)

def rolling_exp(
self,
Expand Down Expand Up @@ -859,6 +869,7 @@ def coarsen(
boundary: str = "exact",
side: Union[str, Mapping[Hashable, str]] = "left",
coord_func: str = "mean",
keep_attrs: bool = None,
**window_kwargs: int,
):
"""
Expand All @@ -879,8 +890,12 @@ def coarsen(
multiple of the window size. If 'trim', the excess entries are
dropped. If 'pad', NA will be padded.
side : 'left' or 'right' or mapping from dimension to 'left' or 'right'
coord_func : function (name) that is applied to the coordintes,
coord_func : function (name) that is applied to the coordinates,
or a mapping from coordinate name to function (name).
keep_attrs : bool, optional
If True, the object's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
Returns
-------
Expand Down Expand Up @@ -915,9 +930,17 @@ def coarsen(
core.rolling.DataArrayCoarsen
core.rolling.DatasetCoarsen
"""
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)

dim = either_dict_or_kwargs(dim, window_kwargs, "coarsen")
return self._coarsen_cls(
self, dim, boundary=boundary, side=side, coord_func=coord_func
self,
dim,
boundary=boundary,
side=side,
coord_func=coord_func,
keep_attrs=keep_attrs,
)

def resample(
Expand Down
67 changes: 54 additions & 13 deletions xarray/core/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from . import dtypes, duck_array_ops, utils
from .dask_array_ops import dask_rolling_wrapper
from .ops import inject_reduce_methods
from .options import _get_keep_attrs
from .pycompat import dask_array_type

try:
Expand Down Expand Up @@ -42,10 +43,10 @@ class Rolling:
DataArray.rolling
"""

__slots__ = ("obj", "window", "min_periods", "center", "dim")
_attributes = ("window", "min_periods", "center", "dim")
__slots__ = ("obj", "window", "min_periods", "center", "dim", "keep_attrs")
_attributes = ("window", "min_periods", "center", "dim", "keep_attrs")

def __init__(self, obj, windows, min_periods=None, center=False):
def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None):
"""
Moving window object.
Expand All @@ -65,6 +66,10 @@ def __init__(self, obj, windows, min_periods=None, center=False):
setting min_periods equal to the size of the window.
center : boolean, default False
Set the labels at the center of the window.
keep_attrs : bool, optional
If True, the object's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
Returns
-------
Expand All @@ -89,6 +94,10 @@ def __init__(self, obj, windows, min_periods=None, center=False):
self.center = center
self.dim = dim

if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
self.keep_attrs = keep_attrs

@property
def _min_periods(self):
return self.min_periods if self.min_periods is not None else self.window
Expand Down Expand Up @@ -143,7 +152,7 @@ def count(self):
class DataArrayRolling(Rolling):
__slots__ = ("window_labels",)

def __init__(self, obj, windows, min_periods=None, center=False):
def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None):
"""
Moving window object for DataArray.
You should use DataArray.rolling() method to construct this object
Expand All @@ -165,6 +174,10 @@ def __init__(self, obj, windows, min_periods=None, center=False):
setting min_periods equal to the size of the window.
center : boolean, default False
Set the labels at the center of the window.
keep_attrs : bool, optional
If True, the object's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
Returns
-------
Expand All @@ -177,7 +190,11 @@ def __init__(self, obj, windows, min_periods=None, center=False):
Dataset.rolling
Dataset.groupby
"""
super().__init__(obj, windows, min_periods=min_periods, center=center)
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
super().__init__(
obj, windows, min_periods=min_periods, center=center, keep_attrs=keep_attrs
)

self.window_labels = self.obj[self.dim]

Expand Down Expand Up @@ -374,7 +391,7 @@ def _numpy_or_bottleneck_reduce(
class DatasetRolling(Rolling):
__slots__ = ("rollings",)

def __init__(self, obj, windows, min_periods=None, center=False):
def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None):
"""
Moving window object for Dataset.
You should use Dataset.rolling() method to construct this object
Expand All @@ -396,6 +413,10 @@ def __init__(self, obj, windows, min_periods=None, center=False):
setting min_periods equal to the size of the window.
center : boolean, default False
Set the labels at the center of the window.
keep_attrs : bool, optional
If True, the object's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
Returns
-------
Expand All @@ -408,15 +429,17 @@ def __init__(self, obj, windows, min_periods=None, center=False):
Dataset.groupby
DataArray.groupby
"""
super().__init__(obj, windows, min_periods, center)
super().__init__(obj, windows, min_periods, center, keep_attrs)
if self.dim not in self.obj.dims:
raise KeyError(self.dim)
# Keep each Rolling object as a dictionary
self.rollings = {}
for key, da in self.obj.data_vars.items():
# keeps rollings only for the dataset depending on slf.dim
if self.dim in da.dims:
self.rollings[key] = DataArrayRolling(da, windows, min_periods, center)
self.rollings[key] = DataArrayRolling(
da, windows, min_periods, center, keep_attrs
)

def _dataset_implementation(self, func, **kwargs):
from .dataset import Dataset
Expand All @@ -427,7 +450,8 @@ def _dataset_implementation(self, func, **kwargs):
reduced[key] = func(self.rollings[key], **kwargs)
else:
reduced[key] = self.obj[key]
return Dataset(reduced, coords=self.obj.coords)
attrs = self.obj.attrs if self.keep_attrs else {}
return Dataset(reduced, coords=self.obj.coords, attrs=attrs)

def reduce(self, func, **kwargs):
"""Reduce the items in this group by applying `func` along some
Expand Down Expand Up @@ -466,7 +490,7 @@ def _numpy_or_bottleneck_reduce(
**kwargs,
)

def construct(self, window_dim, stride=1, fill_value=dtypes.NA):
def construct(self, window_dim, stride=1, fill_value=dtypes.NA, keep_attrs=None):
"""
Convert this rolling object to xr.Dataset,
where the window dimension is stacked as a new dimension
Expand All @@ -487,6 +511,9 @@ def construct(self, window_dim, stride=1, fill_value=dtypes.NA):

from .dataset import Dataset

if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=True)

dataset = {}
for key, da in self.obj.data_vars.items():
if self.dim in da.dims:
Expand All @@ -509,10 +536,18 @@ class Coarsen:
DataArray.coarsen
"""

__slots__ = ("obj", "boundary", "coord_func", "windows", "side", "trim_excess")
__slots__ = (
"obj",
"boundary",
"coord_func",
"windows",
"side",
"trim_excess",
"keep_attrs",
)
_attributes = ("windows", "side", "trim_excess")

def __init__(self, obj, windows, boundary, side, coord_func):
def __init__(self, obj, windows, boundary, side, coord_func, keep_attrs):
"""
Moving window object.
Expand Down Expand Up @@ -541,6 +576,7 @@ def __init__(self, obj, windows, boundary, side, coord_func):
self.windows = windows
self.side = side
self.boundary = boundary
self.keep_attrs = keep_attrs

absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims]
if absent_dims:
Expand Down Expand Up @@ -626,6 +662,11 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool
def wrapped_func(self, **kwargs):
from .dataset import Dataset

if self.keep_attrs:
attrs = self.obj.attrs
else:
attrs = {}

reduced = {}
for key, da in self.obj.data_vars.items():
reduced[key] = da.variable.coarsen(
Expand All @@ -644,7 +685,7 @@ def wrapped_func(self, **kwargs):
)
else:
coords[c] = v.variable
return Dataset(reduced, coords=coords)
return Dataset(reduced, coords=coords, attrs=attrs)

return wrapped_func

Expand Down
3 changes: 3 additions & 0 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1949,6 +1949,9 @@ def _coarsen_reshape(self, windows, boundary, side):
else:
shape.append(variable.shape[i])

keep_attrs = _get_keep_attrs(default=False)
variable.attrs = variable._attrs if keep_attrs else {}

return variable.data.reshape(shape), tuple(axes)

@property
Expand Down
56 changes: 56 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5664,6 +5664,62 @@ def test_coarsen_coords_cftime():
np.testing.assert_array_equal(actual.time, expected_times)


def test_coarsen_keep_attrs():
_attrs = {"units": "test", "long_name": "testing"}

var1 = np.linspace(10, 15, 100)
var2 = np.linspace(5, 10, 100)
coords = np.linspace(1, 10, 100)

ds = Dataset(
data_vars={"var1": ("coord", var1), "var2": ("coord", var2)},
coords={"coord": coords},
attrs=_attrs,
)

# Test dropped attrs
dat = ds.coarsen(coord=5).mean()
assert dat.attrs == {}

# Test kept attrs using dataset keyword
dat = ds.coarsen(coord=5, keep_attrs=True).mean()
assert dat.attrs == _attrs

# Test kept attrs using global option
with set_options(keep_attrs=True):
dat = ds.coarsen(coord=5).mean()
assert dat.attrs == _attrs


def test_rolling_keep_attrs():
_attrs = {"units": "test", "long_name": "testing"}

var1 = np.linspace(10, 15, 100)
var2 = np.linspace(5, 10, 100)
coords = np.linspace(1, 10, 100)

ds = Dataset(
data_vars={"var1": ("coord", var1), "var2": ("coord", var2)},
coords={"coord": coords},
attrs=_attrs,
)

# Test dropped attrs
dat = ds.rolling(dim={"coord": 5}, min_periods=None, center=False).mean()
assert dat.attrs == {}

# Test kept attrs using dataset keyword
dat = ds.rolling(
dim={"coord": 5}, min_periods=None, center=False, keep_attrs=True
).mean()
assert dat.attrs == _attrs

# Test kept attrs using global option
with set_options(keep_attrs=True):
dat = ds.rolling(dim={"coord": 5}, min_periods=None, center=False).mean()
assert dat.attrs == _attrs


def test_rolling_properties(ds):
# catching invalid args
with pytest.raises(ValueError, match="exactly one dim/window should"):
Expand Down
22 changes: 21 additions & 1 deletion xarray/tests/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import pytz

from xarray import Coordinate, Dataset, IndexVariable, Variable, set_options
from xarray.core import dtypes, indexing
from xarray.core import dtypes, duck_array_ops, indexing
from xarray.core.common import full_like, ones_like, zeros_like
from xarray.core.indexing import (
BasicIndexer,
Expand Down Expand Up @@ -1879,6 +1879,26 @@ def test_coarsen_2d(self):
expected = self.cls(("x", "y"), [[10, 18], [42, 35]])
assert_equal(actual, expected)

# perhaps @pytest.mark.parametrize("operation", [f for f in duck_array_ops])
def test_coarsen_keep_attrs(self, operation="mean"):
_attrs = {"units": "test", "long_name": "testing"}

test_func = getattr(duck_array_ops, operation, None)

# Test dropped attrs
with set_options(keep_attrs=False):
new = Variable(["coord"], np.linspace(1, 10, 100), attrs=_attrs).coarsen(
windows={"coord": 1}, func=test_func, boundary="exact", side="left"
)
assert new.attrs == {}

# Test kept attrs
with set_options(keep_attrs=True):
new = Variable(["coord"], np.linspace(1, 10, 100), attrs=_attrs).coarsen(
windows={"coord": 1}, func=test_func, boundary="exact", side="left"
)
assert new.attrs == _attrs


@requires_dask
class TestVariableWithDask(VariableSubclassobjects):
Expand Down

0 comments on commit 1c5e1cd

Please sign in to comment.