Skip to content
forked from pydata/xarray

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into vectorize-groupby-b…
Browse files Browse the repository at this point in the history
…inary

* upstream/main:
  Fix concat with scalar coordinate (pydata#6385)
  isel: convert IndexVariable to Variable if index is dropped (pydata#6388)
  fix dataset groupby combine dataarray func (pydata#6386)
  fix concat with variable or dataarray as dim (pydata#6387)
  pydata#6367 Fix for time units checking could produce "unhashable type" error (pydata#6368)
  • Loading branch information
dcherian committed Mar 21, 2022
2 parents bae15d5 + 83f238a commit f0e0f92
Show file tree
Hide file tree
Showing 9 changed files with 70 additions and 6 deletions.
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ Bug fixes
- Many bugs fixed by the explicit indexes refactor, mainly related to multi-index (virtual)
coordinates. See the corresponding pull-request on GitHub for more details. (:pull:`5692`).
By `Benoît Bovy <https://github.com/benbovy>`_.
- Fixed "unhashable type" error raised when reading a NetCDF file containing a variable whose
'units' attribute is not a ``str`` (e.g. a ``numpy.ndarray``) (:issue:`6368`). By `Oleh Khoma <https://github.com/okhoma>`_.

Documentation
~~~~~~~~~~~~~
Expand Down
3 changes: 2 additions & 1 deletion xarray/coding/times.py
Original file line number Diff line number Diff line change
Expand Up @@ -695,7 +695,8 @@ def encode(self, variable, name=None):
def decode(self, variable, name=None):
dims, data, attrs, encoding = unpack_for_decoding(variable)

if "units" in attrs and attrs["units"] in TIME_UNITS:
units = attrs.get("units")
if isinstance(units, str) and units in TIME_UNITS:
units = pop_to(attrs, encoding, "units")
transform = partial(decode_cf_timedelta, units=units)
dtype = np.dtype("timedelta64[ns]")
Expand Down
16 changes: 14 additions & 2 deletions xarray/core/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,7 @@ def _dataset_concat(
"""
Concatenate a sequence of datasets along a new or existing dimension
"""
from .dataarray import DataArray
from .dataset import Dataset

datasets = list(datasets)
Expand All @@ -438,6 +439,13 @@ def _dataset_concat(
"The elements in the input list need to be either all 'Dataset's or all 'DataArray's"
)

if isinstance(dim, DataArray):
dim_var = dim.variable
elif isinstance(dim, Variable):
dim_var = dim
else:
dim_var = None

dim, index = _calc_concat_dim_index(dim)

# Make sure we're working on a copy (we'll be loading variables)
Expand Down Expand Up @@ -524,7 +532,7 @@ def get_indexes(name):
elif name == dim:
var = ds._variables[name]
if not var.dims:
yield PandasIndex([var.values], dim)
yield PandasIndex([var.values.item()], dim)

# stack up each variable and/or index to fill-out the dataset (in order)
# n.b. this loop preserves variable order, needed for groupby.
Expand Down Expand Up @@ -582,7 +590,11 @@ def get_indexes(name):

if index is not None:
# add concat index / coordinate last to ensure that it's in the final Dataset
result[dim] = index.create_variables()[dim]
if dim_var is not None:
index_vars = index.create_variables({dim: dim_var})
else:
index_vars = index.create_variables()
result[dim] = index_vars[dim]
result_indexes[dim] = index

# TODO: add indexes at Dataset creation (when it is supported)
Expand Down
2 changes: 2 additions & 0 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2262,6 +2262,8 @@ def _isel_fancy(
new_var = var.isel(indexers=var_indexers)
else:
new_var = var.copy(deep=False)
if name not in indexes:
new_var = new_var.to_base_variable()
variables[name] = new_var

coord_names = self._coord_names & variables.keys()
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1041,7 +1041,7 @@ def _combine(self, applied):
if coord is not None and dim not in applied_example.dims:
index, index_vars = create_default_index_implicit(coord)
indexes = {k: index for k in index_vars}
combined = combined._overwrite_indexes(indexes, variables=index_vars)
combined = combined._overwrite_indexes(indexes, index_vars)
combined = self._maybe_restore_empty_groups(combined)
combined = self._maybe_unstack(combined)
return combined
Expand Down
27 changes: 25 additions & 2 deletions xarray/tests/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,22 @@ def test_concat_promote_shape(self) -> None:
expected = Dataset({"z": (("x", "y"), [[-1], [1]])}, {"x": [0, 1], "y": [0]})
assert_identical(actual, expected)

# regression GH6384
objs = [
Dataset({}, {"x": pd.Interval(-1, 0, closed="right")}),
Dataset({"x": [pd.Interval(0, 1, closed="right")]}),
]
actual = concat(objs, "x")
expected = Dataset(
{
"x": [
pd.Interval(-1, 0, closed="right"),
pd.Interval(0, 1, closed="right"),
]
}
)
assert_identical(actual, expected)

def test_concat_do_not_promote(self) -> None:
# GH438
objs = [
Expand All @@ -459,8 +475,15 @@ def test_concat_do_not_promote(self) -> None:

def test_concat_dim_is_variable(self) -> None:
    """Concatenate scalar datasets along a ``Variable`` passed as ``dim``.

    The ``Variable`` carries attributes, and the expected dataset reuses
    that same object as its "y" coordinate, so the result is compared
    against a dataset built from the concat dimension itself.
    """
    objs = [Dataset({"x": 0}), Dataset({"x": 1})]
    # Only one definition of ``coord`` / ``expected`` is kept: the earlier
    # attribute-less pair was overwritten before ever being used.
    coord = Variable("y", [3, 4], attrs={"foo": "bar"})
    expected = Dataset({"x": ("y", [0, 1]), "y": coord})
    actual = concat(objs, coord)
    assert_identical(actual, expected)

def test_concat_dim_is_dataarray(self) -> None:
    """Concatenate scalar datasets along a ``DataArray`` passed as ``dim``.

    The result must be identical to a dataset whose "y" coordinate is the
    very ``DataArray`` (with its attributes) that was used as ``dim``.
    """
    new_dim = DataArray([3, 4], dims="y", attrs={"foo": "bar"})
    datasets = [Dataset({"x": value}) for value in (0, 1)]

    result = concat(datasets, new_dim)

    assert_identical(result, Dataset({"x": ("y", [0, 1]), "y": new_dim}))

Expand Down
7 changes: 7 additions & 0 deletions xarray/tests/test_conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,3 +416,10 @@ def test_encoding_kwarg(self) -> None:
def test_encoding_kwarg_fixed_width_string(self) -> None:
    # Deliberately empty: the body is a bare ``pass``, so nothing is
    # exercised by this test.
    # NOTE(review): presumably this overrides a test inherited from a base
    # class so that it is skipped for this store -- confirm against the
    # enclosing class, which is not visible here.
    # CFEncodedInMemoryStore doesn't support explicit string encodings.
    pass


class TestDecodeCFVariableWithArrayUnits:
    """Decoding a variable whose ``units`` attribute is an array, not a ``str``."""

    def test_decode_cf_variable_with_array_units(self) -> None:
        """The decoded variable must be identical to the input variable."""
        array_units = np.array(["foobar"], dtype=object)
        original = Variable(["t"], [1, 2, 3], {"units": array_units})

        decoded = conventions.decode_cf_variable("test2", original)

        assert_identical(original, decoded)
9 changes: 9 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1262,6 +1262,15 @@ def test_isel_dataarray(self):
with pytest.raises(IndexError, match=r"dimension coordinate 'dim2'"):
actual = data.isel(dim2=indexing_ds["dim2"])

def test_isel_fancy_convert_index_variable(self) -> None:
    """Select index variable "x" with a DataArray indexer of dim "z".

    After the selection, "x" must no longer appear among the dataset's
    xindexes and its variable must not be an ``IndexVariable``.
    """
    dataset = xr.Dataset(
        data_vars={"foo": ("x", [1, 2, 3])},
        coords={"x": [0, 1, 2]},
    )
    indexer = xr.DataArray([1], dims="z", name="x")

    selected = dataset.isel({"x": indexer})

    # the index is dropped ...
    assert "x" not in selected.xindexes
    # ... and the former index variable is now a base variable
    assert not isinstance(selected["x"].variable, IndexVariable)

def test_sel(self):
data = create_test_data()
int_slicers = {"dim1": slice(None, None, 2), "dim2": slice(2), "dim3": slice(3)}
Expand Down
8 changes: 8 additions & 0 deletions xarray/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -945,6 +945,14 @@ def test_groupby_dataset_assign():
assert_identical(actual, expected)


def test_groupby_dataset_map_dataarray_func():
    """Regression test for GH6379.

    Maps a function returning a DataArray (the mean of "foo") over the
    groups of a Dataset and compares the combined result with the expected
    DataArray.
    """

    def mean_of_foo(group):
        return group["foo"].mean()

    dataset = xr.Dataset(
        data_vars={"foo": ("x", [1, 2, 3, 4])},
        coords={"x": [0, 0, 1, 1]},
    )
    grouped = dataset.groupby("x")

    result = grouped.map(mean_of_foo)

    expected = xr.DataArray([1.5, 3.5], coords={"x": [0, 1]}, dims="x", name="foo")
    assert_identical(result, expected)


class TestDataArrayGroupBy:
@pytest.fixture(autouse=True)
def setup(self):
Expand Down

0 comments on commit f0e0f92

Please sign in to comment.