From cd8f72747c58d13975392129dc3104f8267d3b39 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 4 Oct 2019 17:33:01 -0600 Subject: [PATCH 01/16] Respect user-specified coordinates attribute. --- doc/io.rst | 11 ++++++++++ xarray/conventions.py | 36 ++++++++++++++++++++------------ xarray/tests/test_backends.py | 11 ++++++++++ xarray/tests/test_conventions.py | 15 +++++++++++++ 4 files changed, 60 insertions(+), 13 deletions(-) diff --git a/doc/io.rst b/doc/io.rst index dab2a195e90..5c32898e0b4 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -440,6 +440,17 @@ supported by netCDF4-python: 'standard', 'gregorian', 'proleptic_gregorian' 'nol By default, xarray uses the 'proleptic_gregorian' calendar and units of the smallest time difference between values, with a reference time of the first time value. +Coordinates +........... + +You can control the ``coordinates`` attribute written to disk by specifying ``encoding["coordinates"]``. +If not specified, xarray automatically sets the ``"coordinates"`` attribute to a space-delimited list +of names of coordinate variables that share dimensions with the variable. +This allows perfect roundtripping of xarray datasets but may not be desirable. +When an xarray dataset contains non-dimensional coordinates that do not share dimensions with any of +the variables, these coordinate variable names are saved under a global ``"coordinates"`` attribute. +This is not CF-compliant but again facilitates roundtripping of xarray datasets. + Invalid netCDF files ~~~~~~~~~~~~~~~~~~~~ diff --git a/xarray/conventions.py b/xarray/conventions.py index a83b4b31c17..4c3c3be3d42 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -663,31 +663,41 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): global_coordinates.discard(coord_name) variables = {k: v.copy(deep=False) for k, v in variables.items()} - - # These coordinates are saved according to CF conventions - for var_name, coord_names in variable_coordinates.items(): - attrs = variables[var_name].attrs - if "coordinates" in attrs: + for name, var in variables.items(): + encoding = var.encoding + attrs = var.attrs + if "coordinates" in encoding and "coordinates" in attrs: raise ValueError( - "cannot serialize coordinates because variable " - "%s already has an attribute 'coordinates'" % var_name + f"'coordinates' found in both encoding and attrs for variable {name!r}. Please specify the 'coordinates' string in encoding." + ) + + if "coordinates" in encoding: + warnings.warn( + f"'coordinates' attribute detected on variable {name!r}. This may prevent faithful roundtripping of xarray Datasets.", + UserWarning, ) - attrs["coordinates"] = " ".join(map(str, coord_names)) + attrs["coordinates"] = encoding["coordinates"] + else: + if variable_coordinates[name]: + attrs["coordinates"] = " ".join(map(str, variable_coordinates[name])) # These coordinates are not associated with any particular variables, so we # save them under a global 'coordinates' attribute so xarray can roundtrip # the dataset faithfully. Because this serialization goes beyond CF # conventions, only do it if necessary. # Reference discussion: - # http://mailman.cgd.ucar.edu/pipermail/cf-metadata/2014/057771.html + # http://mailman.cgd.ucar.edu/pipermail/cf-metadata/2014/007571.html if global_coordinates: attributes = dict(attributes) if "coordinates" in attributes: - raise ValueError( - "cannot serialize coordinates because the global " - "attribute 'coordinates' already exists" + warnings.warn( + f"cannot serialize global coordinates {global_coordinates!r} because the global " + f"attribute 'coordinates' already exists. This may prevent faithful roundtripping" + f"of xarray datasets", + UserWarning, ) - attributes["coordinates"] = " ".join(map(str, global_coordinates)) + else: + attributes["coordinates"] = " ".join(map(str, global_coordinates)) return variables, attributes diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 4bdebe73050..8b6997c6534 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -810,6 +810,17 @@ def equals_latlon(obj): assert "coordinates" not in ds["lat"].attrs assert "coordinates" not in ds["lon"].attrs + original["temp"].encoding["coordinates"] = "lat" + with self.roundtrip(original) as actual: + assert_identical(actual, original) + with create_tmp_file() as tmp_file: + original.to_netcdf(tmp_file) + with open_dataset(tmp_file, decode_coords=False) as ds: + assert equals_latlon(ds.precip.attrs["coordinates"]) + assert "lon" not in ds["temp"].attrs["coordinates"] + assert "coordinates" not in ds["lat"].attrs + assert "coordinates" not in ds["lon"].attrs + def test_roundtrip_endian(self): ds = Dataset( { diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 09002e252b4..d60c8a2bd7e 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -136,6 +136,21 @@ def test_multidimensional_coordinates(self): # Should not have any global coordinates. assert "coordinates" not in attrs + def test_do_not_overwrite_user_coordinates(self): + orig = Dataset( + coords={"x": [0, 1, 2], "y": ("x", [5, 6, 7]), "z": ("x", [8, 9, 10])}, + data_vars={"a": ("x", [1, 2, 3]), "b": ("x", [3, 5, 6])}, + ) + orig["a"].encoding["coordinates"] = "y" + orig["b"].encoding["coordinates"] = "z" + with pytest.warns(UserWarning, match="'coordinates' attribute"): + enc, _ = conventions.encode_dataset_coordinates(orig) + assert enc["a"].attrs["coordinates"] == "y" + assert enc["b"].attrs["coordinates"] == "z" + orig["a"].attrs["coordinates"] = "foo" + with raises_regex(ValueError, "'coordinates' found in both"): + conventions.encode_dataset_coordinates(orig) + @requires_dask def test_string_object_warning(self): original = Variable(("x",), np.array(["foo", "bar"], dtype=object)).chunk() From 59a87f1b37fe1ed9c0d420ac69e3054a6cdf19ed Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 5 Nov 2019 08:46:59 -0700 Subject: [PATCH 02/16] Add whats-new --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c117382f23f..aade1100284 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -63,6 +63,8 @@ New Features invoked. (:issue:`3378`, :pull:`3446`) By `Deepak Cherian `_ and `Guido Imperiale `_. +- xarray now respects the ``encoding["coordinates"]`` attribute when writing to disk. (:issue:`3351`, :pull:`3487`) + By `Deepak Cherian `_. Bug fixes ~~~~~~~~~ From b80bf20a499deec12a848979d2a9b2430ab4573b Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 5 Nov 2019 08:48:46 -0700 Subject: [PATCH 03/16] Better if statement. --- xarray/conventions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index 4c3c3be3d42..321b5bab585 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -666,9 +666,9 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): for name, var in variables.items(): encoding = var.encoding attrs = var.attrs - if "coordinates" in encoding and "coordinates" in attrs: + if "coordinates" in attrs: raise ValueError( - f"'coordinates' found in both encoding and attrs for variable {name!r}. Please specify the 'coordinates' string in encoding." + f"'coordinates' found in attrs for variable {name!r}. Please specify the 'coordinates' string in encoding." ) if "coordinates" in encoding: From 4ca804d1f16883065127d442c22a97794e38a1d5 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 5 Nov 2019 08:50:43 -0700 Subject: [PATCH 04/16] maybe it's better to not raise an error if "coordinates" in attrs. --- xarray/conventions.py | 5 +++-- xarray/tests/test_conventions.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index 321b5bab585..bc4079e21db 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -667,8 +667,9 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): encoding = var.encoding attrs = var.attrs if "coordinates" in attrs: - raise ValueError( - f"'coordinates' found in attrs for variable {name!r}. Please specify the 'coordinates' string in encoding." + warnings.warn( + f"'coordinates' found in attrs for variable {name!r}. This will be ignored. Instead please specify the 'coordinates' string in encoding.", + UserWarning, ) if "coordinates" in encoding: diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index d60c8a2bd7e..56740444123 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -148,7 +148,7 @@ def test_do_not_overwrite_user_coordinates(self): assert enc["a"].attrs["coordinates"] == "y" assert enc["b"].attrs["coordinates"] == "z" orig["a"].attrs["coordinates"] = "foo" - with raises_regex(ValueError, "'coordinates' found in both"): + with pytest.warns(UserWarning, match="'coordinates' found in attrs"): conventions.encode_dataset_coordinates(orig) @requires_dask From 7a3cdd77885564f9ee0caf65b268c275a1d8493d Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 5 Nov 2019 09:14:58 -0700 Subject: [PATCH 05/16] tweak whats-new. --- doc/io.rst | 9 ++++++--- doc/whats-new.rst | 23 ++++++++++++----------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/doc/io.rst b/doc/io.rst index 5c32898e0b4..377518f9784 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -440,15 +440,18 @@ supported by netCDF4-python: 'standard', 'gregorian', 'proleptic_gregorian' 'nol By default, xarray uses the 'proleptic_gregorian' calendar and units of the smallest time difference between values, with a reference time of the first time value. + +.. _io.coordinates: + Coordinates ........... -You can control the ``coordinates`` attribute written to disk by specifying ``encoding["coordinates"]``. -If not specified, xarray automatically sets the ``"coordinates"`` attribute to a space-delimited list +You can control the ``coordinates`` attribute written to disk by specifying ``DataArray.encoding["coordinates"]``. +If not specified, xarray automatically sets ``DataArray.encoding["coordinates"]`` to a space-delimited list of names of coordinate variables that share dimensions with the variable. This allows perfect roundtripping of xarray datasets but may not be desirable. When an xarray dataset contains non-dimensional coordinates that do not share dimensions with any of -the variables, these coordinate variable names are saved under a global ``"coordinates"`` attribute. +the variables, these coordinate variable names are saved under a "global" ``"coordinates"`` attribute. This is not CF-compliant but again facilitates roundtripping of xarray datasets. Invalid netCDF files diff --git a/doc/whats-new.rst b/doc/whats-new.rst index aade1100284..7d9af545cce 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -30,25 +30,25 @@ Breaking changes (`cftime/126 `_); please use version 1.0.4.2 instead. -- All leftover support for dates from non-standard calendars through netcdftime, the +- All leftover support for dates from non-standard calendars through ``netcdftime``, the module included in versions of netCDF4 prior to 1.4 that eventually became the cftime package, has been removed in favor of relying solely on the standalone - cftime package (:pull:`3450`). + ``cftime`` package (:pull:`3450`). By `Spencer Clark `_. New Features ~~~~~~~~~~~~ - :py:meth:`Dataset.transpose` and :py:meth:`DataArray.transpose` now support an ellipsis (`...`) to represent all 'other' dimensions. For example, to move one dimension to the front, - use `.transpose('x', ...)`. (:pull:`3421`) + use ``.transpose('x', ...)``. (:pull:`3421`) By `Maximilian Roos `_ -- Changed `xr.ALL_DIMS` to equal python's `Ellipsis` (`...`), and changed internal usages to use - `...` directly. As before, you can use this to instruct a `groupby` operation - to reduce over all dimensions. While we have no plans to remove `xr.ALL_DIMS`, we suggest +- Changed ``xr.ALL_DIMS`` to equal python's ``Ellipsis`` (`...`), and changed internal usages to use + `...` directly. As before, you can use this to instruct a ``groupby`` operation + to reduce over all dimensions. While we have no plans to remove ``xr.ALL_DIMS``, we suggest using `...`. (:pull:`3418`) By `Maximilian Roos `_ -- :py:func:`~xarray.dot`, and :py:func:`~xarray.DataArray.dot` now support the - `dims=...` option to sum over the union of dimensions of all input arrays +- :py:func:`xarray.dot`, and :py:meth:`DataArray.dot` now support the + ``dims=...`` option to sum over the union of dimensions of all input arrays (:issue:`3423`) by `Mathias Hauser `_. - Added new :py:meth:`Dataset._repr_html_` and :py:meth:`DataArray._repr_html_` to improve representation of objects in jupyter. By default this feature is turned off @@ -59,11 +59,12 @@ New Features `_ for xarray objects. Note that xarray objects with a dask.array backend already used deterministic hashing in previous releases; this change implements it when whole - xarray objects are embedded in a dask graph, e.g. when :meth:`DataArray.map` is + xarray objects are embedded in a dask graph, e.g. when :py:meth:`DataArray.map_blocks` is invoked. (:issue:`3378`, :pull:`3446`) By `Deepak Cherian `_ and `Guido Imperiale `_. -- xarray now respects the ``encoding["coordinates"]`` attribute when writing to disk. (:issue:`3351`, :pull:`3487`) +- xarray now respects the ``DataArray.encoding["coordinates"]`` attribute when writing to disk. + See :ref:`io.coordinates` for more. (:issue:`3351`, :pull:`3487`) By `Deepak Cherian `_. Bug fixes @@ -72,7 +73,7 @@ Bug fixes but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle `_ - Fix grouping over variables with NaNs. (:issue:`2383`, :pull:`3406`). By `Deepak Cherian `_. -- Sync with cftime by removing `dayofwk=-1` for cftime>=1.0.4. +- Sync with cftime by removing ``dayofwk=-1`` for cftime>=1.0.4. By `Anderson Banihirwe `_. - Fix :py:meth:`xarray.core.groupby.DataArrayGroupBy.reduce` and :py:meth:`xarray.core.groupby.DatasetGroupBy.reduce` when reducing over multiple dimensions. From acdfdcd96a10dee17bb0be9b8c7bcfb05f1548db Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 5 Nov 2019 17:02:54 -0700 Subject: [PATCH 06/16] more thorough test. --- xarray/tests/test_backends.py | 22 ++++++++++++++-------- xarray/tests/test_conventions.py | 1 + 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 8b6997c6534..a43b2808b07 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -811,15 +811,21 @@ def equals_latlon(obj): assert "coordinates" not in ds["lon"].attrs original["temp"].encoding["coordinates"] = "lat" - with self.roundtrip(original) as actual: - assert_identical(actual, original) + with pytest.warns(UserWarning, match="'coordinates' attribute"): + with self.roundtrip(original) as actual: + assert_identical(actual, original) + original["precip"].encoding["coordinates"] = "lat" with create_tmp_file() as tmp_file: - original.to_netcdf(tmp_file) - with open_dataset(tmp_file, decode_coords=False) as ds: - assert equals_latlon(ds.precip.attrs["coordinates"]) - assert "lon" not in ds["temp"].attrs["coordinates"] - assert "coordinates" not in ds["lat"].attrs - assert "coordinates" not in ds["lon"].attrs + with pytest.warns(UserWarning, match="'coordinates' attribute"): + original.to_netcdf(tmp_file) + with open_dataset(tmp_file, decode_coords=True) as ds: + # lon was explicitly excluded from coordinates so it should + # be a data variable + assert "lon" not in ds["temp"].encoding["coordinates"] + assert "lon" not in ds["precip"].encoding["coordinates"] + assert "lon" not in ds.coords + assert "coordinates" not in ds["lat"].encoding + assert "coordinates" not in ds["lon"].encoding def test_roundtrip_endian(self): ds = Dataset( diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 56740444123..d8bf3ad8372 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -102,6 +102,7 @@ def test_missing_fillvalue(self): with pytest.warns(Warning, match="floating point data as an integer"): conventions.encode_cf_variable(v) + @pytest.mark.filterwarnings("ignore::UserWarning") def test_multidimensional_coordinates(self): # regression test for GH1763 # Set up test case with coordinates that have overlapping (but not From ab0c67da658c05729cf3a101c711a41c70df1840 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 6 Nov 2019 07:08:31 -0700 Subject: [PATCH 07/16] Emit one "coordinates" warning per dataset, instead of one per variable. Also add stacklevel --- xarray/conventions.py | 17 ++++++++++++----- xarray/tests/test_backends.py | 4 ++-- xarray/tests/test_conventions.py | 2 +- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index bc4079e21db..1092f60f169 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -663,25 +663,32 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): global_coordinates.discard(coord_name) variables = {k: v.copy(deep=False) for k, v in variables.items()} + vars_with_coords_enc = [] for name, var in variables.items(): encoding = var.encoding attrs = var.attrs if "coordinates" in attrs: warnings.warn( - f"'coordinates' found in attrs for variable {name!r}. This will be ignored. Instead please specify the 'coordinates' string in encoding.", + f"'coordinates' found in attrs for variable {name!r}. This will be ignored. " + f"Instead please specify the 'coordinates' string in encoding.", UserWarning, + stacklevel=5, ) if "coordinates" in encoding: - warnings.warn( - f"'coordinates' attribute detected on variable {name!r}. This may prevent faithful roundtripping of xarray Datasets.", - UserWarning, - ) + vars_with_coords_enc.append(name) attrs["coordinates"] = encoding["coordinates"] else: if variable_coordinates[name]: attrs["coordinates"] = " ".join(map(str, variable_coordinates[name])) + if vars_with_coords_enc: + warnings.warn( + f"'coordinates' found in encoding for variables {vars_with_coords_enc!r}. " + f"This may prevent faithful roundtripping of xarray Datasets.", + UserWarning, + stacklevel=5, + ) # These coordinates are not associated with any particular variables, so we # save them under a global 'coordinates' attribute so xarray can roundtrip # the dataset faithfully. Because this serialization goes beyond CF diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a43b2808b07..a64fafe3164 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -811,12 +811,12 @@ def equals_latlon(obj): assert "coordinates" not in ds["lon"].attrs original["temp"].encoding["coordinates"] = "lat" - with pytest.warns(UserWarning, match="'coordinates' attribute"): + with pytest.warns(UserWarning, match="'coordinates' found"): with self.roundtrip(original) as actual: assert_identical(actual, original) original["precip"].encoding["coordinates"] = "lat" with create_tmp_file() as tmp_file: - with pytest.warns(UserWarning, match="'coordinates' attribute"): + with pytest.warns(UserWarning, match="'coordinates' found"): original.to_netcdf(tmp_file) with open_dataset(tmp_file, decode_coords=True) as ds: # lon was explicitly excluded from coordinates so it should diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index d8bf3ad8372..e42467ddf4b 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -144,7 +144,7 @@ def test_do_not_overwrite_user_coordinates(self): ) orig["a"].encoding["coordinates"] = "y" orig["b"].encoding["coordinates"] = "z" - with pytest.warns(UserWarning, match="'coordinates' attribute"): + with pytest.warns(UserWarning, match="'coordinates' found"): enc, _ = conventions.encode_dataset_coordinates(orig) assert enc["a"].attrs["coordinates"] == "y" assert enc["b"].attrs["coordinates"] == "z" From 46de4b72ffc528d6222a373ddc963af4dd7b6371 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 6 Nov 2019 08:08:24 -0700 Subject: [PATCH 08/16] Preserve attrs["coordinates"] when roundtripping with decode_coords=False --- xarray/conventions.py | 23 +++++++++-------------- xarray/tests/test_backends.py | 8 ++++++++ xarray/tests/test_conventions.py | 2 +- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index 1092f60f169..201b159de72 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -5,7 +5,7 @@ import pandas as pd from .coding import strings, times, variables -from .coding.variables import SerializationWarning +from .coding.variables import SerializationWarning, pop_to from .core import duck_array_ops, indexing from .core.common import contains_cftime_datetimes from .core.pycompat import dask_array_type @@ -667,27 +667,22 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): for name, var in variables.items(): encoding = var.encoding attrs = var.attrs - if "coordinates" in attrs: - warnings.warn( - f"'coordinates' found in attrs for variable {name!r}. This will be ignored. " - f"Instead please specify the 'coordinates' string in encoding.", - UserWarning, - stacklevel=5, + if "coordinates" in attrs and "coordinates" in encoding: + raise ValueError( + f"'coordinates' found in both attrs and encoding for variable {name!r}." ) - - if "coordinates" in encoding: + coords_str = pop_to(encoding, attrs, "coordinates") + if coords_str: vars_with_coords_enc.append(name) - attrs["coordinates"] = encoding["coordinates"] - else: - if variable_coordinates[name]: - attrs["coordinates"] = " ".join(map(str, variable_coordinates[name])) + elif variable_coordinates[name]: + attrs["coordinates"] = " ".join(map(str, variable_coordinates[name])) if vars_with_coords_enc: warnings.warn( f"'coordinates' found in encoding for variables {vars_with_coords_enc!r}. " f"This may prevent faithful roundtripping of xarray Datasets.", UserWarning, - stacklevel=5, + stacklevel=6, ) # These coordinates are not associated with any particular variables, so we # save them under a global 'coordinates' attribute so xarray can roundtrip diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a64fafe3164..5de465bbf13 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -522,6 +522,14 @@ def test_roundtrip_coordinates(self): with self.roundtrip(original) as actual: assert_identical(original, actual) + original["foo"].encoding["coordinates"] = "y" + with self.roundtrip(original, open_kwargs={"decode_coords": False}) as expected: + # check roundtripping when decode_coords=False + with self.roundtrip( + expected, open_kwargs={"decode_coords": False} + ) as actual: + assert_identical(expected, actual) + def test_roundtrip_global_coordinates(self): original = Dataset({"x": [2, 3], "y": ("a", [42]), "z": ("x", [4, 5])}) with self.roundtrip(original) as actual: diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index e42467ddf4b..5cfdd0bfec5 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -149,7 +149,7 @@ def test_do_not_overwrite_user_coordinates(self): assert enc["a"].attrs["coordinates"] == "y" assert enc["b"].attrs["coordinates"] == "z" orig["a"].attrs["coordinates"] = "foo" - with pytest.warns(UserWarning, match="'coordinates' found in attrs"): + with raises_regex(ValueError, "'coordinates' found in both attrs"): conventions.encode_dataset_coordinates(orig) @requires_dask From 2279c36a39bfe1fe1c169c8d0e0b7986544079de Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 6 Nov 2019 17:29:31 -0700 Subject: [PATCH 09/16] Avoid raising warnings. --- xarray/conventions.py | 18 ++++++------------ xarray/tests/test_backends.py | 11 +++-------- xarray/tests/test_conventions.py | 3 +-- 3 files changed, 10 insertions(+), 22 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index 201b159de72..6f2a072e18a 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -660,10 +660,9 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): and set(target_dims) <= set(v.dims) ): variable_coordinates[k].add(coord_name) - global_coordinates.discard(coord_name) variables = {k: v.copy(deep=False) for k, v in variables.items()} - vars_with_coords_enc = [] + written_coords = set() for name, var in variables.items(): encoding = var.encoding attrs = var.attrs @@ -672,18 +671,13 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): f"'coordinates' found in both attrs and encoding for variable {name!r}." ) coords_str = pop_to(encoding, attrs, "coordinates") - if coords_str: - vars_with_coords_enc.append(name) - elif variable_coordinates[name]: + if not coords_str and variable_coordinates[name]: attrs["coordinates"] = " ".join(map(str, variable_coordinates[name])) + if "coordinates" in attrs: + written_coords.update(attrs["coordinates"].split()) + + global_coordinates.difference_update(written_coords) - if vars_with_coords_enc: - warnings.warn( - f"'coordinates' found in encoding for variables {vars_with_coords_enc!r}. " - f"This may prevent faithful roundtripping of xarray Datasets.", - UserWarning, - stacklevel=6, - ) # These coordinates are not associated with any particular variables, so we # save them under a global 'coordinates' attribute so xarray can roundtrip # the dataset faithfully. Because this serialization goes beyond CF diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 5de465bbf13..5371ca4a767 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -819,19 +819,14 @@ def equals_latlon(obj): assert "coordinates" not in ds["lon"].attrs original["temp"].encoding["coordinates"] = "lat" - with pytest.warns(UserWarning, match="'coordinates' found"): - with self.roundtrip(original) as actual: - assert_identical(actual, original) + with self.roundtrip(original) as actual: + assert_identical(actual, original) original["precip"].encoding["coordinates"] = "lat" with create_tmp_file() as tmp_file: - with pytest.warns(UserWarning, match="'coordinates' found"): - original.to_netcdf(tmp_file) + original.to_netcdf(tmp_file) with open_dataset(tmp_file, decode_coords=True) as ds: - # lon was explicitly excluded from coordinates so it should - # be a data variable assert "lon" not in ds["temp"].encoding["coordinates"] assert "lon" not in ds["precip"].encoding["coordinates"] - assert "lon" not in ds.coords assert "coordinates" not in ds["lat"].encoding assert "coordinates" not in ds["lon"].encoding diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 5cfdd0bfec5..bec05437ee9 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -144,8 +144,7 @@ def test_do_not_overwrite_user_coordinates(self): ) orig["a"].encoding["coordinates"] = "y" orig["b"].encoding["coordinates"] = "z" - with pytest.warns(UserWarning, match="'coordinates' found"): - enc, _ = conventions.encode_dataset_coordinates(orig) + enc, _ = conventions.encode_dataset_coordinates(orig) assert enc["a"].attrs["coordinates"] == "y" assert enc["b"].attrs["coordinates"] == "z" orig["a"].attrs["coordinates"] = "foo" From 0abe242659b18c2015c6fe0f95d3c252cc1c3ee3 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 12 Nov 2019 09:03:36 -0700 Subject: [PATCH 10/16] fix whats-new --- doc/whats-new.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d2e9297b7f7..c552c362021 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -86,7 +86,6 @@ Bug fixes but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle `_ - Fix grouping over variables with NaNs. (:issue:`2383`, :pull:`3406`). By `Deepak Cherian `_. -- Sync with cftime by removing ``dayofwk=-1`` for cftime>=1.0.4. - Use dask names to compare dask objects prior to comparing values after computation. (:issue:`3068`, :issue:`3311`, :issue:`3454`, :pull:`3453`). By `Deepak Cherian `_. From 1a42c6c5526b9d9a339cb1d999ade968b1f9d3a0 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 14 Nov 2019 07:51:50 -0700 Subject: [PATCH 11/16] [minor] add comments --- doc/io.rst | 6 +++--- xarray/conventions.py | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/doc/io.rst b/doc/io.rst index 377518f9784..986c4374e89 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -437,7 +437,7 @@ like ``'days'`` for ``timedelta64`` data. ``calendar`` should be one of the cale supported by netCDF4-python: 'standard', 'gregorian', 'proleptic_gregorian' 'noleap', '365_day', '360_day', 'julian', 'all_leap', '366_day'. -By default, xarray uses the 'proleptic_gregorian' calendar and units of the smallest time +By default, xarray uses the ``'proleptic_gregorian'`` calendar and units of the smallest time difference between values, with a reference time of the first time value. @@ -448,9 +448,9 @@ Coordinates You can control the ``coordinates`` attribute written to disk by specifying ``DataArray.encoding["coordinates"]``. If not specified, xarray automatically sets ``DataArray.encoding["coordinates"]`` to a space-delimited list -of names of coordinate variables that share dimensions with the variable. +of names of coordinate variables that share dimensions with the ``DataArray`` being written. This allows perfect roundtripping of xarray datasets but may not be desirable. -When an xarray dataset contains non-dimensional coordinates that do not share dimensions with any of +When an xarray ``Dataset`` contains non-dimensional coordinates that do not share dimensions with any of the variables, these coordinate variable names are saved under a "global" ``"coordinates"`` attribute. This is not CF-compliant but again facilitates roundtripping of xarray datasets. diff --git a/xarray/conventions.py b/xarray/conventions.py index 6f2a072e18a..8afb90af686 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -662,6 +662,8 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): variable_coordinates[k].add(coord_name) variables = {k: v.copy(deep=False) for k, v in variables.items()} + + # keep track of variable names written to file under the "coordinates" attributes written_coords = set() for name, var in variables.items(): encoding = var.encoding @@ -676,14 +678,13 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): if "coordinates" in attrs: written_coords.update(attrs["coordinates"].split()) - global_coordinates.difference_update(written_coords) - # These coordinates are not associated with any particular variables, so we # save them under a global 'coordinates' attribute so xarray can roundtrip # the dataset faithfully. Because this serialization goes beyond CF # conventions, only do it if necessary. # Reference discussion: # http://mailman.cgd.ucar.edu/pipermail/cf-metadata/2014/007571.html + global_coordinates.difference_update(written_coords) if global_coordinates: attributes = dict(attributes) if "coordinates" in attributes: From 1c97c1ed8829c706daa6227374357da1a3bb99e6 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 14 Nov 2019 07:56:10 -0700 Subject: [PATCH 12/16] fix whats-new --- doc/whats-new.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a257c146c64..cb0d965e5dc 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -72,8 +72,6 @@ New Features `_ for xarray objects. Note that xarray objects with a dask.array backend already used deterministic hashing in previous releases; this change implements it when whole - xarray objects are embedded in a dask graph, e.g. when :py:meth:`DataArray.map_blocks` is - invoked. (:issue:`3378`, :pull:`3446`) xarray objects are embedded in a dask graph, e.g. when :meth:`DataArray.map` is invoked. (:issue:`3378`, :pull:`3446`, :pull:`3515`) By `Deepak Cherian `_ and From 5773e5eb5a76315ddf354bb7919cada9021aca5f Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 14 Nov 2019 08:24:24 -0700 Subject: [PATCH 13/16] Actually test global "coordinates" handling. --- xarray/conventions.py | 2 +- xarray/tests/test_backends.py | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index 8afb90af686..a73f31e2739 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -692,7 +692,7 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): f"cannot serialize global coordinates {global_coordinates!r} because the global " f"attribute 'coordinates' already exists. This may prevent faithful roundtripping" f"of xarray datasets", - UserWarning, + SerializationWarning, ) else: attributes["coordinates"] = " ".join(map(str, global_coordinates)) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index f3ee58ac445..20ef58c3c0a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -33,6 +33,7 @@ from xarray.backends.netCDF4_ import _extract_nc4_variable_encoding from xarray.backends.pydap_ import PydapDataStore from xarray.coding.variables import SerializationWarning +from xarray.conventions import encode_dataset_coordinates from xarray.core import indexing from xarray.core.options import set_options from xarray.core.pycompat import dask_array_type @@ -531,14 +532,26 @@ def test_roundtrip_coordinates(self): assert_identical(expected, actual) def test_roundtrip_global_coordinates(self): - original = Dataset({"x": [2, 3], "y": ("a", [42]), "z": ("x", [4, 5])}) + original = Dataset( + {"foo": ("x", [0, 1])}, {"x": [2, 3], "y": ("a", [42]), "z": ("x", [4, 5])} + ) with self.roundtrip(original) as actual: assert_identical(original, actual) + # test that global "coordinates" is as expected + _, attrs = encode_dataset_coordinates(original) + assert attrs["coordinates"] == "y" + + # test warning when global "coordinates" is already set + original.attrs["coordinates"] = "foo" + with pytest.warns(SerializationWarning): + _, attrs = encode_dataset_coordinates(original) + assert attrs["coordinates"] == "foo" + def test_roundtrip_coordinates_with_space(self): original = Dataset(coords={"x": 0, "y z": 1}) expected = Dataset({"y z": 1}, {"x": 0}) - with pytest.warns(xr.SerializationWarning): + with pytest.warns(SerializationWarning): with self.roundtrip(original) as actual: assert_identical(expected, actual) From 670f97f7690ded34ad60c362750ae6e1e81ee1f5 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 14 Nov 2019 08:45:13 -0700 Subject: [PATCH 14/16] filerwarning not necessary. --- xarray/tests/test_conventions.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index bec05437ee9..acb2400ea04 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -102,7 +102,6 @@ def test_missing_fillvalue(self): with pytest.warns(Warning, match="floating point data as an integer"): conventions.encode_cf_variable(v) - @pytest.mark.filterwarnings("ignore::UserWarning") def test_multidimensional_coordinates(self): # regression test for GH1763 # Set up test case with coordinates that have overlapping (but not From 3ca669b341a8af7066ba41f81cab814b8bd8519f Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 4 Dec 2019 17:20:08 -0700 Subject: [PATCH 15/16] Add comment --- xarray/conventions.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/xarray/conventions.py b/xarray/conventions.py index a73f31e2739..a8b9906c153 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -672,6 +672,10 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): raise ValueError( f"'coordinates' found in both attrs and encoding for variable {name!r}." ) + + # this will copy coordinates from encoding to attrs if "coordinates" in attrs + # after the next line, "coordinates" is never in encoding + # we get support for attrs["coordinates"] for free. coords_str = pop_to(encoding, attrs, "coordinates") if not coords_str and variable_coordinates[name]: attrs["coordinates"] = " ".join(map(str, variable_coordinates[name])) From 8c526fdc3585b1f762cc4ecbfbeb42ac3dacbbc1 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 5 Dec 2019 09:38:59 -0700 Subject: [PATCH 16/16] fix whats-new --- doc/whats-new.rst | 6 ------ 1 file changed, 6 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 219b2184b5c..1eb53f156f7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -81,8 +81,6 @@ Breaking changes - All leftover support for dates from non-standard calendars through ``netcdftime``, the module included in versions of netCDF4 prior to 1.4 that eventually became the - cftime package, has been removed in favor of relying solely on the standalone - ``cftime`` package (:pull:`3450`). `cftime `_ package, has been removed in favor of relying solely on the standalone ``cftime`` package (:pull:`3450`). By `Spencer Clark `_. @@ -117,10 +115,6 @@ New Features to represent all 'other' dimensions. For example, to move one dimension to the front, use ``.transpose('x', ...)``. (:pull:`3421`) By `Maximilian Roos `_ -- Changed ``xr.ALL_DIMS`` to equal python's ``Ellipsis`` (`...`), and changed internal usages to use - `...` directly. As before, you can use this to instruct a ``groupby`` operation - to reduce over all dimensions. While we have no plans to remove ``xr.ALL_DIMS``, we suggest - using `...`. (:pull:`3418`) - Changed ``xr.ALL_DIMS`` to equal python's ``Ellipsis`` (``...``), and changed internal usages to use ``...`` directly. As before, you can use this to instruct a ``groupby`` operation to reduce over all dimensions. While we have no plans to remove ``xr.ALL_DIMS``, we suggest