pydata · dcherian · Jun 25, 2019 · May 15, 2019 · May 15, 2019 · May 15, 2019
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -61,6 +61,10 @@ Enhancements
 Bug fixes
 ~~~~~~~~~
 
+- Don't set encoding attributes on bounds variables when writing to netCDF.
+  :py:meth:`xr.open_mfdataset` sets variable encodings to that of variables
+  in first file.(:issue:`2436`, :issue:`2921`)
+  By `Deepak Cherian <https://github.com/dcherian>`_.
 - NetCDF4 output: variables with unlimited dimensions must be chunked (not
   contiguous) on output. (:issue:`1849`)
   By `James McCreight <https://github.com/jmccreight>`_.

diff --git a/xarray/backends/api.py b/xarray/backends/api.py
@@ -777,6 +777,9 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT,
 
     combined._file_obj = _MultiFileCloser(file_objs)
     combined.attrs = datasets[0].attrs
+    for v in combined.variables:
+        if v in datasets[0]:
+            combined[v].encoding = datasets[0][v].encoding
     return combined
 
 

diff --git a/xarray/conventions.py b/xarray/conventions.py
@@ -7,6 +7,7 @@
 from .coding import strings, times, variables
 from .coding.variables import SerializationWarning
 from .core import duck_array_ops, indexing
+from .core.common import contains_cftime_datetimes
 from .core.pycompat import dask_array_type
 from .core.variable import IndexVariable, Variable, as_variable
 
@@ -354,6 +355,51 @@ def _update_bounds_attributes(variables):
                     bounds_attrs.setdefault('calendar', attrs['calendar'])
 
 
+def _update_bounds_encoding(variables):
+    """Adds time encoding to time bounds variables.
+
+    Variables handling time bounds ("Cell boundaries" in the CF
+    conventions) do not necessarily carry the necessary attributes to be
+    decoded. This copies the encoding from the time variable to the
+    associated bounds variable so that we write CF-compliant files.
+
+    See Also:
+
+    http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/
+         cf-conventions.html#cell-boundaries
+
+    https://github.com/pydata/xarray/issues/2565
+    """
+
+    # For all time variables with bounds
+    for v in variables.values():
+        attrs = v.attrs
+        encoding = v.encoding
+        has_date_units = 'units' in encoding and 'since' in encoding['units']
+        is_datetime_type = (np.issubdtype(v.dtype, np.datetime64) or
+                            contains_cftime_datetimes(v))
+
+        if (is_datetime_type and not has_date_units and
+           'bounds' in attrs and attrs['bounds'] in variables):
+            warnings.warn("Variable '{0}' has datetime type and a "
+                          "bounds variable but {0}.encoding does not have "
+                          "units specified. The units encodings for '{0}' "
+                          "and '{1}' will be determined independently "
+                          "and may not be equal, counter to CF-conventions. "
+                          "If this is a concern, specify a units encoding for "
+                          "'{0}' before writing to a file."
+                          .format(v.name, attrs['bounds']),
+                          UserWarning)
+
+        if has_date_units and 'bounds' in attrs:
+            if attrs['bounds'] in variables:
+                bounds_encoding = variables[attrs['bounds']].encoding
+                bounds_encoding.setdefault('units', encoding['units'])
+                if 'calendar' in encoding:
+                    bounds_encoding.setdefault('calendar',
+                                               encoding['calendar'])
+
+
 def decode_cf_variables(variables, attributes, concat_characters=True,
                         mask_and_scale=True, decode_times=True,
                         decode_coords=True, drop_variables=None,
@@ -491,8 +537,6 @@ def cf_decoder(variables, attributes,
     """
     Decode a set of CF encoded variables and attributes.
 
-    See Also, decode_cf_variable
-
     Parameters
     ----------
     variables : dict
@@ -514,6 +558,10 @@ def cf_decoder(variables, attributes,
         A dictionary mapping from variable name to xarray.Variable objects.
     decoded_attributes : dict
         A dictionary mapping from attribute name to values.
+
+    See also
+    --------
+    decode_cf_variable
     """
     variables, attributes, _ = decode_cf_variables(
         variables, attributes, concat_characters, mask_and_scale, decode_times)
@@ -594,14 +642,12 @@ def encode_dataset_coordinates(dataset):
 
 def cf_encoder(variables, attributes):
     """
-    A function which takes a dicts of variables and attributes
-    and encodes them to conform to CF conventions as much
-    as possible.  This includes masking, scaling, character
-    array handling, and CF-time encoding.
-
-    Decode a set of CF encoded variables and attributes.
+    Encode a set of CF encoded variables and attributes.
+    Takes a dicts of variables and attributes and encodes them
+    to conform to CF conventions as much as possible.
+    This includes masking, scaling, character array handling,
+    and CF-time encoding.
 
-    See Also, decode_cf_variable
 
     Parameters
     ----------
@@ -617,8 +663,27 @@ def cf_encoder(variables, attributes):
     encoded_attributes : dict
         A dictionary mapping from attribute name to value
 
-    See also: encode_cf_variable
+    See also
+    --------
+    decode_cf_variable, encode_cf_variable
     """
+
+    # add encoding for time bounds variables if present.
+    _update_bounds_encoding(variables)
+
     new_vars = OrderedDict((k, encode_cf_variable(v, name=k))
                            for k, v in variables.items())
+
+    # Remove attrs from bounds variables (issue #2921)
+    for var in new_vars.values():
+        bounds = var.attrs['bounds'] if 'bounds' in var.attrs else None
+        if bounds and bounds in new_vars:
+            # see http://cfconventions.org/cf-conventions/cf-conventions.html#cell-boundaries # noqa
+            for attr in ['units', 'standard_name', 'axis', 'positive',
+                         'calendar', 'long_name', 'leap_month', 'leap_year',
+                         'month_lengths']:
+                if attr in new_vars[bounds].attrs and attr in var.attrs:
+                    if new_vars[bounds].attrs[attr] == var.attrs[attr]:
+                        new_vars[bounds].attrs.pop(attr)
+
     return new_vars, attributes
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
@@ -2486,6 +2486,26 @@ def test_attrs_mfdataset(self):
                                       'no attribute'):
                         actual.test2
 
+    def test_encoding_mfdataset(self):
+        original = Dataset({'foo': ('t', np.random.randn(10)),
+                            't': ('t', pd.date_range(start='2010-01-01',
+                                                     periods=10,
+                                                     freq='1D'))})
+        original.t.encoding['units'] = 'days since 2010-01-01'
+
+        with create_tmp_file() as tmp1:
+            with create_tmp_file() as tmp2:
+                ds1 = original.isel(t=slice(5))
+                ds2 = original.isel(t=slice(5, 10))
+                ds1.t.encoding['units'] = 'days since 2010-01-01'
+                ds2.t.encoding['units'] = 'days since 2000-01-01'
+                ds1.to_netcdf(tmp1)
+                ds2.to_netcdf(tmp2)
+                with open_mfdataset([tmp1, tmp2]) as actual:
+                    assert actual.t.encoding['units'] == original.t.encoding['units']  # noqa
+                    assert actual.t.encoding['units'] == ds1.t.encoding['units']  # noqa
+                    assert actual.t.encoding['units'] != ds2.t.encoding['units']  # noqa
+
     def test_preprocess_mfdataset(self):
         original = Dataset({'foo': ('x', np.random.randn(10))})
         with create_tmp_file() as tmp:

diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
@@ -5,11 +5,11 @@
 import pandas as pd
 import pytest
 
-from xarray import DataArray, Variable, coding, decode_cf
+from xarray import DataArray, Dataset, Variable, coding, decode_cf
 from xarray.coding.times import (
     _import_cftime, cftime_to_nptime, decode_cf_datetime, encode_cf_datetime)
 from xarray.coding.variables import SerializationWarning
-from xarray.conventions import _update_bounds_attributes
+from xarray.conventions import _update_bounds_attributes, cf_encoder
 from xarray.core.common import contains_cftime_datetimes
 from xarray.testing import assert_equal
 
@@ -671,6 +671,51 @@ def test_decode_cf_time_bounds():
     _update_bounds_attributes(ds.variables)
 
 
+@requires_cftime_or_netCDF4
+def test_encode_time_bounds():
+
+    time = pd.date_range('2000-01-16', periods=1)
+    time_bounds = pd.date_range('2000-01-01', periods=2, freq='MS')
+    ds = Dataset(dict(time=time, time_bounds=time_bounds))
+    ds.time.attrs = {'bounds': 'time_bounds'}
+    ds.time.encoding = {'calendar': 'noleap',
+                        'units': 'days since 2000-01-01'}
+
+    expected = dict()
+    # expected['time'] = Variable(data=np.array([15]), dims=['time'])
+    expected['time_bounds'] = Variable(data=np.array([0, 31]),
+                                       dims=['time_bounds'])
+
+    encoded, _ = cf_encoder(ds.variables, ds.attrs)
+    assert_equal(encoded['time_bounds'], expected['time_bounds'])
+    assert 'calendar' not in encoded['time_bounds'].attrs
+    assert 'units' not in encoded['time_bounds'].attrs
+
+    # if time_bounds attrs are same as time attrs, it doesn't matter
+    ds.time_bounds.encoding = {'calendar': 'noleap',
+                               'units': 'days since 2000-01-01'}
+    encoded, _ = cf_encoder({k: ds[k] for k in ds.variables},
+                            ds.attrs)
+    assert_equal(encoded['time_bounds'], expected['time_bounds'])
+    assert 'calendar' not in encoded['time_bounds'].attrs
+    assert 'units' not in encoded['time_bounds'].attrs
+
+    # for CF-noncompliant case of time_bounds attrs being different from
+    # time attrs; preserve them for faithful roundtrip
+    ds.time_bounds.encoding = {'calendar': 'noleap',
+                               'units': 'days since 1849-01-01'}
+    encoded, _ = cf_encoder({k: ds[k] for k in ds.variables},
+                            ds.attrs)
+    with pytest.raises(AssertionError):
+        assert_equal(encoded['time_bounds'], expected['time_bounds'])
+    assert 'calendar' not in encoded['time_bounds'].attrs
+    assert encoded['time_bounds'].attrs['units'] == ds.time_bounds.encoding['units']  # noqa
+
+    ds.time.encoding = {}
+    with pytest.warns(UserWarning):
+        cf_encoder(ds.variables, ds.attrs)
+
+
 @pytest.fixture(params=_ALL_CALENDARS)
 def calendar(request):
     return request.param