diff --git a/esmvalcore/iris_helpers.py b/esmvalcore/iris_helpers.py index e6f78baa9a..e5bc3dbeea 100644 --- a/esmvalcore/iris_helpers.py +++ b/esmvalcore/iris_helpers.py @@ -1,12 +1,16 @@ """Auxiliary functions for :mod:`iris`.""" +from typing import Dict, List, Sequence import dask.array as da import iris import iris.cube import iris.util import numpy as np +from iris.cube import Cube from iris.exceptions import CoordinateMultiDimError +from esmvalcore.typing import NetCDFAttr + def add_leading_dim_to_cube(cube, dim_coord): """Add new leading dimension to cube. @@ -58,7 +62,7 @@ def add_leading_dim_to_cube(cube, dim_coord): # Create new cube with shape (w, x, ..., z) where w is length of dim_coord # and already add ancillary variables and cell measures new_data = da.broadcast_to(cube.core_data(), new_shape) - new_cube = iris.cube.Cube( + new_cube = Cube( new_data, ancillary_variables_and_dims=ancillary_variables, cell_measures_and_dims=cell_measures, @@ -100,3 +104,56 @@ def date2num(date, unit, dtype=np.float64): return num.astype(dtype) except AttributeError: return dtype(num) + + +def merge_cube_attributes( + cubes: Sequence[Cube], + delimiter: str = ' ', +) -> None: + """Merge attributes of all given cubes in-place. + + After this operation, the attributes of all given cubes are equal. This is + useful for operations that combine cubes, such as + :meth:`iris.cube.CubeList.merge_cube` or + :meth:`iris.cube.CubeList.concatenate_cube`. + + Note + ---- + This function differs from :func:`iris.util.equalise_attributes` in this + respect that it does not delete attributes that are not identical but + rather concatenates them (sorted) using the given ``delimiter``. E.g., the + attributes ``exp: historical`` and ``exp: ssp585`` end up as ``exp: + historical ssp585`` using the default ``delimiter = ' '``. + + Parameters + ---------- + cubes: + Input cubes whose attributes will be modified in-place. + delimiter: + Delimiter that is used to concatenate non-identical attributes. + + """ + if len(cubes) <= 1: + return + + # Step 1: collect all attribute values in a list + attributes: Dict[str, List[NetCDFAttr]] = {} + for cube in cubes: + for (attr, val) in cube.attributes.items(): + attributes.setdefault(attr, []) + attributes[attr].append(val) + + # Step 2: if values are not equal, first convert them to strings (so that + # set() can be used); then extract unique elements from this list, sort it, + # and use the delimiter to join all elements to a single string + final_attributes: Dict[str, NetCDFAttr] = {} + for (attr, vals) in attributes.items(): + set_of_str = sorted({str(v) for v in vals}) + if len(set_of_str) == 1: + final_attributes[attr] = vals[0] + else: + final_attributes[attr] = delimiter.join(set_of_str) + + # Step 3: modify the cubes in-place + for cube in cubes: + cube.attributes = final_attributes diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index 3690863c48..467e75e78d 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -9,10 +9,11 @@ import iris import iris.aux_factory import iris.exceptions -import numpy as np import yaml from cf_units import suppress_errors +from esmvalcore.iris_helpers import merge_cube_attributes + from .._task import write_ncl_settings from ._time import extract_time @@ -174,21 +175,6 @@ def load(file, callback=None, ignore_warnings=None): return raw_cubes -def _fix_cube_attributes(cubes): - """Unify attributes of different cubes to allow concatenation.""" - attributes = {} - for cube in cubes: - for (attr, val) in cube.attributes.items(): - if attr not in attributes: - attributes[attr] = val - else: - if not np.array_equal(val, attributes[attr]): - attributes[attr] = '{};{}'.format(str(attributes[attr]), - str(val)) - for cube in cubes: - cube.attributes = attributes - - def _by_two_concatenation(cubes): """Perform a by-2 concatenation to avoid gaps.""" concatenated = iris.cube.CubeList(cubes).concatenate() @@ -226,7 +212,7 @@ def concatenate(cubes): if len(cubes) == 1: return cubes[0] - _fix_cube_attributes(cubes) + merge_cube_attributes(cubes) if len(cubes) > 1: # order cubes by first time point diff --git a/esmvalcore/typing.py b/esmvalcore/typing.py index a217bf46ee..5549e91dc2 100644 --- a/esmvalcore/typing.py +++ b/esmvalcore/typing.py @@ -2,10 +2,18 @@ from __future__ import annotations from numbers import Number -from typing import Dict, Sequence, Union +from typing import Dict, Iterable, Sequence, Union FacetValue = Union[str, Sequence[str], Number] """Type describing a single facet.""" Facets = Dict[str, FacetValue] """Type describing a collection of facets.""" + +NetCDFAttr = Union[str, Number, Iterable] +"""Type describing netCDF attributes. + +`NetCDF attributes +`_ can +be strings, numbers or sequences. +""" diff --git a/tests/integration/preprocessor/_io/test_concatenate.py b/tests/integration/preprocessor/_io/test_concatenate.py index 8fca36fe90..a5fef83380 100644 --- a/tests/integration/preprocessor/_io/test_concatenate.py +++ b/tests/integration/preprocessor/_io/test_concatenate.py @@ -426,6 +426,22 @@ def test_concatenate_with_order(self): concatenated.coord('time').points, np.array([1., 2., 5., 7., 100.])) + def test_concatenate_differing_attributes(self): + """Test concatenation of cubes with different attributes.""" + cubes = CubeList(self.raw_cubes) + for (idx, cube) in enumerate(cubes): + cube.attributes = { + 'equal_attr': 1, + 'different_attr': 3 - idx, + } + concatenated = _io.concatenate(cubes) + np.testing.assert_array_equal( + concatenated.coord('time').points, np.array([1, 2, 3, 4, 5, 6])) + self.assertEqual( + concatenated.attributes, + {'equal_attr': 1, 'different_attr': '1 2 3'}, + ) + def test_fail_on_calendar_concatenate_with_overlap(self): """Test fail of concatenation with overlap.""" time_coord = DimCoord([3., 7000.], @@ -479,68 +495,3 @@ def test_fail_metadata_differs(self): self.raw_cubes[1].units = 'K' with self.assertRaises(ValueError): _io.concatenate(self.raw_cubes) - - def test_fix_attributes(self): - """Test fixing attributes for concatenation.""" - identical_attrs = { - 'int': 42, - 'float': 3.1415, - 'bool': True, - 'str': 'Hello, world', - 'list': [1, 1, 2, 3, 5, 8, 13], - 'tuple': (1, 2, 3, 4, 5), - 'dict': { - 1: 'one', - 2: 'two', - 3: 'three' - }, - 'nparray': np.arange(42), - } - differing_attrs = [ - { - 'new_int': 0, - 'new_str': 'hello', - 'new_nparray': np.arange(3), - 'mix': np.arange(2), - }, - { - 'new_int': 1, - 'new_str': 'world', - 'new_list': [1, 1, 2], - 'new_tuple': (0, 1), - 'new_dict': { - 0: 'zero', - }, - 'mix': { - 1: 'one', - }, - }, - { - 'new_str': '!', - 'new_list': [1, 1, 2, 3], - 'new_tuple': (1, 2, 3), - 'new_dict': { - 0: 'zeroo', - 1: 'one', - }, - 'new_nparray': np.arange(2), - 'mix': False, - }, - ] - resulting_attrs = { - 'new_int': '0;1', - 'new_str': 'hello;world;!', - 'new_nparray': '[0 1 2];[0 1]', - 'new_list': '[1, 1, 2];[1, 1, 2, 3]', - 'new_tuple': '(0, 1);(1, 2, 3)', - 'new_dict': "{0: 'zero'};{0: 'zeroo', 1: 'one'}", - 'mix': "[0 1];{1: 'one'};False", - } - resulting_attrs.update(identical_attrs) - - for idx in range(3): - self.raw_cubes[idx].attributes = identical_attrs - self.raw_cubes[idx].attributes.update(differing_attrs[idx]) - _io._fix_cube_attributes(self.raw_cubes) # noqa - for cube in self.raw_cubes: - self.assertEqual(cube.attributes, resulting_attrs) diff --git a/tests/unit/test_iris_helpers.py b/tests/unit/test_iris_helpers.py index e0dc581af6..07e81608e8 100644 --- a/tests/unit/test_iris_helpers.py +++ b/tests/unit/test_iris_helpers.py @@ -1,5 +1,7 @@ """Tests for :mod:`esmvalcore.iris_helpers`.""" import datetime +from copy import deepcopy +from itertools import permutations from unittest import mock import numpy as np @@ -15,7 +17,11 @@ from iris.cube import Cube, CubeList from iris.exceptions import CoordinateMultiDimError -from esmvalcore.iris_helpers import add_leading_dim_to_cube, date2num +from esmvalcore.iris_helpers import ( + add_leading_dim_to_cube, + date2num, + merge_cube_attributes, +) @pytest.fixture @@ -128,3 +134,89 @@ def test_date2num_scalar(date, dtype, expected, units): num = date2num(date, units, dtype=dtype) assert num == expected assert num.dtype == dtype + + +def assert_attribues_equal(attrs_1: dict, attrs_2: dict) -> None: + """Check attributes using :func:`numpy.testing.assert_array_equal`.""" + assert len(attrs_1) == len(attrs_2) + for (attr, val) in attrs_1.items(): + assert attr in attrs_2 + np.testing.assert_array_equal(attrs_2[attr], val) + + +def make_cube_with_attrs(index): + """Make cube that contains different types of attributes.""" + attributes = { + # Identical attribute values across cubes + 'int': 42, + 'float': 3.1415, + 'bool': True, + 'str': 'Hello, world', + 'list': [1, 1, 2, 3, 5, 8, 13], + 'tuple': (1, 2, 3, 4, 5), + 'nparray': np.arange(42), + + # Differing attribute values across cubes + 'diff_int': index, + 'diff_str': 'abc'[index], + 'diff_nparray': np.arange(index), + 'mix': np.arange(3) if index == 0 else index, + 'diff_list': [index, index], + 'diff_tuple': (index, index), + + # Differing attribute keys across cubes + str(index + 1000): index, + str(index % 2 + 100): index, + str(index % 2): index % 2, + } + return Cube(0.0, attributes=attributes) + + +CUBES = [make_cube_with_attrs(i) for i in range(3)] + + +# Test all permutations of CUBES to test that results do not depend on order +@pytest.mark.parametrize("cubes", list(permutations(CUBES))) +def test_merge_cube_attributes(cubes): + """Test `merge_cube_attributes`.""" + expected_attributes = { + 'int': 42, + 'float': 3.1415, + 'bool': True, + 'str': 'Hello, world', + 'list': [1, 1, 2, 3, 5, 8, 13], + 'tuple': (1, 2, 3, 4, 5), + 'nparray': np.arange(42), + 'diff_int': '0 1 2', + 'diff_str': 'a b c', + 'diff_nparray': '[0 1] [0] []', + 'mix': '1 2 [0 1 2]', + 'diff_list': '[0, 0] [1, 1] [2, 2]', + 'diff_tuple': '(0, 0) (1, 1) (2, 2)', + '1000': 0, + '1001': 1, + '1002': 2, + '100': '0 2', + '101': 1, + '0': 0, + '1': 1, + } + cubes = deepcopy(cubes) + merge_cube_attributes(cubes) + assert len(cubes) == 3 + for cube in cubes: + assert_attribues_equal(cube.attributes, expected_attributes) + + +def test_merge_cube_attributes_0_cubes(): + """Test `merge_cube_attributes` with 0 cubes.""" + merge_cube_attributes([]) + + +def test_merge_cube_attributes_1_cube(): + """Test `merge_cube_attributes` with 1 cube.""" + cubes = CubeList([deepcopy(CUBES[0])]) + expected_attributes = deepcopy(cubes[0].attributes) + merge_cube_attributes(cubes) + assert len(cubes) == 1 + assert_attribues_equal(cubes[0].attributes, expected_attributes)