Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Made equalized attributes in concatenated cubes consistent across runs #1783

Merged
merged 13 commits into from
Feb 9, 2023
Merged
72 changes: 71 additions & 1 deletion esmvalcore/iris_helpers.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
"""Auxiliary functions for :mod:`iris`."""
import warnings
from typing import Any, Dict, List, Sequence

import dask.array as da
import iris
import iris.cube
import iris.util
import numpy as np
from iris import NameConstraint
from iris.cube import Cube
from iris.exceptions import CoordinateMultiDimError

from esmvalcore.exceptions import ESMValCoreDeprecationWarning
Expand Down Expand Up @@ -62,7 +64,7 @@ def add_leading_dim_to_cube(cube, dim_coord):
# Create new cube with shape (w, x, ..., z) where w is length of dim_coord
# and already add ancillary variables and cell measures
new_data = da.broadcast_to(cube.core_data(), new_shape)
new_cube = iris.cube.Cube(
new_cube = Cube(
new_data,
ancillary_variables_and_dims=ancillary_variables,
cell_measures_and_dims=cell_measures,
Expand Down Expand Up @@ -106,6 +108,74 @@ def date2num(date, unit, dtype=np.float64):
return dtype(num)


def merge_cube_attributes(
    cubes: Sequence[Cube],
    delimiter: str = ' ',
) -> None:
    """Equalize the attributes of all given cubes in-place.

    Once this function returns, every given cube carries the same set of
    attributes. This is handy ahead of operations that combine cubes, e.g.,
    :meth:`iris.cube.CubeList.merge_cube` or
    :meth:`iris.cube.CubeList.concatenate_cube`.

    Note
    ----
    In contrast to :func:`iris.util.equalise_attributes`, attributes whose
    values are not identical across cubes are not removed. Instead, their
    values are converted to strings, de-duplicated, sorted, and joined with
    the given ``delimiter``. E.g., the attributes ``exp: historical`` and
    ``exp: ssp585`` end up as ``exp: historical ssp585`` using the default
    ``delimiter = ' '``.

    Parameters
    ----------
    cubes:
        Input cubes whose attributes will be modified in-place.
    delimiter:
        String placed between the distinct values of a non-identical
        attribute.

    """
    if len(cubes) < 2:
        # Zero or one cube: there is nothing to merge
        return

    # Gather, per attribute name, every value found across the cubes
    collected: Dict[Any, List[Any]] = {}
    for cube in cubes:
        for (name, value) in cube.attributes.items():
            collected.setdefault(name, []).append(value)

    # Reduce each list of values to one final value. Identical values are
    # kept as they are; anything else is stringified first (so that the
    # values become hashable), de-duplicated, and sorted so that the result
    # does not depend on the order of the cubes
    merged: Dict[Any, Any] = {}
    for (name, values) in collected.items():
        if _contains_identical_values(values):
            merged[name] = values[0]
            continue
        unique_strings = set(map(str, values))
        merged[name] = delimiter.join(sorted(unique_strings))

    # Write the merged attributes back to every cube
    for cube in cubes:
        cube.attributes = merged


def _contains_identical_values(sequence: Sequence) -> bool:
    """Return ``True`` if all elements of a sequence are equal.

    Each element is compared with its direct successor; sequences with fewer
    than two elements are considered to contain identical values.

    Note
    ----
    The comparison is done with :func:`np.array_equal` since it is very
    versatile and works with all kinds of input types.

    """
    return all(
        np.array_equal(left, right)
        for (left, right) in zip(sequence, sequence[1:])
    )


def var_name_constraint(var_name):
""":class:`iris.Constraint` using ``var_name``.

Expand Down
20 changes: 3 additions & 17 deletions esmvalcore/preprocessor/_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
import iris
import iris.aux_factory
import iris.exceptions
import numpy as np
import yaml
from cf_units import suppress_errors

from esmvalcore.iris_helpers import merge_cube_attributes

from .._task import write_ncl_settings
from ._time import extract_time

Expand Down Expand Up @@ -173,21 +174,6 @@ def load(file, callback=None, ignore_warnings=None):
return raw_cubes


def _fix_cube_attributes(cubes):
    """Unify attributes of different cubes to allow concatenation.

    The first value seen for an attribute is kept as is. Every later value
    that differs from the value stored so far is appended to it as a string,
    separated by ``;``. All cubes are assigned the resulting attributes.
    """
    unified = {}
    for cube in cubes:
        for (name, value) in cube.attributes.items():
            if name not in unified:
                unified[name] = value
                continue
            stored = unified[name]
            if np.array_equal(value, stored):
                continue
            # Differing value: chain it onto what has been stored so far
            unified[name] = ';'.join((str(stored), str(value)))
    for cube in cubes:
        cube.attributes = unified


def _by_two_concatenation(cubes):
"""Perform a by-2 concatenation to avoid gaps."""
concatenated = iris.cube.CubeList(cubes).concatenate()
Expand Down Expand Up @@ -225,7 +211,7 @@ def concatenate(cubes):
if len(cubes) == 1:
return cubes[0]

_fix_cube_attributes(cubes)
merge_cube_attributes(cubes)

if len(cubes) > 1:
# order cubes by first time point
Expand Down
81 changes: 16 additions & 65 deletions tests/integration/preprocessor/_io/test_concatenate.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,22 @@ def test_concatenate_with_order(self):
concatenated.coord('time').points, np.array([1., 2., 5., 7.,
100.]))

def test_concatenate_differing_attributes(self):
    """Test concatenation of cubes whose attributes partly differ."""
    cubes = CubeList(self.raw_cubes)
    for (idx, cube) in enumerate(cubes):
        # 'equal_attr' is the same for every cube, 'different_attr' is not
        cube.attributes = {
            'equal_attr': 1,
            'different_attr': 3 - idx,
        }

    result = _io.concatenate(cubes)

    expected_points = np.array([1, 2, 3, 4, 5, 6])
    np.testing.assert_array_equal(
        result.coord('time').points,
        expected_points,
    )
    # Differing values are expected as one sorted, space-separated string
    expected_attributes = {'equal_attr': 1, 'different_attr': '1 2 3'}
    self.assertEqual(result.attributes, expected_attributes)

def test_fail_on_calendar_concatenate_with_overlap(self):
"""Test fail of concatenation with overlap."""
time_coord = DimCoord([3., 7000.],
Expand Down Expand Up @@ -479,68 +495,3 @@ def test_fail_metadata_differs(self):
self.raw_cubes[1].units = 'K'
with self.assertRaises(ValueError):
_io.concatenate(self.raw_cubes)

def test_fix_attributes(self):
    """Test fixing attributes for concatenation."""
    # Attributes given identically to every cube
    shared = {
        'int': 42,
        'float': 3.1415,
        'bool': True,
        'str': 'Hello, world',
        'list': [1, 1, 2, 3, 5, 8, 13],
        'tuple': (1, 2, 3, 4, 5),
        'dict': {
            1: 'one',
            2: 'two',
            3: 'three'
        },
        'nparray': np.arange(42),
    }
    # Extra attributes per cube (one entry for each of the three cubes)
    per_cube = [
        {
            'new_int': 0,
            'new_str': 'hello',
            'new_nparray': np.arange(3),
            'mix': np.arange(2),
        },
        {
            'new_int': 1,
            'new_str': 'world',
            'new_list': [1, 1, 2],
            'new_tuple': (0, 1),
            'new_dict': {
                0: 'zero',
            },
            'mix': {
                1: 'one',
            },
        },
        {
            'new_str': '!',
            'new_list': [1, 1, 2, 3],
            'new_tuple': (1, 2, 3),
            'new_dict': {
                0: 'zeroo',
                1: 'one',
            },
            'new_nparray': np.arange(2),
            'mix': False,
        },
    ]
    # Differing values are expected to be chained with ';' in cube order
    expected_differing = {
        'new_int': '0;1',
        'new_str': 'hello;world;!',
        'new_nparray': '[0 1 2];[0 1]',
        'new_list': '[1, 1, 2];[1, 1, 2, 3]',
        'new_tuple': '(0, 1);(1, 2, 3)',
        'new_dict': "{0: 'zero'};{0: 'zeroo', 1: 'one'}",
        'mix': "[0 1];{1: 'one'};False",
    }
    expected = {**expected_differing, **shared}

    for (idx, extra) in enumerate(per_cube):
        cube = self.raw_cubes[idx]
        cube.attributes = shared
        cube.attributes.update(extra)
    _io._fix_cube_attributes(self.raw_cubes)  # noqa
    for cube in self.raw_cubes:
        self.assertEqual(cube.attributes, expected)
103 changes: 103 additions & 0 deletions tests/unit/test_iris_helpers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Tests for :mod:`esmvalcore.iris_helpers`."""
import datetime
from copy import deepcopy
from itertools import permutations
from unittest import mock

import numpy as np
Expand All @@ -19,6 +21,7 @@
from esmvalcore.iris_helpers import (
add_leading_dim_to_cube,
date2num,
merge_cube_attributes,
var_name_constraint,
)

Expand Down Expand Up @@ -135,6 +138,106 @@ def test_date2num_scalar(date, dtype, expected, units):
assert num.dtype == dtype


def assert_attribues_equal(attrs_1: dict, attrs_2: dict) -> None:
    """Assert that two attribute dictionaries are equal.

    Both dictionaries need to have the same number of entries, and every key
    of ``attrs_1`` needs to be present in ``attrs_2``. Values are compared
    with :func:`numpy.testing.assert_array_equal`.
    """
    assert len(attrs_1) == len(attrs_2)
    for attr in attrs_1:
        assert attr in attrs_2
        np.testing.assert_array_equal(attrs_2[attr], attrs_1[attr])


def make_cube_with_attrs(index):
    """Make a scalar cube holding many different kinds of attributes.

    The attributes consist of three groups: values that do not depend on
    ``index``, values that are derived from ``index``, and entries whose keys
    are derived from ``index``.
    """
    # Identical attribute values across cubes
    identical_values = {
        'int': 42,
        'float': 3.1415,
        'bool': True,
        'str': 'Hello, world',
        'list': [1, 1, 2, 3, 5, 8, 13],
        'tuple': (1, 2, 3, 4, 5),
        'dict': {
            1: 'one',
            2: 'two',
            3: 'three',
        },
        'nparray': np.arange(42),
    }

    # Differing attribute values across cubes
    differing_values = {
        'diff_int': index,
        'diff_str': 'abc'[index],
        'diff_nparray': np.arange(index),
        'mix': np.arange(3) if index == 0 else index,
        'diff_list': [index, index],
        'diff_tuple': (index, index),
        'diff_dict': {
            0: index,
        },
    }

    # Differing attribute keys across cubes
    differing_keys = {
        str(index + 1000): index,
        str(index % 2 + 100): index,
        str(index % 2): index % 2,
    }

    attributes = {**identical_values, **differing_values, **differing_keys}
    return Cube(0.0, attributes=attributes)


# Three sample cubes, built once at import time with indices 0, 1 and 2
CUBES = [make_cube_with_attrs(i) for i in range(3)]


# Every ordering of CUBES is tested to show that the merged attributes do not
# depend on the order in which the cubes are given
@pytest.mark.parametrize("cubes", list(permutations(CUBES)))
def test_merge_cube_attributes(cubes):
    """Test `merge_cube_attributes`."""
    # Work on copies since the attributes are modified in-place
    merged_cubes = deepcopy(cubes)
    merge_cube_attributes(merged_cubes)
    assert len(merged_cubes) == 3

    expected = {
        # Values that are identical in all cubes stay as they are
        'int': 42,
        'float': 3.1415,
        'bool': True,
        'str': 'Hello, world',
        'list': [1, 1, 2, 3, 5, 8, 13],
        'tuple': (1, 2, 3, 4, 5),
        'dict': {
            1: 'one',
            2: 'two',
            3: 'three',
        },
        'nparray': np.arange(42),
        # Differing values end up as sorted, space-separated strings
        'diff_int': '0 1 2',
        'diff_str': 'a b c',
        'diff_nparray': '[0 1] [0] []',
        'mix': '1 2 [0 1 2]',
        'diff_list': '[0, 0] [1, 1] [2, 2]',
        'diff_tuple': '(0, 0) (1, 1) (2, 2)',
        'diff_dict': '{0: 0} {0: 1} {0: 2}',
        # Keys that are only present in some of the cubes
        '1000': 0,
        '1001': 1,
        '1002': 2,
        '100': '0 2',
        '101': 1,
        '0': 0,
        '1': 1,
    }
    for cube in merged_cubes:
        assert_attribues_equal(cube.attributes, expected)


def test_merge_cube_attributes_0_cubes():
    """Test `merge_cube_attributes` with 0 cubes."""
    # Only checks that an empty input is accepted without raising
    no_cubes = []
    merge_cube_attributes(no_cubes)


def test_merge_cube_attributes_1_cube():
    """Test `merge_cube_attributes` with 1 cube."""
    single_cube = deepcopy(CUBES[0])
    cubes = CubeList([single_cube])
    # Snapshot of the attributes taken before the call for later comparison
    expected = deepcopy(cubes[0].attributes)

    merge_cube_attributes(cubes)

    assert len(cubes) == 1
    assert_attribues_equal(cubes[0].attributes, expected)


def test_var_name_constraint(cubes):
"""Test :func:`esmvalcore.iris_helpers.var_name_constraint`."""
with pytest.warns(ESMValCoreDeprecationWarning):
Expand Down