From 7fd30969817e47a515b9d6317310ccb4208926f6 Mon Sep 17 00:00:00 2001
From: Stef Smeets <s.smeets@esciencecenter.nl>
Date: Thu, 25 Feb 2021 16:48:50 +0100
Subject: [PATCH 1/5] Update multimodel preprocessor tests

---
 .../_multimodel/test_multimodel.py            | 552 ++++++++++--------
 1 file changed, 295 insertions(+), 257 deletions(-)

diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py
index 47cf186c2a..fe8f30ee77 100644
--- a/tests/unit/preprocessor/_multimodel/test_multimodel.py
+++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py
@@ -1,267 +1,305 @@
-"""Unit test for :func:`esmvalcore.preprocessor._multimodel`."""
+"""Unit test for :func:`esmvalcore.preprocessor._multimodel`."""
 
-import unittest
+from datetime import datetime
 
 import iris
 import numpy as np
+import pytest
 from cf_units import Unit
+from iris.cube import Cube
 
-import tests
+import esmvalcore.preprocessor._multimodel as mm
 from esmvalcore.preprocessor import multi_model_statistics
-from esmvalcore.preprocessor._multimodel import (
-    _assemble_data,
-    _compute_statistic,
-    _get_time_slice,
-    _multiproduct_statistics,
-    _plev_fix,
-    _put_in_cube,
-    _unify_time_coordinates,
+
+SPAN_OPTIONS = ('overlap', 'full')
+
+FREQUENCY_OPTIONS = ('daily', 'monthly', 'yearly')  # hourly
+
+CALENDAR_OPTIONS = ('360_day', '365_day', 'gregorian', 'proleptic_gregorian',
+                    'julian')
+
+
+def assert_array_almost_equal(this, other):
+    """Assert that array `this` almost equals array `other`."""
+    if np.ma.isMaskedArray(this) or np.ma.isMaskedArray(other):
+        np.testing.assert_array_equal(this.mask, other.mask)
+
+    np.testing.assert_allclose(this, other)
+
+
+def timecoord(frequency,
+              calendar='gregorian',
+              offset='days since 1850-01-01',
+              num=3):
+    """Return a time coordinate for the given frequency and calendar."""
+    time_points = range(1, num + 1)
+
+    if frequency == 'hourly':
+        dates = [datetime(1850, 1, 1, i, 0, 0) for i in time_points]
+    elif frequency == 'daily':
+        dates = [datetime(1850, 1, i, 0, 0, 0) for i in time_points]
+    elif frequency == 'monthly':
+        dates = [datetime(1850, i, 15, 0, 0, 0) for i in time_points]
+    elif frequency == 'yearly':
+        # Step the year (not the day) so the points are one year apart.
+        dates = [datetime(1849 + i, 7, 1, 0, 0, 0) for i in time_points]
+
+    unit = Unit(offset, calendar=calendar)
+    points = unit.date2num(dates)
+    return iris.coords.DimCoord(points, standard_name='time', units=unit)
+
+
+def generate_cube_from_dates(
+    dates,
+    calendar='gregorian',
+    offset='days since 1850-01-01',
+    fill_val=1,
+    len_data=3,
+    var_name=None,
+):
+    """Generate test cube from list of dates / frequency specification.
+
+    Parameters
+    ----------
+    dates : str or list
+        Date frequency: 'hourly' / 'daily' / 'monthly' / 'yearly' or
+        list of datetimes.
+    offset : str
+        Offset to use
+    fill_val : int
+        Value to fill the data with
+    len_data : int
+        Number of data / time points
+    var_name : str
+        Name of the data variable
+
+    Returns
+    -------
+    iris.cube.Cube
+    """
+    if isinstance(dates, str):
+        time = timecoord(dates, calendar, offset=offset, num=len_data)
+    else:
+        unit = Unit(offset, calendar=calendar)
+        time = iris.coords.DimCoord(unit.date2num(dates),
+                                    standard_name='time',
+                                    units=unit)
+
+    return Cube((fill_val, ) * len_data,
+                dim_coords_and_dims=[(time, 0)],
+                var_name=var_name)
+
+
+def get_cubes_for_validation_test(frequency):
+    """Set up cubes used for testing multimodel statistics."""
+
+    # Simple 1d cube with standard time coord
+    cube1 = generate_cube_from_dates(frequency)
+
+    # Cube with masked data
+    cube2 = cube1.copy()
+    cube2.data = np.ma.array([5, 5, 5], mask=[True, False, False])
+
+    # Cube with deviating time coord
+    cube3 = generate_cube_from_dates(frequency,
+                                     calendar='360_day',
+                                     offset='days since 1950-01-01',
+                                     len_data=2,
+                                     fill_val=9)
+
+    return [cube1, cube2, cube3]
+
+
+VALIDATION_DATA_SUCCESS = (
+    ('full', 'mean', (5, 5, 3)),
+    ('full', 'std', (5.656854249492381, 4, 2.8284271247461903)),
+    # ('full', 'std_dev', (5.656854249492381, 4, 2.8284271247461903)),
+    ('full', 'min', (1, 1, 1)),
+    ('full', 'max', (9, 9, 5)),
+    ('full', 'median', (5, 5, 3)),
+    ('full', 'p50', (5, 5, 3)),
+    ('full', 'p99.5', (8.96, 8.96, 4.98)),
+    ('overlap', 'mean', (5, 5)),
+    ('overlap', 'std', (5.656854249492381, 4)),
+    # ('overlap', 'std_dev', (5.656854249492381, 4)),
+    ('overlap', 'min', (1, 1)),
+    ('overlap', 'max', (9, 9)),
+    ('overlap', 'median', (5, 5)),
+    ('overlap', 'p50', (5, 5)),
+    ('overlap', 'p99.5', (8.96, 8.96)),
+    # test multiple statistics
+    ('overlap', ('min', 'max'), ((1, 1), (9, 9))),
+    ('full', ('min', 'max'), ((1, 1, 1), (9, 9, 5))),
 )
 
 
-class Test(tests.Test):
-    """Test class for preprocessor/_multimodel.py."""
-    def setUp(self):
-        """Prepare tests."""
-        # Make various time arrays
-        time_args = {
-            'standard_name': 'time',
-            'units': Unit('days since 1850-01-01', calendar='gregorian')
-        }
-        monthly1 = iris.coords.DimCoord([14, 45], **time_args)
-        monthly2 = iris.coords.DimCoord([45, 73, 104, 134], **time_args)
-        monthly3 = iris.coords.DimCoord([104, 134], **time_args)
-        yearly1 = iris.coords.DimCoord([14., 410.], **time_args)
-        yearly2 = iris.coords.DimCoord([1., 367., 733., 1099.], **time_args)
-        daily1 = iris.coords.DimCoord([1., 2.], **time_args)
-        for time in [monthly1, monthly2, monthly3, yearly1, yearly2, daily1]:
-            time.guess_bounds()
-
-        # Other dimensions are fixed
-        zcoord = iris.coords.DimCoord([0.5, 5., 50.],
-                                      standard_name='air_pressure',
-                                      long_name='air_pressure',
-                                      bounds=[[0., 2.5], [2.5, 25.],
-                                              [25., 250.]],
-                                      units='m',
-                                      attributes={'positive': 'down'})
-        coord_sys = iris.coord_systems.GeogCS(iris.fileformats.pp.EARTH_RADIUS)
-        lons = iris.coords.DimCoord([1.5, 2.5],
-                                    standard_name='longitude',
-                                    long_name='longitude',
-                                    bounds=[[1., 2.], [2., 3.]],
-                                    units='degrees_east',
-                                    coord_system=coord_sys)
-        lats = iris.coords.DimCoord([1.5, 2.5],
-                                    standard_name='latitude',
-                                    long_name='latitude',
-                                    bounds=[[1., 2.], [2., 3.]],
-                                    units='degrees_north',
-                                    coord_system=coord_sys)
-
-        data1 = np.ma.ones((2, 3, 2, 2))
-        data2 = np.ma.ones((4, 3, 2, 2))
-        mask2 = np.full((4, 3, 2, 2), False)
-        mask2[0, 0, 0, 0] = True
-        data2 = np.ma.array(data2, mask=mask2)
-
-        coords_spec1 = [(monthly1, 0), (zcoord, 1), (lats, 2), (lons, 3)]
-        self.cube1 = iris.cube.Cube(data1, dim_coords_and_dims=coords_spec1)
-
-        coords_spec2 = [(monthly2, 0), (zcoord, 1), (lats, 2), (lons, 3)]
-        self.cube2 = iris.cube.Cube(data2, dim_coords_and_dims=coords_spec2)
-
-        coords_spec3 = [(monthly3, 0), (zcoord, 1), (lats, 2), (lons, 3)]
-        self.cube3 = iris.cube.Cube(data1, dim_coords_and_dims=coords_spec3)
-
-        coords_spec4 = [(yearly1, 0), (zcoord, 1), (lats, 2), (lons, 3)]
-        self.cube4 = iris.cube.Cube(data1, dim_coords_and_dims=coords_spec4)
-
-        coords_spec5 = [(yearly2, 0), (zcoord, 1), (lats, 2), (lons, 3)]
-        self.cube5 = iris.cube.Cube(data2, dim_coords_and_dims=coords_spec5)
-
-        coords_spec6 = [(daily1, 0), (zcoord, 1), (lats, 2), (lons, 3)]
-        self.cube6 = iris.cube.Cube(data1, dim_coords_and_dims=coords_spec6)
-
-    def test_compute_statistic(self):
-        """Test statistic."""
-        data = [self.cube1.data[0], self.cube2.data[0]]
-        stat_mean = _compute_statistic(data, "mean")
-        stat_median = _compute_statistic(data, "median")
-        expected_mean = np.ma.ones((3, 2, 2))
-        expected_median = np.ma.ones((3, 2, 2))
-        self.assert_array_equal(stat_mean, expected_mean)
-        self.assert_array_equal(stat_median, expected_median)
-
-    def test_compute_full_statistic_mon_cube(self):
-        data = [self.cube1, self.cube2]
-        stats = multi_model_statistics(products=data,
-                                       statistics=['mean'],
-                                       span='full')
-        expected_full_mean = np.ma.ones((5, 3, 2, 2))
-        expected_full_mean.mask = np.ones((5, 3, 2, 2))
-        expected_full_mean.mask[1] = False
-        self.assert_array_equal(stats['mean'].data, expected_full_mean)
-
-    def test_compute_full_statistic_yr_cube(self):
-        data = [self.cube4, self.cube5]
-        stats = multi_model_statistics(products=data,
-                                       statistics=['mean'],
-                                       span='full')
-        expected_full_mean = np.ma.ones((4, 3, 2, 2))
-        expected_full_mean.mask = np.zeros((4, 3, 2, 2))
-        expected_full_mean.mask[2:4] = True
-        self.assert_array_equal(stats['mean'].data, expected_full_mean)
-
-    def test_compute_overlap_statistic_mon_cube(self):
-        data = [self.cube1, self.cube1]
-        stats = multi_model_statistics(products=data,
-                                       statistics=['mean'],
-                                       span='overlap')
-        expected_ovlap_mean = np.ma.ones((2, 3, 2, 2))
-        self.assert_array_equal(stats['mean'].data, expected_ovlap_mean)
-
-    def test_compute_overlap_statistic_yr_cube(self):
-        data = [self.cube4, self.cube4]
-        stats = multi_model_statistics(products=data,
-                                       statistics=['mean'],
-                                       span='overlap')
-        expected_ovlap_mean = np.ma.ones((2, 3, 2, 2))
-        self.assert_array_equal(stats['mean'].data, expected_ovlap_mean)
-
-    def test_compute_std(self):
-        """Test statistic."""
-        data = [self.cube1.data[0], self.cube2.data[0] * 2]
-        stat = _compute_statistic(data, "std")
-        expected = np.ma.ones((3, 2, 2)) * 0.5
-        expected[0, 0, 0] = 0
-        self.assert_array_equal(stat, expected)
-
-    def test_compute_max(self):
-        """Test statistic."""
-        data = [self.cube1.data[0] * 0.5, self.cube2.data[0] * 2]
-        stat = _compute_statistic(data, "max")
-        expected = np.ma.ones((3, 2, 2)) * 2
-        expected[0, 0, 0] = 0.5
-        self.assert_array_equal(stat, expected)
-
-    def test_compute_min(self):
-        """Test statistic."""
-        data = [self.cube1.data[0] * 0.5, self.cube2.data[0] * 2]
-        stat = _compute_statistic(data, "min")
-        expected = np.ma.ones((3, 2, 2)) * 0.5
-        self.assert_array_equal(stat, expected)
-
-    def test_compute_percentile(self):
-        """Test statistic."""
-        data = [self.cube1.data[0] * 0.5, self.cube2.data[0] * 2]
-        stat = _compute_statistic(data, "p75")
-        expected = np.ma.ones((3, 2, 2)) * 1.625
-        expected[0, 0, 0] = 0.5
-        self.assert_array_equal(stat, expected)
-
-    def test_put_in_cube(self):
-        """Test put in cube."""
-        cube_data = np.ma.ones((2, 3, 2, 2))
-        stat_cube = _put_in_cube(self.cube1, cube_data, "mean", t_axis=[1, 2])
-        self.assert_array_equal(stat_cube.data, self.cube1.data)
-
-    def test_assemble_overlap_data(self):
-        """Test overlap data."""
-        comp_ovlap_mean = _assemble_data([self.cube1, self.cube1],
-                                         "mean",
-                                         span='overlap')
-        expected_ovlap_mean = np.ma.ones((2, 3, 2, 2))
-        self.assert_array_equal(comp_ovlap_mean.data, expected_ovlap_mean)
-
-    def test_assemble_full_data(self):
-        """Test full data."""
-        comp_full_mean = _assemble_data([self.cube1, self.cube2],
-                                        "mean",
-                                        span='full')
-        expected_full_mean = np.ma.ones((5, 3, 2, 2))
-        expected_full_mean.mask = np.ones((5, 3, 2, 2))
-        expected_full_mean.mask[1] = False
-        self.assert_array_equal(comp_full_mean.data, expected_full_mean)
-
-    def test_plev_fix(self):
-        """Test plev fix."""
-        fixed_data = _plev_fix(self.cube2.data, 1)
-        expected_data = np.ma.ones((3, 2, 2))
-        self.assert_array_equal(expected_data, fixed_data)
-
-    def test_unify_time_coordinates(self):
-        """Test set common calenar."""
-        cube1 = self.cube1
-        time1 = cube1.coord('time')
-        t_unit1 = time1.units
-        dates = t_unit1.num2date(time1.points)
-
-        t_unit2 = Unit('days since 1850-01-01', calendar='gregorian')
-        time2 = t_unit2.date2num(dates)
-        cube2 = self.cube1.copy()
-        cube2.coord('time').points = time2
-        cube2.coord('time').units = t_unit2
-        _unify_time_coordinates([cube1, cube2])
-        self.assertEqual(cube1.coord('time'), cube2.coord('time'))
-
-    def test_get_time_slice_all(self):
-        """Test get time slice if all cubes have data."""
-        cubes = [self.cube1, self.cube2]
-        result = _get_time_slice(cubes, time=45)
-        expected = [self.cube1[1].data, self.cube2[0].data]
-        self.assert_array_equal(expected, result)
-
-    def test_get_time_slice_part(self):
-        """Test get time slice if all cubes have data."""
-        cubes = [self.cube1, self.cube2]
-        result = _get_time_slice(cubes, time=14)
-        masked = np.ma.empty(list(cubes[0].shape[1:]))
-        masked.mask = True
-        expected = [self.cube1[0].data, masked]
-        self.assert_array_equal(expected, result)
-
-    def test_return_products(self):
-        """Check that the right product set is returned."""
-        input1 = PreprocessorFile(self.cube1)
-        input2 = PreprocessorFile(self.cube2)
-        output = PreprocessorFile()
-        products = set([input1, input2])
-        output_products = {'mean': output}
-        result1 = _multiproduct_statistics(products,
-                                           statistics=['mean'],
-                                           output_products=output_products,
-                                           span='full',
-                                           keep_input_datasets=True)
-        result2 = _multiproduct_statistics(products,
-                                           statistics=['mean'],
-                                           output_products=output_products,
-                                           span='full',
-                                           keep_input_datasets=False)
-        result3 = multi_model_statistics(products,
-                                         statistics=['mean'],
-                                         output_products=output_products,
-                                         span='full')
-        result4 = multi_model_statistics(products,
-                                         statistics=['mean'],
-                                         output_products=output_products,
-                                         span='full',
-                                         keep_input_datasets=False)
-        assert result1 == set([input1, input2, output])
-        assert result2 == set([output])
-        assert result3 == result1
-        assert result4 == result2
-
-
-class PreprocessorFile:
-    """Mockup to test output of multimodel."""
-    def __init__(self, cube=None):
-        if cube:
-            self.cubes = [cube]
-
-    def wasderivedfrom(self, product):
-        pass
-
-
-if __name__ == '__main__':
-    unittest.main()
+@pytest.mark.parametrize('frequency', FREQUENCY_OPTIONS)
+@pytest.mark.parametrize('span, statistics, expected', VALIDATION_DATA_SUCCESS)
+def test_multimodel_statistics(frequency, span, statistics, expected):
+    """High level test for multicube statistics function.
+
+    - Should work for multiple data frequencies
+    - Should be able to deal with multiple statistics
+    - Should work for both span arguments
+    - Should deal correctly with different mask options
+    - Return type should be a dict with all requested statistics as keys
+    """
+    cubes = get_cubes_for_validation_test(frequency)
+
+    if isinstance(statistics, str):
+        statistics = (statistics, )
+        expected = (expected, )
+
+    result = multi_model_statistics(cubes, span, statistics)
+
+    assert isinstance(result, dict)
+    assert set(result.keys()) == set(statistics)
+
+    for i, statistic in enumerate(statistics):
+        result_cube = result[statistic]
+        expected_data = np.ma.array(expected[i], mask=False)
+        assert_array_almost_equal(result_cube.data, expected_data)
+
+
+@pytest.mark.parametrize('calendar1, calendar2, expected', (
+    ('360_day', '360_day', '360_day'),
+    ('365_day', '365_day', '365_day'),
+    ('365_day', '360_day', 'gregorian'),
+    ('360_day', '365_day', 'gregorian'),
+    ('gregorian', '365_day', 'gregorian'),
+    ('proleptic_gregorian', 'julian', 'gregorian'),
+    ('julian', '365_day', 'gregorian'),
+))
+def test_get_consistent_time_unit(calendar1, calendar2, expected):
+    """Test same calendar returned or default if calendars differ.
+
+    Expected behaviour: If the calendars are the same, return that one.
+    If the calendars are not the same, return 'gregorian'.
+    """
+    cubes = (
+        generate_cube_from_dates('monthly', calendar=calendar1),
+        generate_cube_from_dates('monthly', calendar=calendar2),
+    )
+
+    result = mm._get_consistent_time_unit(cubes)
+    assert result.calendar == expected
+
+
+@pytest.mark.parametrize('span', SPAN_OPTIONS)
+def test_edge_case_different_time_offsets(span):
+    cubes = (
+        generate_cube_from_dates('monthly',
+                                 '360_day',
+                                 offset='days since 1888-01-01'),
+        generate_cube_from_dates('monthly',
+                                 '360_day',
+                                 offset='days since 1899-01-01'),
+    )
+
+    statistic = 'min'
+    statistics = (statistic, )
+
+    result = multi_model_statistics(cubes, span, statistics)
+
+    result_cube = result[statistic]
+
+    time_coord = result_cube.coord('time')
+
+    assert time_coord.units.calendar == 'gregorian'
+    assert time_coord.units.origin == 'days since 1850-01-01'
+
+    desired = np.array((14., 45., 73.))
+    np.testing.assert_array_equal(time_coord.points, desired)
+
+    # input cubes are updated in-place
+    for cube in cubes:
+        np.testing.assert_array_equal(cube.coord('time').points, desired)
+
+
+def generate_cubes_with_non_overlapping_timecoords():
+    """Generate sample data where time coords do not overlap."""
+    time_points = range(1, 4)
+    dates1 = [datetime(1850, i, 15, 0, 0, 0) for i in time_points]
+    dates2 = [datetime(1950, i, 15, 0, 0, 0) for i in time_points]
+
+    return (
+        generate_cube_from_dates(dates1),
+        generate_cube_from_dates(dates2),
+    )
+
+
+def test_edge_case_time_no_overlap_fail():
+    """Test case when time coords do not overlap using span='overlap'.
+
+    Expected behaviour: `multi_model_statistics` should fail if time
+    points are not overlapping.
+    """
+    cubes = generate_cubes_with_non_overlapping_timecoords()
+
+    statistic = 'min'
+    statistics = (statistic, )
+
+    with pytest.raises(ValueError):
+        _ = multi_model_statistics(cubes, 'overlap', statistics)
+
+
+def test_edge_case_time_no_overlap_success():
+    """Test case when time coords do not overlap using span='full'.
+
+    Expected behaviour: `multi_model_statistics` should use all
+    available time points.
+    """
+    cubes = generate_cubes_with_non_overlapping_timecoords()
+
+    statistic = 'min'
+    statistics = (statistic, )
+
+    result = multi_model_statistics(cubes, 'full', statistics)
+    result_cube = result[statistic]
+
+    assert result_cube.coord('time').shape == (6, )
+
+
+@pytest.mark.parametrize('span', SPAN_OPTIONS)
+def test_edge_case_time_not_in_middle_of_months(span):
+    """Test case when time coords are not on 15th for monthly data.
+
+    Expected behaviour: `multi_model_statistics` will set all dates to
+    the 15th.
+    """
+    time_points = range(1, 4)
+    dates1 = [datetime(1850, i, 12, 0, 0, 0) for i in time_points]
+    dates2 = [datetime(1850, i, 25, 0, 0, 0) for i in time_points]
+
+    cubes = (
+        generate_cube_from_dates(dates1),
+        generate_cube_from_dates(dates2),
+    )
+
+    statistic = 'min'
+    statistics = (statistic, )
+
+    result = multi_model_statistics(cubes, span, statistics)
+    result_cube = result[statistic]
+
+    time_coord = result_cube.coord('time')
+
+    desired = np.array((14., 45., 73.))
+    np.testing.assert_array_equal(time_coord.points, desired)
+
+    # input cubes are updated in-place
+    for cube in cubes:
+        np.testing.assert_array_equal(cube.coord('time').points, desired)
+
+
+@pytest.mark.parametrize('span', SPAN_OPTIONS)
+def test_edge_case_sub_daily_data_fail(span):
+    """Test case when cubes with sub-daily time coords are passed."""
+    cube = generate_cube_from_dates('hourly')
+    cubes = (cube, cube)
+
+    statistic = 'min'
+    statistics = (statistic, )
+
+    with pytest.raises(ValueError):
+        _ = multi_model_statistics(cubes, span, statistics)

From 682cdee8358c01af0f2337904761c20be7e84f36 Mon Sep 17 00:00:00 2001
From: Stef Smeets <s.smeets@esciencecenter.nl>
Date: Thu, 25 Feb 2021 16:59:19 +0100
Subject: [PATCH 2/5] Mark known failures with xfail

---
 .../preprocessor/_multimodel/test_multimodel.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py
index fe8f30ee77..7aa885b6c0 100644
--- a/tests/unit/preprocessor/_multimodel/test_multimodel.py
+++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py
@@ -112,16 +112,24 @@ def get_cubes_for_validation_test(frequency):
 
 VALIDATION_DATA_SUCCESS = (
     ('full', 'mean', (5, 5, 3)),
-    ('full', 'std', (5.656854249492381, 4, 2.8284271247461903)),
-    # ('full', 'std_dev', (5.656854249492381, 4, 2.8284271247461903)),
+    pytest.param(
+        'full',
+        'std', (5.656854249492381, 4, 2.8284271247461903),
+        marks=pytest.mark.xfail(
+            raises=AssertionError,
+            reason='Iris 3.0.1 uses `ddof=1` for calculation of std. dev.')),
     ('full', 'min', (1, 1, 1)),
     ('full', 'max', (9, 9, 5)),
     ('full', 'median', (5, 5, 3)),
     ('full', 'p50', (5, 5, 3)),
     ('full', 'p99.5', (8.96, 8.96, 4.98)),
     ('overlap', 'mean', (5, 5)),
-    ('overlap', 'std', (5.656854249492381, 4)),
-    # ('overlap', 'std_dev', (5.656854249492381, 4)),
+    pytest.param(
+        'overlap',
+        'std', (5.656854249492381, 4),
+        marks=pytest.mark.xfail(
+            raises=AssertionError,
+            reason='Iris 3.0.1 uses `ddof=1` for calculation of std. dev.')),
     ('overlap', 'min', (1, 1)),
     ('overlap', 'max', (9, 9)),
     ('overlap', 'median', (5, 5)),
@@ -228,6 +236,7 @@ def generate_cubes_with_non_overlapping_timecoords():
     )
 
 
+@pytest.mark.xfail(reason='Multimodel statistics returns the original cubes.')
 def test_edge_case_time_no_overlap_fail():
     """Test case when time coords do not overlap using span='overlap'.
 

From 56c6ebfc508eaa0752d59df16d32930265e41a19 Mon Sep 17 00:00:00 2001
From: Stef Smeets <s.smeets@esciencecenter.nl>
Date: Fri, 26 Feb 2021 10:41:12 +0100
Subject: [PATCH 3/5] Update reason with github issue

---
 tests/unit/preprocessor/_multimodel/test_multimodel.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py
index 7aa885b6c0..50c44a7c6a 100644
--- a/tests/unit/preprocessor/_multimodel/test_multimodel.py
+++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py
@@ -117,7 +117,7 @@ def get_cubes_for_validation_test(frequency):
         'std', (5.656854249492381, 4, 2.8284271247461903),
         marks=pytest.mark.xfail(
             raises=AssertionError,
-            reason='Iris 3.0.1 uses `ddof=1` for calculation of std. dev.')),
+            reason='https://github.com/ESMValGroup/ESMValCore/issues/1024')),
     ('full', 'min', (1, 1, 1)),
     ('full', 'max', (9, 9, 5)),
     ('full', 'median', (5, 5, 3)),
@@ -129,7 +129,7 @@ def get_cubes_for_validation_test(frequency):
         'std', (5.656854249492381, 4),
         marks=pytest.mark.xfail(
             raises=AssertionError,
-            reason='Iris 3.0.1 uses `ddof=1` for calculation of std. dev.')),
+            reason='https://github.com/ESMValGroup/ESMValCore/issues/1024')),
     ('overlap', 'min', (1, 1)),
     ('overlap', 'max', (9, 9)),
     ('overlap', 'median', (5, 5)),

From ad3e3c0324110f35e03faccfed3e8af96abaa21f Mon Sep 17 00:00:00 2001
From: Stef Smeets <s.smeets@esciencecenter.nl>
Date: Fri, 26 Feb 2021 12:12:55 +0100
Subject: [PATCH 4/5] Address review comments

---
 .../_multimodel/test_multimodel.py            | 62 +++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py
index 50c44a7c6a..9d2ad366a9 100644
--- a/tests/unit/preprocessor/_multimodel/test_multimodel.py
+++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py
@@ -312,3 +312,65 @@ def test_edge_case_sub_daily_data_fail(span):
 
     with pytest.raises(ValueError):
         _ = multi_model_statistics(cubes, span, statistics)
+
+
+def test_unify_time_coordinates():
+    """Test set common calendar."""
+    cube1 = generate_cube_from_dates('monthly',
+                                     calendar='360_day',
+                                     offset='days since 1850-01-01')
+    cube2 = generate_cube_from_dates('monthly',
+                                     calendar='gregorian',
+                                     offset='days since 1943-05-16')
+
+    mm._unify_time_coordinates([cube1, cube2])
+
+    assert cube1.coord('time') == cube2.coord('time')
+
+
+class PreprocessorFile:
+    """Mockup to test output of multimodel."""
+    def __init__(self, cube=None):
+        if cube:
+            self.cubes = [cube]
+
+    def wasderivedfrom(self, product):
+        pass
+
+
+def test_return_products():
+    """Check that the right product set is returned."""
+    cube1 = generate_cube_from_dates('monthly', fill_val=1)
+    cube2 = generate_cube_from_dates('monthly', fill_val=9)
+
+    input1 = PreprocessorFile(cube1)
+    input2 = PreprocessorFile(cube2)
+
+    products = set([input1, input2])
+
+    output = PreprocessorFile()
+    output_products = {'mean': output}
+
+    kwargs = {
+        'statistics': ['mean'],
+        'span': 'full',
+        'output_products': output_products
+    }
+
+    result1 = mm._multiproduct_statistics(products,
+                                          keep_input_datasets=True,
+                                          **kwargs)
+    result2 = mm._multiproduct_statistics(products,
+                                          keep_input_datasets=False,
+                                          **kwargs)
+
+    assert result1 == set([input1, input2, output])
+    assert result2 == set([output])
+
+    result3 = mm.multi_model_statistics(products, **kwargs)
+    result4 = mm.multi_model_statistics(products,
+                                        keep_input_datasets=False,
+                                        **kwargs)
+
+    assert result3 == result1
+    assert result4 == result2

From 3a3efb284d24b8abb0d887412ea73cb69ce1dcfd Mon Sep 17 00:00:00 2001
From: Stef Smeets <s.smeets@esciencecenter.nl>
Date: Mon, 1 Mar 2021 13:41:40 +0100
Subject: [PATCH 5/5] Rename array assert function

---
 tests/unit/preprocessor/_multimodel/test_multimodel.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py
index 9d2ad366a9..c63d3bb39d 100644
--- a/tests/unit/preprocessor/_multimodel/test_multimodel.py
+++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py
@@ -19,8 +19,8 @@
                     'julian')
 
 
-def assert_array_almost_equal(this, other):
-    """Assert that array `this` almost equals array `other`."""
+def assert_array_allclose(this, other):
+    """Assert that array `this` is close to array `other`."""
     if np.ma.isMaskedArray(this) or np.ma.isMaskedArray(other):
         np.testing.assert_array_equal(this.mask, other.mask)
 
@@ -166,7 +166,7 @@ def test_multimodel_statistics(frequency, span, statistics, expected):
     for i, statistic in enumerate(statistics):
         result_cube = result[statistic]
         expected_data = np.ma.array(expected[i], mask=False)
-        assert_array_almost_equal(result_cube.data, expected_data)
+        assert_array_allclose(result_cube.data, expected_data)
 
 
 @pytest.mark.parametrize('calendar1, calendar2, expected', (