ESMValGroup · bouweandela · Dec 7, 2020 · Nov 4, 2020 · Nov 4, 2020 · Nov 4, 2020
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -26,10 +26,17 @@ jobs:
       - image: esmvalgroup/esmvalcore:development
     steps:
       - checkout
+      - restore_cache:
+          key: test-{{ .Branch }}
       - run:
           command: |
             pip install .[test]
             pytest -n 2 -m "not installation"
+      - save_cache:
+          key: test-{{ .Branch }}
+          paths:
+            - ".eggs"
+            - ".pytest_cache"
       - store_test_results:
           path: test-reports/
       - store_artifacts:
@@ -44,7 +51,7 @@ jobs:
       - checkout
       - check_changes
       - restore_cache:
-          key: python3-install-{{ .Branch }}
+          key: install-{{ .Branch }}
       - run:
           command: |
             . /opt/conda/etc/profile.d/conda.sh
@@ -64,10 +71,11 @@ jobs:
             pytest -n 2
             esmvaltool version
       - save_cache:
-          key: python3-install-{{ .Branch }}
+          key: install-{{ .Branch }}
           paths:
             - "/opt/conda/pkgs"
             - ".eggs"
+            - ".pytest_cache"
       - store_artifacts:
           path: /logs
       - store_artifacts:

diff --git a/doc/contributing.rst b/doc/contributing.rst
@@ -33,6 +33,17 @@ adding ``-m 'not installation'`` to the previous command.
 Tests will also be run automatically by
 `CircleCI <https://circleci.com/gh/ESMValGroup/ESMValCore>`__.
 
+Sample data
+-----------
+
+If you need sample data to work with, `this repository <https://github.com/ESMValGroup/ESMValTool_sample_data>`__ contains samples of real data for use with ESMValTool development, demonstration purposes and automated testing. The goal is to keep the repository size small (~ 100 MB), so it can be easily downloaded and distributed.
+
+The data are installed as part of the developer dependencies, and used by some larger tests (e.g. the `multimodel tests <https://github.com/ESMValGroup/ESMValCore/tree/master/tests/sample_data>`__).
+
+Loading and preprocessing the data can be somewhat time-consuming (~30 secs), so the results are cached by ``pytest`` to make the tests more performant.
+Clear the cache by running pytest with the ``--cache-clear`` flag. To avoid running the tests that use sample data, use ``pytest -m "not use_sample_data"``.
+If you are adding new tests using sample data, please use the decorator ``@pytest.mark.use_sample_data``.
+
 Code style
 ----------
 

diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py
@@ -106,7 +106,7 @@ def _compute_statistic(data, statistic_name):
         quantile = float(statistic_name[1:]) / 100
         statistic_function = partial(_quantile, quantile=quantile)
     else:
-        raise NotImplementedError
+        raise ValueError(f'No such statistic: `{statistic_name}`')
 
     # no plevs
     if len(data[0].shape) < 3:
@@ -150,7 +150,8 @@ def _put_in_cube(template_cube, cube_data, statistic, t_axis):
         tunits = cf_units.Unit(unit_name, calendar="standard")
         times = iris.coords.DimCoord(t_axis,
                                      standard_name='time',
-                                     units=tunits)
+                                     units=tunits,
+                                     var_name='time')
 
     coord_names = [c.long_name for c in template_cube.coords()]
     coord_names.extend([c.standard_name for c in template_cube.coords()])
@@ -357,8 +358,7 @@ def _assemble_full_data(cubes, statistic):
 
 
 def multi_model_statistics(products, span, statistics, output_products=None):
-    """
-    Compute multi-model statistics.
+    """Compute multi-model statistics.
 
     Multimodel statistics computed along the time axis. Can be
     computed across a common overlap in time (set span: overlap)
@@ -383,22 +383,25 @@ def multi_model_statistics(products, span, statistics, output_products=None):
         span; if full, statistics are computed on full time spans, ignoring
         missing data.
     output_products: dict
-        dictionary of output products.
-    statistics: str
-        statistical measure to be computed. Available options: mean, median,
-        max, min, std, or pXX.YY (for percentile XX.YY; decimal part optional).
+        dictionary of output products. MUST be specified if products are NOT
+        cubes
+    statistics: list of str
+        list of statistical measure(s) to be computed. Available options:
+        mean, median, max, min, std, or pXX.YY (for percentile XX.YY; decimal
+        part optional).
 
     Returns
     -------
-    list
-        list of data products or cubes containing the multimodel stats
-        computed.
+    set or dict or list
+        `set` of data products if `output_products` is given
+        `dict` of cubes if `output_products` is not given
+        `list` of input cubes if there is no overlap between cubes when
+        using `span='overlap'`
 
     Raises
     ------
     ValueError
         If span is neither overlap nor full.
-
     """
     logger.debug('Multimodel statistics: computing: %s', statistics)
     if len(products) < 2:

diff --git a/setup.cfg b/setup.cfg
@@ -21,6 +21,7 @@ flake8-ignore =
 log_level = WARNING
 markers =
     installation: test requires installation of dependencies
+    use_sample_data: Run functional tests using real data
 
 [coverage:run]
 parallel = true

diff --git a/setup.py b/setup.py
@@ -57,6 +57,8 @@
         'pytest-metadata>=1.5.1',
         'pytest-mock',
         'pytest-xdist',
+        ('ESMValTool_sample_data @ '
+         'git+https://github.com/ESMValGroup/ESMValTool_sample_data@master'),
     ],
     # Development dependencies
     # Use pip install -e .[develop] to install in development mode

diff --git a/tests/sample_data/test_multimodel.py b/tests/sample_data/test_multimodel.py
@@ -0,0 +1,236 @@
+"""Test using sample data for :func:`esmvalcore.preprocessor._multimodel`."""
+
+import pickle
+from itertools import groupby
+from pathlib import Path
+
+import iris
+import numpy as np
+import pytest
+
+from esmvalcore.preprocessor import extract_time, multi_model_statistics
+
+esmvaltool_sample_data = pytest.importorskip("esmvaltool_sample_data")
+
+# Calendar names used to parametrize the daily regression test. Entries
+# carrying `_SINGLE_CUBE` are skipped; see the skip reason below.
+_SINGLE_CUBE = pytest.mark.skip(
+    reason='Cannot calculate statistics with single cube in list')
+
+CALENDAR_PARAMS = (
+    pytest.param('360_day', marks=_SINGLE_CUBE),
+    '365_day',
+    'gregorian',
+    'proleptic_gregorian',
+    pytest.param('julian', marks=_SINGLE_CUBE),
+)
+
+# Values passed as `span` to `multi_model_statistics`.
+SPAN_PARAMS = ('overlap', 'full')
+
+
+def assert_array_almost_equal(this, other):
+    """Assert that `this` almost equals `other`, masks included."""
+    # getmaskarray (unlike `.mask`) also works when one input is unmasked.
+    np.testing.assert_array_equal(np.ma.getmaskarray(this),
+                                  np.ma.getmaskarray(other))
+    np.testing.assert_array_almost_equal(this, other)
+
+
+def preprocess_data(cubes, time_slice: dict = None):
+    """Optionally slice the cubes in time, then regrid them to a common grid.
+
+    If `time_slice` is non-empty, it is passed as keyword arguments to
+    `extract_time` for every cube. All cubes, including the first, are then
+    linearly regridded onto the grid of the first (time-sliced) cube.
+    """
+    if time_slice:
+        cubes = [extract_time(cube, **time_slice) for cube in cubes]
+
+    # The target grid is taken after time slicing; a single scheme
+    # instance is shared by all regrid calls.
+    target_grid = cubes[0]
+    scheme = iris.analysis.Linear()
+
+    return [cube.regrid(grid=target_grid, scheme=scheme) for cube in cubes]
+
+
+@pytest.fixture(scope="module")
+def timeseries_cubes_month(request):
+    """Provide the monthly (`Amon`) sample cubes, time-sliced and regridded.
+
+    The preprocessed cubes are stored in the pytest cache, because
+    recomputing them takes about 30-60 seconds.
+    """
+    cache, cache_key = request.config.cache, "sample_data/monthly"
+
+    pickled = cache.get(cache_key, None)
+    if pickled:
+        return pickle.loads(pickled.encode('latin1'))
+
+    cubes = preprocess_data(
+        esmvaltool_sample_data.load_timeseries_cubes(mip_table='Amon'),
+        time_slice=dict(start_year=1985, end_year=1987,
+                        start_month=12, end_month=2,
+                        start_day=1, end_day=1),
+    )
+
+    # Cubes are not serializable via json, so the pickle bytes are stored
+    # as text; latin1 maps every byte value to one character and back.
+    cache.set(cache_key, pickle.dumps(cubes).decode('latin1'))
+
+    return cubes
+
+
+@pytest.fixture(scope="module")
+def timeseries_cubes_day(request):
+    """Provide the daily (`day`) sample cubes as a dict keyed by calendar.
+
+    The preprocessed cubes are stored in the pytest cache, because
+    recomputing them takes about 30-60 seconds. Grouping by the calendar
+    of the time coordinate is done on every call.
+    """
+    cache, cache_key = request.config.cache, "sample_data/daily"
+
+    pickled = cache.get(cache_key, None)
+    if not pickled:
+        cubes = preprocess_data(
+            esmvaltool_sample_data.load_timeseries_cubes(mip_table='day'),
+            time_slice=dict(start_year=2001, end_year=2002,
+                            start_month=12, end_month=2,
+                            start_day=1, end_day=1),
+        )
+        # Cubes are not serializable via json, so the pickle bytes are
+        # stored as text; latin1 maps every byte to one character and back.
+        cache.set(cache_key, pickle.dumps(cubes).decode('latin1'))
+    else:
+        cubes = pickle.loads(pickled.encode('latin1'))
+
+    def time_calendar(cube):
+        return cube.coord('time').units.calendar
+
+    # groupby only merges consecutive items, hence the sort on the same key
+    by_calendar = {}
+    for name, members in groupby(sorted(cubes, key=time_calendar),
+                                 key=time_calendar):
+        by_calendar[name] = list(members)
+
+    return by_calendar
+
+
+def multimodel_test(cubes, span, statistic):
+    """Compute one multimodel statistic and sanity-check the result.
+
+    Asserts that the result is a dict containing `statistic`; returns it.
+    """
+    result = multi_model_statistics(cubes, span=span, statistics=[statistic])
+    assert isinstance(result, dict)
+    assert statistic in result
+    return result
+
+
+def multimodel_regression_test(cubes, span, name):
+    """Compare the multimodel mean with reference data next to this file.
+
+    The reference file is `{name}-{span}-mean.nc`. The test fails when the
+    result no longer matches it, e.g. after a change to the input data or
+    the multimodel code. To regenerate the reference data, delete the
+    corresponding `.nc` files in this directory and re-run the tests; that
+    first run writes the files and fails with a RuntimeError.
+    """
+    statistic = 'mean'
+    result_cube = multimodel_test(cubes, span=span,
+                                  statistic=statistic)[statistic]
+
+    reference_path = Path(__file__).with_name(f'{name}-{span}-{statistic}.nc')
+    if not reference_path.exists():
+        # No reference data yet: write it, but still fail this run.
+        iris.save(result_cube, reference_path)
+        raise RuntimeError(
+            f'Wrote reference data to {reference_path.absolute()}')
+
+    reference_cube = iris.load(str(reference_path))[0]
+    assert_array_almost_equal(result_cube.data, reference_cube.data)
+
+    # Coordinates are compared pairwise, in the order the cubes list them.
+    coord_pairs = zip(result_cube.coords(), reference_cube.coords())
+    for result_coord, reference_coord in coord_pairs:
+        assert result_coord == reference_coord
+
+    # Iris adds a `Conventions` attribute on save; drop it before comparing.
+    reference_cube.attributes.pop('Conventions', None)
+
+    assert reference_cube.metadata == result_cube.metadata
+
+
+@pytest.mark.use_sample_data
+@pytest.mark.parametrize('span', SPAN_PARAMS)
+def test_multimodel_regression_month(timeseries_cubes_month, span):
+    """Check the monthly multimodel mean against stored reference data.
+
+    Runs once for every value in `SPAN_PARAMS`.
+    """
+    multimodel_regression_test(
+        timeseries_cubes_month,
+        span=span,
+        name='timeseries_monthly')
+
+
+@pytest.mark.use_sample_data
+@pytest.mark.parametrize('calendar', CALENDAR_PARAMS)
+@pytest.mark.parametrize('span', SPAN_PARAMS)
+def test_multimodel_regression_day(timeseries_cubes_day, span, calendar):
+    """Check the daily multimodel mean against stored reference data.
+
+    Runs once for every combination of `CALENDAR_PARAMS` and `SPAN_PARAMS`.
+    """
+    multimodel_regression_test(
+        timeseries_cubes_day[calendar],
+        span=span,
+        name=f'timeseries_daily_{calendar}')
+
+
+@pytest.mark.use_sample_data
+def test_multimodel_no_vertical_dimension(timeseries_cubes_month):
+    """Test statistic without vertical dimension using monthly data."""
+    # Index 0 on the second axis drops that dimension from every cube.
+    # NOTE(review): axis 1 is presumably the vertical one -- confirm.
+    sliced = [cube[:, 0] for cube in timeseries_cubes_month]
+    multimodel_test(sliced, span='full', statistic='mean')
+
+
+@pytest.mark.use_sample_data
+@pytest.mark.xfail(
+    raises=iris.exceptions.CoordinateNotFoundError,
+    reason='https://github.com/ESMValGroup/ESMValCore/issues/891')
+def test_multimodel_no_horizontal_dimension(timeseries_cubes_month):
+    """Test statistic without horizontal dimension using monthly data."""
+    span = 'full'
+    cubes = timeseries_cubes_month
+    cubes = [cube[:, :, 0, 0] for cube in cubes]
+    # Known failure, pinned via `raises` so that any other error still fails:
+    # iris.exceptions.CoordinateNotFoundError:
+    # 'Expected to find exactly 1 depth coordinate, but found none.'
+    multimodel_test(cubes, span=span, statistic='mean')
+
+
+@pytest.mark.use_sample_data
+def test_multimodel_only_time_dimension(timeseries_cubes_month):
+    """Test statistic with only the time dimension using monthly data."""
+    # Index 0 on the three axes after the first drops those dimensions.
+    # NOTE(review): the first axis is presumably time -- confirm.
+    time_only = [cube[:, 0, 0, 0] for cube in timeseries_cubes_month]
+    multimodel_test(time_only, span='full', statistic='mean')
+
+
+@pytest.mark.use_sample_data
+@pytest.mark.xfail(
+    raises=ValueError,
+    reason='https://github.com/ESMValGroup/ESMValCore/issues/890')
+def test_multimodel_no_time_dimension(timeseries_cubes_month):
+    """Test statistic without time dimension using monthly data."""
+    span = 'full'
+    cubes = timeseries_cubes_month
+    cubes = [cube[0] for cube in cubes]
+    # ValueError: Cannot guess bounds for a coordinate of length 1.
+    multimodel_test(cubes, span=span, statistic='mean')
diff --git a/tests/sample_data/timeseries_daily_365_day-full-mean.nc b/tests/sample_data/timeseries_daily_365_day-full-mean.nc
diff --git a/tests/sample_data/timeseries_daily_365_day-overlap-mean.nc b/tests/sample_data/timeseries_daily_365_day-overlap-mean.nc
diff --git a/tests/sample_data/timeseries_daily_gregorian-full-mean.nc b/tests/sample_data/timeseries_daily_gregorian-full-mean.nc
diff --git a/tests/sample_data/timeseries_daily_gregorian-overlap-mean.nc b/tests/sample_data/timeseries_daily_gregorian-overlap-mean.nc
diff --git a/tests/sample_data/timeseries_daily_proleptic_gregorian-full-mean.nc b/tests/sample_data/timeseries_daily_proleptic_gregorian-full-mean.nc
diff --git a/tests/sample_data/timeseries_daily_proleptic_gregorian-overlap-mean.nc b/tests/sample_data/timeseries_daily_proleptic_gregorian-overlap-mean.nc
diff --git a/tests/sample_data/timeseries_monthly-full-mean.nc b/tests/sample_data/timeseries_monthly-full-mean.nc
diff --git a/tests/sample_data/timeseries_monthly-overlap-mean.nc b/tests/sample_data/timeseries_monthly-overlap-mean.nc