-
Notifications
You must be signed in to change notification settings - Fork 38
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add multimodel tests using samples of CMIP6 data (#856)
Co-authored-by: Bouwe Andela <[email protected]>
- Loading branch information
1 parent
1edded4
commit 114b8e0
Showing
14 changed files
with
276 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,6 +42,7 @@ | |
'pyyaml', | ||
'requests', | ||
'scitools-iris>=2.2', | ||
'requests', | ||
'shapely[vectorized]', | ||
'stratify', | ||
'yamale==2.*', | ||
|
@@ -57,6 +58,8 @@ | |
'pytest-metadata>=1.5.1', | ||
'pytest-mock', | ||
'pytest-xdist', | ||
('ESMValTool_sample_data @ ' | ||
'git+https://github.com/ESMValGroup/[email protected]'), | ||
], | ||
# Development dependencies | ||
# Use pip install -e .[develop] to install in development mode | ||
|
236 changes: 236 additions & 0 deletions
236
tests/sample_data/multimodel_statistics/test_multimodel.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,236 @@ | ||
"""Test using sample data for :func:`esmvalcore.preprocessor._multimodel`.""" | ||
|
||
import pickle | ||
from itertools import groupby | ||
from pathlib import Path | ||
|
||
import iris | ||
import numpy as np | ||
import pytest | ||
|
||
from esmvalcore.preprocessor import extract_time, multi_model_statistics | ||
|
||
esmvaltool_sample_data = pytest.importorskip("esmvaltool_sample_data") | ||
|
||
# Skip mark shared by the calendars that cannot be tested; the reason text
# says the statistics cannot be computed from a list with a single cube.
_SINGLE_CUBE_SKIP = pytest.mark.skip(
    reason='Cannot calculate statistics with single cube in list')

# Calendars used to parametrize the daily regression test. Entries wrapped
# in `pytest.param` carry the skip mark above.
CALENDAR_PARAMS = (
    pytest.param('360_day', marks=_SINGLE_CUBE_SKIP),
    '365_day',
    'gregorian',
    'proleptic_gregorian',
    pytest.param('julian', marks=_SINGLE_CUBE_SKIP),
)

# Values passed as the `span` argument of `multi_model_statistics`.
SPAN_PARAMS = ('overlap', 'full')
|
||
|
||
def assert_array_almost_equal(this, other):
    """Assert that array `this` almost equals array `other`.

    If at least one of the inputs is a masked array, the masks are first
    compared for exact equality. The values are then compared with
    :func:`numpy.testing.assert_array_almost_equal`.

    Parameters
    ----------
    this, other
        Arrays (plain or masked) to compare.

    Raises
    ------
    AssertionError
        If the masks or the values differ.
    """
    if np.ma.isMaskedArray(this) or np.ma.isMaskedArray(other):
        # `getmaskarray` returns an all-False mask for plain arrays, so a
        # masked array can be compared with an unmasked one; accessing
        # `.mask` directly would raise AttributeError on a plain ndarray.
        np.testing.assert_array_equal(np.ma.getmaskarray(this),
                                      np.ma.getmaskarray(other))

    np.testing.assert_array_almost_equal(this, other)
|
||
|
||
def preprocess_data(cubes, time_slice: dict = None):
    """Optionally slice the cubes in time, then regrid them to one grid.

    Parameters
    ----------
    cubes
        Sequence of cubes; the first one is indexed, so it must not be
        empty.
    time_slice
        Keyword arguments forwarded to `extract_time` for every cube. If
        empty or None, no time extraction is done.

    Returns
    -------
    list
        The cubes regridded onto the grid of the first cube, using the
        `iris.analysis.Linear` scheme.
    """
    selected = cubes
    if time_slice:
        selected = [extract_time(cube, **time_slice) for cube in cubes]

    # the first cube (after the optional time slicing) defines the grid
    target_grid = selected[0]
    scheme = iris.analysis.Linear()

    return [
        cube.regrid(grid=target_grid, scheme=scheme) for cube in selected
    ]
|
||
|
||
@pytest.fixture(scope="module")
def timeseries_cubes_month(request):
    """Provide the preprocessed `Amon` sample timeseries cubes.

    The preprocessed cubes are kept in the pytest cache under
    ``sample_data/monthly``; reusing them saves about 30-60 seconds on
    repeat use.
    """
    cache_key = "sample_data/monthly"
    cached = request.config.cache.get(cache_key, None)

    if cached:
        # NOTE: the payload is unpickled; it is expected to be the text
        # written to the pytest cache at the end of this fixture.
        return pickle.loads(cached.encode('latin1'))

    cubes = preprocess_data(
        esmvaltool_sample_data.load_timeseries_cubes(mip_table='Amon'),
        time_slice={
            'start_year': 1985,
            'end_year': 1987,
            'start_month': 12,
            'end_month': 2,
            'start_day': 1,
            'end_day': 1,
        },
    )

    # cubes are not serializable via json, so the pickle bytes are stored
    # as latin1-decoded text
    request.config.cache.set(cache_key, pickle.dumps(cubes).decode('latin1'))

    return cubes
|
||
|
||
@pytest.fixture(scope="module")
def timeseries_cubes_day(request):
    """Provide the preprocessed `day` sample cubes, grouped by calendar.

    The preprocessed cubes are kept in the pytest cache under
    ``sample_data/daily``; reusing them saves about 30-60 seconds on
    repeat use.

    Returns
    -------
    dict
        Maps the calendar of the time coordinate to the list of cubes
        using that calendar.
    """
    cache_key = "sample_data/daily"
    cached = request.config.cache.get(cache_key, None)

    if not cached:
        cubes = preprocess_data(
            esmvaltool_sample_data.load_timeseries_cubes(mip_table='day'),
            time_slice={
                'start_year': 2001,
                'end_year': 2002,
                'start_month': 12,
                'end_month': 2,
                'start_day': 1,
                'end_day': 1,
            },
        )

        # cubes are not serializable via json, so the pickle bytes are
        # stored as latin1-decoded text
        request.config.cache.set(cache_key,
                                 pickle.dumps(cubes).decode('latin1'))
    else:
        # NOTE: the payload is unpickled; it is expected to be the text
        # written to the pytest cache by the branch above.
        cubes = pickle.loads(cached.encode('latin1'))

    def calendar_of(cube):
        return cube.coord('time').units.calendar

    # groupby only merges adjacent items, so sort on the same key first
    ordered = sorted(cubes, key=calendar_of)

    return {
        calendar: list(members)
        for calendar, members in groupby(ordered, key=calendar_of)
    }
|
||
|
||
def multimodel_test(cubes, span, statistic):
    """Compute a single multi-model statistic and sanity-check the output.

    Calls `multi_model_statistics` with `statistic` as the only requested
    statistic, asserts that the result is a dict containing `statistic`,
    and returns that dict.
    """
    output = multi_model_statistics(cubes,
                                    span=span,
                                    statistics=[statistic])

    assert isinstance(output, dict)
    assert statistic in output

    return output
|
||
|
||
def multimodel_regression_test(cubes, span, name):
    """Run multimodel regression test.

    This test will fail if the input data or multimodel code changed. To
    update the data for the regression test, remove the corresponding
    `.nc` files in this directory and re-run the tests. The tests will
    fail the first time with a RuntimeError, because the reference data
    are being written.

    Parameters
    ----------
    cubes
        Input cubes passed on to `multimodel_test`.
    span
        Span passed on to `multimodel_test`; also part of the reference
        file name.
    name
        Prefix of the reference file name
        (``{name}-{span}-mean.nc`` next to this module).

    Raises
    ------
    RuntimeError
        If the reference file did not exist and has just been written.
    AssertionError
        If data, coordinates or metadata differ from the reference.
    """
    statistic = 'mean'
    result = multimodel_test(cubes, span=span, statistic=statistic)
    result_cube = result[statistic]

    filename = Path(__file__).with_name(f'{name}-{span}-{statistic}.nc')
    if not filename.exists():
        # The test will fail if no regression data are available.
        # The path is passed as str, like in the `load_cube` call below.
        iris.save(result_cube, str(filename))
        raise RuntimeError(f'Wrote reference data to {filename.absolute()}')

    reference_cube = iris.load_cube(str(filename))
    assert_array_almost_equal(result_cube.data, reference_cube.data)

    # Compare coords
    result_coords = result_cube.coords()
    reference_coords = reference_cube.coords()
    # `zip` stops at the shorter sequence, so a missing or extra
    # coordinate would go unnoticed without an explicit length check.
    assert len(result_coords) == len(reference_coords)
    for this_coord, other_coord in zip(result_coords, reference_coords):
        assert this_coord == other_coord

    # remove Conventions which are added by Iris on save
    reference_cube.attributes.pop('Conventions', None)

    assert reference_cube.metadata == result_cube.metadata
|
||
|
||
@pytest.mark.use_sample_data
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_regression_month(timeseries_cubes_month, span):
    """Run the regression test on the monthly cubes for each span."""
    multimodel_regression_test(
        cubes=timeseries_cubes_month,
        span=span,
        name='timeseries_monthly',
    )
|
||
|
||
@pytest.mark.use_sample_data
@pytest.mark.parametrize('calendar', CALENDAR_PARAMS)
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_regression_day(timeseries_cubes_day, span, calendar):
    """Run the regression test on the daily cubes per calendar and span."""
    # the fixture maps each calendar to the cubes that use it
    multimodel_regression_test(
        cubes=timeseries_cubes_day[calendar],
        span=span,
        name=f'timeseries_daily_{calendar}',
    )
|
||
|
||
@pytest.mark.use_sample_data
def test_multimodel_no_vertical_dimension(timeseries_cubes_month):
    """Test statistic without vertical dimension using monthly data."""
    # taking index 0 along the second dimension removes that dimension
    sliced_cubes = [cube[:, 0] for cube in timeseries_cubes_month]
    multimodel_test(sliced_cubes, span='full', statistic='mean')
|
||
|
||
@pytest.mark.use_sample_data
@pytest.mark.xfail(
    raises=iris.exceptions.CoordinateNotFoundError,
    reason='https://github.com/ESMValGroup/ESMValCore/issues/891')
def test_multimodel_no_horizontal_dimension(timeseries_cubes_month):
    """Test statistic without horizontal dimension using monthly data.

    Marked as an expected failure for the linked issue. The exception is
    passed via `raises`, so only `CoordinateNotFoundError` counts as the
    expected failure; any other exception is reported as a real failure.
    """
    span = 'full'
    cubes = timeseries_cubes_month
    # taking index 0 along the last two dimensions removes them
    cubes = [cube[:, :, 0, 0] for cube in cubes]
    # Coordinate not found error
    # iris.exceptions.CoordinateNotFoundError:
    # 'Expected to find exactly 1 depth coordinate, but found none.'
    multimodel_test(cubes, span=span, statistic='mean')
|
||
|
||
@pytest.mark.use_sample_data
def test_multimodel_only_time_dimension(timeseries_cubes_month):
    """Test statistic with only the time dimension using monthly data."""
    # taking index 0 along every dimension but the first leaves 1-D cubes
    time_only_cubes = [cube[:, 0, 0, 0] for cube in timeseries_cubes_month]
    multimodel_test(time_only_cubes, span='full', statistic='mean')
|
||
|
||
@pytest.mark.use_sample_data
@pytest.mark.xfail(
    raises=ValueError,
    reason='https://github.com/ESMValGroup/ESMValCore/issues/890')
def test_multimodel_no_time_dimension(timeseries_cubes_month):
    """Test statistic without time dimension using monthly data.

    Marked as an expected failure for the linked issue. The exception is
    passed via `raises`, so only `ValueError` counts as the expected
    failure; any other exception is reported as a real failure.
    """
    span = 'full'
    cubes = timeseries_cubes_month
    # taking index 0 along the first dimension removes that dimension
    cubes = [cube[0] for cube in cubes]
    # ValueError: Cannot guess bounds for a coordinate of length 1.
    multimodel_test(cubes, span=span, statistic='mean')
Binary file added
BIN
+18.5 KB
tests/sample_data/multimodel_statistics/timeseries_daily_365_day-full-mean.nc
Binary file not shown.
Binary file added
BIN
+24.8 KB
tests/sample_data/multimodel_statistics/timeseries_daily_365_day-overlap-mean.nc
Binary file not shown.
Binary file added
BIN
+18.5 KB
tests/sample_data/multimodel_statistics/timeseries_daily_gregorian-full-mean.nc
Binary file not shown.
Binary file added
BIN
+25 KB
tests/sample_data/multimodel_statistics/timeseries_daily_gregorian-overlap-mean.nc
Binary file not shown.
Binary file added
BIN
+18.5 KB
tests/sample_data/multimodel_statistics/timeseries_daily_proleptic_gregorian-full-mean.nc
Binary file not shown.
Binary file added
BIN
+24.8 KB
tests/sample_data/multimodel_statistics/timeseries_daily_proleptic_gregorian-overlap-mean.nc
Binary file not shown.
Binary file added
BIN
+18.5 KB
tests/sample_data/multimodel_statistics/timeseries_monthly-full-mean.nc
Binary file not shown.
Binary file added
BIN
+22.4 KB
tests/sample_data/multimodel_statistics/timeseries_monthly-overlap-mean.nc
Binary file not shown.