-
Notifications
You must be signed in to change notification settings - Fork 38
/
test_multimodel.py
257 lines (201 loc) · 8.05 KB
/
test_multimodel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
"""Test using sample data for :func:`esmvalcore.preprocessor._multimodel`."""
import pickle
import sys
from itertools import groupby
from pathlib import Path
from typing import Optional

import iris
import numpy as np
import pytest

from esmvalcore.preprocessor import extract_time
from esmvalcore.preprocessor._multimodel import multi_model_statistics
# Skip this entire module when the `esmvaltool_sample_data` package is not
# installed; every test below loads its sample cubes.
esmvaltool_sample_data = pytest.importorskip("esmvaltool_sample_data")
# Calendars used to parametrize the daily-data tests.  Per the skip
# reasons, the '360_day' and 'julian' sample data apparently yield only a
# single cube, which the multimodel statistics code cannot handle.
CALENDAR_PARAMS = (
    pytest.param(
        '360_day',
        marks=pytest.mark.skip(
            reason='Cannot calculate statistics with single cube in list')),
    '365_day',
    'gregorian',
    'proleptic_gregorian',
    pytest.param(
        'julian',
        marks=pytest.mark.skip(
            reason='Cannot calculate statistics with single cube in list')),
)
# Values for the `span` argument of `multi_model_statistics`.
SPAN_PARAMS = ('overlap', 'full')
def assert_array_almost_equal(this, other):
    """Assert that array `this` almost equals array `other`.

    If either array is masked, the masks must match exactly and the data
    must be almost equal.  ``np.ma.getmaskarray`` is used instead of the
    ``.mask`` attribute so that comparing a plain ndarray against a masked
    array raises a clean ``AssertionError`` rather than an
    ``AttributeError`` (plain ndarrays have no ``.mask``).
    """
    if np.ma.isMaskedArray(this) or np.ma.isMaskedArray(other):
        # getmaskarray returns an all-False mask for unmasked inputs.
        np.testing.assert_array_equal(np.ma.getmaskarray(this),
                                      np.ma.getmaskarray(other))
    np.testing.assert_array_almost_equal(this, other)
def preprocess_data(cubes, time_slice: Optional[dict] = None):
    """Regrid the data to the first cube and apply optional time-slicing.

    Parameters
    ----------
    cubes : list
        Iris cubes to preprocess; the first cube defines the target grid.
    time_slice : dict, optional
        Keyword arguments passed to
        :func:`esmvalcore.preprocessor.extract_time`; if omitted, no
        time-slicing is applied.  (Annotation fixed: the default is
        ``None``, so the parameter is ``Optional[dict]``, not ``dict``.)

    Returns
    -------
    list
        The (optionally time-sliced) cubes, regridded onto the grid of
        the first cube with nearest-neighbour interpolation.
    """
    if time_slice:
        cubes = [extract_time(cube, **time_slice) for cube in cubes]
    # Regrid everything onto the grid of the first cube so the cubes are
    # comparable in the multimodel statistics.
    first_cube = cubes[0]
    regrid_kwargs = {
        'grid': first_cube,
        'scheme': iris.analysis.Nearest(),
    }
    cubes = [cube.regrid(**regrid_kwargs) for cube in cubes]
    return cubes
def get_cache_key(value):
    """Build a cache key that is hopefully unique enough for unpickling.

    If unpickling the cached data still causes problems, clean the
    pytest cache manually by running `pytest --cache-clear`.
    """
    # Include the interpreter and library versions so that a cache entry
    # written by one environment is not unpickled by an incompatible one.
    parts = (str(value), iris.__version__, np.__version__, sys.version)
    return ' '.join(parts)
@pytest.fixture(scope="module")
def timeseries_cubes_month(request):
    """Load representative timeseries data."""
    # Loading + preprocessing takes ~30-60 s, so the result is cached in
    # the pytest cache.  The cache only stores json-serializable values,
    # hence the pickle -> latin1-string round trip.
    cache_key = get_cache_key("sample_data/monthly")
    cached = request.config.cache.get(cache_key, None)
    if cached:
        return pickle.loads(cached.encode('latin1'))

    time_slice = {
        'start_year': 1985,
        'end_year': 1987,
        'start_month': 12,
        'end_month': 2,
        'start_day': 1,
        'end_day': 1,
    }
    cubes = esmvaltool_sample_data.load_timeseries_cubes(mip_table='Amon')
    cubes = preprocess_data(cubes, time_slice=time_slice)
    # cubes are not serializable via json, so we must go via pickle
    request.config.cache.set(cache_key,
                             pickle.dumps(cubes).decode('latin1'))
    return cubes
@pytest.fixture(scope="module")
def timeseries_cubes_day(request):
    """Load representative timeseries data grouped by calendar."""
    # Loading + preprocessing takes ~30-60 s, so the result is cached in
    # the pytest cache (pickle -> latin1 string, because the cache only
    # stores json-serializable values).
    cache_key = get_cache_key("sample_data/daily")
    cached = request.config.cache.get(cache_key, None)
    if cached:
        cubes = pickle.loads(cached.encode('latin1'))
    else:
        time_slice = {
            'start_year': 2001,
            'end_year': 2002,
            'start_month': 12,
            'end_month': 2,
            'start_day': 1,
            'end_day': 1,
        }
        cubes = esmvaltool_sample_data.load_timeseries_cubes(mip_table='day')
        cubes = preprocess_data(cubes, time_slice=time_slice)
        request.config.cache.set(cache_key,
                                 pickle.dumps(cubes).decode('latin1'))

    def calendar(cube):
        return cube.coord('time').units.calendar

    # `groupby` only groups consecutive items, so sort by calendar first.
    sorted_cubes = sorted(cubes, key=calendar)
    return {
        key: list(group)
        for key, group in groupby(sorted_cubes, key=calendar)
    }
def multimodel_test(cubes, statistic, span):
    """Run multimodel test with some simple checks."""
    result = multi_model_statistics(products=cubes,
                                    statistics=[statistic],
                                    span=span)
    # The result is a mapping from statistic name to the computed cube.
    assert isinstance(result, dict)
    assert statistic in result
    return result
def multimodel_regression_test(cubes, span, name):
    """Run multimodel regression test.

    This test will fail if the input data or multimodel code changed. To
    update the data for the regression test, remove the corresponding
    `.nc` files in this directory and re-run the tests. The tests will
    fail the first time with a RuntimeError, because the reference data
    are being written.
    """
    statistic = 'mean'
    result = multimodel_test(cubes, statistic=statistic, span=span)
    result_cube = result[statistic]

    filename = Path(__file__).with_name(f'{name}-{span}-{statistic}.nc')
    if filename.exists():
        reference_cube = iris.load_cube(str(filename))

        assert_array_almost_equal(result_cube.data, reference_cube.data)

        # Compare coords.  `zip` would silently ignore surplus
        # coordinates on either cube, so check the counts match first.
        result_coords = result_cube.coords()
        reference_coords = reference_cube.coords()
        assert len(result_coords) == len(reference_coords)
        for this_coord, other_coord in zip(result_coords,
                                           reference_coords):
            assert this_coord == other_coord

        # remove Conventions which are added by Iris on save
        reference_cube.attributes.pop('Conventions', None)

        assert reference_cube.metadata == result_cube.metadata
    else:
        # The test will fail if no regression data are available.
        iris.save(result_cube, filename)
        raise RuntimeError(f'Wrote reference data to {filename.absolute()}')
@pytest.mark.use_sample_data
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_regression_month(timeseries_cubes_month, span):
    """Test statistic."""
    # Compare the multimodel mean of the monthly cubes against the
    # stored reference file.
    multimodel_regression_test(
        cubes=timeseries_cubes_month,
        span=span,
        name='timeseries_monthly',
    )
@pytest.mark.use_sample_data
@pytest.mark.parametrize('calendar', CALENDAR_PARAMS)
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_regression_day(timeseries_cubes_day, span, calendar):
    """Test statistic."""
    # Compare the multimodel mean of the daily cubes for this calendar
    # against the stored per-calendar reference file.
    multimodel_regression_test(
        cubes=timeseries_cubes_day[calendar],
        span=span,
        name=f'timeseries_daily_{calendar}',
    )
@pytest.mark.use_sample_data
def test_multimodel_no_vertical_dimension(timeseries_cubes_month):
    """Test statistic without vertical dimension using monthly data."""
    # Drop the vertical dimension by selecting a single level.
    reduced_cubes = [cube[:, 0] for cube in timeseries_cubes_month]
    multimodel_test(reduced_cubes, span='full', statistic='mean')
@pytest.mark.use_sample_data
@pytest.mark.xfail(
    'iris.exceptions.CoordinateNotFoundError',
    reason='https://github.com/ESMValGroup/ESMValCore/issues/891')
def test_multimodel_no_horizontal_dimension(timeseries_cubes_month):
    """Test statistic without horizontal dimension using monthly data."""
    # Drop the horizontal dimensions by selecting a single grid point.
    # Currently fails with:
    # iris.exceptions.CoordinateNotFoundError:
    # 'Expected to find exactly 1 depth coordinate, but found none.'
    reduced_cubes = [cube[:, :, 0, 0] for cube in timeseries_cubes_month]
    multimodel_test(reduced_cubes, span='full', statistic='mean')
@pytest.mark.use_sample_data
def test_multimodel_only_time_dimension(timeseries_cubes_month):
    """Test statistic with only the time dimension using monthly data."""
    # Reduce each cube to a 1-D time series at a single point and level.
    reduced_cubes = [cube[:, 0, 0, 0] for cube in timeseries_cubes_month]
    multimodel_test(reduced_cubes, span='full', statistic='mean')
@pytest.mark.use_sample_data
@pytest.mark.xfail(
    'ValueError',
    reason='https://github.com/ESMValGroup/ESMValCore/issues/890')
def test_multimodel_no_time_dimension(timeseries_cubes_month):
    """Test statistic without time dimension using monthly data."""
    # Drop the time dimension by selecting a single time step.
    # Currently fails with:
    # ValueError: Cannot guess bounds for a coordinate of length 1.
    reduced_cubes = [cube[0] for cube in timeseries_cubes_month]
    multimodel_test(reduced_cubes, span='full', statistic='mean')