Skip to content

Commit

Permalink
Added preprocessor mask_multimodel (#767)
Browse files Browse the repository at this point in the history
* Added preprocessor mask_multimodel

* Added unit tests for mask_multimodel preproc

* Added missing test case

* Added documentation for mask_multimodel

* Added integration test for mask_multimodel

Co-authored-by: bascrezee <[email protected]>
  • Loading branch information
schlunma and bascrezee authored Mar 18, 2021
1 parent ce4ece7 commit d50452c
Show file tree
Hide file tree
Showing 5 changed files with 457 additions and 14 deletions.
22 changes: 8 additions & 14 deletions doc/recipe/preprocessor.rst
Original file line number Diff line number Diff line change
Expand Up @@ -492,20 +492,14 @@ See also :func:`esmvalcore.preprocessor.mask_fillvalues`.
Common mask for multiple models
-------------------------------

It is possible to use ``mask_fillvalues`` to create a combined multi-model mask
(all the masks from all the analyzed models combined into a single mask); for
that purpose setting the ``threshold_fraction`` to 0 will not discard any time
windows, essentially keeping the original model masks and combining them into a
single mask; here is an example:

.. code-block:: yaml
preprocessors:
missing_values_preprocessor:
mask_fillvalues:
threshold_fraction: 0.0 # keep all missing values
min_value: -1e20 # small enough not to alter the data
# time_window: 10.0 # this will not matter anymore
To create a combined multi-model mask (all the masks from all the analyzed
datasets combined into a single mask using a logical OR), the preprocessor
``mask_multimodel`` can be used. In contrast to ``mask_fillvalues``,
``mask_multimodel`` does not expect that the datasets have a ``time``
coordinate, but works on datasets with arbitrary (but identical) coordinates.
After ``mask_multimodel``, all involved datasets have an identical mask.

See also :func:`esmvalcore.preprocessor.mask_multimodel`.

Minimum, maximum and interval masking
-------------------------------------
Expand Down
3 changes: 3 additions & 0 deletions esmvalcore/preprocessor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
mask_inside_range,
mask_landsea,
mask_landseaice,
mask_multimodel,
mask_outside_range,
)
from ._multimodel import multi_model_statistics
Expand Down Expand Up @@ -113,6 +114,7 @@
# Point interpolation
'extract_point',
# Masking missing values
'mask_multimodel',
'mask_fillvalues',
'mask_above_threshold',
'mask_below_threshold',
Expand Down Expand Up @@ -182,6 +184,7 @@

# Names of the preprocessor functions grouped as "multi model" functions.
# NOTE(review): presumably these steps act on all datasets of a variable at
# once rather than on a single dataset -- confirm where this set is consumed.
MULTI_MODEL_FUNCTIONS = {
    'multi_model_statistics',
    'mask_multimodel',
    'mask_fillvalues',
}

Expand Down
93 changes: 93 additions & 0 deletions esmvalcore/preprocessor/_mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import os

import cartopy.io.shapereader as shpreader
import dask.array as da
import iris
import numpy as np
import shapely.vectorized as shp_vect
Expand Down Expand Up @@ -527,6 +528,98 @@ def mask_outside_range(cube, minimum, maximum):
return cube


def _get_shape(cubes):
    """Return the one shape shared by all ``cubes``.

    Parameters
    ----------
    cubes : iterable
        Objects exposing a ``shape`` attribute.

    Returns
    -------
    tuple
        The shape common to every element of ``cubes``.

    Raises
    ------
    ValueError
        More than one distinct shape was found.
    """
    unique_shapes = set()
    for cube in cubes:
        unique_shapes.add(cube.shape)

    if len(unique_shapes) <= 1:
        # Indexing (rather than unpacking) keeps the IndexError for empty
        # input.
        return tuple(unique_shapes)[0]

    raise ValueError(
        f"Expected cubes with identical shapes, got shapes {unique_shapes}")


def _multimodel_mask_cubes(cubes, shape):
    """Give every cube the logical OR of all cubes' masks (in-place).

    Parameters
    ----------
    cubes : iterable of cubes
        Cubes whose ``data`` is replaced; each must provide ``core_data()``.
    shape : tuple
        Shape used to initialise the combined mask.

    Returns
    -------
    The same ``cubes`` object that was passed in.
    """
    # Start from "nothing masked" and accumulate each cube's mask
    common_mask = da.full(shape, False, dtype=bool)
    for cube in cubes:
        common_mask |= da.ma.getmaskarray(cube.core_data())

    # Second pass: every cube receives the identical combined mask
    for cube in cubes:
        payload = cube.core_data()
        cube.data = da.ma.masked_array(payload, mask=common_mask)

    return cubes


def _multimodel_mask_products(products, shape):
    """Give all cubes of all products one common mask (in-place).

    The common mask is the logical OR of the masks of every cube of every
    product. Products owning at least one cube with a masked element are
    remembered, and every product is afterwards recorded as derived from
    each of them (except from a product with the same ``filename``).

    Parameters
    ----------
    products : iterable of products
        Objects providing ``cubes``, ``filename`` and ``wasderivedfrom``.
    shape : tuple
        Shape used to initialise the combined mask.

    Returns
    -------
    The same ``products`` object that was passed in.
    """
    common_mask = da.full(shape, False, dtype=bool)
    contributors = set()

    # Pass 1: accumulate the mask and note which products contributed to it
    for product in products:
        for cube in product.cubes:
            cube_mask = da.ma.getmaskarray(cube.core_data())
            common_mask |= cube_mask
            if da.any(cube_mask):
                contributors.add(product)

    # Pass 2: apply the mask and register provenance links
    for product in products:
        for cube in product.cubes:
            payload = cube.core_data()
            cube.data = da.ma.masked_array(payload, mask=common_mask)
        for contributor in contributors:
            if contributor.filename == product.filename:
                # A product is not recorded as derived from itself
                continue
            product.wasderivedfrom(contributor)

    return products


def mask_multimodel(products):
    """Apply common mask to all datasets (using logical OR).

    Parameters
    ----------
    products : iris.cube.CubeList or list of PreprocessorFile
        Data products/cubes to be masked.

    Returns
    -------
    iris.cube.CubeList or list of PreprocessorFile
        Masked data products/cubes.

    Raises
    ------
    ValueError
        Datasets have different shapes.
    TypeError
        Invalid input data.

    """
    # Nothing to do for empty input; hand it back untouched
    if not products:
        return products

    # Case 1: plain iris cubes
    if all(isinstance(item, iris.cube.Cube) for item in products):
        common_shape = _get_shape(products)
        return _multimodel_mask_cubes(products, common_shape)

    # Case 2: preprocessor products. The class is matched by name to avoid
    # a circular import: https://stackoverflow.com/q/16964467
    if all(type(item).__name__ == 'PreprocessorFile' for item in products):
        all_cubes = iris.cube.CubeList()
        for product in products:
            all_cubes.extend(product.cubes)
        if not all_cubes:
            return products
        common_shape = _get_shape(all_cubes)
        return _multimodel_mask_products(products, common_shape)

    # Anything else (including mixed input) is rejected
    found_types = {type(item) for item in products}
    raise TypeError(
        f"Input type for mask_multimodel not understood. Expected "
        f"iris.cube.Cube or esmvalcore.preprocessor.PreprocessorFile, "
        f"got {found_types}")


def mask_fillvalues(products,
threshold_fraction,
min_value=None,
Expand Down
38 changes: 38 additions & 0 deletions tests/integration/test_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2405,3 +2405,41 @@ def test_invalid_fx_var_cmip6(tmp_path, patched_datafinder, config_user):
get_recipe(tmp_path, content, config_user)
assert str(rec_err_exp.value) == INITIALIZATION_ERROR_MSG
assert msg in rec_err_exp.value.failed_tasks[0].message


def test_multimodel_mask(tmp_path, patched_datafinder, config_user):
    """Test ``mask_multimodel``.

    Builds a recipe with one diagnostic, one variable and three datasets
    that all use a preprocessor containing only ``mask_multimodel``, then
    checks that a single task with three products is created and that each
    product carries the ``mask_multimodel`` step with empty settings.
    """
    # The YAML nesting matters: ``preproc`` lives under ``preprocessors`` and
    # the variable definition under ``diagnostics``.
    content = dedent("""
        preprocessors:
          preproc:
            mask_multimodel:

        diagnostics:
          diagnostic_name:
            variables:
              tas:
                preprocessor: preproc
                project: CMIP5
                mip: Amon
                exp: historical
                start_year: 2005
                end_year: 2005
                ensemble: r1i1p1
                additional_datasets:
                  - {dataset: BNU-ESM}
                  - {dataset: CanESM2}
                  - {dataset: HadGEM2-ES}
            scripts: null
        """)
    recipe = get_recipe(tmp_path, content, config_user)

    # Check generated tasks
    assert len(recipe.tasks) == 1
    task = recipe.tasks.pop()
    assert task.name == f'diagnostic_name{TASKSEP}tas'

    # Check mask_multimodel: one product per dataset, step present, no kwargs
    assert len(task.products) == 3
    for product in task.products:
        assert 'mask_multimodel' in product.settings
        assert product.settings['mask_multimodel'] == {}
Loading

0 comments on commit d50452c

Please sign in to comment.