diff --git a/doc/conf.py b/doc/conf.py index b417b69640..2375e00cde 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -438,20 +438,21 @@ # Configuration for intersphinx intersphinx_mapping = { - 'cf_units': ('https://cf-units.readthedocs.io/en/latest/', None), + 'cf_units': ('https://cf-units.readthedocs.io/en/stable/', None), 'cftime': ('https://unidata.github.io/cftime/', None), 'esmvalcore': (f'https://docs.esmvaltool.org/projects/ESMValCore/en/{rtd_version}/', None), 'esmvaltool': (f'https://docs.esmvaltool.org/en/{rtd_version}/', None), + 'esmpy': ('https://earthsystemmodeling.org/esmpy_doc/release/latest/html/', + None), 'dask': ('https://docs.dask.org/en/stable/', None), 'distributed': ('https://distributed.dask.org/en/stable/', None), - 'iris': ('https://scitools-iris.readthedocs.io/en/latest/', None), - 'iris-esmf-regrid': ('https://iris-esmf-regrid.readthedocs.io/en/latest', - None), + 'iris': ('https://scitools-iris.readthedocs.io/en/stable/', None), + 'esmf_regrid': ('https://iris-esmf-regrid.readthedocs.io/en/stable/', None), 'matplotlib': ('https://matplotlib.org/stable/', None), 'numpy': ('https://numpy.org/doc/stable/', None), - 'pyesgf': ('https://esgf-pyclient.readthedocs.io/en/latest/', None), + 'pyesgf': ('https://esgf-pyclient.readthedocs.io/en/stable/', None), 'python': ('https://docs.python.org/3/', None), 'scipy': ('https://docs.scipy.org/doc/scipy/', None), } diff --git a/doc/quickstart/find_data.rst b/doc/quickstart/find_data.rst index cc2c87dfeb..0765a7a9cd 100644 --- a/doc/quickstart/find_data.rst +++ b/doc/quickstart/find_data.rst @@ -398,32 +398,17 @@ ESMValCore can automatically make native ICON data `UGRID loading the data. The UGRID conventions provide a standardized format to store data on unstructured grids, which is required by many software packages or tools to -work correctly. +work correctly and specifically by Iris to interpret the grid as a +:ref:`mesh `. An example is the horizontal regridding of native ICON data to a regular grid. -While the built-in :ref:`nearest scheme ` can handle unstructured grids not in UGRID format, using more complex -regridding algorithms (for example provided by the -:doc:`iris-esmf-regrid:index` package through :ref:`generic regridding -schemes`) requires the input data in UGRID format. -The following code snippet provides a preprocessor that regrids native ICON -data to a 1°x1° grid using `ESMF's first-order conservative regridding -algorithm `__: - -.. code-block:: yaml - - preprocessors: - regrid_icon: - regrid: - target_grid: 1x1 - scheme: - reference: esmf_regrid.schemes:ESMFAreaWeighted - +While the :ref:`built-in regridding schemes ` +`linear` and `nearest` can handle unstructured grids (i.e., not UGRID-compliant) and meshes (i.e., UGRID-compliant), +the `area_weighted` scheme requires the input data in UGRID format. This automatic UGRIDization is enabled by default, but can be switched off with the facet ``ugrid: false`` in the recipe or the extra facets (see below). -This is useful for diagnostics that do not support input data in UGRID format -(yet) like the :ref:`Psyplot diagnostic ` or -if you want to use the built-in :ref:`nearest scheme ` regridding scheme. +This is useful for diagnostics that act on the native ICON grid and do not +support input data in UGRID format (yet), like the +:ref:`Psyplot diagnostic `. For 3D ICON variables, ESMValCore tries to add the pressure level information (from the variables `pfull` and `phalf`) and/or altitude information (from the @@ -574,7 +559,7 @@ model output. .. 
warning:: - This is the first version of ACCESS-ESM CMORizer for ESMValCore. Currently, + This is the first version of ACCESS-ESM CMORizer for ESMValCore. Currently, Supported variables: ``pr``, ``ps``, ``psl``, ``rlds``, ``tas``, ``ta``, ``va``, ``ua``, ``zg``, ``hus``, ``clt``, ``rsus``, ``rlus``. @@ -585,7 +570,7 @@ The default naming conventions for input directories and files for ACCESS output .. hint:: - We only provide one default `input_dir` since this is how ACCESS-ESM native data was + We only provide one default `input_dir` since this is how ACCESS-ESM native data was stored on NCI. Users can modify this path in the :ref:`config-developer` to match their local file structure. @@ -594,7 +579,7 @@ Thus, example dataset entries could look like this: .. code-block:: yaml dataset: - - {project: ACCESS, mip: Amon, dataset:ACCESS_ESM1_5, sub_dataset: HI-CN-05, + - {project: ACCESS, mip: Amon, dataset:ACCESS_ESM1_5, sub_dataset: HI-CN-05, exp: history, modeling_realm: atm, special_attr: pa, start_year: 1986, end_year: 1986} @@ -612,15 +597,15 @@ Key Description Default value if not ==================== ====================================== ================================= ``raw_name`` Variable name of the variable in the CMOR variable name of the raw input file corresponding variable -``modeling_realm`` Realm attribute include `atm`, `ice` No default (needs to be +``modeling_realm`` Realm attribute include `atm`, `ice` No default (needs to be and `oce` specified in extra facets or recipe if default DRS is used) ```special_attr`` A special attribute in the filename No default - `ACCESS-ESM` raw data, it's related to + `ACCESS-ESM` raw data, it's related to frquency of raw data ``sub_dataset`` Part of the ACCESS-ESM raw dataset No default root, need to specify if you want to - use the cmoriser + use the cmoriser ==================== ====================================== ================================= .. _data-retrieval: @@ -860,7 +845,7 @@ about this since we can point the user to the specific functionality `here `_ but we will underline that the initial loading is done by adhering to the CF Conventions that `iris` operates by as well (see `CF Conventions Document `_ and the search -page for CF `standard names `_). +page for CF `standard names `_). Data concatenation from multiple sources ======================================== diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index dd5e61b36b..d25d87a34e 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -890,15 +890,15 @@ The arguments are defined below: Regridding (interpolation, extrapolation) schemes ------------------------------------------------- -ESMValCore has a number of built-in regridding schemes, which are presented in -:ref:`built-in regridding schemes`. Additionally, it is also possible to use -third party regridding schemes designed for use with :doc:`Iris -`. This is explained in :ref:`generic regridding schemes`. +ESMValCore provides three default regridding schemes, which are presented in +:ref:`default regridding schemes`. Additionally, it is also possible to use +third party regridding schemes designed for use with :meth:`iris.cube.Cube.regrid`. +This is explained in :ref:`generic regridding schemes`. 
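+In practice, a regridding scheme is any object that can be passed to
+:meth:`iris.cube.Cube.regrid`. As a minimal sketch (the cube names below are
+placeholders, not part of this change), the default ``linear`` scheme for
+regular source grids is roughly equivalent to calling:
+
+.. code-block:: python
+
+    from iris.analysis import Linear
+
+    # Bilinear regridding with extrapolated points masked, as used by the
+    # default ``linear`` scheme for regular source grids.
+    regridded_cube = source_cube.regrid(
+        target_grid_cube, Linear(extrapolation_mode='mask')
+    )
+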
Grid types ~~~~~~~~~~ -In ESMValCore, we distinguish between three grid types (note that these might +In ESMValCore, we distinguish between various grid types (note that these might differ from other definitions): * **Regular grid**: A rectilinear grid with 1D latitude and 1D longitude @@ -907,30 +907,34 @@ differ from other definitions): longitude coordinates with common dimensions. * **Unstructured grid**: A grid with 1D latitude and 1D longitude coordinates with common dimensions (i.e., a simple list of points). +* **Mesh**: A mesh as supported by Iris and described in :ref:`iris:ugrid`. -.. _built-in regridding schemes: +.. _default regridding schemes: -Built-in regridding schemes -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Default regridding schemes +~~~~~~~~~~~~~~~~~~~~~~~~~~ * ``linear``: Bilinear regridding. For source data on a regular grid, uses :obj:`~iris.analysis.Linear` with `extrapolation_mode='mask'`. - For source data on an irregular grid, uses - :class:`~esmvalcore.preprocessor.regrid_schemes.ESMPyLinear`. + For source and/or target data on an irregular grid or mesh, uses + :class:`~esmvalcore.preprocessor.regrid_schemes.IrisESMFRegrid` with + `method='bilinear'`. For source data on an unstructured grid, uses :class:`~esmvalcore.preprocessor.regrid_schemes.UnstructuredLinear`. * ``nearest``: Nearest-neighbor regridding. For source data on a regular grid, uses :obj:`~iris.analysis.Nearest` with `extrapolation_mode='mask'`. - For source data on an irregular grid, uses - :class:`~esmvalcore.preprocessor.regrid_schemes.ESMPyNearest`. + For source and/or target data on an irregular grid or mesh, uses + :class:`~esmvalcore.preprocessor.regrid_schemes.IrisESMFRegrid` with + `method='nearest'`. For source data on an unstructured grid, uses :class:`~esmvalcore.preprocessor.regrid_schemes.UnstructuredNearest`. * ``area_weighted``: First-order conservative (area-weighted) regridding. For source data on a regular grid, uses :obj:`~iris.analysis.AreaWeighted`. - For source data on an irregular grid, uses - :class:`~esmvalcore.preprocessor.regrid_schemes.ESMPyAreaWeighted`. + For source and/or target data on an irregular grid or mesh, uses + :class:`~esmvalcore.preprocessor.regrid_schemes.IrisESMFRegrid` with + `method='conservative'`. Source data on an unstructured grid is not supported. .. _generic regridding schemes: @@ -950,7 +954,9 @@ afforded by the built-in schemes described above. To facilitate this, the :func:`~esmvalcore.preprocessor.regrid` preprocessor allows the use of any scheme designed for Iris. The scheme must be installed -and importable. To use this feature, the ``scheme`` key passed to the +and importable. Several such schemes are provided by :mod:`iris.analysis` and +:mod:`esmvalcore.preprocessor.regrid_schemes`. +To use this feature, the ``scheme`` key passed to the preprocessor must be a dictionary instead of a simple string that contains all necessary information. That includes a ``reference`` to the desired scheme itself, as well as any arguments that should be passed through to the @@ -996,10 +1002,13 @@ module, the second refers to the scheme, i.e. some callable that will be called with the remaining entries of the ``scheme`` dictionary passed as keyword arguments. -One package that aims to capitalize on the :ref:`support for unstructured grids -introduced in Iris 3.2 ` is :doc:`iris-esmf-regrid:index`. +One package that aims to capitalize on the :ref:`support for meshes +introduced in Iris 3.2 ` is :doc:`esmf_regrid:index`. 
It aims to provide lazy regridding for structured regular and irregular grids, -as well as unstructured grids. +as well as meshes. It is recommended to use these schemes through +the :obj:`esmvalcore.preprocessor.regrid_schemes.IrisESMFRegrid` scheme though, +as that provides more efficient handling of masks. + An example of its usage in a preprocessor is: .. code-block:: yaml @@ -1009,8 +1018,11 @@ An example of its usage in a preprocessor is: regrid: target_grid: 2.5x2.5 scheme: - reference: esmf_regrid.schemes:ESMFAreaWeighted + reference: esmvalcore.preprocessor.regrid_schemes:IrisESMFRegrid + method: conservative mdtol: 0.7 + use_src_mask: true + collapse_src_mask_along: ZT Additionally, the use of generic schemes that take source and target grid cubes as arguments is also supported. The call function for such schemes must be defined as @@ -1018,7 +1030,7 @@ arguments is also supported. The call function for such schemes must be defined The `regrid` module will automatically pass the source and grid cubes as inputs of the scheme. An example of this usage is the :func:`~esmf_regrid.schemes.regrid_rectilinear_to_rectilinear` -scheme available in :doc:`iris-esmf-regrid:index`: +scheme available in :doc:`esmf_regrid:index`: .. code-block:: yaml diff --git a/environment.yml b/environment.yml index 20795a7f94..00696ef452 100644 --- a/environment.yml +++ b/environment.yml @@ -19,7 +19,7 @@ dependencies: - geopy - humanfriendly - iris >=3.10.0 - - iris-esmf-regrid >=0.10.0 # github.com/SciTools-incubator/iris-esmf-regrid/pull/342 + - iris-esmf-regrid >=0.11.0 - iris-grib - isodate - jinja2 diff --git a/esmvalcore/preprocessor/_mask.py b/esmvalcore/preprocessor/_mask.py index cdedcc6391..f7db814009 100644 --- a/esmvalcore/preprocessor/_mask.py +++ b/esmvalcore/preprocessor/_mask.py @@ -8,23 +8,33 @@ import logging import os +from collections.abc import Iterable +from typing import Literal, Optional import cartopy.io.shapereader as shpreader import dask.array as da import iris +import iris.util import numpy as np import shapely.vectorized as shp_vect from iris.analysis import Aggregator +from iris.cube import Cube from iris.util import rolling_window +from esmvalcore.preprocessor._shared import get_array_module + from ._supplementary_vars import register_supplementaries logger = logging.getLogger(__name__) -def _get_fx_mask(fx_data, fx_option, mask_type): +def _get_fx_mask( + fx_data: np.ndarray | da.Array, + fx_option: Literal['land', 'sea', 'landsea', 'ice'], + mask_type: Literal['sftlf', 'sftof', 'sftgif'], +) -> np.ndarray | da.Array: """Build a percentage-thresholded mask from an fx file.""" - inmask = da.zeros_like(fx_data, bool) + inmask = np.zeros_like(fx_data, bool) # respects dask through dispatch if mask_type == 'sftlf': if fx_option == 'land': # Mask land out @@ -50,22 +60,29 @@ def _get_fx_mask(fx_data, fx_option, mask_type): return inmask -def _apply_fx_mask(fx_mask, var_data): - """Apply the fx data extracted mask on the actual processed data.""" - # Apply mask across - old_mask = da.ma.getmaskarray(var_data) - mask = old_mask | fx_mask - var_data = da.ma.masked_array(var_data, mask=mask) - # maybe fill_value=1e+20 - - return var_data +def _apply_mask( + mask: np.ndarray | da.Array, + array: np.ndarray | da.Array, + dim_map: Optional[Iterable[int]] = None, +) -> np.ndarray | da.Array: + """Apply a (broadcasted) mask on an array.""" + npx = get_array_module(mask, array) + if dim_map is not None: + if isinstance(array, da.Array): + chunks = array.chunks + else: + chunks = None + 
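+        # Passing the array's dask chunks keeps the broadcast mask lazy and
+        # aligned with the data; for in-memory (NumPy) input, chunks=None
+        # lets iris broadcast eagerly.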
mask = iris.util.broadcast_to_shape( + mask, array.shape, dim_map, chunks=chunks + ) + return npx.ma.masked_where(mask, array) @register_supplementaries( variables=['sftlf', 'sftof'], required='prefer_at_least_one', ) -def mask_landsea(cube, mask_out): +def mask_landsea(cube: Cube, mask_out: Literal['land', 'sea']) -> Cube: """Mask out either land mass or sea (oceans, seas and lakes). It uses dedicated ancillary variables (sftlf or sftof) or, @@ -78,16 +95,15 @@ def mask_landsea(cube, mask_out): Parameters ---------- - cube: iris.cube.Cube - data cube to be masked. If the cube has an + cube: + Data cube to be masked. If the cube has an :class:`iris.coords.AncillaryVariable` with standard name ``'land_area_fraction'`` or ``'sea_area_fraction'`` that will be used. If both are present, only the 'land_area_fraction' will be used. If the ancillary variable is not available, the mask will be calculated from Natural Earth shapefiles. - - mask_out: str - either "land" to mask out land mass or "sea" to mask out seas. + mask_out: + Either ``'land'`` to mask out land mass or ``'sea'`` to mask out seas. Returns ------- @@ -112,35 +128,40 @@ def mask_landsea(cube, mask_out): } # preserve importance order: try stflf first then sftof - fx_cube = None + ancillary_var = None try: - fx_cube = cube.ancillary_variable('land_area_fraction') + ancillary_var = cube.ancillary_variable('land_area_fraction') except iris.exceptions.AncillaryVariableNotFoundError: try: - fx_cube = cube.ancillary_variable('sea_area_fraction') + ancillary_var = cube.ancillary_variable('sea_area_fraction') except iris.exceptions.AncillaryVariableNotFoundError: - logger.debug('Ancillary variables land/sea area fraction not ' - 'found in cube. Check fx_file availability.') - - if fx_cube: - fx_cube_data = da.broadcast_to(fx_cube.core_data(), cube.shape) - landsea_mask = _get_fx_mask(fx_cube_data, mask_out, - fx_cube.var_name) - cube.data = _apply_fx_mask(landsea_mask, cube.core_data()) - logger.debug("Applying land-sea mask: %s", fx_cube.var_name) + logger.debug( + "Ancillary variables land/sea area fraction not found in " + "cube. Check fx_file availability." + ) + + if ancillary_var: + landsea_mask = _get_fx_mask( + ancillary_var.core_data(), mask_out, ancillary_var.var_name + ) + cube.data = _apply_mask( + landsea_mask, + cube.core_data(), + cube.ancillary_variable_dims(ancillary_var), + ) + logger.debug("Applying land-sea mask: %s", ancillary_var.var_name) else: if cube.coord('longitude').points.ndim < 2: - cube = _mask_with_shp(cube, shapefiles[mask_out], [ - 0, - ]) + cube = _mask_with_shp(cube, shapefiles[mask_out], [0]) logger.debug( "Applying land-sea mask from Natural Earth shapefile: \n%s", shapefiles[mask_out], ) else: - msg = ("Use of shapefiles with irregular grids not yet " - "implemented, land-sea mask not applied.") - raise ValueError(msg) + raise ValueError( + "Use of shapefiles with irregular grids not yet implemented, " + "land-sea mask not applied." + ) return cube @@ -149,7 +170,7 @@ def mask_landsea(cube, mask_out): variables=['sftgif'], required='require_at_least_one', ) -def mask_landseaice(cube, mask_out): +def mask_landseaice(cube: Cube, mask_out: Literal['landsea', 'ice']) -> Cube: """Mask out either landsea (combined) or ice. Function that masks out either landsea (land and seas) or ice (Antarctica, @@ -159,13 +180,13 @@ def mask_landseaice(cube, mask_out): Parameters ---------- - cube: iris.cube.Cube - data cube to be masked. It should have an + cube: + Data cube to be masked. 
It should have an :class:`iris.coords.AncillaryVariable` with standard name ``'land_ice_area_fraction'``. - mask_out: str - either "landsea" to mask out landsea or "ice" to mask out ice. + Either ``'landsea'`` to mask out land and oceans or ``'ice'`` to mask + out ice. Returns ------- @@ -178,20 +199,26 @@ def mask_landseaice(cube, mask_out): Error raised if landsea-ice mask not found as an ancillary variable. """ # sftgif is the only one so far but users can set others - fx_cube = None + ancillary_var = None try: - fx_cube = cube.ancillary_variable('land_ice_area_fraction') + ancillary_var = cube.ancillary_variable('land_ice_area_fraction') except iris.exceptions.AncillaryVariableNotFoundError: - logger.debug('Ancillary variable land ice area fraction ' - 'not found in cube. Check fx_file availability.') - if fx_cube: - fx_cube_data = da.broadcast_to(fx_cube.core_data(), cube.shape) - landice_mask = _get_fx_mask(fx_cube_data, mask_out, fx_cube.var_name) - cube.data = _apply_fx_mask(landice_mask, cube.core_data()) + logger.debug( + "Ancillary variable land ice area fraction not found in cube. " + "Check fx_file availability." + ) + if ancillary_var: + landseaice_mask = _get_fx_mask( + ancillary_var.core_data(), mask_out, ancillary_var.var_name + ) + cube.data = _apply_mask( + landseaice_mask, + cube.core_data(), + cube.ancillary_variable_dims(ancillary_var), + ) logger.debug("Applying landsea-ice mask: sftgif") else: - msg = "Landsea-ice mask could not be found. Stopping. " - raise ValueError(msg) + raise ValueError("Landsea-ice mask could not be found. Stopping.") return cube @@ -285,9 +312,10 @@ def _mask_with_shp(cube, shapefilename, region_indices=None): # Create a set of x,y points from the cube # 1D regular grids if cube.coord('longitude').points.ndim < 2: - x_p, y_p = da.meshgrid( + x_p, y_p = np.meshgrid( cube.coord(axis='X').points, - cube.coord(axis='Y').points) + cube.coord(axis='Y').points, + ) # 2D irregular grids; spit an error for now else: msg = ("No fx-files found (sftlf or sftof)!" 
@@ -296,14 +324,14 @@ def _mask_with_shp(cube, shapefilename, region_indices=None): raise ValueError(msg) # Wrap around longitude coordinate to match data - x_p_180 = da.where(x_p >= 180., x_p - 360., x_p) + x_p_180 = np.where(x_p >= 180., x_p - 360., x_p) # the NE mask has no points at x = -180 and y = +/-90 # so we will fool it and apply the mask at (-179, -89, 89) instead - x_p_180 = da.where(x_p_180 == -180., x_p_180 + 1., x_p_180) + x_p_180 = np.where(x_p_180 == -180., x_p_180 + 1., x_p_180) - y_p_0 = da.where(y_p == -90., y_p + 1., y_p) - y_p_90 = da.where(y_p_0 == 90., y_p_0 - 1., y_p_0) + y_p_0 = np.where(y_p == -90., y_p + 1., y_p) + y_p_90 = np.where(y_p_0 == 90., y_p_0 - 1., y_p_0) mask = None for region in regions: @@ -313,13 +341,14 @@ def _mask_with_shp(cube, shapefilename, region_indices=None): else: mask |= shp_vect.contains(region, x_p_180, y_p_90) - mask = da.array(mask) - iris.util.broadcast_to_shape(mask, cube.shape, cube.coord_dims('latitude') - + cube.coord_dims('longitude')) + if cube.has_lazy_data(): + mask = da.array(mask) - old_mask = da.ma.getmaskarray(cube.core_data()) - mask = old_mask | mask - cube.data = da.ma.masked_array(cube.core_data(), mask=mask) + cube.data = _apply_mask( + mask, + cube.core_data(), + cube.coord_dims('latitude') + cube.coord_dims('longitude'), + ) return cube diff --git a/esmvalcore/preprocessor/_regrid.py b/esmvalcore/preprocessor/_regrid.py index 5587450adf..228bd7dc26 100644 --- a/esmvalcore/preprocessor/_regrid.py +++ b/esmvalcore/preprocessor/_regrid.py @@ -30,6 +30,9 @@ from esmvalcore.cmor.table import CMOR_TABLES from esmvalcore.exceptions import ESMValCoreDeprecationWarning from esmvalcore.iris_helpers import has_irregular_grid, has_unstructured_grid +from esmvalcore.preprocessor._shared import ( + get_dims_along_axes, +) from esmvalcore.preprocessor._shared import ( get_array_module, preserve_float_dtype, @@ -39,10 +42,8 @@ add_cell_measure, ) from esmvalcore.preprocessor.regrid_schemes import ( - ESMPyAreaWeighted, - ESMPyLinear, - ESMPyNearest, GenericFuncScheme, + IrisESMFRegrid, UnstructuredLinear, UnstructuredNearest, ) @@ -91,9 +92,17 @@ # curvilinear grids; i.e., grids that can be described with 2D latitude and 2D # longitude coordinates with common dimensions) HORIZONTAL_SCHEMES_IRREGULAR = { - 'area_weighted': ESMPyAreaWeighted(), - 'linear': ESMPyLinear(), - 'nearest': ESMPyNearest(), + 'area_weighted': IrisESMFRegrid(method='conservative'), + 'linear': IrisESMFRegrid(method='bilinear'), + 'nearest': IrisESMFRegrid(method='nearest'), +} + +# Supported horizontal regridding schemes for meshes +# https://scitools-iris.readthedocs.io/en/stable/further_topics/ugrid/index.html +HORIZONTAL_SCHEMES_MESH = { + 'area_weighted': IrisESMFRegrid(method='conservative'), + 'linear': IrisESMFRegrid(method='bilinear'), + 'nearest': IrisESMFRegrid(method='nearest'), } # Supported horizontal regridding schemes for unstructured grids (i.e., grids, @@ -533,29 +542,7 @@ def _get_target_grid_cube( return target_grid_cube -def _attempt_irregular_regridding(cube: Cube, scheme: str) -> bool: - """Check if irregular regridding with ESMF should be used.""" - if not has_irregular_grid(cube): - return False - if scheme not in HORIZONTAL_SCHEMES_IRREGULAR: - raise ValueError( - f"Regridding scheme '{scheme}' does not support irregular data, " - f"expected one of {list(HORIZONTAL_SCHEMES_IRREGULAR)}") - return True - - -def _attempt_unstructured_regridding(cube: Cube, scheme: str) -> bool: - """Check if unstructured regridding should be used.""" - 
if not has_unstructured_grid(cube): - return False - if scheme not in HORIZONTAL_SCHEMES_UNSTRUCTURED: - raise ValueError( - f"Regridding scheme '{scheme}' does not support unstructured " - f"data, expected one of {list(HORIZONTAL_SCHEMES_UNSTRUCTURED)}") - return True - - -def _load_scheme(src_cube: Cube, scheme: str | dict): +def _load_scheme(src_cube: Cube, tgt_cube: Cube, scheme: str | dict): """Return scheme that can be used in :meth:`iris.cube.Cube.regrid`.""" loaded_scheme: Any = None @@ -586,23 +573,27 @@ def _load_scheme(src_cube: Cube, scheme: str | dict): logger.debug("Loaded regridding scheme %s", loaded_scheme) return loaded_scheme - # Scheme is a dict -> assume this describes a generic regridding scheme if isinstance(scheme, dict): + # Scheme is a dict -> assume this describes a generic regridding scheme loaded_scheme = _load_generic_scheme(scheme) - - # Scheme is a str -> load appropriate regridding scheme depending on the - # type of input data - elif _attempt_irregular_regridding(src_cube, scheme): - loaded_scheme = HORIZONTAL_SCHEMES_IRREGULAR[scheme] - elif _attempt_unstructured_regridding(src_cube, scheme): - loaded_scheme = HORIZONTAL_SCHEMES_UNSTRUCTURED[scheme] else: - loaded_scheme = HORIZONTAL_SCHEMES_REGULAR.get(scheme) - - if loaded_scheme is None: - raise ValueError( - f"Got invalid regridding scheme string '{scheme}', expected one " - f"of {list(HORIZONTAL_SCHEMES_REGULAR)}") + # Scheme is a str -> load appropriate regridding scheme depending on + # the type of input data + if has_irregular_grid(src_cube) or has_irregular_grid(tgt_cube): + grid_type = 'irregular' + elif src_cube.mesh is not None or tgt_cube.mesh is not None: + grid_type = 'mesh' + elif has_unstructured_grid(src_cube): + grid_type = 'unstructured' + else: + grid_type = 'regular' + + schemes = globals()[f"HORIZONTAL_SCHEMES_{grid_type.upper()}"] + if scheme not in schemes: + raise ValueError( + f"Regridding scheme '{scheme}' not available for {grid_type} " + f"data, expected one of: {', '.join(schemes)}") + loaded_scheme = schemes[scheme] logger.debug("Loaded regridding scheme %s", loaded_scheme) @@ -676,14 +667,14 @@ def _get_regridder( return regridder # Regridder is not in cached -> return a new one and cache it - loaded_scheme = _load_scheme(src_cube, scheme) + loaded_scheme = _load_scheme(src_cube, tgt_cube, scheme) regridder = loaded_scheme.regridder(src_cube, tgt_cube) _CACHED_REGRIDDERS.setdefault(name_shape_key, {}) _CACHED_REGRIDDERS[name_shape_key][coord_key] = regridder # (2) Weights caching disabled else: - loaded_scheme = _load_scheme(src_cube, scheme) + loaded_scheme = _load_scheme(src_cube, tgt_cube, scheme) regridder = loaded_scheme.regridder(src_cube, tgt_cube) return regridder @@ -860,36 +851,40 @@ def _cache_clear(): def _rechunk(cube: Cube, target_grid: Cube) -> Cube: """Re-chunk cube with optimal chunk sizes for target grid.""" - if not cube.has_lazy_data() or cube.ndim < 3: - # Only rechunk lazy multidimensional data + if not cube.has_lazy_data(): + # Only rechunk lazy data return cube - lon_coord = target_grid.coord(axis='X') - lat_coord = target_grid.coord(axis='Y') - if lon_coord.ndim != 1 or lat_coord.ndim != 1: - # This function only supports 1D lat/lon coordinates. 
- return cube + # Extract grid dimension information from source cube + src_grid_indices = get_dims_along_axes(cube, ["X", "Y"]) + src_grid_shape = tuple(cube.shape[i] for i in src_grid_indices) + src_grid_ndims = len(src_grid_indices) - lon_dim, = target_grid.coord_dims(lon_coord) - lat_dim, = target_grid.coord_dims(lat_coord) - grid_indices = sorted((lon_dim, lat_dim)) - target_grid_shape = tuple(target_grid.shape[i] for i in grid_indices) + # Extract grid dimension information from target cube. + tgt_grid_indices = get_dims_along_axes(target_grid, ["X", "Y"]) + tgt_grid_shape = tuple(target_grid.shape[i] for i in tgt_grid_indices) + tgt_grid_ndims = len(tgt_grid_indices) - if 2 * np.prod(cube.shape[-2:]) > np.prod(target_grid_shape): + if 2 * np.prod(src_grid_shape) > np.prod(tgt_grid_shape): # Only rechunk if target grid is more than a factor of 2 larger, # because rechunking will keep the original chunk in memory. return cube + # Compute a good chunk size for the target array + # This uses the fact that horizontal dimension(s) are the last dimension(s) + # of the input cube and also takes into account that iris regridding needs + # unchunked data along the grid dimensions. data = cube.lazy_data() + tgt_shape = data.shape[:-src_grid_ndims] + tgt_grid_shape + tgt_chunks = data.chunks[:-src_grid_ndims] + tgt_grid_shape - # Compute a good chunk size for the target array - tgt_shape = data.shape[:-2] + target_grid_shape - tgt_chunks = data.chunks[:-2] + target_grid_shape - tgt_data = da.empty(tgt_shape, dtype=data.dtype, chunks=tgt_chunks) - tgt_data = tgt_data.rechunk({i: "auto" for i in range(cube.ndim - 2)}) + tgt_data = da.empty(tgt_shape, chunks=tgt_chunks, dtype=data.dtype) + tgt_data = tgt_data.rechunk( + {i: "auto" + for i in range(tgt_data.ndim - tgt_grid_ndims)}) # Adjust chunks to source array and rechunk - chunks = tgt_data.chunks[:-2] + data.shape[-2:] + chunks = tgt_data.chunks[:-tgt_grid_ndims] + data.shape[-src_grid_ndims:] cube.data = data.rechunk(chunks) return cube diff --git a/esmvalcore/preprocessor/_regrid_esmpy.py b/esmvalcore/preprocessor/_regrid_esmpy.py index c7edfa829c..cce6dae92f 100755 --- a/esmvalcore/preprocessor/_regrid_esmpy.py +++ b/esmvalcore/preprocessor/_regrid_esmpy.py @@ -9,10 +9,13 @@ import ESMF as esmpy # noqa: N811 except ImportError: raise exc +import warnings import iris import numpy as np from iris.cube import Cube +from esmvalcore.exceptions import ESMValCoreDeprecationWarning + from ._mapping import get_empty_data, map_slices, ref_to_dims_index ESMF_MANAGER = esmpy.Manager(debug=False) @@ -45,6 +48,12 @@ class ESMPyRegridder: Does not support lazy regridding nor weights caching. + .. deprecated:: 2.12.0 + This regridder has been deprecated and is scheduled for removal in + version 2.14.0. Please use + :class:`~esmvalcore.preprocessor.regrid_schemes.IrisESMFRegrid` to + create an :doc:`esmf_regrid:index` regridder instead. + Parameters ---------- src_cube: @@ -122,6 +131,15 @@ class _ESMPyScheme: def __init__(self, mask_threshold: float = 0.99): """Initialize class instance.""" + msg = ( + "The `esmvalcore.preprocessor.regrid_schemes." + f"{self.__class__.__name__}' regridding scheme has been " + "deprecated in ESMValCore version 2.12.0 and is scheduled for " + "removal in version 2.14.0. Please use " + "`esmvalcore.preprocessor.regrid_schemes.IrisESMFRegrid` " + "instead." 
+ ) + warnings.warn(msg, ESMValCoreDeprecationWarning) self.mask_threshold = mask_threshold def __repr__(self) -> str: @@ -161,6 +179,12 @@ class ESMPyAreaWeighted(_ESMPyScheme): Does not support lazy regridding. + .. deprecated:: 2.12.0 + This scheme has been deprecated and is scheduled for removal in version + 2.14.0. Please use + :class:`~esmvalcore.preprocessor.regrid_schemes.IrisESMFRegrid` + instead. + """ _METHOD = 'area_weighted' @@ -173,6 +197,12 @@ class ESMPyLinear(_ESMPyScheme): Does not support lazy regridding. + .. deprecated:: 2.12.0 + This scheme has been deprecated and is scheduled for removal in version + 2.14.0. Please use + :class:`~esmvalcore.preprocessor.regrid_schemes.IrisESMFRegrid` + instead. + """ _METHOD = 'linear' @@ -185,6 +215,12 @@ class ESMPyNearest(_ESMPyScheme): Does not support lazy regridding. + .. deprecated:: 2.12.0 + This scheme has been deprecated and is scheduled for removal in version + 2.14.0. Please use + :class:`~esmvalcore.preprocessor.regrid_schemes.IrisESMFRegrid` + instead. + """ _METHOD = 'nearest' diff --git a/esmvalcore/preprocessor/_regrid_iris_esmf_regrid.py b/esmvalcore/preprocessor/_regrid_iris_esmf_regrid.py new file mode 100644 index 0000000000..c09cc19835 --- /dev/null +++ b/esmvalcore/preprocessor/_regrid_iris_esmf_regrid.py @@ -0,0 +1,234 @@ +"""Iris-esmf-regrid based regridding scheme.""" +from __future__ import annotations + +from collections.abc import Iterable +from typing import Any, Literal + +import dask +import dask.array as da +import iris.cube +import iris.exceptions +import numpy as np +from esmf_regrid.schemes import ( + ESMFAreaWeightedRegridder, + ESMFBilinearRegridder, + ESMFNearestRegridder, +) + +from esmvalcore.preprocessor._shared import ( + get_dims_along_axes, + get_dims_along_coords, +) + +METHODS = { + 'conservative': ESMFAreaWeightedRegridder, + 'bilinear': ESMFBilinearRegridder, + 'nearest': ESMFNearestRegridder, +} + + +class IrisESMFRegrid: + """:doc:`esmf_regrid:index` based regridding scheme. + + Supports lazy regridding. + + Parameters + ---------- + method: + Either "conservative", "bilinear" or "nearest". Corresponds to the + :mod:`esmpy` methods + :attr:`~esmpy.api.constants.RegridMethod.CONSERVE`, + :attr:`~esmpy.api.constants.RegridMethod.BILINEAR` or + :attr:`~esmpy.api.constants.RegridMethod.NEAREST_STOD` used to + calculate regridding weights. + mdtol: + Tolerance of missing data. The value returned in each element of + the returned array will be masked if the fraction of masked data + exceeds ``mdtol``. ``mdtol=0`` means no missing data is tolerated while + ``mdtol=1`` will mean the resulting element will be masked if and only + if all the contributing elements of data are masked. If no value is + given, this will default to 1 for conservative regridding and 0 + otherwise. Only available for methods 'bilinear' and 'conservative'. + use_src_mask: + If True, derive a mask from the source cube data, + which will tell :mod:`esmpy` which points to ignore. If an array is + provided, that will be used. + If set to :obj:`None`, it will be set to :obj:`True` for methods + ``'bilinear'`` and ``'conservative'`` and to :obj:`False` for method + ``'nearest'``. This default may be changed to :obj:`True` for all + schemes once `SciTools-incubator/iris-esmf-regrid#368 + `_ + has been resolved. + use_tgt_mask: + If True, derive a mask from of the target cube, + which will tell :mod:`esmpy` which points to ignore. If an array is + provided, that will be used. 
+ If set to :obj:`None`, it will be set to :obj:`True` for methods + ``'bilinear'`` and ``'conservative'`` and to :obj:`False` for method + ``'nearest'``. This default may be changed to :obj:`True` for all + schemes once `SciTools-incubator/iris-esmf-regrid#368`_ has been + resolved. + collapse_src_mask_along: + When deriving the mask from the source cube data, collapse the mask + along the dimensions identified by these axes or coordinates. Only + points that are masked at all time (``'T'``), vertical levels + (``'Z'``), or both time and vertical levels (``'TZ'``) will be + considered masked. Instead of the axes ``'T'`` and ``'Z'``, + coordinate names can also be provided. For any cube dimensions not + specified here, the first slice along the coordinate will be used to + determine the mask. + collapse_tgt_mask_along: + When deriving the mask from the target cube data, collapse the mask + along the dimensions identified by these axes or coordinates. Only + points that are masked at all time (``'T'``), vertical levels + (``'Z'``), or both time and vertical levels (``'TZ'``) will be + considered masked. Instead of the axes ``'T'`` and ``'Z'``, + coordinate names can also be provided. For any cube dimensions not + specified here, the first slice along the coordinate will be used to + determine the mask. + src_resolution: + If present, represents the amount of latitude slices per source cell + given to ESMF for calculation. If resolution is set, the source cube + must have strictly increasing bounds (bounds may be transposed + plus or minus 360 degrees to make the bounds strictly increasing). + Only available for method 'conservative'. + tgt_resolution: + If present, represents the amount of latitude slices per target cell + given to ESMF for calculation. If resolution is set, the target cube + must have strictly increasing bounds (bounds may be transposed + plus or minus 360 degrees to make the bounds strictly increasing). + Only available for method 'conservative'. + tgt_location: + Only used if the target grid is an :class:`iris.mesh.MeshXY`. Describes + the location for data on the mesh. Either ``'face'`` or ``'node'`` for + bilinear or nearest neighbour regridding, can only be ``'face'`` for + first order conservative regridding. + + Attributes + ---------- + kwargs: + Keyword arguments that will be provided to the regridder. 
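+
+    Examples
+    --------
+    Regrid a cube onto a target grid (the cube names and keyword values here
+    are placeholders)::
+
+        scheme = IrisESMFRegrid(method='bilinear', mdtol=0.5)
+        regridded_cube = source_cube.regrid(target_grid_cube, scheme)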
+ """ + + def __init__( + self, + method: Literal["bilinear", "conservative", "nearest"], + mdtol: float | None = None, + use_src_mask: None | bool | np.ndarray = None, + use_tgt_mask: None | bool | np.ndarray = None, + collapse_src_mask_along: Iterable[str] = ('Z', ), + collapse_tgt_mask_along: Iterable[str] = ('Z', ), + src_resolution: int | None = None, + tgt_resolution: int | None = None, + tgt_location: Literal['face', 'node'] | None = None, + ) -> None: + if method not in METHODS: + raise ValueError( + "`method` should be one of 'bilinear', 'conservative', or " + "'nearest'") + + if use_src_mask is None: + use_src_mask = method != "nearest" + if use_tgt_mask is None: + use_tgt_mask = method != "nearest" + + self.kwargs: dict[str, Any] = { + 'method': method, + 'use_src_mask': use_src_mask, + 'use_tgt_mask': use_tgt_mask, + 'collapse_src_mask_along': collapse_src_mask_along, + 'collapse_tgt_mask_along': collapse_tgt_mask_along, + 'tgt_location': tgt_location, + } + if method == 'nearest': + if mdtol is not None: + raise TypeError( + "`mdol` can only be specified when `method='bilinear'` " + "or `method='conservative'`") + else: + self.kwargs['mdtol'] = mdtol + if method == 'conservative': + self.kwargs['src_resolution'] = src_resolution + self.kwargs['tgt_resolution'] = tgt_resolution + elif src_resolution is not None: + raise TypeError("`src_resolution` can only be specified when " + "`method='conservative'`") + elif tgt_resolution is not None: + raise TypeError("`tgt_resolution` can only be specified when " + "`method='conservative'`") + + def __repr__(self) -> str: + """Return string representation of class.""" + kwargs_str = ", ".join(f"{k}={repr(v)}" + for k, v in self.kwargs.items()) + return f'{self.__class__.__name__}({kwargs_str})' + + @staticmethod + def _get_mask( + cube: iris.cube.Cube, + collapse_mask_along: Iterable[str], + ) -> np.ndarray: + """Read the mask from the cube data. + + This function assumes that the mask is constant in dimensions + that are not horizontal or specified in `collapse_mask_along`. + """ + horizontal_dims = get_dims_along_axes(cube, ["X", "Y"]) + axes = tuple(elem for elem in collapse_mask_along + if isinstance(elem, str) and elem.upper() in ("T", "Z")) + other_dims = ( + get_dims_along_axes(cube, axes) + # type: ignore[arg-type] + get_dims_along_coords(cube, collapse_mask_along)) + + slices = tuple( + slice(None) if i in horizontal_dims + other_dims else 0 + for i in range(cube.ndim)) + subcube = cube[slices] + subcube_other_dims = ( + get_dims_along_axes(subcube, axes) + # type: ignore[arg-type] + get_dims_along_coords(subcube, collapse_mask_along)) + + mask = da.ma.getmaskarray(subcube.core_data()) + return mask.all(axis=subcube_other_dims) + + def regridder( + self, + src_cube: iris.cube.Cube, + tgt_cube: iris.cube.Cube | iris.mesh.MeshXY, + ) -> (ESMFAreaWeightedRegridder + | ESMFBilinearRegridder + | ESMFNearestRegridder): + """Create an :doc:`esmf_regrid:index` based regridding function. + + Parameters + ---------- + src_cube: + Cube defining the source grid. + tgt_cube: + Cube defining the target grid. + + Returns + ------- + :obj:`esmf_regrid.schemes.ESMFAreaWeightedRegridder` or + :obj:`esmf_regrid.schemes.ESMFBilinearRegridder` or + :obj:`esmf_regrid.schemes.ESMFNearestRegridder`: + An :doc:`esmf_regrid:index` regridder. 
+ """ + kwargs = self.kwargs.copy() + regridder_cls = METHODS[kwargs.pop('method')] + src_mask = kwargs.pop('use_src_mask') + collapse_mask_along = kwargs.pop('collapse_src_mask_along') + if src_mask is True: + src_mask = self._get_mask(src_cube, collapse_mask_along) + tgt_mask = kwargs.pop('use_tgt_mask') + collapse_mask_along = kwargs.pop('collapse_tgt_mask_along') + if tgt_mask is True: + tgt_mask = self._get_mask(tgt_cube, collapse_mask_along) + src_mask, tgt_mask = dask.compute(src_mask, tgt_mask) + return regridder_cls( + src_cube, + tgt_cube, + use_src_mask=src_mask, + use_tgt_mask=tgt_mask, + **kwargs, + ) diff --git a/esmvalcore/preprocessor/_shared.py b/esmvalcore/preprocessor/_shared.py index 74bba18579..67c3b89f5d 100644 --- a/esmvalcore/preprocessor/_shared.py +++ b/esmvalcore/preprocessor/_shared.py @@ -329,10 +329,11 @@ def get_weights( # Time weights: lengths of time interval if 'time' in coords: - weights *= broadcast_to_shape( + weights = weights * broadcast_to_shape( npx.array(get_time_weights(cube)), cube.shape, cube.coord_dims('time'), + chunks=cube.lazy_data().chunks if cube.has_lazy_data() else None, ) # Latitude weights: cell areas @@ -350,10 +351,17 @@ def get_weights( f"variable)" ) try_adding_calculated_cell_area(cube) - weights *= broadcast_to_shape( - cube.cell_measure('cell_area').core_data(), + area_weights = cube.cell_measure('cell_area').core_data() + if cube.has_lazy_data(): + area_weights = da.array(area_weights) + chunks = cube.lazy_data().chunks + else: + chunks = None + weights = weights * broadcast_to_shape( + area_weights, cube.shape, cube.cell_measure_dims('cell_area'), + chunks=chunks, ) return weights @@ -498,3 +506,33 @@ def get_all_coord_dims( all_coord_dims.extend(cube.coord_dims(coord)) sorted_all_coord_dims = sorted(list(set(all_coord_dims))) return tuple(sorted_all_coord_dims) + + +def _get_dims_along(cube, *args, **kwargs): + """Get a tuple with the cube dimensions matching *args and **kwargs.""" + try: + coord = cube.coord(*args, **kwargs, dim_coords=True) + except iris.exceptions.CoordinateNotFoundError: + try: + coord = cube.coord(*args, **kwargs) + except iris.exceptions.CoordinateNotFoundError: + return tuple() + return cube.coord_dims(coord) + + +def get_dims_along_axes( + cube: iris.cube.Cube, + axes: Iterable[Literal["T", "Z", "Y", "X"]], +) -> tuple[int, ...]: + """Get a tuple with the dimensions along one or more axis.""" + dims = {d for axis in axes for d in _get_dims_along(cube, axis=axis)} + return tuple(sorted(dims)) + + +def get_dims_along_coords( + cube: iris.cube.Cube, + coords: Iterable[str], +) -> tuple[int, ...]: + """Get a tuple with the dimensions along one or more coordinates.""" + dims = {d for coord in coords for d in _get_dims_along(cube, coord)} + return tuple(sorted(dims)) diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index 473abb0190..840af3214e 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -161,9 +161,10 @@ def calculate_volume(cube: Cube) -> da.core.Array: try_adding_calculated_cell_area(cube) area = cube.cell_measure('cell_area').copy() area_dim = cube.cell_measure_dims(area) - - # Ensure cell area is in square meters as the units area.convert_units('m2') + area_array = area.core_data() + if cube.has_lazy_data(): + area_array = da.array(area_array) # Make sure input cube has not been modified if not has_cell_measure: @@ -171,9 +172,11 @@ def calculate_volume(cube: Cube) -> da.core.Array: chunks = cube.core_data().chunks if 
cube.has_lazy_data() else None area_arr = broadcast_to_shape( - area.core_data(), cube.shape, area_dim, chunks=chunks) + area_array, cube.shape, area_dim, chunks=chunks + ) thickness_arr = broadcast_to_shape( - thickness, cube.shape, z_dim, chunks=chunks) + thickness, cube.shape, z_dim, chunks=chunks + ) grid_volume = area_arr * thickness_arr return grid_volume diff --git a/esmvalcore/preprocessor/regrid_schemes.py b/esmvalcore/preprocessor/regrid_schemes.py index 91af9e3fdf..8c50b8065e 100644 --- a/esmvalcore/preprocessor/regrid_schemes.py +++ b/esmvalcore/preprocessor/regrid_schemes.py @@ -12,6 +12,7 @@ ESMPyNearest, ESMPyRegridder, ) +from esmvalcore.preprocessor._regrid_iris_esmf_regrid import IrisESMFRegrid from esmvalcore.preprocessor._regrid_unstructured import ( UnstructuredLinear, UnstructuredLinearRegridder, @@ -20,12 +21,12 @@ logger = logging.getLogger(__name__) - __all__ = [ 'ESMPyAreaWeighted', 'ESMPyLinear', 'ESMPyNearest', 'ESMPyRegridder', + 'IrisESMFRegrid', 'GenericFuncScheme', 'GenericRegridder', 'UnstructuredLinear', @@ -51,7 +52,6 @@ class GenericRegridder: Cube, \*\*kwargs) -> Cube. **kwargs: Keyword arguments for the generic regridding function. - """ def __init__( @@ -79,7 +79,6 @@ def __call__(self, cube: Cube) -> Cube: ------- Cube Regridded cube. - """ return self.func(cube, self.tgt_cube, **self.kwargs) @@ -98,7 +97,6 @@ class GenericFuncScheme: Cube, \*\*kwargs) -> Cube. **kwargs: Keyword arguments for the generic regridding function. - """ def __init__(self, func: Callable, **kwargs): @@ -125,6 +123,5 @@ def regridder(self, src_cube: Cube, tgt_cube: Cube) -> GenericRegridder: ------- GenericRegridder Regridder instance. - """ return GenericRegridder(src_cube, tgt_cube, self.func, **self.kwargs) diff --git a/setup.py b/setup.py index 94246b611d..e1645d3085 100755 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ 'dask[array,distributed]!=2024.8.0', # ESMValCore/issues/2503 'dask-jobqueue', 'esgf-pyclient>=0.3.1', - 'esmf-regrid>=0.10.0', # iris-esmf-regrid #342 + 'esmf-regrid>=0.11.0', 'esmpy!=8.1.0', # not on PyPI 'filelock', 'fiona', diff --git a/tests/integration/preprocessor/_derive/test_sispeed.py b/tests/integration/preprocessor/_derive/test_sispeed.py index 9daee38954..076c453756 100644 --- a/tests/integration/preprocessor/_derive/test_sispeed.py +++ b/tests/integration/preprocessor/_derive/test_sispeed.py @@ -22,7 +22,7 @@ def get_cube(name, lat=((0.5, 1.5), (2.5, 3.5)), lon=((0.5, 1.5), (2.5, 3.5))): @mock.patch( - 'esmvalcore.preprocessor._regrid_esmpy.ESMPyRegridder.__call__', + 'esmvalcore.preprocessor._derive.sispeed.regrid', autospec=True, ) def test_sispeed_calculation(mock_regrid): @@ -38,7 +38,7 @@ def test_sispeed_calculation(mock_regrid): @mock.patch( - 'esmvalcore.preprocessor._regrid_esmpy.ESMPyRegridder.__call__', + 'esmvalcore.preprocessor._derive.sispeed.regrid', autospec=True, ) def test_sispeed_calculation_coord_differ(mock_regrid): diff --git a/tests/integration/preprocessor/_mask/test_mask.py b/tests/integration/preprocessor/_mask/test_mask.py index 4e5e51167b..1b085519eb 100644 --- a/tests/integration/preprocessor/_mask/test_mask.py +++ b/tests/integration/preprocessor/_mask/test_mask.py @@ -4,10 +4,12 @@ """ from pathlib import Path +import dask.array as da import iris import iris.fileformats import numpy as np import pytest +from iris.coords import AuxCoord from esmvalcore.preprocessor import ( PreprocessorFile, @@ -64,55 +66,59 @@ def setUp(self): self.mock_data = np.ma.empty((4, 3, 3)) self.mock_data[:] = 10. 
- def test_components_fx_var(self): + @pytest.mark.parametrize('lazy_fx', [True, False]) + @pytest.mark.parametrize('lazy', [True, False]) + def test_components_fx_var(self, lazy, lazy_fx): """Test compatibility of ancillary variables.""" - self.fx_mask.var_name = 'sftlf' - self.fx_mask.standard_name = 'land_area_fraction' + if lazy: + cube_data = da.array(self.new_cube_data) + else: + cube_data = self.new_cube_data + fx_cube = self.fx_mask.copy() + if lazy_fx: + fx_cube.data = fx_cube.lazy_data() + + # mask_landsea + fx_cube.var_name = 'sftlf' + fx_cube.standard_name = 'land_area_fraction' new_cube_land = iris.cube.Cube( - self.new_cube_data, - dim_coords_and_dims=self.cube_coords_spec - ) - new_cube_land = add_supplementary_variables( - new_cube_land, - [self.fx_mask], - ) - result_land = mask_landsea( - new_cube_land, - 'land', + cube_data, dim_coords_and_dims=self.cube_coords_spec ) + new_cube_land = add_supplementary_variables(new_cube_land, [fx_cube]) + result_land = mask_landsea(new_cube_land, 'land') assert isinstance(result_land, iris.cube.Cube) + assert result_land.has_lazy_data() is (lazy or lazy_fx) - self.fx_mask.var_name = 'sftgif' - self.fx_mask.standard_name = 'land_ice_area_fraction' + # mask_landseaice + fx_cube.var_name = 'sftgif' + fx_cube.standard_name = 'land_ice_area_fraction' new_cube_ice = iris.cube.Cube( - self.new_cube_data, - dim_coords_and_dims=self.cube_coords_spec - ) - new_cube_ice = add_supplementary_variables( - new_cube_ice, - [self.fx_mask], - ) - result_ice = mask_landseaice( - new_cube_ice, - 'ice', + cube_data, dim_coords_and_dims=self.cube_coords_spec ) + new_cube_ice = add_supplementary_variables(new_cube_ice, [fx_cube]) + result_ice = mask_landseaice(new_cube_ice, 'ice') assert isinstance(result_ice, iris.cube.Cube) + assert result_ice.has_lazy_data() is (lazy or lazy_fx) - def test_mask_landsea(self): + @pytest.mark.parametrize('lazy', [True, False]) + def test_mask_landsea(self, lazy): """Test mask_landsea func.""" + if lazy: + cube_data = da.array(self.new_cube_data) + else: + cube_data = self.new_cube_data + self.fx_mask.var_name = 'sftlf' self.fx_mask.standard_name = 'land_area_fraction' new_cube_land = iris.cube.Cube( - self.new_cube_data, - dim_coords_and_dims=self.cube_coords_spec + cube_data, dim_coords_and_dims=self.cube_coords_spec ) new_cube_land = add_supplementary_variables( new_cube_land, [self.fx_mask], ) new_cube_sea = iris.cube.Cube( - self.new_cube_data, - dim_coords_and_dims=self.cube_coords_spec + cube_data, dim_coords_and_dims=self.cube_coords_spec ) new_cube_sea = add_supplementary_variables( new_cube_sea, @@ -128,6 +134,8 @@ def test_mask_landsea(self): new_cube_sea, 'sea', ) + assert result_land.has_lazy_data() is lazy + assert result_sea.has_lazy_data() is lazy expected = np.ma.empty((2, 3, 3)) expected.data[:] = 200. 
expected.mask = np.ones((2, 3, 3), bool) @@ -143,17 +151,17 @@ def test_mask_landsea(self): # mask with shp files new_cube_land = iris.cube.Cube( - self.new_cube_data, - dim_coords_and_dims=self.cube_coords_spec + cube_data, dim_coords_and_dims=self.cube_coords_spec ) new_cube_sea = iris.cube.Cube( - self.new_cube_data, - dim_coords_and_dims=self.cube_coords_spec + cube_data, dim_coords_and_dims=self.cube_coords_spec ) result_land = mask_landsea(new_cube_land, 'land') result_sea = mask_landsea(new_cube_sea, 'sea') # bear in mind all points are in the ocean + assert result_land.has_lazy_data() is lazy + assert result_sea.has_lazy_data() is lazy np.ma.set_fill_value(result_land.data, 1e+20) np.ma.set_fill_value(result_sea.data, 1e+20) expected.mask = np.zeros((3, 3), bool) @@ -161,19 +169,87 @@ def test_mask_landsea(self): expected.mask = np.ones((3, 3), bool) assert_array_equal(result_sea.data, expected) - def test_mask_landseaice(self): + @pytest.mark.parametrize('lazy', [True, False]) + def test_mask_landsea_transposed_fx(self, lazy): + """Test mask_landsea func.""" + if lazy: + cube_data = da.array(self.new_cube_data) + else: + cube_data = self.new_cube_data + cube = iris.cube.Cube( + cube_data, dim_coords_and_dims=self.cube_coords_spec + ) + self.fx_mask.var_name = 'sftlf' + self.fx_mask.standard_name = 'land_area_fraction' + cube = add_supplementary_variables(cube, [self.fx_mask]) + cube.transpose([2, 1, 0]) + + result = mask_landsea(cube, 'land') + + assert result.has_lazy_data() is lazy + expected = np.ma.array( + np.full((3, 3, 2), 200.0), mask=np.ones((3, 3, 2), bool) + ) + expected.mask[2, 1, :] = False + assert_array_equal(result.data, expected) + + @pytest.mark.parametrize('lazy', [True, False]) + def test_mask_landsea_transposed_shp(self, lazy): + """Test mask_landsea func.""" + if lazy: + cube_data = da.array(self.new_cube_data) + else: + cube_data = self.new_cube_data + cube = iris.cube.Cube( + cube_data, dim_coords_and_dims=self.cube_coords_spec + ) + cube.transpose([2, 1, 0]) + + result = mask_landsea(cube, 'land') + + assert result.has_lazy_data() is lazy + expected = np.ma.array( + np.full((3, 3, 2), 200.0), mask=np.zeros((3, 3, 2), bool) + ) + assert_array_equal(result.data, expected) + + def test_mask_landsea_multidim_fail(self): + """Test mask_landsea func.""" + lon_coord = AuxCoord(np.ones((3, 3)), standard_name='longitude') + cube = iris.cube.Cube( + self.new_cube_data, + dim_coords_and_dims=[(self.zcoord, 0), (self.lats, 1)], + aux_coords_and_dims=[(lon_coord, (1, 2))], + ) + + msg = ( + "Use of shapefiles with irregular grids not yet implemented, " + "land-sea mask not applied." + ) + with pytest.raises(ValueError, match=msg): + mask_landsea(cube, 'land') + + @pytest.mark.parametrize('lazy', [True, False]) + def test_mask_landseaice(self, lazy): """Test mask_landseaice func.""" + if lazy: + cube_data = da.array(self.new_cube_data).rechunk((1, 3, 3)) + else: + cube_data = self.new_cube_data + self.fx_mask.var_name = 'sftgif' self.fx_mask.standard_name = 'land_ice_area_fraction' new_cube_ice = iris.cube.Cube( - self.new_cube_data, - dim_coords_and_dims=self.cube_coords_spec + cube_data, dim_coords_and_dims=self.cube_coords_spec ) new_cube_ice = add_supplementary_variables( new_cube_ice, [self.fx_mask], ) result_ice = mask_landseaice(new_cube_ice, 'ice') + assert result_ice.has_lazy_data() is lazy + if lazy: + assert result_ice.lazy_data().chunksize == (1, 3, 3) expected = np.ma.empty((2, 3, 3)) expected.data[:] = 200. 
expected.mask = np.ones((2, 3, 3), bool) @@ -182,6 +258,19 @@ def test_mask_landseaice(self): np.ma.set_fill_value(expected, 1e+20) assert_array_equal(result_ice.data, expected) + def test_mask_landseaice_multidim_fail(self): + """Test mask_landseaice func.""" + lon_coord = AuxCoord(np.ones((3, 3)), standard_name='longitude') + cube = iris.cube.Cube( + self.new_cube_data, + dim_coords_and_dims=[(self.zcoord, 0), (self.lats, 1)], + aux_coords_and_dims=[(lon_coord, (1, 2))], + ) + + msg = "Landsea-ice mask could not be found. Stopping." + with pytest.raises(ValueError, match=msg): + mask_landseaice(cube, 'ice') + @pytest.mark.parametrize('lazy', [True, False]) def test_mask_fillvalues(self, mocker, lazy): """Test the fillvalues mask: func mask_fillvalues.""" diff --git a/tests/integration/preprocessor/_regrid/test_extract_coordinate_points.py b/tests/integration/preprocessor/_regrid/test_extract_coordinate_points.py index 872b4aec2a..7b4d4eb9b5 100644 --- a/tests/integration/preprocessor/_regrid/test_extract_coordinate_points.py +++ b/tests/integration/preprocessor/_regrid/test_extract_coordinate_points.py @@ -19,7 +19,7 @@ def setUp(self): """Prepare tests.""" shape = (3, 4, 4) data = np.arange(np.prod(shape)).reshape(shape) - self.cube = _make_cube(data, dtype=np.float64, rotated=True) + self.cube = _make_cube(data, dtype=np.float64, grid='rotated') self.cs = iris.coord_systems.GeogCS(iris.fileformats.pp.EARTH_RADIUS) def test_extract_point__single_linear(self): diff --git a/tests/integration/preprocessor/_regrid/test_regrid.py b/tests/integration/preprocessor/_regrid/test_regrid.py index 7166cfbfe5..4e2e472681 100644 --- a/tests/integration/preprocessor/_regrid/test_regrid.py +++ b/tests/integration/preprocessor/_regrid/test_regrid.py @@ -27,7 +27,6 @@ def setUp(self): self.cs = iris.coord_systems.GeogCS(iris.fileformats.pp.EARTH_RADIUS) # Setup grid for linear regridding - data = np.empty((1, 1)) lons = iris.coords.DimCoord([1.5], standard_name='longitude', bounds=[[1, 2]], @@ -39,11 +38,15 @@ def setUp(self): units='degrees_north', coord_system=self.cs) coords_spec = [(lats, 0), (lons, 1)] - grid = iris.cube.Cube(data, dim_coords_and_dims=coords_spec) - self.grid_for_linear = grid + self.grid_for_linear = iris.cube.Cube( + np.empty((1, 1)), + dim_coords_and_dims=coords_spec, + ) + + # Setup mesh cube + self.mesh_cube = _make_cube(data, grid='mesh') # Setup unstructured cube and grid - data = np.zeros((1, 1)) lons = iris.coords.DimCoord([1.6], standard_name='longitude', bounds=[[1, 2]], @@ -56,7 +59,7 @@ def setUp(self): coord_system=self.cs) coords_spec = [(lats, 0), (lons, 1)] self.tgt_grid_for_unstructured = iris.cube.Cube( - data, dim_coords_and_dims=coords_spec) + np.zeros((1, 1)), dim_coords_and_dims=coords_spec) lons = self.cube.coord('longitude') lats = self.cube.coord('latitude') @@ -85,7 +88,7 @@ def setUp(self): ) unstructured_data = np.ma.masked_less( - self.cube.data.reshape(3, 4).astype(np.float32), 3.5 + self.cube.data.reshape(3, -1).astype(np.float32), 3.5 ) self.unstructured_grid_cube = iris.cube.Cube( @@ -145,12 +148,15 @@ def load(_): expected = np.array([[[1.5]], [[5.5]], [[9.5]]]) assert_array_equal(result.data, expected) + @pytest.mark.parametrize('scheme', [{ + 'reference': + 'esmf_regrid.schemes:regrid_rectilinear_to_rectilinear', + }, { + 'reference': 'esmvalcore.preprocessor.regrid_schemes:IrisESMFRegrid', + 'method': 'bilinear', + }]) @pytest.mark.parametrize('cache_weights', [True, False]) - def test_regrid__esmf_rectilinear(self, cache_weights): - scheme_name 
= 'esmf_regrid.schemes:regrid_rectilinear_to_rectilinear' - scheme = { - 'reference': scheme_name - } + def test_regrid__esmf_rectilinear(self, scheme, cache_weights): result = regrid( self.cube, self.grid_for_linear, @@ -160,6 +166,11 @@ def test_regrid__esmf_rectilinear(self, cache_weights): expected = np.array([[[1.5]], [[5.5]], [[9.5]]]) np.testing.assert_array_almost_equal(result.data, expected, decimal=1) + def test_regrid__esmf_mesh_to_regular(self): + result = regrid(self.mesh_cube, self.grid_for_linear, 'linear') + expected = np.array([[[1.5]], [[5.5]], [[9.5]]]) + np.testing.assert_array_almost_equal(result.data, expected, decimal=1) + @pytest.mark.parametrize('cache_weights', [True, False]) def test_regrid__regular_coordinates(self, cache_weights): data = np.ones((1, 1)) @@ -309,8 +320,15 @@ def test_regrid__area_weighted(self, cache_weights): expected = np.array([1.499886, 5.499886, 9.499886]) np.testing.assert_array_almost_equal(result.data, expected, decimal=6) + @pytest.mark.parametrize('scheme', [{ + 'reference': + 'esmf_regrid.schemes:ESMFAreaWeighted' + }, { + 'reference': 'esmvalcore.preprocessor.regrid_schemes:IrisESMFRegrid', + 'method': 'conservative', + }]) @pytest.mark.parametrize('cache_weights', [True, False]) - def test_regrid__esmf_area_weighted(self, cache_weights): + def test_regrid__esmf_area_weighted(self, scheme, cache_weights): data = np.empty((1, 1)) lons = iris.coords.DimCoord([1.6], standard_name='longitude', @@ -324,9 +342,6 @@ def test_regrid__esmf_area_weighted(self, cache_weights): coord_system=self.cs) coords_spec = [(lats, 0), (lons, 1)] grid = iris.cube.Cube(data, dim_coords_and_dims=coords_spec) - scheme = { - 'reference': 'esmf_regrid.schemes:ESMFAreaWeighted' - } result = regrid(self.cube, grid, scheme, cache_weights=cache_weights) expected = np.array([[[1.499886]], [[5.499886]], [[9.499886]]]) np.testing.assert_array_almost_equal(result.data, expected, decimal=6) @@ -374,7 +389,8 @@ def test_regrid_linear_unstructured_grid_int(self, cache_weights): def test_invalid_scheme_for_unstructured_grid(self, cache_weights): """Test invalid scheme for unstructured cube.""" msg = ( - "Regridding scheme 'invalid' does not support unstructured data, " + "Regridding scheme 'invalid' not available for unstructured data, " + "expected one of: linear, nearest" ) with pytest.raises(ValueError, match=msg): regrid( @@ -388,7 +404,8 @@ def test_invalid_scheme_for_unstructured_grid(self, cache_weights): def test_invalid_scheme_for_irregular_grid(self, cache_weights): """Test invalid scheme for irregular cube.""" msg = ( - "Regridding scheme 'invalid' does not support irregular data, " + "Regridding scheme 'invalid' not available for irregular data, " + "expected one of: area_weighted, linear, nearest" ) with pytest.raises(ValueError, match=msg): regrid( diff --git a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py index 7def9afe1b..e30a4634fd 100644 --- a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py +++ b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py @@ -364,6 +364,7 @@ def test_reference_none_cubes(regular_cubes): ) +@pytest.mark.parametrize('lazy_weights', [True, False]) @pytest.mark.parametrize( 'metric,data,ref_data,long_name,var_name,units', TEST_DISTANCE_METRICS ) @@ -376,9 +377,14 @@ def test_distance_metric( long_name, var_name, units, + lazy_weights, ): """Test `distance_metric`.""" - 
regular_cubes[0].add_cell_measure(AREA_WEIGHTS, (1, 2)) + regular_cubes[0].add_cell_measure(AREA_WEIGHTS.copy(), (1, 2)) + if lazy_weights: + regular_cubes[0].cell_measure('cell_area').data = ( + regular_cubes[0].cell_measure('cell_area').lazy_data() + ) ref_product = PreprocessorFile( ref_cubes, 'REF', {'reference_for_metric': True} ) @@ -390,6 +396,10 @@ def test_distance_metric( out_products = distance_metric(products, metric) + assert ( + regular_cubes[0].cell_measure('cell_area').has_lazy_data() is + lazy_weights + ) assert isinstance(out_products, set) out_dict = products_set_to_dict(out_products) assert len(out_dict) == 3 @@ -407,6 +417,7 @@ def test_distance_metric( out_cube = product_a.cubes[0] assert out_cube.shape == () assert out_cube.dtype == np.float32 + assert not out_cube.has_lazy_data() assert_allclose(out_cube.data, np.array(data, dtype=np.float32)) assert out_cube.var_name == var_name assert out_cube.long_name == long_name @@ -425,6 +436,7 @@ def test_distance_metric( out_cube = product_b.cubes[0] assert out_cube.shape == () assert out_cube.dtype == np.float32 + assert not out_cube.has_lazy_data() assert_allclose(out_cube.data, np.array(data, dtype=np.float32)) assert out_cube.var_name == var_name assert out_cube.long_name == long_name @@ -445,6 +457,7 @@ def test_distance_metric( out_cube = product_ref.cubes[0] assert out_cube.shape == () assert out_cube.dtype == np.float32 + assert not out_cube.has_lazy_data() assert_allclose(out_cube.data, ref_data) assert out_cube.var_name == var_name assert out_cube.long_name == long_name @@ -531,22 +544,40 @@ def test_distance_metric_lazy( assert product_a.mock_ancestors == {ref_product} +@pytest.mark.parametrize('lazy_weights', [True, False]) @pytest.mark.parametrize( 'metric,data,_,long_name,var_name,units', TEST_DISTANCE_METRICS ) def test_distance_metric_cubes( - regular_cubes, ref_cubes, metric, data, _, long_name, var_name, units + regular_cubes, + ref_cubes, + metric, + data, + _, + long_name, + var_name, + units, + lazy_weights, ): """Test `distance_metric` with cubes.""" - regular_cubes[0].add_cell_measure(AREA_WEIGHTS, (1, 2)) + regular_cubes[0].add_cell_measure(AREA_WEIGHTS.copy(), (1, 2)) + if lazy_weights: + regular_cubes[0].cell_measure('cell_area').data = ( + regular_cubes[0].cell_measure('cell_area').lazy_data() + ) out_cubes = distance_metric(regular_cubes, metric, reference=ref_cubes[0]) + assert ( + regular_cubes[0].cell_measure('cell_area').has_lazy_data() is + lazy_weights + ) assert isinstance(out_cubes, CubeList) assert len(out_cubes) == 1 out_cube = out_cubes[0] assert out_cube.shape == () assert out_cube.dtype == np.float32 + assert not out_cube.has_lazy_data() assert_allclose(out_cube.data, np.array(data, dtype=np.float32)) assert out_cube.var_name == var_name assert out_cube.long_name == long_name @@ -557,12 +588,22 @@ def test_distance_metric_cubes( ) +@pytest.mark.parametrize('lazy_weights', [True, False]) @pytest.mark.parametrize('lazy', [True, False]) @pytest.mark.parametrize( 'metric,data,_,long_name,var_name,units', TEST_DISTANCE_METRICS ) def test_distance_metric_masked_data( - regular_cubes, ref_cubes, metric, data, _, long_name, var_name, units, lazy + regular_cubes, + ref_cubes, + metric, + data, + _, + long_name, + var_name, + units, + lazy, + lazy_weights, ): """Test `distance_metric` with masked data.""" # Test cube @@ -585,7 +626,11 @@ def test_distance_metric_masked_data( np.ma.masked_invalid(cube_data), dim_coords_and_dims=coord_specs ) cube.metadata = regular_cubes[0].metadata - 
cube.add_cell_measure(AREA_WEIGHTS, (1, 2)) + cube.add_cell_measure(AREA_WEIGHTS.copy(), (1, 2)) + if lazy_weights: + cube.cell_measure('cell_area').data = ( + cube.cell_measure('cell_area').lazy_data() + ) # Ref cube ref_cube = cube.copy() @@ -604,6 +649,7 @@ def test_distance_metric_masked_data( out_cubes = distance_metric([cube], metric, reference=ref_cube) + assert cube.cell_measure('cell_area').has_lazy_data() is lazy_weights assert isinstance(out_cubes, CubeList) assert len(out_cubes) == 1 out_cube = out_cubes[0] @@ -630,17 +676,31 @@ def test_distance_metric_masked_data( ) +@pytest.mark.parametrize('lazy_weights', [True, False]) @pytest.mark.parametrize('lazy', [True, False]) @pytest.mark.parametrize( 'metric,_,__,long_name,var_name,units', TEST_DISTANCE_METRICS ) def test_distance_metric_fully_masked_data( - regular_cubes, ref_cubes, metric, _, __, long_name, var_name, units, lazy + regular_cubes, + ref_cubes, + metric, + _, + __, + long_name, + var_name, + units, + lazy, + lazy_weights, ): """Test `distance_metric` with fully_masked data.""" cube = regular_cubes[0] cube.data = np.ma.masked_invalid(np.full(cube.shape, np.nan)) - cube.add_cell_measure(AREA_WEIGHTS, (1, 2)) + cube.add_cell_measure(AREA_WEIGHTS.copy(), (1, 2)) + if lazy_weights: + cube.cell_measure('cell_area').data = ( + cube.cell_measure('cell_area').lazy_data() + ) ref_cube = ref_cubes[0] if lazy: @@ -649,6 +709,7 @@ def test_distance_metric_fully_masked_data( out_cubes = distance_metric([cube], metric, reference=ref_cube) + assert cube.cell_measure('cell_area').has_lazy_data() is lazy_weights assert isinstance(out_cubes, CubeList) assert len(out_cubes) == 1 out_cube = out_cubes[0] diff --git a/tests/unit/preprocessor/_mask/test_mask.py b/tests/unit/preprocessor/_mask/test_mask.py index 44cb0246f9..229657b309 100644 --- a/tests/unit/preprocessor/_mask/test_mask.py +++ b/tests/unit/preprocessor/_mask/test_mask.py @@ -2,18 +2,22 @@ import unittest -import numpy as np - import iris import iris.fileformats -import tests +import numpy as np from cf_units import Unit -from esmvalcore.preprocessor._mask import (_apply_fx_mask, - count_spells, _get_fx_mask, - mask_above_threshold, - mask_below_threshold, - mask_glaciated, mask_inside_range, - mask_outside_range) + +import tests +from esmvalcore.preprocessor._mask import ( + _apply_mask, + _get_fx_mask, + count_spells, + mask_above_threshold, + mask_below_threshold, + mask_glaciated, + mask_inside_range, + mask_outside_range, +) class Test(tests.Test): @@ -48,11 +52,12 @@ def setUp(self): def test_apply_fx_mask_on_nonmasked_data(self): """Test _apply_fx_mask func.""" dummy_fx_mask = np.ma.array((True, False, True)) - app_mask = _apply_fx_mask(dummy_fx_mask, - self.time_cube.data[0:3].astype('float64')) - app_mask = app_mask.compute() - fixed_mask = np.ma.array(self.time_cube.data[0:3].astype('float64'), - mask=dummy_fx_mask) + app_mask = _apply_mask( + dummy_fx_mask, self.time_cube.data[0:3].astype('float64') + ) + fixed_mask = np.ma.array( + self.time_cube.data[0:3].astype('float64'), mask=dummy_fx_mask + ) self.assert_array_equal(fixed_mask, app_mask) def test_apply_fx_mask_on_masked_data(self): @@ -60,8 +65,7 @@ def test_apply_fx_mask_on_masked_data(self): dummy_fx_mask = np.ma.array((True, True, True)) masked_data = np.ma.array(self.time_cube.data[0:3].astype('float64'), mask=np.ma.array((False, True, False))) - app_mask = _apply_fx_mask(dummy_fx_mask, masked_data) - app_mask = app_mask.compute() + app_mask = _apply_mask(dummy_fx_mask, masked_data) fixed_mask = 
np.ma.array(self.time_cube.data[0:3].astype('float64'), mask=dummy_fx_mask) self.assert_array_equal(fixed_mask, app_mask) diff --git a/tests/unit/preprocessor/_regrid/__init__.py b/tests/unit/preprocessor/_regrid/__init__.py index db2f7c48a6..2c0e5a2f47 100644 --- a/tests/unit/preprocessor/_regrid/__init__.py +++ b/tests/unit/preprocessor/_regrid/__init__.py @@ -3,6 +3,7 @@ """ +from typing import Literal import iris import iris.fileformats import numpy as np @@ -39,11 +40,13 @@ def _make_vcoord(data, dtype=None): return zcoord -def _make_cube(data, - aux_coord=True, - dim_coord=True, - dtype=None, - rotated=False): +def _make_cube( + data, + aux_coord=True, + dim_coord=True, + dtype=None, + grid: Literal['regular', 'rotated', 'mesh'] = 'regular', +): """ Create a 3d synthetic test cube. @@ -55,6 +58,9 @@ def _make_cube(data, data = np.empty(data, dtype=dtype) z, y, x = data.shape + if grid == 'mesh': + # Meshes have a single lat/lon dimension. + data = data.reshape(z, -1) # Create the cube. cm = CellMethod( @@ -72,8 +78,8 @@ def _make_cube(data, if dim_coord: cube.add_dim_coord(_make_vcoord(z, dtype=dtype), 0) - # Create a synthetic test latitude coordinate. - if rotated: + if grid == 'rotated': + # Create a synthetic test latitude coordinate. data = np.arange(y, dtype=dtype) + 1 cs = iris.coord_systems.GeogCS(iris.fileformats.pp.EARTH_RADIUS) kwargs = dict( @@ -101,7 +107,66 @@ def _make_cube(data, if data.size > 1: xcoord.guess_bounds() cube.add_dim_coord(xcoord, 2) - else: + elif grid == 'mesh': + # This constructs a trivial rectangular mesh with square faces: + # 0. 1. 2. + # 0. +---+---+- + # | x | x | + # 1. +---+---+- + # | x | x | + # 2. +---+---+- + # where + # + is a node location + # x is a face location + # the lines between the nodes are the boundaries of the faces + # and the number are degrees latitude/longitude. + # + node_data_x = np.arange(x+1) + 0.5 + node_data_y = np.arange(y+1) + 0.5 + node_x, node_y = [ + AuxCoord(a.ravel(), name) + for a, name in zip( + np.meshgrid(node_data_x, node_data_y), + ['longitude', 'latitude'], + ) + ] + face_data_x = np.arange(x) + 1 + face_data_y = np.arange(y) + 1 + face_x, face_y = [ + AuxCoord(a.ravel(), name) + for a, name in zip( + np.meshgrid(face_data_x, face_data_y), + ['longitude', 'latitude'], + ) + ] + # Build the face connectivity indices by creating an array of squares + # and adding an offset of 1 more to each next square and then dropping: + # * the last column of connectivities - those would connect the last + # nodes in a row to the first nodes of the next row + # * the last row of connectivities - those refer to nodes outside the + # grid + n_nodes_x = len(node_data_x) + n_nodes_y = len(node_data_y) + square = np.array([0, n_nodes_x, n_nodes_x+1, 1]) + connectivities = ( + np.tile(square, (n_nodes_y * n_nodes_x, 1)) + + np.arange(n_nodes_y * n_nodes_x).reshape(-1, 1) + ).reshape(n_nodes_y, n_nodes_x, 4)[:-1, :-1].reshape(-1, 4) + face_connectivity = iris.mesh.Connectivity( + indices=connectivities, + cf_role="face_node_connectivity", + ) + mesh = iris.mesh.MeshXY( + topology_dimension=2, + node_coords_and_axes=[(node_x, "X"), (node_y, "Y")], + face_coords_and_axes=[(face_x, "X"), (face_y, "Y")], + connectivities=[face_connectivity], + ) + lon, lat = mesh.to_MeshCoords('face') + cube.add_aux_coord(lon, 1) + cube.add_aux_coord(lat, 1) + elif grid == 'regular': + # Create a synthetic test latitude coordinate. 
data = np.arange(y, dtype=dtype) + 1 cs = iris.coord_systems.GeogCS(iris.fileformats.pp.EARTH_RADIUS) kwargs = dict( @@ -133,14 +198,14 @@ def _make_cube(data, # Create a synthetic test 2d auxiliary coordinate # that spans the vertical dimension. if aux_coord: - data = np.arange(np.prod((z, y)), dtype=dtype).reshape(z, y) + hsize = y * x if grid == 'mesh' else y + data = np.arange(np.prod((z, hsize)), dtype=dtype).reshape(z, hsize) kwargs = dict( - standard_name=None, long_name='Pressure Slice', var_name='aplev', units='hPa', attributes=dict(positive='down'), - coord_system=None) + ) zycoord = AuxCoord(data, **kwargs) cube.add_aux_coord(zycoord, (0, 1)) diff --git a/tests/unit/preprocessor/_regrid/test_regrid.py b/tests/unit/preprocessor/_regrid/test_regrid.py index f7ffff1228..535aeed951 100644 --- a/tests/unit/preprocessor/_regrid/test_regrid.py +++ b/tests/unit/preprocessor/_regrid/test_regrid.py @@ -112,7 +112,10 @@ def test_invalid_target_grid(scheme, cube_10x10, mocker): def test_invalid_scheme(cube_10x10, cube_30x30): """Test `regrid.`""" - msg = "Got invalid regridding scheme string 'wibble'" + msg = ( + "Regridding scheme 'wibble' not available for regular data, " + "expected one of: area_weighted, linear, nearest" + ) with pytest.raises(ValueError, match=msg): regrid(cube_10x10, cube_30x30, 'wibble') @@ -237,8 +240,10 @@ def test_regrid_is_skipped_if_grids_are_the_same(): assert expected_different_cube is not cube -def make_test_cube(shape): - data = da.empty(shape, dtype=np.float32) +def make_test_cube_rectilinear(shape): + chunks = ["auto"] * len(shape) + chunks[-2] = chunks[-1] = None + data = da.empty(shape, chunks=chunks, dtype=np.float32) cube = iris.cube.Cube(data) if len(shape) > 2: cube.add_dim_coord( @@ -265,50 +270,126 @@ def make_test_cube(shape): return cube -def test_rechunk_on_increased_grid(): - """Test that an increase in grid size rechunks.""" - with dask.config.set({'array.chunk-size': '128 M'}): +def make_test_cube_irregular(shape): + data = da.empty(shape, dtype=np.float32) + cube = iris.cube.Cube(data) + if len(shape) > 2: + cube.add_dim_coord( + iris.coords.DimCoord( + np.arange(shape[0]), + standard_name='time', + ), + 0, + ) + lat_points = np.linspace(-90., 90., shape[-2], endpoint=True) + lon_points = np.linspace(0., 360., shape[-1]) + + cube.add_aux_coord( + iris.coords.AuxCoord( + np.broadcast_to(lat_points.reshape(-1, 1), shape[-2:]), + standard_name='latitude', + ), + (-2, -1), + ) + cube.add_aux_coord( + iris.coords.AuxCoord( + np.broadcast_to(lon_points.reshape(1, -1), shape[-2:]), + standard_name='longitude', + ), + (-2, -1), + ) + return cube - time_dim = 246 - src_grid_dims = (91, 180) - data = da.empty((time_dim, ) + src_grid_dims, dtype=np.float32) +def make_test_cube_unstructured(shape): + data = da.empty(shape, dtype=np.float32) + cube = iris.cube.Cube(data) + if len(shape) > 1: + cube.add_dim_coord( + iris.coords.DimCoord( + np.arange(shape[0]), + standard_name='time', + ), + 0, + ) + lat_points = np.linspace(-90., 90., shape[-1], endpoint=True) + lon_points = np.linspace(0., 360., shape[-1]) + + cube.add_aux_coord( + iris.coords.AuxCoord( + lat_points, + standard_name='latitude', + ), + (-1, ), + ) + cube.add_aux_coord( + iris.coords.AuxCoord( + lon_points, + standard_name='longitude', + ), + (-1, ), + ) + return cube + + +@pytest.mark.parametrize( + 'grids', + [ + ('rectilinear', 'rectilinear'), + ('rectilinear', 'irregular'), + ('irregular', 'rectilinear'), + ('irregular', 'irregular'), + ('unstructured', 'rectilinear'), + ], +) +def 
test_rechunk_on_increased_grid(grids): + """Test that an increase in grid size rechunks.""" + with dask.config.set({'array.chunk-size': '128 M'}): + src_grid, tgt_grid = grids + src_dims = (246, 91, 180) + if src_grid == 'unstructured': + src_dims = src_dims[:-2] + (np.prod(src_dims[-2:]), ) tgt_grid_dims = (2, 361, 720) - tgt_grid = make_test_cube(tgt_grid_dims) - result = _rechunk(iris.cube.Cube(data), tgt_grid) + src_cube = globals()[f"make_test_cube_{src_grid}"](src_dims) + tgt_grid = globals()[f"make_test_cube_{tgt_grid}"](tgt_grid_dims) + result = _rechunk(src_cube, tgt_grid) - assert result.core_data().chunks == ((123, 123), (91, ), (180, )) + expected = ((123, 123), (91, ), (180, )) + if src_grid == 'unstructured': + expected = expected[:-2] + (np.prod(expected[-2:]), ) + assert result.core_data().chunks == expected def test_no_rechunk_on_decreased_grid(): """Test that a decrease in grid size does not rechunk.""" with dask.config.set({'array.chunk-size': '128 M'}): - time_dim = 200 - src_grid_dims = (361, 720) - data = da.empty((time_dim, ) + src_grid_dims, dtype=np.float32) + src_dims = (200, 361, 720) + src_cube = make_test_cube_rectilinear(src_dims) tgt_grid_dims = (91, 180) - tgt_grid = make_test_cube(tgt_grid_dims) + tgt_grid_cube = make_test_cube_rectilinear(tgt_grid_dims) - result = _rechunk(iris.cube.Cube(data), tgt_grid) + expected = src_cube.core_data().chunks + result = _rechunk(src_cube, tgt_grid_cube) - assert result.core_data().chunks == data.chunks + assert result.core_data().chunks == expected -def test_no_rechunk_2d(): - """Test that a 2D cube is not rechunked.""" +def test_no_rechunk_horizontal_only(): + """Test that a horizontal only cube is not rechunked.""" with dask.config.set({'array.chunk-size': '64 MiB'}): src_grid_dims = (361, 720) - data = da.empty(src_grid_dims, dtype=np.float32) + src_cube = make_test_cube_rectilinear(src_grid_dims) tgt_grid_dims = (3601, 7200) - tgt_grid = da.empty(tgt_grid_dims, dtype=np.float32) + tgt_grid_cube = make_test_cube_rectilinear(tgt_grid_dims) - result = _rechunk(iris.cube.Cube(data), iris.cube.Cube(tgt_grid)) + expected = src_cube.core_data().chunks + result = _rechunk(src_cube, tgt_grid_cube) - assert result.core_data().chunks == data.chunks + assert result.core_data().chunks == expected def test_no_rechunk_non_lazy(): @@ -319,40 +400,6 @@ def test_no_rechunk_non_lazy(): assert result.data is cube.data -def test_no_rechunk_unsupported_grid(): - """Test that 2D target coordinates are ignored. - - Because they are not supported at the moment. This could be - implemented at a later stage if needed. 
- """ - cube = iris.cube.Cube(da.arange(2 * 4).reshape([1, 2, 4])) - tgt_grid_dims = (5, 10) - tgt_data = da.empty(tgt_grid_dims, dtype=np.float32) - tgt_grid = iris.cube.Cube(tgt_data) - lat_points = np.linspace(-90., 90., tgt_grid_dims[0], endpoint=True) - lon_points = np.linspace(0., 360., tgt_grid_dims[1]) - - tgt_grid.add_aux_coord( - iris.coords.AuxCoord( - np.broadcast_to(lat_points.reshape(-1, 1), tgt_grid_dims), - standard_name='latitude', - ), - (0, 1), - ) - tgt_grid.add_aux_coord( - iris.coords.AuxCoord( - np.broadcast_to(lon_points.reshape(1, -1), tgt_grid_dims), - standard_name='longitude', - ), - (0, 1), - ) - - expected_chunks = cube.core_data().chunks - result = _rechunk(cube, tgt_grid) - assert result is cube - assert result.core_data().chunks == expected_chunks - - @pytest.mark.parametrize('scheme', SCHEMES) def test_regridding_weights_use_cache(scheme, cube_10x10, cube_30x30, mocker): """Test `regrid.`""" diff --git a/tests/unit/preprocessor/_regrid_iris_esmf_regrid/__init__.py b/tests/unit/preprocessor/_regrid_iris_esmf_regrid/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/unit/preprocessor/_regrid_iris_esmf_regrid/test_regrid_iris_esmf_regrid.py b/tests/unit/preprocessor/_regrid_iris_esmf_regrid/test_regrid_iris_esmf_regrid.py new file mode 100644 index 0000000000..5896f442ba --- /dev/null +++ b/tests/unit/preprocessor/_regrid_iris_esmf_regrid/test_regrid_iris_esmf_regrid.py @@ -0,0 +1,164 @@ +"""Tests for `esmvalcore.preprocessor._regrid_iris_esmf_regrid`.""" +import iris.cube +import numpy as np +import pytest +import esmf_regrid + +from esmvalcore.preprocessor.regrid_schemes import IrisESMFRegrid + + +class TestIrisESMFRegrid: + + def test_repr(self): + scheme = IrisESMFRegrid(method='bilinear') + expected = ("IrisESMFRegrid(method='bilinear', use_src_mask=True, " + "use_tgt_mask=True, collapse_src_mask_along=('Z',), " + "collapse_tgt_mask_along=('Z',), tgt_location=None, " + "mdtol=None)") + assert repr(scheme) == expected + + def test_invalid_method_raises(self): + msg = ("`method` should be one of 'bilinear', 'conservative', or " + "'nearest'") + with pytest.raises(ValueError, match=msg): + IrisESMFRegrid(method='x') + + def test_unused_mdtol_raises(self): + msg = ("`mdol` can only be specified when `method='bilinear'` " + "or `method='conservative'`") + with pytest.raises(TypeError, match=msg): + IrisESMFRegrid(method='nearest', mdtol=1) + + def test_unused_src_resolution_raises(self): + msg = ("`src_resolution` can only be specified when " + "`method='conservative'`") + with pytest.raises(TypeError, match=msg): + IrisESMFRegrid(method='nearest', src_resolution=100) + + def test_unused_tgt_resolution_raises(self): + msg = ("`tgt_resolution` can only be specified when " + "`method='conservative'`") + with pytest.raises(TypeError, match=msg): + IrisESMFRegrid(method='nearest', tgt_resolution=100) + + def test_get_mask_2d(self): + + cube = iris.cube.Cube( + np.ma.masked_array(np.arange(4), mask=[1, 0, 1, 0]).reshape( + (2, 2)), + dim_coords_and_dims=( + [ + iris.coords.DimCoord( + np.arange(2), + standard_name='latitude', + ), + 0, + ], + [ + iris.coords.DimCoord( + np.arange(2), + standard_name='longitude', + ), + 1, + ], + ), + ) + mask = IrisESMFRegrid._get_mask(cube, ("Z", )) + np.testing.assert_array_equal(mask, cube.data.mask) + + def test_get_mask_3d(self): + + cube = iris.cube.Cube( + np.ma.masked_array(np.arange(4), mask=[1, 0, 1, 1]).reshape( + (2, 1, 2)), + dim_coords_and_dims=( + [ + iris.coords.DimCoord( + 
np.arange(2), + standard_name='air_pressure', + ), + 0, + ], + [ + iris.coords.DimCoord( + np.arange(1), + standard_name='latitude', + ), + 1, + ], + [ + iris.coords.DimCoord( + np.arange(2), + standard_name='longitude', + ), + 2, + ], + ), + ) + mask = IrisESMFRegrid._get_mask(cube, ("Z", )) + np.testing.assert_array_equal(mask, np.array([[1, 0]], dtype=bool)) + + def test_get_mask_3d_odd_dim_order(self): + + cube = iris.cube.Cube( + np.ma.masked_array(np.arange(4), mask=[1, 0, 1, 1]).reshape( + (1, 2, 2)), + dim_coords_and_dims=( + [ + iris.coords.DimCoord( + np.arange(1), + standard_name='latitude', + ), + 0, + ], + [ + iris.coords.DimCoord( + np.arange(2), + standard_name='air_pressure', + ), + 1, + ], + [ + iris.coords.DimCoord( + np.arange(2), + standard_name='longitude', + ), + 2, + ], + ), + ) + mask = IrisESMFRegrid._get_mask(cube, ["air_pressure"]) + np.testing.assert_array_equal(mask, np.array([[1, 0]], dtype=bool)) + + @pytest.mark.parametrize("scheme", [ + ('bilinear', esmf_regrid.ESMFBilinearRegridder), + ('conservative', esmf_regrid.ESMFAreaWeightedRegridder), + ('nearest', esmf_regrid.ESMFNearestRegridder), + ]) + def test_regrid(self, scheme): + method, scheme_cls = scheme + cube = iris.cube.Cube( + np.ma.arange(4).reshape((2, 2)), + dim_coords_and_dims=( + [ + iris.coords.DimCoord( + np.arange(2), + standard_name='latitude', + units='degrees', + ), + 0, + ], + [ + iris.coords.DimCoord( + np.arange(2), + standard_name='longitude', + units='degrees', + ), + 1, + ], + ), + ) + + scheme = IrisESMFRegrid(method=method) + regridder = scheme.regridder(cube, cube) + assert isinstance(regridder, scheme_cls) diff --git a/tests/unit/preprocessor/_volume/test_volume.py b/tests/unit/preprocessor/_volume/test_volume.py index f055d2f7b2..d45b9bd744 100644 --- a/tests/unit/preprocessor/_volume/test_volume.py +++ b/tests/unit/preprocessor/_volume/test_volume.py @@ -402,6 +402,7 @@ def test_extract_volume_mean(self): """Test to extract the top two layers and compute the weighted average of a cube.""" grid_volume = calculate_volume(self.grid_4d) + assert isinstance(grid_volume, np.ndarray) measure = iris.coords.CellMeasure(grid_volume, standard_name='ocean_volume', units='m3',
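
The new `TestIrisESMFRegrid` module above only asserts that the scheme hands back the expected `esmf_regrid` regridder classes. For orientation, a minimal usage sketch of that same API follows; it is not part of the patch, it assumes `iris-esmf-regrid` and its ESMF dependency are installed, and it mirrors the tiny 2x2 cube built in `test_regrid` above.

import iris.coords
import iris.cube
import numpy as np

from esmvalcore.preprocessor.regrid_schemes import IrisESMFRegrid

# Build a 2x2 cube with 1D latitude/longitude dimension coordinates,
# as in ``test_regrid`` above.
lat = iris.coords.DimCoord(
    np.arange(2), standard_name='latitude', units='degrees'
)
lon = iris.coords.DimCoord(
    np.arange(2), standard_name='longitude', units='degrees'
)
cube = iris.cube.Cube(
    np.ma.arange(4, dtype=np.float32).reshape((2, 2)),
    dim_coords_and_dims=[(lat, 0), (lon, 1)],
)

# 'bilinear', 'conservative' and 'nearest' are the methods exercised above.
scheme = IrisESMFRegrid(method='bilinear')

# ``regridder`` returns an ``esmf_regrid`` regridder instance
# (``ESMFBilinearRegridder`` for this method), which is callable on cubes
# defined on the source grid.
regridder = scheme.regridder(cube, cube)
result = regridder(cube)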