From 33bcce594cb486fafa531caa2e92e5df4ade54c5 Mon Sep 17 00:00:00 2001
From: Soroosh Mani <77082694+SorooshMani-NOAA@users.noreply.github.com>
Date: Wed, 20 Dec 2023 09:24:30 -0500
Subject: [PATCH] Add probability field calc and plots (#127)

* Add probability field calculation and plots
* Fix vmax cutoff issue
---
 ensembleperturbation/plotting/surrogate.py    |  73 +++++++++
 .../uncertainty_quantification/surrogate.py   | 151 ++++++++++++++++++
 2 files changed, 224 insertions(+)

diff --git a/ensembleperturbation/plotting/surrogate.py b/ensembleperturbation/plotting/surrogate.py
index 861b2f3e..607ca144 100644
--- a/ensembleperturbation/plotting/surrogate.py
+++ b/ensembleperturbation/plotting/surrogate.py
@@ -564,3 +564,76 @@ def plot_kl_surrogate_fit(
 
     if output_filename is not None:
         figure.savefig(output_filename, dpi=200, bbox_inches='tight')
+
+
+def plot_selected_probability_fields(
+    node_prob_field: xarray.Dataset, level_list: list, output_directory: PathLike
+):
+    probabilities = node_prob_field.probabilities
+
+    sources = node_prob_field['source'].values
+    if output_directory is not None:
+        if not isinstance(output_directory, Path):
+            output_directory = Path(output_directory)
+
+    bounds = numpy.array(
+        [
+            node_prob_field['x'].min(),
+            node_prob_field['y'].min(),
+            node_prob_field['x'].max(),
+            node_prob_field['y'].max(),
+        ]
+    )
+    vmax = 1 + numpy.finfo(float).eps
+    vmin = 0
+    for lvl in level_list:
+        figure = pyplot.figure()
+        figure.set_size_inches(10, 10 / 1.61803398875)
+        figure.suptitle(f'Probability of water level exceeding {lvl}-m')
+        for index, source in enumerate(sources):
+            map_axis = figure.add_subplot(2, len(sources), index + 1)
+            map_axis.title.set_text(f'{source}')
+            countries = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
+
+            map_axis.set_xlim((bounds[0], bounds[2]))
+            map_axis.set_ylim((bounds[1], bounds[3]))
+
+            xlim = map_axis.get_xlim()
+            ylim = map_axis.get_ylim()
+
+            countries.plot(color='lightgrey', ax=map_axis)
+
+            points = numpy.vstack(
+                (
+                    node_prob_field['x'],
+                    node_prob_field['y'],
+                    probabilities.sel(level=lvl, source=source),
+                )
+            ).T
+            if 'element' not in node_prob_field:
+                im = plot_points(
+                    points=points, axis=map_axis, add_colorbar=False, vmax=vmax, vmin=vmin,
+                )
+            else:
+                im = plot_surface(
+                    points=points,
+                    element_table=node_prob_field['element'].values,
+                    axis=map_axis,
+                    add_colorbar=False,
+                    levels=numpy.linspace(vmin, vmax, 25),
+                    extend='neither',
+                )
+
+            map_axis.set_xlim(xlim)
+            map_axis.set_ylim(ylim)
+
+        pyplot.subplots_adjust(wspace=0.02, right=0.96)
+        cax = pyplot.axes([0.95, 0.55, 0.015, 0.3])
+        cbar = figure.colorbar(im, extend='neither', cax=cax)
+
+        if output_directory is not None:
+            figure.savefig(
+                output_directory / f'probability_exceeding_{lvl}m.png',
+                dpi=200,
+                bbox_inches='tight',
+            )
diff --git a/ensembleperturbation/uncertainty_quantification/surrogate.py b/ensembleperturbation/uncertainty_quantification/surrogate.py
index db2c6163..dc158094 100644
--- a/ensembleperturbation/uncertainty_quantification/surrogate.py
+++ b/ensembleperturbation/uncertainty_quantification/surrogate.py
@@ -606,3 +606,154 @@ def compute_surrogate_percentiles(
     out = out.reshape(q.shape + shape)
 
     return out
+
+
+def probability_field_from_samples(
+    samples: xarray.Dataset,
+    levels: List[float],
+    surrogate_model: numpoly.ndpoly,
+    distribution: chaospy.Distribution,
+    minimum_allowable_value: float = None,
+    convert_from_log_scale: Union[bool, float] = False,
+    convert_from_depths: Union[bool, float] = False,
+) -> xarray.DataArray:
+
+    LOGGER.info(f'calculating {len(levels)} probability field(s): {levels}')
+
+    surrogate_prob_field = compute_surrogate_probability_field(
+        poly=surrogate_model,
+        levels=levels,
+        dist=distribution,
+        minimum_allowable_value=minimum_allowable_value,
+        convert_from_log_scale=convert_from_log_scale,
+        convert_from_depths=convert_from_depths,
+        depths=samples['depth'],
+    )
+
+    surrogate_prob_field = xarray.DataArray(
+        surrogate_prob_field,
+        coords={
+            'level': levels,
+            **{
+                coord: values
+                for coord, values in samples.coords.items()
+                if coord not in ['run', 'type']
+            },
+        },
+        dims=('level', *(dim for dim in samples.dims if dim not in ['run', 'type'])),
+    )
+
+    return surrogate_prob_field
+
+
+def probability_field_from_surrogate(
+    levels: List[float],
+    surrogate_model: numpoly.ndpoly,
+    distribution: chaospy.Distribution,
+    training_set: xarray.Dataset,
+    minimum_allowable_value: float = None,
+    convert_from_log_scale: Union[bool, float] = False,
+    convert_from_depths: Union[bool, float] = False,
+    element_table: xarray.DataArray = None,
+    filename: PathLike = None,
+) -> xarray.Dataset:
+
+    if filename is not None and not isinstance(filename, Path):
+        filename = Path(filename)
+
+    if filename is None or not filename.exists():
+        surrogate_prob_field = probability_field_from_samples(
+            samples=training_set,
+            levels=levels,
+            surrogate_model=surrogate_model,
+            distribution=distribution,
+            minimum_allowable_value=minimum_allowable_value,
+            convert_from_log_scale=convert_from_log_scale,
+            convert_from_depths=convert_from_depths,
+        )
+
+        # before evaluating prob. field for model set null water elevation to the ground elevation
+        # training_set = numpy.fmax(training_set, -training_set['depth'])
+        if minimum_allowable_value is not None:
+            too_small = (training_set + training_set['depth']).values < minimum_allowable_value
+            training_set.values[too_small] = numpy.nan
+
+        ds1, ds2 = xarray.broadcast(training_set, surrogate_prob_field['level'])
+        modeled_prob_field = (ds1 >= ds2).sum(dim='run') / len(training_set.run)
+
+        node_prob_field = xarray.combine_nested(
+            [surrogate_prob_field, modeled_prob_field], concat_dim='source'
+        ).assign_coords(source=['surrogate', 'model'])
+
+        node_prob_field = node_prob_field.to_dataset(name='probabilities')
+
+        node_prob_field = node_prob_field.assign(
+            differences=numpy.fabs(surrogate_prob_field - modeled_prob_field)
+        )
+
+        if element_table is not None:
+            node_prob_field = node_prob_field.assign_coords({'element': element_table})
+
+        if filename is not None:
+            LOGGER.info(f'saving prob_field to "{filename}"')
+            node_prob_field.to_netcdf(filename)
+    else:
+        LOGGER.info(f'loading prob_field from "{filename}"')
+        node_prob_field = xarray.open_dataset(filename)
+
+    return node_prob_field
+
+
+def compute_surrogate_probability_field(
+    poly: numpoly.ndpoly,
+    levels: List[float],
+    dist: chaospy.Distribution,
+    sample: int = 10000,
+    minimum_allowable_value: float = None,
+    convert_from_log_scale: Union[bool, float] = False,
+    convert_from_depths: Union[bool, float] = False,
+    depths: xarray.DataArray = None,
+    **kws,
+):
+
+    poly = chaospy.aspolynomial(poly)
+    shape = poly.shape
+    poly = poly.ravel()
+
+    levels = numpy.asarray(levels).ravel()
+    dim = len(dist)
+
+    # Interior
+    Z = dist.sample(sample, **kws).reshape(len(dist), sample)
+    poly1 = poly(*Z)
+
+    # Min/max
+    ext = numpy.mgrid[(slice(0, 2, 1),) * dim].reshape(dim, 2 ** dim).T
+    ext = numpy.where(ext, dist.lower, dist.upper).T
+    poly2 = poly(*ext)
+    poly2 = numpy.array([_ for _ in poly2.T if not numpy.any(numpy.isnan(_))]).T
+
+    # Finish
+    if poly2.shape:
+        poly1 = numpy.concatenate([poly1, poly2], -1)
+    if isinstance(convert_from_log_scale, float):
+        poly1 = convert_from_log_scale ** poly1
+    elif convert_from_log_scale:
+        poly1 = numpy.exp(poly1)
+    samples = poly1.shape[1]
+
+    # adjustments and elev corrections
+    if isinstance(convert_from_depths, (float, numpy.ndarray)):
+        poly1 -= convert_from_depths
+    if minimum_allowable_value is not None:
+        too_small = poly1 < minimum_allowable_value
+        poly1[too_small] = numpy.nan
+    if isinstance(convert_from_depths, (float, numpy.ndarray)) or convert_from_depths:
+        # TODO: Sanity check for depth vs poly shapes
+        poly1 -= depths.values[:, None]
+
+    out = (poly1[:, :, None] > (levels[None, None, :])).sum(axis=1).T / samples
+
+    out = out.reshape(levels.shape + shape)
+
+    return out