From 33bcce594cb486fafa531caa2e92e5df4ade54c5 Mon Sep 17 00:00:00 2001
From: Soroosh Mani <77082694+SorooshMani-NOAA@users.noreply.github.com>
Date: Wed, 20 Dec 2023 09:24:30 -0500
Subject: [PATCH] Add probability field calc and plots (#127)

* Add probability field calculation and plots

* Fix vmax cutoff issue
---
 ensembleperturbation/plotting/surrogate.py    |  88 ++++++++
 .../uncertainty_quantification/surrogate.py   | 210 ++++++++++++++++++
 2 files changed, 298 insertions(+)

diff --git a/ensembleperturbation/plotting/surrogate.py b/ensembleperturbation/plotting/surrogate.py
index 861b2f3e..607ca144 100644
--- a/ensembleperturbation/plotting/surrogate.py
+++ b/ensembleperturbation/plotting/surrogate.py
@@ -564,3 +564,91 @@ def plot_kl_surrogate_fit(
 
     if output_filename is not None:
         figure.savefig(output_filename, dpi=200, bbox_inches='tight')
+
+
+def plot_selected_probability_fields(
+    node_prob_field: xarray.Dataset, level_list: list, output_directory: PathLike
+):
+    """
+    plot maps of the exceedance probability; one figure per level, one panel per source
+
+    :param node_prob_field: dataset with ``probabilities`` (by ``level`` and ``source``), ``x``, ``y`` and optionally ``element``
+    :param level_list: levels to plot, each selected from the ``level`` coordinate
+    :param output_directory: where to save ``probability_exceeding_{level}m.png``; skipped if ``None``
+    """
+
+    probabilities = node_prob_field.probabilities
+    sources = node_prob_field['source'].values
+
+    if output_directory is not None:
+        output_directory = Path(output_directory)
+
+    bounds = numpy.array(
+        [
+            node_prob_field['x'].min(),
+            node_prob_field['y'].min(),
+            node_prob_field['x'].max(),
+            node_prob_field['y'].max(),
+        ]
+    )
+
+    # the color scale spans [0, 1]; the upper limit is padded by machine epsilon,
+    # presumably so that probabilities of exactly 1 are not cut off
+    vmin = 0
+    vmax = 1 + numpy.finfo(float).eps
+
+    # the basemap is the same for every panel, so read it only once
+    countries = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
+
+    for lvl in level_list:
+        figure = pyplot.figure()
+        figure.set_size_inches(10, 10 / 1.61803398875)
+        figure.suptitle(f'Probability of water level exceeding {lvl}-m')
+        for index, source in enumerate(sources):
+            map_axis = figure.add_subplot(2, len(sources), index + 1)
+            map_axis.title.set_text(f'{source}')
+
+            # fix the view to the data extent and remember it, because the
+            # plotting calls below may change the axis limits
+            map_axis.set_xlim((bounds[0], bounds[2]))
+            map_axis.set_ylim((bounds[1], bounds[3]))
+            xlim = map_axis.get_xlim()
+            ylim = map_axis.get_ylim()
+
+            countries.plot(color='lightgrey', ax=map_axis)
+
+            points = numpy.vstack(
+                (
+                    node_prob_field['x'],
+                    node_prob_field['y'],
+                    probabilities.sel(level=lvl, source=source),
+                )
+            ).T
+            if 'element' not in node_prob_field:
+                im = plot_points(
+                    points=points, axis=map_axis, add_colorbar=False, vmax=vmax, vmin=vmin,
+                )
+            else:
+                im = plot_surface(
+                    points=points,
+                    element_table=node_prob_field['element'].values,
+                    axis=map_axis,
+                    add_colorbar=False,
+                    levels=numpy.linspace(vmin, vmax, 25),
+                    extend='neither',
+                )
+
+            map_axis.set_xlim(xlim)
+            map_axis.set_ylim(ylim)
+
+        # one colorbar per figure, drawn from the mappable of the last panel
+        pyplot.subplots_adjust(wspace=0.02, right=0.96)
+        cax = pyplot.axes([0.95, 0.55, 0.015, 0.3])
+        figure.colorbar(im, extend='neither', cax=cax)
+
+        if output_directory is not None:
+            figure.savefig(
+                output_directory / f'probability_exceeding_{lvl}m.png',
+                dpi=200,
+                bbox_inches='tight',
+            )
diff --git a/ensembleperturbation/uncertainty_quantification/surrogate.py b/ensembleperturbation/uncertainty_quantification/surrogate.py
index db2c6163..dc158094 100644
--- a/ensembleperturbation/uncertainty_quantification/surrogate.py
+++ b/ensembleperturbation/uncertainty_quantification/surrogate.py
@@ -606,3 +606,213 @@ def compute_surrogate_percentiles(
     out = out.reshape(q.shape + shape)
 
     return out
+
+
+def probability_field_from_samples(
+    samples: xarray.Dataset,
+    levels: List[float],
+    surrogate_model: numpoly.ndpoly,
+    distribution: chaospy.Distribution,
+    minimum_allowable_value: float = None,
+    convert_from_log_scale: Union[bool, float] = False,
+    convert_from_depths: Union[bool, float] = False,
+) -> xarray.DataArray:
+    """
+    compute the surrogate exceedance probabilities, labeled with the coordinates of ``samples``
+
+    :param samples: provides the ``depth`` values and the output coordinates / dimensions
+    :param levels: levels to compute the probability of exceedance for
+    :param surrogate_model: polynomial surrogate to evaluate
+    :param distribution: distribution the surrogate inputs are sampled from
+    :param minimum_allowable_value: passed to ``compute_surrogate_probability_field``
+    :param convert_from_log_scale: passed to ``compute_surrogate_probability_field``
+    :param convert_from_depths: passed to ``compute_surrogate_probability_field``
+    :return: probabilities with ``level`` as the leading dimension
+    """
+
+    LOGGER.info(f'calculating {len(levels)} probability field(s): {levels}')
+
+    surrogate_prob_field = compute_surrogate_probability_field(
+        poly=surrogate_model,
+        levels=levels,
+        dist=distribution,
+        minimum_allowable_value=minimum_allowable_value,
+        convert_from_log_scale=convert_from_log_scale,
+        convert_from_depths=convert_from_depths,
+        depths=samples['depth'],
+    )
+
+    # every coordinate / dimension of the samples except these is carried over
+    excluded = ['run', 'type']
+    surrogate_prob_field = xarray.DataArray(
+        surrogate_prob_field,
+        coords={
+            'level': levels,
+            **{
+                coord: values
+                for coord, values in samples.coords.items()
+                if coord not in excluded
+            },
+        },
+        dims=('level', *(dim for dim in samples.dims if dim not in excluded)),
+    )
+
+    return surrogate_prob_field
+
+
+def probability_field_from_surrogate(
+    levels: List[float],
+    surrogate_model: numpoly.ndpoly,
+    distribution: chaospy.Distribution,
+    training_set: xarray.Dataset,
+    minimum_allowable_value: float = None,
+    convert_from_log_scale: Union[bool, float] = False,
+    convert_from_depths: Union[bool, float] = False,
+    element_table: xarray.DataArray = None,
+    filename: PathLike = None,
+) -> xarray.Dataset:
+    """
+    build exceedance probability fields from both the surrogate and the modeled runs
+
+    if ``filename`` already exists it is opened and returned without any computation
+
+    :param levels: levels to compute the probability of exceedance for
+    :param surrogate_model: polynomial surrogate to evaluate
+    :param distribution: distribution the surrogate inputs are sampled from
+    :param training_set: modeled values with a ``run`` dimension and ``depth`` values; left unmodified
+    :param minimum_allowable_value: modeled values with ``value + depth`` below this are masked
+    :param convert_from_log_scale: passed on to the surrogate calculation
+    :param convert_from_depths: passed on to the surrogate calculation
+    :param element_table: if given, attached as the ``element`` coordinate of a newly computed result
+    :param filename: NetCDF file to load the result from if it exists, or else to save it to
+    :return: when computed, ``probabilities`` by ``source`` (``surrogate``, ``model``) and ``differences``
+    """
+
+    if filename is not None and not isinstance(filename, Path):
+        filename = Path(filename)
+
+    if filename is None or not filename.exists():
+        surrogate_prob_field = probability_field_from_samples(
+            samples=training_set,
+            levels=levels,
+            surrogate_model=surrogate_model,
+            distribution=distribution,
+            minimum_allowable_value=minimum_allowable_value,
+            convert_from_log_scale=convert_from_log_scale,
+            convert_from_depths=convert_from_depths,
+        )
+
+        if minimum_allowable_value is not None:
+            # mask on a copy so that the caller's training set is not overwritten
+            # NOTE(review): `.values` indexing implies a DataArray, not the annotated Dataset
+            training_set = training_set.copy()
+            too_small = (training_set + training_set['depth']).values < minimum_allowable_value
+            training_set.values[too_small] = numpy.nan
+
+        # NaN compares false, so masked values never count as reaching a level
+        # NOTE(review): this uses `>=` while the surrogate field uses a strict `>`;
+        # confirm which comparison is intended
+        ds1, ds2 = xarray.broadcast(training_set, surrogate_prob_field['level'])
+        modeled_prob_field = (ds1 >= ds2).sum(dim='run') / len(training_set.run)
+
+        node_prob_field = xarray.combine_nested(
+            [surrogate_prob_field, modeled_prob_field], concat_dim='source'
+        ).assign_coords(source=['surrogate', 'model'])
+
+        node_prob_field = node_prob_field.to_dataset(name='probabilities')
+
+        node_prob_field = node_prob_field.assign(
+            differences=numpy.fabs(surrogate_prob_field - modeled_prob_field)
+        )
+
+        if element_table is not None:
+            node_prob_field = node_prob_field.assign_coords({'element': element_table})
+
+        if filename is not None:
+            LOGGER.info(f'saving prob_field to "{filename}"')
+            node_prob_field.to_netcdf(filename)
+    else:
+        LOGGER.info(f'loading prob_field from "{filename}"')
+        node_prob_field = xarray.open_dataset(filename)
+
+    return node_prob_field
+
+
+def compute_surrogate_probability_field(
+    poly: numpoly.ndpoly,
+    levels: List[float],
+    dist: chaospy.Distribution,
+    sample: int = 10000,
+    minimum_allowable_value: float = None,
+    convert_from_log_scale: Union[bool, float] = False,
+    convert_from_depths: Union[bool, float] = False,
+    depths: xarray.DataArray = None,
+    **kws,
+):
+    """
+    estimate by sampling the fraction of surrogate evaluations that exceed each level
+
+    :param poly: polynomial(s) to evaluate
+    :param levels: levels to compute the probability of exceedance for
+    :param dist: distribution of the polynomial inputs
+    :param sample: number of random samples to draw from ``dist``
+    :param minimum_allowable_value: evaluations below this become NaN (never exceeding, still counted in the total)
+    :param convert_from_log_scale: a float is used as the base of the exponentiation; otherwise truthy applies ``numpy.exp``
+    :param convert_from_depths: a float / array is subtracted before the minimum check; if set at all, ``depths`` is subtracted after it
+    :param depths: one value per (flattened) output of ``poly``; required if ``convert_from_depths`` is set
+    :param kws: passed to ``dist.sample``
+    :return: array of shape ``(number of levels, *poly.shape)``
+    :raises ValueError: if ``convert_from_depths`` is set but ``depths`` is not given
+    """
+
+    subtract_offset = isinstance(convert_from_depths, (float, numpy.ndarray))
+    subtract_depths = subtract_offset or bool(convert_from_depths)
+    if subtract_depths and depths is None:
+        raise ValueError('`depths` must be provided when `convert_from_depths` is set')
+
+    poly = chaospy.aspolynomial(poly)
+    shape = poly.shape
+    poly = poly.ravel()
+
+    levels = numpy.asarray(levels).ravel()
+    dim = len(dist)
+
+    # Interior
+    Z = dist.sample(sample, **kws).reshape(dim, sample)
+    poly1 = poly(*Z)
+
+    # Min/max: also evaluate at every combination of the lower / upper bounds,
+    # discarding the combinations for which any evaluation is NaN
+    ext = numpy.mgrid[(slice(0, 2, 1),) * dim].reshape(dim, 2 ** dim).T
+    ext = numpy.where(ext, dist.lower, dist.upper).T
+    poly2 = poly(*ext)
+    poly2 = numpy.array([_ for _ in poly2.T if not numpy.any(numpy.isnan(_))]).T
+
+    # Finish
+    # the shape of an array is never empty, so test the size instead; otherwise
+    # the concatenation fails when every bound combination has been discarded
+    if poly2.size:
+        poly1 = numpy.concatenate([poly1, poly2], -1)
+    if isinstance(convert_from_log_scale, float):
+        poly1 = convert_from_log_scale ** poly1
+    elif convert_from_log_scale:
+        poly1 = numpy.exp(poly1)
+    samples = poly1.shape[1]
+
+    # adjustments and elev corrections
+    if subtract_offset:
+        poly1 -= convert_from_depths
+    if minimum_allowable_value is not None:
+        too_small = poly1 < minimum_allowable_value
+        poly1[too_small] = numpy.nan
+    if subtract_depths:
+        # TODO: Sanity check for depth vs poly shapes
+        poly1 -= depths.values[:, None]
+
+    # count one level at a time so that only a 2D mask is held in memory,
+    # instead of an (outputs, samples, levels) cube
+    out = numpy.array([(poly1 > level).sum(axis=1) for level in levels]) / samples
+
+    out = out.reshape(levels.shape + shape)
+
+    return out