From faf88a81ff3dba2954111f13e55329d7cc8c6f63 Mon Sep 17 00:00:00 2001 From: Rune Michael Dominik Date: Fri, 16 Feb 2024 14:09:12 +0100 Subject: [PATCH 1/4] Add tests for more correct handling of fill-values in RadMaxEstimator --- ...st_component_estimator_specific_classes.py | 191 +++++++++++++++++- 1 file changed, 189 insertions(+), 2 deletions(-) diff --git a/pyirf/interpolation/tests/test_component_estimator_specific_classes.py b/pyirf/interpolation/tests/test_component_estimator_specific_classes.py index c1f77913b..dbca00b51 100644 --- a/pyirf/interpolation/tests/test_component_estimator_specific_classes.py +++ b/pyirf/interpolation/tests/test_component_estimator_specific_classes.py @@ -1,8 +1,10 @@ import astropy.units as u import numpy as np -from pyirf.utils import cone_solid_angle +import pytest from scipy.stats import expon +from pyirf.utils import cone_solid_angle + def test_EnergyDispersionEstimator(prod5_irfs): from pyirf.interpolation import EnergyDispersionEstimator, QuantileInterpolator @@ -74,7 +76,9 @@ def hist(pnt): interp = estimator(target_point=zen_pnt[[1]]) - probability = (interp * omegas[np.newaxis, np.newaxis, np.newaxis, ...]).to_value(u.one) + probability = (interp * omegas[np.newaxis, np.newaxis, np.newaxis, ...]).to_value( + u.one + ) assert np.max(probability) <= 1 assert np.min(probability) >= 0 @@ -177,6 +181,7 @@ def test_RadMaxEstimator(): estimator = RadMaxEstimator( grid_points=grid_points, rad_max=rad_max, + fill_value=None, interpolator_cls=GridDataInterpolator, interpolator_kwargs=None, extrapolator_cls=None, @@ -186,3 +191,185 @@ def test_RadMaxEstimator(): assert interp.shape == (1, *rad_max_1.shape) assert np.allclose(interp, 1.5 * rad_max_1) + + +def test_RadMaxEstimator_fill_val_handling_1D(): + from pyirf.interpolation import ( + GridDataInterpolator, + ParametrizedNearestNeighborSearcher, + ParametrizedNearestSimplexExtrapolator, + RadMaxEstimator, + ) + + grid_points_1D = np.array([[0], [1], [2]]) + + rad_max_1 = np.array([[0.95, 0.95, 0.5, 0.95, 0.95], [0.95, 0.5, 0.3, 0.5, 0.95]]) + rad_max_2 = np.array([[0.95, 0.5, 0.3, 0.5, 0.95], [0.5, 0.3, 0.2, 0.9, 0.5]]) + rad_max_3 = np.array([[0.95, 0.4, 0.2, 0.4, 0.5], [0.5, 0.3, 0, 0.94, 0.6]]) + + rad_max_1D = np.array([rad_max_1, rad_max_2, rad_max_3]) + + truth_0 = np.array([[0.95, 0.95, 0.7, 0.95, 0.95], [0.95, 0.7, 0.4, 0.1, 0.95]]) + truth_1_5 = np.array([[0.95, 0.95, 0.4, 0.95, 0.95], [0.95, 0.4, 0.25, 0.7, 0.95]]) + + truth_4 = np.array([[0.95, 0.3, 0.1, 0.3, 0.95], [0.5, 0.3, 0, 0.95, 0.7]]) + + # State fill value + estim = RadMaxEstimator( + grid_points=grid_points_1D, + rad_max=rad_max_1D, + fill_value=0.95, + interpolator_cls=GridDataInterpolator, + interpolator_kwargs=None, + extrapolator_cls=ParametrizedNearestSimplexExtrapolator, + extrapolator_kwargs=None, + ) + + assert np.allclose(estim(np.array([-1])), truth_0) + assert np.allclose(estim(np.array([0.5])), truth_1_5) + assert np.allclose(estim(np.array([3])), truth_4) + + # Infer fill-val as max of rad-max vals + estim = RadMaxEstimator( + grid_points=grid_points_1D, + rad_max=rad_max_1D, + fill_value="infer", + interpolator_cls=GridDataInterpolator, + interpolator_kwargs=None, + extrapolator_cls=ParametrizedNearestSimplexExtrapolator, + extrapolator_kwargs=None, + ) + + assert np.allclose(estim(np.array([0.5])), truth_1_5) + assert np.allclose(estim(np.array([3])), truth_4) + + # Nearest neighbor cases + estim = RadMaxEstimator( + grid_points=grid_points_1D, + rad_max=rad_max_1D, + fill_value="infer", + interpolator_cls=ParametrizedNearestNeighborSearcher, + interpolator_kwargs=None, + extrapolator_cls=ParametrizedNearestNeighborSearcher, + extrapolator_kwargs=None, + ) + + assert np.allclose(estim(np.array([0.25])), rad_max_1) + assert np.allclose(estim(np.array([3])), rad_max_3) + + # Ignore fill values + estim = RadMaxEstimator( + grid_points=grid_points_1D, + rad_max=rad_max_1D, + fill_value=None, + interpolator_cls=GridDataInterpolator, + interpolator_kwargs=None, + extrapolator_cls=None, + extrapolator_kwargs=None, + ) + + assert np.allclose(estim(np.array([0.5])), (rad_max_1 + rad_max_2) / 2) + + +def test_RadMaxEstimator_fill_val_handling_2D(): + from pyirf.interpolation import ( + GridDataInterpolator, + ParametrizedNearestNeighborSearcher, + ParametrizedNearestSimplexExtrapolator, + RadMaxEstimator, + ) + + grid_points_2D = np.array([[0, 0], [1, 0], [0, 1]]) + + rad_max_1 = np.array([[0.95, 0.95, 0.5, 0.95, 0.95], [0.5, 0.5, 0.3, 0.5, 0.5]]) + rad_max_2 = np.array([[0.95, 0.95, 0.5, 0.5, 0.95], [0.95, 0.95, 0.95, 0.5, 0.95]]) + rad_max_3 = np.array([[0.95, 0.5, 0.5, 0.4, 0.5], [0.4, 0.95, 0, 0.5, 0.95]]) + + rad_max_2D = np.array([rad_max_1, rad_max_2, rad_max_3]) + + # Only test for combinatoric cases, thus inter- and extrapolation have the same + # result in this special test case. Correct estimation is checked elsewhere + truth = np.array([[0.95, 0.95, 0.5, 0.4, 0.95], [0.4, 0.95, 0, 0.5, 0.95]]) + + # State fill-value + estim = RadMaxEstimator( + grid_points=grid_points_2D, + rad_max=rad_max_2D, + fill_value=0.95, + interpolator_cls=GridDataInterpolator, + interpolator_kwargs=None, + extrapolator_cls=ParametrizedNearestSimplexExtrapolator, + extrapolator_kwargs=None, + ) + + assert np.allclose(estim(np.array([0.5, 0.5])), truth) + assert np.allclose(estim(np.array([-1, -1])), truth) + + # Infer fill-val as max of rad-max vals + estim = RadMaxEstimator( + grid_points=grid_points_2D, + rad_max=rad_max_2D, + fill_value="infer", + interpolator_cls=GridDataInterpolator, + interpolator_kwargs=None, + extrapolator_cls=ParametrizedNearestSimplexExtrapolator, + extrapolator_kwargs=None, + ) + + assert np.allclose(estim(np.array([0.5, 0.5])), truth) + assert np.allclose(estim(np.array([-1, -1])), truth) + + # Nearest neighbor cases + estim = RadMaxEstimator( + grid_points=grid_points_2D, + rad_max=rad_max_2D, + fill_value="infer", + interpolator_cls=ParametrizedNearestNeighborSearcher, + interpolator_kwargs=None, + extrapolator_cls=ParametrizedNearestNeighborSearcher, + extrapolator_kwargs=None, + ) + + assert np.allclose(estim(np.array([0.25, 0.25])), rad_max_1) + assert np.allclose(estim(np.array([0, 1.1])), rad_max_3) + + # Ignore fill-values + estim = RadMaxEstimator( + grid_points=grid_points_2D, + rad_max=rad_max_2D, + fill_value=None, + interpolator_cls=GridDataInterpolator, + interpolator_kwargs=None, + extrapolator_cls=None, + extrapolator_kwargs=None, + ) + + truth_interpolator = GridDataInterpolator(grid_points_2D, rad_max_2D) + + assert np.allclose( + estim(np.array([0.25, 0.25])), truth_interpolator(np.array([0.25, 0.25])) + ) + + +def test_RadMaxEstimator_fill_val_handling_3D(): + from pyirf.interpolation import GridDataInterpolator, RadMaxEstimator + + grid_points_3D = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]]) + + rad_max = np.array([[0.95], [0.95], [0.95], [0.95]]) + + estim = RadMaxEstimator( + grid_points=grid_points_3D, + rad_max=rad_max, + fill_value=0.95, + interpolator_cls=GridDataInterpolator, + interpolator_kwargs=None, + extrapolator_cls=None, + extrapolator_kwargs=None, + ) + + with pytest.raises( + ValueError, + match="Fill-value handling only supported in up to two grid dimensions.", + ): + estim(np.array([0.25, 0.25, 0.25])) From 169059b5a41668903870bd20038f6a96d21e4db3 Mon Sep 17 00:00:00 2001 From: Rune Michael Dominik Date: Fri, 16 Feb 2024 14:10:01 +0100 Subject: [PATCH 2/4] Add more correct handling of fill-values in RadMaxEstimator --- pyirf/interpolation/component_estimators.py | 120 +++++++++++++++++++- 1 file changed, 118 insertions(+), 2 deletions(-) diff --git a/pyirf/interpolation/component_estimators.py b/pyirf/interpolation/component_estimators.py index 740180e60..f45b6c1e2 100644 --- a/pyirf/interpolation/component_estimators.py +++ b/pyirf/interpolation/component_estimators.py @@ -3,7 +3,6 @@ import astropy.units as u import numpy as np -from pyirf.utils import cone_solid_angle from scipy.spatial import Delaunay from .base_extrapolators import DiscretePDFExtrapolator, ParametrizedExtrapolator @@ -13,7 +12,9 @@ PDFNormalization, ) from .griddata_interpolator import GridDataInterpolator +from .nearest_neighbor_searcher import BaseNearestNeighborSearcher from .quantile_interpolator import QuantileInterpolator +from .utils import find_nearest_simplex __all__ = [ "BaseComponentEstimator", @@ -477,6 +478,7 @@ def __init__( self, grid_points, rad_max, + fill_value=None, interpolator_cls=GridDataInterpolator, interpolator_kwargs=None, extrapolator_cls=None, @@ -495,6 +497,13 @@ def __init__( Grid of theta cuts. Dimensions but the first can in principle be freely chosen. Class is RAD_MAX_2D compatible, which would require shape=(n_points, n_energy_bins, n_fov_offset_bins). + fill_val: + Indicator of fill-value handling. If None, fill values are regarded as + normal values and no special handeling is applied. If "infer", fill values + will be infered as max of all values, if a value is provided, + it is used to flag fill values. Flagged fill-values are + not used for interpolation. Fill-value handling is only supported in + up to two grid dimensions. interpolator_cls: pyirf interpolator class, defaults to GridDataInterpolator. interpolator_kwargs: dict @@ -523,6 +532,20 @@ def __init__( extrapolator_kwargs=extrapolator_kwargs, ) + self.params = rad_max + + if fill_value is None: + self.fill_val = None + elif fill_value == "infer": + self.fill_val = np.max(self.params) + else: + self.fill_val = fill_value + + # If fill-values should be handled in 2D, construct a trinangulation + # to later determine in which simplex the target values is + if self.fill_val and (self.grid_dim == 2): + self.triangulation = Delaunay(self.grid_points) + def __call__(self, target_point): """ Estimating rad max table at target_point, inter-/extrapolates as needed and @@ -540,8 +563,101 @@ def __call__(self, target_point): effective areas. For RAD_MAX_2D of shape (n_energy_bins, n_fov_offset_bins) """ + # First, construct estimation without handling fill-values + full_estimation = super().__call__(target_point) + # Safeguard against extreme extrapolation cases + full_estimation[full_estimation < 0] = 0 + + # Early exit if fill_values should not be handled + if not self.fill_val: + return full_estimation + + # Raise error if fill-values should be handled in >=3 dims + if self.grid_dim >= 3: + raise ValueError( + "Fill-value handling only supported in up to two grid dimensions." + ) + + # Early exit if a nearest neighbor estimation would be overwritten + if self.grid_dim == 1: + if ( + (target_point < self.grid_points.min()) + or (target_point > self.grid_points.max()) + ) and issubclass(self.extrapolator.__class__, BaseNearestNeighborSearcher): + return full_estimation + elif issubclass(self.interpolator.__class__, BaseNearestNeighborSearcher): + return full_estimation + elif self.grid_dim == 2: + target_simplex = self.triangulation.find_simplex(target_point) + + if (target_simplex == -1) and issubclass( + self.extrapolator.__class__, BaseNearestNeighborSearcher + ): + return full_estimation + elif issubclass(self.interpolator.__class__, BaseNearestNeighborSearcher): + return full_estimation + + # Actual fill-value handling + if self.grid_dim == 1: + # Locate target in grid + if target_point < self.grid_points.min(): + segment_inds = np.array([0, 1], "int") + elif target_point > self.grid_points.max(): + segment_inds = np.array([-2, -1], "int") + else: + target_bin = np.digitize( + target_point.squeeze(), self.grid_points.squeeze() + ) + segment_inds = np.array([target_bin - 1, target_bin], "int") + + mask_left = self.params[segment_inds[0]] >= self.fill_val + mask_right = self.params[segment_inds[1]] >= self.fill_val + # Indicate, wether one of the neighboring entries is a fill-value + mask = np.logical_or(mask_left, mask_right) + elif self.grid_dim == 2: + # Locate target + target_simplex = self.triangulation.find_simplex(target_point) + + if target_simplex == -1: + target_simplex = find_nearest_simplex(self.triangulation, target_point) + + simplex_nodes_indices = self.triangulation.simplices[ + target_simplex + ].squeeze() + + mask0 = self.params[simplex_nodes_indices[0]] >= self.fill_val + mask1 = self.params[simplex_nodes_indices[1]] >= self.fill_val + mask2 = self.params[simplex_nodes_indices[2]] >= self.fill_val + + # This collected mask now counts for each entry in the estimation how many + # of the entries used for extrapolation contained fill-values + intermediate_mask = mask0.astype("int") + mask1.astype("int") + mask2.astype("int") + mask = np.full_like(intermediate_mask, True, dtype=bool) + + # Simplest cases: All or none entries were fill-values, so either return + # a fill-value or the actual estimation + mask[intermediate_mask == 0] = False + mask[intermediate_mask == 3] = True + + # If two out of three values were fill-values return a fill-value as estimate + mask[intermediate_mask == 2] = True + + # If only one out of three values was a fill-value use the smallest value of the + # remaining two + mask[intermediate_mask == 1] = False + full_estimation = np.where( + intermediate_mask[np.newaxis, :] == 1, + np.min(self.params[simplex_nodes_indices], axis=0), + full_estimation, + ) + + # Set all flagged values to fill-value + full_estimation[mask[np.newaxis, :]] = self.fill_val + + # Safeguard against extreme extrapolation cases + full_estimation[full_estimation > self.fill_val] = self.fill_val - return super().__call__(target_point) + return full_estimation class EnergyDispersionEstimator(DiscretePDFComponentEstimator): From 584ca51fac2641fa3023aeb427e90c61be1cfe00 Mon Sep 17 00:00:00 2001 From: Rune Michael Dominik Date: Thu, 21 Mar 2024 13:43:35 +0100 Subject: [PATCH 3/4] Add newsfragment --- docs/changes/282.feature.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/changes/282.feature.rst diff --git a/docs/changes/282.feature.rst b/docs/changes/282.feature.rst new file mode 100644 index 000000000..3c61c6a44 --- /dev/null +++ b/docs/changes/282.feature.rst @@ -0,0 +1 @@ +Add basic combinatoric fill-value handling for RAD_MAX estimation. From 26a9d8b999c03351b7a4fd2e4c159f0fd8dd7ed1 Mon Sep 17 00:00:00 2001 From: Rune Michael Dominik Date: Mon, 8 Apr 2024 12:00:14 +0200 Subject: [PATCH 4/4] adress comments --- pyirf/interpolation/component_estimators.py | 20 +++++++++++-------- ...st_component_estimator_specific_classes.py | 20 +++++++++---------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/pyirf/interpolation/component_estimators.py b/pyirf/interpolation/component_estimators.py index f45b6c1e2..cd14beade 100644 --- a/pyirf/interpolation/component_estimators.py +++ b/pyirf/interpolation/component_estimators.py @@ -541,6 +541,12 @@ def __init__( else: self.fill_val = fill_value + # Raise error if fill-values should be handled in >=3 dims + if self.fill_val and self.grid_dim >= 3: + raise ValueError( + "Fill-value handling only supported in up to two grid dimensions." + ) + # If fill-values should be handled in 2D, construct a trinangulation # to later determine in which simplex the target values is if self.fill_val and (self.grid_dim == 2): @@ -566,19 +572,15 @@ def __call__(self, target_point): # First, construct estimation without handling fill-values full_estimation = super().__call__(target_point) # Safeguard against extreme extrapolation cases - full_estimation[full_estimation < 0] = 0 + np.clip(full_estimation, 0, None, out=full_estimation) # Early exit if fill_values should not be handled if not self.fill_val: return full_estimation - # Raise error if fill-values should be handled in >=3 dims - if self.grid_dim >= 3: - raise ValueError( - "Fill-value handling only supported in up to two grid dimensions." - ) - # Early exit if a nearest neighbor estimation would be overwritten + # Complex setup is needed to catch settings where the user mixes approaches and + # e.g. uses nearest neighbors for extrapolation and an actual interpolation otherwise if self.grid_dim == 1: if ( (target_point < self.grid_points.min()) @@ -631,7 +633,9 @@ def __call__(self, target_point): # This collected mask now counts for each entry in the estimation how many # of the entries used for extrapolation contained fill-values - intermediate_mask = mask0.astype("int") + mask1.astype("int") + mask2.astype("int") + intermediate_mask = ( + mask0.astype("int") + mask1.astype("int") + mask2.astype("int") + ) mask = np.full_like(intermediate_mask, True, dtype=bool) # Simplest cases: All or none entries were fill-values, so either return diff --git a/pyirf/interpolation/tests/test_component_estimator_specific_classes.py b/pyirf/interpolation/tests/test_component_estimator_specific_classes.py index dbca00b51..af786c4e6 100644 --- a/pyirf/interpolation/tests/test_component_estimator_specific_classes.py +++ b/pyirf/interpolation/tests/test_component_estimator_specific_classes.py @@ -358,18 +358,16 @@ def test_RadMaxEstimator_fill_val_handling_3D(): rad_max = np.array([[0.95], [0.95], [0.95], [0.95]]) - estim = RadMaxEstimator( - grid_points=grid_points_3D, - rad_max=rad_max, - fill_value=0.95, - interpolator_cls=GridDataInterpolator, - interpolator_kwargs=None, - extrapolator_cls=None, - extrapolator_kwargs=None, - ) - with pytest.raises( ValueError, match="Fill-value handling only supported in up to two grid dimensions.", ): - estim(np.array([0.25, 0.25, 0.25])) + RadMaxEstimator( + grid_points=grid_points_3D, + rad_max=rad_max, + fill_value=0.95, + interpolator_cls=GridDataInterpolator, + interpolator_kwargs=None, + extrapolator_cls=None, + extrapolator_kwargs=None, + )