Skip to content

Commit

Permalink
Merge pull request #282 from cta-observatory/cutval_estimation
Browse files Browse the repository at this point in the history
Improve Handling of Fill-Values in RAD_MAX Estimator
  • Loading branch information
maxnoe committed May 14, 2024
2 parents 8c48951 + 26a9d8b commit a296ed2
Show file tree
Hide file tree
Showing 3 changed files with 310 additions and 4 deletions.
1 change: 1 addition & 0 deletions docs/changes/282.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add basic combinatoric fill-value handling for RAD_MAX estimation.
124 changes: 122 additions & 2 deletions pyirf/interpolation/component_estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import astropy.units as u
import numpy as np
from pyirf.utils import cone_solid_angle
from scipy.spatial import Delaunay

from .base_extrapolators import DiscretePDFExtrapolator, ParametrizedExtrapolator
Expand All @@ -13,7 +12,9 @@
PDFNormalization,
)
from .griddata_interpolator import GridDataInterpolator
from .nearest_neighbor_searcher import BaseNearestNeighborSearcher
from .quantile_interpolator import QuantileInterpolator
from .utils import find_nearest_simplex

__all__ = [
"BaseComponentEstimator",
Expand Down Expand Up @@ -477,6 +478,7 @@ def __init__(
self,
grid_points,
rad_max,
fill_value=None,
interpolator_cls=GridDataInterpolator,
interpolator_kwargs=None,
extrapolator_cls=None,
Expand All @@ -495,6 +497,13 @@ def __init__(
Grid of theta cuts. Dimensions but the first can in principle be freely
chosen. Class is RAD_MAX_2D compatible, which would require
shape=(n_points, n_energy_bins, n_fov_offset_bins).
fill_val:
Indicator of fill-value handling. If None, fill values are regarded as
normal values and no special handeling is applied. If "infer", fill values
will be infered as max of all values, if a value is provided,
it is used to flag fill values. Flagged fill-values are
not used for interpolation. Fill-value handling is only supported in
up to two grid dimensions.
interpolator_cls:
pyirf interpolator class, defaults to GridDataInterpolator.
interpolator_kwargs: dict
Expand Down Expand Up @@ -523,6 +532,26 @@ def __init__(
extrapolator_kwargs=extrapolator_kwargs,
)

self.params = rad_max

if fill_value is None:
self.fill_val = None
elif fill_value == "infer":
self.fill_val = np.max(self.params)
else:
self.fill_val = fill_value

# Raise error if fill-values should be handled in >=3 dims
if self.fill_val and self.grid_dim >= 3:
raise ValueError(
"Fill-value handling only supported in up to two grid dimensions."
)

# If fill-values should be handled in 2D, construct a trinangulation
# to later determine in which simplex the target values is
if self.fill_val and (self.grid_dim == 2):
self.triangulation = Delaunay(self.grid_points)

def __call__(self, target_point):
"""
Estimating rad max table at target_point, inter-/extrapolates as needed and
Expand All @@ -540,8 +569,99 @@ def __call__(self, target_point):
effective areas. For RAD_MAX_2D of shape (n_energy_bins, n_fov_offset_bins)
"""
# First, construct estimation without handling fill-values
full_estimation = super().__call__(target_point)
# Safeguard against extreme extrapolation cases
np.clip(full_estimation, 0, None, out=full_estimation)

# Early exit if fill_values should not be handled
if not self.fill_val:
return full_estimation

# Early exit if a nearest neighbor estimation would be overwritten
# Complex setup is needed to catch settings where the user mixes approaches and
# e.g. uses nearest neighbors for extrapolation and an actual interpolation otherwise
if self.grid_dim == 1:
if (
(target_point < self.grid_points.min())
or (target_point > self.grid_points.max())
) and issubclass(self.extrapolator.__class__, BaseNearestNeighborSearcher):
return full_estimation
elif issubclass(self.interpolator.__class__, BaseNearestNeighborSearcher):
return full_estimation
elif self.grid_dim == 2:
target_simplex = self.triangulation.find_simplex(target_point)

if (target_simplex == -1) and issubclass(
self.extrapolator.__class__, BaseNearestNeighborSearcher
):
return full_estimation
elif issubclass(self.interpolator.__class__, BaseNearestNeighborSearcher):
return full_estimation

# Actual fill-value handling
if self.grid_dim == 1:
# Locate target in grid
if target_point < self.grid_points.min():
segment_inds = np.array([0, 1], "int")
elif target_point > self.grid_points.max():
segment_inds = np.array([-2, -1], "int")
else:
target_bin = np.digitize(
target_point.squeeze(), self.grid_points.squeeze()
)
segment_inds = np.array([target_bin - 1, target_bin], "int")

mask_left = self.params[segment_inds[0]] >= self.fill_val
mask_right = self.params[segment_inds[1]] >= self.fill_val
# Indicate, wether one of the neighboring entries is a fill-value
mask = np.logical_or(mask_left, mask_right)
elif self.grid_dim == 2:
# Locate target
target_simplex = self.triangulation.find_simplex(target_point)

if target_simplex == -1:
target_simplex = find_nearest_simplex(self.triangulation, target_point)

simplex_nodes_indices = self.triangulation.simplices[
target_simplex
].squeeze()

mask0 = self.params[simplex_nodes_indices[0]] >= self.fill_val
mask1 = self.params[simplex_nodes_indices[1]] >= self.fill_val
mask2 = self.params[simplex_nodes_indices[2]] >= self.fill_val

# This collected mask now counts for each entry in the estimation how many
# of the entries used for extrapolation contained fill-values
intermediate_mask = (
mask0.astype("int") + mask1.astype("int") + mask2.astype("int")
)
mask = np.full_like(intermediate_mask, True, dtype=bool)

# Simplest cases: All or none entries were fill-values, so either return
# a fill-value or the actual estimation
mask[intermediate_mask == 0] = False
mask[intermediate_mask == 3] = True

# If two out of three values were fill-values return a fill-value as estimate
mask[intermediate_mask == 2] = True

# If only one out of three values was a fill-value use the smallest value of the
# remaining two
mask[intermediate_mask == 1] = False
full_estimation = np.where(
intermediate_mask[np.newaxis, :] == 1,
np.min(self.params[simplex_nodes_indices], axis=0),
full_estimation,
)

# Set all flagged values to fill-value
full_estimation[mask[np.newaxis, :]] = self.fill_val

# Safeguard against extreme extrapolation cases
full_estimation[full_estimation > self.fill_val] = self.fill_val

return super().__call__(target_point)
return full_estimation


class EnergyDispersionEstimator(DiscretePDFComponentEstimator):
Expand Down
189 changes: 187 additions & 2 deletions pyirf/interpolation/tests/test_component_estimator_specific_classes.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import astropy.units as u
import numpy as np
from pyirf.utils import cone_solid_angle
import pytest
from scipy.stats import expon

from pyirf.utils import cone_solid_angle


def test_EnergyDispersionEstimator(prod5_irfs):
from pyirf.interpolation import EnergyDispersionEstimator, QuantileInterpolator
Expand Down Expand Up @@ -74,7 +76,9 @@ def hist(pnt):

interp = estimator(target_point=zen_pnt[[1]])

probability = (interp * omegas[np.newaxis, np.newaxis, np.newaxis, ...]).to_value(u.one)
probability = (interp * omegas[np.newaxis, np.newaxis, np.newaxis, ...]).to_value(
u.one
)

assert np.max(probability) <= 1
assert np.min(probability) >= 0
Expand Down Expand Up @@ -177,6 +181,7 @@ def test_RadMaxEstimator():
estimator = RadMaxEstimator(
grid_points=grid_points,
rad_max=rad_max,
fill_value=None,
interpolator_cls=GridDataInterpolator,
interpolator_kwargs=None,
extrapolator_cls=None,
Expand All @@ -186,3 +191,183 @@ def test_RadMaxEstimator():

assert interp.shape == (1, *rad_max_1.shape)
assert np.allclose(interp, 1.5 * rad_max_1)


def test_RadMaxEstimator_fill_val_handling_1D():
from pyirf.interpolation import (
GridDataInterpolator,
ParametrizedNearestNeighborSearcher,
ParametrizedNearestSimplexExtrapolator,
RadMaxEstimator,
)

grid_points_1D = np.array([[0], [1], [2]])

rad_max_1 = np.array([[0.95, 0.95, 0.5, 0.95, 0.95], [0.95, 0.5, 0.3, 0.5, 0.95]])
rad_max_2 = np.array([[0.95, 0.5, 0.3, 0.5, 0.95], [0.5, 0.3, 0.2, 0.9, 0.5]])
rad_max_3 = np.array([[0.95, 0.4, 0.2, 0.4, 0.5], [0.5, 0.3, 0, 0.94, 0.6]])

rad_max_1D = np.array([rad_max_1, rad_max_2, rad_max_3])

truth_0 = np.array([[0.95, 0.95, 0.7, 0.95, 0.95], [0.95, 0.7, 0.4, 0.1, 0.95]])
truth_1_5 = np.array([[0.95, 0.95, 0.4, 0.95, 0.95], [0.95, 0.4, 0.25, 0.7, 0.95]])

truth_4 = np.array([[0.95, 0.3, 0.1, 0.3, 0.95], [0.5, 0.3, 0, 0.95, 0.7]])

# State fill value
estim = RadMaxEstimator(
grid_points=grid_points_1D,
rad_max=rad_max_1D,
fill_value=0.95,
interpolator_cls=GridDataInterpolator,
interpolator_kwargs=None,
extrapolator_cls=ParametrizedNearestSimplexExtrapolator,
extrapolator_kwargs=None,
)

assert np.allclose(estim(np.array([-1])), truth_0)
assert np.allclose(estim(np.array([0.5])), truth_1_5)
assert np.allclose(estim(np.array([3])), truth_4)

# Infer fill-val as max of rad-max vals
estim = RadMaxEstimator(
grid_points=grid_points_1D,
rad_max=rad_max_1D,
fill_value="infer",
interpolator_cls=GridDataInterpolator,
interpolator_kwargs=None,
extrapolator_cls=ParametrizedNearestSimplexExtrapolator,
extrapolator_kwargs=None,
)

assert np.allclose(estim(np.array([0.5])), truth_1_5)
assert np.allclose(estim(np.array([3])), truth_4)

# Nearest neighbor cases
estim = RadMaxEstimator(
grid_points=grid_points_1D,
rad_max=rad_max_1D,
fill_value="infer",
interpolator_cls=ParametrizedNearestNeighborSearcher,
interpolator_kwargs=None,
extrapolator_cls=ParametrizedNearestNeighborSearcher,
extrapolator_kwargs=None,
)

assert np.allclose(estim(np.array([0.25])), rad_max_1)
assert np.allclose(estim(np.array([3])), rad_max_3)

# Ignore fill values
estim = RadMaxEstimator(
grid_points=grid_points_1D,
rad_max=rad_max_1D,
fill_value=None,
interpolator_cls=GridDataInterpolator,
interpolator_kwargs=None,
extrapolator_cls=None,
extrapolator_kwargs=None,
)

assert np.allclose(estim(np.array([0.5])), (rad_max_1 + rad_max_2) / 2)


def test_RadMaxEstimator_fill_val_handling_2D():
from pyirf.interpolation import (
GridDataInterpolator,
ParametrizedNearestNeighborSearcher,
ParametrizedNearestSimplexExtrapolator,
RadMaxEstimator,
)

grid_points_2D = np.array([[0, 0], [1, 0], [0, 1]])

rad_max_1 = np.array([[0.95, 0.95, 0.5, 0.95, 0.95], [0.5, 0.5, 0.3, 0.5, 0.5]])
rad_max_2 = np.array([[0.95, 0.95, 0.5, 0.5, 0.95], [0.95, 0.95, 0.95, 0.5, 0.95]])
rad_max_3 = np.array([[0.95, 0.5, 0.5, 0.4, 0.5], [0.4, 0.95, 0, 0.5, 0.95]])

rad_max_2D = np.array([rad_max_1, rad_max_2, rad_max_3])

# Only test for combinatoric cases, thus inter- and extrapolation have the same
# result in this special test case. Correct estimation is checked elsewhere
truth = np.array([[0.95, 0.95, 0.5, 0.4, 0.95], [0.4, 0.95, 0, 0.5, 0.95]])

# State fill-value
estim = RadMaxEstimator(
grid_points=grid_points_2D,
rad_max=rad_max_2D,
fill_value=0.95,
interpolator_cls=GridDataInterpolator,
interpolator_kwargs=None,
extrapolator_cls=ParametrizedNearestSimplexExtrapolator,
extrapolator_kwargs=None,
)

assert np.allclose(estim(np.array([0.5, 0.5])), truth)
assert np.allclose(estim(np.array([-1, -1])), truth)

# Infer fill-val as max of rad-max vals
estim = RadMaxEstimator(
grid_points=grid_points_2D,
rad_max=rad_max_2D,
fill_value="infer",
interpolator_cls=GridDataInterpolator,
interpolator_kwargs=None,
extrapolator_cls=ParametrizedNearestSimplexExtrapolator,
extrapolator_kwargs=None,
)

assert np.allclose(estim(np.array([0.5, 0.5])), truth)
assert np.allclose(estim(np.array([-1, -1])), truth)

# Nearest neighbor cases
estim = RadMaxEstimator(
grid_points=grid_points_2D,
rad_max=rad_max_2D,
fill_value="infer",
interpolator_cls=ParametrizedNearestNeighborSearcher,
interpolator_kwargs=None,
extrapolator_cls=ParametrizedNearestNeighborSearcher,
extrapolator_kwargs=None,
)

assert np.allclose(estim(np.array([0.25, 0.25])), rad_max_1)
assert np.allclose(estim(np.array([0, 1.1])), rad_max_3)

# Ignore fill-values
estim = RadMaxEstimator(
grid_points=grid_points_2D,
rad_max=rad_max_2D,
fill_value=None,
interpolator_cls=GridDataInterpolator,
interpolator_kwargs=None,
extrapolator_cls=None,
extrapolator_kwargs=None,
)

truth_interpolator = GridDataInterpolator(grid_points_2D, rad_max_2D)

assert np.allclose(
estim(np.array([0.25, 0.25])), truth_interpolator(np.array([0.25, 0.25]))
)


def test_RadMaxEstimator_fill_val_handling_3D():
from pyirf.interpolation import GridDataInterpolator, RadMaxEstimator

grid_points_3D = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]])

rad_max = np.array([[0.95], [0.95], [0.95], [0.95]])

with pytest.raises(
ValueError,
match="Fill-value handling only supported in up to two grid dimensions.",
):
RadMaxEstimator(
grid_points=grid_points_3D,
rad_max=rad_max,
fill_value=0.95,
interpolator_cls=GridDataInterpolator,
interpolator_kwargs=None,
extrapolator_cls=None,
extrapolator_kwargs=None,
)

0 comments on commit a296ed2

Please sign in to comment.