Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Start working on fixing interpolation for fixed edisp #251

Merged
merged 10 commits into from
Aug 22, 2023
6 changes: 3 additions & 3 deletions pyirf/interpolation/base_extrapolators.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ class DiscretePDFExtrapolator(BaseExtrapolator):
Derived from pyirf.interpolation.BaseExtrapolator
"""

def __init__(self, grid_points, bin_edges, bin_contents):
def __init__(self, grid_points, bin_edges, binned_pdf):
"""DiscretePDFExtrapolator

Parameters
Expand All @@ -92,7 +92,7 @@ def __init__(self, grid_points, bin_edges, bin_contents):
Grid points at which templates exist
bin_edges: np.ndarray, shape=(n_bins+1)
Edges of the data binning
bin_content: np.ndarray, shape=(n_points, ..., n_bins)
binned_pdf: np.ndarray, shape=(n_points, ..., n_bins)
Content of each bin in bin_edges for
each point in grid_points. First dimesion has to correspond to number
of grid_points, last dimension has to correspond to number of bins for
Expand All @@ -107,4 +107,4 @@ def __init__(self, grid_points, bin_edges, bin_contents):

self.bin_edges = bin_edges
self.bin_mids = bin_center(self.bin_edges)
self.bin_contents = bin_contents
self.binned_pdf = binned_pdf
6 changes: 3 additions & 3 deletions pyirf/interpolation/base_interpolators.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ class DiscretePDFInterpolator(BaseInterpolator):
Derived from pyirf.interpolation.BaseInterpolator
"""

def __init__(self, grid_points, bin_edges, bin_contents):
def __init__(self, grid_points, bin_edges, binned_pdf):
"""DiscretePDFInterpolator

Parameters
Expand All @@ -93,7 +93,7 @@ def __init__(self, grid_points, bin_edges, bin_contents):
Grid points at which interpolation templates exist
bin_edges: np.ndarray, shape=(n_bins+1)
Edges of the data binning
bin_content: np.ndarray, shape=(n_points, ..., n_bins)
binned_pdf: np.ndarray, shape=(n_points, ..., n_bins)
Content of each bin in bin_edges for
each point in grid_points. First dimesion has to correspond to number
of grid_points, last dimension has to correspond to number of bins for
Expand All @@ -108,4 +108,4 @@ def __init__(self, grid_points, bin_edges, bin_contents):

self.bin_edges = bin_edges
self.bin_mids = bin_center(self.bin_edges)
self.bin_contents = bin_contents
self.binned_pdf = binned_pdf
32 changes: 16 additions & 16 deletions pyirf/interpolation/component_estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def __init__(
self,
grid_points,
bin_edges,
bin_contents,
binned_pdf,
interpolator_cls=QuantileInterpolator,
interpolator_kwargs=None,
extrapolator_cls=None,
Expand All @@ -168,7 +168,7 @@ def __init__(
Grid points at which interpolation templates exist
bin_edges: np.ndarray, shape=(n_bins+1)
Common set of bin-edges for all discretized PDFs.
bin_contents: np.ndarray, shape=(n_points, ..., n_bins)
binned_pdf: np.ndarray, shape=(n_points, ..., n_bins)
Discretized PDFs for all grid points and arbitrary further dimensions
(in IRF term e.g. field-of-view offset bins). Actual interpolation dimension,
meaning the dimensions that contains actual histograms, has to be along
Expand All @@ -191,16 +191,16 @@ def __init__(
TypeError:
When bin_edges is not a np.ndarray.
TypeError:
When bin_content is not a np.ndarray..
When binned_pdf is not a np.ndarray..
TypeError:
When interpolator_cls is not a DiscretePDFInterpolator subclass.
TypeError:
When extrapolator_cls is not a DiscretePDFExtrapolator subclass.
ValueError:
When number of bins in bin_edges and contents in bin_contents is
When number of bins in bin_edges and contents in binned_pdf is
not matching.
ValueError:
When number of histograms in bin_contents and points in grid_points
When number of histograms in binned_pdf and points in grid_points
is not matching.

Note
Expand All @@ -212,28 +212,28 @@ def __init__(
grid_points,
)

if not isinstance(bin_contents, np.ndarray):
raise TypeError("Input bin_contents is not a numpy array.")
elif self.n_points != bin_contents.shape[0]:
if not isinstance(binned_pdf, np.ndarray):
raise TypeError("Input binned_pdf is not a numpy array.")
elif self.n_points != binned_pdf.shape[0]:
raise ValueError(
f"Shape missmatch, number of grid_points ({self.n_points}) and "
f"number of histograms in bin_contents ({bin_contents.shape[0]}) "
f"number of histograms in binned_pdf ({binned_pdf.shape[0]}) "
"not matching."
)
elif not isinstance(bin_edges, np.ndarray):
raise TypeError("Input bin_edges is not a numpy array.")
elif bin_contents.shape[-1] != (bin_edges.shape[0] - 1):
elif binned_pdf.shape[-1] != (bin_edges.shape[0] - 1):
raise ValueError(
f"Shape missmatch, bin_edges ({bin_edges.shape[0] - 1} bins) "
f"and bin_contents ({bin_contents.shape[-1]} bins) not matching."
f"and binned_pdf ({binned_pdf.shape[-1]} bins) not matching."
)

# Make sure that 1D input is sorted in increasing order
if self.grid_dim == 1:
sorting_inds = np.argsort(self.grid_points.squeeze())

self.grid_points = self.grid_points[sorting_inds]
bin_contents = bin_contents[sorting_inds]
binned_pdf = binned_pdf[sorting_inds]

if interpolator_kwargs is None:
interpolator_kwargs = {}
Expand All @@ -247,7 +247,7 @@ def __init__(
)

self.interpolator = interpolator_cls(
self.grid_points, bin_edges, bin_contents, **interpolator_kwargs
self.grid_points, bin_edges, binned_pdf, **interpolator_kwargs
)

if extrapolator_cls is None:
Expand All @@ -258,7 +258,7 @@ def __init__(
)
else:
self.extrapolator = extrapolator_cls(
self.grid_points, bin_edges, bin_contents, **extrapolator_kwargs
self.grid_points, bin_edges, binned_pdf, **extrapolator_kwargs
)


Expand Down Expand Up @@ -596,7 +596,7 @@ def __init__(
super().__init__(
grid_points=grid_points,
bin_edges=migra_bins,
bin_contents=np.swapaxes(energy_dispersion, axis, -1),
binned_pdf=np.swapaxes(energy_dispersion, axis, -1),
interpolator_cls=interpolator_cls,
interpolator_kwargs=interpolator_kwargs,
extrapolator_cls=extrapolator_cls,
Expand Down Expand Up @@ -688,7 +688,7 @@ def __init__(
super().__init__(
grid_points=grid_points,
bin_edges=source_offset_bins,
bin_contents=psf_normed,
binned_pdf=psf_normed,
interpolator_cls=interpolator_cls,
interpolator_kwargs=interpolator_kwargs,
extrapolator_cls=extrapolator_cls,
Expand Down
66 changes: 35 additions & 31 deletions pyirf/interpolation/moment_morph_interpolator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@
]


def _estimate_mean_std(bin_edges, bin_contents):
def _estimate_mean_std(bin_edges, binned_pdf):
"""
Function to roughly estimate mean and standard deviation from a histogram.

Parameters
----------
bin_edges: np.ndarray, shape=(M+1)
Array of common bin-edges for bin_contents
bin_contents: np.ndarray, shape=(N, ..., M)
Array of bin-entries, actual
Array of common bin-edges for binned_pdf
binned_pdf: np.ndarray, shape=(N, ..., M)
PDF values from which to compute mean and std

Returns
-------
Expand All @@ -29,35 +29,39 @@ def _estimate_mean_std(bin_edges, bin_contents):
the input template is =/= 0
"""
# Create an 2darray where the 1darray mids is repeated n_template times
mids = np.broadcast_to(bin_center(bin_edges), bin_contents.shape)
mids = np.broadcast_to(bin_center(bin_edges), binned_pdf.shape)
width = np.diff(bin_edges)

# integrate pdf to get probability in each bin
probability = binned_pdf * width
# Weighted averages to compute mean and std
mean = np.average(mids, weights=bin_contents, axis=-1)
mean = np.average(mids, weights=probability, axis=-1)
std = np.sqrt(
np.average((mids - mean[..., np.newaxis]) ** 2, weights=bin_contents, axis=-1)
np.average((mids - mean[..., np.newaxis])**2, weights=probability, axis=-1)
)

# Set std to 0.5*width for all those templates that have only one bin =/= 0. In those
# cases mids-mean = 0 and therefore std = 0. Uses the width of the one bin with
# bin_content!=0
# binned_pdf!=0
mask = std == 0
if np.any(mask):
# Create array of bin_widths and clone for each template
width = np.diff(bin_edges) / 2
maxnoe marked this conversation as resolved.
Show resolved Hide resolved
width = np.repeat(width[np.newaxis, :], bin_contents.shape[0], axis=0)
std[mask] = width[bin_contents[mask, :] != 0]
width = np.repeat(width[np.newaxis, :], binned_pdf.shape[0], axis=0)
std[mask] = width[binned_pdf[mask, :] != 0]

return mean, std


def _lookup(bin_edges, bin_contents, x):
def _lookup(bin_edges, binned_pdf, x):
"""
Function to return the bin-height at a desired point.

Parameters
----------
bin_edges: np.ndarray, shape=(M+1)
Array of common bin-edges for bin_contents
bin_contents: np.ndarray, shape=(N, ..., M)
Array of common bin-edges for binned_pdf
binned_pdf: np.ndarray, shape=(N, ..., M)
Array of bin-entries, actual
x: numpy.ndarray, shape=(N, ..., M)
Array of M points for each input template, where the histogram-value (bin-height) should be found
Expand All @@ -79,11 +83,11 @@ def _lookup(bin_edges, bin_contents, x):
[
cont[binnr_row]
for cont, binnr_row in zip(
bin_contents.reshape(-1, bin_contents.shape[-1]),
binned_pdf.reshape(-1, binned_pdf.shape[-1]),
binnr.reshape(-1, binnr.shape[-1]),
)
]
).reshape(bin_contents.shape)
).reshape(binned_pdf.shape)

# Set all invalid bins to 0
lu[~valid] = 0
Expand Down Expand Up @@ -172,18 +176,18 @@ def barycentric_2D_interpolation_coefficients(grid_points, target_point):
return coefficients


def moment_morph_estimation(bin_edges, bin_contents, coefficients):
def moment_morph_estimation(bin_edges, binned_pdf, coefficients):
"""
Function that wraps up the moment morph procedure [1] adopted for histograms.

Parameters
----------
bin_edges: np.ndarray, shape=(M+1)
Array of common bin-edges for bin_contents
bin_contents: np.ndarray, shape=(N, ..., M)
Array of common bin-edges for binned_pdf
binned_pdf: np.ndarray, shape=(N, ..., M)
Array of bin-entries, actual
coefficients: np.ndarray, shape=(N)
Estimation coefficients for each entry in bin_contents
Estimation coefficients for each entry in binned_pdf

Returns
-------
Expand All @@ -199,16 +203,16 @@ def moment_morph_estimation(bin_edges, bin_contents, coefficients):
bin_mids = bin_center(bin_edges)

# Catch all those templates, where at least one template histogram is all zeros.
zero_templates = ~np.all(~np.isclose(np.sum(bin_contents, axis=-1), 0), 0)
zero_templates = ~np.all(~np.isclose(np.sum(binned_pdf, axis=-1), 0), 0)

# Manipulate those templates so that computations pass without error
bin_contents[:, zero_templates] = np.full(len(bin_mids), 1 / len(bin_mids))
binned_pdf[:, zero_templates] = np.full(len(bin_mids), 1 / len(bin_mids))

# Estimate mean and std for each input template histogram. First adaption needed to extend
# the moment morph procedure to histograms
mus, sigs = _estimate_mean_std(bin_edges=bin_edges, bin_contents=bin_contents)
mus, sigs = _estimate_mean_std(bin_edges=bin_edges, binned_pdf=binned_pdf)
coefficients = coefficients.reshape(
bin_contents.shape[0], *np.ones(mus.ndim - 1, "int")
binned_pdf.shape[0], *np.ones(mus.ndim - 1, "int")
)

# Transform mean and std as in eq. (11) and (12) in [1]
Expand All @@ -221,15 +225,15 @@ def moment_morph_estimation(bin_edges, bin_contents, coefficients):
bij = mus - mu_prime * aij

# Transformation as in eq. (13) in [1]
mids = np.broadcast_to(bin_mids, bin_contents.shape)
mids = np.broadcast_to(bin_mids, binned_pdf.shape)
transf_mids = aij[..., np.newaxis] * mids + bij[..., np.newaxis]

# Compute the morphed historgram according to eq. (18) in [1]. The function "lookup" "resamples"
# the histogram at the transformed bin-mids by using the templates historgam value at the transformed
# bin-mid as new value for a whole transformed bin. Second adaption needed to extend
# the moment morph procedure to histograms, adaptes the behaviour of eq. (16)

transf_hist = _lookup(bin_edges=bin_edges, bin_contents=bin_contents, x=transf_mids)
transf_hist = _lookup(bin_edges=bin_edges, binned_pdf=binned_pdf, x=transf_mids)

f_new = np.sum(
np.expand_dims(coefficients, -1) * transf_hist * np.expand_dims(aij, -1), axis=0
Expand All @@ -243,12 +247,12 @@ def moment_morph_estimation(bin_edges, bin_contents, coefficients):
norm = np.expand_dims(np.sum(f_new, axis=-1), -1)

return np.divide(f_new, norm, out=np.zeros_like(f_new), where=norm != 0).reshape(
1, *bin_contents.shape[1:]
1, *binned_pdf.shape[1:]
)


class MomentMorphInterpolator(DiscretePDFInterpolator):
def __init__(self, grid_points, bin_edges, bin_contents):
def __init__(self, grid_points, bin_edges, binned_pdf):
"""
Interpolator class using moment morphing.

Expand All @@ -258,7 +262,7 @@ def __init__(self, grid_points, bin_edges, bin_contents):
Grid points at which interpolation templates exist. May be one ot two dimensional.
bin_edges: np.ndarray, shape=(M+1)
Edges of the data binning
bin_content: np.ndarray, shape=(N, ..., M)
binned_pdf: np.ndarray, shape=(N, ..., M)
Content of each bin in bin_edges for
each point in grid_points. First dimesion has to correspond to number
of grid_points. Interpolation dimension, meaning the
Expand All @@ -270,7 +274,7 @@ def __init__(self, grid_points, bin_edges, bin_contents):
----
Also calls pyirf.interpolation.DiscretePDFInterpolator.__init__.
"""
super().__init__(grid_points, bin_edges, bin_contents)
super().__init__(grid_points, bin_edges, binned_pdf)

if self.grid_dim == 2:
self.triangulation = Delaunay(self.grid_points)
Expand All @@ -293,7 +297,7 @@ def _interpolate1D(self, target_point):

return moment_morph_estimation(
bin_edges=self.bin_edges,
bin_contents=self.bin_contents[segment_inds],
binned_pdf=self.binned_pdf[segment_inds],
coefficients=coefficients,
)

Expand All @@ -312,7 +316,7 @@ def _interpolate2D(self, target_point):

return moment_morph_estimation(
bin_edges=self.bin_edges,
bin_contents=self.bin_contents[simplex_inds],
binned_pdf=self.binned_pdf[simplex_inds],
coefficients=coefficients,
)

Expand Down
Loading
Loading