diff --git a/ci/install.sh b/ci/install.sh index f402a1a6320..8967be98e1c 100644 --- a/ci/install.sh +++ b/ci/install.sh @@ -23,9 +23,9 @@ if [[ "$CONDA" == "true" ]]; then # Useful for debugging any issues with conda conda info -a - sed -i -e "s/- python=.*/- python=$PYTHON_VERSION/g" environment.yml - travis_wait 20 conda env create -n cta-dev --file environment.yml - conda activate cta-dev + sed -i -e "s/- python=.*/- python=$TRAVIS_PYTHON_VERSION/g" environment.yml + travis_wait 20 conda env create -n cta-dev --file environment.yml + conda activate cta-dev else pip install -U pip fi diff --git a/ctapipe/containers.py b/ctapipe/containers.py index 7a78a711379..053e125b923 100644 --- a/ctapipe/containers.py +++ b/ctapipe/containers.py @@ -83,20 +83,19 @@ class EventType(enum.Enum): class EventIndexContainer(Container): """ index columns to include in event lists, common to all data levels""" - container_prefix = "" # don't want to prefix these - - event_id = Field(0, "event identifier") obs_id = Field(0, "observation identifier") + event_id = Field(0, "event identifier") -class TelEventIndexContainer(EventIndexContainer): +class TelEventIndexContainer(Container): """ index columns to include in telescope-wise event lists, common to all data levels that have telescope-wise information """ - container_prefix = "" # don't want to prefix these + obs_id = Field(0, "observation identifier") + event_id = Field(0, "event identifier") tel_id = Field(0, "telescope identifier") @@ -181,11 +180,30 @@ class TimingParametersContainer(Container): class MorphologyContainer(Container): """ Parameters related to pixels surviving image cleaning """ - num_pixels = Field(nan, "Number of usable pixels") - num_islands = Field(nan, "Number of distinct islands in the image") - num_small_islands = Field(nan, "Number of <= 2 pixel islands") - num_medium_islands = Field(nan, "Number of 2-50 pixel islands") - num_large_islands = Field(nan, "Number of > 10 pixel islands") + num_pixels = Field(-1, "Number of usable pixels") + num_islands = Field(-1, "Number of distinct islands in the image") + num_small_islands = Field(-1, "Number of <= 2 pixel islands") + num_medium_islands = Field(-1, "Number of 2-50 pixel islands") + num_large_islands = Field(-1, "Number of > 50 pixel islands") + + +class StatisticsContainer(Container): + """Store descriptive statistics""" + + max = Field(nan, "value of pixel with maximum intensity") + min = Field(nan, "value of pixel with minimum intensity") + mean = Field(nan, "mean intensity") + std = Field(nan, "standard deviation of intensity") + skewness = Field(nan, "skewness of intensity") + kurtosis = Field(nan, "kurtosis of intensity") + + +class IntensityStatisticsContainer(StatisticsContainer): + container_prefix = "intensity" + + +class PeakTimeStatisticsContainer(StatisticsContainer): + container_prefix = "peak_time" class ImageParametersContainer(Container): @@ -197,6 +215,12 @@ class ImageParametersContainer(Container): leakage = Field(LeakageContainer(), "Leakage Parameters") concentration = Field(ConcentrationContainer(), "Concentration Parameters") morphology = Field(MorphologyContainer(), "Image Morphology Parameters") + intensity_statistics = Field( + IntensityStatisticsContainer(), "Intensity image statistics" + ) + peak_time_statistics = Field( + PeakTimeStatisticsContainer(), "Peak time image statistics" + ) class DL1CameraContainer(Container): @@ -230,7 +254,7 @@ class DL1CameraContainer(Container): parameters = Field(ImageParametersContainer(), "Parameters derived from 
images") -class MCDL1CameraContainer(DL1CameraContainer): +class MCDL1CameraContainer(Container): """ Contains all fields of the DL1CameraContainer, but adds fields for simulated DL1 image information.""" diff --git a/ctapipe/core/tool.py b/ctapipe/core/tool.py index ea9e73f05d5..b2d8ffdb1e6 100644 --- a/ctapipe/core/tool.py +++ b/ctapipe/core/tool.py @@ -214,7 +214,7 @@ def run(self, argv=None): Provenance().start_activity(self.name) self.setup() self.is_setup = True - self.log.info(f"CONFIG: {self.get_current_config()}") + self.log.debug(f"CONFIG: {self.get_current_config()}") Provenance().add_config(self.get_current_config()) self.start() self.finish() diff --git a/ctapipe/image/__init__.py b/ctapipe/image/__init__.py index 8fd0d8a09af..e2d730fa822 100644 --- a/ctapipe/image/__init__.py +++ b/ctapipe/image/__init__.py @@ -1,9 +1,20 @@ -from .hillas import * +from .hillas import ( + hillas_parameters, HillasParameterizationError, camera_to_shower_coordinates, +) +from .timing import timing_parameters +from .leakage import leakage +from .concentration import concentration +from .statistics import descriptive_statistics +from .morphology import ( + number_of_islands, + number_of_island_sizes, + morphology_parameters, + largest_island, +) + from .cleaning import * from .pixel_likelihood import * from .extractor import * from .reducer import * -from .muon import * from .geometry_converter import * -from .leakage import * -from .concentration import concentration +from .muon import * diff --git a/ctapipe/image/cleaning.py b/ctapipe/image/cleaning.py index 7053acdafb0..20b26df51fa 100644 --- a/ctapipe/image/cleaning.py +++ b/ctapipe/image/cleaning.py @@ -8,7 +8,6 @@ "mars_cleaning_1st_pass", "fact_image_cleaning", "apply_time_delta_cleaning", - "number_of_islands", "ImageCleaner", "TailcutsImageCleaner", ] @@ -16,7 +15,6 @@ from abc import abstractmethod import numpy as np -from scipy.sparse.csgraph import connected_components from ..core.component import TelescopeComponent from ..core.traits import ( @@ -202,42 +200,6 @@ def dilate(geom, mask): return mask | geom.neighbor_matrix_sparse.dot(mask) -def number_of_islands(geom, mask): - """ - Search a given pixel mask for connected clusters. - This can be used to seperate between gamma and hadronic showers. - - Parameters - ---------- - geom: `~ctapipe.instrument.CameraGeometry` - Camera geometry information - mask: ndarray - input mask (array of booleans) - - Returns - ------- - num_islands: int - Total number of clusters - island_labels: ndarray - Contains cluster membership of each pixel. - Dimension equals input geometry. - Entries range from 0 (not in the pixel mask) to num_islands. - """ - # compress sparse neighbor matrix - neighbor_matrix_compressed = geom.neighbor_matrix_sparse[mask][:, mask] - # pixels in no cluster have label == 0 - island_labels = np.zeros(geom.n_pixels, dtype="int32") - - num_islands, island_labels_compressed = connected_components( - neighbor_matrix_compressed, directed=False - ) - - # count clusters from 1 onwards - island_labels[mask] = island_labels_compressed + 1 - - return num_islands, island_labels - - def apply_time_delta_cleaning( geom, mask, arrival_times, min_number_neighbors, time_limit ): @@ -367,30 +329,6 @@ def fact_image_cleaning( return pixels_to_keep -def largest_island(islands_labels): - """Find the biggest island and filter it from the image. - - This function takes a list of islands in an image and isolates the largest one - for later parametrization. 
- - Parameters - ---------- - islands_labels : array - Flattened array containing a list of labelled islands from a cleaned image. - Second value returned by the function 'number_of_islands'. - - Returns - ------- - islands_labels : array - A boolean mask created from the input labels and filtered for the largest island. - If no islands survived the cleaning the array is all False. - - """ - if np.count_nonzero(islands_labels) == 0: - return np.zeros(islands_labels.shape, dtype="bool") - return islands_labels == np.argmax(np.bincount(islands_labels[islands_labels > 0])) - - class ImageCleaner(TelescopeComponent): """ Abstract class for all configurable Image Cleaning algorithms. Use diff --git a/ctapipe/image/extractor.py b/ctapipe/image/extractor.py index 977823b2d06..2acfb242b15 100644 --- a/ctapipe/image/extractor.py +++ b/ctapipe/image/extractor.py @@ -26,8 +26,8 @@ from ctapipe.core import TelescopeComponent from numba import njit, prange, guvectorize, float64, float32, int64 -from .cleaning import number_of_islands, largest_island, tailcuts_clean -from .timing_parameters import timing_parameters +from . import number_of_islands, largest_island, tailcuts_clean +from .timing import timing_parameters from .hillas import hillas_parameters, camera_to_shower_coordinates diff --git a/ctapipe/image/morphology.py b/ctapipe/image/morphology.py new file mode 100644 index 00000000000..f30a2688f6c --- /dev/null +++ b/ctapipe/image/morphology.py @@ -0,0 +1,126 @@ +import numpy as np +from scipy.sparse.csgraph import connected_components + +from ..containers import MorphologyContainer + + +def number_of_islands(geom, mask): + """ + Search a given pixel mask for connected clusters. + This can be used to seperate between gamma and hadronic showers. + + Parameters + ---------- + geom: `~ctapipe.instrument.CameraGeometry` + Camera geometry information + mask: ndarray + input mask (array of booleans) + + Returns + ------- + num_islands: int + Total number of clusters + island_labels: ndarray + Contains cluster membership of each pixel. + Dimension equals input geometry. + Entries range from 0 (not in the pixel mask) to num_islands. 
+ """ + # compress sparse neighbor matrix + neighbor_matrix_compressed = geom.neighbor_matrix_sparse[mask][:, mask] + # pixels in no cluster have label == 0 + island_labels = np.zeros(geom.n_pixels, dtype="int32") + + num_islands, island_labels_compressed = connected_components( + neighbor_matrix_compressed, directed=False + ) + + # count clusters from 1 onwards + island_labels[mask] = island_labels_compressed + 1 + + return num_islands, island_labels + + +def number_of_island_sizes(island_labels): + ''' + Return number of small, medium and large islands + + Parameters + ---------- + island_labels: array[int] + Array with island labels, (second return value of ``number_of_islands``) + + Returns + ------- + n_small: int + number of islands with less than 3 pixels + n_medium: int + number of islands with 3 <= n_pixels <= 50 + n_large: int + number of islands with more than 50 pixels + ''' + + # count number of pixels in each island, remove 0 = no island + island_sizes = np.bincount(island_labels)[1:] + + # remove islands of size 0 (if labels are not consecutive) + # should not happen, but easy to check + island_sizes = island_sizes[island_sizes > 0] + + small = island_sizes <= 2 + large = island_sizes > 50 + n_medium = np.count_nonzero(~(small | large)) + + return np.count_nonzero(small), n_medium, np.count_nonzero(large) + + +def largest_island(islands_labels): + """Find the biggest island and filter it from the image. + + This function takes a list of islands in an image and isolates the largest one + for later parametrization. + + Parameters + ---------- + islands_labels : array + Flattened array containing a list of labelled islands from a cleaned image. + Second value returned by the function 'number_of_islands'. + + Returns + ------- + islands_labels : array + A boolean mask created from the input labels and filtered for the largest island. + If no islands survived the cleaning the array is all False. 
+ + """ + if np.count_nonzero(islands_labels) == 0: + return np.zeros(islands_labels.shape, dtype="bool") + return islands_labels == np.argmax(np.bincount(islands_labels[islands_labels > 0])) + + +def morphology_parameters(geom, image_mask) -> MorphologyContainer: + """ + Compute image morphology parameters + + Parameters + ---------- + geom: ctapipe.instrument.camera.CameraGeometry + camera description + image_mask: np.ndarray(bool) + image of pixels surviving cleaning (True=survives) + + Returns + ------- + MorphologyContainer: parameters related to the morphology + """ + + num_islands, island_labels = number_of_islands(geom=geom, mask=image_mask) + + n_small, n_medium, n_large = number_of_island_sizes(island_labels) + + return MorphologyContainer( + num_pixels=np.count_nonzero(image_mask), + num_islands=num_islands, + num_small_islands=n_small, + num_medium_islands=n_medium, + num_large_islands=n_large, + ) diff --git a/ctapipe/image/statistics.py b/ctapipe/image/statistics.py new file mode 100644 index 00000000000..59a080eec50 --- /dev/null +++ b/ctapipe/image/statistics.py @@ -0,0 +1,15 @@ +from scipy.stats import skew, kurtosis + +from ..containers import StatisticsContainer + + +def descriptive_statistics(values, container_class=StatisticsContainer) -> StatisticsContainer: + """ compute intensity statistics of an image """ + return container_class( + max=values.max(), + min=values.min(), + mean=values.mean(), + std=values.std(), + skewness=skew(values), + kurtosis=kurtosis(values), + ) diff --git a/ctapipe/image/tests/test_cleaning.py b/ctapipe/image/tests/test_cleaning.py index 7c161e0b01e..400c72f2e67 100644 --- a/ctapipe/image/tests/test_cleaning.py +++ b/ctapipe/image/tests/test_cleaning.py @@ -211,78 +211,6 @@ def test_tailcuts_clean_with_isolated_pixels(): assert (result == mask).all() -def test_number_of_islands(): - # test with LST geometry (1855 pixels) - geom = CameraGeometry.from_name("LSTCam") - - # create 18 triggered pixels grouped to 5 clusters - island_mask_true = np.zeros(geom.n_pixels) - mask = np.zeros(geom.n_pixels).astype("bool") - triggered_pixels = np.array( - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 37, 38, 111, 222] - ) - island_mask_true[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]] = 1 - island_mask_true[14] = 2 - island_mask_true[[37, 38]] = 3 - island_mask_true[111] = 4 - island_mask_true[222] = 5 - mask[triggered_pixels] = 1 - - n_islands, island_mask = cleaning.number_of_islands(geom, mask) - n_islands_true = 5 - assert n_islands == n_islands_true - assert_allclose(island_mask, island_mask_true) - assert island_mask.dtype == np.int32 - - -def test_largest_island(): - """Test selection of largest island in imagea with given cleaning masks.""" - - # Create a simple rectangular camera made of 17 pixels - camera = CameraGeometry.make_rectangular(17, 1) - - # Assume a simple image (flattened array) made of 0, 1 or 2 photoelectrons - # [2, 1, 1, 1, 1, 2, 2, 1, 0, 2, 1, 1, 1, 0, 2, 2, 2] - # Assume a virtual tailcut cleaning that requires: - # - picture_threshold = 2 photoelectrons, - # - boundary_threshold = 1 photoelectron, - # - min_number_picture_neighbors = 0 - # There will be 4 islands left after cleaning: - clean_mask = np.zeros(camera.n_pixels).astype("bool") # initialization - clean_mask[[0, 1]] = 1 - clean_mask[[4, 5, 6, 7]] = 2 # this is the biggest - clean_mask[[9, 10]] = 3 - clean_mask[[14, 15, 16]] = 4 - # Label islands (their number is not important here) - _, islands_labels = cleaning.number_of_islands(camera, clean_mask) - # Create the 
true mask which takes into account only the biggest island - # Pixels with no signal are labelled with a 0 - true_mask_largest = np.zeros(camera.n_pixels).astype("bool") - true_mask_largest[[4, 5, 6, 7]] = 1 - # Apply the function to test - mask_largest = cleaning.largest_island(islands_labels) - - # Now the degenerate case of only one island surviving - # Same process as before - clean_mask_one = np.zeros(camera.n_pixels).astype("bool") - clean_mask_one[[0, 1]] = 1 - _, islands_labels_one = cleaning.number_of_islands(camera, clean_mask_one) - true_mask_largest_one = np.zeros(camera.n_pixels).astype("bool") - true_mask_largest_one[[0, 1]] = 1 - mask_largest_one = cleaning.largest_island(islands_labels_one) - - # Last the case of no islands surviving - clean_mask_0 = np.zeros(camera.n_pixels).astype("bool") - _, islands_labels_0 = cleaning.number_of_islands(camera, clean_mask_0) - true_mask_largest_0 = np.zeros(camera.n_pixels).astype("bool") - mask_largest_0 = cleaning.largest_island(islands_labels_0) - - # Check if the function recovers the ground truth in all of the three cases - assert (mask_largest_one == true_mask_largest_one).all() - assert (mask_largest_0 == true_mask_largest_0).all() - assert_allclose(mask_largest, true_mask_largest) - - def test_fact_image_cleaning(): # use LST pixel geometry geom = CameraGeometry.from_name("LSTCam") diff --git a/ctapipe/image/tests/test_morphology.py b/ctapipe/image/tests/test_morphology.py new file mode 100644 index 00000000000..870755d3cad --- /dev/null +++ b/ctapipe/image/tests/test_morphology.py @@ -0,0 +1,100 @@ +import numpy as np +from numpy.testing import assert_allclose +from ctapipe.instrument import CameraGeometry + + +def test_number_of_islands(): + from ctapipe.image import number_of_islands + # test with LST geometry (1855 pixels) + geom = CameraGeometry.from_name("LSTCam") + + # create 18 triggered pixels grouped to 5 clusters + island_mask_true = np.zeros(geom.n_pixels) + mask = np.zeros(geom.n_pixels).astype("bool") + triggered_pixels = np.array( + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 37, 38, 111, 222] + ) + island_mask_true[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]] = 1 + island_mask_true[14] = 2 + island_mask_true[[37, 38]] = 3 + island_mask_true[111] = 4 + island_mask_true[222] = 5 + mask[triggered_pixels] = 1 + + n_islands, island_mask = number_of_islands(geom, mask) + n_islands_true = 5 + assert n_islands == n_islands_true + assert_allclose(island_mask, island_mask_true) + assert island_mask.dtype == np.int32 + + +def test_number_of_island_sizes(): + from ctapipe.image import number_of_island_sizes + + island_labels = np.array( + 100 * [0] + + 2 * [1] + + 2 * [2] + + 3 * [3] + + 49 * [4] + + 51 * [5] + + 3 * [6] + + 100 * [7] + + [8] + + 2 * [9] + + [12] + ) + + n_small, n_medium, n_large = number_of_island_sizes(island_labels) + assert n_small == 5 + assert n_medium == 3 + assert n_large == 2 + + +def test_largest_island(): + """Test selection of largest island in imagea with given cleaning masks.""" + from ctapipe.image import number_of_islands, largest_island + + # Create a simple rectangular camera made of 17 pixels + camera = CameraGeometry.make_rectangular(17, 1) + + # Assume a simple image (flattened array) made of 0, 1 or 2 photoelectrons + # [2, 1, 1, 1, 1, 2, 2, 1, 0, 2, 1, 1, 1, 0, 2, 2, 2] + # Assume a virtual tailcut cleaning that requires: + # - picture_threshold = 2 photoelectrons, + # - boundary_threshold = 1 photoelectron, + # - min_number_picture_neighbors = 0 + # There will be 4 islands left 
after cleaning: + clean_mask = np.zeros(camera.n_pixels).astype("bool") # initialization + clean_mask[[0, 1]] = 1 + clean_mask[[4, 5, 6, 7]] = 2 # this is the biggest + clean_mask[[9, 10]] = 3 + clean_mask[[14, 15, 16]] = 4 + # Label islands (their number is not important here) + _, islands_labels = number_of_islands(camera, clean_mask) + # Create the true mask which takes into account only the biggest island + # Pixels with no signal are labelled with a 0 + true_mask_largest = np.zeros(camera.n_pixels).astype("bool") + true_mask_largest[[4, 5, 6, 7]] = 1 + # Apply the function to test + mask_largest = largest_island(islands_labels) + + # Now the degenerate case of only one island surviving + # Same process as before + clean_mask_one = np.zeros(camera.n_pixels).astype("bool") + clean_mask_one[[0, 1]] = 1 + _, islands_labels_one = number_of_islands(camera, clean_mask_one) + true_mask_largest_one = np.zeros(camera.n_pixels).astype("bool") + true_mask_largest_one[[0, 1]] = 1 + mask_largest_one = largest_island(islands_labels_one) + + # Last the case of no islands surviving + clean_mask_0 = np.zeros(camera.n_pixels).astype("bool") + _, islands_labels_0 = number_of_islands(camera, clean_mask_0) + true_mask_largest_0 = np.zeros(camera.n_pixels).astype("bool") + mask_largest_0 = largest_island(islands_labels_0) + + # Check if the function recovers the ground truth in all of the three cases + assert (mask_largest_one == true_mask_largest_one).all() + assert (mask_largest_0 == true_mask_largest_0).all() + assert_allclose(mask_largest, true_mask_largest) diff --git a/ctapipe/image/tests/test_timing_parameters.py b/ctapipe/image/tests/test_timing_parameters.py index 3154331c4bb..d4d22b4eb52 100644 --- a/ctapipe/image/tests/test_timing_parameters.py +++ b/ctapipe/image/tests/test_timing_parameters.py @@ -1,4 +1,3 @@ -from ctapipe.image.timing_parameters import timing_parameters import numpy as np import astropy.units as u from numpy.testing import assert_allclose @@ -7,6 +6,7 @@ def test_psi_0(): + from ctapipe.image import timing_parameters """ Simple test that gradient fitting gives expected answers for perfect gradient @@ -37,6 +37,7 @@ def test_psi_0(): def test_psi_20(): + from ctapipe.image import timing_parameters # Then try a different rotation angle grad = 2 @@ -67,6 +68,7 @@ def test_psi_20(): def test_ignore_negative(): + from ctapipe.image import timing_parameters grad = 2.0 intercept = 1.0 deviation = 0.1 diff --git a/ctapipe/image/timing_parameters.py b/ctapipe/image/timing.py similarity index 100% rename from ctapipe/image/timing_parameters.py rename to ctapipe/image/timing.py diff --git a/ctapipe/instrument/subarray.py b/ctapipe/instrument/subarray.py index 1a2ad57df92..808a7455879 100644 --- a/ctapipe/instrument/subarray.py +++ b/ctapipe/instrument/subarray.py @@ -10,6 +10,7 @@ from astropy import units as u from astropy.coordinates import SkyCoord from astropy.table import Table +from astropy.utils import lazyproperty import ctapipe @@ -99,7 +100,7 @@ def info(self, printer=print): ) ) - @property + @lazyproperty def tel_coords(self): """ returns telescope positions as astropy.coordinates.SkyCoord""" @@ -109,18 +110,18 @@ def tel_coords(self): return SkyCoord(x=pos_x, y=pos_y, z=pos_z, frame=GroundFrame()) - @property + @lazyproperty def tel_ids(self): """ telescope IDs as an array""" return np.array(list(self.tel.keys())) - @property + @lazyproperty def tel_indices(self): """ returns dict mapping tel_id to tel_index, useful for unpacking lists based on tel_ids into fixed-length 
arrays""" return {tel_id: ii for ii, tel_id in enumerate(self.tels.keys())} - @property + @lazyproperty def tel_index_array(self): """ returns an expanded array that maps tel_id to tel_index. I.e. for a given @@ -147,9 +148,29 @@ def tel_ids_to_indices(self, tel_ids): np.array: array of corresponding tel indices """ - tel_ids = np.asanyarray(tel_ids).ravel() - index_map = self.tel_index_array - return index_map[tel_ids] + tel_ids = np.array(tel_ids, dtype=int, copy=False).ravel() + return self.tel_index_array[tel_ids] + + def tel_ids_to_mask(self, tel_ids): + '''Convert a list of telescope ids to a boolean mask + of length ``num_tels`` where the **index** of the telescope + is set to ``True`` for each tel_id in tel_ids + + Parameters + ---------- + tel_ids : int or List[int] + array of tel IDs + + Returns + ------- + np.array[dtype=bool]: + Boolean array of length ``num_tels`` with indices of the + telescopes in ``tel_ids`` set to True. + ''' + mask = np.zeros(self.num_tels, dtype=bool) + indices = self.tel_ids_to_indices(tel_ids) + mask[indices] = True + return mask @property def footprint(self): diff --git a/ctapipe/instrument/tests/test_subarray.py b/ctapipe/instrument/tests/test_subarray.py index 49c9d26cde8..06e3b16d3b1 100644 --- a/ctapipe/instrument/tests/test_subarray.py +++ b/ctapipe/instrument/tests/test_subarray.py @@ -79,6 +79,20 @@ def test_tel_indexing(example_subarray): assert np.all(sub.tel_ids_to_indices([1, 2, 3]) == np.array([0, 1, 2])) +def test_tel_ids_to_mask(example_subarray): + lst = TelescopeDescription.from_name('LST', 'LSTCam') + subarray = SubarrayDescription( + 'someone_counted_in_binary', + tel_positions={1: [0, 0, 0] * u.m, 10: [50, 0, 0] * u.m}, + tel_descriptions={1: lst, 10: lst} + ) + + assert np.all(subarray.tel_ids_to_mask([]) == [False, False]) + assert np.all(subarray.tel_ids_to_mask([1, ]) == [True, False]) + assert np.all(subarray.tel_ids_to_mask([10]) == [False, True]) + assert np.all(subarray.tel_ids_to_mask([1, 10]) == [True, True]) + + def test_get_tel_ids_for_type(example_subarray): """ check that we can get a list of telescope ids by a telescope type, which can diff --git a/ctapipe/io/hdf5tableio.py b/ctapipe/io/hdf5tableio.py index e817cff327e..7d314104c38 100644 --- a/ctapipe/io/hdf5tableio.py +++ b/ctapipe/io/hdf5tableio.py @@ -214,6 +214,7 @@ def transform(enum_value): self.log.warning( f'Column {col_name} of' f' container {container.__class__.__name__}' + f' in table {table_name}' ' not writable, skipping' ) continue diff --git a/ctapipe/io/metadata.py b/ctapipe/io/metadata.py index 2d815c69e1a..fddf174c553 100644 --- a/ctapipe/io/metadata.py +++ b/ctapipe/io/metadata.py @@ -105,7 +105,7 @@ def from_provenance(cls, activity: _ActivityProvenance): name=activity["activity_name"], type_="software", id_=activity["activity_uuid"], - start=activity["start"]["time_utc"], + start_time=activity["start"]["time_utc"], software_name="ctapipe", software_version=activity["system"]["ctapipe_version"], ) diff --git a/ctapipe/io/tableio.py b/ctapipe/io/tableio.py index 8cc3dcb7f50..f6b55913694 100644 --- a/ctapipe/io/tableio.py +++ b/ctapipe/io/tableio.py @@ -42,6 +42,7 @@ def exclude(self, table_name, pattern): pattern: str regular expression string to match column name """ + table_name = table_name.lstrip('/') self._exclusions[table_name].append(re.compile(pattern)) def _is_column_excluded(self, table_name, col_name): diff --git a/ctapipe/tools/stage1.py b/ctapipe/tools/stage1.py new file mode 100644 index 00000000000..486102639c7 --- /dev/null +++ 
b/ctapipe/tools/stage1.py @@ -0,0 +1,750 @@ +""" +Generate DL1 (a or b) output files in HDF5 format from {R0,R1,DL0} inputs. + +# TODO: add event time per telescope! +""" +import pathlib +import sys + +import numpy as np +import tables +import tables.filters +from astropy import units as u +from tqdm.autonotebook import tqdm +from collections import defaultdict + +from ctapipe.io import metadata as meta +from ..calib.camera import CameraCalibrator, GainSelector +from ..containers import ( + ImageParametersContainer, + TelEventIndexContainer, + SimulatedShowerDistribution, + IntensityStatisticsContainer, + PeakTimeStatisticsContainer, + MCDL1CameraContainer, + TimingParametersContainer, +) +from ..core import Provenance +from ..core import QualityQuery, Container, Field, Tool, ToolConfigurationError +from ..core.traits import ( + Bool, + CaselessStrEnum, + Int, + List, + Path, + create_class_enum_trait, + classes_with_traits, +) +from ..image import ImageCleaner +from ..image import ( + hillas_parameters, + descriptive_statistics, + concentration as concentration_parameters, + timing_parameters, + leakage as leakage_parameters, + morphology_parameters, +) +from ..image.extractor import ImageExtractor +from ..io import EventSource, HDF5TableWriter, SimTelEventSource + +tables.parameters.NODE_CACHE_SLOTS = 3000 # fixes problem with too many datasets + +PROV = Provenance() + +# define the version of the DL1 data model written here. This should be updated +# when necessary: +# - increase the major number if there is a breaking change to the model +# (meaning readers need to update scripts) +# - increase the minor number if new columns or datasets are added +# - increase the patch number if there is a small bugfix to the model. +DL1_DATA_MODEL_VERSION = "v1.0.0" + + +def write_reference_metadata_headers(obs_id, subarray, writer): + """ + Attaches Core Provenence headers to an output HDF5 file. + Right now this is hard-coded for use with the ctapipe-stage1-process tool + + Parameters + ---------- + output_path: pathlib.Path + output HDF5 file + obs_id: int + observation ID + subarray: + SubarrayDescription to get metadata from + writer: HDF5TableWriter + output + """ + activity = PROV.current_activity.provenance + + reference = meta.Reference( + contact=meta.Contact(name="", email="", organization="CTA Consortium",), + product=meta.Product( + description="DL1 Data Product", + data_category="S", + data_level="DL1", + data_association="Subarray", + data_model_name="ASWG DL1", + data_model_version=DL1_DATA_MODEL_VERSION, + data_model_url="", + format="hdf5", + ), + process=meta.Process(type_="Simulation", subtype="", id_=int(obs_id)), + activity=meta.Activity.from_provenance(activity), + instrument=meta.Instrument( + site="Other", # need a way to detect site... + class_="Subarray", + type_="unknown", + version="unknown", + id_=subarray.name, + ), + ) + + # convert all values to strings, since hdf5 can't handle Times, etc.: + # TODO: add activity_stop_time? 
+ headers = {k: str(v) for k, v in reference.to_dict().items()} + meta.write_to_hdf5(headers, writer._h5file) + + +class ImageQualityQuery(QualityQuery): + """ for configuring image-wise data checks """ + + quality_criteria = List( + default_value=[ + ("size_greater_0", "lambda image_selected: image_selected.sum() > 0"), + ], + help=QualityQuery.quality_criteria.help, + ).tag(config=True) + + +class Stage1ProcessorTool(Tool): + name = "ctapipe-stage1-process" + description = __doc__ + f" This currently writes {DL1_DATA_MODEL_VERSION} DL1 data" + examples = """ + To process data with all default values: + > ctapipe-stage1-process --input events.simtel.gz --output events.dl1.h5 --progress + + Or use an external configuration file, where you can specify all options: + > ctapipe-stage1-process --config stage1_config.json --progress + + The config file should be in JSON or python format (see traitlets docs). For an + example, see ctapipe/examples/stage1_config.json in the main code repo. + """ + + output_path = Path( + help="DL1 output filename", default_value=pathlib.Path("events.dl1.h5") + ).tag(config=True) + + write_images = Bool( + help="Store DL1/Event/Image data in output", default_value=False + ).tag(config=True) + + write_parameters = Bool( + help="Compute and store image parameters", default_value=True + ).tag(config=True) + + compression_level = Int( + help="compression level, 0=None, 9=maximum", default_value=5, min=0, max=9 + ).tag(config=True) + + split_datasets_by = CaselessStrEnum( + values=["tel_id", "tel_type"], + default_value="tel_id", + help="Splitting level for the parameters and images datasets", + ).tag(config=True) + + compression_type = CaselessStrEnum( + values=["blosc:zstd", "zlib"], + help="compressor algorithm to use. ", + default_value="blosc:zstd", + ).tag(config=True) + + image_extractor_type = create_class_enum_trait( + base_class=ImageExtractor, + default_value="NeighborPeakWindowSum", + help="Method to use to turn a waveform into a single charge value", + ).tag(config=True) + + gain_selector_type = create_class_enum_trait( + base_class=GainSelector, default_value="ThresholdGainSelector" + ).tag(config=True) + + image_cleaner_type = create_class_enum_trait( + base_class=ImageCleaner, default_value="TailcutsImageCleaner" + ) + + write_index_tables = Bool( + help=( + "Generate PyTables index datasets for all tables that contain an " + "event_id or tel_id. These speed up in-kernal pytables operations," + "but add some overhead to the file. 
They can also be generated " + "and attached after the file is written " + ), + default_value=False, + ).tag(config=True) + + overwrite = Bool(help="overwrite output file if it exists").tag(config=True) + progress_bar = Bool(help="show progress bar during processing").tag(config=True) + + aliases = { + "input": "EventSource.input_url", + "output": "Stage1ProcessorTool.output_path", + "allowed-tels": "EventSource.allowed_tels", + "max-events": "EventSource.max_events", + "image-extractor-type": "Stage1ProcessorTool.image_extractor_type", + "gain-selector-type": "Stage1ProcessorTool.gain_selector_type", + "image-cleaner-type": "Stage1ProcessorTool.image_cleaner_type", + } + + flags = { + "write-images": ( + {"Stage1ProcessorTool": {"write_images": True}}, + "store DL1/Event/Telescope images in output", + ), + "write-parameters": ( + {"Stage1ProcessorTool": {"write_parameters": True}}, + "store DL1/Event/Telescope parameters in output", + ), + "write-index-tables": ( + {"Stage1ProcessorTool": {"write_index_tables": True}}, + "generate PyTables index tables for the parameter and image datasets", + ), + "overwrite": ( + {"Stage1ProcessorTool": {"overwrite": True}}, + "Overwrite output file if it exists", + ), + "progress": ( + {"Stage1ProcessorTool": {"progress_bar": True}}, + "show a progress bar during event processing", + ), + } + + classes = List( + [CameraCalibrator, ImageQualityQuery, EventSource] + + classes_with_traits(ImageCleaner) + + classes_with_traits(ImageExtractor) + + classes_with_traits(GainSelector) + ) + + def setup(self): + # prepare output path: + self.output_path = self.output_path.expanduser() + if self.output_path.exists(): + if self.overwrite: + self.log.warning(f"Overwriting {self.output_path}") + self.output_path.unlink() + else: + self.log.critical( + f"Output file {self.output_path} exists" + ", use `--overwrite` to overwrite " + ) + sys.exit(1) + + PROV.add_output_file(str(self.output_path), role="DL1/Event") + + # check that options make sense: + if self.write_parameters is False and self.write_images is False: + raise ToolConfigurationError( + "The options 'write_parameters' and 'write_images' are " + "both set to False. No output will be generated in that case. " + "Please enable one or both of these options." + ) + + # setup components: + + self.gain_selector = self.add_component( + GainSelector.from_name(self.gain_selector_type, parent=self) + ) + self.event_source = self.add_component( + EventSource.from_config(parent=self, gain_selector=self.gain_selector) + ) + self.image_extractor = self.add_component( + ImageExtractor.from_name( + self.image_extractor_type, + parent=self, + subarray=self.event_source.subarray, + ) + ) + self.calibrate = self.add_component( + CameraCalibrator( + parent=self, + subarray=self.event_source.subarray, + image_extractor=self.image_extractor, + ) + ) + self.clean = self.add_component( + ImageCleaner.from_name( + self.image_cleaner_type, + parent=self, + subarray=self.event_source.subarray, + ) + ) + self.check_image = self.add_component(ImageQualityQuery(parent=self)) + + # check component setup + if self.event_source.max_events and self.event_source.max_events > 0: + self.log.warning( + "No Simulated shower distributions will be written because " + "EventSource.max_events is set to a non-zero number (and therefore " + "shower distributions read from the input MC file are invalid)." 
+ ) + + # setup HDF5 compression: + self._hdf5_filters = tables.Filters( + complevel=self.compression_level, + complib=self.compression_type, + fletcher32=True, # attach a checksum to each chunk for error correction + ) + + # store last pointing to only write unique poitings + self._last_pointing_tel = defaultdict(lambda : (np.nan * u.deg, np.nan * u.deg)) + + def _write_simulation_configuration(self, writer): + """ + Write the simulation headers to a single row of a table. Later + if this file is merged with others, that table will grow. + + Note that this function should be run first + """ + self.log.debug("Writing simulation configuration") + + class ExtraMCInfo(Container): + container_prefix = "" + obs_id = Field(0, "MC Run Identifier") + + extramc = ExtraMCInfo() + extramc.obs_id = self.event_source.obs_id + self.event_source.mc_header.prefix = "" + writer.write( + "configuration/simulation/run", [extramc, self.event_source.mc_header] + ) + + def _write_simulation_histograms(self, writer: HDF5TableWriter): + """ Write the distribution of thrown showers + + Notes + ----- + - this only runs if this is a simulation file. The current implementation is + a bit of a hack and implies we should improve SimTelEventSource to read this + info. + - Currently the histograms are at the end of the simtel file, so if max_events + is set to non-zero, the end of the file may not be read, and this no + histograms will be found. + """ + self.log.debug("Writing simulation histograms") + + if not isinstance(self.event_source, SimTelEventSource): + return + + def fill_from_simtel( + obs_id, eventio_hist, container: SimulatedShowerDistribution + ): + """ fill from a SimTel Histogram entry""" + container.obs_id = obs_id + container.hist_id = eventio_hist["id"] + container.num_entries = eventio_hist["entries"] + xbins = np.linspace( + eventio_hist["lower_x"], + eventio_hist["upper_x"], + eventio_hist["n_bins_x"] + 1, + ) + ybins = np.linspace( + eventio_hist["lower_y"], + eventio_hist["upper_y"], + eventio_hist["n_bins_y"] + 1, + ) + + container.bins_core_dist = xbins * u.m + container.bins_energy = 10 ** ybins * u.TeV + container.histogram = eventio_hist["data"] + container.meta["hist_title"] = eventio_hist["title"] + container.meta["x_label"] = "Log10 E (TeV)" + container.meta["y_label"] = "3D Core Distance (m)" + + hists = self.event_source.file_.histograms + if hists is not None: + hist_container = SimulatedShowerDistribution() + hist_container.prefix = "" + for hist in hists: + if hist["id"] == 6: + fill_from_simtel(self.event_source.obs_id, hist, hist_container) + writer.write( + table_name="simulation/service/shower_distribution", + containers=hist_container, + ) + + def _write_instrument_configuration(self, subarray): + """write the SubarrayDescription + + Parameters + ---------- + subarray : ctapipe.instrument.SubarrayDescription + subarray description + """ + self.log.debug("Writing instrument configuration") + serialize_meta = True + + subarray.to_table().write( + self.output_path, + path="/configuration/instrument/subarray/layout", + serialize_meta=serialize_meta, + append=True, + ) + subarray.to_table(kind="optics").write( + self.output_path, + path="/configuration/instrument/telescope/optics", + append=True, + serialize_meta=serialize_meta, + ) + for telescope_type in subarray.telescope_types: + ids = set(subarray.get_tel_ids_for_type(telescope_type)) + if len(ids) > 0: # only write if there is a telescope with this camera + tel_id = list(ids)[0] + camera = subarray.tel[tel_id].camera + 
camera.geometry.to_table().write( + self.output_path, + path=f"/configuration/instrument/telescope/camera/geometry_{camera}", + append=True, + serialize_meta=serialize_meta, + ) + camera.readout.to_table().write( + self.output_path, + path=f"/configuration/instrument/telescope/camera/readout_{camera}", + append=True, + serialize_meta=serialize_meta, + ) + + def _write_processing_statistics(self): + """ write out the event selection stats, etc. """ + image_stats = self.check_image.to_table(functions=True) + image_stats.write( + self.output_path, + path="/dl1/service/image_statistics", + append=True, + serialize_meta=True, + ) + + def _parameterize_image(self, tel_id, image, signal_pixels, peak_time=None): + """Apply image cleaning and calculate image features + + Parameters + ---------- + subarray : SubarrayDescription + subarray description + data : DL1CameraContainer + calibrated camera data + tel_id: int + which telescope is being cleaned + + Returns + ------- + np.ndarray, ImageParametersContainer: + cleaning mask, parameters + """ + + tel = self.event_source.subarray.tel[tel_id] + geometry = tel.camera.geometry + image_selected = image[signal_pixels] + + # check if image can be parameterized: + image_criteria = self.check_image(image_selected) + self.log.debug( + "image_criteria: %s", + list(zip(self.check_image.criteria_names[1:], image_criteria)), + ) + + # parameterize the event if all criteria pass: + if all(image_criteria): + geom_selected = geometry[signal_pixels] + + hillas = hillas_parameters(geom=geom_selected, image=image_selected,) + leakage = leakage_parameters( + geom=geometry, image=image, cleaning_mask=signal_pixels + ) + concentration = concentration_parameters( + geom=geom_selected, image=image_selected, hillas_parameters=hillas, + ) + morphology = morphology_parameters(geom=geometry, image_mask=signal_pixels) + intensity_statistics = descriptive_statistics( + image_selected, container_class=IntensityStatisticsContainer + ) + + if peak_time is not None: + timing = timing_parameters( + geom=geom_selected, + image=image_selected, + peak_time=peak_time[signal_pixels], + hillas_parameters=hillas, + ) + peak_time_statistics = descriptive_statistics( + peak_time[signal_pixels], + container_class=PeakTimeStatisticsContainer, + ) + else: + timing = TimingParametersContainer() + peak_time_statistics = PeakTimeStatisticsContainer() + + return ImageParametersContainer( + hillas=hillas, + timing=timing, + leakage=leakage, + morphology=morphology, + concentration=concentration, + intensity_statistics=intensity_statistics, + peak_time_statistics=peak_time_statistics, + ) + + # return the default container (containing nan values) for no + # parameterization + return ImageParametersContainer() + + def _process_events(self, writer): + self.log.debug("Writing DL1/Event data") + self.event_source.subarray.info(printer=self.log.debug) + + # initial value for last known pointing position + last_pointing = (np.nan * u.deg, np.nan * u.deg) + + for event in tqdm( + self.event_source, + desc=self.event_source.__class__.__name__, + total=self.event_source.max_events, + unit="ev", + disable=not self.progress_bar, + ): + + self.log.log(9, "Writing event_id=%s", event.index.event_id) + + self.calibrate(event) + + event.trigger.prefix = "" + + p = event.pointing + current_pointing = (p.array_azimuth, p.array_altitude) + if current_pointing != last_pointing: + p.prefix = '' + writer.write('dl1/monitoring/subarray/pointing', [event.trigger, p]) + last_pointing = current_pointing + + # write the 
subarray tables + writer.write( + table_name="simulation/event/subarray/shower", + containers=[event.index, event.mc], + ) + writer.write( + table_name="dl1/event/subarray/trigger", + containers=[event.index, event.trigger], + ) + # write the telescope tables + self._write_telescope_event(writer, event) + + def _write_telescope_event(self, writer, event): + """ + add entries to the event/telescope tables for each telescope in a single + event + """ + + # write the telescope tables + for tel_id, dl1_camera in event.dl1.tel.items(): + dl1_camera.prefix = "" # don't want a prefix for this container + telescope = self.event_source.subarray.tel[tel_id] + tel_type = str(telescope) + + tel_index = TelEventIndexContainer( + obs_id=event.index.obs_id, + event_id=event.index.event_id, + tel_id=np.int16(tel_id), + ) + + p = event.pointing.tel[tel_id] + current_pointing = (p.azimuth, p.altitude) + if current_pointing != self._last_pointing_tel[tel_id]: + p.prefix = '' + writer.write( + f'dl1/monitoring/telescope/pointing/tel_{tel_id:03d}', + [event.trigger.tel[tel_id], p] + ) + self._last_pointing_tel[tel_id] = current_pointing + + table_name = ( + f"tel_{tel_id:03d}" if self.split_datasets_by == "tel_id" else tel_type + ) + + writer.write( + 'dl1/event/telescope/trigger', + [tel_index, event.trigger.tel[tel_id]] + ) + + if self.event_source.is_simulation: + true_image = event.mc.tel[tel_id].true_image + has_true_image = ( + true_image is not None and np.count_nonzero(true_image) > 0 + ) + + if has_true_image: + mcdl1 = MCDL1CameraContainer( + true_image=true_image, true_parameters=None + ) + mcdl1.prefix = "" + else: + has_true_image = False + + if self.write_parameters: + # apply cleaning + dl1_camera.image_mask = self.clean( + tel_id=tel_id, + image=dl1_camera.image, + arrival_times=dl1_camera.peak_time, + ) + + params = self._parameterize_image( + tel_id=tel_id, + image=dl1_camera.image, + signal_pixels=dl1_camera.image_mask, + peak_time=dl1_camera.peak_time, + ) + + self.log.debug("params: %s", params.as_dict(recursive=True)) + writer.write( + table_name=f"dl1/event/telescope/parameters/{table_name}", + containers=[tel_index, *params.values()], + ) + + if self.event_source.is_simulation and has_true_image: + mcdl1.true_parameters = self._parameterize_image( + tel_id, + image=true_image, + signal_pixels=true_image > 0, + peak_time=None, # true image from mc has no peak time + ) + writer.write( + f"simulation/event/telescope/parameters/{table_name}", + [tel_index, *mcdl1.true_parameters.values()], + ) + + if self.write_images: + # note that we always write the image, even if the image quality + # criteria are not met (those are only to determine if the parameters + # can be computed). 
+ writer.write( + table_name=f"dl1/event/telescope/images/{table_name}", + containers=[tel_index, dl1_camera], + ) + + if has_true_image: + writer.write( + f"simulation/event/telescope/images/{table_name}", + [tel_index, mcdl1], + ) + + def _generate_table_indices(self, h5file, start_node): + + for node in h5file.iter_nodes(start_node): + if not isinstance(node, tables.group.Group): + self.log.debug(f"gen indices for: {node}") + if "event_id" in node.colnames: + node.cols.event_id.create_index() + self.log.debug("generated event_id index") + if "tel_id" in node.colnames: + node.cols.tel_id.create_index() + self.log.debug("generated tel_id index") + if "obs_id" in node.colnames: + self.log.debug("generated obs_id index") + node.cols.obs_id.create_index(kind="ultralight") + else: + # recurse + self._generate_table_indices(h5file, node) + + def _generate_indices(self, writer): + + if self.write_images: + self._generate_table_indices(writer._h5file, "/dl1/event/telescope/images") + self._generate_table_indices(writer._h5file, "/dl1/event/subarray") + + def _setup_writer(self, writer): + writer.add_column_transform( + table_name="dl1/event/subarray/trigger", + col_name="tels_with_trigger", + transform=self.event_source.subarray.tel_ids_to_mask, + ) + + # exclude some columns that are not writable + writer.exclude("dl1/event/subarray/trigger", 'tel') + writer.exclude("dl1/monitoring/subarray/pointing", 'tel') + writer.exclude("dl1/monitoring/subarray/pointing", 'event_type') + for tel_id, telescope in self.event_source.subarray.tel.items(): + tel_type = str(telescope) + if self.split_datasets_by == "tel_id": + table_name = f"tel_{tel_id:03d}" + else: + table_name = tel_type + + if self.write_parameters is False: + writer.exclude( + f"/dl1/event/telescope/images/{table_name}", "image_mask" + ) + writer.exclude(f"/dl1/event/telescope/images/{table_name}", "parameters") + writer.exclude(f"/dl1/monitoring/event/pointing/tel_{tel_id:03d}", 'event_type') + if self.event_source.is_simulation: + writer.exclude( + f"/simulation/event/telescope/images/{table_name}", + "true_parameters", + ) + # no timing information yet for true images + writer.exclude( + f"/simulation/event/telescope/parameters/{table_name}", + r"peak_time_.*", + ) + writer.exclude( + f"/simulation/event/telescope/parameters/{table_name}", r"timing_.*" + ) + writer.exclude(f"/simulation/event/subarray/shower", "true_tel") + + def start(self): + + # FIXME: this uses astropy tables hdf5 io, internally using h5py, + # and must thus be done before the table writer opens the file or it might lead + # to "Resource temporary unavailable" if h5py and tables are not linked + # against the same libhdf (happens when using the pre-build pip wheels) + # should be replaced by writing the table using tables + self._write_instrument_configuration(self.event_source.subarray) + + with HDF5TableWriter( + self.output_path, + parent=self, + mode="a", + add_prefix=True, + filters=self._hdf5_filters, + ) as writer: + + if self.event_source.is_simulation: + self._write_simulation_configuration(writer) + + self._setup_writer(writer) + self._process_events(writer) + + if self.event_source.is_simulation: + self._write_simulation_histograms(writer) + + if self.write_index_tables: + self._generate_indices(writer) + + write_reference_metadata_headers( + subarray=self.event_source.subarray, + obs_id=self.event_source.obs_id, + writer=writer, + ) + self._write_processing_statistics() + + def finish(self): + pass + + +def main(): + tool = Stage1ProcessorTool() + 
tool.run() + + +if __name__ == "__main__": + main() diff --git a/ctapipe/tools/tests/test_tools.py b/ctapipe/tools/tests/test_tools.py index 7bb323813cf..83fb005d54e 100644 --- a/ctapipe/tools/tests/test_tools.py +++ b/ctapipe/tools/tests/test_tools.py @@ -8,10 +8,12 @@ import matplotlib as mpl -from ctapipe.utils import get_dataset_path -from ctapipe.core import run_tool import tempfile +import pandas as pd import tables + +from ctapipe.utils import get_dataset_path +from ctapipe.core import run_tool import numpy as np @@ -19,6 +21,67 @@ LST_MUONS = get_dataset_path('lst_muons.simtel.zst') +def test_stage_1(): + from ctapipe.tools.stage1 import Stage1ProcessorTool + + with tempfile.NamedTemporaryFile(suffix='.hdf5') as f: + assert run_tool( + Stage1ProcessorTool(), + argv=[ + '--config=./examples/stage1_config.json', + f"--input={GAMMA_TEST_LARGE}", + f'--output={f.name}', + '--write-parameters', + '--overwrite', + ] + ) == 0 + + # check tables were written + with tables.open_file(f.name, mode='r') as tf: + assert tf.root.dl1 + assert tf.root.dl1.event.telescope + assert tf.root.dl1.event.subarray + assert tf.root.configuration.instrument.subarray.layout + assert tf.root.configuration.instrument.telescope.optics + assert tf.root.configuration.instrument.telescope.camera.geometry_LSTCam + assert tf.root.configuration.instrument.telescope.camera.readout_LSTCam + + # check we can read telescope parametrs + dl1_features = pd.read_hdf(f.name, '/dl1/event/telescope/parameters/tel_001') + features = ( + 'obs_id', 'event_id', 'tel_id', + 'hillas_intensity', 'concentration_cog', 'leakage_pixels_width_1' + ) + for feature in features: + assert feature in dl1_features.columns + + with tempfile.NamedTemporaryFile(suffix='.hdf5') as f: + assert run_tool( + Stage1ProcessorTool(), + argv=[ + '--config=./examples/stage1_config.json', + f"--input={GAMMA_TEST_LARGE}", + f'--output={f.name}', + '--write-images', + '--overwrite', + ] + ) == 0 + + with tables.open_file(f.name, mode='r') as tf: + assert tf.root.dl1 + assert tf.root.dl1.event.telescope + assert tf.root.dl1.event.subarray + assert tf.root.configuration.instrument.subarray.layout + assert tf.root.configuration.instrument.telescope.optics + assert tf.root.configuration.instrument.telescope.camera.geometry_LSTCam + assert tf.root.configuration.instrument.telescope.camera.readout_LSTCam + assert tf.root.dl1.event.telescope.images.tel_001 + dl1_image = tf.root.dl1.event.telescope.images.tel_001 + assert 'image_mask' in dl1_image.dtype.names + assert 'image' in dl1_image.dtype.names + assert 'peak_time' in dl1_image.dtype.names + + def test_muon_reconstruction(tmpdir): from ctapipe.tools.muon_reconstruction import MuonAnalysis diff --git a/ctapipe/visualization/tests/test_mpl.py b/ctapipe/visualization/tests/test_mpl.py index 3b964db8acb..4577f6fcc57 100644 --- a/ctapipe/visualization/tests/test_mpl.py +++ b/ctapipe/visualization/tests/test_mpl.py @@ -59,7 +59,7 @@ def test_camera_display_multiple(): def test_array_display(): """ check that we can do basic array display functionality """ from ctapipe.visualization.mpl_array import ArrayDisplay - from ctapipe.image.timing_parameters import timing_parameters + from ctapipe.image import timing_parameters # build a test subarray: tels = dict() diff --git a/docs/_static/theme_overrides.css b/docs/_static/theme_overrides.css new file mode 100644 index 00000000000..63ee6cc74ce --- /dev/null +++ b/docs/_static/theme_overrides.css @@ -0,0 +1,13 @@ +/* override table width restrictions */ +@media screen and 
(min-width: 767px) { + + .wy-table-responsive table td { + /* !important prevents the common CSS stylesheets from overriding + this as on RTD they are loaded after this stylesheet */ + white-space: normal !important; + } + + .wy-table-responsive { + overflow: visible !important; + } +} diff --git a/docs/conf.py b/docs/conf.py index 26e4e0cf706..8a4bd8943fd 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -127,6 +127,13 @@ # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". # html_static_path = ['_static'] +html_static_path = ['_static'] + +html_context = { + 'css_files': [ + '_static/theme_overrides.css', # override wide tables in RTD theme + ], + } # -- Options for HTMLHelp output ------------------------------------------ diff --git a/docs/data_models/dl1.rst b/docs/data_models/dl1.rst new file mode 100644 index 00000000000..4e397382572 --- /dev/null +++ b/docs/data_models/dl1.rst @@ -0,0 +1,105 @@ +.. _dl1_datamodel: + +.. currentmodule:: ctapipe.io.containers + +============== +DL1 Data Model +============== + +The DL1 files are HDF5 format files, with the following data set hierarchy. The tables +should be written with `pytables` (not `h5py`), ideally with the `ctapipe.io.HDF5TableWriter`, +which ensures the unit and other descriptive metadata are attached to the output. Containers marked +with a `+` should be written without their prefix (all others should use column prefixes). + +The following describes the contents of data level 1 (DL1) output files +generated by ctapipe (e.g. the `ctapipe-stage1-process` tool). + + +-------------------- +DL1/Event Data Model +-------------------- + +This describes data that change per-event. +The following datasets will be written to the group `/dl1/event/` in the output file: + +.. list-table:: + :widths: 25 50 25 + :header-rows: 1 + + * - Group/Dataset + - Description + - Contents + * - /subarray + - event-wise data pertaining to a subarray + - (group) + * - /subarray/trigger + - subarray trigger information + - `EventIndexContainer` +, `CentralTriggerContainer` + * - /subarray/mc_shower + - true shower parameters from Monte-Carlo simulation + - `EventIndexContainer` +, `MCEventContainer` + * - /telescope + - Per-telescope Per-event information + - (group) + * - /telescope/parameters/tel_{TEL_ID:03d} + - tables of image parameters (one per telescope) + - `TelEventIndexContainer` +, `HillasParametersContainer`, + `TimingParametersContainer`, `LeakageContainer`, `ConcentrationContainer`, + `MorphologyContainer`, `IntensityContainer` + * - /telescope/images/tel_{TEL_ID:03d} + - tables of telescope images (one per telescope) + - `TelEventIndexContainer` +, `DL1CameraContainer`, `ExtraImageContainer` + +------------------------ +Configuration Data Model +------------------------ + +The output file should also contain serializations of the instrument, observation (if +applicable), simulation (if applicable) configuration information, written to the +`/configuration` group: + +.. 
list-table:: + :widths: 25 50 25 + :header-rows: 1 + + * - Group/Dataset + - Description + - Contents + * - /instrument + - Serialized `SubarrayDescription` + - (group) + * - /instrument/subarray/layout + - Subarray layout info + - result of `SubarrayDescription.to_table()` output as HDF5 using `astropy.table` + functionality + * - /instrument/telescope/optics + - telescope optics information + - result of `SubarrayDescription.to_table(kind='optics')` output as HDF5 using + `astropy.table` functionality + * - /instrument/telescope/camera/{CAMERA_ID} + - camera geometry information + - result of `CameraGeometry.to_table()` output as HDF5 using `astropy.table` + functionality + * - /simulation + - Monte-Carlo simulation configuration information + - (group) + * - /simulation/run_config + - Monte-Carlo simulation run information + - `ExtraMCInfo` +, `MCHeaderContainer` + + * - /simulation/shower_distribution + - simulated shower distribution histograms + - `SimulatedShowerDistribution` + + +--------------- +Core Provenance +--------------- + +The root group of the file shall contain all of thethe "CTA Core Provenance Metadata" +headers as user attributes, with the hierarchy flattened and separated by spaces (e.g. +`"CTA ACTIVITY NAME" = "ctapipe-stage1-process"`) + + + + + diff --git a/docs/data_models/index.rst b/docs/data_models/index.rst new file mode 100644 index 00000000000..c28bd7475bf --- /dev/null +++ b/docs/data_models/index.rst @@ -0,0 +1,10 @@ +.. _datamodels: + +Data Models +=========== + +.. toctree:: + + dl1 + + diff --git a/docs/index.rst b/docs/index.rst index c8a4cbd9bb8..cab8c563a71 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -45,6 +45,7 @@ General documentation tutorials/index examples/index FAQ + data_models/index ctapipe_api/index bibliography changelog diff --git a/docs/tutorials/lst_analysis_bootcamp_2018.ipynb b/docs/tutorials/lst_analysis_bootcamp_2018.ipynb index 0276c77ea40..43d13c821cc 100644 --- a/docs/tutorials/lst_analysis_bootcamp_2018.ipynb +++ b/docs/tutorials/lst_analysis_bootcamp_2018.ipynb @@ -505,9 +505,9 @@ "outputs": [], "source": [ "from ctapipe.image import hillas_parameters, leakage, concentration\n", - "from ctapipe.image.timing_parameters import timing_parameters\n", - "from ctapipe.image.cleaning import number_of_islands\n", - "from ctapipe.image.hillas import camera_to_shower_coordinates" + "from ctapipe.image import timing_parameters\n", + "from ctapipe.image import number_of_islands\n", + "from ctapipe.image import camera_to_shower_coordinates" ] }, { @@ -653,9 +653,9 @@ "\n", "from ctapipe.calib import CameraCalibrator\n", "\n", - "from ctapipe.image.cleaning import tailcuts_clean, number_of_islands\n", + "from ctapipe.image import tailcuts_clean, number_of_islands\n", "from ctapipe.image import hillas_parameters, leakage, concentration\n", - "from ctapipe.image.timing_parameters import timing_parameters\n", + "from ctapipe.image import timing_parameters\n", "\n", "from ctapipe.reco import HillasReconstructor\n", "\n", diff --git a/environment.yml b/environment.yml index 8f3899cad68..b96cc3d8a9c 100644 --- a/environment.yml +++ b/environment.yml @@ -7,6 +7,7 @@ dependencies: - python=3.7 - pip - astropy + - h5py - bokeh=1 - conda-forge::nbsphinx - cython diff --git a/examples/plot_array_hillas.py b/examples/plot_array_hillas.py index 86f98a6f564..df34747f704 100644 --- a/examples/plot_array_hillas.py +++ b/examples/plot_array_hillas.py @@ -14,7 +14,7 @@ from ctapipe.image import ( hillas_parameters, tailcuts_clean, 
HillasParameterizationError ) -from ctapipe.image.timing_parameters import timing_parameters +from ctapipe.image import timing_parameters from ctapipe.io import event_source from ctapipe.utils import datasets from ctapipe.visualization import ArrayDisplay @@ -101,7 +101,6 @@ image = event.dl1.tel[tel_id].image time = event.dl1.tel[tel_id].peak_time - # Cleaning of the image cleaned_image = image.copy() diff --git a/examples/stage1_config.json b/examples/stage1_config.json new file mode 100644 index 00000000000..45cead2fe8a --- /dev/null +++ b/examples/stage1_config.json @@ -0,0 +1,29 @@ +{ + "Stage1ProcessorTool": { + "overwrite": false, + "write_images": true, + "image_extractor_type": "NeighborPeakWindowSum", + "image_cleaner_type": "TailcutsImageCleaner" + }, + "TailcutsImageCleaner": { + "picture_threshold_pe": [ + ["type", "*", 10.0], + ["type", "LST_LST_LSTCam", 6.0], + ["type", "MST_MST_NectarCam", 6.0] + ], + "boundary_threshold_pe": [ + ["type", "*", 5.0], + ["type", "LST*", 3.0], + ["type", "MST*", 4.0] + ], + "min_picture_neighbors":[ + ["type", "*", 2] + ] + }, + "ImageQualityQuery": { + "quality_criteria": [ + ["enough_pixels", "lambda im: np.count_nonzero(im) > 2"], + ["enough_charge", "lambda im: im.sum() > 50"] + ] + } +} diff --git a/setup.py b/setup.py index 9440878108b..54aefefc45b 100755 --- a/setup.py +++ b/setup.py @@ -29,6 +29,7 @@ 'ctapipe-reconstruct-muons = ctapipe.tools.muon_reconstruction:main', 'ctapipe-display-integration = ctapipe.tools.display_integrator:main', 'ctapipe-display-dl1 = ctapipe.tools.display_dl1:main', + 'ctapipe-stage1-process = ctapipe.tools.stage1:main', ] tests_require = [ 'pytest', @@ -62,6 +63,7 @@ 'tqdm>=4.32', 'traitlets>=4.1,<5.0', 'zstandard', + 'h5py', # needed for astropy hdf5 io ], # here are optional dependencies (as "tag" : "dependency spec") extras_require={
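
(Editor's appendix, not part of the patch above.) A minimal usage sketch of the new stage1 tool, assembled from the test_stage_1 test and the examples/stage1_config.json file added in this diff; the input/output file names are placeholders and the config path assumes the repository layout used in the tests.

    # Sketch only: run the new ctapipe-stage1-process tool programmatically and
    # read back one telescope's DL1 parameters table, mirroring test_stage_1.
    import pandas as pd

    from ctapipe.core import run_tool
    from ctapipe.tools.stage1 import Stage1ProcessorTool

    ret = run_tool(
        Stage1ProcessorTool(),
        argv=[
            "--config=./examples/stage1_config.json",
            "--input=events.simtel.gz",   # placeholder simtel input
            "--output=events.dl1.h5",     # placeholder DL1 output
            "--write-parameters",
            "--overwrite",
        ],
    )
    assert ret == 0

    # telescope-wise image parameters are written per telescope under
    # /dl1/event/telescope/parameters/tel_XXX
    params = pd.read_hdf("events.dl1.h5", "/dl1/event/telescope/parameters/tel_001")
    print(params[["obs_id", "event_id", "tel_id", "hillas_intensity"]].head())

The same processing is available from the command line through the new entry point added in setup.py, as shown in the tool's docstring: ctapipe-stage1-process --input events.simtel.gz --output events.dl1.h5 --progress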