From 1281d0b2efd7a32d2e6b661ce241dccac354cab5 Mon Sep 17 00:00:00 2001 From: Michele Peresano Date: Mon, 4 Oct 2021 18:21:56 +0200 Subject: [PATCH 1/8] Add protopipe.pipeline.utils.prod5N_array and improve formatting --- protopipe/pipeline/utils.py | 217 ++++++++++++++++++++++++++---------- 1 file changed, 157 insertions(+), 60 deletions(-) diff --git a/protopipe/pipeline/utils.py b/protopipe/pipeline/utils.py index abe755f1..64f21a9a 100644 --- a/protopipe/pipeline/utils.py +++ b/protopipe/pipeline/utils.py @@ -3,8 +3,9 @@ import argparse import math import joblib +from typing import List, Union -import numpy +import numpy as np import astropy.units as u import matplotlib.pyplot as plt import os.path as path @@ -162,28 +163,29 @@ def make_argparser(): return parser -def final_array_to_use(sim_array, array, subarrays=None): +def final_array_to_use(original_array, subarray_selection: Union[str, List[int]], subarrays=None): """Infer IDs of telescopes and cameras with equivalent focal lengths. + This is an helper function for utils.prod3b_array. Parameters ---------- - subarrays : dict, optional + subarrays : `dict` [`str`, `list` [`int`]], optional Dictionary of subarray names linked to lists of tel_ids automatically extracted by telescope type. If set, it will extract tel_ids from there, otherwise from the custom list given by 'array'. - sim_array : ctapipe.instrument.SubarrayDescription + original_array : `ctapipe.instrument.SubarrayDescription` Full simulated array from the first event. - array : list, str + subarray_selection : {``str`, `list` [`int`]} Custom list of telescope IDs that the user wants to use or name of specific subarray. Returns ------- - tel_ids : list + tel_ids : `list` [`int`] List of telescope IDs to use in a format readable by ctapipe. - cams_and_foclens : dict + cams_and_foclens : `dict` [`str`, `float`] Dictionary containing the IDs of the involved cameras as inferred from the involved telescopes IDs, together with the equivalent focal lengths of the telescopes. @@ -195,91 +197,186 @@ def final_array_to_use(sim_array, array, subarrays=None): """ if subarrays: - tel_ids = subarrays[array] - subarray = sim_array.select_subarray(tel_ids, name="selected_subarray") + tel_ids = subarrays[subarray_selection] + selected_subarray = original_array.select_subarray( + tel_ids, name="selected_subarray") else: - subarray = sim_array.select_subarray(array, name="selected_subarray") - tel_ids = subarray.tel_ids - tel_types = subarray.telescope_types + selected_subarray = original_array.select_subarray( + subarray_selection, name="selected_subarray") + tel_ids = selected_subarray.tel_ids + tel_types = selected_subarray.telescope_types cams_and_foclens = { - tel_types[i] - .camera.camera_name: tel_types[i] - .optics.equivalent_focal_length.value + tel_types[i].camera.camera_name: tel_types[i].optics.equivalent_focal_length.value for i in range(len(tel_types)) } - return set(tel_ids), cams_and_foclens, subarray + return set(tel_ids), cams_and_foclens, selected_subarray -def prod3b_array(fileName, site, array): +def prod5N_array(file_name, site: str, subarray_selection: Union[str, List[int]]): + """Return tel IDs and involved cameras from configuration and simtel file. + + Alpha configurations refer to the subarrays selected in [1]_. + + Parameters + ---------- + file_name : `str` + Name of the first file of the list of files given by the user. + subarray_selection : {``str`, `list` [`int`]} + Name or list if telescope IDs which identifies the subarray to extract. + site : `str` + Can be only "north" or "south". + + Returns + ------- + tel_ids : `list` [`int`] + List of telescope IDs to use in a format readable by ctapipe. + cameras : `list` [`str`] + List of camera types inferred from tel_ids. + This will feed both the estimators and the image cleaning. + + References + ---------- + .. [1] Cherenkov Telescope Array Observatory, & Cherenkov Telescope Array + Consortium. (2021). CTAO Instrument Response Functions - prod5 version + v0.1 (v0.1) [Data set]. Zenodo. https://doi.org/10.5281/zenodo.5499840 + + """ + + source = EventSource(input_url=file_name, max_events=1) + original_array = source.subarray + tot_num_tels = original_array.num_tels + + # prod5N_alpha_north + # CTA North Advanced Threshold Layout D25 + # Prod5 North + # 4 LSTs and 9 MSTs (NectarCam type) + + # prod5N_alpha_south + # CTA South + # Prod5 layout S-M6C5-14MSTs37SSTs-MSTF + # 0 LSTs and 14 MSTs (FlashCam type), 37 SSTs + + site_to_subarray_name = {"north": ["prod5N_alpha_north"], + "south": ["prod5N_alpha_south"]} + + name_to_tel_ids = { + "full_array": original_array.tel_ids, + "prod5N_alpha_north": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 19, 35], + "prod5N_alpha_south": [5, 6, 8, 10, 11, 12, 13, 14, 126, 16, 125, 20, 24, + 26, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 133, 59, 61, + 66, 67, 68, 69, 70, 71, 72, 73, 143, 144, 145, 146]} + + # Add subarrays by telescope type for the full original array + # and extract the same from the alpha congiguration subarrays + for tel_type in original_array.telescope_types: + name_to_tel_ids[f"full_array_{tel_type}"] = original_array.get_tel_ids_for_type( + tel_type) + name_to_tel_ids[f"prod5N_alpha_north_subarray_{tel_type}"] = set( + name_to_tel_ids[f"full_array_{tel_type}"]).intersection(name_to_tel_ids["prod5N_alpha_north"]) + name_to_tel_ids[f"prod5N_alpha_south_subarray_{tel_type}"] = set( + name_to_tel_ids[f"full_array_{tel_type}"]).intersection(name_to_tel_ids["prod5N_alpha_south"]) + + # Check if the user is using the correct file for the site declared in + # the configuration + if site.lower() == "north" and (tot_num_tels > 84): + raise ValueError( + "\033[91m Input simtel file and site are uncorrelated!\033[0m" + ) + if site.lower() == "south" and (tot_num_tels < 180): + raise ValueError( + "\033[91m infile and site uncorrelated! \033[0m" + ) + + # Validate the subarray selection in case it is a string + if (type(subarray_selection) is str): + if (subarray_selection not in name_to_tel_ids): + raise ValueError( + f"""\033[91m {subarray_selection} is not a supported subarray_selection for this production. + Possible choices are: + - use a custom list of IDs, + - add a new subarray definition to protopipe.utils.prod5N_array. + """ + ) + elif subarray_selection not in site_to_subarray_name[site]: + raise ValueError( + f"""\033[91m {subarray_selection} is not a supported subarray_selection for the {site} site. + """ + ) + else: + print(f"\033[94m Extracting telescope IDs for {subarray_selection}...\033[0m") + return final_array_to_use(original_array, subarray_selection, name_to_tel_ids) + else: # subarray_selection is a list of telescope IDs + print(f"\033[94m Extracting telescope IDs list = {subarray_selection}...\033[0m") + return final_array_to_use(original_array, subarray_selection) + + return final_array_to_use(original_array, subarray_selection) + + +def prod3b_array(file_name, site, array): """Return tel IDs and involved cameras from configuration and simtel file. The initial check (and the too-high cyclomatic complexity) will disappear with the advent of the final array layouts. - Currently not very performant: it is necessary get at least the first event - of the simtel file to read the simulated array information. Parameters ---------- - first_fileName : str + file_name : `str` Name of the first file of the list of files given by the user. - array : str or list + array : {``str`, `list` [`int`]} Name of the subarray or - if not supported - a custom list of telescope IDs that the user wants to use - site : str + site : `str` Can be only "north" or "south". Currently relevant only for baseline simulations. For non-baseline simulations only custom lists of IDs matter. Returns ------- - tel_ids : list + tel_ids : `list` [`int`] List of telescope IDs to use in a format readable by ctapipe. - cameras : list + cameras : `list` [`str`] List of camera types inferred from tel_ids. This will feed both the estimators and the image cleaning. """ - source = EventSource(input_url=fileName, max_events=1) - - for event in source: # get only first event - pass - - sim_array = source.subarray # get simulated array + source = EventSource(input_url=file_name, max_events=1) + original_array = source.subarray # get simulated array # Dictionaries of subarray names for BASELINE simulations subarrays_N = { # La Palma has only 2 cameras - "subarray_LSTs": sim_array.get_tel_ids_for_type("LST_LST_LSTCam"), - "subarray_MSTs": sim_array.get_tel_ids_for_type("MST_MST_NectarCam"), - "full_array": sim_array.tel_ids, + "subarray_LSTs": original_array.get_tel_ids_for_type("LST_LST_LSTCam"), + "subarray_MSTs": original_array.get_tel_ids_for_type("MST_MST_NectarCam"), + "full_array": original_array.tel_ids, } subarrays_S = { # Paranal has only 3 cameras - "subarray_LSTs": sim_array.get_tel_ids_for_type("LST_LST_LSTCam"), - "subarray_MSTs": sim_array.get_tel_ids_for_type("MST_MST_FlashCam"), - "subarray_SSTs": sim_array.get_tel_ids_for_type("SST_GCT_CHEC"), - "full_array": sim_array.tel_ids, + "subarray_LSTs": original_array.get_tel_ids_for_type("LST_LST_LSTCam"), + "subarray_MSTs": original_array.get_tel_ids_for_type("MST_MST_FlashCam"), + "subarray_SSTs": original_array.get_tel_ids_for_type("SST_GCT_CHEC"), + "full_array": original_array.tel_ids, } if site.lower() == "north": - if sim_array.num_tels > 19: # this means non-baseline simulation.. + if original_array.num_tels > 19: # this means non-baseline simulation.. if ( - sim_array.num_tels > 125 # Paranal non-baseline - or sim_array.num_tels == 99 # Paranal baseline - or sim_array.num_tels == 98 # gamma_test_large + original_array.num_tels > 125 # Paranal non-baseline + or original_array.num_tels == 99 # Paranal baseline + or original_array.num_tels == 98 # gamma_test_large ): raise ValueError( "\033[91m ERROR: infile and site uncorrelated! \033[0m" ) - if type(array) == str and array != "full_array": + if (type(array) is str) and (array != "full_array"): raise ValueError( "\033[91m ERROR: Only 'full_array' supported for this production.\n\ Please, use that or define a custom array with a list of tel_ids. \033[0m" ) elif array == "full_array": - return final_array_to_use(sim_array, array, subarrays_N) + return final_array_to_use(original_array, array, subarrays_N) elif ( type(array) == list ): # ..for which only custom lists are currently supported - return final_array_to_use(sim_array, array) + return final_array_to_use(original_array, array) else: raise ValueError( f"\033[91m ERROR: array {array} not supported. \033[0m" @@ -291,20 +388,20 @@ def prod3b_array(fileName, site, array): "\033[91m ERROR: requested missing camera from simtel file. \033[0m" ) else: - return final_array_to_use(sim_array, array, subarrays_N) + return final_array_to_use(original_array, array, subarrays_N) elif type(array) == list: if any((tel_id < 1 or tel_id > 19) for tel_id in array): raise ValueError( "\033[91m ERROR: non-existent telescope ID. \033[0m" ) - return final_array_to_use(sim_array, array) + return final_array_to_use(original_array, array) else: raise ValueError( f"\033[91m ERROR: array {array} not supported. \033[0m" ) elif site.lower() == "south": - if sim_array.num_tels > 99: # this means non-baseline simulation.. - if sim_array.num_tels < 126: + if original_array.num_tels > 99: # this means non-baseline simulation.. + if original_array.num_tels < 126: raise ValueError( "\033[91m ERROR: infile and site uncorrelated! \033[0m" ) @@ -314,17 +411,17 @@ def prod3b_array(fileName, site, array): Please, use that or define a custom array with a list of tel_ids. \033[0m" ) if array == "full_array": - return final_array_to_use(sim_array, array, subarrays_S) + return final_array_to_use(original_array, array, subarrays_S) elif ( type(array) == list ): # ..for which only custom lists are currently supported - return final_array_to_use(sim_array, array) + return final_array_to_use(original_array, array) else: raise ValueError( f"\033[91m ERROR: Array {array} not supported. \033[0m" ) else: # this is a baseline simulation - if sim_array.num_tels == 19: + if original_array.num_tels == 19: raise ValueError( "\033[91m ERROR: infile and site uncorrelated! \033[0m" ) @@ -334,17 +431,17 @@ def prod3b_array(fileName, site, array): "\033[91m ERROR: requested missing camera from simtel file. \033[0m" ) else: - if sim_array.num_tels == 98: # this is gamma_test_large - subarrays_S["subarray_SSTs"] = sim_array.get_tel_ids_for_type( + if original_array.num_tels == 98: # this is gamma_test_large + subarrays_S["subarray_SSTs"] = original_array.get_tel_ids_for_type( "SST_ASTRI_ASTRICam" # in this file SSTs are ASTRI ) - return final_array_to_use(sim_array, array, subarrays_S) + return final_array_to_use(original_array, array, subarrays_S) elif type(array) == list: if any((tel_id < 1 or tel_id > 99) for tel_id in array): raise ValueError( "\033[91m ERROR: non-existent telescope ID. \033[0m" ) - return final_array_to_use(sim_array, array) + return final_array_to_use(original_array, array) else: raise ValueError( f"\033[91m ERROR: Array {array} not supported. \033[0m" @@ -467,7 +564,7 @@ def get_camera_names(inputPath=None): ========== infile : str Full path of the input DL1 file. - fileName : str + file_name : str Name of the input DL1 file. Returns @@ -487,7 +584,7 @@ def get_camera_names(inputPath=None): def load_models(path, cam_id_list): """Load the pickled dictionary of model from disk and fill the model dictionary. - + Parameters ---------- path : string @@ -499,20 +596,20 @@ def load_models(path, cam_id_list): List of camera identifiers like telescope ID or camera ID and the assumed distinguishing feature in the filenames of the various pickled regressors. - + Returns ------- model_dict: dict Dictionary with `cam_id` as keys and pickled models as values. """ - + model_dict = {} for key in cam_id_list: try: model_dict[key] = joblib.load(path.format(cam_id=key)) except IndexError: model_dict[key] = joblib.load(path.format(key)) - + return model_dict From 165fb9213fc050dbc22700928dda3b86bb23b226 Mon Sep 17 00:00:00 2001 From: Michele Peresano Date: Mon, 4 Oct 2021 18:23:16 +0200 Subject: [PATCH 2/8] Update data_training script - allow for use of Prod5 array selection - improve checks from analysis configuration file --- protopipe/scripts/data_training.py | 70 ++++++++++++++++++------------ 1 file changed, 43 insertions(+), 27 deletions(-) diff --git a/protopipe/scripts/data_training.py b/protopipe/scripts/data_training.py index 90dfd20e..20194394 100755 --- a/protopipe/scripts/data_training.py +++ b/protopipe/scripts/data_training.py @@ -17,6 +17,7 @@ from protopipe.pipeline import EventPreparer from protopipe.pipeline.utils import ( make_argparser, + prod5N_array, prod3b_array, str2bool, load_config, @@ -34,7 +35,7 @@ def main(): parser.add_argument( "--debug", action="store_true", help="Print debugging information", ) - + parser.add_argument( "--show_progress_bar", action="store_true", @@ -67,14 +68,15 @@ def main(): # Read configuration file cfg = load_config(args.config_file) - try: # If the user didn't specify a site and/or and array... + try: site = cfg["General"]["site"] array = cfg["General"]["array"] - except KeyError: # ...raise an error and exit. - print( - "\033[91m ERROR: make sure that both 'site' and 'array' are " - "specified in the analysis configuration file! \033[0m" - ) + production = cfg["General"]["production"] + assert all(len(x) > 0 for x in [site, array, production]) + except (KeyError, AssertionError): + raise ValueError("""\033[91m At least one of 'site', 'array' and + 'production' are not properly defined in the analysis configuration + file.\033[0m""") sys_exit(-1) if args.infile_list: @@ -93,9 +95,17 @@ def main(): # Get the IDs of the involved telescopes and associated cameras together # with the equivalent focal lengths from the first event - allowed_tels, cams_and_foclens, subarray = prod3b_array( - filenamelist[0], site, array - ) + if production == "Prod5N": + allowed_tels, cams_and_foclens, subarray = prod5N_array( + filenamelist[0], site, array + ) + elif production == "Prod3b": + allowed_tels, cams_and_foclens, subarray = prod3b_array( + filenamelist[0], site, array + ) + else: + raise ValueError("""\033[91m Unsupported production.\033[0m""") + sys_exit(-1) # keeping track of events and where they were rejected evt_cutflow = CutFlow("EventCutFlow") @@ -229,7 +239,7 @@ def main(): outfile = tb.open_file(args.outfile, mode="w") outTable = {} outData = {} - + # Configuration options for SimTelEventSource # Readout window integration correction try: @@ -244,7 +254,7 @@ def main(): source = MySimTelEventSource( input_url=filename, - calib_scale = calib_scale, + calib_scale=calib_scale, allowed_tels=allowed_tels, max_events=args.max_events ) @@ -270,14 +280,14 @@ def main(): good_event, good_for_reco, ) in tqdm( - preper.prepare_event(source, - save_images=args.save_images, - debug=args.debug), - desc=source.__class__.__name__, - total=source.max_events, - unit="event", - disable= not args.show_progress_bar - ): + preper.prepare_event(source, + save_images=args.save_images, + debug=args.debug), + desc=source.__class__.__name__, + total=source.max_events, + unit="event", + disable=not args.show_progress_bar + ): if good_event: @@ -398,14 +408,16 @@ def main(): if estimation_weight == "CTAMARS": # Get an array of trees - predictions_trees = np.array([tree.predict(features_values) for tree in model.estimators_]) + predictions_trees = np.array( + [tree.predict(features_values) for tree in model.estimators_]) energy_tel[idx] = np.mean(predictions_trees, axis=0) weight_statistic_tel[idx] = np.std(predictions_trees, axis=0) else: - data.eval(f'estimation_weight = {estimation_weight}', inplace=True) + data.eval( + f'estimation_weight = {estimation_weight}', inplace=True) energy_tel[idx] = model.predict(features_values) weight_tel[idx] = data["estimation_weight"] - + if log_10_target: energy_tel[idx] = 10**energy_tel[idx] weight_tel[idx] = 10**weight_tel[idx] @@ -484,7 +496,8 @@ def main(): outData[cam_id]["h_max"] = h_max.to("m").value outData[cam_id]["err_est_pos"] = np.nan outData[cam_id]["err_est_dir"] = np.nan - outData[cam_id]["true_energy"] = event.simulation.shower.energy.to("TeV").value + outData[cam_id]["true_energy"] = event.simulation.shower.energy.to( + "TeV").value outData[cam_id]["hillas_x"] = moments.x.to("deg").value outData[cam_id]["hillas_y"] = moments.y.to("deg").value outData[cam_id]["hillas_phi"] = moments.phi.to("deg").value @@ -499,11 +512,14 @@ def main(): outData[cam_id]["hillas_ellipticity"] = ellipticity.value outData[cam_id]["clusters"] = n_cluster_dict[tel_id] outData[cam_id]["n_tel_discri"] = n_tels["GOOD images"] - outData[cam_id]["mc_core_x"] = event.simulation.shower.core_x.to("m").value - outData[cam_id]["mc_core_y"] = event.simulation.shower.core_y.to("m").value + outData[cam_id]["mc_core_x"] = event.simulation.shower.core_x.to( + "m").value + outData[cam_id]["mc_core_y"] = event.simulation.shower.core_y.to( + "m").value outData[cam_id]["reco_core_x"] = reco_core_x.to("m").value outData[cam_id]["reco_core_y"] = reco_core_y.to("m").value - outData[cam_id]["mc_h_first_int"] = event.simulation.shower.h_first_int.to("m").value + outData[cam_id]["mc_h_first_int"] = event.simulation.shower.h_first_int.to( + "m").value outData[cam_id]["offset"] = offset.to("deg").value outData[cam_id]["mc_x_max"] = event.simulation.shower.x_max.value # g / cm2 outData[cam_id]["alt"] = reco_result.alt.to("deg").value From 085326f6aa5fb5e4237ee3b10e6bf4bcf0dc8dda Mon Sep 17 00:00:00 2001 From: Michele Peresano Date: Mon, 4 Oct 2021 18:28:32 +0200 Subject: [PATCH 3/8] Update DL2 script in the same way and improve formatting --- protopipe/scripts/data_training.py | 12 +++-- protopipe/scripts/write_dl2.py | 75 +++++++++++++++++++++--------- 2 files changed, 60 insertions(+), 27 deletions(-) diff --git a/protopipe/scripts/data_training.py b/protopipe/scripts/data_training.py index 20194394..62e79ea8 100755 --- a/protopipe/scripts/data_training.py +++ b/protopipe/scripts/data_training.py @@ -68,15 +68,19 @@ def main(): # Read configuration file cfg = load_config(args.config_file) + # Check that the user specify site, array and production try: site = cfg["General"]["site"] array = cfg["General"]["array"] production = cfg["General"]["production"] assert all(len(x) > 0 for x in [site, array, production]) except (KeyError, AssertionError): - raise ValueError("""\033[91m At least one of 'site', 'array' and - 'production' are not properly defined in the analysis configuration - file.\033[0m""") + raise ValueError( + bcolors.FAIL + + """At least one of 'site', 'array' and + 'production' are not properly defined in the analysis configuration + file.""" + + + bcolors.ENDC) sys_exit(-1) if args.infile_list: @@ -104,7 +108,7 @@ def main(): filenamelist[0], site, array ) else: - raise ValueError("""\033[91m Unsupported production.\033[0m""") + raise ValueError(bcolors.FAIL + "Unsupported production." + bcolors.ENDC) sys_exit(-1) # keeping track of events and where they were rejected diff --git a/protopipe/scripts/write_dl2.py b/protopipe/scripts/write_dl2.py index e8a46bfc..f0164fd0 100755 --- a/protopipe/scripts/write_dl2.py +++ b/protopipe/scripts/write_dl2.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -from sys import exit +from sys import exit as sys_exit import numpy as np import pandas as pd from glob import glob @@ -19,6 +19,7 @@ from protopipe.pipeline.utils import ( bcolors, make_argparser, + prod5N_array, prod3b_array, str2bool, load_config, @@ -35,7 +36,7 @@ def main(): parser.add_argument( "--debug", action="store_true", help="Print debugging information", ) - + parser.add_argument( "--show_progress_bar", action="store_true", @@ -86,6 +87,21 @@ def main(): ) exit() + # Check that the user specify site, array and production + try: + site = cfg["General"]["site"] + array = cfg["General"]["array"] + production = cfg["General"]["production"] + assert all(len(x) > 0 for x in [site, array, production]) + except (KeyError, AssertionError): + raise ValueError( + bcolors.FAIL + + """At least one of 'site', 'array' and + 'production' are not properly defined in the analysis configuration + file.""" + + + bcolors.ENDC) + sys_exit(-1) + # Add force_tailcut_for_extended_cleaning in configuration cfg["General"][ "force_tailcut_for_extended_cleaning" @@ -105,13 +121,21 @@ def main(): if not filenamelist: print("no files found; check indir: {}".format(args.indir)) - exit(-1) + sys_exit(-1) # Get the IDs of the involved telescopes and associated cameras together # with the equivalent focal lengths from the first event - allowed_tels, cams_and_foclens, subarray = prod3b_array( - filenamelist[0], site, array - ) + if production == "Prod5N": + allowed_tels, cams_and_foclens, subarray = prod5N_array( + filenamelist[0], site, array + ) + elif production == "Prod3b": + allowed_tels, cams_and_foclens, subarray = prod3b_array( + filenamelist[0], site, array + ) + else: + raise ValueError(bcolors.FAIL + "Unsupported production." + bcolors.ENDC) + sys_exit(-1) # keeping track of events and where they were rejected evt_cutflow = CutFlow("EventCutFlow") @@ -219,11 +243,11 @@ def main(): # Declaration of the column descriptor for the (possible) images file StoredImages = dict( - event_id = tb.Int32Col(dflt=1, pos=0), - tel_id = tb.Int16Col(dflt=1, pos=1) + event_id=tb.Int32Col(dflt=1, pos=0), + tel_id=tb.Int16Col(dflt=1, pos=1) # reco_image, true_image and cleaning_mask_reco # are defined later sicne they depend on the number of pixels - ) + ) # this class defines the reconstruction parameters to keep track of class RecoEvent(tb.IsDescription): @@ -254,7 +278,7 @@ class RecoEvent(tb.IsDescription): reco_core_y = tb.Float32Col(dflt=np.nan, pos=24) true_core_x = tb.Float32Col(dflt=np.nan, pos=25) true_core_y = tb.Float32Col(dflt=np.nan, pos=26) - is_valid=tb.BoolCol(dflt=False, pos=27) + is_valid = tb.BoolCol(dflt=False, pos=27) reco_outfile = tb.open_file( mode="w", @@ -279,7 +303,7 @@ class RecoEvent(tb.IsDescription): images_outfile = tb.open_file("images.h5", mode="w") images_table = {} images_phe = {} - + # Configuration options for MySimTelEventSource try: calib_scale = cfg["Calibration"]["calib_scale"] @@ -315,14 +339,14 @@ class RecoEvent(tb.IsDescription): good_event, good_for_reco, ) in tqdm( - preper.prepare_event(source, - save_images=args.save_images, - debug=args.debug), - desc=source.__class__.__name__, - total=source.max_events, - unit="event", - disable= not args.show_progress_bar - ): + preper.prepare_event(source, + save_images=args.save_images, + debug=args.debug), + desc=source.__class__.__name__, + total=source.max_events, + unit="event", + disable=not args.show_progress_bar + ): # True direction true_az = event.simulation.shower.az @@ -442,11 +466,13 @@ class RecoEvent(tb.IsDescription): if (good_for_reco[tel_id] == 1) and (estimation_weight_energy == "CTAMARS"): # Get an array of trees - predictions_trees = np.array([tree.predict(features_values) for tree in model.estimators_]) + predictions_trees = np.array( + [tree.predict(features_values) for tree in model.estimators_]) energy_tel[idx] = np.mean(predictions_trees, axis=0) weight_statistic_tel[idx] = np.std(predictions_trees, axis=0) elif (good_for_reco[tel_id] == 1): - data.eval(f'estimation_weight_energy = {estimation_weight_energy}', inplace=True) + data.eval( + f'estimation_weight_energy = {estimation_weight_energy}', inplace=True) energy_tel[idx] = model.predict(features_values) weight_tel[idx] = data["estimation_weight_energy"] else: @@ -548,7 +574,8 @@ class RecoEvent(tb.IsDescription): ############################################################ # add weigth to event dataframe - data.eval(f'estimation_weight_classification = {estimation_weight_classification}', inplace=True) + data.eval( + f'estimation_weight_classification = {estimation_weight_classification}', inplace=True) # Here we check for valid telescope-wise energies # Because it means that it's a good image @@ -559,7 +586,8 @@ class RecoEvent(tb.IsDescription): if use_proba_for_classifier is False: score_tel[idx] = model.decision_function(features_values) else: - gammaness_tel[idx] = model.predict_proba(features_values)[:, 1] + gammaness_tel[idx] = model.predict_proba(features_values)[ + :, 1] weight_tel[idx] = data["estimation_weight_classification"] else: # WARNING: @@ -724,5 +752,6 @@ class RecoEvent(tb.IsDescription): print("Job done!") + if __name__ == "__main__": main() From 47c5c97851f6427b5f2851efa24f556bf2e5e3b6 Mon Sep 17 00:00:00 2001 From: Michele Peresano Date: Mon, 4 Oct 2021 18:29:13 +0200 Subject: [PATCH 4/8] update analysis configuration file --- .../aux/example_config_files/analysis.yaml | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/protopipe/aux/example_config_files/analysis.yaml b/protopipe/aux/example_config_files/analysis.yaml index 9175714c..87038dbe 100644 --- a/protopipe/aux/example_config_files/analysis.yaml +++ b/protopipe/aux/example_config_files/analysis.yaml @@ -2,15 +2,16 @@ # WARNING: the settings recorded here are unstable and used here only to give an example # NOTE: only Prod3b simulations are currently supported. General: - config_name: '' - site: 'north' # 'north' or 'south' - # array can be either - # - 'subarray_LSTs', 'subarray_MSTs', 'subarray_SSTs' or 'full_array' - # - a custom list of telescope IDs - # WARNING: for simulations containing multiple copies of the telescopes, - # only 'full_array' or custom list are supported options! - array: full_array - cam_id_list : ['LSTCam', 'NectarCam'] # Selected cameras (disabled option) + config_name: '' + production: '' # Prod3b or Prod5N + site: '' # 'north' or 'south' + # 'array' can be either + # - a custom list of telescope IDs + # - 'full_array', 'prod5N_alpha_north' or 'prod5N_alpha_south' + # If a string, you can select a telescope-type subarray by + # adding e.g. '_LST_LST_LSTCam' + # NOTE: the 'full_array' is the total original array without any selection + array: '' Calibration: # for a CTAMARS-like analysis disable integration correction From 6dd747726cc87f1aa9bedf378b7aa17061db5c68 Mon Sep 17 00:00:00 2001 From: Michele Peresano Date: Tue, 5 Oct 2021 15:43:21 +0200 Subject: [PATCH 5/8] Remove comment from analysis config --- protopipe/aux/example_config_files/analysis.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/protopipe/aux/example_config_files/analysis.yaml b/protopipe/aux/example_config_files/analysis.yaml index 87038dbe..8612bcbb 100644 --- a/protopipe/aux/example_config_files/analysis.yaml +++ b/protopipe/aux/example_config_files/analysis.yaml @@ -1,6 +1,5 @@ # General informations # WARNING: the settings recorded here are unstable and used here only to give an example -# NOTE: only Prod3b simulations are currently supported. General: config_name: '' production: '' # Prod3b or Prod5N From 3c4092eb36bfa47be825faf352826fa70118e05f Mon Sep 17 00:00:00 2001 From: Michele Peresano Date: Tue, 5 Oct 2021 15:44:01 +0200 Subject: [PATCH 6/8] Update test files for new "production" key --- .../tests/test_config_analysis_north.yaml | 20 +++++++++--------- .../tests/test_config_analysis_south.yaml | 21 ++++++++++--------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/protopipe/scripts/tests/test_config_analysis_north.yaml b/protopipe/scripts/tests/test_config_analysis_north.yaml index 6abc53b1..6701e7a9 100644 --- a/protopipe/scripts/tests/test_config_analysis_north.yaml +++ b/protopipe/scripts/tests/test_config_analysis_north.yaml @@ -1,15 +1,15 @@ # General informations -# NOTE: only Prod3b simulations are currently supported. General: - config_name: 'test_CTA_North' - site: 'north' # 'north' or 'south' - # array can be either - # - 'subarray_LSTs', 'subarray_MSTs', 'subarray_SSTs' or 'full_array' - # - a custom list of telescope IDs - # WARNING: for simulations containing multiple copies of the telescopes, - # only 'full_array' or custom list are supported options! - array: full_array - cam_id_list : ['LSTCam', 'NectarCam'] # Selected cameras (disabled option) + config_name: 'test_CTA_North' + production: 'Prod3b' # Prod3b or Prod5N + site: 'north' # 'north' or 'south' + # 'array' can be either + # - a custom list of telescope IDs + # - 'full_array', 'prod5N_alpha_north' or 'prod5N_alpha_south' + # If a string, you can select a telescope-type subarray by + # adding e.g. '_LST_LST_LSTCam' + # NOTE: the 'full_array' is the total original array without any selection + array: 'full_array' Calibration: # factor to transform the integrated charges (in ADC counts) into number of diff --git a/protopipe/scripts/tests/test_config_analysis_south.yaml b/protopipe/scripts/tests/test_config_analysis_south.yaml index 554be2fa..f4c530d3 100644 --- a/protopipe/scripts/tests/test_config_analysis_south.yaml +++ b/protopipe/scripts/tests/test_config_analysis_south.yaml @@ -1,15 +1,16 @@ # General informations -# NOTE: only Prod3b simulations are currently supported. +# General informations General: - config_name: 'test_CTA_South' - site: 'south' # 'north' or 'south' - # array can be either - # - 'subarray_LSTs', 'subarray_MSTs', 'subarray_SSTs' or 'full_array' - # - a custom list of telescope IDs - # WARNING: for simulations containing multiple copies of the telescopes, - # only 'full_array' or custom list are supported options! - array: full_array - cam_id_list : ['LSTCam', 'FlashCam', 'CHEC'] # Selected cameras (disabled option) + config_name: 'test_CTA_South' + production: 'Prod3b' # Prod3b or Prod5N + site: 'south' # 'north' or 'south' + # 'array' can be either + # - a custom list of telescope IDs + # - 'full_array', 'prod5N_alpha_north' or 'prod5N_alpha_south' + # If a string, you can select a telescope-type subarray by + # adding e.g. '_LST_LST_LSTCam' + # NOTE: the 'full_array' is the total original array without any selection + array: 'full_array' Calibration: # factor to transform the integrated charges (in ADC counts) into number of From 52a4f725ede43a04e7c8dd3a9ca95103aa0e72d9 Mon Sep 17 00:00:00 2001 From: Michele Peresano Date: Mon, 24 Jan 2022 17:57:52 +0100 Subject: [PATCH 7/8] Fix docstrings --- protopipe/pipeline/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/protopipe/pipeline/utils.py b/protopipe/pipeline/utils.py index 64f21a9a..31518b5e 100644 --- a/protopipe/pipeline/utils.py +++ b/protopipe/pipeline/utils.py @@ -177,7 +177,7 @@ def final_array_to_use(original_array, subarray_selection: Union[str, List[int]] list given by 'array'. original_array : `ctapipe.instrument.SubarrayDescription` Full simulated array from the first event. - subarray_selection : {``str`, `list` [`int`]} + subarray_selection : {`str`, `list` [`int`]} Custom list of telescope IDs that the user wants to use or name of specific subarray. @@ -221,7 +221,7 @@ def prod5N_array(file_name, site: str, subarray_selection: Union[str, List[int]] ---------- file_name : `str` Name of the first file of the list of files given by the user. - subarray_selection : {``str`, `list` [`int`]} + subarray_selection : {`str`, `list` [`int`]} Name or list if telescope IDs which identifies the subarray to extract. site : `str` Can be only "north" or "south". @@ -323,7 +323,7 @@ def prod3b_array(file_name, site, array): ---------- file_name : `str` Name of the first file of the list of files given by the user. - array : {``str`, `list` [`int`]} + array : {`str`, `list` [`int`]} Name of the subarray or - if not supported - a custom list of telescope IDs that the user wants to use site : `str` From d4fb1e0e18f6e0b4d6274aa5dd7c2bf8e2cc18f7 Mon Sep 17 00:00:00 2001 From: Michele Peresano Date: Wed, 26 Jan 2022 17:56:06 +0100 Subject: [PATCH 8/8] call EventSource in with blocks + PEP8 formatting --- protopipe/pipeline/utils.py | 166 ++++++++++++++++++++++++++---------- 1 file changed, 119 insertions(+), 47 deletions(-) diff --git a/protopipe/pipeline/utils.py b/protopipe/pipeline/utils.py index 64f21a9a..a9bad6bf 100644 --- a/protopipe/pipeline/utils.py +++ b/protopipe/pipeline/utils.py @@ -52,11 +52,11 @@ def load_config(name): class SignalHandler: - """ handles ctrl+c signals; set up via - `signal_handler = SignalHandler() - `signal.signal(signal.SIGINT, signal_handler)` - # or for two step interupt: - `signal.signal(signal.SIGINT, signal_handler.stop_drawing)` + """handles ctrl+c signals; set up via + `signal_handler = SignalHandler() + `signal.signal(signal.SIGINT, signal_handler)` + # or for two step interupt: + `signal.signal(signal.SIGINT, signal_handler.stop_drawing)` """ def __init__(self): @@ -163,7 +163,9 @@ def make_argparser(): return parser -def final_array_to_use(original_array, subarray_selection: Union[str, List[int]], subarrays=None): +def final_array_to_use( + original_array, subarray_selection: Union[str, List[int]], subarrays=None +): """Infer IDs of telescopes and cameras with equivalent focal lengths. This is an helper function for utils.prod3b_array. @@ -199,14 +201,18 @@ def final_array_to_use(original_array, subarray_selection: Union[str, List[int]] if subarrays: tel_ids = subarrays[subarray_selection] selected_subarray = original_array.select_subarray( - tel_ids, name="selected_subarray") + tel_ids, name="selected_subarray" + ) else: selected_subarray = original_array.select_subarray( - subarray_selection, name="selected_subarray") + subarray_selection, name="selected_subarray" + ) tel_ids = selected_subarray.tel_ids tel_types = selected_subarray.telescope_types cams_and_foclens = { - tel_types[i].camera.camera_name: tel_types[i].optics.equivalent_focal_length.value + tel_types[i] + .camera.camera_name: tel_types[i] + .optics.equivalent_focal_length.value for i in range(len(tel_types)) } return set(tel_ids), cams_and_foclens, selected_subarray @@ -242,8 +248,9 @@ def prod5N_array(file_name, site: str, subarray_selection: Union[str, List[int]] """ - source = EventSource(input_url=file_name, max_events=1) - original_array = source.subarray + with EventSource(input_url=file_name, max_events=1) as source: + original_array = source.subarray + tot_num_tels = original_array.num_tels # prod5N_alpha_north @@ -256,41 +263,92 @@ def prod5N_array(file_name, site: str, subarray_selection: Union[str, List[int]] # Prod5 layout S-M6C5-14MSTs37SSTs-MSTF # 0 LSTs and 14 MSTs (FlashCam type), 37 SSTs - site_to_subarray_name = {"north": ["prod5N_alpha_north"], - "south": ["prod5N_alpha_south"]} + site_to_subarray_name = { + "north": ["prod5N_alpha_north"], + "south": ["prod5N_alpha_south"], + } name_to_tel_ids = { "full_array": original_array.tel_ids, "prod5N_alpha_north": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 19, 35], - "prod5N_alpha_south": [5, 6, 8, 10, 11, 12, 13, 14, 126, 16, 125, 20, 24, - 26, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, - 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 133, 59, 61, - 66, 67, 68, 69, 70, 71, 72, 73, 143, 144, 145, 146]} + "prod5N_alpha_south": [ + 5, + 6, + 8, + 10, + 11, + 12, + 13, + 14, + 126, + 16, + 125, + 20, + 24, + 26, + 30, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 133, + 59, + 61, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 143, + 144, + 145, + 146, + ], + } # Add subarrays by telescope type for the full original array # and extract the same from the alpha congiguration subarrays for tel_type in original_array.telescope_types: name_to_tel_ids[f"full_array_{tel_type}"] = original_array.get_tel_ids_for_type( - tel_type) + tel_type + ) name_to_tel_ids[f"prod5N_alpha_north_subarray_{tel_type}"] = set( - name_to_tel_ids[f"full_array_{tel_type}"]).intersection(name_to_tel_ids["prod5N_alpha_north"]) + name_to_tel_ids[f"full_array_{tel_type}"] + ).intersection(name_to_tel_ids["prod5N_alpha_north"]) name_to_tel_ids[f"prod5N_alpha_south_subarray_{tel_type}"] = set( - name_to_tel_ids[f"full_array_{tel_type}"]).intersection(name_to_tel_ids["prod5N_alpha_south"]) + name_to_tel_ids[f"full_array_{tel_type}"] + ).intersection(name_to_tel_ids["prod5N_alpha_south"]) # Check if the user is using the correct file for the site declared in # the configuration if site.lower() == "north" and (tot_num_tels > 84): - raise ValueError( - "\033[91m Input simtel file and site are uncorrelated!\033[0m" - ) + raise ValueError("\033[91m Input simtel file and site are uncorrelated!\033[0m") if site.lower() == "south" and (tot_num_tels < 180): - raise ValueError( - "\033[91m infile and site uncorrelated! \033[0m" - ) + raise ValueError("\033[91m infile and site uncorrelated! \033[0m") # Validate the subarray selection in case it is a string - if (type(subarray_selection) is str): - if (subarray_selection not in name_to_tel_ids): + if type(subarray_selection) is str: + if subarray_selection not in name_to_tel_ids: raise ValueError( f"""\033[91m {subarray_selection} is not a supported subarray_selection for this production. Possible choices are: @@ -304,10 +362,16 @@ def prod5N_array(file_name, site: str, subarray_selection: Union[str, List[int]] """ ) else: - print(f"\033[94m Extracting telescope IDs for {subarray_selection}...\033[0m") - return final_array_to_use(original_array, subarray_selection, name_to_tel_ids) + print( + f"\033[94m Extracting telescope IDs for {subarray_selection}...\033[0m" + ) + return final_array_to_use( + original_array, subarray_selection, name_to_tel_ids + ) else: # subarray_selection is a list of telescope IDs - print(f"\033[94m Extracting telescope IDs list = {subarray_selection}...\033[0m") + print( + f"\033[94m Extracting telescope IDs list = {subarray_selection}...\033[0m" + ) return final_array_to_use(original_array, subarray_selection) return final_array_to_use(original_array, subarray_selection) @@ -340,8 +404,8 @@ def prod3b_array(file_name, site, array): This will feed both the estimators and the image cleaning. """ - source = EventSource(input_url=file_name, max_events=1) - original_array = source.subarray # get simulated array + with EventSource(input_url=file_name, max_events=1) as source: + original_array = source.subarray # get simulated array # Dictionaries of subarray names for BASELINE simulations subarrays_N = { # La Palma has only 2 cameras @@ -432,7 +496,9 @@ def prod3b_array(file_name, site, array): ) else: if original_array.num_tels == 98: # this is gamma_test_large - subarrays_S["subarray_SSTs"] = original_array.get_tel_ids_for_type( + subarrays_S[ + "subarray_SSTs" + ] = original_array.get_tel_ids_for_type( "SST_ASTRI_ASTRICam" # in this file SSTs are ASTRI ) return final_array_to_use(original_array, array, subarrays_S) @@ -574,7 +640,7 @@ def get_camera_names(inputPath=None): """ if inputPath is None: print("ERROR: check input") - h5file = tables.open_file(inputPath, mode='r') + h5file = tables.open_file(inputPath, mode="r") group = h5file.get_node("/") camera_names = [x.name for x in group._f_list_nodes()] h5file.close() @@ -618,19 +684,25 @@ def add_stats(data, ax, x=0.70, y=0.85, fontsize=10): mu = data.mean() median = np.median(data) sigma = data.std() - textstr = '\n'.join(( - r'$\mu=%.2f$' % (mu, ), - r'$\mathrm{median}=%.2f$' % (median, ), - r'$\sigma=%.2f$' % (sigma, ))) + textstr = "\n".join( + ( + r"$\mu=%.2f$" % (mu,), + r"$\mathrm{median}=%.2f$" % (median,), + r"$\sigma=%.2f$" % (sigma,), + ) + ) # these are matplotlib.patch.Patch properties - props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) + props = dict(boxstyle="round", facecolor="wheat", alpha=0.5) # place a text box in upper left in axes coords - ax.text(x, y, - textstr, - transform=ax.transAxes, - fontsize=fontsize, - horizontalalignment='left', - verticalalignment='center', - bbox=props) + ax.text( + x, + y, + textstr, + transform=ax.transAxes, + fontsize=fontsize, + horizontalalignment="left", + verticalalignment="center", + bbox=props, + )