From f40269289932e6c9a34488df115175d356c55d0f Mon Sep 17 00:00:00 2001 From: Franca Cassol Date: Fri, 13 Oct 2023 09:04:56 +0000 Subject: [PATCH 1/6] add possibility to copy some requested nodes and not to merge them --- lstchain/io/io.py | 10 ++++++++++ lstchain/scripts/lstchain_merge_hdf5_files.py | 6 ++++++ 2 files changed, 16 insertions(+) diff --git a/lstchain/io/io.py b/lstchain/io/io.py index 3ebd9a0db4..229cc38c3b 100644 --- a/lstchain/io/io.py +++ b/lstchain/io/io.py @@ -300,6 +300,7 @@ def auto_merge_h5files( file_list, output_filename="merged.h5", nodes_keys=None, + keys_to_copy=None, merge_arrays=False, filters=HDF5_ZSTD_FILTERS, progress_bar=True, @@ -315,6 +316,7 @@ def auto_merge_h5files( file_list: list of path output_filename: path nodes_keys: list of path + keys_to_copy: list of nodes that must be copied (because the saem in all the files) merge_arrays: bool filters progress_bar: bool @@ -332,6 +334,10 @@ def auto_merge_h5files( else: keys = set(nodes_keys) + copy_keys = {} + if keys_to_copy: + copy_keys = set(keys_to_copy) + bar = tqdm(total=len(file_list), disable=not progress_bar) with open_file(output_filename, 'w', filters=filters) as merge_file: with open_file(file_list[0]) as f1: @@ -340,6 +346,10 @@ def auto_merge_h5files( bar.update(1) for filename in file_list[1:]: common_keys = keys.intersection(get_dataset_keys(filename)) + + # do not merge specifics nodes with equal data in all files + common_keys=common_keys.difference(copy_keys) + with open_file(filename) as file: for k in common_keys: in_node = file.root[k] diff --git a/lstchain/scripts/lstchain_merge_hdf5_files.py b/lstchain/scripts/lstchain_merge_hdf5_files.py index 9c7adfd530..0f4dba4296 100644 --- a/lstchain/scripts/lstchain_merge_hdf5_files.py +++ b/lstchain/scripts/lstchain_merge_hdf5_files.py @@ -72,6 +72,11 @@ help='Skip checks when merging files' ) +parser.add_argument( + '--keys-to-copy', + nargs="+", default=[''], + help='List of duplicated keys to be copied and not to 
be merged' +) def main(): args = parser.parse_args() @@ -95,6 +100,7 @@ def main(): file_list, args.output_file, nodes_keys=keys, + keys_to_copy=args.keys_to_copy, progress_bar=not args.no_progress, run_checks=not args.skip_checks ) From 3a9460ce64378b32a162039fd93ac230fe85d0e2 Mon Sep 17 00:00:00 2001 From: Franca Cassol Date: Fri, 13 Oct 2023 09:24:00 +0000 Subject: [PATCH 2/6] Improve comments --- lstchain/io/io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lstchain/io/io.py b/lstchain/io/io.py index 229cc38c3b..681d0bc673 100644 --- a/lstchain/io/io.py +++ b/lstchain/io/io.py @@ -316,7 +316,7 @@ def auto_merge_h5files( file_list: list of path output_filename: path nodes_keys: list of path - keys_to_copy: list of nodes that must be copied (because the saem in all the files) + keys_to_copy: list of nodes that must be copied and not merged (because the same in all files) merge_arrays: bool filters progress_bar: bool @@ -347,7 +347,7 @@ def auto_merge_h5files( for filename in file_list[1:]: common_keys = keys.intersection(get_dataset_keys(filename)) - # do not merge specifics nodes with equal data in all files + # do not merge specific nodes with equal data in all files common_keys=common_keys.difference(copy_keys) with open_file(filename) as file: From ac1e3a2920642b03504c3dc83c84376e945f4adb Mon Sep 17 00:00:00 2001 From: Franca Cassol Date: Thu, 2 Nov 2023 11:42:45 +0000 Subject: [PATCH 3/6] add default keys to copy --- lstchain/io/io.py | 3 +++ lstchain/scripts/lstchain_merge_hdf5_files.py | 19 ++++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/lstchain/io/io.py b/lstchain/io/io.py index 681d0bc673..de461358a3 100644 --- a/lstchain/io/io.py +++ b/lstchain/io/io.py @@ -82,6 +82,9 @@ dl1_params_tel_mon_ped_key = "/dl1/event/telescope/monitoring/pedestal" dl1_params_tel_mon_cal_key = "/dl1/event/telescope/monitoring/calibration" dl1_params_tel_mon_flat_key = "/dl1/event/telescope/monitoring/flatfield" 
+dl1_mon_tel_CatB_ped_key = "/dl1/monitoring/telescope/catB/pedestal" +dl1_mon_tel_CatB_cal_key = "/dl1/monitoring/telescope/catB/calibration" +dl1_mon_tel_CatB_flat_key = "/dl1/monitoring/telescope/catB/flatfield" dl1_params_lstcam_key = "/dl1/event/telescope/parameters/LST_LSTCam" dl1_images_lstcam_key = "/dl1/event/telescope/image/LST_LSTCam" dl2_params_lstcam_key = "/dl2/event/telescope/parameters/LST_LSTCam" diff --git a/lstchain/scripts/lstchain_merge_hdf5_files.py b/lstchain/scripts/lstchain_merge_hdf5_files.py index 0f4dba4296..0fb409a9e5 100644 --- a/lstchain/scripts/lstchain_merge_hdf5_files.py +++ b/lstchain/scripts/lstchain_merge_hdf5_files.py @@ -22,7 +22,22 @@ from lstchain.io import auto_merge_h5files from lstchain.io import get_dataset_keys +from lstchain.io.io import ( + dl1_params_tel_mon_ped_key, + dl1_params_tel_mon_cal_key, + dl1_params_tel_mon_flat_key, + dl1_mon_tel_CatB_cal_key, + dl1_mon_tel_CatB_ped_key, + dl1_mon_tel_CatB_flat_key +) +default_keys_to_copy = [dl1_params_tel_mon_ped_key, + dl1_params_tel_mon_cal_key, + dl1_params_tel_mon_flat_key, + dl1_mon_tel_CatB_cal_key, + dl1_mon_tel_CatB_ped_key, + dl1_mon_tel_CatB_flat_key +] parser = argparse.ArgumentParser(description='Merge HDF5 files') # Required arguments @@ -96,11 +111,13 @@ def main(): else: keys = None + keys_to_copy = default_keys_to_copy + args.keys_to_copy + auto_merge_h5files( file_list, args.output_file, nodes_keys=keys, - keys_to_copy=args.keys_to_copy, + keys_to_copy=keys_to_copy, progress_bar=not args.no_progress, run_checks=not args.skip_checks ) From b08f2dbf04ae04f386e256e827bfc9e54b604115 Mon Sep 17 00:00:00 2001 From: Franca Cassol Date: Thu, 2 Nov 2023 16:30:46 +0000 Subject: [PATCH 4/6] verify number of rows for keys that are copied from the first file --- lstchain/io/io.py | 9 +++++++++ lstchain/scripts/lstchain_merge_hdf5_files.py | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/lstchain/io/io.py b/lstchain/io/io.py index 
de461358a3..b6b675d916 100644 --- a/lstchain/io/io.py +++ b/lstchain/io/io.py @@ -348,12 +348,21 @@ def auto_merge_h5files( bar.update(1) for filename in file_list[1:]: + common_keys = keys.intersection(get_dataset_keys(filename)) # do not merge specific nodes with equal data in all files common_keys=common_keys.difference(copy_keys) with open_file(filename) as file: + + # check value of Table.nrow for keys copied from the first file + for k in copy_keys: + first_node = merge_file.root[k] + present_node = file.root[k] + if first_node.nrows != present_node.nrows: + raise ValueError("Length of key {} from file {} different than in file {}".format(k, filename, file_list[0])) + for k in common_keys: in_node = file.root[k] out_node = merge_file.root[k] diff --git a/lstchain/scripts/lstchain_merge_hdf5_files.py b/lstchain/scripts/lstchain_merge_hdf5_files.py index 0fb409a9e5..5fad0acba0 100644 --- a/lstchain/scripts/lstchain_merge_hdf5_files.py +++ b/lstchain/scripts/lstchain_merge_hdf5_files.py @@ -38,6 +38,7 @@ dl1_mon_tel_CatB_ped_key, dl1_mon_tel_CatB_flat_key ] + parser = argparse.ArgumentParser(description='Merge HDF5 files') # Required arguments @@ -89,7 +90,7 @@ parser.add_argument( '--keys-to-copy', - nargs="+", default=[''], + nargs="*", default=[], help='List of duplicated keys to be copied and not to be merged' ) From abd9acfe9a0fda577acbd9acb808c40611c413a4 Mon Sep 17 00:00:00 2001 From: Franca Cassol Date: Fri, 3 Nov 2023 15:18:45 +0000 Subject: [PATCH 5/6] check keys_to_copy --- lstchain/io/io.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lstchain/io/io.py b/lstchain/io/io.py index b6b675d916..0b9fe2db6c 100644 --- a/lstchain/io/io.py +++ b/lstchain/io/io.py @@ -339,6 +339,7 @@ def auto_merge_h5files( copy_keys = {} if keys_to_copy: + keys_to_copy = {k for k in keys_to_copy if k in keys} copy_keys = set(keys_to_copy) bar = tqdm(total=len(file_list), disable=not progress_bar) From fd7aa94fc6032574f2df7e4ef03632f4ee68d5e2 Mon Sep 17 00:00:00 2001 
From: Franca Cassol Date: Tue, 28 Nov 2023 17:22:44 +0000 Subject: [PATCH 6/6] syntax changes --- lstchain/io/io.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/lstchain/io/io.py b/lstchain/io/io.py index ad43900567..ae1922442e 100644 --- a/lstchain/io/io.py +++ b/lstchain/io/io.py @@ -337,10 +337,7 @@ def auto_merge_h5files( else: keys = set(nodes_keys) - copy_keys = {} - if keys_to_copy: - keys_to_copy = {k for k in keys_to_copy if k in keys} - copy_keys = set(keys_to_copy) + keys_to_copy = set() if keys_to_copy is None else set(keys_to_copy).intersection(keys) bar = tqdm(total=len(file_list), disable=not progress_bar) with open_file(output_filename, 'w', filters=filters) as merge_file: @@ -353,12 +350,12 @@ def auto_merge_h5files( common_keys = keys.intersection(get_dataset_keys(filename)) # do not merge specific nodes with equal data in all files - common_keys=common_keys.difference(copy_keys) + common_keys=common_keys.difference(keys_to_copy) with open_file(filename) as file: # check value of Table.nrow for keys copied from the first file - for k in copy_keys: + for k in keys_to_copy: first_node = merge_file.root[k] present_node = file.root[k] if first_node.nrows != present_node.nrows: