Skip to content

Commit

Permalink
Merge pull request #1174 from cta-observatory/do_not_merge_calibratio…
Browse files Browse the repository at this point in the history
…n_info

Add the possibility to copy, rather than merge, some keys in lstchain_merge_hdf5_files
  • Loading branch information
rlopezcoto committed Nov 30, 2023
2 parents e82bd68 + fd7aa94 commit 38da653
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 1 deletion.
18 changes: 17 additions & 1 deletion lstchain/io/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@
dl1_mon_tel_CatB_ped_key = "/dl1/monitoring/telescope/catB/pedestal"
dl1_mon_tel_CatB_cal_key = "/dl1/monitoring/telescope/catB/calibration"
dl1_mon_tel_CatB_flat_key = "/dl1/monitoring/telescope/catB/flatfield"

dl1_params_lstcam_key = "/dl1/event/telescope/parameters/LST_LSTCam"
dl1_images_lstcam_key = "/dl1/event/telescope/image/LST_LSTCam"
dl2_params_lstcam_key = "/dl2/event/telescope/parameters/LST_LSTCam"
Expand Down Expand Up @@ -304,6 +303,7 @@ def auto_merge_h5files(
file_list,
output_filename="merged.h5",
nodes_keys=None,
keys_to_copy=None,
merge_arrays=False,
filters=HDF5_ZSTD_FILTERS,
progress_bar=True,
Expand All @@ -319,6 +319,7 @@ def auto_merge_h5files(
file_list: list of path
output_filename: path
nodes_keys: list of path
keys_to_copy: list of nodes that must be copied and not merged (because they are the same in all files)
merge_arrays: bool
filters
progress_bar: bool
Expand All @@ -336,15 +337,30 @@ def auto_merge_h5files(
else:
keys = set(nodes_keys)

keys_to_copy = set() if keys_to_copy is None else set(keys_to_copy).intersection(keys)

bar = tqdm(total=len(file_list), disable=not progress_bar)
with open_file(output_filename, 'w', filters=filters) as merge_file:
with open_file(file_list[0]) as f1:
copy_h5_nodes(f1, merge_file, nodes=keys)

bar.update(1)
for filename in file_list[1:]:

common_keys = keys.intersection(get_dataset_keys(filename))

# do not merge specific nodes with equal data in all files
common_keys=common_keys.difference(keys_to_copy)

with open_file(filename) as file:

# check that the number of rows (Table.nrows) matches for keys copied from the first file
for k in keys_to_copy:
first_node = merge_file.root[k]
present_node = file.root[k]
if first_node.nrows != present_node.nrows:
raise ValueError("Length of key {} from file {} different than in file {}".format(k, filename, file_list[0]))

for k in common_keys:
in_node = file.root[k]
out_node = merge_file.root[k]
Expand Down
24 changes: 24 additions & 0 deletions lstchain/scripts/lstchain_merge_hdf5_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,22 @@

from lstchain.io import auto_merge_h5files
from lstchain.io import get_dataset_keys
from lstchain.io.io import (
dl1_params_tel_mon_ped_key,
dl1_params_tel_mon_cal_key,
dl1_params_tel_mon_flat_key,
dl1_mon_tel_CatB_cal_key,
dl1_mon_tel_CatB_ped_key,
dl1_mon_tel_CatB_flat_key
)

# Keys that are always passed to auto_merge_h5files as `keys_to_copy`, i.e.
# copied rather than merged; any values given on the command line through
# --keys-to-copy are appended to this list.
default_keys_to_copy = [
    dl1_params_tel_mon_ped_key,
    dl1_params_tel_mon_cal_key,
    dl1_params_tel_mon_flat_key,
    dl1_mon_tel_CatB_cal_key,
    dl1_mon_tel_CatB_ped_key,
    dl1_mon_tel_CatB_flat_key,
]

parser = argparse.ArgumentParser(description='Merge HDF5 files')

Expand Down Expand Up @@ -72,6 +88,11 @@
help='Skip checks when merging files'
)

# Optional extra keys to copy instead of merge; zero or more values are
# accepted and the option defaults to an empty list when it is not given.
parser.add_argument(
    '--keys-to-copy',
    nargs="*",
    default=[],
    help='List of duplicated keys to be copied and not to be merged',
)

def main():
args = parser.parse_args()
Expand All @@ -91,10 +112,13 @@ def main():
else:
keys = None

keys_to_copy = default_keys_to_copy + args.keys_to_copy

auto_merge_h5files(
file_list,
args.output_file,
nodes_keys=keys,
keys_to_copy=keys_to_copy,
progress_bar=not args.no_progress,
run_checks=not args.skip_checks
)
Expand Down

0 comments on commit 38da653

Please sign in to comment.