Merge pull request #14 from DUNE/develop
Merge develop into main
sam-fogarty authored Aug 29, 2023
2 parents 0b249ff + d146887 commit 45822d7
Showing 11 changed files with 370 additions and 5 deletions.
24 changes: 24 additions & 0 deletions README.md
@@ -57,6 +57,30 @@ The code requires various inputs, most of which are specified in the input confi
- `layout` folder: detector-specific pixel layout yamls
- `pedestal` folder: contains `json` files with channel-by-channel pixel pedestal values

### Light Reconstruction
The light data reconstruction is done with `ndlar_flow`: https://github.com/DUNE/ndlar_flow/tree/main

In the `util` folder, there are scripts for running `ndlar_flow` to process the light data. `ndlar_flow` uses yaml files to control which steps are run in the reconstruction. `light_event_reconstruction_for_39Ar.yaml` contains the light waveform processing steps (e.g. waveform noise filtering, waveform deconvolution, waveform alignment), not including hit finding (the plan is to add hit finding here).

To set up `ndlar_flow`:
```bash
chmod +x setup_h5flow_ndlar_flow.sh
./setup_h5flow_ndlar_flow.sh
```

This creates `h5flow` and `ndlar_flow` directories and sets up a virtual environment that can be activated with:
```bash
source flow.venv/bin/activate
```

To run the light data reconstruction:
```bash
chmod +x run_flow_light_reco.sh
./run_flow_light_reco.sh
```

Be aware that the output file contains waveforms and is therefore very large (one module-1 file was ~90 GB!), so make sure to put it somewhere appropriate for the file size. The downstream charge-light matching, however, produces a much more lightweight file.
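
For a quick check of what ended up in the matched output file, here is a minimal sketch, assuming the `clusters` and `light_events` dataset names written by `match_light_to_ext_triggers.py` (the path is a placeholder):
```python
import h5py

# path is a placeholder for your matched output file
with h5py.File('charge-light-matched-clusters.h5', 'r') as f:
    print(len(f['clusters']), 'clusters,', len(f['light_events']), 'light events')
```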

### Analysis examples
To access the datasets in Python:
```python
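# A minimal sketch, assuming the 'clusters' and 'ext_trig' dataset names
# written by charge_clustering.py; the file path is a placeholder.
import h5py
import numpy as np

with h5py.File('packet_clusters.h5', 'r') as f:
    clusters = np.array(f['clusters'])  # structured array of charge clusters
    ext_trig = np.array(f['ext_trig'])  # matched external triggers

print(clusters.dtype.names)  # available fields, e.g. 'nhit', 'ext_trig_index'
```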
70 changes: 70 additions & 0 deletions charge_reco/charge_cluster_selections.py
@@ -0,0 +1,70 @@
#!/usr/bin/env python
"""
Command-line interface for applying selection criteria to charge clusters using matched external triggers.
"""

import numpy as np
import h5py
import fire
from tqdm import tqdm

def main(input_filename, output_filename):
    """
    Make selections on the charge cluster data using external triggers and
    output a new file for downstream charge-light matching.
    """
f = h5py.File(input_filename, 'r')
clusters = np.array(f['clusters'])
ext_trig = f['ext_trig']

max_hits = 10 # maximum hits per cluster
max_clusters = 5 # maximum clusters per event

# get only matched clusters
clusters = clusters[clusters['ext_trig_index'] != -1]

# get ext trig indices, sort them to enable grouping by light trigger
light_ids = clusters['ext_trig_index']
sorted_indices = np.argsort(light_ids)
light_ids = light_ids[sorted_indices]
clusters = clusters[sorted_indices]

# group clusters by light event
light_trig_indices = np.concatenate(([0], np.flatnonzero(light_ids[:-1] != light_ids[1:])+1, [len(light_ids)]))
grouped_clusters = np.split(clusters, light_trig_indices[1:-1])

numEvents = 0
numEvents_nonUniqueMatch = 0

clusters_keep = []
ext_trig_keep = []
print('Total groups = ', len(grouped_clusters))
for i in tqdm(range(len(grouped_clusters)), desc=' Finding events according to criteria: '):
group = grouped_clusters[i]
unique_ext_trig_indices = np.unique(group['ext_trig_index'])
# require limit on number of clusters per ext trig
nClustersLimit = len(group) <= max_clusters
# require limit on number of hits per cluster in match
nHitsLimit = np.all(group['nhit'] <= max_hits)
# require no clusters with multiple matches
uniqueExtTrigs = len(unique_ext_trig_indices) == 1
if nClustersLimit and nHitsLimit and uniqueExtTrigs:
numEvents += 1
for cluster in group:
clusters_keep.append(cluster)
ext_trig_keep.append(ext_trig[unique_ext_trig_indices[0]])

if not uniqueExtTrigs:
numEvents_nonUniqueMatch += 1

clusters = np.array(clusters_keep)
ext_trig = np.array(ext_trig_keep)
    print(f'Number of events satisfying criteria = {numEvents}; {numEvents/len(grouped_clusters):.3f} fraction of total events.')
    print(f'Number of events with multi-matched clusters = {numEvents_nonUniqueMatch}; {numEvents_nonUniqueMatch/len(grouped_clusters):.3f} fraction of total events.')
print(f'Total events in file = {len(grouped_clusters)}')

with h5py.File(output_filename, 'a') as output_file:
output_file.create_dataset('clusters', data=clusters)
output_file.create_dataset('ext_trig', data=ext_trig)
print(f'Saved output to {output_filename}')

if __name__ == "__main__":
fire.Fire(main)
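
A hypothetical direct invocation of the script above (paths are placeholders; since it is a `fire` CLI, the same two arguments also work on the command line):
```python
# Hypothetical usage; the input must contain the 'clusters' and 'ext_trig'
# datasets produced by charge_clustering.py. Paths are placeholders.
from charge_cluster_selections import main

main('packet_clusters.h5', 'clusters_selection.h5')
```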
9 changes: 5 additions & 4 deletions charge_reco/charge_clustering.py
@@ -89,11 +89,11 @@ def run_reconstruction(input_config_name, input_filepath, output_filepath):

packets_batch = np.array(packets[index_start:index_end])
if mc_assn is not None:
-            mc_assn = np.array(mc_assn[index_start:index_end])
+            mc_assn_batch = np.array(mc_assn[index_start:index_end])

analysis_start_time = time.time()
results = \
-            analysis(packets_batch, pixel_xy, mc_assn, tracks, module, hits_max_cindex, disabled_channel_IDs, \
+            analysis(packets_batch, pixel_xy, mc_assn_batch, tracks, module, hits_max_cindex, disabled_channel_IDs, \
detprop, pedestal_dict, config_dict, dbscan)
if consts.save_hits:
clusters, ext_trig, hits, benchmarks = results
@@ -152,8 +152,9 @@ def run_reconstruction(input_config_name, input_filepath, output_filepath):
f['hits'].resize((f['hits'].shape[0] + hits.shape[0]), axis=0)
f['hits'][-hits.shape[0]:] = hits
hits_max_cindex = np.max(hits['cluster_index'])+1
-                f['ext_trig'].resize((f['ext_trig'].shape[0] + ext_trig.shape[0]), axis=0)
-                f['ext_trig'][-ext_trig.shape[0]:] = ext_trig
+                if len(ext_trig) > 0:
+                    f['ext_trig'].resize((f['ext_trig'].shape[0] + ext_trig.shape[0]), axis=0)
+                    f['ext_trig'][-ext_trig.shape[0]:] = ext_trig

index_start += batch_size
index_end += batch_size
2 changes: 1 addition & 1 deletion charge_reco/consts.py
@@ -35,6 +35,6 @@

ext_trig_dtype = np.dtype([('unix', '<i8'), ('t', '<i8')])

-EVENT_SEPARATOR='eventID'
+EVENT_SEPARATOR='event_id'
time_the_reconstruction = False
save_hits = False
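
For illustration, a minimal sketch of building a record with the `ext_trig_dtype` defined above (the values are placeholders):
```python
import numpy as np

ext_trig_dtype = np.dtype([('unix', '<i8'), ('t', '<i8')])  # as defined above

trig = np.zeros(1, dtype=ext_trig_dtype)  # one external-trigger record
trig['unix'] = 1644282479                 # placeholder unix second
trig['t'] = 123456                        # placeholder PPS timestamp (ticks)
```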
23 changes: 23 additions & 0 deletions charge_reco/input_config.py
@@ -29,6 +29,29 @@ def __init__(self, module_name):
self.charge_light_matching_unix_window = 0
self.ext_trig_PPS_window = 1000

if self.module_name == 'module-0_MC':
self.detector = 'module-0'
self.data_type = 'MC'
self.detector_dict_path = 'layout/module0_multi_tile_layout-2.3.16.yaml'
self.detprop_path = 'detector_properties/module0.yaml'
self.use_disabled_channels_list = False
self.disabled_channels_list = 'disabled_channels/module0_disabled_channels_noise_cut.npz'
self.pedestal_file = 'pedestal/module0_datalog_2021_04_02_19_00_46_CESTevd_ped.json'
self.config_file = 'config/module0_evd_config_21-03-31_12-36-13.json'
self.use_ped_config_files = False
self.PACMAN_clock_correction1 = [-9.597, 3.7453e-06]
self.PACMAN_clock_correction2 = [-9.329, 9.0283e-07]
self.PACMAN_clock_correction = False
self.timestamp_cut = False
self.nBatches = 10
self.batches_limit = 10
self.ext_trig_matching_tolerance_unix = 1
self.ext_trig_matching_tolerance_PPS = 1.5e3 # ns
self.charge_light_matching_lower_PPS_window = 61000
self.charge_light_matching_upper_PPS_window = full_drift_time + 61000
self.charge_light_matching_unix_window = 0
self.ext_trig_PPS_window = 1000

# You can add more elif conditions for different module names and their configurations
elif self.module_name == 'module-1':
self.detector = 'module-1'
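
A minimal usage sketch for these configurations; the enclosing class name is not visible in this hunk, so `ModuleConfig` below is an assumed name:
```python
# Hypothetical usage; the class defining __init__(self, module_name) lives in
# charge_reco/input_config.py, but its name is not shown in this hunk, so
# 'ModuleConfig' is an assumed name.
from input_config import ModuleConfig

config = ModuleConfig('module-0_MC')
print(config.detector, config.data_type)  # -> module-0 MC
```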
89 changes: 89 additions & 0 deletions charge_reco/match_light_to_ext_triggers.py
@@ -0,0 +1,89 @@
#!/usr/bin/env python
"""
Command-line interface for matching external triggers to light triggers.
"""
import fire
import numpy as np
from tqdm import tqdm
import h5py
import os

def main(input_clusters_file, input_light_file, output_filename):
"""
# Args:
input_clusters_file (str): path to file that contains charge clusters
and external triggers processed with charge_clustering.py
input_light_file (str): path to file that contains ndlar_flow-processed
light data
"""
    if os.path.exists(output_filename):
        raise FileExistsError(f'Output file {output_filename} already exists.')
# get clusters
f_charge = h5py.File(input_clusters_file, 'r')
clusters = np.array(f_charge['clusters'])
ext_trig_indices = clusters['ext_trig_index']

# get external triggers
ext_trig_unix = np.array(f_charge['ext_trig']['unix'])
ext_trig_t = np.array(f_charge['ext_trig']['t'])

# get light events
f_light = h5py.File(input_light_file, 'r')
light_events = f_light['light/events/data']

ts_window = 1600 # nsec
unix_mask = np.zeros(len(ext_trig_unix), dtype=bool)
tai_ns_mask = np.zeros(len(ext_trig_unix), dtype=bool)

    num_light_events = 2500  # NOTE: only the first 2500 light events are processed; use len(light_events) for the full file
light_events_matched = []
light_index = 0
    light_event_indices = np.zeros(len(clusters), dtype=int)
# match between external triggers and light triggers
for i in tqdm(range(num_light_events), desc=' Matching external triggers to light events: '):
        # index [1] skips the first unique value (assumed to be an invalid/padding entry)
        light_unix_s = int(np.unique(light_events[i]['utime_ms']*1e-3)[1])
        light_tai_ns = int((np.unique(light_events[i]['tai_ns'])[1]/0.625) % 1e9)
isUnixMatch = ext_trig_unix == light_unix_s
isPPSMatch = (ext_trig_t > light_tai_ns - ts_window) & \
(ext_trig_t < light_tai_ns + ts_window)
unix_mask += isUnixMatch
tai_ns_mask += isPPSMatch
isMatch = isUnixMatch & isPPSMatch

# keep only matched light events, and keep track of indices for associations
if np.sum(isMatch) == 1:
light_events_matched.append(light_events[i])
ext_trig_index = np.where(isMatch)[0]
light_event_indices[clusters['ext_trig_index'] == ext_trig_index] \
= light_index
light_index += 1
light_events_matched = np.array(light_events_matched, dtype=light_events.dtype)
# get matched clusters
ext_trig_mask = unix_mask & tai_ns_mask
total_matches = np.sum(ext_trig_mask)
print(f'Efficiency of ext trigger matches = {total_matches/num_light_events}')
print(f'Efficiency of unix matches = {np.sum(unix_mask)/num_light_events}')
print(f'Efficiency of PPS matches = {np.sum(tai_ns_mask)/num_light_events}')

ext_trig_indices_matched = np.where(ext_trig_mask)[0]
clusters_matched_mask = np.isin(ext_trig_indices, ext_trig_indices_matched)
clusters_matched = clusters[clusters_matched_mask]
light_event_indices = light_event_indices[clusters_matched_mask]

# replace ext_trig_index with light event indices
clusters_new = np.zeros_like(clusters_matched)
for name in clusters_matched.dtype.names:
if name != 'ext_trig_index':
clusters_new[name] = clusters_matched[name]
else:
clusters_new['ext_trig_index'] = light_event_indices

clusters_matched = clusters_new
with h5py.File(output_filename, 'a') as output_file:
output_file.create_dataset('clusters', data=clusters_matched)
output_file.create_dataset('light_events', data=light_events_matched)
print(f'Saved output to {output_filename}')

if __name__ == "__main__":
fire.Fire(main)
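
A hypothetical direct call of the matching script (paths are placeholders):
```python
# Hypothetical usage; the clusters file comes from charge_cluster_selections.py
# and the light file from the ndlar_flow light reconstruction. The script
# refuses to overwrite an existing output file. Paths are placeholders.
from match_light_to_ext_triggers import main

main('clusters_selection.h5',
     'light_reco_timestamp_corrected.h5',
     'charge-light-matched-clusters.h5')
```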
30 changes: 30 additions & 0 deletions util/light_event_reconstruction_for_39Ar.yaml
@@ -0,0 +1,30 @@
# Performs noise subtraction, deconvolution, and peak finding on raw light
# waveforms

flow:
source: 'light/events'
stages: [timestamp_corrector, wvfm_filt, wvfm_deconv, wvfm_align, wvfm_sum]

# remove waveforms from output file
#drop: ['light/wvfm', 'light/fwvfm', 'light/deconv']


resources:
- !include yamls/module0_flow/resources/RunData.yaml
- !include yamls/module0_flow/resources/Geometry.yaml
- !include yamls/module0_flow/resources/LArData.yaml

timestamp_corrector:
!include yamls/module0_flow/reco/light/LightTimestampCorrector.yaml

wvfm_filt:
!include yamls/module0_flow/reco/light/WaveformNoiseFilter.yaml

wvfm_deconv:
!include yamls/module0_flow/reco/light/WaveformDeconvolution.yaml

wvfm_align:
!include yamls/module0_flow/reco/light/WaveformAlign.yaml

wvfm_sum:
!include yamls/module0_flow/reco/light/WaveformSum.yaml
33 changes: 33 additions & 0 deletions util/run_charge-light-matching.sh
@@ -0,0 +1,33 @@
#!/usr/bin/env bash

DET="Module1"
if [ "${DET}" = "Module0" ]; then
OUTDIR=/global/cfs/cdirs/dune/users/sfogarty/Module0_reco
elif [ "${DET}" = "Module1" ]; then
OUTDIR=/global/cfs/cdirs/dune/users/sfogarty/Module1_reco
else
echo "Exiting as $DET is not a recognized run name"
    exit 1
fi

INFILENAME_CLUSTERS=${OUTDIR}/clusters_selection_2022_02_08_01_47_59_CET.h5
INFILENAME_LIGHT=${OUTDIR}/0cd913fb_0cd93db0_20220208_014759_timestamp_corrected.h5
OUTFILENAME=${OUTDIR}/charge-light-matched-clusters_2022_02_08_01_47_59_CET.h5

shifter --image=mjkramer/sim2x2:genie_edep.3_04_00.20230620 --module=None -- /bin/bash << EOF
set +o posix
source /environment
cd ..
rm -rf convert.venv
python3 -m venv convert.venv
source convert.venv/bin/activate
pip install --upgrade pip
pip install -r requirements.txt
cd charge_reco
python3 match_light_to_ext_triggers.py ${INFILENAME_CLUSTERS} ${INFILENAME_LIGHT} ${OUTFILENAME}
EOF
32 changes: 32 additions & 0 deletions util/run_cluster_selection.sh
@@ -0,0 +1,32 @@
#!/usr/bin/env bash

DET="Module1"
if [ "${DET}" = "Module0" ]; then
OUTDIR=/global/cfs/cdirs/dune/users/sfogarty/Module0_reco
elif [ "${DET}" = "Module1" ]; then
OUTDIR=/global/cfs/cdirs/dune/users/sfogarty/Module1_reco
else
echo "Exiting as $DET is not a recognized run name"
    exit 1
fi

INFILENAME_CLUSTERS=${OUTDIR}/packet_2022_02_08_01_47_59_CET_clusters.h5
OUTFILENAME=${OUTDIR}/clusters_selection_2022_02_08_01_47_59_CET.h5

shifter --image=mjkramer/sim2x2:genie_edep.3_04_00.20230620 --module=None -- /bin/bash << EOF
set +o posix
source /environment
cd ..
rm -rf convert.venv
python3 -m venv convert.venv
source convert.venv/bin/activate
pip install --upgrade pip
pip install -r requirements.txt
cd charge_reco
python3 charge_cluster_selections.py ${INFILENAME_CLUSTERS} ${OUTFILENAME}
EOF
40 changes: 40 additions & 0 deletions util/run_flow_light_reco.sh
@@ -0,0 +1,40 @@
#!/bin/bash
### bash script for running ndlar_flow LRS reconstruction

original_dir=$(pwd)

# change to your own directories
ndlar_flow_dir=/global/u1/s/sfogarty/ndlar_flow
output_folder=/global/cfs/cdirs/dune/users/sfogarty

# change which files are used and which module is used
data_folder=/global/cfs/cdirs/dune/www/data/Module1/LRS/SingleModule_Jan22
ADC_name_1=0cd913fb
ADC_name_2=0cd93db0
file_timestamp=20220208_014759

LRS_file_1=${ADC_name_1}_${file_timestamp}.data
LRS_file_2=${ADC_name_2}_${file_timestamp}.data
light_workflow_yamls=yamls/module0_flow/workflows/light
light_reco_yamls=yamls/module0_flow/reco/light
output_file=${output_folder}/${ADC_name_1}_${ADC_name_2}_${file_timestamp}.h5

# you can tweak this yaml as needed before copying it over
cp light_event_reconstruction_for_39Ar.yaml ${ndlar_flow_dir}/${light_workflow_yamls}

cd ${ndlar_flow_dir}
pip install .

# run light event building adc64 step for both files
h5flow -c ${light_workflow_yamls}/light_event_building_adc64.yaml \
-i ${data_folder}/${LRS_file_1} \
-o ${output_file}

h5flow -c ${light_workflow_yamls}/light_event_building_adc64.yaml \
-i ${data_folder}/${LRS_file_2} \
-o ${output_file}

# run light event reconstruction step
h5flow -c ${light_workflow_yamls}/light_event_reconstruction_for_39Ar.yaml \
-i ${output_file} \
       -o ${output_file}

# return to the original directory saved above
cd ${original_dir}