From 349bd0bd155c4363245bfa174b1ecd27879a8bf0 Mon Sep 17 00:00:00 2001 From: Masatoshi Kobayashi Date: Thu, 19 Oct 2023 05:32:44 -0500 Subject: [PATCH 1/8] added option to apply total Edep selection, in order to save resources before WFSim --- bin/run_epix | 6 ++++++ epix/run_epix.py | 17 +++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/bin/run_epix b/bin/run_epix index afb8163..eeaa4af 100755 --- a/bin/run_epix +++ b/bin/run_epix @@ -53,6 +53,12 @@ def pars_args(): parser.add_argument('--ClusterMethod', dest='cluster_method', type=str, default='dbscan', help = 'Switch between DBSCAN, BETA-DECAY, ' 'and BREMSSTRAHLUNG microclustering, [ dbscan | betadecay | brem ]') + parser.add_argument('--MaxEnergy', dest='max_energy', type=float, + action='store', default=float('inf'), #keV + help='Maximam energy to be stored [keV].') + parser.add_argument('--MinEnergy', dest='min_nergy', type=float, + action='store', default=-1, #keV + help='Minimam energy to be stored [keV].') parser.add_argument('--JobNumber', dest='job_number', type=int, action='store', default=0, help='Job number in full chain simulation. Offset is computed as ' diff --git a/epix/run_epix.py b/epix/run_epix.py index e0204d6..52af608 100644 --- a/epix/run_epix.py +++ b/epix/run_epix.py @@ -189,6 +189,9 @@ def main(args, return_df=False, return_wfsim_instructions=False, strax=False): if args['debug'] & (len(result) == 0): warnings.warn('No interactions left, return empty DataFrame.') instructions = epix.awkward_to_wfsim_row_style(result) + # Apply energy range selection for instructions if required + if (args['min_energy'] > 0.0) or (args['max_energy'] < float('inf')): + instructions = apply_energy_selection(instructions,[args['min_energy'],args['max_energy']]) if args['source_rate'] != 0: # Only sort by time again if source rates were applied, otherwise # things are already sorted within the events and should stay this way. @@ -214,6 +217,20 @@ def main(args, return_df=False, return_wfsim_instructions=False, strax=False): return instructions +def apply_energy_selection(instr,e_range): + minE,maxE = e_range[0],e_range[1] + #merge all energy deposits in single G4 events + g4ids=instr['g4id'] + eds=instr['ed'] + tot_es=np.zeros_like(eds) + uids=np.unique(g4ids) + for i in uids: + indices=np.where(g4ids==i) + tot_e = np.sum(eds[indices])/2. #divide by 2 because edep for both S1 and S2 are summed up... + tot_es[indices]=tot_e + instr['tot_e'] = tot_es + return instr[(instr['tot_e']>=minE) & (instr['tot_e']<=maxE) ] + def monitor_time(prev_time, task): t = time.time() print(f'It took {(t - prev_time):.4f} sec to {task}') From 740f377f8279a0648df8c725213ddca3d7af57e4 Mon Sep 17 00:00:00 2001 From: Masatoshi Kobayashi Date: Mon, 18 Dec 2023 03:52:42 -0600 Subject: [PATCH 2/8] update the function for tot_e calculation --- epix/run_epix.py | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/epix/run_epix.py b/epix/run_epix.py index 52af608..90e6b44 100644 --- a/epix/run_epix.py +++ b/epix/run_epix.py @@ -216,19 +216,36 @@ def main(args, return_df=False, return_wfsim_instructions=False, strax=False): if return_wfsim_instructions: return instructions +def calc_tot_e_dep(df): + print("calculating tot edep...") + #merge all energy deposits in single G4 events\n", + g4id = df['g4id'].values + u, s = np.unique(g4id, return_index=True) + print("split with event_number...") + #types= np.split(df['type'].values,s[1:]) + edeps= np.split(df['e_dep'].values,s[1:]) + print("calculate tot_edep...") + tot_edep = np.asarray(list(map(lambda x:np.sum(x),edeps))) #sum up the e_deps + #max_edep = list(map(lambda x:np.max(x),edeps)) #get max e_deps + num_edep = np.asarray(list(map(lambda x:len(x),edeps)) #get number of e_deps + #return tot_edep, max_edep, num_edep + #return np.repeat(tot_edep,np.append((s[1:]-s[:-1]),len(df)-s[-1])) + return np.repeat(tot_edep,num_edep) + def apply_energy_selection(instr,e_range): minE,maxE = e_range[0],e_range[1] #merge all energy deposits in single G4 events - g4ids=instr['g4id'] - eds=instr['ed'] - tot_es=np.zeros_like(eds) - uids=np.unique(g4ids) - for i in uids: - indices=np.where(g4ids==i) - tot_e = np.sum(eds[indices])/2. #divide by 2 because edep for both S1 and S2 are summed up... - tot_es[indices]=tot_e - instr['tot_e'] = tot_es + instr['tot_e'] = calc_tot_e_dep(instr)/2. #devide by 2 (S1, S2) + #g4ids=instr['g4id'] + #eds=instr['ed'] + #tot_es=np.zeros_like(eds) + #uids=np.unique(g4ids) + #for i in uids: + # indices=np.where(g4ids==i) + # tot_e = np.sum(eds[indices])/2. #divide by 2 because edep for both S1 and S2 are summed up... + # tot_es[indices]=tot_e + #instr['tot_e'] = tot_es return instr[(instr['tot_e']>=minE) & (instr['tot_e']<=maxE) ] def monitor_time(prev_time, task): From 07ff3bc07859ca4896ac2df03d965cec15087647 Mon Sep 17 00:00:00 2001 From: Masatoshi Kobayashi Date: Thu, 21 Dec 2023 03:11:29 -0600 Subject: [PATCH 3/8] move functions to common.py. Add explanations of new options in README --- README.md | 2 ++ epix/common.py | 34 ++++++++++++++++++++++++++++++++++ epix/run_epix.py | 33 --------------------------------- 3 files changed, 36 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 3d8d61a..4d0b7e6 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,8 @@ The other keyword arguments are: | `--MicroSeparationTime` | Clustering time (ns) | `10` | | `--TagClusterBy` | decide if you tag the cluster (particle type, energy depositing process) according to first interaction in it (`time`) or most energetic (`energy`) | `energy` | | `--MaxDelay` | Time after which we cut the rest of the event (ns) | `1e7` | +| `--MaxEnergy` | Upper range for energy selection (keV) | `inf` | +| `--MinEnergy` | Lower range for energy selection (keV) | `-1` | | `--YieldModel` | Model for yield/quanta generation (nest / bbf / beta) | `nest` | | `--ClusterMethod` | Microclustering method (dbscan / betadecay / brem) | `dbscan` | | `--SourceRate` | Event rate for event separation
- `0` for no time shift (G4 time remains)
- `-1` for clean time shift between events
- `>0` (Hz) for random spacing | `0` | diff --git a/epix/common.py b/epix/common.py index fd74809..974ae1d 100755 --- a/epix/common.py +++ b/epix/common.py @@ -179,3 +179,37 @@ def apply_time_offset(result, dt): if len(result) == 0: return result return result['t'][:, :] + dt + +def calc_tot_e_dep(df): + """ + Calculate the total energy deposit in the sensitive volume + :param df: dataframe + :return: total energy deposit in each g4 event + """ + #merge all energy deposits in single G4 events + g4id = df['g4id'].values + u, s = np.unique(g4id, return_index=True) + #split with event_number + edeps= np.split(df['e_dep'].values,s[1:]) + #calculate tot_edep + tot_edep = np.asarray(list(map(lambda x:np.sum(x),edeps))) #sum up the e_deps + num_edep = np.asarray(list(map(lambda x:len(x),edeps))) #get number of e_deps + #max_edep = list(map(lambda x:np.max(x),edeps)) #get max e_deps + return np.repeat(tot_edep,num_edep) + + +def apply_energy_selection(instr,e_range): + """ + Apply the energy selection with total energy deposit in the sensitive volume + :param instr: dataframe + :param e_range: (minimum E, maximum E) + :return: dataframe after the energy selection + """ + minE,maxE = e_range[0],e_range[1] + + #merge all energy deposits in a single G4 event + instr['tot_e'] = calc_tot_e_dep(instr)/2. #devide by 2 (S1, S2) + + return instr[(instr['tot_e']>=minE) & (instr['tot_e']<=maxE) ] + + diff --git a/epix/run_epix.py b/epix/run_epix.py index 90e6b44..6d2bbd6 100644 --- a/epix/run_epix.py +++ b/epix/run_epix.py @@ -215,39 +215,6 @@ def main(args, return_df=False, return_wfsim_instructions=False, strax=False): if return_wfsim_instructions: return instructions - -def calc_tot_e_dep(df): - print("calculating tot edep...") - #merge all energy deposits in single G4 events\n", - g4id = df['g4id'].values - u, s = np.unique(g4id, return_index=True) - print("split with event_number...") - #types= np.split(df['type'].values,s[1:]) - edeps= np.split(df['e_dep'].values,s[1:]) - print("calculate tot_edep...") - tot_edep = np.asarray(list(map(lambda x:np.sum(x),edeps))) #sum up the e_deps - #max_edep = list(map(lambda x:np.max(x),edeps)) #get max e_deps - num_edep = np.asarray(list(map(lambda x:len(x),edeps)) #get number of e_deps - #return tot_edep, max_edep, num_edep - #return np.repeat(tot_edep,np.append((s[1:]-s[:-1]),len(df)-s[-1])) - return np.repeat(tot_edep,num_edep) - - -def apply_energy_selection(instr,e_range): - minE,maxE = e_range[0],e_range[1] - #merge all energy deposits in single G4 events - instr['tot_e'] = calc_tot_e_dep(instr)/2. #devide by 2 (S1, S2) - #g4ids=instr['g4id'] - #eds=instr['ed'] - #tot_es=np.zeros_like(eds) - #uids=np.unique(g4ids) - #for i in uids: - # indices=np.where(g4ids==i) - # tot_e = np.sum(eds[indices])/2. #divide by 2 because edep for both S1 and S2 are summed up... - # tot_es[indices]=tot_e - #instr['tot_e'] = tot_es - return instr[(instr['tot_e']>=minE) & (instr['tot_e']<=maxE) ] - def monitor_time(prev_time, task): t = time.time() print(f'It took {(t - prev_time):.4f} sec to {task}') From 151ab7f0e39dfa48673f8bbfd26c6f852230a421 Mon Sep 17 00:00:00 2001 From: Masatoshi Kobayashi Date: Mon, 8 Jan 2024 21:09:40 -0600 Subject: [PATCH 4/8] bug fix --- bin/run_epix | 3 ++- epix/common.py | 4 ++-- epix/run_epix.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/bin/run_epix b/bin/run_epix index eeaa4af..e8c302e 100755 --- a/bin/run_epix +++ b/bin/run_epix @@ -56,7 +56,7 @@ def pars_args(): parser.add_argument('--MaxEnergy', dest='max_energy', type=float, action='store', default=float('inf'), #keV help='Maximam energy to be stored [keV].') - parser.add_argument('--MinEnergy', dest='min_nergy', type=float, + parser.add_argument('--MinEnergy', dest='min_energy', type=float, action='store', default=-1, #keV help='Minimam energy to be stored [keV].') parser.add_argument('--JobNumber', dest='job_number', type=int, @@ -73,6 +73,7 @@ def pars_args(): help='If specifed additional information is printed to the console.') args = parser.parse_args(sys.argv[1:]) + print(args) return args if __name__ == "__main__": diff --git a/epix/common.py b/epix/common.py index 974ae1d..b041f8c 100755 --- a/epix/common.py +++ b/epix/common.py @@ -187,10 +187,10 @@ def calc_tot_e_dep(df): :return: total energy deposit in each g4 event """ #merge all energy deposits in single G4 events - g4id = df['g4id'].values + g4id = df['g4id'] u, s = np.unique(g4id, return_index=True) #split with event_number - edeps= np.split(df['e_dep'].values,s[1:]) + edeps= np.split(df['e_dep'],s[1:]) #calculate tot_edep tot_edep = np.asarray(list(map(lambda x:np.sum(x),edeps))) #sum up the e_deps num_edep = np.asarray(list(map(lambda x:len(x),edeps))) #get number of e_deps diff --git a/epix/run_epix.py b/epix/run_epix.py index 6d2bbd6..27ff9e9 100644 --- a/epix/run_epix.py +++ b/epix/run_epix.py @@ -6,7 +6,7 @@ import warnings import epix -from .common import ak_num, calc_dt, apply_time_offset +from .common import ak_num, calc_dt, apply_time_offset,apply_energy_selection def main(args, return_df=False, return_wfsim_instructions=False, strax=False): From 7e4c7c679678e9852d3ed1ef912484adc0d63de9 Mon Sep 17 00:00:00 2001 From: Masatoshi Kobayashi Date: Wed, 24 Jan 2024 01:23:12 -0600 Subject: [PATCH 5/8] Add escape for empty instructions after the energy selection --- epix/common.py | 7 ++++++- epix/run_epix.py | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/epix/common.py b/epix/common.py index b041f8c..a351b9d 100755 --- a/epix/common.py +++ b/epix/common.py @@ -210,6 +210,11 @@ def apply_energy_selection(instr,e_range): #merge all energy deposits in a single G4 event instr['tot_e'] = calc_tot_e_dep(instr)/2. #devide by 2 (S1, S2) - return instr[(instr['tot_e']>=minE) & (instr['tot_e']<=maxE) ] + instr_aft_selection = instr[(instr['tot_e']>=minE) & (instr['tot_e']<=maxE) ] + + if len(inter_aft_selection) == 0: + return np.empty(0, dtype=wfsim.instruction_dtype) + + return instr_aft_selection diff --git a/epix/run_epix.py b/epix/run_epix.py index 27ff9e9..e197efc 100644 --- a/epix/run_epix.py +++ b/epix/run_epix.py @@ -192,6 +192,8 @@ def main(args, return_df=False, return_wfsim_instructions=False, strax=False): # Apply energy range selection for instructions if required if (args['min_energy'] > 0.0) or (args['max_energy'] < float('inf')): instructions = apply_energy_selection(instructions,[args['min_energy'],args['max_energy']]) + if args['debug'] & (len(instructions) == 0): + warnings.warn('No interactions left after the energy selection, return empty DataFrame.') if args['source_rate'] != 0: # Only sort by time again if source rates were applied, otherwise # things are already sorted within the events and should stay this way. From c91a2a2ebcd8d9c3c7202d3707f6b356918c3336 Mon Sep 17 00:00:00 2001 From: Masatoshi Kobayashi Date: Wed, 24 Jan 2024 06:44:22 -0600 Subject: [PATCH 6/8] add some lines so that to return empty instructions in case no events left after the energy selection --- epix/common.py | 4 ---- epix/io.py | 3 +++ epix/run_epix.py | 6 ++++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/epix/common.py b/epix/common.py index 39bb075..5695578 100755 --- a/epix/common.py +++ b/epix/common.py @@ -209,12 +209,8 @@ def apply_energy_selection(instr,e_range): #merge all energy deposits in a single G4 event instr['tot_e'] = calc_tot_e_dep(instr)/2. #devide by 2 (S1, S2) - instr_aft_selection = instr[(instr['tot_e']>=minE) & (instr['tot_e']<=maxE) ] - if len(inter_aft_selection) == 0: - return np.empty(0, dtype=wfsim.instruction_dtype) - return instr_aft_selection diff --git a/epix/io.py b/epix/io.py index 6aae857..20dd953 100755 --- a/epix/io.py +++ b/epix/io.py @@ -357,3 +357,6 @@ def awkward_to_wfsim_row_style(interactions): # Remove entries with no quanta res = res[res['amp'] > 0] return res + +def empty_wfsim_instructions(): + return np.empty(0, dtype=wfsim.instruction_dtype) diff --git a/epix/run_epix.py b/epix/run_epix.py index e197efc..c719e5a 100644 --- a/epix/run_epix.py +++ b/epix/run_epix.py @@ -192,8 +192,10 @@ def main(args, return_df=False, return_wfsim_instructions=False, strax=False): # Apply energy range selection for instructions if required if (args['min_energy'] > 0.0) or (args['max_energy'] < float('inf')): instructions = apply_energy_selection(instructions,[args['min_energy'],args['max_energy']]) - if args['debug'] & (len(instructions) == 0): - warnings.warn('No interactions left after the energy selection, return empty DataFrame.') + if len(instructions) == 0: + instructions = epix.empty_wfsim_instructions() + if args['debug']: + warnings.warn('No interactions left after the energy selection, return empty DataFrame.') if args['source_rate'] != 0: # Only sort by time again if source rates were applied, otherwise # things are already sorted within the events and should stay this way. From ead671be10eb7a16a0cc79cfa1d034a9997ab897 Mon Sep 17 00:00:00 2001 From: Pavel Kavrigin <68543201+PavelKavrigin@users.noreply.github.com> Date: Sun, 4 Feb 2024 14:08:36 +0200 Subject: [PATCH 7/8] Merge master with cut_by_eventid fix (#81) * Fix cut_by_eventid again * Fix cut_by_eventid again (#80) Co-authored-by: Pavel Kavrigin --------- Co-authored-by: Pavel Kavrigin --- epix/io.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/epix/io.py b/epix/io.py index 20dd953..eaaca89 100755 --- a/epix/io.py +++ b/epix/io.py @@ -185,20 +185,17 @@ def _load_root_file(self): # If user specified entry start/stop we have to update number of # events for source rate computation: - if self.kwargs['entry_start'] is not None: + if self.kwargs['entry_start'] is not None and self.cut_by_eventid: start = self.kwargs['entry_start'] else: start = 0 - if self.kwargs['entry_stop'] is not None: + if self.kwargs['entry_stop'] is not None and self.cut_by_eventid: stop = self.kwargs['entry_stop'] else: stop = n_simulated_events - if self.cut_by_eventid: - # Start/stop refers to eventid so drop start drop from kwargs - # dict if specified, otherwise we cut again on rows. - self.kwargs.pop('entry_start', None) - self.kwargs.pop('entry_stop', None) + self.kwargs.pop('entry_start', None) + self.kwargs.pop('entry_stop', None) # Conversions and parameters to be computed: alias = {'x': 'xp/10', # converting "geant4" mm to "straxen" cm From efa5a09cc5eee6190531f3ddf7a37a0d647f8509 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Diego=20Ram=C3=ADrez=20Garc=C3=ADa?= Date: Wed, 7 Feb 2024 11:24:42 +0100 Subject: [PATCH 8/8] Polish description in README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4d0b7e6..e857dab 100644 --- a/README.md +++ b/README.md @@ -39,8 +39,8 @@ The other keyword arguments are: | `--MicroSeparationTime` | Clustering time (ns) | `10` | | `--TagClusterBy` | decide if you tag the cluster (particle type, energy depositing process) according to first interaction in it (`time`) or most energetic (`energy`) | `energy` | | `--MaxDelay` | Time after which we cut the rest of the event (ns) | `1e7` | -| `--MaxEnergy` | Upper range for energy selection (keV) | `inf` | -| `--MinEnergy` | Lower range for energy selection (keV) | `-1` | +| `--MaxEnergy` | Upper range for energy selection _in sensitive volume_ (keV) | `inf` | +| `--MinEnergy` | Lower range for energy selection _in sensitive volume_ (keV) | `-1` | | `--YieldModel` | Model for yield/quanta generation (nest / bbf / beta) | `nest` | | `--ClusterMethod` | Microclustering method (dbscan / betadecay / brem) | `dbscan` | | `--SourceRate` | Event rate for event separation
- `0` for no time shift (G4 time remains)
- `-1` for clean time shift between events
- `>0` (Hz) for random spacing | `0` |