Skip to content

Commit

Permalink
Merge pull request #156 from cta-observatory/fix_issue_148
Browse files Browse the repository at this point in the history
Fix CTAMARS-like energy estimation
  • Loading branch information
HealthyPear authored Aug 3, 2021
2 parents e1ce3e6 + 6f89fb3 commit 02f4b91
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 11 deletions.
4 changes: 2 additions & 2 deletions protopipe/aux/example_config_files/analysis.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# WARNING: the settings recorded here are unstable and used here only to give an example
# NOTE: only Prod3b simulations are currently supported.
General:
config_name: 'test'
config_name: ''
site: 'north' # 'north' or 'south'
# array can be either
# - 'subarray_LSTs', 'subarray_MSTs', 'subarray_SSTs' or 'full_array'
Expand Down Expand Up @@ -111,7 +111,7 @@ Reconstruction:
EnergyRegressor:
# Name of the regression method (e.g. AdaBoostRegressor, etc.)
method_name: 'RandomForestRegressor'
estimation_weight: 'STD'
estimation_weight: 'CTAMARS' # CTAMARS == 1/RMS^2 (RMS from the RF trees)

# Parameters for g/h separation
GammaHadronClassifier:
Expand Down
15 changes: 11 additions & 4 deletions protopipe/scripts/data_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def main():
try:
estimation_weight = cfg["EnergyRegressor"]["estimation_weight"]
except KeyError:
estimation_weight = "STD"
estimation_weight = "CTAMARS"

# wrapper for the scikit-learn regressor
if args.estimate_energy is True:
Expand Down Expand Up @@ -322,6 +322,7 @@ def main():
# Estimate energy only if the shower was reconstructed
if (args.estimate_energy is True) and is_valid:
weight_tel = np.zeros(len(hillas_dict.keys()))
weight_statistic_tel = np.zeros(len(hillas_dict.keys()))
energy_tel = np.zeros(len(hillas_dict.keys()))

for idx, tel_id in enumerate(hillas_dict.keys()):
Expand Down Expand Up @@ -395,19 +396,25 @@ def main():

############################################################

if estimation_weight == "STD":
if estimation_weight == "CTAMARS":
# Get an array of trees
predictions_trees = np.array([tree.predict(features_values) for tree in model.estimators_])
energy_tel[idx] = np.mean(predictions_trees, axis=0)
weight_tel[idx] = np.std(predictions_trees, axis=0)
weight_statistic_tel[idx] = np.std(predictions_trees, axis=0)
else:
data.eval(f'estimation_weight = {estimation_weight}', inplace=True)
energy_tel[idx] = model.predict(features_values)
weight_tel[idx] = data["estimation_weight"]

if log_10_target:
energy_tel[idx] = 10**energy_tel[idx]
weight_tel[idx] = 10**weight_tel[idx]
weight_statistic_tel[idx] = 10**weight_statistic_tel[idx]

if estimation_weight == "CTAMARS":
# in CTAMARS the average is done after converting
# energy and weight to linear energy scale
weight_tel[idx] = 1 / (weight_statistic_tel[idx]**2)

reco_energy_tel[tel_id] = energy_tel[idx]

Expand Down
2 changes: 1 addition & 1 deletion protopipe/scripts/tests/test_config_analysis_north.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ Reconstruction:
EnergyRegressor:
# Name of the regression method (e.g. AdaBoostRegressor, etc.)
method_name: 'RandomForestRegressor'
estimation_weight: 'STD'
estimation_weight: 'CTAMARS'

# Parameters for g/h separation
GammaHadronClassifier:
Expand Down
2 changes: 1 addition & 1 deletion protopipe/scripts/tests/test_config_analysis_south.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ Reconstruction:
EnergyRegressor:
# Name of the regression method (e.g. AdaBoostRegressor, etc.)
method_name: 'RandomForestRegressor'
estimation_weight: 'STD'
estimation_weight: 'CTAMARS'

# Parameters for g/h separation
GammaHadronClassifier:
Expand Down
13 changes: 10 additions & 3 deletions protopipe/scripts/write_dl2.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def main():
try:
estimation_weight_energy = cfg["EnergyRegressor"]["estimation_weight"]
except KeyError:
estimation_weight_energy = "STD"
estimation_weight_energy = "CTAMARS"
classifier_method = cfg["GammaHadronClassifier"]["method_name"]
estimation_weight_classification = cfg["GammaHadronClassifier"]["estimation_weight"]
use_proba_for_classifier = cfg["GammaHadronClassifier"]["use_proba"]
Expand Down Expand Up @@ -384,6 +384,7 @@ class RecoEvent(tb.IsDescription):
energy_tel = np.zeros(len(hillas_dict.keys()))
energy_tel_classifier = {}
weight_tel = np.zeros(len(hillas_dict.keys()))
weight_statistic_tel = np.zeros(len(hillas_dict.keys()))

for idx, tel_id in enumerate(hillas_dict.keys()):

Expand Down Expand Up @@ -439,11 +440,11 @@ class RecoEvent(tb.IsDescription):

############################################################

if (good_for_reco[tel_id] == 1) and (estimation_weight_energy == "STD"):
if (good_for_reco[tel_id] == 1) and (estimation_weight_energy == "CTAMARS"):
# Get an array of trees
predictions_trees = np.array([tree.predict(features_values) for tree in model.estimators_])
energy_tel[idx] = np.mean(predictions_trees, axis=0)
weight_tel[idx] = np.std(predictions_trees, axis=0)
weight_statistic_tel[idx] = np.std(predictions_trees, axis=0)
elif (good_for_reco[tel_id] == 1):
data.eval(f'estimation_weight_energy = {estimation_weight_energy}', inplace=True)
energy_tel[idx] = model.predict(features_values)
Expand All @@ -454,6 +455,12 @@ class RecoEvent(tb.IsDescription):
if log_10_target:
energy_tel[idx] = 10**energy_tel[idx]
weight_tel[idx] = 10**weight_tel[idx]
weight_statistic_tel[idx] = 10**weight_statistic_tel[idx]

if estimation_weight_energy == "CTAMARS":
# in CTAMARS the average is done after converting
# energy and weight to linear energy scale
weight_tel[idx] = 1 / (weight_statistic_tel[idx]**2)

# Record the values regardless of the validity
# We don't use this now, but it should be recorded
Expand Down

0 comments on commit 02f4b91

Please sign in to comment.