Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Bugfixes and improvements to protopipe-MODELS #122

Merged
merged 3 commits into from
Mar 30, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions protopipe/mva/train_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def split_data(
data_train_sig,
data_test_sig,
) = split_train_test(
ds=data_sig,
survived_images=data_sig,
train_fraction=train_fraction,
feature_name_list=self.feature_name_list,
target_name=self.target_name,
Expand All @@ -88,13 +88,13 @@ def split_data(
data_train_bkg,
data_test_bkg,
) = split_train_test(
ds=data_bkg,
survived_images=data_bkg,
train_fraction=train_fraction,
feature_name_list=self.feature_name_list,
target_name=self.target_name,
)

max_events = -1
max_events = None

if force_same_nsig_nbkg is True:
if len(X_train_bkg) <= len(X_train_sig):
Expand Down
19 changes: 13 additions & 6 deletions protopipe/scripts/build_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def main():
parser.add_argument(
"--max_events",
type=int,
default=-1,
default=None,
help="maximum number of events for training",
)
mode_group = parser.add_mutually_exclusive_group()
Expand Down Expand Up @@ -157,7 +157,7 @@ def main():
if args.infile_background is None:
data_bkg_file = cfg["General"]["data_bkg_file"].format(args.mode)
else:
data_sig_file = args.infile_background
data_bkg_file = args.infile_background

filename_sig = path.join(data_dir, data_sig_file)
filename_bkg = path.join(data_dir, data_bkg_file)
Expand All @@ -167,7 +167,9 @@ def main():
cam_ids = cfg["General"]["cam_id_list"]
elif args.cameras_from_file:
print("TAKING CAMERAS FROM TRAINING FILE")
cam_ids = get_camera_names(filename)
# Within one analysis all particle types are analyzed in the same way,
# so the camera names can be read from the signal (gamma) file alone.
cam_ids = get_camera_names(filename_sig)
else:
print("TAKING CAMERAS FROM CLI")
cam_ids = args.cam_id_lists.split()
Expand Down Expand Up @@ -212,7 +214,9 @@ def main():
if model_type in "regressor":
# Load data
data = pd.read_hdf(filename, table_name[idx], mode="r")
data = prepare_data(ds=data, cuts=cuts)[0 : args.max_events]
data = prepare_data(ds=data, cuts=cuts)[0:args.max_events]

print(f"Going to split {len(data)} SIGNAL images...")

# Init model factory
factory = TrainModel(
Expand All @@ -232,8 +236,11 @@ def main():
data_sig = prepare_data(ds=data_sig, label=1, cuts=sig_cuts)
data_bkg = prepare_data(ds=data_bkg, label=0, cuts=bkg_cuts)

data_sig = data_sig[0 : args.max_events]
data_bkg = data_bkg[0 : args.max_events]
if args.max_events:
data_sig = data_sig[0:(args.max_events - 1)]
data_bkg = data_bkg[0:(args.max_events - 1)]

print(f"Going to split {len(data_sig)} SIGNAL images and {len(data_bkg)} BACKGROUND images")

# Init model factory
factory = TrainModel(
Expand Down