From 06b0131b9c91521e23bf09c669994006ddca4637 Mon Sep 17 00:00:00 2001 From: ppdebreuck Date: Tue, 25 May 2021 11:22:07 +0200 Subject: [PATCH 1/6] initial postprocess implementation --- modnet/models/vanilla.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/modnet/models/vanilla.py b/modnet/models/vanilla.py index 4855fdcf..39c95efe 100644 --- a/modnet/models/vanilla.py +++ b/modnet/models/vanilla.py @@ -302,6 +302,10 @@ def fit( else: validation_data = None + # set up bounds for postprocessing + self.min_y = training_data.df_targets.values.min(axis=0) + self.max_y = training_data.df_targets.values.max(axis=0) + # Optionally set up print callback if verbose: if val_fraction > 0 or validation_data: @@ -584,9 +588,19 @@ class OR only return the most probable class. x = self._scaler.transform(x) x = np.nan_to_num(x, nan=-1) - p = np.array(self.model.predict(x)) + p = np.array(self.model(x)) + if len(p.shape) == 2: p = np.array([p]) + + # post-process based on training data + yrange = self.max_y-self.min_y + upper_bound = self.max_y + 0.25 * yrange + lower_bound = self.min_y - 0.25 * yrange + for i,vals in enumerate(p): + out_of_range_idxs = np.where((vals < lower_bound[i]) | (vals > upper_bound[i])) + vals[out_of_range_idxs] = np.random.uniform(0, 1, size=len(out_of_range_idxs[0])) * (self.max_y[i] - self.min_y[i]) + self.min_y[i] + p_dic = {} for i, name in enumerate(self.targets_flatten): if self.num_classes[name] >= 2: From 11eb125a7e4dc99e9ce591690989f9c5f5873e27 Mon Sep 17 00:00:00 2001 From: ppdebreuck Date: Wed, 2 Jun 2021 15:35:30 +0200 Subject: [PATCH 2/6] classif bug fix --- modnet/matbench/benchmark.py | 5 +++-- modnet/models/vanilla.py | 25 +++++++++++++------------ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/modnet/matbench/benchmark.py b/modnet/matbench/benchmark.py index a567a5e4..63ce56c2 100644 --- a/modnet/matbench/benchmark.py +++ b/modnet/matbench/benchmark.py @@ -167,8 +167,6 @@ def train_fold( fold_ind, (train_data, test_data) = fold results = {} - if classification: - fit_settings["num_classes"] = {t: 2 for t in target_weights} multi_target = bool(len(target) - 1) @@ -185,6 +183,9 @@ def train_fold( model_settings.update(model_kwargs) + if classification: + model_settings["num_classes"] = {t: 2 for t in target_weights} + model = model_type(target, target_weights, **model_settings) if hp_optimization: diff --git a/modnet/models/vanilla.py b/modnet/models/vanilla.py index 39c95efe..18eda6d5 100644 --- a/modnet/models/vanilla.py +++ b/modnet/models/vanilla.py @@ -286,18 +286,19 @@ def fit( ].values val_x = self._scaler.transform(val_x) val_x = np.nan_to_num(val_x, nan=-1) - try: - val_y = list( - val_data.get_target_df()[self.targets_flatten] - .values.astype(np.float, copy=False) - .transpose() - ) - except Exception: - val_y = list( - val_data.get_target_df() - .values.astype(np.float, copy=False) - .transpose() - ) + val_y = [] + for targ in self.targets_flatten: + if self.num_classes[targ] >= 2: # Classification + y_inner = tf.keras.utils.to_categorical( + val_data.df_targets[targ].values, + num_classes=self.num_classes[targ], + ) + loss = "categorical_crossentropy" + else: + y_inner = val_data.df_targets[targ].values.astype( + np.float, copy=False + ) + val_y.append(y_inner) validation_data = (val_x, val_y) else: validation_data = None From e94bb095720bf1c1c15e5bb6fb6cd6e53c34ed1c Mon Sep 17 00:00:00 2001 From: ppdebreuck Date: Wed, 2 Jun 2021 15:35:52 +0200 Subject: [PATCH 3/6] refit update --- modnet/models/ensemble.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/modnet/models/ensemble.py b/modnet/models/ensemble.py index eefb766f..8ca381ae 100644 --- a/modnet/models/ensemble.py +++ b/modnet/models/ensemble.py @@ -365,21 +365,20 @@ def fit_preset( ) if refit: - LOG.info("Refitting with all data and parameters: {}".format(best_preset)) + LOG.info("Refitting with all data and parameters: {} models, {}".format(100,best_preset)) # Building final model n_feat = min(len(data.get_optimal_descriptors()), best_preset["n_feat"]) - self.model = EnsembleMODNetModel( + self.__init__( self.targets, self.weights, - n_models=self.n_models, + n_models=100, num_neurons=best_preset["num_neurons"], n_feat=n_feat, act=best_preset["act"], out_act=self.out_act, num_classes=self.num_classes, - ).model - self.n_feat = n_feat + ) self.fit( data, val_fraction=0, From c18ec50a0221c6f761d003667013443fd0bf4570 Mon Sep 17 00:00:00 2001 From: ppdebreuck Date: Mon, 14 Jun 2021 16:52:52 +0200 Subject: [PATCH 4/6] pre-commit run --- modnet/models/ensemble.py | 6 +++++- modnet/models/vanilla.py | 14 ++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/modnet/models/ensemble.py b/modnet/models/ensemble.py index 8ca381ae..52fde07d 100644 --- a/modnet/models/ensemble.py +++ b/modnet/models/ensemble.py @@ -365,7 +365,11 @@ def fit_preset( ) if refit: - LOG.info("Refitting with all data and parameters: {} models, {}".format(100,best_preset)) + LOG.info( + "Refitting with all data and parameters: {} models, {}".format( + 100, best_preset + ) + ) # Building final model n_feat = min(len(data.get_optimal_descriptors()), best_preset["n_feat"]) diff --git a/modnet/models/vanilla.py b/modnet/models/vanilla.py index 18eda6d5..411ac914 100644 --- a/modnet/models/vanilla.py +++ b/modnet/models/vanilla.py @@ -595,12 +595,18 @@ class OR only return the most probable class. p = np.array([p]) # post-process based on training data - yrange = self.max_y-self.min_y + yrange = self.max_y - self.min_y upper_bound = self.max_y + 0.25 * yrange lower_bound = self.min_y - 0.25 * yrange - for i,vals in enumerate(p): - out_of_range_idxs = np.where((vals < lower_bound[i]) | (vals > upper_bound[i])) - vals[out_of_range_idxs] = np.random.uniform(0, 1, size=len(out_of_range_idxs[0])) * (self.max_y[i] - self.min_y[i]) + self.min_y[i] + for i, vals in enumerate(p): + out_of_range_idxs = np.where( + (vals < lower_bound[i]) | (vals > upper_bound[i]) + ) + vals[out_of_range_idxs] = ( + np.random.uniform(0, 1, size=len(out_of_range_idxs[0])) + * (self.max_y[i] - self.min_y[i]) + + self.min_y[i] + ) p_dic = {} for i, name in enumerate(self.targets_flatten): From bfac76084797cdff88e3542c6834f285c8bf2dc7 Mon Sep 17 00:00:00 2001 From: ppdebreuck Date: Mon, 14 Jun 2021 16:59:07 +0200 Subject: [PATCH 5/6] v0.1.10 release --- modnet/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modnet/__init__.py b/modnet/__init__.py index c11f861a..569b1212 100644 --- a/modnet/__init__.py +++ b/modnet/__init__.py @@ -1 +1 @@ -__version__ = "0.1.9" +__version__ = "0.1.10" From 823325057e60d39f2f26a3042ed573c9441324cb Mon Sep 17 00:00:00 2001 From: ppdebreuck Date: Mon, 14 Jun 2021 18:05:10 +0200 Subject: [PATCH 6/6] using model.predict instead of tensor --- modnet/models/vanilla.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modnet/models/vanilla.py b/modnet/models/vanilla.py index 411ac914..4454b3f6 100644 --- a/modnet/models/vanilla.py +++ b/modnet/models/vanilla.py @@ -589,7 +589,7 @@ class OR only return the most probable class. x = self._scaler.transform(x) x = np.nan_to_num(x, nan=-1) - p = np.array(self.model(x)) + p = np.array(self.model.predict(x)) if len(p.shape) == 2: p = np.array([p])