diff --git a/modnet/__init__.py b/modnet/__init__.py
index c11f861a..569b1212 100644
--- a/modnet/__init__.py
+++ b/modnet/__init__.py
@@ -1 +1 @@
-__version__ = "0.1.9"
+__version__ = "0.1.10"
diff --git a/modnet/matbench/benchmark.py b/modnet/matbench/benchmark.py
index a567a5e4..63ce56c2 100644
--- a/modnet/matbench/benchmark.py
+++ b/modnet/matbench/benchmark.py
@@ -167,8 +167,6 @@ def train_fold(
     fold_ind, (train_data, test_data) = fold
 
     results = {}
-    if classification:
-        fit_settings["num_classes"] = {t: 2 for t in target_weights}
 
     multi_target = bool(len(target) - 1)
 
@@ -185,6 +183,9 @@ def train_fold(
 
     model_settings.update(model_kwargs)
 
+    if classification:
+        model_settings["num_classes"] = {t: 2 for t in target_weights}
+
     model = model_type(target, target_weights, **model_settings)
 
     if hp_optimization:
diff --git a/modnet/models/ensemble.py b/modnet/models/ensemble.py
index eefb766f..52fde07d 100644
--- a/modnet/models/ensemble.py
+++ b/modnet/models/ensemble.py
@@ -365,21 +365,24 @@ def fit_preset(
         )
 
         if refit:
-            LOG.info("Refitting with all data and parameters: {}".format(best_preset))
+            LOG.info(
+                "Refitting with all data and parameters: {} models, {}".format(
+                    100, best_preset
+                )
+            )
             # Building final model
             n_feat = min(len(data.get_optimal_descriptors()), best_preset["n_feat"])
-            self.model = EnsembleMODNetModel(
+            self.__init__(
                 self.targets,
                 self.weights,
-                n_models=self.n_models,
+                n_models=100,
                 num_neurons=best_preset["num_neurons"],
                 n_feat=n_feat,
                 act=best_preset["act"],
                 out_act=self.out_act,
                 num_classes=self.num_classes,
-            ).model
-            self.n_feat = n_feat
+            )
 
             self.fit(
                 data,
                 val_fraction=0,
diff --git a/modnet/models/vanilla.py b/modnet/models/vanilla.py
index 4855fdcf..4454b3f6 100644
--- a/modnet/models/vanilla.py
+++ b/modnet/models/vanilla.py
@@ -286,22 +286,27 @@ def fit(
             ].values
             val_x = self._scaler.transform(val_x)
             val_x = np.nan_to_num(val_x, nan=-1)
-            try:
-                val_y = list(
-                    val_data.get_target_df()[self.targets_flatten]
-                    .values.astype(np.float, copy=False)
-                    .transpose()
-                )
-            except Exception:
-                val_y = list(
-                    val_data.get_target_df()
-                    .values.astype(np.float, copy=False)
-                    .transpose()
-                )
+            val_y = []
+            for targ in self.targets_flatten:
+                if self.num_classes[targ] >= 2:  # Classification
+                    y_inner = tf.keras.utils.to_categorical(
+                        val_data.df_targets[targ].values,
+                        num_classes=self.num_classes[targ],
+                    )
+                    loss = "categorical_crossentropy"
+                else:
+                    y_inner = val_data.df_targets[targ].values.astype(
+                        np.float, copy=False
+                    )
+                val_y.append(y_inner)
             validation_data = (val_x, val_y)
         else:
             validation_data = None
 
+        # set up bounds for postprocessing
+        self.min_y = training_data.df_targets.values.min(axis=0)
+        self.max_y = training_data.df_targets.values.max(axis=0)
+
         # Optionally set up print callback
         if verbose:
             if val_fraction > 0 or validation_data:
@@ -585,8 +590,24 @@ class OR only return the most probable class.
         x = np.nan_to_num(x, nan=-1)
 
         p = np.array(self.model.predict(x))
+
         if len(p.shape) == 2:
             p = np.array([p])
+
+        # post-process based on training data
+        yrange = self.max_y - self.min_y
+        upper_bound = self.max_y + 0.25 * yrange
+        lower_bound = self.min_y - 0.25 * yrange
+        for i, vals in enumerate(p):
+            out_of_range_idxs = np.where(
+                (vals < lower_bound[i]) | (vals > upper_bound[i])
+            )
+            vals[out_of_range_idxs] = (
+                np.random.uniform(0, 1, size=len(out_of_range_idxs[0]))
+                * (self.max_y[i] - self.min_y[i])
+                + self.min_y[i]
+            )
+
         p_dic = {}
         for i, name in enumerate(self.targets_flatten):
             if self.num_classes[name] >= 2:
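
Two of the hunks above may be easier to follow in isolation. First, the per-target loop that replaces the old try/except in `fit`: classification targets are one-hot encoded with `tf.keras.utils.to_categorical`, while regression targets stay as plain float arrays. Below is a minimal standalone sketch of that pattern, not part of the patch; the DataFrame and the `num_classes` mapping are invented stand-ins for `val_data.df_targets` and `self.num_classes`.

# Standalone sketch (not part of the patch) of the per-target
# validation-target loop added to `fit`. Toy data throughout.
import pandas as pd
import tensorflow as tf

df_targets = pd.DataFrame(
    {"is_metal": [0, 1, 1, 0], "gap": [1.2, 0.0, 0.0, 2.3]}
)
num_classes = {"is_metal": 2, "gap": 0}  # >= 2 marks a classification target

val_y = []
for targ in ["is_metal", "gap"]:
    if num_classes[targ] >= 2:  # classification: one-hot encode the labels
        y_inner = tf.keras.utils.to_categorical(
            df_targets[targ].values, num_classes=num_classes[targ]
        )
    else:  # regression: keep a plain float array
        y_inner = df_targets[targ].values.astype(float, copy=False)
    val_y.append(y_inner)

print([y.shape for y in val_y])  # [(4, 2), (4,)]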
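
Second, the new post-processing in `predict`: any prediction falling more than 25% of the training target range outside the observed `[min_y, max_y]` interval is resampled uniformly from that interval. A standalone sketch with toy data follows (the real code uses `self.min_y`/`self.max_y` recorded during `fit`); resampling rather than clipping avoids piling all outliers onto the interval edges, at the cost of injecting randomness into `predict`.

# Standalone sketch (not part of the patch) of the out-of-range
# post-processing added to `predict`. `min_y`/`max_y` mirror the bounds
# recorded in `fit`; the predictions are toy data.
import numpy as np

rng = np.random.default_rng(0)

train_targets = rng.normal(size=(50, 2))  # pretend training targets
min_y = train_targets.min(axis=0)
max_y = train_targets.max(axis=0)

p = rng.normal(size=(2, 10)) * 10  # raw predictions, one row per target

# Anything beyond 25% of the training range outside [min_y, max_y]
# is replaced by a uniform draw from [min_y, max_y].
yrange = max_y - min_y
upper_bound = max_y + 0.25 * yrange
lower_bound = min_y - 0.25 * yrange
for i, vals in enumerate(p):
    bad = np.where((vals < lower_bound[i]) | (vals > upper_bound[i]))
    vals[bad] = (
        rng.uniform(0, 1, size=len(bad[0])) * (max_y[i] - min_y[i]) + min_y[i]
    )

assert (p >= lower_bound[:, None]).all() and (p <= upper_bound[:, None]).all()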