From a7a94e89643d113fe359a8676bfbb3145c9514ab Mon Sep 17 00:00:00 2001 From: ArlindKadra Date: Fri, 4 Jun 2021 21:08:09 +0200 Subject: [PATCH 01/26] Update implementation --- .../setup/network_backbone/ResNetBackbone.py | 20 +++++-------------- autoPyTorch/utils/implementations.py | 2 +- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py index fe25972c3..139179a41 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py @@ -261,6 +261,7 @@ def __init__( # if the shortcut needs a layer we apply batchnorm and activation to the shortcut # as well (start_norm) if in_features != out_features: + self.shortcut = nn.Linear(in_features, out_features) initial_normalization = list() if self.config['use_batch_norm']: @@ -286,18 +287,11 @@ def __init__( def _build_block(self, in_features: int, out_features: int) -> nn.Module: layers = list() + if self.start_norm is None: if self.config['use_batch_norm']: layers.append(nn.BatchNorm1d(in_features)) layers.append(self.activation()) - else: - # if start norm is not None and skip connection is None - # we will never apply the start_norm for the first layer in the block, - # which is why we should account for this case. - if not self.config['use_skip_connection']: - if self.config['use_batch_norm']: - layers.append(nn.BatchNorm1d(in_features)) - layers.append(self.activation()) layers.append(nn.Linear(in_features, out_features)) @@ -337,13 +331,8 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor: x2 = self.shake_shake_layers(x) alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda) x = shake_shake(x1, x2, alpha, beta) - else: + elif self.config["multi_branch_choice"] == 'shake-drop': x = self.layers(x) - else: - x = self.layers(x) - - if self.config["use_skip_connection"]: - if self.config["multi_branch_choice"] == 'shake-drop': alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda) bl = shake_drop_get_bl( self.block_index, @@ -353,8 +342,9 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor: x.is_cuda, ) x = shake_drop(x, alpha, beta, bl) + else: + x = self.layers(x) - if self.config["use_skip_connection"]: x = x + residual return x diff --git a/autoPyTorch/utils/implementations.py b/autoPyTorch/utils/implementations.py index 2130cfd6b..3f07ffea9 100644 --- a/autoPyTorch/utils/implementations.py +++ b/autoPyTorch/utils/implementations.py @@ -35,7 +35,7 @@ def __call__(self, y: Union[np.ndarray, torch.Tensor]) -> np.ndarray: weights = (np.ones(y.shape[1]) * weight_per_class) / np.maximum(counts, 1) else: classes, counts = np.unique(y, axis=0, return_counts=True) - classes, counts = classes[::-1], counts[::-1] + #classes, counts = classes[::-1], counts[::-1] weight_per_class = total_weight / classes.shape[0] weights = (np.ones(classes.shape[0]) * weight_per_class) / counts From 3b7f5598b32ce4614d18605f18e00b05fa89b65a Mon Sep 17 00:00:00 2001 From: ArlindKadra Date: Mon, 7 Jun 2021 12:29:14 +0200 Subject: [PATCH 02/26] Coding style fixes --- .../components/setup/network_backbone/ResNetBackbone.py | 1 - autoPyTorch/utils/implementations.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py index 
139179a41..1f7054e15 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py @@ -287,7 +287,6 @@ def __init__( def _build_block(self, in_features: int, out_features: int) -> nn.Module: layers = list() - if self.start_norm is None: if self.config['use_batch_norm']: layers.append(nn.BatchNorm1d(in_features)) diff --git a/autoPyTorch/utils/implementations.py b/autoPyTorch/utils/implementations.py index 3f07ffea9..f1ea1e651 100644 --- a/autoPyTorch/utils/implementations.py +++ b/autoPyTorch/utils/implementations.py @@ -35,7 +35,7 @@ def __call__(self, y: Union[np.ndarray, torch.Tensor]) -> np.ndarray: weights = (np.ones(y.shape[1]) * weight_per_class) / np.maximum(counts, 1) else: classes, counts = np.unique(y, axis=0, return_counts=True) - #classes, counts = classes[::-1], counts[::-1] + # classes, counts = classes[::-1], counts[::-1] weight_per_class = total_weight / classes.shape[0] weights = (np.ones(classes.shape[0]) * weight_per_class) / counts From 11e7021b0c8a9501ff7740f51a664f42fb25defb Mon Sep 17 00:00:00 2001 From: ArlindKadra Date: Mon, 7 Jun 2021 17:09:24 +0200 Subject: [PATCH 03/26] Implementation update --- .../setup/network_backbone/ResNetBackbone.py | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py index 1f7054e15..f8be1c98d 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py @@ -1,3 +1,4 @@ +import copy from typing import Any, Callable, Dict, List, Optional, Tuple, Union import ConfigSpace as CS @@ -261,19 +262,19 @@ def __init__( # if the shortcut needs a layer we apply batchnorm and activation to the shortcut # as well (start_norm) if in_features != out_features: - - self.shortcut = nn.Linear(in_features, out_features) - initial_normalization = list() - if self.config['use_batch_norm']: + if self.config["use_skip_connection"]: + self.shortcut = nn.Linear(in_features, out_features) + initial_normalization = list() + if self.config['use_batch_norm']: + initial_normalization.append( + nn.BatchNorm1d(in_features) + ) initial_normalization.append( - nn.BatchNorm1d(in_features) + self.activation() + ) + self.start_norm = nn.Sequential( + *initial_normalization ) - initial_normalization.append( - self.activation() - ) - self.start_norm = nn.Sequential( - *initial_normalization - ) self.block_index = block_index self.num_blocks = blocks_per_group * self.config["num_groups"] @@ -320,8 +321,7 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor: # if in_features != out_features # -> result = W_shortcut(A(BN(x))) + W_2(~D(A(BN(W_1(A(BN(x)))))) x = self.start_norm(x) - if self.config["use_skip_connection"]: - residual = self.shortcut(x) + residual = self.shortcut(x) # TODO make the below code better if self.config["use_skip_connection"]: @@ -345,5 +345,7 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor: x = self.layers(x) x = x + residual + else: + x = self.layers(x) return x From 375c055effc5dda0a57ed6d9e75a0449c028675a Mon Sep 17 00:00:00 2001 From: ArlindKadra Date: Mon, 7 Jun 2021 17:38:04 +0200 Subject: [PATCH 04/26] Style fix --- .../pipeline/components/setup/network_backbone/ResNetBackbone.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py index f8be1c98d..069ca4679 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py @@ -1,4 +1,3 @@ -import copy from typing import Any, Callable, Dict, List, Optional, Tuple, Union import ConfigSpace as CS From 3413bc32758b4be47b4511a09b64aff4f3a8e005 Mon Sep 17 00:00:00 2001 From: ArlindKadra Date: Tue, 8 Jun 2021 16:53:09 +0200 Subject: [PATCH 05/26] Turn weighted loss into a constant again, implementation update --- autoPyTorch/datasets/base_dataset.py | 4 ++-- .../network_backbone/ShapedResNetBackbone.py | 1 + .../components/setup/network_head/no_head.py | 1 - .../training/data_loader/base_data_loader.py | 2 +- .../training/trainer/AdversarialTrainer.py | 14 +++++++++++--- .../training/trainer/RowCutOutTrainer.py | 7 ++++--- .../training/trainer/StandardTrainer.py | 4 ++-- .../components/training/trainer/base_trainer.py | 16 ++++++++++++---- .../training/trainer/base_trainer_choice.py | 1 + .../components/training/trainer/cutout_utils.py | 14 +++++++++++--- .../components/training/trainer/mixup_utils.py | 15 ++++++++++++--- 11 files changed, 57 insertions(+), 22 deletions(-) diff --git a/autoPyTorch/datasets/base_dataset.py b/autoPyTorch/datasets/base_dataset.py index 9f9b95369..8cb951977 100644 --- a/autoPyTorch/datasets/base_dataset.py +++ b/autoPyTorch/datasets/base_dataset.py @@ -322,7 +322,7 @@ def create_holdout_val_split( self.random_state, val_share, self._get_indices(), **kwargs) return train, val - def get_dataset_for_training(self, split_id: int, train: bool) -> Dataset: + def get_dataset_for_training(self, split_id: int, train: bool, subset: int = 0) -> Dataset: """ The above split methods employ the Subset to internally subsample the whole dataset. @@ -336,7 +336,7 @@ def get_dataset_for_training(self, split_id: int, train: bool) -> Dataset: Dataset: the reduced dataset to be used for testing """ # Subset creates a dataset. 
Splits is a (train_indices, test_indices) tuple - return TransformSubset(self, self.splits[split_id][0], train=train) + return TransformSubset(self, self.splits[split_id][subset], train=train) def replace_data(self, X_train: BaseDatasetInputType, X_test: Optional[BaseDatasetInputType]) -> 'BaseDataset': diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py index 8e03a29f0..e0867cdd3 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py @@ -72,6 +72,7 @@ def build_backbone(self, input_shape: Tuple[int, ...]) -> None: ) if self.config['use_batch_norm']: layers.append(torch.nn.BatchNorm1d(self.config["num_units_%i" % self.config['num_groups']])) + layers.append(_activations[self.config["activation"]]()) backbone = torch.nn.Sequential(*layers) self.backbone = backbone return backbone diff --git a/autoPyTorch/pipeline/components/setup/network_head/no_head.py b/autoPyTorch/pipeline/components/setup/network_head/no_head.py index 870f680fb..0e711f06c 100644 --- a/autoPyTorch/pipeline/components/setup/network_head/no_head.py +++ b/autoPyTorch/pipeline/components/setup/network_head/no_head.py @@ -23,7 +23,6 @@ def build_head(self, input_shape: Tuple[int, ...], output_shape: Tuple[int, ...] layers = [] in_features = np.prod(input_shape).item() out_features = np.prod(output_shape).item() - layers.append(_activations[self.config["activation"]]()) layers.append(nn.Linear(in_features=in_features, out_features=out_features)) return nn.Sequential(*layers) diff --git a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py index 4ea56627a..26725698f 100644 --- a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py +++ b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py @@ -120,7 +120,7 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader: ) if X['val_indices'] is not None: - val_dataset = datamanager.get_dataset_for_training(split_id=X['split_id'], train=False) + val_dataset = datamanager.get_dataset_for_training(split_id=X['split_id'], train=False, subset=1) self.val_data_loader = torch.utils.data.DataLoader( val_dataset, batch_size=min(self.batch_size, len(val_dataset)), diff --git a/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py b/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py index ffbbf5e8b..c5a536dd0 100644 --- a/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py @@ -24,7 +24,7 @@ class AdversarialTrainer(BaseTrainerComponent): def __init__( self, epsilon: float, - weighted_loss: bool = False, + weighted_loss: int = 0, random_state: Optional[np.random.RandomState] = None, use_stochastic_weight_averaging: bool = False, use_snapshot_ensemble: bool = False, @@ -159,8 +159,8 @@ def get_hyperparameter_search_space( dataset_properties: Optional[Dict] = None, weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="weighted_loss", - value_range=[True, False], - default_value=True), + value_range=[1], + default_value=1), la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="la_steps", value_range=(5, 10), @@ 
-226,9 +226,17 @@ def get_hyperparameter_search_space( parent_hyperparameter=parent_hyperparameter ) + """ # TODO, decouple the weighted loss from the trainer if dataset_properties is not None: if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS: add_hyperparameter(cs, weighted_loss, CategoricalHyperparameter) + """ + # TODO, decouple the weighted loss from the trainer. Uncomment the code above and + # remove the code below. Also update the method signature, so the weighted loss + # is not a constant. + if dataset_properties is not None: + if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS: + add_hyperparameter(cs, weighted_loss, Constant) return cs diff --git a/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py b/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py index 97f0caa18..c09603523 100644 --- a/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py @@ -40,14 +40,14 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, indices = self.random_state.choice(range(1, size), max(1, np.int32(size * self.patch_ratio)), replace=False) - if not isinstance(self.numerical_columns, typing.Iterable): + """if not isinstance(self.numerical_columns, typing.Iterable): raise ValueError("{} requires numerical columns information of {}" "to prepare data got {}.".format(self.__class__.__name__, typing.Iterable, self.numerical_columns)) numerical_indices = torch.tensor(self.numerical_columns) categorical_indices = torch.tensor([index for index in indices if index not in self.numerical_columns]) - + # We use an ordinal encoder on the categorical columns of tabular data # -1 is the conceptual equivalent to 0 in a image, that does not # have color as a feature and hence the network has to learn to deal @@ -55,7 +55,8 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, # similar to the effect that setting 0 as a pixel value in an image. X[:, categorical_indices.long()] = self.CATEGORICAL_VALUE X[:, numerical_indices.long()] = self.NUMERICAL_VALUE - + """ + X[:, indices] = 0 lam = 1 y_a = y y_b = y diff --git a/autoPyTorch/pipeline/components/training/trainer/StandardTrainer.py b/autoPyTorch/pipeline/components/training/trainer/StandardTrainer.py index 02db03f1d..1f6bbd224 100644 --- a/autoPyTorch/pipeline/components/training/trainer/StandardTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/StandardTrainer.py @@ -7,7 +7,7 @@ class StandardTrainer(BaseTrainerComponent): def __init__(self, - weighted_loss: bool = False, + weighted_loss: int = 0, use_stochastic_weight_averaging: bool = False, use_snapshot_ensemble: bool = False, se_lastk: int = 3, @@ -18,7 +18,7 @@ def __init__(self, This class handles the training of a network for a single given epoch. 
Args: - weighted_loss (bool): whether to use weighted loss + weighted_loss (int): whether to use weighted loss """ super().__init__(random_state=random_state, diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py index cb6f8ee3d..b77bb729a 100644 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py @@ -175,7 +175,7 @@ class BaseTrainerComponent(autoPyTorchTrainingComponent): """ Base class for training Args: - weighted_loss (bool, default=False): In case for classification, whether to weight + weighted_loss (int, default=0): In case for classification, whether to weight the loss function according to the distribution of classes in the target use_stochastic_weight_averaging (bool, default=True): whether to use stochastic weight averaging. Stochastic weight averaging is a simple average of @@ -190,7 +190,7 @@ class BaseTrainerComponent(autoPyTorchTrainingComponent): random_state: **lookahead_config: """ - def __init__(self, weighted_loss: bool = False, + def __init__(self, weighted_loss: int = 0, use_stochastic_weight_averaging: bool = True, use_snapshot_ensemble: bool = True, se_lastk: int = 3, @@ -537,8 +537,8 @@ def get_hyperparameter_search_space( dataset_properties: Optional[Dict] = None, weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="weighted_loss", - value_range=[True, False], - default_value=True), + value_range=[1], + default_value=1), la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="la_steps", value_range=(5, 10), @@ -599,9 +599,17 @@ def get_hyperparameter_search_space( parent_hyperparameter=parent_hyperparameter ) + """ # TODO, decouple the weighted loss from the trainer if dataset_properties is not None: if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS: add_hyperparameter(cs, weighted_loss, CategoricalHyperparameter) + """ + # TODO, decouple the weighted loss from the trainer. Uncomment the code above and + # remove the code below. Also update the method signature, so the weighted loss + # is not a constant. 
+ if dataset_properties is not None: + if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS: + add_hyperparameter(cs, weighted_loss, Constant) return cs diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py index 0075d69a7..502445c14 100755 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py @@ -402,6 +402,7 @@ def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> 'TrainerChoic torch.cuda.empty_cache() if self.choice.use_stochastic_weight_averaging and self.choice.swa_updated: + # update batch norm statistics swa_utils.update_bn(X['train_data_loader'], self.choice.swa_model.double()) # change model diff --git a/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py b/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py index 582014f9b..c7feb2214 100644 --- a/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py +++ b/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py @@ -20,7 +20,7 @@ class CutOut: def __init__(self, patch_ratio: float, cutout_prob: float, - weighted_loss: bool = False, + weighted_loss: int = 0, random_state: Optional[np.random.RandomState] = None, use_stochastic_weight_averaging: bool = False, use_snapshot_ensemble: bool = False, @@ -63,8 +63,8 @@ def get_hyperparameter_search_space( dataset_properties: Optional[Dict] = None, weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="weighted_loss", - value_range=[True, False], - default_value=True), + value_range=[1], + default_value=1), la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="la_steps", value_range=(5, 10), @@ -136,9 +136,17 @@ def get_hyperparameter_search_space( parent_hyperparameter=parent_hyperparameter ) + """ # TODO, decouple the weighted loss from the trainer if dataset_properties is not None: if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS: add_hyperparameter(cs, weighted_loss, CategoricalHyperparameter) + """ + # TODO, decouple the weighted loss from the trainer. Uncomment the code above and + # remove the code below. Also update the method signature, so the weighted loss + # is not a constant. 
+ if dataset_properties is not None: + if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS: + add_hyperparameter(cs, weighted_loss, Constant) return cs diff --git a/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py b/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py index a0348a566..a2325b91c 100644 --- a/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py +++ b/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py @@ -19,7 +19,7 @@ class MixUp: def __init__(self, alpha: float, - weighted_loss: bool = False, + weighted_loss: int = 0, random_state: Optional[np.random.RandomState] = None, use_stochastic_weight_averaging: bool = False, use_snapshot_ensemble: bool = False, @@ -61,8 +61,8 @@ def get_hyperparameter_search_space( dataset_properties: Optional[Dict] = None, weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="weighted_loss", - value_range=[True, False], - default_value=True), + value_range=[1], + default_value=1), la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="la_steps", value_range=(5, 10), @@ -127,9 +127,18 @@ def get_hyperparameter_search_space( la_config_space, parent_hyperparameter=parent_hyperparameter ) + + """ # TODO, decouple the weighted loss from the trainer if dataset_properties is not None: if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS: add_hyperparameter(cs, weighted_loss, CategoricalHyperparameter) + """ + # TODO, decouple the weighted loss from the trainer. Uncomment the code above and + # remove the code below. Also update the method signature, so the weighted loss + # is not a constant. + if dataset_properties is not None: + if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS: + add_hyperparameter(cs, weighted_loss, Constant) return cs From d37d4a5187165c38c116fca7fa2b5ab3087884b2 Mon Sep 17 00:00:00 2001 From: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> Date: Wed, 21 Jul 2021 18:04:07 +0200 Subject: [PATCH 06/26] Cocktail branch inconsistencies (#275) * To nemo * Revert change in T_curr as results conclusively prove it should be 0 * Revert cutmix change after data from run * Final conclusion after results * FIX bug in shake alpha beta * Updated if is_training condition for shake drop * Remove temp fix in row cutmic --- .../components/setup/network/base_network.py | 15 ++++++++------- .../components/setup/network_backbone/utils.py | 6 +++--- .../training/data_loader/base_data_loader.py | 2 ++ autoPyTorch/pipeline/tabular_classification.py | 11 +++-------- 4 files changed, 16 insertions(+), 18 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network/base_network.py b/autoPyTorch/pipeline/components/setup/network/base_network.py index c58f25283..3f246b6fa 100644 --- a/autoPyTorch/pipeline/components/setup/network/base_network.py +++ b/autoPyTorch/pipeline/components/setup/network/base_network.py @@ -131,13 +131,14 @@ def _predict(self, network: torch.nn.Module, loader: torch.utils.data.DataLoader # Batch prediction Y_batch_preds = list() - for i, (X_batch, Y_batch) in enumerate(loader): - # Predict on batch - X_batch = X_batch.float().to(self.device) - Y_batch_pred = network(X_batch) - if self.final_activation is not None: - Y_batch_pred = self.final_activation(Y_batch_pred) - Y_batch_preds.append(Y_batch_pred.detach().cpu()) + with torch.no_grad(): + for i, (X_batch, Y_batch) in enumerate(loader): + # Predict on batch + X_batch = 
X_batch.float().to(self.device) + Y_batch_pred = network(X_batch) + if self.final_activation is not None: + Y_batch_pred = self.final_activation(Y_batch_pred) + Y_batch_preds.append(Y_batch_pred.detach().cpu()) return torch.cat(Y_batch_preds, 0) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py index aa46876fa..ef19beac8 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py @@ -94,7 +94,7 @@ def backward(ctx: typing.Any, def shake_get_alpha_beta(is_training: bool, is_cuda: bool ) -> typing.Tuple[torch.tensor, torch.tensor]: - if is_training: + if not is_training: result = (torch.FloatTensor([0.5]), torch.FloatTensor([0.5])) return result if not is_cuda else (result[0].cuda(), result[1].cuda()) @@ -118,10 +118,10 @@ def shake_drop_get_bl( ) -> torch.tensor: pl = 1 - ((block_index + 1) / num_blocks) * (1 - min_prob_no_shake) - if not is_training: + if is_training: # Move to torch.randn(1) for reproducibility bl = torch.tensor(1.0) if torch.randn(1) <= pl else torch.tensor(0.0) - if is_training: + else: bl = torch.tensor(pl) if is_cuda: diff --git a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py index 26725698f..5b8e445ac 100644 --- a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py +++ b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py @@ -148,6 +148,8 @@ def get_loader(self, X: np.ndarray, y: Optional[np.ndarray] = None, batch_size: dataset = BaseDataset( train_tensors=(X, y), # This dataset is used for loading test data in a batched format + seed=self.random_state.get_state()[1][0], + shuffle=False, train_transforms=self.test_transform, val_transforms=self.test_transform, ) diff --git a/autoPyTorch/pipeline/tabular_classification.py b/autoPyTorch/pipeline/tabular_classification.py index bb4cb10ac..d19fc7215 100644 --- a/autoPyTorch/pipeline/tabular_classification.py +++ b/autoPyTorch/pipeline/tabular_classification.py @@ -91,12 +91,7 @@ def _predict_proba(self, X: np.ndarray) -> np.ndarray: loader = self.named_steps['data_loader'].get_loader(X=X) pred = self.named_steps['network'].predict(loader) if isinstance(self.dataset_properties['output_shape'], int): - proba = pred[:, :self.dataset_properties['output_shape']] - normalizer = proba.sum(axis=1)[:, np.newaxis] - normalizer[normalizer == 0.0] = 1.0 - proba /= normalizer - - return proba + return pred else: all_proba = [] @@ -147,8 +142,8 @@ def predict_proba(self, X: np.ndarray, batch_size: Optional[int] = None) -> np.n # Neural networks might not be fit to produce a [0-1] output # For instance, after small number of epochs. 
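# --- Editorial note, not part of the patch: a minimal illustration of the
# corrected shake_drop_get_bl() behaviour from the hunk earlier in this commit.
# During training the gate bl is sampled per forward pass, while at evaluation
# time it is fixed to the keep-probability pl, which decays linearly with depth:
#     pl = 1 - ((block_index + 1) / num_blocks) * (1 - min_prob_no_shake)
# For example, with num_blocks=4 and min_prob_no_shake=0.0:
#     [1 - ((b + 1) / 4) for b in range(4)]  ->  [0.75, 0.5, 0.25, 0.0]
# so deeper residual blocks are shake-dropped more often.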
- y = np.clip(y, 0, 1) - y = sklearn.preprocessing.normalize(y, axis=1, norm='l1') + # y = np.clip(y, 0, 1) + # y = sklearn.preprocessing.normalize(y, axis=1, norm='l1') return y From 23466f0c7210db7b1d946f5accd086d0a78b9e98 Mon Sep 17 00:00:00 2001 From: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> Date: Wed, 20 Oct 2021 11:58:11 +0200 Subject: [PATCH 07/26] Cocktail fixes time debug (#286) * preprocess inside data validator * add time debug statements * Add fixes for categorical data * add fit_ensemble * add arlind fix for swa and se * fix bug in trainer choice fit * fix ensemble bug * Correct bug in cleanup * Cleanup for removing time debug statements * ablation for adversarial * shuffle false in dataloader * drop last false in dataloader * fix bug for validation set, and cutout and cutmix * shuffle = False * Shake Shake updates (#287) * To test locally * fix bug in trainer choice fit * fix ensemble bug * Correct bug in cleanup * To test locally * Cleanup for removing time debug statements * ablation for adversarial * shuffle false in dataloader * drop last false in dataloader * fix bug for validation set, and cutout and cutmix * To test locally * shuffle = False * To test locally * updates to search space * updates to search space * update branch with search space * undo search space update * fix bug in shake shake flag * limit to shake-even * restrict to even even * Add even even and others for shake-drop also * fix bug in passing alpha beta method * restrict to only even even * fix silly bug: * remove imputer and ordinal encoder for categorical transformer in feature validator * Address comments from shuhei * fix issues with ensemble fitting post hoc * Address comments on the PR * Fix flake and mypy errors * Address comments from PR #286 * fix bug in embedding * Update autoPyTorch/api/tabular_classification.py Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> * Update autoPyTorch/datasets/base_dataset.py Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> * Update autoPyTorch/datasets/base_dataset.py Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> * Update autoPyTorch/pipeline/components/training/trainer/base_trainer.py Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> * Address comments from shuhei * adress comments from shuhei * fix flake and mypy * Update autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> * Update autoPyTorch/pipeline/tabular_classification.py Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> * Update autoPyTorch/pipeline/components/setup/network_backbone/utils.py Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> * Update autoPyTorch/pipeline/components/setup/network_backbone/utils.py Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> * Update autoPyTorch/pipeline/components/setup/network_backbone/utils.py Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> * Apply suggestions from code review Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> * increase threads_per_worker * fix bug in rowcutmix * Enhancement for the tabular validator. 
(#291) * Initial try at an enhancement for the tabular validator * Adding a few type annotations * Fixing bugs in implementation * Adding wrongly deleted code part during rebase * Fix bug in _get_args * Fix bug in _get_args * Addressing Shuhei's comments * Address Shuhei's comments * Refactoring code * Refactoring code * Typos fix and additional comments * Replace nan in categoricals with simple imputer * Remove unused function * add comment * Update autoPyTorch/data/tabular_feature_validator.py Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> * Update autoPyTorch/data/tabular_feature_validator.py Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> * Adding unit test for only nall columns in the tabular feature categorical evaluator * fix bug in remove all nan columns * Bug fix for making tests run by arlind * fix flake errors in feature validator * made typing code uniform * Apply suggestions from code review Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> * address comments from shuhei * address comments from shuhei (2) Co-authored-by: Ravin Kohli Co-authored-by: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> * Apply suggestions from code review Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> * resolve code issues with new versions * Address comments from shuhei * make run_traditional_ml function * implement suggestion from shuhei and fix bug in rowcutmixtrainer * fix return type docstring * add better documentation and fix bug in shake_drop_get_bl * Apply suggestions from code review Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> * add test for comparator and other improvements based on PR comments * fix bug in test * [fix] Fix the condition in the raising error of all_nan_columns * [refactor] Unite name conventions of numpy array and pandas dataframe * [doc] Add the description about the tabular feature transformation * [doc] Add the description of the tabular feature transformation * address comments from arlind * address comments from arlind * change to as_tensor and address comments from arlind * correct description for functions in data module Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> Co-authored-by: Arlind Kadra Co-authored-by: nabenabe0928 --- autoPyTorch/api/base_task.py | 337 ++++++++++-- autoPyTorch/api/tabular_classification.py | 2 + autoPyTorch/api/tabular_regression.py | 2 + autoPyTorch/data/base_feature_validator.py | 83 ++- autoPyTorch/data/base_target_validator.py | 52 +- autoPyTorch/data/base_validator.py | 4 +- autoPyTorch/data/tabular_feature_validator.py | 511 ++++++++++-------- autoPyTorch/data/tabular_target_validator.py | 8 +- autoPyTorch/datasets/base_dataset.py | 15 +- autoPyTorch/ensemble/singlebest_ensemble.py | 5 +- autoPyTorch/pipeline/base_pipeline.py | 49 +- .../normalise/ImageNormalizer.py | 8 +- .../normalise/NoNormalizer.py | 8 +- .../TabularColumnTransformer.py | 20 +- .../encoding/base_encoder.py | 2 +- .../imputation/base_imputer.py | 2 +- .../scaling/base_scaler.py | 2 +- .../setup/network_backbone/ResNetBackbone.py | 28 +- .../network_backbone/ShapedResNetBackbone.py | 21 +- .../setup/network_backbone/utils.py | 74 ++- .../base_network_embedding.py | 37 +- .../setup/optimizer/AdamWOptimizer.py | 4 +- .../training/data_loader/base_data_loader.py | 4 +- 
.../training/trainer/AdversarialTrainer.py | 11 +- .../training/trainer/RowCutMixTrainer.py | 31 +- .../training/trainer/RowCutOutTrainer.py | 34 +- .../training/trainer/base_trainer.py | 8 +- .../training/trainer/base_trainer_choice.py | 5 +- .../pipeline/tabular_classification.py | 10 +- autoPyTorch/utils/backend.py | 18 +- autoPyTorch/utils/common.py | 2 +- .../example_custom_configuration_space.py | 11 +- .../example_posthoc_ensemble_fit.py | 81 +++ requirements.txt | 2 +- test/test_data/test_feature_validator.py | 166 +++++- 35 files changed, 1130 insertions(+), 527 deletions(-) create mode 100644 examples/tabular/40_advanced/example_posthoc_ensemble_fit.py diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 11b0de273..a85695801 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -189,6 +189,9 @@ def __init__( self.trajectory: Optional[List] = None self.dataset_name: Optional[str] = None self.cv_models_: Dict = {} + self.precision: Optional[int] = None + self.opt_metric: Optional[str] = None + self.dataset: Optional[BaseDataset] = None # By default try to use the TCP logging port or get a new port self._logger_port = logging.handlers.DEFAULT_TCP_LOGGING_PORT @@ -394,6 +397,7 @@ def _clean_logger(self) -> None: self.logging_server.join(timeout=5) self.logging_server.terminate() del self.stop_logging_server + self._logger = None def _create_dask_client(self) -> None: """ @@ -408,7 +412,7 @@ def _create_dask_client(self) -> None: dask.distributed.LocalCluster( n_workers=self.n_jobs, processes=True, - threads_per_worker=1, + threads_per_worker=2, # We use the temporal directory to save the # dask workers, because deleting workers # more time than deleting backend directories @@ -488,6 +492,23 @@ def _load_models(self) -> bool: return True + def _cleanup(self) -> None: + """ + Closes the different servers created during api search. + Returns: + None + """ + if hasattr(self, '_logger') and self._logger is not None: + self._logger.info("Closing the dask infrastructure") + self._close_dask_client() + self._logger.info("Finished closing the dask infrastructure") + + # Clean up the logger + self._logger.info("Starting to clean up the logger") + self._clean_logger() + else: + self._close_dask_client() + def _load_best_individual_model(self) -> SingleBest: """ In case of failure during ensemble building, @@ -725,6 +746,38 @@ def _do_traditional_prediction(self, time_left: int, func_eval_time_limit_secs: save_external=True) return + def run_traditional_ml( + self, + current_task_name: str, + runtime_limit: int, + func_eval_time_limit_secs: int + ) -> None: + """ + This function can be used to run the suite of traditional machine + learning models during the current task (for e.g, ensemble fit, search) + + Args: + current_task_name (str): name of the current task, + runtime_limit (int): time limit for fitting traditional models, + func_eval_time_limit_secs (int): Time limit + for a single call to the machine learning model. + Model fitting will be terminated if the machine + learning algorithm runs over the time limit. + """ + assert self._logger is not None # for mypy compliancy + if STRING_TO_TASK_TYPES[self.task_type] in REGRESSION_TASKS: + self._logger.warning("Traditional Pipeline is not enabled for regression. 
Skipping...") + else: + traditional_task_name = 'runTraditional' + self._stopwatch.start_task(traditional_task_name) + elapsed_time = self._stopwatch.wall_elapsed(current_task_name) + time_for_traditional = int(runtime_limit - elapsed_time) + self._do_traditional_prediction( + func_eval_time_limit_secs=func_eval_time_limit_secs, + time_left=time_for_traditional, + ) + self._stopwatch.stop_task(traditional_task_name) + def _search( self, optimize_metric: str, @@ -819,8 +872,10 @@ def _search( if self.task_type != dataset.task_type: raise ValueError("Incompatible dataset entered for current task," - "expected dataset to have task type :{} got " + "expected dataset to have task type :{} but got " ":{}".format(self.task_type, dataset.task_type)) + if precision not in [16, 32, 64]: + raise ValueError("precision must be one of 16, 32, 64 but got {}".format(precision)) # Initialise information needed for the experiment experiment_task_name: str = 'runSearch' @@ -866,7 +921,7 @@ def _search( # If no dask client was provided, we create one, so that we can # start a ensemble process in parallel to smbo optimize if ( - self._dask_client is None and (self.ensemble_size > 0 or self.n_jobs is not None and self.n_jobs > 1) + self._dask_client is None and (self.ensemble_size > 0 or self.n_jobs > 1) ): self._create_dask_client() else: @@ -895,31 +950,25 @@ def _search( ) # ============> Run dummy predictions - dummy_task_name = 'runDummy' - self._stopwatch.start_task(dummy_task_name) - self._do_dummy_prediction() - self._stopwatch.stop_task(dummy_task_name) + # We only want to run dummy predictions in case we want to build an ensemble + if self.ensemble_size > 0: + dummy_task_name = 'runDummy' + self._stopwatch.start_task(dummy_task_name) + self._do_dummy_prediction() + self._stopwatch.stop_task(dummy_task_name) # ============> Run traditional ml - - if enable_traditional_pipeline: - if STRING_TO_TASK_TYPES[self.task_type] in REGRESSION_TASKS: - self._logger.warning("Traditional Pipeline is not enabled for regression. 
Skipping...") - else: - traditional_task_name = 'runTraditional' - self._stopwatch.start_task(traditional_task_name) - elapsed_time = self._stopwatch.wall_elapsed(self.dataset_name) - # We want time for at least 1 Neural network in SMAC - time_for_traditional = int( - self._time_for_task - elapsed_time - func_eval_time_limit_secs - ) - self._do_traditional_prediction( - func_eval_time_limit_secs=func_eval_time_limit_secs, - time_left=time_for_traditional, - ) - self._stopwatch.stop_task(traditional_task_name) + # We only want to run traditional predictions in case we want to build an ensemble + # We want time for at least 1 Neural network in SMAC + if enable_traditional_pipeline and self.ensemble_size > 0: + traditional_runtime_limit = int(self._time_for_task - func_eval_time_limit_secs) + self.run_traditional_ml(current_task_name=self.dataset_name, + runtime_limit=traditional_runtime_limit, + func_eval_time_limit_secs=func_eval_time_limit_secs) # ============> Starting ensemble + self.precision = precision + self.opt_metric = optimize_metric elapsed_time = self._stopwatch.wall_elapsed(self.dataset_name) time_left_for_ensembles = max(0, total_walltime_limit - elapsed_time) proc_ensemble = None @@ -936,26 +985,12 @@ def _search( self._logger.info("Starting ensemble") ensemble_task_name = 'ensemble' self._stopwatch.start_task(ensemble_task_name) - proc_ensemble = EnsembleBuilderManager( - start_time=time.time(), - time_left_for_ensembles=time_left_for_ensembles, - backend=copy.deepcopy(self._backend), - dataset_name=str(dataset.dataset_name), - output_type=STRING_TO_OUTPUT_TYPES[dataset.output_type], - task_type=STRING_TO_TASK_TYPES[self.task_type], - metrics=[self._metric], - opt_metric=optimize_metric, - ensemble_size=self.ensemble_size, - ensemble_nbest=self.ensemble_nbest, - max_models_on_disc=self.max_models_on_disc, - seed=self.seed, - max_iterations=None, - read_at_most=sys.maxsize, - ensemble_memory_limit=self._memory_limit, - random_state=self.seed, - precision=precision, - logger_port=self._logger_port, - ) + proc_ensemble = self._init_ensemble_builder(time_left_for_ensembles=time_left_for_ensembles, + ensemble_size=self.ensemble_size, + ensemble_nbest=self.ensemble_nbest, + precision=precision, + optimize_metric=self.opt_metric + ) self._stopwatch.stop_task(ensemble_task_name) # ==> Run SMAC @@ -1035,18 +1070,12 @@ def _search( pd.DataFrame(self.ensemble_performance_history).to_json( os.path.join(self._backend.internals_directory, 'ensemble_history.json')) - self._logger.info("Closing the dask infrastructure") - self._close_dask_client() - self._logger.info("Finished closing the dask infrastructure") - if load_models: self._logger.info("Loading models...") self._load_models() self._logger.info("Finished loading models...") - # Clean up the logger - self._logger.info("Starting to clean up the logger") - self._clean_logger() + self._cleanup() return self @@ -1241,7 +1270,7 @@ def fit_pipeline(self, dataset_requirements = get_dataset_requirements( info=self._get_required_dataset_properties(dataset)) dataset_properties = dataset.get_dataset_properties(dataset_requirements) - self._backend.save_datamanager(dataset) + self._backend.replace_datamanager(dataset) if self._logger is None: self._logger = self._get_logger(dataset.dataset_name) @@ -1333,6 +1362,203 @@ def fit_pipeline(self, return fitted_pipeline, run_info, run_value, dataset + def fit_ensemble( + self, + optimize_metric: Optional[str] = None, + precision: Optional[int] = None, + ensemble_nbest: int = 50, + ensemble_size: int 
= 50, + load_models: bool = True, + time_for_task: int = 100, + func_eval_time_limit_secs: int = 50, + enable_traditional_pipeline: bool = True, + ) -> 'BaseTask': + """ + Enables post-hoc fitting of the ensemble after the `search()` + method is finished. This method creates an ensemble using all + the models stored on disk during the smbo run + Args: + optimize_metric (str): name of the metric that is used to + evaluate a pipeline. if not specified, value passed to search will be used + precision (int), (default=32): Numeric precision used when loading + ensemble data. Can be either 16, 32 or 64. + ensemble_nbest (Optional[int]): + only consider the ensemble_nbest models to build the ensemble. + If None, uses the value stored in class attribute `ensemble_nbest`. + ensemble_size (int) (default=50): + Number of models added to the ensemble built by + Ensemble selection from libraries of models. + Models are drawn with replacement. + enable_traditional_pipeline (bool), (default=True): + We fit traditional machine learning algorithms + (LightGBM, CatBoost, RandomForest, ExtraTrees, KNN, SVM) + prior building PyTorch Neural Networks. You can disable this + feature by turning this flag to False. All machine learning + algorithms that are fitted during search() are considered for + ensemble building. + load_models (bool), (default=True): Whether to load the + models after fitting AutoPyTorch. + time_for_task (int), (default=100): Time limit + in seconds for the search of appropriate models. + By increasing this value, autopytorch has a higher + chance of finding better models. + func_eval_time_limit_secs (int), (default=None): Time limit + for a single call to the machine learning model. + Model fitting will be terminated if the machine + learning algorithm runs over the time limit. Set + this value high enough so that typical machine + learning algorithms can be fit on the training + data. + When set to None, this time will automatically be set to + total_walltime_limit // 2 to allow enough time to fit + at least 2 individual machine learning algorithms. + Set to np.inf in case no time limit is desired. + Returns: + self + """ + # Make sure that input is valid + if self.dataset is None or self.opt_metric is None: + raise ValueError("fit_ensemble() can only be called after `search()`. " + "Please call the `search()` method of {} prior to " + "fit_ensemble().".format(self.__class__.__name__)) + + if precision not in [16, 32, 64]: + raise ValueError("precision must be one of 16, 32, 64 but got {}".format(precision)) + + if self._logger is None: + self._logger = self._get_logger(self.dataset.dataset_name) + + # Create a client if needed + if self._dask_client is None: + self._create_dask_client() + else: + self._is_dask_client_internally_created = False + + ensemble_fit_task_name = 'EnsembleFit' + self._stopwatch.start_task(ensemble_fit_task_name) + if enable_traditional_pipeline: + if func_eval_time_limit_secs is None or func_eval_time_limit_secs > time_for_task: + self._logger.warning( + 'Time limit for a single run is higher than total time ' + 'limit. 
Capping the limit for a single run to the total ' + 'time given to Ensemble fit (%f)' % time_for_task + ) + func_eval_time_limit_secs = time_for_task + + # Make sure that at least 2 models are created for the ensemble process + num_models = time_for_task // func_eval_time_limit_secs + if num_models < 2: + func_eval_time_limit_secs = time_for_task // 2 + self._logger.warning( + "Capping the func_eval_time_limit_secs to {} to have " + "time for at least 2 models to ensemble.".format( + func_eval_time_limit_secs + ) + ) + # ============> Run Dummy predictions + dummy_task_name = 'runDummy' + self._stopwatch.start_task(dummy_task_name) + self._do_dummy_prediction() + self._stopwatch.stop_task(dummy_task_name) + + # ============> Run traditional ml + if enable_traditional_pipeline: + self.run_traditional_ml(current_task_name=ensemble_fit_task_name, + runtime_limit=time_for_task, + func_eval_time_limit_secs=func_eval_time_limit_secs) + + elapsed_time = self._stopwatch.wall_elapsed(ensemble_fit_task_name) + time_left_for_ensemble = int(time_for_task - elapsed_time) + manager = self._init_ensemble_builder( + time_left_for_ensembles=time_left_for_ensemble, + optimize_metric=self.opt_metric if optimize_metric is None else optimize_metric, + precision=self.precision if precision is None else precision, + ensemble_size=ensemble_size, + ensemble_nbest=ensemble_nbest, + ) + + manager.build_ensemble(self._dask_client) + future = manager.futures.pop() + result = future.result() + if result is None: + raise ValueError("Errors occurred while building the ensemble - please" + " check the log file and command line output for error messages.") + self.ensemble_performance_history, _, _, _ = result + + if load_models: + self._load_models() + + self._stopwatch.stop_task(ensemble_fit_task_name) + + self._cleanup() + + return self + + def _init_ensemble_builder( + self, + time_left_for_ensembles: float, + optimize_metric: str, + ensemble_nbest: int, + ensemble_size: int, + precision: int = 32, + ) -> EnsembleBuilderManager: + """ + Initializes an `EnsembleBuilderManager`. + Args: + time_left_for_ensembles (float): + Time (in seconds) allocated to building the ensemble + optimize_metric (str): + Name of the metric to optimize the ensemble. + ensemble_nbest (int): + only consider the ensemble_nbest models to build the ensemble. + ensemble_size (int): + Number of models added to the ensemble built by + Ensemble selection from libraries of models. + Models are drawn with replacement. + precision (int), (default=32): Numeric precision used when loading + ensemble data. Can be either 16, 32 or 64. + Returns: + EnsembleBuilderManager + """ + if self._logger is None: + raise ValueError("logger should be initialized to fit ensemble") + if self.dataset is None: + raise ValueError("ensemble can only be initialised after or during `search()`. 
" + "Please call the `search()` method of {}.".format(self.__class__.__name__)) + + self._logger.info("Starting ensemble") + ensemble_task_name = 'ensemble' + self._stopwatch.start_task(ensemble_task_name) + + # Use the current thread to start the ensemble builder process + # The function ensemble_builder_process will internally create a ensemble + # builder in the provide dask client + required_dataset_properties = {'task_type': self.task_type, + 'output_type': self.dataset.output_type} + proc_ensemble = EnsembleBuilderManager( + start_time=time.time(), + time_left_for_ensembles=time_left_for_ensembles, + backend=copy.deepcopy(self._backend), + dataset_name=str(self.dataset.dataset_name), + output_type=STRING_TO_OUTPUT_TYPES[self.dataset.output_type], + task_type=STRING_TO_TASK_TYPES[self.task_type], + metrics=[self._metric] if self._metric is not None else get_metrics( + dataset_properties=required_dataset_properties, names=[optimize_metric]), + opt_metric=optimize_metric, + ensemble_size=ensemble_size, + ensemble_nbest=ensemble_nbest, + max_models_on_disc=self.max_models_on_disc, + seed=self.seed, + max_iterations=None, + read_at_most=sys.maxsize, + ensemble_memory_limit=self._memory_limit, + random_state=self.seed, + precision=precision, + logger_port=self._logger_port, + ) + self._stopwatch.stop_task(ensemble_task_name) + return proc_ensemble + def predict( self, X_test: np.ndarray, @@ -1382,7 +1608,7 @@ def predict( predictions = self.ensemble_.predict(all_predictions) - self._clean_logger() + self._cleanup() return predictions @@ -1419,10 +1645,7 @@ def __getstate__(self) -> Dict[str, Any]: return self.__dict__ def __del__(self) -> None: - # Clean up the logger - self._clean_logger() - - self._close_dask_client() + self._cleanup() # When a multiprocessing work is done, the # objects are deleted. 
We don't want to delete run areas diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py index 1a73d8625..7be504f6d 100644 --- a/autoPyTorch/api/tabular_classification.py +++ b/autoPyTorch/api/tabular_classification.py @@ -275,6 +275,8 @@ def search( y_test=y_test, dataset_name=dataset_name) + if self.dataset is None: + raise ValueError("`dataset` in {} must be initialized, but got None".format(self.__class__.__name__)) return self._search( dataset=self.dataset, optimize_metric=optimize_metric, diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py index e7fb919bd..8742549af 100644 --- a/autoPyTorch/api/tabular_regression.py +++ b/autoPyTorch/api/tabular_regression.py @@ -261,6 +261,8 @@ def search( y_test=y_test, dataset_name=dataset_name) + if self.dataset is None: + raise ValueError("`dataset` in {} must be initialized, but got None".format(self.__class__.__name__)) return self._search( dataset=self.dataset, optimize_metric=optimize_metric, diff --git a/autoPyTorch/data/base_feature_validator.py b/autoPyTorch/data/base_feature_validator.py index 0106a3aa8..a7cab5913 100644 --- a/autoPyTorch/data/base_feature_validator.py +++ b/autoPyTorch/data/base_feature_validator.py @@ -1,5 +1,5 @@ import logging -import typing +from typing import List, Optional, Set, Tuple, Union import numpy as np @@ -12,8 +12,8 @@ from autoPyTorch.utils.logging_ import PicklableClientLogger -SUPPORTED_FEAT_TYPES = typing.Union[ - typing.List, +SUPPORTED_FEAT_TYPES = Union[ + List, pd.DataFrame, np.ndarray, scipy.sparse.bsr_matrix, @@ -35,60 +35,61 @@ class BaseFeatureValidator(BaseEstimator): List of the column types found by this estimator during fit. data_type (str): Class name of the data type provided during fit. - encoder (typing.Optional[BaseEstimator]) + encoder (Optional[BaseEstimator]) Host a encoder object if the data requires transformation (for example, if provided a categorical column in a pandas DataFrame) - enc_columns (typing.List[str]) + enc_columns (List[str]) List of columns that were encoded. 
""" def __init__(self, - logger: typing.Optional[typing.Union[PicklableClientLogger, logging.Logger - ]] = None, + logger: Optional[Union[PicklableClientLogger, logging.Logger + ] + ] = None, ) -> None: # Register types to detect unsupported data format changes - self.feat_type = None # type: typing.Optional[typing.List[str]] - self.data_type = None # type: typing.Optional[type] - self.dtypes = [] # type: typing.List[str] - self.column_order = [] # type: typing.List[str] + self.feat_type: Optional[List[str]] = None + self.data_type: Optional[type] = None + self.dtypes: List[str] = [] + self.column_order: List[str] = [] - self.encoder = None # type: typing.Optional[BaseEstimator] - self.enc_columns = [] # type: typing.List[str] + self.encoder: Optional[BaseEstimator] = None + self.enc_columns: List[str] = [] - self.logger: typing.Union[ + self.logger: Union[ PicklableClientLogger, logging.Logger ] = logger if logger is not None else logging.getLogger(__name__) # Required for dataset properties - self.num_features = None # type: typing.Optional[int] - self.categories = [] # type: typing.List[typing.List[int]] - self.categorical_columns: typing.List[int] = [] - self.numerical_columns: typing.List[int] = [] - # column identifiers may be integers or strings - self.null_columns: typing.Set[str] = set() + self.num_features: Optional[int] = None + self.categories: List[List[int]] = [] + self.categorical_columns: List[int] = [] + self.numerical_columns: List[int] = [] + + self.all_nan_columns: Optional[Set[Union[int, str]]] = None self._is_fitted = False def fit( self, X_train: SUPPORTED_FEAT_TYPES, - X_test: typing.Optional[SUPPORTED_FEAT_TYPES] = None, + X_test: Optional[SUPPORTED_FEAT_TYPES] = None, ) -> BaseEstimator: """ Validates and fit a categorical encoder (if needed) to the features. The supported data types are List, numpy arrays and pandas DataFrames. 
CSR sparse data types are also supported - Arguments: + Args: X_train (SUPPORTED_FEAT_TYPES): A set of features that are going to be validated (type and dimensionality checks) and a encoder fitted in the case the data needs encoding - X_test (typing.Optional[SUPPORTED_FEAT_TYPES]): + X_test (Optional[SUPPORTED_FEAT_TYPES]): A hold out set of data used for checking """ # If a list was provided, it will be converted to pandas if isinstance(X_train, list): - X_train, X_test = self.list_to_dataframe(X_train, X_test) + X_train, X_test = self.list_to_pandas(X_train, X_test) self._check_data(X_train) @@ -114,7 +115,7 @@ def _fit( X: SUPPORTED_FEAT_TYPES, ) -> BaseEstimator: """ - Arguments: + Args: X (SUPPORTED_FEAT_TYPES): A set of features that are going to be validated (type and dimensionality checks) and a encoder fitted in the case the data needs encoding @@ -122,6 +123,7 @@ def _fit( self: The fitted base estimator """ + raise NotImplementedError() def _check_data( @@ -131,11 +133,12 @@ def _check_data( """ Feature dimensionality and data type checks - Arguments: + Args: X (SUPPORTED_FEAT_TYPES): A set of features that are going to be validated (type and dimensionality checks) and a encoder fitted in the case the data needs encoding """ + raise NotImplementedError() def transform( @@ -143,7 +146,7 @@ def transform( X: SUPPORTED_FEAT_TYPES, ) -> np.ndarray: """ - Arguments: + Args: X_train (SUPPORTED_FEAT_TYPES): A set of features, whose categorical features are going to be transformed @@ -152,4 +155,30 @@ def transform( np.ndarray: The transformed array """ + + raise NotImplementedError() + + def list_to_pandas( + self, + X_train: SUPPORTED_FEAT_TYPES, + X_test: Optional[SUPPORTED_FEAT_TYPES] = None, + ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: + """ + Converts a list to a pandas DataFrame. In this process, column types are inferred. + + If test data is provided, we proactively match it to train data + + Args: + X_train (SUPPORTED_FEAT_TYPES): + A set of features that are going to be validated (type and dimensionality + checks) and a encoder fitted in the case the data needs encoding + X_test (Optional[SUPPORTED_FEAT_TYPES]): + A hold out set of data used for checking + Returns: + pd.DataFrame: + transformed train data from list to pandas DataFrame + pd.DataFrame: + transformed test data from list to pandas DataFrame + """ + raise NotImplementedError() diff --git a/autoPyTorch/data/base_target_validator.py b/autoPyTorch/data/base_target_validator.py index dba9c19e3..f191e985b 100644 --- a/autoPyTorch/data/base_target_validator.py +++ b/autoPyTorch/data/base_target_validator.py @@ -1,5 +1,5 @@ import logging -import typing +from typing import List, Optional, Union, cast import numpy as np @@ -12,8 +12,8 @@ from autoPyTorch.utils.logging_ import PicklableClientLogger -SUPPORTED_TARGET_TYPES = typing.Union[ - typing.List, +SUPPORTED_TARGET_TYPES = Union[ + List, pd.Series, pd.DataFrame, np.ndarray, @@ -35,48 +35,50 @@ class BaseTargetValidator(BaseEstimator): is_classification (bool): A bool that indicates if the validator should operate in classification mode. During classification, the targets are encoded. 
- encoder (typing.Optional[BaseEstimator]): + encoder (Optional[BaseEstimator]): Host a encoder object if the data requires transformation (for example, if provided a categorical column in a pandas DataFrame) - enc_columns (typing.List[str]) + enc_columns (List[str]) List of columns that where encoded """ def __init__(self, is_classification: bool = False, - logger: typing.Optional[typing.Union[PicklableClientLogger, logging.Logger - ]] = None, + logger: Optional[Union[PicklableClientLogger, + logging.Logger + ] + ] = None, ) -> None: self.is_classification = is_classification - self.data_type = None # type: typing.Optional[type] + self.data_type: Optional[type] = None - self.encoder = None # type: typing.Optional[BaseEstimator] + self.encoder: Optional[BaseEstimator] = None - self.out_dimensionality = None # type: typing.Optional[int] - self.type_of_target = None # type: typing.Optional[str] + self.out_dimensionality: Optional[int] = None + self.type_of_target: Optional[str] = None - self.logger: typing.Union[ + self.logger: Union[ PicklableClientLogger, logging.Logger ] = logger if logger is not None else logging.getLogger(__name__) # Store the dtype for remapping to correct type - self.dtype = None # type: typing.Optional[type] + self.dtype: Optional[type] = None self._is_fitted = False def fit( self, y_train: SUPPORTED_TARGET_TYPES, - y_test: typing.Optional[SUPPORTED_TARGET_TYPES] = None, + y_test: Optional[SUPPORTED_TARGET_TYPES] = None, ) -> BaseEstimator: """ Validates and fit a categorical encoder (if needed) to the targets The supported data types are List, numpy arrays and pandas DataFrames. - Arguments: + Args: y_train (SUPPORTED_TARGET_TYPES) A set of targets set aside for training - y_test (typing.Union[SUPPORTED_TARGET_TYPES]) + y_test (Union[SUPPORTED_TARGET_TYPES]) A hold out set of data used of the targets. It is also used to fit the categories of the encoder. """ @@ -95,8 +97,8 @@ def fit( np.shape(y_test) )) if isinstance(y_train, pd.DataFrame): - y_train = typing.cast(pd.DataFrame, y_train) - y_test = typing.cast(pd.DataFrame, y_test) + y_train = cast(pd.DataFrame, y_train) + y_test = cast(pd.DataFrame, y_test) if y_train.columns.tolist() != y_test.columns.tolist(): raise ValueError( "Train and test targets must both have the same columns, yet " @@ -127,24 +129,24 @@ def fit( def _fit( self, y_train: SUPPORTED_TARGET_TYPES, - y_test: typing.Optional[SUPPORTED_TARGET_TYPES] = None, + y_test: Optional[SUPPORTED_TARGET_TYPES] = None, ) -> BaseEstimator: """ - Arguments: + Args: y_train (SUPPORTED_TARGET_TYPES) The labels of the current task. 
They are going to be encoded in case of classification - y_test (typing.Optional[SUPPORTED_TARGET_TYPES]) + y_test (Optional[SUPPORTED_TARGET_TYPES]) A holdout set of labels """ raise NotImplementedError() def transform( self, - y: typing.Union[SUPPORTED_TARGET_TYPES], + y: Union[SUPPORTED_TARGET_TYPES], ) -> np.ndarray: """ - Arguments: + Args: y (SUPPORTED_TARGET_TYPES) A set of targets that are going to be encoded if the current task is classification @@ -161,8 +163,8 @@ def inverse_transform( """ Revert any encoding transformation done on a target array - Arguments: - y (typing.Union[np.ndarray, pd.DataFrame, pd.Series]): + Args: + y (Union[np.ndarray, pd.DataFrame, pd.Series]): Target array to be transformed back to original form before encoding Returns: np.ndarray: diff --git a/autoPyTorch/data/base_validator.py b/autoPyTorch/data/base_validator.py index 7528d56ab..4ef54c665 100644 --- a/autoPyTorch/data/base_validator.py +++ b/autoPyTorch/data/base_validator.py @@ -58,7 +58,7 @@ def fit( + Checks for dimensionality as well as missing values are performed. + If performing a classification task, the data is going to be encoded - Arguments: + Args: X_train (SUPPORTED_FEAT_TYPES): A set of features that are going to be validated (type and dimensionality checks). If this data contains categorical columns, an encoder is going to @@ -102,7 +102,7 @@ def transform( """ Transform the given target or features to a numpy array - Arguments: + Args: X (SUPPORTED_FEAT_TYPES): A set of features to transform y (typing.Optional[SUPPORTED_TARGET_TYPES]): diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py index 4c8a8fbc2..62bd0b465 100644 --- a/autoPyTorch/data/tabular_feature_validator.py +++ b/autoPyTorch/data/tabular_feature_validator.py @@ -1,5 +1,5 @@ import functools -import typing +from typing import Dict, List, Optional, Tuple, cast import numpy as np @@ -9,15 +9,98 @@ import scipy.sparse import sklearn.utils -from sklearn import preprocessing from sklearn.base import BaseEstimator from sklearn.compose import ColumnTransformer from sklearn.exceptions import NotFittedError +from sklearn.impute import SimpleImputer +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import OneHotEncoder, StandardScaler from autoPyTorch.data.base_feature_validator import BaseFeatureValidator, SUPPORTED_FEAT_TYPES +def _create_column_transformer( + preprocessors: Dict[str, List[BaseEstimator]], + numerical_columns: List[str], + categorical_columns: List[str], +) -> ColumnTransformer: + """ + Given a dictionary of preprocessors, this function + creates a sklearn column transformer with appropriate + columns associated with their preprocessors. + Args: + preprocessors (Dict[str, List[BaseEstimator]]): + Dictionary containing list of numerical and categorical preprocessors. 
+ numerical_columns (List[str]): + List of names of numerical columns + categorical_columns (List[str]): + List of names of categorical columns + Returns: + ColumnTransformer + """ + + numerical_pipeline = 'drop' + categorical_pipeline = 'drop' + if len(numerical_columns) > 0: + numerical_pipeline = make_pipeline(*preprocessors['numerical']) + if len(categorical_columns) > 0: + categorical_pipeline = make_pipeline(*preprocessors['categorical']) + + return ColumnTransformer([ + ('categorical_pipeline', categorical_pipeline, categorical_columns), + ('numerical_pipeline', numerical_pipeline, numerical_columns)], + remainder='drop' + ) + + +def get_tabular_preprocessors() -> Dict[str, List[BaseEstimator]]: + """ + This function creates a Dictionary containing a list + of numerical and categorical preprocessors + + Returns: + Dict[str, List[BaseEstimator]] + """ + preprocessors: Dict[str, List[BaseEstimator]] = dict() + + # Categorical Preprocessors + onehot_encoder = OneHotEncoder(categories='auto', sparse=False, handle_unknown='ignore') + categorical_imputer = SimpleImputer(strategy='constant', copy=False) + + # Numerical Preprocessors + numerical_imputer = SimpleImputer(strategy='median', copy=False) + standard_scaler = StandardScaler(with_mean=True, with_std=True, copy=False) + + preprocessors['categorical'] = [categorical_imputer, onehot_encoder] + preprocessors['numerical'] = [numerical_imputer, standard_scaler] + + return preprocessors + + class TabularFeatureValidator(BaseFeatureValidator): + + @staticmethod + def _comparator(cmp1: str, cmp2: str) -> int: + """Order so that categorical columns come left and numerical columns come right + + Args: + cmp1 (str): First variable to compare + cmp2 (str): Second variable to compare + + Raises: + ValueError: if the values of the variables to compare + are not in 'categorical' or 'numerical' + + Returns: + int: either [0, -1, 1] + """ + choices = ['categorical', 'numerical'] + if cmp1 not in choices or cmp2 not in choices: + raise ValueError('The comparator for the column order only accepts {}, ' + 'but got {} and {}'.format(choices, cmp1, cmp2)) + idx1, idx2 = choices.index(cmp1), choices.index(cmp2) + return idx1 - idx2 + def _fit( self, X: SUPPORTED_FEAT_TYPES, @@ -27,7 +110,7 @@ def _fit( features (from categorical for example) to a numerical value that further stages will be able to use - Arguments: + Args: X (SUPPORTED_FEAT_TYPES): A set of features that are going to be validated (type and dimensionality checks) and a encoder fitted in the case the data needs encoding @@ -39,79 +122,45 @@ def _fit( # The final output of a validator is a numpy array. But pandas # gives us information about the column dtype if isinstance(X, np.ndarray): - X = self.numpy_array_to_pandas(X) + + X = self.numpy_to_pandas(X) + # Replace the data type from the previously saved type. + self.data_type = type(X) + # save all the information about the column order and data types + self._check_data(X) if hasattr(X, "iloc") and not scipy.sparse.issparse(X): - X = typing.cast(pd.DataFrame, X) - # Treat a column with all instances a NaN as numerical - # This will prevent doing encoding to a categorical column made completely - # out of nan values -- which will trigger a fail, as encoding is not supported - # with nan values. 
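As an aside on the `_comparator` helper added above: it is meant to be used with `functools.cmp_to_key` so that categorical feature types sort before numerical ones, matching the column order produced by the column transformer. A minimal standalone sketch of that ordering (the feature-type list is invented, and the comparator is re-stated inline rather than imported):

    import functools

    def comparator(cmp1: str, cmp2: str) -> int:
        # same rule as TabularFeatureValidator._comparator: categorical sorts before numerical
        choices = ['categorical', 'numerical']
        return choices.index(cmp1) - choices.index(cmp2)

    feat_type = ['numerical', 'categorical', 'numerical']
    print(sorted(feat_type, key=functools.cmp_to_key(comparator)))
    # ['categorical', 'numerical', 'numerical']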
- # Columns that are completely made of NaN values are provided to the pipeline - # so that later stages decide how to handle them - - # Clear whatever null column markers we had previously - self.null_columns.clear() - if np.any(pd.isnull(X)): - for column in X.columns: - if X[column].isna().all(): - self.null_columns.add(column) - X[column] = pd.to_numeric(X[column]) - # Also note this change in self.dtypes - if len(self.dtypes) != 0: - self.dtypes[list(X.columns).index(column)] = X[column].dtype - - if not X.select_dtypes(include='object').empty: - X = self.infer_objects(X) - self._check_data(X) - self.enc_columns, self.feat_type = self._get_columns_to_encode(X) - - if len(self.enc_columns) > 0: - X = self.impute_nan_in_categories(X) - - self.encoder = ColumnTransformer( - [ - ("encoder", - preprocessing.OrdinalEncoder( - handle_unknown='use_encoded_value', - unknown_value=-1, - ), self.enc_columns)], - remainder="passthrough" - ) + X = cast(pd.DataFrame, X) - # Mypy redefinition - assert self.encoder is not None - self.encoder.fit(X) - - # The column transformer reoders the feature types - we therefore need to change - # it as well - # This means columns are shifted to the right - def comparator(cmp1: str, cmp2: str) -> int: - if ( - cmp1 == 'categorical' and cmp2 == 'categorical' - or cmp1 == 'numerical' and cmp2 == 'numerical' - ): - return 0 - elif cmp1 == 'categorical' and cmp2 == 'numerical': - return -1 - elif cmp1 == 'numerical' and cmp2 == 'categorical': - return 1 - else: - raise ValueError((cmp1, cmp2)) - - self.feat_type = sorted( - self.feat_type, - key=functools.cmp_to_key(comparator) - ) + self.all_nan_columns = set([column for column in X.columns if X[column].isna().all()]) - self.categories = [ - # We fit an ordinal encoder, where all categorical - # columns are shifted to the left - list(range(len(cat))) - for cat in self.encoder.transformers_[0][1].categories_ - ] + categorical_columns, numerical_columns, feat_type = self._get_columns_info(X) + + self.enc_columns = categorical_columns + + preprocessors = get_tabular_preprocessors() + self.column_transformer = _create_column_transformer( + preprocessors=preprocessors, + numerical_columns=numerical_columns, + categorical_columns=categorical_columns, + ) + + # Mypy redefinition + assert self.column_transformer is not None + self.column_transformer.fit(X) + + # The column transformer reorders the feature types + # therefore, we need to change the order of columns as well + # This means categorical columns are shifted to the left + + self.feat_type = sorted( + feat_type, + key=functools.cmp_to_key(self._comparator) + ) + # differently to categorical_columns and numerical_columns, + # this saves the index of the column. for i, type_ in enumerate(self.feat_type): if 'numerical' in type_: self.numerical_columns.append(i) @@ -119,7 +168,8 @@ def comparator(cmp1: str, cmp2: str) -> int: self.categorical_columns.append(i) # Lastly, store the number of features - self.num_features = np.shape(X)[1] + self.num_features = len(X.columns) + return self def transform( @@ -130,7 +180,7 @@ def transform( Validates and fit a categorical encoder (if needed) to the features. The supported data types are List, numpy arrays and pandas DataFrames. 
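To make the new fit path concrete, here is a small self-contained sketch of the preprocessing object it builds. The toy DataFrame and column names are invented, and the pipelines are re-stated with plain scikit-learn calls so the snippet runs on its own; they mirror what `get_tabular_preprocessors()` and `_create_column_transformer()` above would produce:

    import numpy as np
    import pandas as pd
    from sklearn.compose import ColumnTransformer
    from sklearn.impute import SimpleImputer
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import OneHotEncoder, StandardScaler

    # Toy data: one categorical and one numerical column, both with a missing value.
    X = pd.DataFrame({'colour': pd.Series(['red', None, 'blue'], dtype='category'),
                      'size': [1.0, 2.0, np.nan]})

    # Same building blocks as get_tabular_preprocessors().
    categorical = make_pipeline(SimpleImputer(strategy='constant', copy=False),
                                OneHotEncoder(categories='auto', sparse=False,
                                              handle_unknown='ignore'))
    numerical = make_pipeline(SimpleImputer(strategy='median', copy=False),
                              StandardScaler(with_mean=True, with_std=True, copy=False))

    # Categorical block first, numerical second, as in _create_column_transformer().
    ct = ColumnTransformer([('categorical_pipeline', categorical, ['colour']),
                            ('numerical_pipeline', numerical, ['size'])],
                           remainder='drop')
    print(ct.fit_transform(X))

The transformed array therefore always has the one-hot encoded categorical columns on the left and the scaled numerical columns on the right, which is why `feat_type` is re-sorted with `_comparator` afterwards.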
- Arguments: + Args: X_train (SUPPORTED_FEAT_TYPES): A set of features, whose categorical features are going to be transformed @@ -138,49 +188,69 @@ def transform( Return: np.ndarray: The transformed array + + Note: + The default transform performs the following: + * simple imputation for both numerical and categorical columns + * scaling for numerical columns + * one-hot encoding for categorical columns + For example, here is a simple case + in which all the columns are categorical. + data = [ + {'A': 1, 'B': np.nan, 'C': np.nan}, + {'A': np.nan, 'B': 3, 'C': np.nan}, + {'A': 2, 'B': np.nan, 'C': np.nan} + ] + then the observed categories are + * `A` in {np.nan, 1, 2} + * `B` in {np.nan, 3} + * `C` in {np.nan} <=== it will be dropped. + + So in column A, + * np.nan ==> [1, 0, 0] (always the index 0) + * 1 ==> [0, 1, 0] + * 2 ==> [0, 0, 1] + in column B, + * np.nan ==> [1, 0] + * 3 ==> [0, 1] + Therefore, by concatenating, + * {'A': 1, 'B': np.nan, 'C': np.nan} ==> [0, 1, 0, 1, 0] + * {'A': np.nan, 'B': 3, 'C': np.nan} ==> [1, 0, 0, 0, 1] + * {'A': 2, 'B': np.nan, 'C': np.nan} ==> [0, 0, 1, 1, 0] + ==> [ + [0, 1, 0, 1, 0], + [1, 0, 0, 0, 1], + [0, 0, 1, 1, 0] + ] """ if not self._is_fitted: raise NotFittedError("Cannot call transform on a validator that is not fitted") # If a list was provided, it will be converted to pandas if isinstance(X, list): - X, _ = self.list_to_dataframe(X) + X, _ = self.list_to_pandas(X) if isinstance(X, np.ndarray): - X = self.numpy_array_to_pandas(X) + X = self.numpy_to_pandas(X) if hasattr(X, "iloc") and not scipy.sparse.issparse(X): - X = typing.cast(pd.DataFrame, X) - # If we had null columns in our fit call and we made them numeric, then: - # - If the columns are null even in transform, apply the same procedure. - # - Otherwise, substitute the values with np.NaN and then make the columns numeric. - # If the column is null here, but it was not in fit, it does not matter. - for column in self.null_columns: - # The column is not null, make it null since it was null in fit. - if not X[column].isna().all(): - X[column] = np.NaN - X[column] = pd.to_numeric(X[column]) - - # for the test set, if we have columns with only null values - # they will probably have a numeric type. If these columns were not - # with only null values in the train set, they should be converted - # to the type that they had during fitting. - for column in X.columns: - if X[column].isna().all(): - X[column] = X[column].astype(self.dtypes[list(X.columns).index(column)]) - - # Also remove the object dtype for new data - if not X.select_dtypes(include='object').empty: - X = self.infer_objects(X) + X = cast(pd.DataFrame, X) # Check the data here so we catch problems on new test data self._check_data(X) - # We also need to fillna on the transformation - # in case test data is provided - X = self.impute_nan_in_categories(X) - if self.encoder is not None: - X = self.encoder.transform(X) + # in case a categorical column of the test data contains only NaN values while the train + # data has actual values for that column.
+ # We need to convert the column in test data to + # object otherwise the test column is interpreted as float + if len(self.categorical_columns) > 0: + categorical_columns = self.column_transformer.transformers_[0][-1] + for column in categorical_columns: + if X[column].isna().all(): + X[column] = X[column].astype('object') + + X = self.column_transformer.transform(X) # Sparse related transformations # Not all sparse format support index sorting @@ -209,7 +279,7 @@ def _check_data( """ Feature dimensionality and data type checks - Arguments: + Args: X (SUPPORTED_FEAT_TYPES): A set of features that are going to be validated (type and dimensionality checks) and a encoder fitted in the case the data needs encoding @@ -246,125 +316,120 @@ def _check_data( # Then for Pandas, we do not support Nan in categorical columns if hasattr(X, "iloc"): # If entered here, we have a pandas dataframe - X = typing.cast(pd.DataFrame, X) + X = cast(pd.DataFrame, X) # Handle objects if possible - if not X.select_dtypes(include='object').empty: + exist_object_columns = has_object_columns(X.dtypes.values) + if exist_object_columns: X = self.infer_objects(X) - # Define the column to be encoded here as the feature validator is fitted once - # per estimator - enc_columns, _ = self._get_columns_to_encode(X) - column_order = [column for column in X.columns] if len(self.column_order) > 0: if self.column_order != column_order: - raise ValueError("Changing the column order of the features after fit() is " - "not supported. Fit() method was called with " - "{} whereas the new features have {} as type".format(self.column_order, - column_order,) - ) + raise ValueError("The column order of the features must not be changed after fit(), but" + " the column order are different between training ({}) and" + " test ({}) datasets.".format(self.column_order, column_order)) else: self.column_order = column_order dtypes = [dtype.name for dtype in X.dtypes] - if len(self.dtypes) > 0: - if self.dtypes != dtypes: - raise ValueError("Changing the dtype of the features after fit() is " - "not supported. Fit() method was called with " - "{} whereas the new features have {} as type".format(self.dtypes, - dtypes, - ) - ) - else: - self.dtypes = dtypes - def _get_columns_to_encode( + dtypes_diff = [s_dtype != dtype for s_dtype, dtype in zip(self.dtypes, dtypes)] + if len(self.dtypes) == 0: + self.dtypes = dtypes + elif ( + any(dtypes_diff) # the dtypes of some columns are different in train and test dataset + and self.all_nan_columns is not None # Ignore all_nan_columns is None + and len(set(X.columns[dtypes_diff]).difference(self.all_nan_columns)) != 0 + ): + # The dtypes can be different if and only if the column belongs + # to all_nan_columns as these columns would be imputed. + raise ValueError("The dtype of the features must not be changed after fit(), but" + " the dtypes of some columns are different between training ({}) and" + " test ({}) datasets.".format(self.dtypes, dtypes)) + + def _get_columns_info( self, X: pd.DataFrame, - ) -> typing.Tuple[typing.List[str], typing.List[str]]: + ) -> Tuple[List[str], List[str], List[str]]: """ Return the columns to be encoded from a pandas dataframe - Arguments: + Args: X (pd.DataFrame) A set of features that are going to be validated (type and dimensionality checks) and a encoder fitted in the case the data needs encoding Returns: - enc_columns (List[str]): - Columns to encode, if any - feat_type: + categorical_columns (List[str]) + List of the names of categorical columns. 
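Stepping back to the dtype guard added to `_check_data` above: the intent is that a dtype difference between fit and transform is only tolerated for columns that were entirely NaN at fit time (those end up in `all_nan_columns` and are imputed anyway). A tiny standalone sketch of the condition it evaluates, with invented column names and dtypes:

    fit_dtypes = ['float64', 'category']   # recorded by the first _check_data call
    new_dtypes = ['float64', 'float64']    # observed on the data passed to transform
    columns = ['age', 'colour']
    all_nan_columns = set()                # nothing was all-NaN during fit in this toy case

    dtypes_diff = [old != new for old, new in zip(fit_dtypes, new_dtypes)]
    changed = {col for col, diff in zip(columns, dtypes_diff) if diff}
    if changed and changed - all_nan_columns:
        print('would raise ValueError: dtype changed for', sorted(changed))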
+ numerical_columns (List[str]) + List of the names of numerical columns. + feat_type (List[str]) Type of each column numerical/categorical """ # Register if a column needs encoding - enc_columns = [] - + numerical_columns = [] + categorical_columns = [] # Also, register the feature types for the estimator feat_type = [] # Make sure each column is a valid type for i, column in enumerate(X.columns): - if X[column].dtype.name in ['category', 'bool']: - - enc_columns.append(column) + if self.all_nan_columns is not None and column in self.all_nan_columns: + continue + column_dtype = self.dtypes[i] + err_msg = "Valid types are `numerical`, `categorical` or `boolean`, " \ + "but input column {} has an invalid type `{}`.".format(column, column_dtype) + if column_dtype in ['category', 'bool']: + categorical_columns.append(column) feat_type.append('categorical') # Move away from np.issubdtype as it causes # TypeError: data type not understood in certain pandas types - elif not is_numeric_dtype(X[column]): - if X[column].dtype.name == 'object': - raise ValueError( - "Input Column {} has invalid type object. " - "Cast it to a valid dtype before using it in AutoPyTorch. " - "Valid types are numerical, categorical or boolean. " - "You can cast it to a valid dtype using " - "pandas.Series.astype ." - "If working with string objects, the following " - "tutorial illustrates how to work with text data: " - "https://scikit-learn.org/stable/tutorial/text_analytics/working_with_text_data.html".format( - # noqa: E501 - column, - ) - ) - elif pd.core.dtypes.common.is_datetime_or_timedelta_dtype( - X[column].dtype - ): - raise ValueError( - "AutoPyTorch does not support time and/or date datatype as given " - "in column {}. Please convert the time information to a numerical value " - "first. One example on how to do this can be found on " - "https://stats.stackexchange.com/questions/311494/".format( - column, - ) - ) - else: - raise ValueError( - "Input Column {} has unsupported dtype {}. " - "Supported column types are categorical/bool/numerical dtypes. " - "Make sure your data is formatted in a correct way, " - "before feeding it to AutoPyTorch.".format( - column, - X[column].dtype.name, - ) + elif is_numeric_dtype(column_dtype): + feat_type.append('numerical') + numerical_columns.append(column) + elif column_dtype == 'object': + # TODO verify how would this happen when we always convert the object dtypes to category + raise TypeError( + "{} Cast it to a valid dtype before feeding it to AutoPyTorch. " + "You can cast it to a valid dtype using pandas.Series.astype." + "If you are working with string objects, the following " + "tutorial illustrates how to work with text data: " + "https://scikit-learn.org/stable/tutorial/text_analytics/working_with_text_data.html".format( + # noqa: E501 + err_msg, ) + ) + elif pd.core.dtypes.common.is_datetime_or_timedelta_dtype(column_dtype): + raise TypeError( + "{} Convert the time information to a numerical value" + " before feeding it to AutoPyTorch. 
" + "One example of the conversion can be found on " + "https://stats.stackexchange.com/questions/311494/".format(err_msg) + ) else: - feat_type.append('numerical') - return enc_columns, feat_type + raise TypeError( + "{} Make sure your data is formatted in a correct way" + "before feeding it to AutoPyTorch.".format(err_msg) + ) - def list_to_dataframe( + return categorical_columns, numerical_columns, feat_type + + def list_to_pandas( self, X_train: SUPPORTED_FEAT_TYPES, - X_test: typing.Optional[SUPPORTED_FEAT_TYPES] = None, - ) -> typing.Tuple[pd.DataFrame, typing.Optional[pd.DataFrame]]: + X_test: Optional[SUPPORTED_FEAT_TYPES] = None, + ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: """ Converts a list to a pandas DataFrame. In this process, column types are inferred. If test data is provided, we proactively match it to train data - Arguments: + Args: X_train (SUPPORTED_FEAT_TYPES): A set of features that are going to be validated (type and dimensionality checks) and a encoder fitted in the case the data needs encoding - X_test (typing.Optional[SUPPORTED_FEAT_TYPES]): + X_test (Optional[SUPPORTED_FEAT_TYPES]): A hold out set of data used for checking Returns: pd.DataFrame: @@ -374,7 +439,7 @@ def list_to_dataframe( """ # If a list was provided, it will be converted to pandas - X_train = pd.DataFrame(data=X_train).infer_objects() + X_train = pd.DataFrame(data=X_train).convert_dtypes() self.logger.warning("The provided feature types to AutoPyTorch are of type list." "Features have been interpreted as: {}".format([(col, t) for col, t in zip(X_train.columns, X_train.dtypes)])) @@ -383,17 +448,18 @@ def list_to_dataframe( self.logger.warning("Train features are a list while the provided test data" "is {}. X_test will be casted as DataFrame.".format(type(X_test)) ) - X_test = pd.DataFrame(data=X_test).infer_objects() + X_test = pd.DataFrame(data=X_test).convert_dtypes() + return X_train, X_test - def numpy_array_to_pandas( - self, + @staticmethod + def numpy_to_pandas( X: np.ndarray, ) -> pd.DataFrame: """ Converts a numpy array to pandas for type inference - Arguments: + Args: X (np.ndarray): data to be interpreted. @@ -408,7 +474,7 @@ def infer_objects(self, X: pd.DataFrame) -> pd.DataFrame: This has to be done once, so the test and train data are treated equally - Arguments: + Args: X (pd.DataFrame): data to be interpreted. @@ -426,66 +492,33 @@ def infer_objects(self, X: pd.DataFrame) -> pd.DataFrame: self.logger.warning(f"Tried to cast column {key} to {dtype} caused {e}") pass else: + # Calling for the first time to infer the categories X = X.infer_objects() - for column in X.columns: - if not is_numeric_dtype(X[column]): + for column, data_type in zip(X.columns, X.dtypes): + if not is_numeric_dtype(data_type): X[column] = X[column].astype('category') - self.object_dtype_mapping = {column: X[column].dtype for column in X.columns} - self.logger.debug(f"Infer Objects: {self.object_dtype_mapping}") - return X - def impute_nan_in_categories(self, X: pd.DataFrame) -> pd.DataFrame: - """ - impute missing values before encoding, - remove once sklearn natively supports - it in ordinal encoding. Sklearn issue: - "https://github.com/scikit-learn/scikit-learn/issues/17123)" + # only numerical attributes and categories + self.object_dtype_mapping = {column: data_type for column, data_type in zip(X.columns, X.dtypes)} - Arguments: - X (pd.DataFrame): - data to be interpreted. 
- - Returns: - pd.DataFrame - """ + self.logger.debug(f"Infer Objects: {self.object_dtype_mapping}") - # To be on the safe side, map always to the same missing - # value per column - if not hasattr(self, 'dict_nancol_to_missing'): - self.dict_missing_value_per_col: typing.Dict[str, typing.Any] = {} - - # First make sure that we do not alter the type of the column which cause: - # TypeError: '<' not supported between instances of 'int' and 'str' - # in the encoding - for column in self.enc_columns: - if X[column].isna().any(): - if column not in self.dict_missing_value_per_col: - try: - float(X[column].dropna().values[0]) - can_cast_as_number = True - except Exception: - can_cast_as_number = False - if can_cast_as_number: - # In this case, we expect to have a number as category - # it might be string, but its value represent a number - missing_value: typing.Union[str, int] = '-1' if isinstance(X[column].dropna().values[0], - str) else -1 - else: - missing_value = 'Missing!' - - # Make sure this missing value is not seen before - # Do this check for categorical columns - # else modify the value - if hasattr(X[column], 'cat'): - while missing_value in X[column].cat.categories: - if isinstance(missing_value, str): - missing_value += '0' - else: - missing_value += missing_value - self.dict_missing_value_per_col[column] = missing_value - - # Convert the frame in place - X[column].cat.add_categories([self.dict_missing_value_per_col[column]], - inplace=True) - X.fillna({column: self.dict_missing_value_per_col[column]}, inplace=True) return X + + +def has_object_columns( + feature_types: pd.Series, +) -> bool: + """ + Indicate whether on a Series of dtypes for a Pandas DataFrame + there exists one or more object columns. + + Args: + feature_types (pd.Series): The feature types for a DataFrame. + + Returns: + bool: + True if the DataFrame dtypes contain an object column, False + otherwise. + """ + return np.dtype('O') in feature_types diff --git a/autoPyTorch/data/tabular_target_validator.py b/autoPyTorch/data/tabular_target_validator.py index 239791768..7cbd88c38 100644 --- a/autoPyTorch/data/tabular_target_validator.py +++ b/autoPyTorch/data/tabular_target_validator.py @@ -28,7 +28,7 @@ def _fit( It does so by also using the classes from the test data, to prevent encoding errors - Arguments: + Args: y_train (SUPPORTED_TARGET_TYPES) The labels of the current task. They are going to be encoded in case of classification @@ -100,7 +100,7 @@ def transform( Validates and fit a categorical encoder (if needed) to the features. The supported data types are List, numpy arrays and pandas DataFrames. 
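A quick usage sketch for the new `has_object_columns` helper (the DataFrame is made up, and the helper is re-stated inline so the snippet is self-contained):

    import numpy as np
    import pandas as pd

    def has_object_columns(feature_types: pd.Series) -> bool:
        # same membership test as the helper above
        return np.dtype('O') in feature_types

    df = pd.DataFrame({'age': [20, 30], 'name': ['anna', 'bob']})
    print(has_object_columns(df.dtypes.values))           # True: 'name' has dtype object
    print(has_object_columns(df[['age']].dtypes.values))  # False: only numerical dtypes

When it returns True, `_check_data` calls `infer_objects`, which now casts every remaining non-numeric column to `category`.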
- Arguments: + Args: y (SUPPORTED_TARGET_TYPES) A set of targets that are going to be encoded if the current task is classification @@ -152,7 +152,7 @@ def inverse_transform( """ Revert any encoding transformation done on a target array - Arguments: + Args: y (typing.Union[np.ndarray, pd.DataFrame, pd.Series]): Target array to be transformed back to original form before encoding Returns: @@ -189,7 +189,7 @@ def _check_data( """ Perform dimensionality and data type checks on the targets - Arguments: + Args: y (typing.Union[np.ndarray, pd.DataFrame, pd.Series]): A set of features whose dimensionality and data type is going to be checked """ diff --git a/autoPyTorch/datasets/base_dataset.py b/autoPyTorch/datasets/base_dataset.py index 8cb951977..f041be5ec 100644 --- a/autoPyTorch/datasets/base_dataset.py +++ b/autoPyTorch/datasets/base_dataset.py @@ -322,7 +322,7 @@ def create_holdout_val_split( self.random_state, val_share, self._get_indices(), **kwargs) return train, val - def get_dataset_for_training(self, split_id: int, train: bool, subset: int = 0) -> Dataset: + def get_dataset_for_training(self, split_id: int, train: bool) -> Dataset: """ The above split methods employ the Subset to internally subsample the whole dataset. @@ -330,13 +330,22 @@ def get_dataset_for_training(self, split_id: int, train: bool, subset: int = 0) to provide training data to fit a pipeline Args: - split (int): The desired subset of the dataset to split and use + split_id (int): which split id to get from the splits + train (bool): whether the dataset is required for training or evaluating. Returns: + Dataset: the reduced dataset to be used for testing """ # Subset creates a dataset. Splits is a (train_indices, test_indices) tuple - return TransformSubset(self, self.splits[split_id][subset], train=train) + if split_id >= len(self.splits): # old version: split_id > len(self.splits) + raise IndexError("split_id out of range, got split_id={}" + " (>= num_splits={})".format(split_id, len(self.splits))) + subset = int(not train) + indices = self.splits[split_id][subset] + if indices is None: + raise ValueError("Specified fold (or subset) does not exist") + return TransformSubset(self, indices, train=train) def replace_data(self, X_train: BaseDatasetInputType, X_test: Optional[BaseDatasetInputType]) -> 'BaseDataset': diff --git a/autoPyTorch/ensemble/singlebest_ensemble.py b/autoPyTorch/ensemble/singlebest_ensemble.py index c6fbaf576..6f82cbdf4 100644 --- a/autoPyTorch/ensemble/singlebest_ensemble.py +++ b/autoPyTorch/ensemble/singlebest_ensemble.py @@ -3,7 +3,7 @@ import numpy as np -from smac.runhistory.runhistory import RunHistory +from smac.runhistory.runhistory import RunHistory, StatusType from autoPyTorch.ensemble.abstract_ensemble import AbstractEnsemble from autoPyTorch.pipeline.base_pipeline import BasePipeline @@ -49,6 +49,9 @@ def get_identifiers_from_run_history(self) -> List[Tuple[int, int, float]]: for run_key in self.run_history.data.keys(): run_value = self.run_history.data[run_key] + if run_value.status == StatusType.CRASHED: + continue + score = self.metric._optimum - (self.metric._sign * run_value.cost) if (score > best_model_score and self.metric._sign > 0) \ diff --git a/autoPyTorch/pipeline/base_pipeline.py b/autoPyTorch/pipeline/base_pipeline.py index 842f63271..d98be9bd4 100644 --- a/autoPyTorch/pipeline/base_pipeline.py +++ b/autoPyTorch/pipeline/base_pipeline.py @@ -21,7 +21,9 @@ get_match_array ) from autoPyTorch.utils.common import FitRequirement -from 
autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates +from autoPyTorch.utils.hyperparameter_search_space_update import ( + HyperparameterSearchSpaceUpdates +) class BasePipeline(Pipeline): @@ -398,6 +400,7 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]], raise ValueError("Unknown node name. Expected update node name to be in {} " "got {}".format(self.named_steps.keys(), update.node_name)) node = self.named_steps[update.node_name] + node_name = node.__class__.__name__ # if node is a choice module if hasattr(node, 'get_components'): split_hyperparameter = update.hyperparameter.split(':') @@ -425,18 +428,18 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]], if choice in exclude[update.node_name]: raise ValueError("Found {} in exclude".format(choice)) if choice not in components.keys(): - raise ValueError("Unknown hyperparameter for choice {}. " + raise ValueError("Unknown component choice for node {}. " "Expected update hyperparameter " - "to be in {} got {}".format(node.__class__.__name__, - components.keys(), choice)) + "to be in {}, but got {}".format(node_name, + components.keys(), choice)) # check if the component whose hyperparameter # needs to be updated is in components of the # choice module elif split_hyperparameter[0] not in components.keys(): - raise ValueError("Unknown hyperparameter for choice {}. " - "Expected update hyperparameter " - "to be in {} got {}".format(node.__class__.__name__, - components.keys(), split_hyperparameter[0])) + raise ValueError("Unknown component choice for node {}. " + "Expected update component " + "to be in {}, but got {}".format(node_name, + components.keys(), split_hyperparameter[0])) else: # check if hyperparameter is in the search space of the component component = components[split_hyperparameter[0]] @@ -449,14 +452,16 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]], component.get_hyperparameter_search_space( dataset_properties=self.dataset_properties).get_hyperparameter_names()]): continue - raise ValueError("Unknown hyperparameter for component {}. " - "Expected update hyperparameter " - "to be in {} got {}".format(node.__class__.__name__, - component. - get_hyperparameter_search_space( - dataset_properties=self.dataset_properties). - get_hyperparameter_names(), - split_hyperparameter[1])) + component_hyperparameters = component.get_hyperparameter_search_space( + dataset_properties=self.dataset_properties).get_hyperparameter_names() + raise ValueError("Unknown hyperparameter for component {} of node {}." + " Expected update hyperparameter " + "to be in {}, but got {}.".format(component.__name__, + node_name, + component_hyperparameters, + split_hyperparameter[1] + ) + ) else: if update.hyperparameter not in node.get_hyperparameter_search_space( dataset_properties=self.dataset_properties): @@ -464,13 +469,13 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]], node.get_hyperparameter_search_space( dataset_properties=self.dataset_properties).get_hyperparameter_names()]): continue - raise ValueError("Unknown hyperparameter for component {}. " + node_hyperparameters = node.get_hyperparameter_search_space( + dataset_properties=self.dataset_properties).get_hyperparameter_names() + raise ValueError("Unknown hyperparameter for node {}. " "Expected update hyperparameter " - "to be in {} got {}".format(node.__class__.__name__, - node. - get_hyperparameter_search_space( - dataset_properties=self.dataset_properties). 
- get_hyperparameter_names(), update.hyperparameter)) + "to be in {}, but got {}".format(node_name, + node_hyperparameters, + update.hyperparameter)) def _get_pipeline_steps(self, dataset_properties: Optional[Dict[str, Any]] ) -> List[Tuple[str, autoPyTorchChoice]]: diff --git a/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/ImageNormalizer.py b/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/ImageNormalizer.py index 4327d6346..a3be8fa79 100644 --- a/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/ImageNormalizer.py +++ b/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/ImageNormalizer.py @@ -2,7 +2,7 @@ import numpy as np -import torch.tensor +import torch from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise.base_normalizer import BaseNormalizer @@ -30,16 +30,16 @@ def fit(self, X: Dict[str, Any], y: Optional[Any] = None) -> "ImageNormalizer": self.std = X['dataset_properties']['std'] return self - def __call__(self, X: Union[np.ndarray, torch.tensor]) -> Union[np.ndarray, torch.tensor]: + def __call__(self, X: Union[np.ndarray, torch.Tensor]) -> Union[np.ndarray, torch.Tensor]: """ Makes the autoPyTorchPreprocessingComponent Callable. Calling the component calls the transform function of the underlying early_preprocessor and returns the transformed array. Args: - X (Union[np.ndarray, torch.tensor]): input data tensor + X (Union[np.ndarray, torch.Tensor]): input data tensor Returns: - Union[np.ndarray, torch.tensor]: Transformed data tensor + Union[np.ndarray, torch.Tensor]: Transformed data tensor """ X = (X - self.mean) / self.std return X diff --git a/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/NoNormalizer.py b/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/NoNormalizer.py index 7aeb83a9c..b36a50f4e 100644 --- a/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/NoNormalizer.py +++ b/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/NoNormalizer.py @@ -2,7 +2,7 @@ import numpy as np -import torch.tensor +import torch from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise.base_normalizer import ( BaseNormalizer @@ -34,16 +34,16 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: X.update({'normalise': self}) return X - def __call__(self, X: Union[np.ndarray, torch.tensor]) -> Union[np.ndarray, torch.tensor]: + def __call__(self, X: Union[np.ndarray, torch.Tensor]) -> Union[np.ndarray, torch.Tensor]: """ Makes the autoPyTorchPreprocessingComponent Callable. Calling the component calls the transform function of the underlying early_preprocessor and returns the transformed array. 
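The normalizer change above only touches the type annotations; for reference, a minimal sketch of the computation `ImageNormalizer.__call__` performs, with made-up channel statistics (in the pipeline, `mean` and `std` are read from the dataset properties):

    import torch

    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

    x = torch.rand(3, 8, 8)          # a fake 3-channel image
    normalised = (x - mean) / std    # same element-wise operation as in __call__
    print(normalised.shape)          # torch.Size([3, 8, 8])

`NoNormalizer.__call__` below is the identity and simply returns its input unchanged.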
Args: - X (Union[np.ndarray, torch.tensor]): input data tensor + X (Union[np.ndarray, torch.Tensor]): input data tensor Returns: - Union[np.ndarray, torch.tensor]: Transformed data tensor + Union[np.ndarray, torch.Tensor]: Transformed data tensor """ return X diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py index e1e08e94e..e513b8729 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py @@ -3,14 +3,14 @@ import numpy as np from sklearn.compose import ColumnTransformer -from sklearn.pipeline import make_pipeline +# from sklearn.pipeline import make_pipeline import torch from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import ( autoPyTorchTabularPreprocessingComponent ) -from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.utils import get_tabular_preprocessers +# from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.utils import get_tabular_preprocessers from autoPyTorch.utils.common import FitRequirement, subsampler @@ -47,15 +47,16 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> "TabularColumnTransformer": Returns: "TabularColumnTransformer": an instance of self """ + self.check_requirements(X, y) - numerical_pipeline = 'drop' - categorical_pipeline = 'drop' + numerical_pipeline = 'passthrough' + categorical_pipeline = 'passthrough' - preprocessors = get_tabular_preprocessers(X) - if len(X['dataset_properties']['numerical_columns']): - numerical_pipeline = make_pipeline(*preprocessors['numerical']) - if len(X['dataset_properties']['categorical_columns']): - categorical_pipeline = make_pipeline(*preprocessors['categorical']) + # preprocessors = get_tabular_preprocessers(X) + # if len(X['dataset_properties']['numerical_columns']): + # numerical_pipeline = make_pipeline(*preprocessors['numerical']) + # if len(X['dataset_properties']['categorical_columns']): + # categorical_pipeline = make_pipeline(*preprocessors['categorical']) self.preprocessor = ColumnTransformer([ ('numerical_pipeline', numerical_pipeline, X['dataset_properties']['numerical_columns']), @@ -71,6 +72,7 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> "TabularColumnTransformer": X_train = X['backend'].load_datamanager().train_tensors[0] self.preprocessor.fit(X_train) + return self def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder.py index eadc0a188..9829cadcd 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder.py @@ -28,5 +28,5 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: if self.preprocessor['numerical'] is None and self.preprocessor['categorical'] is None: raise ValueError("cant call transform on {} without fitting first." 
.format(self.__class__.__name__)) - X.update({'encoder': self.preprocessor}) + # X.update({'encoder': self.preprocessor}) return X diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/base_imputer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/base_imputer.py index b65f3c229..ac0648481 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/base_imputer.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/base_imputer.py @@ -29,5 +29,5 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: if self.preprocessor['numerical'] is None and self.preprocessor['categorical'] is None: raise ValueError("cant call transform on {} without fitting first." .format(self.__class__.__name__)) - X.update({'imputer': self.preprocessor}) + # X.update({'imputer': self.preprocessor}) return X diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler.py index 39834dd2b..270fac246 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler.py @@ -28,5 +28,5 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: if self.preprocessor['numerical'] is None and self.preprocessor['categorical'] is None: raise ValueError("cant call transform on {} without fitting first." .format(self.__class__.__name__)) - X.update({'scaler': self.preprocessor}) + # X.update({'scaler': self.preprocessor}) return X diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py index 069ca4679..4a7893f94 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py @@ -139,6 +139,14 @@ def get_hyperparameter_search_space( value_range=(True, False), default_value=True, ), + shake_shake_update_func: HyperparameterSearchSpace = HyperparameterSearchSpace( + hyperparameter="shake_shake_update_func", + value_range=('shake-shake', + 'shake-even', + 'even-even', + 'M3'), + default_value='shake-shake', + ), use_shake_drop: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="use_shake_drop", value_range=(True, False), default_value=True, @@ -180,18 +188,25 @@ def get_hyperparameter_search_space( if skip_connection_flag: - shake_drop_prob_flag = False - if 'shake-drop' in multi_branch_choice.value_range: - shake_drop_prob_flag = True + shake_shake_flag = 'shake-shake' in multi_branch_choice.value_range + shake_drop_prob_flag = 'shake-drop' in multi_branch_choice.value_range mb_choice = get_hyperparameter(multi_branch_choice, CategoricalHyperparameter) cs.add_hyperparameter(mb_choice) cs.add_condition(CS.EqualsCondition(mb_choice, use_sc, True)) + shake_shake_update_func_conditional: List[str] = list() if shake_drop_prob_flag: shake_drop_prob = get_hyperparameter(max_shake_drop_probability, UniformFloatHyperparameter) cs.add_hyperparameter(shake_drop_prob) cs.add_condition(CS.EqualsCondition(shake_drop_prob, mb_choice, "shake-drop")) + shake_shake_update_func_conditional.append('shake-drop') + if shake_shake_flag: + shake_shake_update_func_conditional.append('shake-shake') + if 
len(shake_shake_update_func_conditional) > 0: + method = get_hyperparameter(shake_shake_update_func, CategoricalHyperparameter) + cs.add_hyperparameter(method) + cs.add_condition(CS.InCondition(method, mb_choice, shake_shake_update_func_conditional)) # It is the upper bound of the nr of groups, # since the configuration will actually be sampled. @@ -327,11 +342,14 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor: if self.config["multi_branch_choice"] == 'shake-shake': x1 = self.layers(x) x2 = self.shake_shake_layers(x) - alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda) + alpha, beta = shake_get_alpha_beta(is_training=self.training, + is_cuda=x.is_cuda, + method=self.config['shake_shake_update_func']) x = shake_shake(x1, x2, alpha, beta) elif self.config["multi_branch_choice"] == 'shake-drop': x = self.layers(x) - alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda) + alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda, + method=self.config['shake_shake_update_func']) bl = shake_drop_get_bl( self.block_index, 1 - self.config["max_shake_drop_probability"], diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py index e0867cdd3..f9ad4e6e0 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py @@ -145,6 +145,14 @@ def get_hyperparameter_search_space( # type: ignore[override] 'stairs'), default_value='funnel', ), + shake_shake_update_func: HyperparameterSearchSpace = HyperparameterSearchSpace( + hyperparameter="shake_shake_update_func", + value_range=('shake-shake', + 'shake-even', + 'even-even', + 'M3'), + default_value='shake-shake', + ), max_shake_drop_probability: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="max_shake_drop_probability", value_range=(0, 1), @@ -188,17 +196,24 @@ def get_hyperparameter_search_space( # type: ignore[override] if skip_connection_flag: - shake_drop_prob_flag = False - if 'shake-drop' in multi_branch_choice.value_range: - shake_drop_prob_flag = True + shake_shake_flag = 'shake-shake' in multi_branch_choice.value_range + shake_drop_prob_flag = 'shake-drop' in multi_branch_choice.value_range mb_choice = get_hyperparameter(multi_branch_choice, CategoricalHyperparameter) cs.add_hyperparameter(mb_choice) cs.add_condition(CS.EqualsCondition(mb_choice, use_sc, True)) + shake_shake_update_func_conditional: List[str] = list() if shake_drop_prob_flag: shake_drop_prob = get_hyperparameter(max_shake_drop_probability, UniformFloatHyperparameter) cs.add_hyperparameter(shake_drop_prob) cs.add_condition(CS.EqualsCondition(shake_drop_prob, mb_choice, "shake-drop")) + shake_shake_update_func_conditional.append('shake-drop') + if shake_shake_flag: + shake_shake_update_func_conditional.append('shake-shake') + if len(shake_shake_update_func_conditional) > 0: + method = get_hyperparameter(shake_shake_update_func, CategoricalHyperparameter) + cs.add_hyperparameter(method) + cs.add_condition(CS.InCondition(method, mb_choice, shake_shake_update_func_conditional)) return cs diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py index ef19beac8..96390d003 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py @@ 
-28,6 +28,7 @@ def get_output_shape(network: torch.nn.Module, input_shape: typing.Tuple[int, .. placeholder = torch.randn((2, *input_shape), dtype=torch.float) with torch.no_grad(): output = network(placeholder) + return tuple(output.shape[1:]) @@ -37,8 +38,8 @@ def forward( ctx: typing.Any, # No typing for AutogradContext x1: torch.Tensor, x2: torch.Tensor, - alpha: torch.tensor, - beta: torch.tensor, + alpha: torch.Tensor, + beta: torch.Tensor, ) -> torch.Tensor: ctx.save_for_backward(x1, x2, alpha, beta) @@ -66,10 +67,10 @@ def backward(ctx: typing.Any, class ShakeDropFunction(Function): @staticmethod def forward(ctx: typing.Any, - x: torch.tensor, - alpha: torch.tensor, - beta: torch.tensor, - bl: torch.tensor, + x: torch.Tensor, + alpha: torch.Tensor, + beta: torch.Tensor, + bl: torch.Tensor, ) -> torch.Tensor: ctx.save_for_backward(x, alpha, beta, bl) @@ -92,15 +93,35 @@ def backward(ctx: typing.Any, shake_drop = ShakeDropFunction.apply -def shake_get_alpha_beta(is_training: bool, is_cuda: bool - ) -> typing.Tuple[torch.tensor, torch.tensor]: +def shake_get_alpha_beta( + is_training: bool, + is_cuda: bool, + method: str +) -> typing.Tuple[torch.Tensor, torch.Tensor]: + """ + The methods used in this function have been introduced in 'ShakeShake Regularisation' + https://arxiv.org/abs/1705.07485. The names have been taken from the paper as well. + Currently, this function supports `even-even`, `shake-even`, `shake-shake` and `M3`. + """ if not is_training: result = (torch.FloatTensor([0.5]), torch.FloatTensor([0.5])) return result if not is_cuda else (result[0].cuda(), result[1].cuda()) # TODO implement other update methods - alpha = torch.rand(1) - beta = torch.rand(1) + # alpha is the weight ratio for the forward pass and beta is that for the backward pass + alpha = torch.FloatTensor([0.5]) if method.startswith('even') else torch.rand(1) + if method.endswith('even'): + beta = torch.FloatTensor([0.5]) + elif method.endswith('shake'): + beta = torch.rand(1) + elif method == 'M3': + # Table 4 in the paper `Shake-Shake regularization` + rnd = torch.rand(1) + beta = torch.FloatTensor( + [rnd * (0.5 - alpha) + alpha if alpha < 0.5 else rnd * (alpha - 0.5) + 0.5] + ) + else: + raise ValueError("Unknown method for ShakeShakeRegularisation in NetworkBackbone") if is_cuda: alpha = alpha.cuda() @@ -110,19 +131,36 @@ def shake_get_alpha_beta(is_training: bool, is_cuda: bool def shake_drop_get_bl( - block_index: int, - min_prob_no_shake: float, - num_blocks: int, - is_training: bool, - is_cuda: bool -) -> torch.tensor: + block_index: int, + min_prob_no_shake: float, + num_blocks: int, + is_training: bool, + is_cuda: bool +) -> torch.Tensor: + """ + The sampling of Bernoulli random variable + based on Eq. (4) in the paper + Args: + block_index (int): The index of the block from the input layer + min_prob_no_shake (float): The initial shake probability + num_blocks (int): The total number of building blocks + is_training (bool): Whether it is training + is_cuda (bool): Whether the tensor is on CUDA + Returns: + bl (torch.Tensor): a Bernoulli random variable in {0, 1} + Reference: + ShakeDrop Regularization for Deep Residual Learning + Yoshihiro Yamada et. al. 
(2020) + paper: https://arxiv.org/pdf/1802.02375.pdf + implementation: https://github.com/imenurok/ShakeDrop + """ pl = 1 - ((block_index + 1) / num_blocks) * (1 - min_prob_no_shake) if is_training: # Move to torch.randn(1) for reproducibility - bl = torch.tensor(1.0) if torch.randn(1) <= pl else torch.tensor(0.0) + bl = torch.as_tensor(1.0) if torch.rand(1) <= pl else torch.as_tensor(0.0) else: - bl = torch.tensor(pl) + bl = torch.as_tensor(pl) if is_cuda: bl = bl.cuda() diff --git a/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py b/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py index 5ae2880ed..6feac0fba 100644 --- a/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py +++ b/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py @@ -1,4 +1,4 @@ -import copy +# import copy from typing import Any, Dict, Optional, Tuple import numpy as np @@ -31,21 +31,22 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: def build_embedding(self, num_input_features: np.ndarray, num_numerical_features: int) -> nn.Module: raise NotImplementedError - def _get_args(self, X: Dict[str, Any]) -> Tuple[int, np.ndarray]: + def _get_args(self, X: Dict[str, Any]) -> Tuple[None, None]: # Tuple[int, np.ndarray]: # Feature preprocessors can alter numerical columns - if len(X['dataset_properties']['numerical_columns']) == 0: - num_numerical_columns = 0 - else: - X_train = copy.deepcopy(X['backend'].load_datamanager().train_tensors[0][:2]) - - numerical_column_transformer = X['tabular_transformer'].preprocessor. \ - named_transformers_['numerical_pipeline'] - num_numerical_columns = numerical_column_transformer.transform( - X_train[:, X['dataset_properties']['numerical_columns']]).shape[1] - num_input_features = np.zeros((num_numerical_columns + len(X['dataset_properties']['categorical_columns'])), - dtype=int) - categories = X['dataset_properties']['categories'] - - for i, category in enumerate(categories): - num_input_features[num_numerical_columns + i, ] = len(category) - return num_numerical_columns, num_input_features + # if len(X['dataset_properties']['numerical_columns']) == 0: + # num_numerical_columns = 0 + # else: + # X_train = copy.deepcopy(X['backend'].load_datamanager().train_tensors[0][:2]) + # + # numerical_column_transformer = X['tabular_transformer'].preprocessor. 
\ + # named_transformers_['numerical_pipeline'] + # num_numerical_columns = numerical_column_transformer.transform( + # X_train[:, X['dataset_properties']['numerical_columns']]).shape[1] + # num_input_features = np.zeros((num_numerical_columns + len(X['dataset_properties']['categorical_columns'])), + # dtype=int) + # categories = X['dataset_properties']['categories'] + # + # for i, category in enumerate(categories): + # num_input_features[num_numerical_columns + i, ] = len(category) + # return num_numerical_columns, num_input_features + return None, None diff --git a/autoPyTorch/pipeline/components/setup/optimizer/AdamWOptimizer.py b/autoPyTorch/pipeline/components/setup/optimizer/AdamWOptimizer.py index 4d11c3026..a415ff1c6 100644 --- a/autoPyTorch/pipeline/components/setup/optimizer/AdamWOptimizer.py +++ b/autoPyTorch/pipeline/components/setup/optimizer/AdamWOptimizer.py @@ -95,9 +95,9 @@ def get_hyperparameter_search_space( default_value=True, ), weight_decay: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="weight_decay", - value_range=(1E-7, 0.1), + value_range=(1E-5, 0.1), default_value=1E-4, - log=True), + log=False), ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py index 5b8e445ac..15d568002 100644 --- a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py +++ b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py @@ -115,12 +115,12 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader: shuffle=True, num_workers=X.get('num_workers', 0), pin_memory=X.get('pin_memory', True), - drop_last=X.get('drop_last', True), + drop_last=X.get('drop_last', False), collate_fn=custom_collate_fn, ) if X['val_indices'] is not None: - val_dataset = datamanager.get_dataset_for_training(split_id=X['split_id'], train=False, subset=1) + val_dataset = datamanager.get_dataset_for_training(split_id=X['split_id'], train=False) self.val_data_loader = torch.utils.data.DataLoader( val_dataset, batch_size=min(self.batch_size, len(val_dataset)), diff --git a/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py b/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py index c5a536dd0..709ee197f 100644 --- a/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py @@ -150,7 +150,7 @@ def get_properties(dataset_properties: Optional[Dict[str, Any]] = None 'shortname': 'AdversarialTrainer', 'name': 'AdversarialTrainer', 'handles_tabular': True, - 'handles_image': False, + 'handles_image': True, 'handles_time_series': False, } @@ -189,12 +189,17 @@ def get_hyperparameter_search_space( default_value=3), epsilon: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="epsilon", - value_range=(0.05, 0.2), - default_value=0.2), + value_range=(0.001, 0.15), + default_value=0.007, + log=True), ) -> ConfigurationSpace: cs = ConfigurationSpace() + epsilon = HyperparameterSearchSpace(hyperparameter="epsilon", + value_range=(0.007, 0.007), + default_value=0.007) add_hyperparameter(cs, epsilon, UniformFloatHyperparameter) + add_hyperparameter(cs, use_stochastic_weight_averaging, CategoricalHyperparameter) snapshot_ensemble_flag = False if any(use_snapshot_ensemble.value_range): diff --git 
a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py index 20d02c793..67de50108 100644 --- a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py @@ -1,4 +1,4 @@ -import typing +from typing import Any, Dict, Optional, Tuple, Union import numpy as np @@ -11,7 +11,7 @@ class RowCutMixTrainer(MixUp, BaseTrainerComponent): def data_preparation(self, X: np.ndarray, y: np.ndarray, - ) -> typing.Tuple[np.ndarray, typing.Dict[str, np.ndarray]]: + ) -> Tuple[np.ndarray, Dict[str, np.ndarray]]: """ Depending on the trainer choice, data fed to the network might be pre-processed on a different way. That is, in standard training we provide the data to the @@ -28,29 +28,32 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, """ beta = 1.0 lam = self.random_state.beta(beta, beta) - batch_size = X.size()[0] - index = torch.randperm(batch_size).cuda() if X.is_cuda else torch.randperm(batch_size) + batch_size, n_columns = np.shape(X) + # shuffled_indices: Shuffled version of torch.arange(batch_size) + shuffled_indices = torch.randperm(batch_size).cuda() if X.is_cuda else torch.randperm(batch_size) r = self.random_state.rand(1) if beta <= 0 or r > self.alpha: - return X, {'y_a': y, 'y_b': y[index], 'lam': 1} + return X, {'y_a': y, 'y_b': y[shuffled_indices], 'lam': 1} - size = X.shape[1] - indices = torch.tensor(self.random_state.choice(range(1, size), max(1, np.int32(size * lam)), - replace=False)) + cut_column_indices = torch.as_tensor(self.random_state.choice(range(n_columns), + max(1, np.int32(n_columns * lam)), + replace=False)) - X[:, indices] = X[index, :][:, indices] + # Replace the values in `cut_indices` columns with + # the values from `permed_indices` + X[:, cut_column_indices] = X[shuffled_indices, :][:, cut_column_indices] - # Adjust lam - lam = 1 - ((len(indices)) / (X.size()[1])) + # Since we cannot cut exactly `lam x 100 %` of rows, we need to adjust the `lam` + lam = 1 - (len(cut_column_indices) / n_columns) - y_a, y_b = y, y[index] + y_a, y_b = y, y[shuffled_indices] return X, {'y_a': y_a, 'y_b': y_b, 'lam': lam} @staticmethod - def get_properties(dataset_properties: typing.Optional[typing.Dict[str, typing.Any]] = None - ) -> typing.Dict[str, typing.Union[str, bool]]: + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None + ) -> Dict[str, Union[str, bool]]: return { 'shortname': 'RowCutMixTrainer', 'name': 'MixUp Regularized with Cutoff Tabular Trainer', diff --git a/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py b/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py index c09603523..fffc35476 100644 --- a/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py @@ -1,19 +1,15 @@ -import typing +from typing import Any, Dict, Optional, Tuple, Union import numpy as np -import torch - from autoPyTorch.pipeline.components.training.trainer.base_trainer import BaseTrainerComponent from autoPyTorch.pipeline.components.training.trainer.cutout_utils import CutOut class RowCutOutTrainer(CutOut, BaseTrainerComponent): - NUMERICAL_VALUE = 0 - CATEGORICAL_VALUE = -1 def data_preparation(self, X: np.ndarray, y: np.ndarray, - ) -> typing.Tuple[np.ndarray, typing.Dict[str, np.ndarray]]: + ) -> Tuple[np.ndarray, Dict[str, np.ndarray]]: """ Depending on the trainer choice, data fed 
to the network might be pre-processed on a different way. That is, in standard training we provide the data to the @@ -26,7 +22,7 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, Returns: np.ndarray: that processes data - typing.Dict[str, np.ndarray]: arguments to the criterion function + Dict[str, np.ndarray]: arguments to the criterion function """ r = self.random_state.rand(1) @@ -36,26 +32,10 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, lam = 1 return X, {'y_a': y_a, 'y_b': y_b, 'lam': lam} - size = X.shape[1] - indices = self.random_state.choice(range(1, size), max(1, np.int32(size * self.patch_ratio)), + size: int = np.shape(X)[1] + indices = self.random_state.choice(range(size), max(1, np.int32(size * self.patch_ratio)), replace=False) - """if not isinstance(self.numerical_columns, typing.Iterable): - raise ValueError("{} requires numerical columns information of {}" - "to prepare data got {}.".format(self.__class__.__name__, - typing.Iterable, - self.numerical_columns)) - numerical_indices = torch.tensor(self.numerical_columns) - categorical_indices = torch.tensor([index for index in indices if index not in self.numerical_columns]) - - # We use an ordinal encoder on the categorical columns of tabular data - # -1 is the conceptual equivalent to 0 in a image, that does not - # have color as a feature and hence the network has to learn to deal - # without this data. For numerical columns we use 0 to cutout the features - # similar to the effect that setting 0 as a pixel value in an image. - X[:, categorical_indices.long()] = self.CATEGORICAL_VALUE - X[:, numerical_indices.long()] = self.NUMERICAL_VALUE - """ X[:, indices] = 0 lam = 1 y_a = y @@ -63,8 +43,8 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, return X, {'y_a': y_a, 'y_b': y_b, 'lam': lam} @staticmethod - def get_properties(dataset_properties: typing.Optional[typing.Dict[str, typing.Any]] = None - ) -> typing.Dict[str, typing.Union[str, bool]]: + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None + ) -> Dict[str, Union[str, bool]]: return { 'shortname': 'RowCutOutTrainer', 'name': 'RowCutOutTrainer', diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py index b77bb729a..60bf7a69b 100644 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py @@ -318,8 +318,12 @@ def on_epoch_end(self, X: Dict[str, Any], epoch: int) -> bool: if self.use_snapshot_ensemble: assert self.model_snapshots is not None, "model snapshots container can't be " \ "none when snapshot ensembling is enabled" - model_copy = deepcopy(self.swa_model) if self.use_stochastic_weight_averaging \ - else deepcopy(self.model) + is_last_epoch = (epoch == self.budget_tracker.max_epochs) + if is_last_epoch and self.use_stochastic_weight_averaging: + model_copy = deepcopy(self.swa_model) + else: + model_copy = deepcopy(self.model) + assert model_copy is not None model_copy.cpu() self.model_snapshots.append(model_copy) diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py index 502445c14..a344e92ce 100755 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py @@ -285,7 +285,6 @@ def fit(self, X: Dict[str, Any], y: Any = None, **kwargs: 
Any) -> autoPyTorchCom self.choice: autoPyTorchComponent = cast(autoPyTorchComponent, self.choice) if self.choice.use_snapshot_ensemble: X['network_snapshots'].extend(self.choice.model_snapshots) - return self.choice def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> 'TrainerChoice': @@ -408,8 +407,8 @@ def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> 'TrainerChoic # change model update_model_state_dict_from_swa(X['network'], self.choice.swa_model.state_dict()) if self.choice.use_snapshot_ensemble: - for model in self.choice.model_snapshots: - swa_utils.update_bn(X['train_data_loader'], model.double()) + # we update only the last network which pertains to the stochastic weight averaging model + swa_utils.update_bn(X['train_data_loader'], self.choice.model_snapshots[-1].double()) # wrap up -- add score if not evaluating every epoch if not self.eval_valid_each_epoch(X): diff --git a/autoPyTorch/pipeline/tabular_classification.py b/autoPyTorch/pipeline/tabular_classification.py index d19fc7215..926d6308c 100644 --- a/autoPyTorch/pipeline/tabular_classification.py +++ b/autoPyTorch/pipeline/tabular_classification.py @@ -7,7 +7,6 @@ import numpy as np -import sklearn.preprocessing from sklearn.base import ClassifierMixin import torch @@ -91,7 +90,7 @@ def _predict_proba(self, X: np.ndarray) -> np.ndarray: loader = self.named_steps['data_loader'].get_loader(X=X) pred = self.named_steps['network'].predict(loader) if isinstance(self.dataset_properties['output_shape'], int): - return pred + return pred else: all_proba = [] @@ -103,7 +102,7 @@ def _predict_proba(self, X: np.ndarray) -> np.ndarray: proba_k /= normalizer all_proba.append(proba_k) - return all_proba + return np.array(all_proba) def predict_proba(self, X: np.ndarray, batch_size: Optional[int] = None) -> np.ndarray: """predict_proba. @@ -140,11 +139,6 @@ def predict_proba(self, X: np.ndarray, batch_size: Optional[int] = None) -> np.n pred_prob = self.predict_proba(X[batch_from:batch_to], batch_size=None) y[batch_from:batch_to] = pred_prob.astype(np.float32) - # Neural networks might not be fit to produce a [0-1] output - # For instance, after small number of epochs. - # y = np.clip(y, 0, 1) - # y = sklearn.preprocessing.normalize(y, axis=1, norm='l1') - return y def _get_hyperparameter_search_space(self, diff --git a/autoPyTorch/utils/backend.py b/autoPyTorch/utils/backend.py index c9681adb3..85160af42 100644 --- a/autoPyTorch/utils/backend.py +++ b/autoPyTorch/utils/backend.py @@ -205,12 +205,12 @@ def temporary_directory(self) -> str: def _make_internals_directory(self) -> None: try: - os.makedirs(self.internals_directory) + os.makedirs(self.internals_directory, exist_ok=True) except Exception as e: if self._logger is not None: self._logger.debug("_make_internals_directory: %s" % e) try: - os.makedirs(self.get_runs_directory()) + os.makedirs(self.get_runs_directory(), exist_ok=True) except Exception as e: if self._logger is not None: self._logger.debug("_make_internals_directory: %s" % e) @@ -328,6 +328,20 @@ def load_datamanager(self) -> BaseDataset: with open(filepath, 'rb') as fh: return pickle.load(fh) + def replace_datamanager(self, datamanager: BaseDataset) -> None: + """ + This function is called to replace the old datamanager with a datamanager + in case it is required. + + Args: + datamanager (BaseDataset): the new datamanager to replace the old. 
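
A minimal usage sketch for the `replace_datamanager` helper introduced here. Only `replace_datamanager` and `load_datamanager` come from this patch; the `backend` and `new_dataset` objects are assumed to exist already (for instance a backend taken from a fitted task and a freshly built BaseDataset):

    def swap_dataset(backend, new_dataset):
        # Overwrite the datamanager that was pickled during search();
        # the helper warns that the original dataset will be lost.
        backend.replace_datamanager(datamanager=new_dataset)
        # Subsequent loads now return the replacement dataset.
        return backend.load_datamanager()
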
+ """ + warnings.warn("Original dataset will be overwritten with the provided dataset") + datamanager_pickle_file = self._get_datamanager_pickle_filename() + if os.path.exists(datamanager_pickle_file): + os.remove(datamanager_pickle_file) + self.save_datamanager(datamanager=datamanager) + def get_runs_directory(self) -> str: return os.path.join(self.internals_directory, 'runs') diff --git a/autoPyTorch/utils/common.py b/autoPyTorch/utils/common.py index 98bd20a68..13543b5fc 100644 --- a/autoPyTorch/utils/common.py +++ b/autoPyTorch/utils/common.py @@ -96,7 +96,7 @@ def replace_prefix_in_config_dict(config: Dict[str, Any], prefix: str, replace: k.startswith(prefix)} -def custom_collate_fn(batch: List) -> List[Optional[torch.tensor]]: +def custom_collate_fn(batch: List) -> List[Optional[torch.Tensor]]: """ In the case of not providing a y tensor, in a dataset of form {X, y}, y would be None. diff --git a/examples/tabular/40_advanced/example_custom_configuration_space.py b/examples/tabular/40_advanced/example_custom_configuration_space.py index 6a3764b94..b95ceeaa5 100644 --- a/examples/tabular/40_advanced/example_custom_configuration_space.py +++ b/examples/tabular/40_advanced/example_custom_configuration_space.py @@ -54,6 +54,15 @@ def get_search_space_updates(): hyperparameter='ResNetBackbone:dropout', value_range=[0, 0.5], default_value=0.2) + updates.append(node_name='network_backbone', + hyperparameter='ResNetBackbone:multi_branch_choice', + value_range=['shake-shake'], + default_value='shake-shake') + updates.append(node_name='network_backbone', + hyperparameter='ResNetBackbone:shake_shake_method', + value_range=['M3'], + default_value='M3' + ) return updates @@ -74,7 +83,7 @@ def get_search_space_updates(): # ================================================== api = TabularClassificationTask( search_space_updates=get_search_space_updates(), - include_components={'network_backbone': ['MLPBackbone', 'ResNetBackbone'], + include_components={'network_backbone': ['ResNetBackbone'], 'encoder': ['OneHotEncoder']} ) diff --git a/examples/tabular/40_advanced/example_posthoc_ensemble_fit.py b/examples/tabular/40_advanced/example_posthoc_ensemble_fit.py new file mode 100644 index 000000000..b9383b2a6 --- /dev/null +++ b/examples/tabular/40_advanced/example_posthoc_ensemble_fit.py @@ -0,0 +1,81 @@ +""" +===================================================== +Tabular Classification with Post-Hoc Ensemble Fitting +===================================================== + +The following example shows how to fit a sample classification model +and create an ensemble post-hoc with AutoPyTorch +""" +import os +import tempfile as tmp +import warnings + +os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir() +os.environ['OMP_NUM_THREADS'] = '1' +os.environ['OPENBLAS_NUM_THREADS'] = '1' +os.environ['MKL_NUM_THREADS'] = '1' + +warnings.simplefilter(action='ignore', category=UserWarning) +warnings.simplefilter(action='ignore', category=FutureWarning) + +import sklearn.datasets +import sklearn.model_selection + +from autoPyTorch.api.tabular_classification import TabularClassificationTask + + +if __name__ == '__main__': + + ############################################################################ + # Data Loading + # ============ + X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True) + X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, + y, + random_state=42, + ) + + ############################################################################ + # Build 
and fit a classifier + # ========================== + api = TabularClassificationTask( + ensemble_size=0, + seed=42, + ) + + ############################################################################ + # Search for the best neural network + # ================================== + api.search( + X_train=X_train, + y_train=y_train, + X_test=X_test.copy(), + y_test=y_test.copy(), + optimize_metric='accuracy', + total_walltime_limit=250, + func_eval_time_limit_secs=50 + ) + + ############################################################################ + # Print the final performance of the incumbent neural network + # =========================================================== + print(api.run_history, api.trajectory) + y_pred = api.predict(X_test) + score = api.score(y_pred, y_test) + print(score) + + ############################################################################ + # Fit an ensemble with the neural networks fitted during the search + # ================================================================= + + api.fit_ensemble(ensemble_size=5, + # Set the enable_traditional_pipeline=True + # to also include traditional models + # in the ensemble + enable_traditional_pipeline=False) + # Print the final ensemble built by AutoPyTorch + y_pred = api.predict(X_test) + score = api.score(y_pred, y_test) + print(score) + print(api.show_models()) diff --git a/requirements.txt b/requirements.txt index c79104461..f4a913789 100755 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ imgaug>=0.4.0 ConfigSpace>=0.4.14,<0.5 pynisher>=0.6.3 pyrfr>=0.7,<0.9 -smac>=0.13.1,<0.14 +smac dask distributed>=2.2.0 catboost diff --git a/test/test_data/test_feature_validator.py b/test/test_data/test_feature_validator.py index f9ba2855e..3a70549b0 100644 --- a/test/test_data/test_feature_validator.py +++ b/test/test_data/test_feature_validator.py @@ -1,4 +1,5 @@ import copy +import functools import numpy as np @@ -283,9 +284,9 @@ def test_featurevalidator_fitontypeA_transformtypeB(input_data_featuretest): if isinstance(input_data_featuretest, pd.DataFrame): pytest.skip("Column order change in pandas is not supported") elif isinstance(input_data_featuretest, np.ndarray): - complementary_type = validator.numpy_array_to_pandas(input_data_featuretest) + complementary_type = validator.numpy_to_pandas(input_data_featuretest) elif isinstance(input_data_featuretest, list): - complementary_type, _ = validator.list_to_dataframe(input_data_featuretest) + complementary_type, _ = validator.list_to_pandas(input_data_featuretest) elif sparse.issparse(input_data_featuretest): complementary_type = sparse.csr_matrix(input_data_featuretest.todense()) else: @@ -311,10 +312,121 @@ def test_featurevalidator_get_columns_to_encode(): for col in df.columns: df[col] = df[col].astype(col) - enc_columns, feature_types = validator._get_columns_to_encode(df) + validator.fit(df) - assert enc_columns == ['category', 'bool'] - assert feature_types == ['numerical', 'numerical', 'categorical', 'categorical'] + categorical_columns, numerical_columns, feat_type = validator._get_columns_info(df) + + assert numerical_columns == ['int', 'float'] + assert categorical_columns == ['category', 'bool'] + assert feat_type == ['numerical', 'numerical', 'categorical', 'categorical'] + + +def feature_validator_remove_nan_catcolumns(df_train: pd.DataFrame, df_test: pd.DataFrame, + ans_train: np.ndarray, ans_test: np.ndarray) -> None: + validator = TabularFeatureValidator() + validator.fit(df_train) + transformed_df_train = 
validator.transform(df_train) + transformed_df_test = validator.transform(df_test) + + assert np.array_equal(transformed_df_train, ans_train) + assert np.array_equal(transformed_df_test, ans_test) + + +def test_feature_validator_remove_nan_catcolumns(): + """ + Make sure categorical columns that have only nan values are removed. + Transform performs the folloing: + * simple imputation for both + * scaling for numerical + * one-hot encoding for categorical + For example, + data = [ + {'A': 1, 'B': np.nan, 'C': np.nan}, + {'A': np.nan, 'B': 3, 'C': np.nan}, + {'A': 2, 'B': np.nan, 'C': np.nan} + ] + and suppose all the columns are categorical, + then + * `A` in {np.nan, 1, 2} + * `B` in {np.nan, 3} + * `C` in {np.nan} <=== it will be dropped. + + So in the column A, + * np.nan ==> [1, 0, 0] + * 1 ==> [0, 1, 0] + * 2 ==> [0, 0, 1] + in the column B, + * np.nan ==> [1, 0] + * 3 ==> [0, 1] + Therefore, by concatenating, + * {'A': 1, 'B': np.nan, 'C': np.nan} ==> [0, 1, 0, 1, 0] + * {'A': np.nan, 'B': 3, 'C': np.nan} ==> [1, 0, 0, 0, 1] + * {'A': 2, 'B': np.nan, 'C': np.nan} ==> [0, 0, 1, 1, 0] + """ + # First case, there exist null columns (B and C) in the train set + # and a same column (C) are not all null for the test set. + + df_train = pd.DataFrame( + [ + {'A': 1, 'B': np.nan, 'C': np.nan}, + {'A': np.nan, 'C': np.nan}, + {'A': 1} + ], + dtype='category', + ) + ans_train = np.array([[0, 1], [1, 0], [0, 1]], dtype=np.float64) + df_test = pd.DataFrame( + [ + {'A': np.nan, 'B': np.nan, 'C': 5}, + {'A': np.nan, 'C': np.nan}, + {'A': 1} + ], + dtype='category', + ) + ans_test = np.array([[1, 0], [1, 0], [0, 1]], dtype=np.float64) + feature_validator_remove_nan_catcolumns(df_train, df_test, ans_train, ans_test) + + # Second case, there exist null columns (B and C) in the training set and + # the same columns (B and C) are null in the test set. + df_train = pd.DataFrame( + [ + {'A': 1, 'B': np.nan, 'C': np.nan}, + {'A': np.nan, 'C': np.nan}, + {'A': 1} + ], + dtype='category', + ) + ans_train = np.array([[0, 1], [1, 0], [0, 1]], dtype=np.float64) + df_test = pd.DataFrame( + [ + {'A': np.nan, 'B': np.nan, 'C': np.nan}, + {'A': np.nan, 'C': np.nan}, + {'A': 1} + ], + dtype='category', + ) + ans_test = np.array([[1, 0], [1, 0], [0, 1]], dtype=np.float64) + feature_validator_remove_nan_catcolumns(df_train, df_test, ans_train, ans_test) + + # Third case, there exist no null columns in the training set and + # null columns exist in the test set. 
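
As a cross-check of the worked example in the docstring above, here is a self-contained pandas/numpy sketch (not the validator itself) that reproduces the described mapping, assuming NaN is kept as the first category of every surviving column:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(
        [
            {'A': 1, 'B': np.nan, 'C': np.nan},
            {'A': np.nan, 'B': 3, 'C': np.nan},
            {'A': 2, 'B': np.nan, 'C': np.nan},
        ],
        dtype='category',
    )

    # 1) Drop columns that contain only NaN (column C).
    df = df.drop(columns=[c for c in df.columns if df[c].isna().all()])

    # 2) One-hot encode, treating NaN as its own (first) category per column.
    encoded = []
    for col in df.columns:
        cats = [np.nan] + sorted(df[col].dropna().unique())
        onehot = np.zeros((len(df), len(cats)))
        for row, value in enumerate(df[col]):
            idx = 0 if pd.isna(value) else cats.index(value)
            onehot[row, idx] = 1
        encoded.append(onehot)

    print(np.hstack(encoded))
    # [[0. 1. 0. 1. 0.]
    #  [1. 0. 0. 0. 1.]
    #  [0. 0. 1. 1. 0.]]
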
+ df_train = pd.DataFrame( + [ + {'A': 1, 'B': 1}, + {'A': 2, 'B': 2} + ], + dtype='category', + ) + ans_train = np.array([[1, 0, 1, 0], [0, 1, 0, 1]], dtype=np.float64) + df_test = pd.DataFrame( + [ + {'A': np.nan, 'B': np.nan}, + {'A': np.nan, 'B': np.nan} + ], + dtype='category', + ) + ans_test = np.array([[0, 0, 0, 0], [0, 0, 0, 0]], dtype=np.float64) + feature_validator_remove_nan_catcolumns(df_train, df_test, ans_train, ans_test) def test_features_unsupported_calls_are_raised(): @@ -550,15 +662,16 @@ def test_feature_validator_imbalanced_data(): validator.fit(X_train) train_feature_types = copy.deepcopy(validator.feat_type) - assert train_feature_types == ['numerical', 'numerical', 'numerical', 'numerical'] + assert train_feature_types == ['numerical'] # validator will throw an error if the column types are not the same transformed_X_test = validator.transform(X_test) transformed_X_test = pd.DataFrame(transformed_X_test) - null_columns = [] - for column in transformed_X_test.columns: - if transformed_X_test[column].isna().all(): - null_columns.append(column) - assert null_columns == [0, 2, 3] + assert sorted(validator.all_nan_columns) == sorted(['A', 'C', 'D']) + # as there are no categorical columns, we can make such an + # assertion. We only expect to drop the all nan columns + total_all_nan_columns = len(validator.all_nan_columns) + total_columns = len(validator.column_order) + assert total_columns - total_all_nan_columns == len(transformed_X_test.columns) # Columns with not all null values in the train split and # completely null on the test split. @@ -577,14 +690,33 @@ def test_feature_validator_imbalanced_data(): X_test = pd.DataFrame.from_dict(test_features) validator = TabularFeatureValidator() validator.fit(X_train) + train_feature_types = copy.deepcopy(validator.feat_type) assert train_feature_types == ['categorical', 'numerical', 'numerical'] transformed_X_test = validator.transform(X_test) transformed_X_test = pd.DataFrame(transformed_X_test) - null_columns = [] - for column in transformed_X_test.columns: - if transformed_X_test[column].isna().all(): - null_columns.append(column) - - assert null_columns == [1] + assert not len(validator.all_nan_columns) + + +def test_comparator(): + numerical = 'numerical' + categorical = 'categorical' + + validator = TabularFeatureValidator + + feat_type = [numerical, categorical] * 10 + ans = [categorical] * 10 + [numerical] * 10 + feat_type = sorted( + feat_type, + key=functools.cmp_to_key(validator._comparator) + ) + assert ans == feat_type + + feat_type = [numerical] * 10 + [categorical] * 10 + ans = [categorical] * 10 + [numerical] * 10 + feat_type = sorted( + feat_type, + key=functools.cmp_to_key(validator._comparator) + ) + assert ans == feat_type From 00f80cbdf709f6fdc6477827b8aad5521967a5e2 Mon Sep 17 00:00:00 2001 From: ArlindKadra Date: Wed, 20 Oct 2021 17:20:04 +0200 Subject: [PATCH 08/26] Addressing Shuhei's comments --- .../components/setup/network/base_network.py | 1 + .../setup/network_backbone/ResNetBackbone.py | 86 ++++++++++--------- .../training/trainer/AdversarialTrainer.py | 4 +- .../training/trainer/base_trainer.py | 4 +- .../training/trainer/cutout_utils.py | 4 +- .../training/trainer/mixup_utils.py | 4 +- .../pipeline/tabular_classification.py | 2 +- 7 files changed, 54 insertions(+), 51 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network/base_network.py b/autoPyTorch/pipeline/components/setup/network/base_network.py index 3f246b6fa..512c8360e 100644 --- 
a/autoPyTorch/pipeline/components/setup/network/base_network.py +++ b/autoPyTorch/pipeline/components/setup/network/base_network.py @@ -131,6 +131,7 @@ def _predict(self, network: torch.nn.Module, loader: torch.utils.data.DataLoader # Batch prediction Y_batch_preds = list() + # `torch.no_grad` reduces memory usage even after `model.eval()` with torch.no_grad(): for i, (X_batch, Y_batch) in enumerate(loader): # Predict on batch diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py index 4a7893f94..75a45d712 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py @@ -275,20 +275,19 @@ def __init__( # if in != out the shortcut needs a linear layer to match the result dimensions # if the shortcut needs a layer we apply batchnorm and activation to the shortcut # as well (start_norm) - if in_features != out_features: - if self.config["use_skip_connection"]: - self.shortcut = nn.Linear(in_features, out_features) - initial_normalization = list() - if self.config['use_batch_norm']: - initial_normalization.append( - nn.BatchNorm1d(in_features) - ) + if in_features != out_features and self.config["use_skip_connection"]: + self.shortcut = nn.Linear(in_features, out_features) + initial_normalization = list() + if self.config['use_batch_norm']: initial_normalization.append( - self.activation() - ) - self.start_norm = nn.Sequential( - *initial_normalization + nn.BatchNorm1d(in_features) ) + initial_normalization.append( + self.activation() + ) + self.start_norm = nn.Sequential( + *initial_normalization + ) self.block_index = block_index self.num_blocks = blocks_per_group * self.config["num_groups"] @@ -321,9 +320,6 @@ def _build_block(self, in_features: int, out_features: int) -> nn.Module: def forward(self, x: torch.FloatTensor) -> torch.FloatTensor: - if self.config["use_skip_connection"]: - residual = x - # if shortcut is not none we need a layer such that x matches the output dimension if self.shortcut is not None and self.start_norm is not None: # in this case self.start_norm is also != none @@ -331,38 +327,44 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor: # in front of shortcut and layers. Note that in this case layers # does not start with batchnorm+activation but with the first linear layer # (see _build_block). 
As a result if in_features == out_features - # -> result = x + W(~D(A(BN(W(A(BN(x)))))) + # -> result = x + W_2(~D(A(BN(W_1(A(BN(x)))))) # if in_features != out_features # -> result = W_shortcut(A(BN(x))) + W_2(~D(A(BN(W_1(A(BN(x)))))) x = self.start_norm(x) residual = self.shortcut(x) - - # TODO make the below code better - if self.config["use_skip_connection"]: - if self.config["multi_branch_choice"] == 'shake-shake': - x1 = self.layers(x) - x2 = self.shake_shake_layers(x) - alpha, beta = shake_get_alpha_beta(is_training=self.training, - is_cuda=x.is_cuda, - method=self.config['shake_shake_update_func']) - x = shake_shake(x1, x2, alpha, beta) - elif self.config["multi_branch_choice"] == 'shake-drop': - x = self.layers(x) - alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda, - method=self.config['shake_shake_update_func']) - bl = shake_drop_get_bl( - self.block_index, - 1 - self.config["max_shake_drop_probability"], - self.num_blocks, - self.training, - x.is_cuda, - ) - x = shake_drop(x, alpha, beta, bl) + else: + if not self.config["use_skip_connection"]: + # Early-return + return self.layers(x) else: - x = self.layers(x) - - x = x + residual + # We use a skip connection but we do not need to match dimensions + residual = x + + if self.config["multi_branch_choice"] == 'shake-shake': + x1 = self.layers(x) + x2 = self.shake_shake_layers(x) + alpha, beta = shake_get_alpha_beta( + is_training=self.training, + is_cuda=x.is_cuda, + method=self.config['shake_shake_update_func'], + ) + x = shake_shake(x1, x2, alpha, beta) + elif self.config["multi_branch_choice"] == 'shake-drop': + x = self.layers(x) + alpha, beta = shake_get_alpha_beta( + self.training, + x.is_cuda, + method=self.config['shake_shake_update_func'], + ) + bl = shake_drop_get_bl( + self.block_index, + 1 - self.config["max_shake_drop_probability"], + self.num_blocks, + self.training, + x.is_cuda, + ) + x = shake_drop(x, alpha, beta, bl) else: x = self.layers(x) - return x + return x + residual diff --git a/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py b/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py index 709ee197f..c83625a80 100644 --- a/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py @@ -159,7 +159,7 @@ def get_hyperparameter_search_space( dataset_properties: Optional[Dict] = None, weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="weighted_loss", - value_range=[1], + value_range=(1, ), default_value=1), la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="la_steps", @@ -185,7 +185,7 @@ def get_hyperparameter_search_space( default_value=True), se_lastk: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="se_lastk", - value_range=(3,), + value_range=(3, ), default_value=3), epsilon: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="epsilon", diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py index 60bf7a69b..3791e7a7d 100644 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py @@ -541,7 +541,7 @@ def get_hyperparameter_search_space( dataset_properties: Optional[Dict] = None, weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="weighted_loss", - value_range=[1], + 
value_range=(1, ), default_value=1), la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="la_steps", @@ -567,7 +567,7 @@ def get_hyperparameter_search_space( default_value=True), se_lastk: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="se_lastk", - value_range=(3,), + value_range=(3, ), default_value=3), ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py b/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py index c7feb2214..0c253d0ff 100644 --- a/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py +++ b/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py @@ -63,7 +63,7 @@ def get_hyperparameter_search_space( dataset_properties: Optional[Dict] = None, weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="weighted_loss", - value_range=[1], + value_range=(1, ), default_value=1), la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="la_steps", @@ -89,7 +89,7 @@ def get_hyperparameter_search_space( default_value=True), se_lastk: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="se_lastk", - value_range=(3,), + value_range=(3, ), default_value=3), patch_ratio: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="patch_ratio", diff --git a/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py b/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py index a2325b91c..157ca615c 100644 --- a/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py +++ b/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py @@ -61,7 +61,7 @@ def get_hyperparameter_search_space( dataset_properties: Optional[Dict] = None, weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="weighted_loss", - value_range=[1], + value_range=(1, ), default_value=1), la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="la_steps", @@ -87,7 +87,7 @@ def get_hyperparameter_search_space( default_value=True), se_lastk: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="se_lastk", - value_range=(3,), + value_range=(3, ), default_value=3), alpha: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="alpha", diff --git a/autoPyTorch/pipeline/tabular_classification.py b/autoPyTorch/pipeline/tabular_classification.py index 926d6308c..b611cd8b3 100644 --- a/autoPyTorch/pipeline/tabular_classification.py +++ b/autoPyTorch/pipeline/tabular_classification.py @@ -90,8 +90,8 @@ def _predict_proba(self, X: np.ndarray) -> np.ndarray: loader = self.named_steps['data_loader'].get_loader(X=X) pred = self.named_steps['network'].predict(loader) if isinstance(self.dataset_properties['output_shape'], int): + # The final layer is always softmax now (`pred` already gives pseudo proba) return pred - else: all_proba = [] From 88e02281f584af2fd0fef2e5520b35574dbdbe73 Mon Sep 17 00:00:00 2001 From: Arlind Kadra Date: Wed, 20 Oct 2021 18:08:14 +0200 Subject: [PATCH 09/26] flake8 problems fix --- autoPyTorch/data/tabular_feature_validator.py | 5 +++-- .../components/training/trainer/RowCutMixTrainer.py | 10 +++++++--- .../components/training/trainer/RowCutOutTrainer.py | 8 ++++++-- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py index 62bd0b465..46116e25e 100644 --- 
a/autoPyTorch/data/tabular_feature_validator.py +++ b/autoPyTorch/data/tabular_feature_validator.py @@ -28,6 +28,7 @@ def _create_column_transformer( Given a dictionary of preprocessors, this function creates a sklearn column transformer with appropriate columns associated with their preprocessors. + Args: preprocessors (Dict[str, List[BaseEstimator]]): Dictionary containing list of numerical and categorical preprocessors. @@ -512,12 +513,12 @@ def has_object_columns( """ Indicate whether on a Series of dtypes for a Pandas DataFrame there exists one or more object columns. - + Args: feature_types (pd.Series): The feature types for a DataFrame. Returns: - bool: + bool: True if the DataFrame dtypes contain an object column, False otherwise. """ diff --git a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py index 67de50108..3f7866f3c 100644 --- a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py @@ -36,9 +36,13 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, if beta <= 0 or r > self.alpha: return X, {'y_a': y, 'y_b': y[shuffled_indices], 'lam': 1} - cut_column_indices = torch.as_tensor(self.random_state.choice(range(n_columns), - max(1, np.int32(n_columns * lam)), - replace=False)) + cut_column_indices = torch.as_tensor( + self.random_state.choice( + range(n_columns), + max(1, np.int32(n_columns * lam)), + replace=False, + ), + ) # Replace the values in `cut_indices` columns with # the values from `permed_indices` diff --git a/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py b/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py index fffc35476..441b9be28 100644 --- a/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py @@ -33,9 +33,13 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, return X, {'y_a': y_a, 'y_b': y_b, 'lam': lam} size: int = np.shape(X)[1] - indices = self.random_state.choice(range(size), max(1, np.int32(size * self.patch_ratio)), - replace=False) + indices = self.random_state.choice( + range(size), + max(1, np.int32(size * self.patch_ratio)), + replace=False, + ) + # Mask the selected features as 0 X[:, indices] = 0 lam = 1 y_a = y From 3b6ec0323b1321abaed3c100302b8010557a517c Mon Sep 17 00:00:00 2001 From: Arlind Kadra Date: Thu, 21 Oct 2021 12:33:11 +0200 Subject: [PATCH 10/26] Update autoPyTorch/api/base_task.py Add indent. Co-authored-by: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> --- autoPyTorch/api/base_task.py | 1 + 1 file changed, 1 insertion(+) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index a85695801..60e676290 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -1517,6 +1517,7 @@ def _init_ensemble_builder( Models are drawn with replacement. precision (int), (default=32): Numeric precision used when loading ensemble data. Can be either 16, 32 or 64. + Returns: EnsembleBuilderManager """ From a26edbe3835fde42f1ebd576948ae5a7a74d01da Mon Sep 17 00:00:00 2001 From: Arlind Kadra Date: Thu, 21 Oct 2021 12:33:27 +0200 Subject: [PATCH 11/26] Update autoPyTorch/api/base_task.py Add indent. 
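
To make the row-wise CutMix step above concrete, a small standalone sketch follows. The tensors and seed are illustrative, and unlike the trainer it always applies the mix (the trainer draws `lam` from Beta(1, 1) and skips mixing when a uniform draw exceeds `self.alpha`):

    import numpy as np
    import torch

    rng = np.random.RandomState(0)
    X = torch.arange(12, dtype=torch.float32).reshape(4, 3)   # batch of 4 rows, 3 features
    y = torch.tensor([0, 1, 0, 1])

    lam = rng.beta(1.0, 1.0)
    shuffled_indices = torch.randperm(X.shape[0])
    cut_column_indices = torch.as_tensor(
        rng.choice(range(X.shape[1]), max(1, np.int32(X.shape[1] * lam)), replace=False)
    )

    # Copy the selected columns from the shuffled rows into the original rows
    X[:, cut_column_indices] = X[shuffled_indices, :][:, cut_column_indices]

    # Fraction of untouched columns; used to weight the two targets in the loss
    lam = 1 - len(cut_column_indices) / X.shape[1]
    y_a, y_b = y, y[shuffled_indices]
    print(X, lam, y_a, y_b)
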
Co-authored-by: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> --- autoPyTorch/api/base_task.py | 1 + 1 file changed, 1 insertion(+) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 60e676290..15375fbb6 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -1558,6 +1558,7 @@ def _init_ensemble_builder( logger_port=self._logger_port, ) self._stopwatch.stop_task(ensemble_task_name) + return proc_ensemble def predict( From 73a11c93e6806bd3571c77644029c2d08744d239 Mon Sep 17 00:00:00 2001 From: Arlind Kadra Date: Thu, 21 Oct 2021 12:49:03 +0200 Subject: [PATCH 12/26] Update autoPyTorch/data/tabular_feature_validator.py Add indentation. Co-authored-by: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> --- autoPyTorch/data/tabular_feature_validator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py index 46116e25e..ccbc1739f 100644 --- a/autoPyTorch/data/tabular_feature_validator.py +++ b/autoPyTorch/data/tabular_feature_validator.py @@ -36,6 +36,7 @@ def _create_column_transformer( List of names of numerical columns categorical_columns (List[str]): List of names of categorical columns + Returns: ColumnTransformer """ From 37e3537bff195d850cdf5d1d8819f262f240742e Mon Sep 17 00:00:00 2001 From: Arlind Kadra Date: Thu, 21 Oct 2021 12:57:05 +0200 Subject: [PATCH 13/26] Update autoPyTorch/pipeline/components/setup/network_backbone/utils.py Add line indentation. Co-authored-by: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> --- autoPyTorch/pipeline/components/setup/network_backbone/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py index 96390d003..356a00404 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py @@ -146,6 +146,7 @@ def shake_drop_get_bl( num_blocks (int): The total number of building blocks is_training (bool): Whether it is training is_cuda (bool): Whether the tensor is on CUDA + Returns: bl (torch.Tensor): a Bernoulli random variable in {0, 1} Reference: From dc5e8a29acd4a4823478bdcd70b2e084256e430a Mon Sep 17 00:00:00 2001 From: Arlind Kadra Date: Thu, 21 Oct 2021 13:04:47 +0200 Subject: [PATCH 14/26] Update autoPyTorch/data/tabular_feature_validator.py Validate if there is a column transformer since for sparse matrices we will not have one. 
Co-authored-by: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> --- autoPyTorch/data/tabular_feature_validator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py index ccbc1739f..dac3ed058 100644 --- a/autoPyTorch/data/tabular_feature_validator.py +++ b/autoPyTorch/data/tabular_feature_validator.py @@ -252,7 +252,8 @@ def transform( if X[column].isna().all(): X[column] = X[column].astype('object') - X = self.column_transformer.transform(X) + if self.column_transformer is not None: + X = self.column_transformer.transform(X) # Sparse related transformations # Not all sparse format support index sorting From 48b16a38fa7c5afe7a3df47fbfe9686235251230 Mon Sep 17 00:00:00 2001 From: Arlind Kadra Date: Thu, 21 Oct 2021 13:05:16 +0200 Subject: [PATCH 15/26] Update autoPyTorch/utils/implementations.py Delete uncommented line. Co-authored-by: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> --- autoPyTorch/utils/implementations.py | 1 - 1 file changed, 1 deletion(-) diff --git a/autoPyTorch/utils/implementations.py b/autoPyTorch/utils/implementations.py index f1ea1e651..40ceb85d1 100644 --- a/autoPyTorch/utils/implementations.py +++ b/autoPyTorch/utils/implementations.py @@ -35,7 +35,6 @@ def __call__(self, y: Union[np.ndarray, torch.Tensor]) -> np.ndarray: weights = (np.ones(y.shape[1]) * weight_per_class) / np.maximum(counts, 1) else: classes, counts = np.unique(y, axis=0, return_counts=True) - # classes, counts = classes[::-1], counts[::-1] weight_per_class = total_weight / classes.shape[0] weights = (np.ones(classes.shape[0]) * weight_per_class) / counts From dab2f762f4409af03f488016354de930089688b6 Mon Sep 17 00:00:00 2001 From: ArlindKadra Date: Thu, 21 Oct 2021 17:20:38 +0200 Subject: [PATCH 16/26] Allow the number of threads to be given by the user --- autoPyTorch/api/base_task.py | 9 +++++++-- autoPyTorch/api/tabular_classification.py | 5 +++++ autoPyTorch/api/tabular_regression.py | 9 +++++++-- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 15375fbb6..5880e5ee7 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -101,7 +101,10 @@ class BaseTask: Base class for the tasks that serve as API to the pipelines. Args: seed (int), (default=1): seed to be used for reproducibility. - n_jobs (int), (default=1): number of consecutive processes to spawn. + n_jobs (int), (default=1): + number of consecutive processes to spawn. + nr_threads (int), (default=1): + number of threads to use for each process. 
logging_config (Optional[Dict]): specifies configuration for logging, if None, it is loaded from the logging.yaml ensemble_size (int), (default=50): Number of models added to the ensemble built by @@ -138,6 +141,7 @@ def __init__( self, seed: int = 1, n_jobs: int = 1, + nr_threads: int = 1, logging_config: Optional[Dict] = None, ensemble_size: int = 50, ensemble_nbest: int = 50, @@ -158,6 +162,7 @@ def __init__( ) -> None: self.seed = seed self.n_jobs = n_jobs + self.nr_threads = nr_threads self.ensemble_size = ensemble_size self.ensemble_nbest = ensemble_nbest self.max_models_on_disc = max_models_on_disc @@ -412,7 +417,7 @@ def _create_dask_client(self) -> None: dask.distributed.LocalCluster( n_workers=self.n_jobs, processes=True, - threads_per_worker=2, + threads_per_worker=self.nr_threads, # We use the temporal directory to save the # dask workers, because deleting workers # more time than deleting backend directories diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py index 7be504f6d..3cced5e3d 100644 --- a/autoPyTorch/api/tabular_classification.py +++ b/autoPyTorch/api/tabular_classification.py @@ -27,11 +27,14 @@ class TabularClassificationTask(BaseTask): """ Tabular Classification API to the pipelines. + Args: seed (int): seed to be used for reproducibility. n_jobs (int), (default=1): number of consecutive processes to spawn. + nr_threads (int), (default=1): + number of threads to use for each process. logging_config (Optional[Dict]): specifies configuration for logging, if None, it is loaded from the logging.yaml ensemble_size (int), (default=50): @@ -63,6 +66,7 @@ def __init__( self, seed: int = 1, n_jobs: int = 1, + nr_threads: int = 1, logging_config: Optional[Dict] = None, ensemble_size: int = 50, ensemble_nbest: int = 50, @@ -83,6 +87,7 @@ def __init__( super().__init__( seed=seed, n_jobs=n_jobs, + nr_threads=nr_threads, logging_config=logging_config, ensemble_size=ensemble_size, ensemble_nbest=ensemble_nbest, diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py index 8742549af..88a828867 100644 --- a/autoPyTorch/api/tabular_regression.py +++ b/autoPyTorch/api/tabular_regression.py @@ -27,9 +27,13 @@ class TabularRegressionTask(BaseTask): """ Tabular Regression API to the pipelines. + Args: seed (int): seed to be used for reproducibility. - n_jobs (int), (default=1): number of consecutive processes to spawn. + n_jobs (int), (default=1): + number of consecutive processes to spawn. + nr_threads (int), (default=1): + number of threads to use for each process. logging_config (Optional[Dict]): specifies configuration for logging, if None, it is loaded from the logging.yaml ensemble_size (int), (default=50): Number of models added to the ensemble built by @@ -50,11 +54,11 @@ class TabularRegressionTask(BaseTask): Otherwise specifies set of components not to use. Incompatible with include components """ - def __init__( self, seed: int = 1, n_jobs: int = 1, + nr_threads: int = 1, logging_config: Optional[Dict] = None, ensemble_size: int = 50, ensemble_nbest: int = 50, @@ -75,6 +79,7 @@ def __init__( super().__init__( seed=seed, n_jobs=n_jobs, + nr_threads=nr_threads, logging_config=logging_config, ensemble_size=ensemble_size, ensemble_nbest=ensemble_nbest, From 6f0aecb11233cec1dd4c58c393d90e6ec9143274 Mon Sep 17 00:00:00 2001 From: ArlindKadra Date: Thu, 21 Oct 2021 17:45:31 +0200 Subject: [PATCH 17/26] Removing unnecessary argument and refactoring the attribute. 
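
A short sketch of how the new thread argument is meant to be used from the public API; the argument name at this point in the series is `nr_threads` (a later patch renames it to `n_threads`), and internally it is forwarded to `dask.distributed.LocalCluster(threads_per_worker=...)` while `n_jobs` sets `n_workers`:

    from autoPyTorch.api.tabular_classification import TabularClassificationTask

    api = TabularClassificationTask(
        seed=42,
        n_jobs=4,       # four parallel worker processes
        nr_threads=2,   # two threads per worker process
    )
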
--- autoPyTorch/data/base_feature_validator.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/autoPyTorch/data/base_feature_validator.py b/autoPyTorch/data/base_feature_validator.py index a7cab5913..700af1050 100644 --- a/autoPyTorch/data/base_feature_validator.py +++ b/autoPyTorch/data/base_feature_validator.py @@ -29,7 +29,8 @@ class BaseFeatureValidator(BaseEstimator): """ A class to pre-process features. In this regards, the format of the data is checked, - and if applicable, features are encoded + and if applicable, features are encoded. + Attributes: feat_type (List[str]): List of the column types found by this estimator during fit. @@ -37,23 +38,19 @@ class BaseFeatureValidator(BaseEstimator): Class name of the data type provided during fit. encoder (Optional[BaseEstimator]) Host a encoder object if the data requires transformation (for example, - if provided a categorical column in a pandas DataFrame) - enc_columns (List[str]) - List of columns that were encoded. + if provided a categorical column in a pandas DataFrame). """ - def __init__(self, - logger: Optional[Union[PicklableClientLogger, logging.Logger - ] - ] = None, - ) -> None: + def __init__( + self, + logger: Optional[Union[PicklableClientLogger, logging.Logger]] = None, + ) -> None: # Register types to detect unsupported data format changes self.feat_type: Optional[List[str]] = None self.data_type: Optional[type] = None self.dtypes: List[str] = [] self.column_order: List[str] = [] - self.encoder: Optional[BaseEstimator] = None - self.enc_columns: List[str] = [] + self.column_transformer: Optional[BaseEstimator] = None self.logger: Union[ PicklableClientLogger, logging.Logger From 84d74066e423eaeaf9bbf1bcf74ca5b585523860 Mon Sep 17 00:00:00 2001 From: ArlindKadra Date: Thu, 21 Oct 2021 18:08:54 +0200 Subject: [PATCH 18/26] Addressing Ravin's comments --- autoPyTorch/data/tabular_feature_validator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py index dac3ed058..66faa8e41 100644 --- a/autoPyTorch/data/tabular_feature_validator.py +++ b/autoPyTorch/data/tabular_feature_validator.py @@ -170,7 +170,7 @@ def _fit( self.categorical_columns.append(i) # Lastly, store the number of features - self.num_features = len(X.columns) + self.num_features = np.shape(X)[1] return self From 9f8ebb5134267332598285c01921fba4ab3e57ae Mon Sep 17 00:00:00 2001 From: Arlind Kadra Date: Thu, 21 Oct 2021 18:10:41 +0200 Subject: [PATCH 19/26] Update autoPyTorch/pipeline/components/setup/network_backbone/utils.py Updating the function documentation according to the agreed style. Co-authored-by: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> --- .../setup/network_backbone/utils.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py index 356a00404..314f852ca 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py @@ -98,8 +98,27 @@ def shake_get_alpha_beta( is_cuda: bool, method: str ) -> typing.Tuple[torch.Tensor, torch.Tensor]: + """ """ The methods used in this function have been introduced in 'ShakeShake Regularisation' + Each method name is available in the referred paper. 
+ Currently, this function supports `even-even`, `shake-even`, `shake-shake` and `M3`. + + Args: + is_training (bool): Whether the computation for the training + is_cuda (bool): Whether the tensor is on CUDA + method (str): The shake method either `even-even`, `shake-even`, `shake-shake` or `M3` + + Returns: + alpha, beta (Tuple[float, float]): + alpha (in [0, 1]) is the weight coefficient for the forward pass + beta (in [0, 1]) is the weight coefficient for the backward pass + + Reference: + Title: Shake-shake regularization + Author: Xavier Gastaldi + URL: https://arxiv.org/abs/1705.07485 + """ https://arxiv.org/abs/1705.07485. The names have been taken from the paper as well. Currently, this function supports `even-even`, `shake-even`, `shake-shake` and `M3`. """ From 14889783d669e11a0ca2153801edabce0b781f71 Mon Sep 17 00:00:00 2001 From: Arlind Kadra Date: Thu, 21 Oct 2021 18:13:30 +0200 Subject: [PATCH 20/26] Update autoPyTorch/pipeline/components/setup/network_backbone/utils.py Providing information on the wrong method provided for shake-shake regularization. Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> --- autoPyTorch/pipeline/components/setup/network_backbone/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py index 314f852ca..725ff1d09 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py @@ -140,7 +140,7 @@ def shake_get_alpha_beta( [rnd * (0.5 - alpha) + alpha if alpha < 0.5 else rnd * (alpha - 0.5) + 0.5] ) else: - raise ValueError("Unknown method for ShakeShakeRegularisation in NetworkBackbone") + raise ValueError(f"Unknown method `{method}` for ShakeShakeRegularisation in NetworkBackbone") if is_cuda: alpha = alpha.cuda() From 6c8a55b61fbcda08b993383bce64962ace342bc5 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Thu, 21 Oct 2021 20:53:41 +0200 Subject: [PATCH 21/26] add todo for backend and accept changes from shuhei --- .../pipeline/components/setup/network_backbone/utils.py | 5 ++--- autoPyTorch/utils/backend.py | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py index 06412e591..70cab66fc 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py @@ -117,7 +117,6 @@ def shake_get_alpha_beta( is_cuda: bool, method: str ) -> typing.Tuple[torch.Tensor, torch.Tensor]: - """ """ The methods used in this function have been introduced in 'ShakeShake Regularisation' Each method name is available in the referred paper. @@ -137,8 +136,8 @@ def shake_get_alpha_beta( Title: Shake-shake regularization Author: Xavier Gastaldi URL: https://arxiv.org/abs/1705.07485 - """ - https://arxiv.org/abs/1705.07485. The names have been taken from the paper as well. + + The names have been taken from the paper as well. Currently, this function supports `even-even`, `shake-even`, `shake-shake` and `M3`. 
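
Pulled together in one place, a minimal sketch of how the four named options could produce `alpha` and `beta`. The mapping of `even-even`/`shake-even` to constants follows the usual reading of the Shake-Shake paper and may differ in detail from the actual implementation in network_backbone/utils.py; the `M3` branch mirrors the expression that appears elsewhere in this patch series:

    import torch

    def sketch_alpha_beta(is_training: bool, method: str):
        if not is_training:
            # At inference time both branches are averaged deterministically.
            return torch.tensor([0.5]), torch.tensor([0.5])

        alpha = torch.tensor([0.5]) if method == 'even-even' else torch.rand(1)

        if method == 'shake-shake':
            beta = torch.rand(1)                # independent backward coefficient
        elif method in ('shake-even', 'even-even'):
            beta = torch.tensor([0.5])          # "even" backward pass
        elif method == 'M3':
            rnd = torch.rand(1)
            # Draw beta on the same side of 0.5 as alpha
            beta = rnd * (0.5 - alpha) + alpha if alpha < 0.5 else rnd * (alpha - 0.5) + 0.5
        else:
            raise ValueError(f"Unknown method `{method}` for ShakeShakeRegularisation")
        return alpha, beta

During training the two branch outputs are then combined as `alpha * x1 + (1 - alpha) * x2` in the forward pass, with `beta` taking alpha's place when gradients are propagated backwards.
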
""" if not is_training: diff --git a/autoPyTorch/utils/backend.py b/autoPyTorch/utils/backend.py index 85160af42..5348bd11c 100644 --- a/autoPyTorch/utils/backend.py +++ b/autoPyTorch/utils/backend.py @@ -204,6 +204,7 @@ def temporary_directory(self) -> str: return self.context.temporary_directory def _make_internals_directory(self) -> None: + # TODO: make exist_ok a function argument try: os.makedirs(self.internals_directory, exist_ok=True) except Exception as e: From e9dfea949c1eb0b47a3058fbe6bae61a19492069 Mon Sep 17 00:00:00 2001 From: ArlindKadra Date: Thu, 21 Oct 2021 22:21:51 +0200 Subject: [PATCH 22/26] Addressing Shuhei's and Ravin's comments --- autoPyTorch/api/base_task.py | 4 ++-- autoPyTorch/api/tabular_classification.py | 6 +++--- autoPyTorch/api/tabular_regression.py | 6 +++--- .../components/training/trainer/RowCutOutTrainer.py | 5 ++--- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index d36122b6e..c1ba020cb 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -103,7 +103,7 @@ class BaseTask: seed (int), (default=1): seed to be used for reproducibility. n_jobs (int), (default=1): number of consecutive processes to spawn. - nr_threads (int), (default=1): + n_threads (int), (default=1): number of threads to use for each process. logging_config (Optional[Dict]): specifies configuration for logging, if None, it is loaded from the logging.yaml @@ -141,7 +141,7 @@ def __init__( self, seed: int = 1, n_jobs: int = 1, - nr_threads: int = 1, + n_threads: int = 1, logging_config: Optional[Dict] = None, ensemble_size: int = 50, ensemble_nbest: int = 50, diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py index 024e5d113..b261f3b0f 100644 --- a/autoPyTorch/api/tabular_classification.py +++ b/autoPyTorch/api/tabular_classification.py @@ -33,7 +33,7 @@ class TabularClassificationTask(BaseTask): seed to be used for reproducibility. n_jobs (int), (default=1): number of consecutive processes to spawn. - nr_threads (int), (default=1): + n_threads (int), (default=1): number of threads to use for each process. logging_config (Optional[Dict]): specifies configuration for logging, if None, it is loaded from the logging.yaml @@ -66,7 +66,7 @@ def __init__( self, seed: int = 1, n_jobs: int = 1, - nr_threads: int = 1, + n_threads: int = 1, logging_config: Optional[Dict] = None, ensemble_size: int = 50, ensemble_nbest: int = 50, @@ -87,7 +87,7 @@ def __init__( super().__init__( seed=seed, n_jobs=n_jobs, - nr_threads=nr_threads, + n_threads=n_threads, logging_config=logging_config, ensemble_size=ensemble_size, ensemble_nbest=ensemble_nbest, diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py index ab5332b38..cbaaa9099 100644 --- a/autoPyTorch/api/tabular_regression.py +++ b/autoPyTorch/api/tabular_regression.py @@ -32,7 +32,7 @@ class TabularRegressionTask(BaseTask): seed (int): seed to be used for reproducibility. n_jobs (int), (default=1): number of consecutive processes to spawn. - nr_threads (int), (default=1): + n_threads (int), (default=1): number of threads to use for each process. 
logging_config (Optional[Dict]): specifies configuration for logging, if None, it is loaded from the logging.yaml @@ -58,7 +58,7 @@ def __init__( self, seed: int = 1, n_jobs: int = 1, - nr_threads: int = 1, + n_threads: int = 1, logging_config: Optional[Dict] = None, ensemble_size: int = 50, ensemble_nbest: int = 50, @@ -79,7 +79,7 @@ def __init__( super().__init__( seed=seed, n_jobs=n_jobs, - nr_threads=nr_threads, + n_threads=n_threads, logging_config=logging_config, ensemble_size=ensemble_size, ensemble_nbest=ensemble_nbest, diff --git a/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py b/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py index 1e90f7e31..4578082cb 100644 --- a/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py @@ -31,7 +31,6 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, np.ndarray: that processes data Dict[str, np.ndarray]: arguments to the criterion function """ - r = self.random_state.rand(1) if r > self.cutout_prob: y_a = y @@ -40,14 +39,14 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, return X, {'y_a': y_a, 'y_b': y_b, 'lam': lam} size: int = np.shape(X)[1] - indices = self.random_state.choice( + cut_column_indices = self.random_state.choice( range(size), max(1, np.int32(size * self.patch_ratio)), replace=False, ) # Mask the selected features as 0 - X[:, indices] = 0 + X[:, cut_column_indices] = 0 lam = 1 y_a = y y_b = y From 88893a980be65312d534ad906cca2838dfaff5fc Mon Sep 17 00:00:00 2001 From: ArlindKadra Date: Thu, 21 Oct 2021 22:27:43 +0200 Subject: [PATCH 23/26] Addressing Shuhei's and Ravin's comments, bug fix --- autoPyTorch/api/base_task.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index c1ba020cb..e9344e213 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -162,7 +162,7 @@ def __init__( ) -> None: self.seed = seed self.n_jobs = n_jobs - self.nr_threads = nr_threads + self.n_threads = n_threads self.ensemble_size = ensemble_size self.ensemble_nbest = ensemble_nbest self.max_models_on_disc = max_models_on_disc @@ -417,7 +417,7 @@ def _create_dask_client(self) -> None: dask.distributed.LocalCluster( n_workers=self.n_jobs, processes=True, - threads_per_worker=self.nr_threads, + threads_per_worker=self.n_threads, # We use the temporal directory to save the # dask workers, because deleting workers # more time than deleting backend directories @@ -1381,7 +1381,8 @@ def fit_ensemble( """ Enables post-hoc fitting of the ensemble after the `search()` method is finished. This method creates an ensemble using all - the models stored on disk during the smbo run + the models stored on disk during the smbo run. + Args: optimize_metric (str): name of the metric that is used to evaluate a pipeline. if not specified, value passed to search will be used @@ -1418,6 +1419,7 @@ def fit_ensemble( total_walltime_limit // 2 to allow enough time to fit at least 2 individual machine learning algorithms. Set to np.inf in case no time limit is desired. + Returns: self """ From da6e47c3896ac256d8a3cb7fb7b24f778c9937c0 Mon Sep 17 00:00:00 2001 From: Arlind Kadra Date: Thu, 21 Oct 2021 22:28:28 +0200 Subject: [PATCH 24/26] Update autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py Improving code readibility. 
Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> --- .../setup/network_backbone/ResNetBackbone.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py index 75a45d712..853162caa 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py @@ -332,11 +332,11 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor: # -> result = W_shortcut(A(BN(x))) + W_2(~D(A(BN(W_1(A(BN(x)))))) x = self.start_norm(x) residual = self.shortcut(x) - else: - if not self.config["use_skip_connection"]: - # Early-return - return self.layers(x) - else: + elif self.config["use_skip_connection"]: + # We use a skip connection but we do not need to match dimensions + residual = x + else: # Early-return because no need of skip connection + return self.layers(x) # We use a skip connection but we do not need to match dimensions residual = x From 2740052173e53e39efc869beeab9a290368038b6 Mon Sep 17 00:00:00 2001 From: Arlind Kadra Date: Thu, 21 Oct 2021 22:28:54 +0200 Subject: [PATCH 25/26] Update autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py Improving consistency. Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> --- .../components/setup/network_backbone/ResNetBackbone.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py index 853162caa..079f17249 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py @@ -352,8 +352,8 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor: elif self.config["multi_branch_choice"] == 'shake-drop': x = self.layers(x) alpha, beta = shake_get_alpha_beta( - self.training, - x.is_cuda, + is_training=self.training, + is_cuda=x.is_cuda, method=self.config['shake_shake_update_func'], ) bl = shake_drop_get_bl( From e59795149bbfe1bf1c2cc9d0e059f9acdc49ad98 Mon Sep 17 00:00:00 2001 From: ArlindKadra Date: Thu, 21 Oct 2021 22:31:48 +0200 Subject: [PATCH 26/26] bug fix --- .../components/setup/network_backbone/ResNetBackbone.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py index 079f17249..51269a254 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py @@ -337,8 +337,6 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor: residual = x else: # Early-return because no need of skip connection return self.layers(x) - # We use a skip connection but we do not need to match dimensions - residual = x if self.config["multi_branch_choice"] == 'shake-shake': x1 = self.layers(x)