Bug fixes #249

Merged

27 commits
a7a94e8
Update implementation
ArlindKadra Jun 4, 2021
3b7f559
Coding style fixes
ArlindKadra Jun 7, 2021
11e7021
Implementation update
ArlindKadra Jun 7, 2021
375c055
Style fix
ArlindKadra Jun 7, 2021
3413bc3
Turn weighted loss into a constant again, implementation update
ArlindKadra Jun 8, 2021
d37d4a5
Cocktail branch inconsistencies (#275)
ravinkohli Jul 21, 2021
23466f0
Cocktail fixes time debug (#286)
ravinkohli Oct 20, 2021
00f80cb
Addressing Shuhei's comments
ArlindKadra Oct 20, 2021
88e0228
flake8 problems fix
ArlindKadra Oct 20, 2021
3b6ec03
Update autoPyTorch/api/base_task.py
ArlindKadra Oct 21, 2021
a26edbe
Update autoPyTorch/api/base_task.py
ArlindKadra Oct 21, 2021
73a11c9
Update autoPyTorch/data/tabular_feature_validator.py
ArlindKadra Oct 21, 2021
37e3537
Update autoPyTorch/pipeline/components/setup/network_backbone/utils.py
ArlindKadra Oct 21, 2021
dc5e8a2
Update autoPyTorch/data/tabular_feature_validator.py
ArlindKadra Oct 21, 2021
48b16a3
Update autoPyTorch/utils/implementations.py
ArlindKadra Oct 21, 2021
dab2f76
Allow the number of threads to be given by the user
ArlindKadra Oct 21, 2021
6f0aecb
Removing unnecessary argument and refactoring the attribute.
ArlindKadra Oct 21, 2021
84d7406
Addressing Ravin's comments
ArlindKadra Oct 21, 2021
9f8ebb5
Update autoPyTorch/pipeline/components/setup/network_backbone/utils.py
ArlindKadra Oct 21, 2021
1488978
Update autoPyTorch/pipeline/components/setup/network_backbone/utils.py
ArlindKadra Oct 21, 2021
a044a19
Merge branch 'refactor_development_regularization_cocktails' into coc…
ravinkohli Oct 21, 2021
6c8a55b
add todo for backend and accept changes from shuhei
ravinkohli Oct 21, 2021
e9dfea9
Addressing Shuhei's and Ravin's comments
ArlindKadra Oct 21, 2021
88893a9
Addressing Shuhei's and Ravin's comments, bug fix
ArlindKadra Oct 21, 2021
da6e47c
Update autoPyTorch/pipeline/components/setup/network_backbone/ResNetB…
ArlindKadra Oct 21, 2021
2740052
Update autoPyTorch/pipeline/components/setup/network_backbone/ResNetB…
ArlindKadra Oct 21, 2021
e597951
bug fix
ArlindKadra Oct 21, 2021
4 changes: 2 additions & 2 deletions autoPyTorch/datasets/base_dataset.py
@@ -322,7 +322,7 @@ def create_holdout_val_split(
self.random_state, val_share, self._get_indices(), **kwargs)
return train, val

-    def get_dataset_for_training(self, split_id: int, train: bool) -> Dataset:
+    def get_dataset_for_training(self, split_id: int, train: bool, subset: int = 0) -> Dataset:
"""
The above split methods employ the Subset to internally subsample the whole dataset.

@@ -336,7 +336,7 @@ def get_dataset_for_training(self, split_id: int, train: bool) -> Dataset:
Dataset: the reduced dataset to be used for testing
"""
# Subset creates a dataset. Splits is a (train_indices, test_indices) tuple
-        return TransformSubset(self, self.splits[split_id][0], train=train)
+        return TransformSubset(self, self.splits[split_id][subset], train=train)

def replace_data(self, X_train: BaseDatasetInputType,
X_test: Optional[BaseDatasetInputType]) -> 'BaseDataset':
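`splits[split_id]` holds a `(train_indices, val_indices)` tuple, so the new `subset` argument simply picks which half of the split feeds `TransformSubset`: `subset=0` keeps the previous behaviour (training indices) and `subset=1` selects the validation indices. A minimal sketch of that indexing; the toy split below is hypothetical and stands in for a real `BaseDataset`:

```python
# Sketch of the indexing behind the new `subset` argument; the toy split
# is illustrative only and stands in for BaseDataset.splits / TransformSubset.
import numpy as np

splits = [(np.arange(0, 80), np.arange(80, 100))]  # splits[split_id] == (train_indices, val_indices)

def pick_indices(split_id: int, subset: int = 0) -> np.ndarray:
    # subset=0 -> training indices (old behaviour), subset=1 -> validation indices,
    # mirroring TransformSubset(self, self.splits[split_id][subset], train=train)
    return splits[split_id][subset]

print(len(pick_indices(0, subset=0)), len(pick_indices(0, subset=1)))  # 80 20
```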
@@ -261,18 +261,19 @@ def __init__(
# if the shortcut needs a layer we apply batchnorm and activation to the shortcut
# as well (start_norm)
if in_features != out_features:
Collaborator:
Suggested change:
-        if in_features != out_features:
+        if in_features != out_features and self.config["use_skip_connection"]:
self.shortcut = nn.Linear(in_features, out_features)
initial_normalization = list()
if self.config['use_batch_norm']:
initial_normalization.append(
nn.BatchNorm1d(in_features)
)
initial_normalization.append(
self.activation()
)
self.start_norm = nn.Sequential(
*initial_normalization
)

-            self.shortcut = nn.Linear(in_features, out_features)
-            initial_normalization = list()
-            if self.config['use_batch_norm']:
-                initial_normalization.append(
-                    nn.BatchNorm1d(in_features)
-                )
-            initial_normalization.append(
-                self.activation()
-            )
-            self.start_norm = nn.Sequential(
-                *initial_normalization
-            )
+            if self.config["use_skip_connection"]:
+                self.shortcut = nn.Linear(in_features, out_features)
+                initial_normalization = list()
+                if self.config['use_batch_norm']:
+                    initial_normalization.append(
+                        nn.BatchNorm1d(in_features)
+                    )
+                initial_normalization.append(
+                    self.activation()
+                )
+                self.start_norm = nn.Sequential(
+                    *initial_normalization
+                )

self.block_index = block_index
self.num_blocks = blocks_per_group * self.config["num_groups"]
@@ -290,14 +291,6 @@ def _build_block(self, in_features: int, out_features: int) -> nn.Module:
if self.config['use_batch_norm']:
layers.append(nn.BatchNorm1d(in_features))
layers.append(self.activation())
-        else:
-            # if start norm is not None and skip connection is None
-            # we will never apply the start_norm for the first layer in the block,
-            # which is why we should account for this case.
-            if not self.config['use_skip_connection']:
-                if self.config['use_batch_norm']:
-                    layers.append(nn.BatchNorm1d(in_features))
-                layers.append(self.activation())

layers.append(nn.Linear(in_features, out_features))

@@ -327,8 +320,7 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor:
# if in_features != out_features
# -> result = W_shortcut(A(BN(x))) + W_2(~D(A(BN(W_1(A(BN(x))))))
x = self.start_norm(x)
if self.config["use_skip_connection"]:
residual = self.shortcut(x)
residual = self.shortcut(x)

# TODO make the below code better
if self.config["use_skip_connection"]:
@@ -337,13 +329,8 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor:
x2 = self.shake_shake_layers(x)
alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda)
x = shake_shake(x1, x2, alpha, beta)
-            else:
-                x = self.layers(x)
-        else:
-            x = self.layers(x)
-
-        if self.config["use_skip_connection"]:
-            if self.config["multi_branch_choice"] == 'shake-drop':
+            elif self.config["multi_branch_choice"] == 'shake-drop':
+                x = self.layers(x)
alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda)
bl = shake_drop_get_bl(
self.block_index,
@@ -353,8 +340,11 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor:
x.is_cuda,
)
x = shake_drop(x, alpha, beta, bl)
+            else:
+                x = self.layers(x)

-        if self.config["use_skip_connection"]:
            x = x + residual
+        else:
+            x = self.layers(x)

return x
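The refactor above moves the shortcut, the shake-shake / shake-drop branches, and the residual addition under a single `use_skip_connection` check, with a plain `self.layers(x)` path otherwise. A simplified, self-contained sketch of that control flow; the stub layers and the averaging/identity stand-ins for `shake_shake` and `shake_drop` are assumptions, not the repository's implementation:

```python
# Toy residual block illustrating the restructured forward pass.
import torch
import torch.nn as nn

class ToyResBlock(nn.Module):
    def __init__(self, features: int, use_skip: bool, branch: str = "none"):
        super().__init__()
        self.use_skip = use_skip
        self.branch = branch
        self.layers = nn.Sequential(nn.Linear(features, features), nn.ReLU())
        self.shake_shake_layers = nn.Sequential(nn.Linear(features, features), nn.ReLU())

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if self.use_skip:
            residual = x  # the real block applies self.shortcut / self.start_norm here
            if self.branch == "shake-shake":
                x1, x2 = self.layers(x), self.shake_shake_layers(x)
                x = 0.5 * (x1 + x2)   # stand-in for shake_shake(x1, x2, alpha, beta)
            elif self.branch == "shake-drop":
                x = self.layers(x)    # stand-in: shake_drop would rescale this output
            else:
                x = self.layers(x)
            x = x + residual          # residual only exists on the skip-connection path
        else:
            x = self.layers(x)
        return x

out = ToyResBlock(8, use_skip=True, branch="shake-shake")(torch.randn(4, 8))
print(out.shape)  # torch.Size([4, 8])
```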
@@ -72,6 +72,7 @@ def build_backbone(self, input_shape: Tuple[int, ...]) -> None:
)
if self.config['use_batch_norm']:
layers.append(torch.nn.BatchNorm1d(self.config["num_units_%i" % self.config['num_groups']]))
+        layers.append(_activations[self.config["activation"]]())
Collaborator:
We need it, right? (this change originates in this PR, right?)

Author:
Yes, makes it consistent with the ResNetBackbone.

backbone = torch.nn.Sequential(*layers)
self.backbone = backbone
return backbone
@@ -23,7 +23,6 @@ def build_head(self, input_shape: Tuple[int, ...], output_shape: Tuple[int, ...]
layers = []
in_features = np.prod(input_shape).item()
out_features = np.prod(output_shape).item()
-        layers.append(_activations[self.config["activation"]]())
Collaborator:
Why deleted?

Author:
If I am not mistaken, that is because, firstly, we do not add an extra layer here and, secondly, this last nonlinearity was intended for the residual block and is now already added there.

I will double check.

Author:
I confirmed it: it is consistent; the last activation layer for the final layer of the last block is present in every network_backbone. Worth mentioning, though, is that the MLP and ShapedMLP backbones do not have batch norm inside.

layers.append(nn.Linear(in_features=in_features,
out_features=out_features))
return nn.Sequential(*layers)
@@ -120,7 +120,7 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader:
)

if X['val_indices'] is not None:
-            val_dataset = datamanager.get_dataset_for_training(split_id=X['split_id'], train=False)
+            val_dataset = datamanager.get_dataset_for_training(split_id=X['split_id'], train=False, subset=1)
self.val_data_loader = torch.utils.data.DataLoader(
val_dataset,
batch_size=min(self.batch_size, len(val_dataset)),
@@ -24,7 +24,7 @@ class AdversarialTrainer(BaseTrainerComponent):
def __init__(
self,
epsilon: float,
-        weighted_loss: bool = False,
+        weighted_loss: int = 0,
random_state: Optional[np.random.RandomState] = None,
use_stochastic_weight_averaging: bool = False,
use_snapshot_ensemble: bool = False,
@@ -159,8 +159,8 @@ def get_hyperparameter_search_space(
dataset_properties: Optional[Dict] = None,
weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace(
hyperparameter="weighted_loss",
-            value_range=[True, False],
-            default_value=True),
+            value_range=[1],
Collaborator:
Suggested change:
-            value_range=[1],
+            value_range=(1, ),

+            default_value=1),
la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace(
hyperparameter="la_steps",
value_range=(5, 10),
@@ -226,9 +226,17 @@ def get_hyperparameter_search_space(
parent_hyperparameter=parent_hyperparameter
)

"""
# TODO, decouple the weighted loss from the trainer
if dataset_properties is not None:
if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
add_hyperparameter(cs, weighted_loss, CategoricalHyperparameter)
"""
# TODO, decouple the weighted loss from the trainer. Uncomment the code above and
# remove the code below. Also update the method signature, so the weighted loss
# is not a constant.
if dataset_properties is not None:
if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
add_hyperparameter(cs, weighted_loss, Constant)

return cs
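For context on the `weighted_loss` change that repeats across the trainers in this PR: the hyperparameter moves from a boolean `CategoricalHyperparameter` to a `Constant` with value 1, added only for classification tasks. A sketch of the difference using plain ConfigSpace (classic `add_hyperparameter` API); the `is_classification` flag is a stand-in for the `STRING_TO_TASK_TYPES` / `CLASSIFICATION_TASKS` lookup, and autoPyTorch's own `add_hyperparameter` helper is bypassed here:

```python
# Sketch: weighted_loss as a Constant (new) vs. a Categorical (old) hyperparameter.
from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter, Constant

def build_space(is_classification: bool, use_constant: bool = True) -> ConfigurationSpace:
    cs = ConfigurationSpace()
    if is_classification:
        if use_constant:
            # new behaviour: weighted loss is always on for classification
            cs.add_hyperparameter(Constant("weighted_loss", 1))
        else:
            # previous behaviour: weighted loss was searched over {True, False}
            cs.add_hyperparameter(
                CategoricalHyperparameter("weighted_loss", choices=[True, False], default_value=True)
            )
    return cs

print(build_space(is_classification=True))
```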
@@ -40,22 +40,23 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray,
indices = self.random_state.choice(range(1, size), max(1, np.int32(size * self.patch_ratio)),
replace=False)

-        if not isinstance(self.numerical_columns, typing.Iterable):
+        """if not isinstance(self.numerical_columns, typing.Iterable):
raise ValueError("{} requires numerical columns information of {}"
"to prepare data got {}.".format(self.__class__.__name__,
typing.Iterable,
self.numerical_columns))
numerical_indices = torch.tensor(self.numerical_columns)
categorical_indices = torch.tensor([index for index in indices if index not in self.numerical_columns])

# We use an ordinal encoder on the categorical columns of tabular data
# -1 is the conceptual equivalent to 0 in a image, that does not
# have color as a feature and hence the network has to learn to deal
# without this data. For numerical columns we use 0 to cutout the features
# similar to the effect that setting 0 as a pixel value in an image.
X[:, categorical_indices.long()] = self.CATEGORICAL_VALUE
X[:, numerical_indices.long()] = self.NUMERICAL_VALUE

"""
X[:, indices] = 0
lam = 1
y_a = y
y_b = y
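With the per-dtype handling quoted out above, the cutout step reduces to sampling a `patch_ratio` fraction of the column indices and zeroing them for the whole batch. A standalone NumPy sketch of that step; the array shape and seed are illustrative only:

```python
# Sketch of the tabular cutout step: pick random feature columns and zero them out.
import numpy as np

def row_cutout(X: np.ndarray, patch_ratio: float, random_state: np.random.RandomState) -> np.ndarray:
    size = X.shape[1]  # number of feature columns
    n_drop = max(1, np.int32(size * patch_ratio))
    # mirrors the sampling in data_preparation (columns drawn from range(1, size))
    indices = random_state.choice(range(1, size), n_drop, replace=False)
    X = X.copy()
    X[:, indices] = 0  # mirrors `X[:, indices] = 0` in the diff
    return X

X = np.random.rand(4, 10)
X_cut = row_cutout(X, patch_ratio=0.3, random_state=np.random.RandomState(0))
print(X_cut.shape)  # (4, 10), with three columns zeroed
```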
@@ -7,7 +7,7 @@

class StandardTrainer(BaseTrainerComponent):
def __init__(self,
-                 weighted_loss: bool = False,
+                 weighted_loss: int = 0,
use_stochastic_weight_averaging: bool = False,
use_snapshot_ensemble: bool = False,
se_lastk: int = 3,
@@ -18,7 +18,7 @@ def __init__(self,
This class handles the training of a network for a single given epoch.

Args:
-            weighted_loss (bool): whether to use weighted loss
+            weighted_loss (int): whether to use weighted loss

"""
super().__init__(random_state=random_state,
16 changes: 12 additions & 4 deletions autoPyTorch/pipeline/components/training/trainer/base_trainer.py
@@ -175,7 +175,7 @@ class BaseTrainerComponent(autoPyTorchTrainingComponent):
"""
Base class for training
Args:
-        weighted_loss (bool, default=False): In case for classification, whether to weight
+        weighted_loss (int, default=0): In case for classification, whether to weight
the loss function according to the distribution of classes in the target
use_stochastic_weight_averaging (bool, default=True): whether to use stochastic
weight averaging. Stochastic weight averaging is a simple average of
Expand All @@ -190,7 +190,7 @@ class BaseTrainerComponent(autoPyTorchTrainingComponent):
random_state:
**lookahead_config:
"""
-    def __init__(self, weighted_loss: bool = False,
+    def __init__(self, weighted_loss: int = 0,
use_stochastic_weight_averaging: bool = True,
use_snapshot_ensemble: bool = True,
se_lastk: int = 3,
@@ -537,8 +537,8 @@ def get_hyperparameter_search_space(
dataset_properties: Optional[Dict] = None,
weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace(
hyperparameter="weighted_loss",
-            value_range=[True, False],
-            default_value=True),
+            value_range=[1],
Collaborator:
Suggested change:
-            value_range=[1],
+            value_range=(1, ),

Author:
To check if passing a tuple is what is expected.

Contributor:
It expects an Iterable.

Author:
True, but I guess we can be consistent with the other hyperparameter spaces too, so I will do the change.

+            default_value=1),
la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace(
hyperparameter="la_steps",
value_range=(5, 10),
@@ -599,9 +599,17 @@ def get_hyperparameter_search_space(
parent_hyperparameter=parent_hyperparameter
)

"""
# TODO, decouple the weighted loss from the trainer
if dataset_properties is not None:
if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
add_hyperparameter(cs, weighted_loss, CategoricalHyperparameter)
"""
# TODO, decouple the weighted loss from the trainer. Uncomment the code above and
# remove the code below. Also update the method signature, so the weighted loss
# is not a constant.
if dataset_properties is not None:
if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
add_hyperparameter(cs, weighted_loss, Constant)

return cs
@@ -402,6 +402,7 @@ def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> 'TrainerChoic
torch.cuda.empty_cache()

if self.choice.use_stochastic_weight_averaging and self.choice.swa_updated:

# update batch norm statistics
swa_utils.update_bn(X['train_data_loader'], self.choice.swa_model.double())
# change model
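The `swa_updated` branch above refreshes the BatchNorm statistics of the averaged model before it replaces the trained one, since SWA averages weights but not running statistics. A minimal sketch of that step with PyTorch's `torch.optim.swa_utils`; the toy model, loader, and the omitted training loop are assumptions:

```python
# Sketch: recompute BatchNorm running stats for an SWA-averaged model.
import torch
from torch import nn
from torch.optim import swa_utils
from torch.utils.data import DataLoader, TensorDataset

model = nn.Sequential(nn.Linear(8, 16), nn.BatchNorm1d(16), nn.ReLU(), nn.Linear(16, 2))
swa_model = swa_utils.AveragedModel(model)
loader = DataLoader(TensorDataset(torch.randn(64, 8)), batch_size=16)

# ... a training loop would call swa_model.update_parameters(model) after each epoch ...

swa_utils.update_bn(loader, swa_model)  # one pass over the train loader to refresh BN stats
```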
14 changes: 11 additions & 3 deletions autoPyTorch/pipeline/components/training/trainer/cutout_utils.py
@@ -20,7 +20,7 @@
class CutOut:
def __init__(self, patch_ratio: float,
cutout_prob: float,
-                 weighted_loss: bool = False,
+                 weighted_loss: int = 0,
random_state: Optional[np.random.RandomState] = None,
use_stochastic_weight_averaging: bool = False,
use_snapshot_ensemble: bool = False,
@@ -63,8 +63,8 @@ def get_hyperparameter_search_space(
dataset_properties: Optional[Dict] = None,
weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace(
hyperparameter="weighted_loss",
-            value_range=[True, False],
-            default_value=True),
+            value_range=[1],
Collaborator:
Suggested change:
-            value_range=[1],
+            value_range=(1, ),

+            default_value=1),
la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace(
hyperparameter="la_steps",
value_range=(5, 10),
@@ -136,9 +136,17 @@ def get_hyperparameter_search_space(
parent_hyperparameter=parent_hyperparameter
)

"""
# TODO, decouple the weighted loss from the trainer
if dataset_properties is not None:
if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
add_hyperparameter(cs, weighted_loss, CategoricalHyperparameter)
"""
# TODO, decouple the weighted loss from the trainer. Uncomment the code above and
# remove the code below. Also update the method signature, so the weighted loss
# is not a constant.
if dataset_properties is not None:
if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
add_hyperparameter(cs, weighted_loss, Constant)

return cs
15 changes: 12 additions & 3 deletions autoPyTorch/pipeline/components/training/trainer/mixup_utils.py
@@ -19,7 +19,7 @@

class MixUp:
def __init__(self, alpha: float,
-                 weighted_loss: bool = False,
+                 weighted_loss: int = 0,
random_state: Optional[np.random.RandomState] = None,
use_stochastic_weight_averaging: bool = False,
use_snapshot_ensemble: bool = False,
@@ -61,8 +61,8 @@ def get_hyperparameter_search_space(
dataset_properties: Optional[Dict] = None,
weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace(
hyperparameter="weighted_loss",
-            value_range=[True, False],
-            default_value=True),
+            value_range=[1],
Collaborator:
Suggested change:
-            value_range=[1],
+            value_range=(1, ),

+            default_value=1),
la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace(
hyperparameter="la_steps",
value_range=(5, 10),
@@ -127,9 +127,18 @@ def get_hyperparameter_search_space(
la_config_space,
parent_hyperparameter=parent_hyperparameter
)

"""
# TODO, decouple the weighted loss from the trainer
if dataset_properties is not None:
if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
add_hyperparameter(cs, weighted_loss, CategoricalHyperparameter)
"""
# TODO, decouple the weighted loss from the trainer. Uncomment the code above and
# remove the code below. Also update the method signature, so the weighted loss
# is not a constant.
if dataset_properties is not None:
if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
add_hyperparameter(cs, weighted_loss, Constant)

return cs
2 changes: 1 addition & 1 deletion autoPyTorch/utils/implementations.py
@@ -35,7 +35,7 @@ def __call__(self, y: Union[np.ndarray, torch.Tensor]) -> np.ndarray:
weights = (np.ones(y.shape[1]) * weight_per_class) / np.maximum(counts, 1)
else:
classes, counts = np.unique(y, axis=0, return_counts=True)
-            classes, counts = classes[::-1], counts[::-1]
+            # classes, counts = classes[::-1], counts[::-1]
weight_per_class = total_weight / classes.shape[0]
weights = (np.ones(classes.shape[0]) * weight_per_class) / counts

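A small worked example of the class-weighting formula kept above; `total_weight` is computed earlier in implementations.py and is not part of this diff, so taking it as the number of samples here is an assumption made only for illustration:

```python
# Worked example of the per-class weight computation shown in the hunk above.
import numpy as np

y = np.array([0, 0, 0, 0, 0, 0, 1, 1, 2, 2])     # imbalanced toy labels

classes, counts = np.unique(y, axis=0, return_counts=True)
total_weight = y.shape[0]                          # assumption, see lead-in
weight_per_class = total_weight / classes.shape[0]
weights = (np.ones(classes.shape[0]) * weight_per_class) / counts

print(dict(zip(classes.tolist(), weights.round(3).tolist())))
# rarer classes get larger weights: {0: 0.556, 1: 1.667, 2: 1.667}
```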