Bug fixes #249

Merged

27 commits
a7a94e8
Update implementation
ArlindKadra Jun 4, 2021
3b7f559
Coding style fixes
ArlindKadra Jun 7, 2021
11e7021
Implementation update
ArlindKadra Jun 7, 2021
375c055
Style fix
ArlindKadra Jun 7, 2021
3413bc3
Turn weighted loss into a constant again, implementation update
ArlindKadra Jun 8, 2021
d37d4a5
Cocktail branch inconsistencies (#275)
ravinkohli Jul 21, 2021
23466f0
Cocktail fixes time debug (#286)
ravinkohli Oct 20, 2021
00f80cb
Addressing Shuhei's comments
ArlindKadra Oct 20, 2021
88e0228
flake8 problems fix
ArlindKadra Oct 20, 2021
3b6ec03
Update autoPyTorch/api/base_task.py
ArlindKadra Oct 21, 2021
a26edbe
Update autoPyTorch/api/base_task.py
ArlindKadra Oct 21, 2021
73a11c9
Update autoPyTorch/data/tabular_feature_validator.py
ArlindKadra Oct 21, 2021
37e3537
Update autoPyTorch/pipeline/components/setup/network_backbone/utils.py
ArlindKadra Oct 21, 2021
dc5e8a2
Update autoPyTorch/data/tabular_feature_validator.py
ArlindKadra Oct 21, 2021
48b16a3
Update autoPyTorch/utils/implementations.py
ArlindKadra Oct 21, 2021
dab2f76
Allow the number of threads to be given by the user
ArlindKadra Oct 21, 2021
6f0aecb
Removing unnecessary argument and refactoring the attribute.
ArlindKadra Oct 21, 2021
84d7406
Addressing Ravin's comments
ArlindKadra Oct 21, 2021
9f8ebb5
Update autoPyTorch/pipeline/components/setup/network_backbone/utils.py
ArlindKadra Oct 21, 2021
1488978
Update autoPyTorch/pipeline/components/setup/network_backbone/utils.py
ArlindKadra Oct 21, 2021
a044a19
Merge branch 'refactor_development_regularization_cocktails' into coc…
ravinkohli Oct 21, 2021
6c8a55b
add todo for backend and accept changes from shuhei
ravinkohli Oct 21, 2021
e9dfea9
Addressing Shuhei's and Ravin's comments
ArlindKadra Oct 21, 2021
88893a9
Addressing Shuhei's and Ravin's comments, bug fix
ArlindKadra Oct 21, 2021
da6e47c
Update autoPyTorch/pipeline/components/setup/network_backbone/ResNetB…
ArlindKadra Oct 21, 2021
2740052
Update autoPyTorch/pipeline/components/setup/network_backbone/ResNetB…
ArlindKadra Oct 21, 2021
e597951
bug fix
ArlindKadra Oct 21, 2021
4 changes: 2 additions & 2 deletions autoPyTorch/datasets/base_dataset.py
@@ -322,7 +322,7 @@ def create_holdout_val_split(
self.random_state, val_share, self._get_indices(), **kwargs)
return train, val

-    def get_dataset_for_training(self, split_id: int, train: bool) -> Dataset:
+    def get_dataset_for_training(self, split_id: int, train: bool, subset: int = 0) -> Dataset:
"""
The above split methods employ the Subset to internally subsample the whole dataset.

@@ -336,7 +336,7 @@ def get_dataset_for_training(self, split_id: int, train: bool) -> Dataset:
Dataset: the reduced dataset to be used for testing
"""
# Subset creates a dataset. Splits is a (train_indices, test_indices) tuple
-        return TransformSubset(self, self.splits[split_id][0], train=train)
+        return TransformSubset(self, self.splits[split_id][subset], train=train)

def replace_data(self, X_train: BaseDatasetInputType,
X_test: Optional[BaseDatasetInputType]) -> 'BaseDataset':
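`splits[split_id]` holds a `(train_indices, val_indices)` tuple, so the new `subset` argument simply picks which half of the split feeds `TransformSubset`: `subset=0` keeps the previous behaviour (training indices) and `subset=1` selects the validation indices. A minimal sketch of that indexing; the toy split below is hypothetical and stands in for a real `BaseDataset`:

```python
# Sketch of the indexing behind the new `subset` argument; the toy split
# is illustrative only and stands in for BaseDataset.splits / TransformSubset.
import numpy as np

splits = [(np.arange(0, 80), np.arange(80, 100))]  # splits[split_id] == (train_indices, val_indices)

def pick_indices(split_id: int, subset: int = 0) -> np.ndarray:
    # subset=0 -> training indices (old behaviour), subset=1 -> validation indices,
    # mirroring TransformSubset(self, self.splits[split_id][subset], train=train)
    return splits[split_id][subset]

print(len(pick_indices(0, subset=0)), len(pick_indices(0, subset=1)))  # 80 20
```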
@@ -261,18 +261,19 @@ def __init__(
# if the shortcut needs a layer we apply batchnorm and activation to the shortcut
# as well (start_norm)
if in_features != out_features:
Collaborator:
Suggested change:
-        if in_features != out_features:
+        if in_features != out_features and self.config["use_skip_connection"]:
self.shortcut = nn.Linear(in_features, out_features)
initial_normalization = list()
if self.config['use_batch_norm']:
initial_normalization.append(
nn.BatchNorm1d(in_features)
)
initial_normalization.append(
self.activation()
)
self.start_norm = nn.Sequential(
*initial_normalization
)

-            self.shortcut = nn.Linear(in_features, out_features)
-            initial_normalization = list()
-            if self.config['use_batch_norm']:
-                initial_normalization.append(
-                    nn.BatchNorm1d(in_features)
-                )
-            initial_normalization.append(
-                self.activation()
-            )
-            self.start_norm = nn.Sequential(
-                *initial_normalization
-            )
+            if self.config["use_skip_connection"]:
+                self.shortcut = nn.Linear(in_features, out_features)
+                initial_normalization = list()
+                if self.config['use_batch_norm']:
+                    initial_normalization.append(
+                        nn.BatchNorm1d(in_features)
+                    )
+                initial_normalization.append(
+                    self.activation()
+                )
+                self.start_norm = nn.Sequential(
+                    *initial_normalization
+                )

self.block_index = block_index
self.num_blocks = blocks_per_group * self.config["num_groups"]
@@ -290,14 +291,6 @@ def _build_block(self, in_features: int, out_features: int) -> nn.Module:
if self.config['use_batch_norm']:
layers.append(nn.BatchNorm1d(in_features))
layers.append(self.activation())
-        else:
-            # if start norm is not None and skip connection is None
-            # we will never apply the start_norm for the first layer in the block,
-            # which is why we should account for this case.
-            if not self.config['use_skip_connection']:
-                if self.config['use_batch_norm']:
-                    layers.append(nn.BatchNorm1d(in_features))
-                layers.append(self.activation())

layers.append(nn.Linear(in_features, out_features))

@@ -327,8 +320,7 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor:
# if in_features != out_features
# -> result = W_shortcut(A(BN(x))) + W_2(~D(A(BN(W_1(A(BN(x))))))
x = self.start_norm(x)
if self.config["use_skip_connection"]:
residual = self.shortcut(x)
residual = self.shortcut(x)

# TODO make the below code better
if self.config["use_skip_connection"]:
@@ -337,13 +329,8 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor:
x2 = self.shake_shake_layers(x)
alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda)
x = shake_shake(x1, x2, alpha, beta)
-            else:
-                x = self.layers(x)
-        else:
-            x = self.layers(x)
-
-        if self.config["use_skip_connection"]:
-            if self.config["multi_branch_choice"] == 'shake-drop':
+            elif self.config["multi_branch_choice"] == 'shake-drop':
+                x = self.layers(x)
alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda)
bl = shake_drop_get_bl(
self.block_index,
@@ -353,8 +340,11 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor:
x.is_cuda,
)
x = shake_drop(x, alpha, beta, bl)
+            else:
+                x = self.layers(x)

-        if self.config["use_skip_connection"]:
            x = x + residual
+        else:
+            x = self.layers(x)

return x
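The refactor above moves the shortcut, the shake-shake / shake-drop branches, and the residual addition under a single `use_skip_connection` check, with a plain `self.layers(x)` path otherwise. A simplified, self-contained sketch of that control flow; the stub layers and the averaging/identity stand-ins for `shake_shake` and `shake_drop` are assumptions, not the repository's implementation:

```python
# Toy residual block illustrating the restructured forward pass.
import torch
import torch.nn as nn

class ToyResBlock(nn.Module):
    def __init__(self, features: int, use_skip: bool, branch: str = "none"):
        super().__init__()
        self.use_skip = use_skip
        self.branch = branch
        self.layers = nn.Sequential(nn.Linear(features, features), nn.ReLU())
        self.shake_shake_layers = nn.Sequential(nn.Linear(features, features), nn.ReLU())

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if self.use_skip:
            residual = x  # the real block applies self.shortcut / self.start_norm here
            if self.branch == "shake-shake":
                x1, x2 = self.layers(x), self.shake_shake_layers(x)
                x = 0.5 * (x1 + x2)   # stand-in for shake_shake(x1, x2, alpha, beta)
            elif self.branch == "shake-drop":
                x = self.layers(x)    # stand-in: shake_drop would rescale this output
            else:
                x = self.layers(x)
            x = x + residual          # residual only exists on the skip-connection path
        else:
            x = self.layers(x)
        return x

out = ToyResBlock(8, use_skip=True, branch="shake-shake")(torch.randn(4, 8))
print(out.shape)  # torch.Size([4, 8])
```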
@@ -72,6 +72,7 @@ def build_backbone(self, input_shape: Tuple[int, ...]) -> None:
)
if self.config['use_batch_norm']:
layers.append(torch.nn.BatchNorm1d(self.config["num_units_%i" % self.config['num_groups']]))
+        layers.append(_activations[self.config["activation"]]())
Collaborator:
We need it, right? (this change originates in this PR, right?)

Author:
Yes, makes it consistent with the ResNetBackbone.

backbone = torch.nn.Sequential(*layers)
self.backbone = backbone
return backbone
@@ -23,7 +23,6 @@ def build_head(self, input_shape: Tuple[int, ...], output_shape: Tuple[int, ...]
layers = []
in_features = np.prod(input_shape).item()
out_features = np.prod(output_shape).item()
-        layers.append(_activations[self.config["activation"]]())
Collaborator:
Why deleted?

Author:
If I am not mistaken, that is because, firstly, we do not add an extra layer here and, secondly, this last nonlinearity was intended for the residual block and is now already added there.

I will double check.

Author:
I confirmed it: it is consistent; the last activation layer for the final layer of the last block is present in every network_backbone. Worth mentioning, though, is that the MLP and ShapedMLP backbones do not have batch norm inside.

layers.append(nn.Linear(in_features=in_features,
out_features=out_features))
return nn.Sequential(*layers)
@@ -120,7 +120,7 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader:
)

if X['val_indices'] is not None:
-            val_dataset = datamanager.get_dataset_for_training(split_id=X['split_id'], train=False)
+            val_dataset = datamanager.get_dataset_for_training(split_id=X['split_id'], train=False, subset=1)
self.val_data_loader = torch.utils.data.DataLoader(
val_dataset,
batch_size=min(self.batch_size, len(val_dataset)),
@@ -24,7 +24,7 @@ class AdversarialTrainer(BaseTrainerComponent):
def __init__(
self,
epsilon: float,
-        weighted_loss: bool = False,
+        weighted_loss: int = 0,
random_state: Optional[np.random.RandomState] = None,
use_stochastic_weight_averaging: bool = False,
use_snapshot_ensemble: bool = False,
@@ -159,8 +159,8 @@ def get_hyperparameter_search_space(
dataset_properties: Optional[Dict] = None,
weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace(
hyperparameter="weighted_loss",
-            value_range=[True, False],
-            default_value=True),
+            value_range=[1],
Collaborator:
Suggested change:
-            value_range=[1],
+            value_range=(1, ),

+            default_value=1),
la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace(
hyperparameter="la_steps",
value_range=(5, 10),
@@ -226,9 +226,17 @@ def get_hyperparameter_search_space(
parent_hyperparameter=parent_hyperparameter
)

"""
# TODO, decouple the weighted loss from the trainer
if dataset_properties is not None:
if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
add_hyperparameter(cs, weighted_loss, CategoricalHyperparameter)
"""
# TODO, decouple the weighted loss from the trainer. Uncomment the code above and
# remove the code below. Also update the method signature, so the weighted loss
# is not a constant.
if dataset_properties is not None:
if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
add_hyperparameter(cs, weighted_loss, Constant)

return cs
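For context on the `weighted_loss` change that repeats across the trainers in this PR: the hyperparameter moves from a boolean `CategoricalHyperparameter` to a `Constant` with value 1, added only for classification tasks. A sketch of the difference using plain ConfigSpace (classic `add_hyperparameter` API); the `is_classification` flag is a stand-in for the `STRING_TO_TASK_TYPES` / `CLASSIFICATION_TASKS` lookup, and autoPyTorch's own `add_hyperparameter` helper is bypassed here:

```python
# Sketch: weighted_loss as a Constant (new) vs. a Categorical (old) hyperparameter.
from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter, Constant

def build_space(is_classification: bool, use_constant: bool = True) -> ConfigurationSpace:
    cs = ConfigurationSpace()
    if is_classification:
        if use_constant:
            # new behaviour: weighted loss is always on for classification
            cs.add_hyperparameter(Constant("weighted_loss", 1))
        else:
            # previous behaviour: weighted loss was searched over {True, False}
            cs.add_hyperparameter(
                CategoricalHyperparameter("weighted_loss", choices=[True, False], default_value=True)
            )
    return cs

print(build_space(is_classification=True))
```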
@@ -40,22 +40,23 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray,
indices = self.random_state.choice(range(1, size), max(1, np.int32(size * self.patch_ratio)),
replace=False)

-        if not isinstance(self.numerical_columns, typing.Iterable):
+        """if not isinstance(self.numerical_columns, typing.Iterable):
raise ValueError("{} requires numerical columns information of {}"
"to prepare data got {}.".format(self.__class__.__name__,
typing.Iterable,
self.numerical_columns))
numerical_indices = torch.tensor(self.numerical_columns)
categorical_indices = torch.tensor([index for index in indices if index not in self.numerical_columns])

# We use an ordinal encoder on the categorical columns of tabular data
# -1 is the conceptual equivalent to 0 in a image, that does not
# have color as a feature and hence the network has to learn to deal
# without this data. For numerical columns we use 0 to cutout the features
# similar to the effect that setting 0 as a pixel value in an image.
X[:, categorical_indices.long()] = self.CATEGORICAL_VALUE
X[:, numerical_indices.long()] = self.NUMERICAL_VALUE

"""
X[:, indices] = 0
lam = 1
y_a = y
y_b = y
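With the per-dtype handling quoted out above, the cutout step reduces to sampling a `patch_ratio` fraction of the column indices and zeroing them for the whole batch. A standalone NumPy sketch of that step; the array shape and seed are illustrative only:

```python
# Sketch of the tabular cutout step: pick random feature columns and zero them out.
import numpy as np

def row_cutout(X: np.ndarray, patch_ratio: float, random_state: np.random.RandomState) -> np.ndarray:
    size = X.shape[1]  # number of feature columns
    n_drop = max(1, np.int32(size * patch_ratio))
    # mirrors the sampling in data_preparation (columns drawn from range(1, size))
    indices = random_state.choice(range(1, size), n_drop, replace=False)
    X = X.copy()
    X[:, indices] = 0  # mirrors `X[:, indices] = 0` in the diff
    return X

X = np.random.rand(4, 10)
X_cut = row_cutout(X, patch_ratio=0.3, random_state=np.random.RandomState(0))
print(X_cut.shape)  # (4, 10), with three columns zeroed
```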
@@ -7,7 +7,7 @@

class StandardTrainer(BaseTrainerComponent):
def __init__(self,
-                 weighted_loss: bool = False,
+                 weighted_loss: int = 0,
use_stochastic_weight_averaging: bool = False,
use_snapshot_ensemble: bool = False,
se_lastk: int = 3,
@@ -18,7 +18,7 @@ def __init__(self,
This class handles the training of a network for a single given epoch.

Args:
-            weighted_loss (bool): whether to use weighted loss
+            weighted_loss (int): whether to use weighted loss

"""
super().__init__(random_state=random_state,
16 changes: 12 additions & 4 deletions autoPyTorch/pipeline/components/training/trainer/base_trainer.py
@@ -175,7 +175,7 @@ class BaseTrainerComponent(autoPyTorchTrainingComponent):
"""
Base class for training
Args:
-        weighted_loss (bool, default=False): In case for classification, whether to weight
+        weighted_loss (int, default=0): In case for classification, whether to weight
the loss function according to the distribution of classes in the target
use_stochastic_weight_averaging (bool, default=True): whether to use stochastic
weight averaging. Stochastic weight averaging is a simple average of
Expand All @@ -190,7 +190,7 @@ class BaseTrainerComponent(autoPyTorchTrainingComponent):
random_state:
**lookahead_config:
"""
-    def __init__(self, weighted_loss: bool = False,
+    def __init__(self, weighted_loss: int = 0,
use_stochastic_weight_averaging: bool = True,
use_snapshot_ensemble: bool = True,
se_lastk: int = 3,
@@ -537,8 +537,8 @@ def get_hyperparameter_search_space(
dataset_properties: Optional[Dict] = None,
weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace(
hyperparameter="weighted_loss",
-            value_range=[True, False],
-            default_value=True),
+            value_range=[1],
Collaborator:
Suggested change:
-            value_range=[1],
+            value_range=(1, ),

Author:
To check if passing a tuple is what is expected.

Contributor:
It expects an Iterable.

Author:
True, but I guess we can be consistent with the other hyperparameter spaces too, so I will do the change.

+            default_value=1),
la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace(
hyperparameter="la_steps",
value_range=(5, 10),
@@ -599,9 +599,17 @@ def get_hyperparameter_search_space(
parent_hyperparameter=parent_hyperparameter
)

"""
# TODO, decouple the weighted loss from the trainer
if dataset_properties is not None:
if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
add_hyperparameter(cs, weighted_loss, CategoricalHyperparameter)
"""
# TODO, decouple the weighted loss from the trainer. Uncomment the code above and
# remove the code below. Also update the method signature, so the weighted loss
# is not a constant.
if dataset_properties is not None:
if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
add_hyperparameter(cs, weighted_loss, Constant)

return cs
@@ -402,6 +402,7 @@ def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> 'TrainerChoic
torch.cuda.empty_cache()

if self.choice.use_stochastic_weight_averaging and self.choice.swa_updated:

# update batch norm statistics
swa_utils.update_bn(X['train_data_loader'], self.choice.swa_model.double())
# change model
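The `swa_updated` branch above refreshes the BatchNorm statistics of the averaged model before it replaces the trained one, since SWA averages weights but not running statistics. A minimal sketch of that step with PyTorch's `torch.optim.swa_utils`; the toy model, loader, and the omitted training loop are assumptions:

```python
# Sketch: recompute BatchNorm running stats for an SWA-averaged model.
import torch
from torch import nn
from torch.optim import swa_utils
from torch.utils.data import DataLoader, TensorDataset

model = nn.Sequential(nn.Linear(8, 16), nn.BatchNorm1d(16), nn.ReLU(), nn.Linear(16, 2))
swa_model = swa_utils.AveragedModel(model)
loader = DataLoader(TensorDataset(torch.randn(64, 8)), batch_size=16)

# ... a training loop would call swa_model.update_parameters(model) after each epoch ...

swa_utils.update_bn(loader, swa_model)  # one pass over the train loader to refresh BN stats
```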
14 changes: 11 additions & 3 deletions autoPyTorch/pipeline/components/training/trainer/cutout_utils.py
@@ -20,7 +20,7 @@
class CutOut:
def __init__(self, patch_ratio: float,
cutout_prob: float,
-                 weighted_loss: bool = False,
+                 weighted_loss: int = 0,
random_state: Optional[np.random.RandomState] = None,
use_stochastic_weight_averaging: bool = False,
use_snapshot_ensemble: bool = False,
@@ -63,8 +63,8 @@ def get_hyperparameter_search_space(
dataset_properties: Optional[Dict] = None,
weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace(
hyperparameter="weighted_loss",
-            value_range=[True, False],
-            default_value=True),
+            value_range=[1],
Collaborator:
Suggested change:
-            value_range=[1],
+            value_range=(1, ),

+            default_value=1),
la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace(
hyperparameter="la_steps",
value_range=(5, 10),
@@ -136,9 +136,17 @@ def get_hyperparameter_search_space(
parent_hyperparameter=parent_hyperparameter
)

"""
# TODO, decouple the weighted loss from the trainer
if dataset_properties is not None:
if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
add_hyperparameter(cs, weighted_loss, CategoricalHyperparameter)
"""
# TODO, decouple the weighted loss from the trainer. Uncomment the code above and
# remove the code below. Also update the method signature, so the weighted loss
# is not a constant.
if dataset_properties is not None:
if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
add_hyperparameter(cs, weighted_loss, Constant)

return cs
15 changes: 12 additions & 3 deletions autoPyTorch/pipeline/components/training/trainer/mixup_utils.py
@@ -19,7 +19,7 @@

class MixUp:
def __init__(self, alpha: float,
-                 weighted_loss: bool = False,
+                 weighted_loss: int = 0,
random_state: Optional[np.random.RandomState] = None,
use_stochastic_weight_averaging: bool = False,
use_snapshot_ensemble: bool = False,
@@ -61,8 +61,8 @@ def get_hyperparameter_search_space(
dataset_properties: Optional[Dict] = None,
weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace(
hyperparameter="weighted_loss",
-            value_range=[True, False],
-            default_value=True),
+            value_range=[1],
Collaborator:
Suggested change:
-            value_range=[1],
+            value_range=(1, ),

+            default_value=1),
la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace(
hyperparameter="la_steps",
value_range=(5, 10),
@@ -127,9 +127,18 @@ def get_hyperparameter_search_space(
la_config_space,
parent_hyperparameter=parent_hyperparameter
)

"""
# TODO, decouple the weighted loss from the trainer
if dataset_properties is not None:
if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
add_hyperparameter(cs, weighted_loss, CategoricalHyperparameter)
"""
# TODO, decouple the weighted loss from the trainer. Uncomment the code above and
# remove the code below. Also update the method signature, so the weighted loss
# is not a constant.
if dataset_properties is not None:
if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
add_hyperparameter(cs, weighted_loss, Constant)

return cs
2 changes: 1 addition & 1 deletion autoPyTorch/utils/implementations.py
@@ -35,7 +35,7 @@ def __call__(self, y: Union[np.ndarray, torch.Tensor]) -> np.ndarray:
weights = (np.ones(y.shape[1]) * weight_per_class) / np.maximum(counts, 1)
else:
classes, counts = np.unique(y, axis=0, return_counts=True)
-            classes, counts = classes[::-1], counts[::-1]
+            # classes, counts = classes[::-1], counts[::-1]
weight_per_class = total_weight / classes.shape[0]
weights = (np.ones(classes.shape[0]) * weight_per_class) / counts

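A small worked example of the class-weighting formula kept above; `total_weight` is computed earlier in implementations.py and is not part of this diff, so taking it as the number of samples here is an assumption made only for illustration:

```python
# Worked example of the per-class weight computation shown in the hunk above.
import numpy as np

y = np.array([0, 0, 0, 0, 0, 0, 1, 1, 2, 2])     # imbalanced toy labels

classes, counts = np.unique(y, axis=0, return_counts=True)
total_weight = y.shape[0]                          # assumption, see lead-in
weight_per_class = total_weight / classes.shape[0]
weights = (np.ones(classes.shape[0]) * weight_per_class) / counts

print(dict(zip(classes.tolist(), weights.round(3).tolist())))
# rarer classes get larger weights: {0: 0.556, 1: 1.667, 2: 1.667}
```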