diff --git a/autoPyTorch/pipeline/base_pipeline.py b/autoPyTorch/pipeline/base_pipeline.py
index 90c0f6362..7974359f5 100644
--- a/autoPyTorch/pipeline/base_pipeline.py
+++ b/autoPyTorch/pipeline/base_pipeline.py
@@ -500,7 +500,7 @@ def get_fit_requirements(self) -> List[FitRequirement]:
         Returns:
             List[NamedTuple]: List of FitRequirements
         """
-        fit_requirements = list()  # List[FitRequirement]
+        fit_requirements: List[FitRequirement] = list()
         for name, step in self.steps:
             step_requirements = step.get_fit_requirements()
             if step_requirements:
@@ -569,6 +569,7 @@ def get_pipeline_representation(self) -> Dict[str, str]:
 
     @staticmethod
     def get_default_pipeline_options() -> Dict[str, Any]:
+
         return {
             'num_run': 0,
             'device': 'cpu',
@@ -578,5 +579,6 @@ def get_default_pipeline_options() -> Dict[str, Any]:
             'torch_num_threads': 1,
             'early_stopping': 10,
             'use_tensorboard_logger': True,
+            'use_pynisher': False,
             'metrics_during_training': True
         }
diff --git a/autoPyTorch/pipeline/components/setup/lr_scheduler/CosineAnnealingWarmRestarts.py b/autoPyTorch/pipeline/components/setup/lr_scheduler/CosineAnnealingWarmRestarts.py
index e46248b92..46e3fdd26 100644
--- a/autoPyTorch/pipeline/components/setup/lr_scheduler/CosineAnnealingWarmRestarts.py
+++ b/autoPyTorch/pipeline/components/setup/lr_scheduler/CosineAnnealingWarmRestarts.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Dict, Optional, Union
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import UniformIntegerHyperparameter
diff --git a/autoPyTorch/pipeline/components/setup/lr_scheduler/ReduceLROnPlateau.py b/autoPyTorch/pipeline/components/setup/lr_scheduler/ReduceLROnPlateau.py
index ed0702796..490d6709f 100644
--- a/autoPyTorch/pipeline/components/setup/lr_scheduler/ReduceLROnPlateau.py
+++ b/autoPyTorch/pipeline/components/setup/lr_scheduler/ReduceLROnPlateau.py
@@ -1,4 +1,5 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Dict, Optional, Union
+
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
     CategoricalHyperparameter,
diff --git a/autoPyTorch/pipeline/components/setup/network/base_network.py b/autoPyTorch/pipeline/components/setup/network/base_network.py
index daba6307d..cb981e131 100644
--- a/autoPyTorch/pipeline/components/setup/network/base_network.py
+++ b/autoPyTorch/pipeline/components/setup/network/base_network.py
@@ -128,6 +128,7 @@ def predict(self, loader: torch.utils.data.DataLoader) -> torch.Tensor:
         return Y_snapshot_preds_tensor.mean(dim=0).cpu().numpy()
 
     def _predict(self, network: torch.nn.Module, loader: torch.utils.data.DataLoader) -> torch.Tensor:
+        network.to(self.device)
         network.float()
         network.eval()
         # Batch prediction
@@ -136,10 +137,10 @@ def _predict(self, network: torch.nn.Module, loader: torch.utils.data.DataLoader
         for i, (X_batch, Y_batch) in enumerate(loader):
             # Predict on batch
             X_batch = X_batch.float().to(self.device)
-            Y_batch_pred = network(X_batch).detach().cpu()
+            Y_batch_pred = network(X_batch)
             if self.final_activation is not None:
                 Y_batch_pred = self.final_activation(Y_batch_pred)
-            Y_batch_preds.append(Y_batch_pred)
+            Y_batch_preds.append(Y_batch_pred.detach().cpu())
 
         return torch.cat(Y_batch_preds, 0)
 
diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/MLPBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/MLPBackbone.py
index f3fb4d7a2..d2c804d32 100644
--- a/autoPyTorch/pipeline/components/setup/network_backbone/MLPBackbone.py
+++ b/autoPyTorch/pipeline/components/setup/network_backbone/MLPBackbone.py
@@ -89,13 +89,13 @@ def get_hyperparameter_search_space(
         num_units: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="num_units",
                                                                          value_range=(10, 1024),
                                                                          default_value=200,
+                                                                         log=True
                                                                          ),
         dropout: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="dropout",
                                                                        value_range=(0, 0.8),
                                                                        default_value=0.5,
                                                                        ),
     ) -> ConfigurationSpace:
-
         cs = ConfigurationSpace()
 
         # The number of hidden layers the network will have.
@@ -116,6 +116,7 @@ def get_hyperparameter_search_space(
                                                              default_value=num_units.default_value,
                                                              log=num_units.log)
             n_units_hp = get_hyperparameter(n_units_search_space, UniformIntegerHyperparameter)
+
             cs.add_hyperparameter(n_units_hp)
 
             if i > int(min_mlp_layers):
diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py
index 8ee3ed19b..cd8a07525 100644
--- a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py
+++ b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py
@@ -113,12 +113,14 @@ def get_hyperparameter_search_space(
                                                                                  default_value=True,
                                                                                  ),
         multi_branch_choice: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="mb_choice",
-                                                                                    value_range=('None', 'shake-shake', 'shake-drop'),
+                                                                                    value_range=('None', 'shake-shake',
+                                                                                                 'shake-drop'),
                                                                                     default_value='shake-drop',
                                                                                     ),
         num_units: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="num_units",
                                                                          value_range=(10, 1024),
                                                                          default_value=200,
+                                                                         log=True
                                                                          ),
         activation: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="activation",
                                                                           value_range=tuple(_activations.keys()),
diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedMLPBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedMLPBackbone.py
index 46574642c..194f018aa 100644
--- a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedMLPBackbone.py
+++ b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedMLPBackbone.py
@@ -96,11 +96,11 @@ def get_hyperparameter_search_space(
         max_units: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="max_units",
                                                                          value_range=(10, 1024),
                                                                          default_value=200,
-                                                                         ),
+                                                                         log=True),
         output_dim: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="output_dim",
                                                                           value_range=(10, 1024),
                                                                           default_value=200,
-                                                                          ),
+                                                                          log=True),
         mlp_shape: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="mlp_shape",
                                                                          value_range=('funnel', 'long_funnel',
                                                                                       'diamond', 'hexagon',
@@ -114,7 +114,6 @@ def get_hyperparameter_search_space(
                                                                        ),
     ) -> ConfigurationSpace:
-
         cs = ConfigurationSpace()
 
         # The number of groups that will compose the resnet. That is,
diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py
index 59cd45d5d..217253f91 100644
--- a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py
+++ b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py
@@ -98,6 +98,7 @@ def get_hyperparameter_search_space(  # type: ignore[override]
         output_dim: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="output_dim",
                                                                           value_range=(10, 1024),
                                                                           default_value=200,
+                                                                          log=True
                                                                           ),
         num_groups: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="num_groups",
                                                                           value_range=(1, 15),
@@ -116,12 +117,15 @@ def get_hyperparameter_search_space(  # type: ignore[override]
                                                                                    default_value=True,
                                                                                    ),
         multi_branch_choice: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="mb_choice",
-                                                                                    value_range=('None', 'shake-shake', 'shake-drop'),
+                                                                                    value_range=('None', 'shake-shake',
+                                                                                                 'shake-drop'),
                                                                                     default_value='shake-drop',
                                                                                     ),
         max_units: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="max_units",
                                                                          value_range=(10, 1024),
-                                                                         default_value=200),
+                                                                         default_value=200,
+                                                                         log=True
+                                                                         ),
         activation: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="activation",
                                                                           value_range=tuple(_activations.keys()),
                                                                           default_value=list(_activations.keys())[0]),
@@ -154,6 +158,7 @@ def get_hyperparameter_search_space(  # type: ignore[override]
 
         use_dropout = get_hyperparameter(use_dropout, CategoricalHyperparameter)
         max_dropout = get_hyperparameter(max_dropout, UniformFloatHyperparameter)
+        cs.add_hyperparameters([use_dropout, max_dropout])
         cs.add_condition(CS.EqualsCondition(max_dropout, use_dropout, True))
 
         use_sc = get_hyperparameter(use_skip_connection, CategoricalHyperparameter)
diff --git a/autoPyTorch/pipeline/components/setup/optimizer/AdamOptimizer.py b/autoPyTorch/pipeline/components/setup/optimizer/AdamOptimizer.py
index 2fef66aac..ab722940e 100644
--- a/autoPyTorch/pipeline/components/setup/optimizer/AdamOptimizer.py
+++ b/autoPyTorch/pipeline/components/setup/optimizer/AdamOptimizer.py
@@ -93,12 +93,13 @@ def get_hyperparameter_search_space(
                                                              value_range=(0.9, 0.9999),
                                                              default_value=0.9),
         use_weight_decay: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="use_weight_decay",
-                                                                                value_range=(True, False),
-                                                                                default_value=True,
-                                                                                ),
+                                                                                 value_range=(True, False),
+                                                                                 default_value=True,
+                                                                                 ),
         weight_decay: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="weight_decay",
-                                                                            value_range=(0.0, 0.1),
-                                                                            default_value=0.0),
+                                                                            value_range=(1E-7, 0.1),
+                                                                            default_value=1E-4,
+                                                                            log=True),
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
 
diff --git a/autoPyTorch/pipeline/components/setup/optimizer/AdamWOptimizer.py b/autoPyTorch/pipeline/components/setup/optimizer/AdamWOptimizer.py
index f7df85756..4ac43bc87 100644
--- a/autoPyTorch/pipeline/components/setup/optimizer/AdamWOptimizer.py
+++ b/autoPyTorch/pipeline/components/setup/optimizer/AdamWOptimizer.py
@@ -97,8 +97,9 @@ def get_hyperparameter_search_space(
                                                                                 default_value=True,
                                                                                 ),
         weight_decay: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="weight_decay",
-                                                                            value_range=(0.0, 0.1),
-                                                                            default_value=0.0),
+                                                                            value_range=(1E-7, 0.1),
+                                                                            default_value=1E-4,
+                                                                            log=True),
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
 
diff --git a/autoPyTorch/pipeline/components/setup/optimizer/RMSpropOptimizer.py b/autoPyTorch/pipeline/components/setup/optimizer/RMSpropOptimizer.py
index d1dc6f077..a718ff1bd 100644
--- a/autoPyTorch/pipeline/components/setup/optimizer/RMSpropOptimizer.py
+++ b/autoPyTorch/pipeline/components/setup/optimizer/RMSpropOptimizer.py
@@ -97,8 +97,9 @@ def get_hyperparameter_search_space(
                                                                                 default_value=True,
                                                                                 ),
         weight_decay: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="weight_decay",
-                                                                            value_range=(0.0, 0.1),
-                                                                            default_value=0.0),
+                                                                            value_range=(1E-7, 0.1),
+                                                                            default_value=1E-4,
+                                                                            log=True),
         momentum: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="momentum",
                                                                         value_range=(0.0, 0.99),
                                                                         default_value=0.0),
@@ -109,7 +110,6 @@ def get_hyperparameter_search_space(
         add_hyperparameter(cs, lr, UniformFloatHyperparameter)
         add_hyperparameter(cs, alpha, UniformFloatHyperparameter)
         add_hyperparameter(cs, momentum, UniformFloatHyperparameter)
-        weight_decay = get_hyperparameter(weight_decay, UniformFloatHyperparameter)
         use_weight_decay = get_hyperparameter(use_weight_decay, CategoricalHyperparameter)
         cs.add_hyperparameters([use_weight_decay, weight_decay])
 
diff --git a/autoPyTorch/pipeline/components/setup/optimizer/SGDOptimizer.py b/autoPyTorch/pipeline/components/setup/optimizer/SGDOptimizer.py
index 492bdf97e..9b240f970 100644
--- a/autoPyTorch/pipeline/components/setup/optimizer/SGDOptimizer.py
+++ b/autoPyTorch/pipeline/components/setup/optimizer/SGDOptimizer.py
@@ -88,8 +88,9 @@ def get_hyperparameter_search_space(
                                                                                 default_value=True,
                                                                                 ),
         weight_decay: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="weight_decay",
-                                                                            value_range=(0.0, 0.1),
-                                                                            default_value=0.0),
+                                                                            value_range=(1E-7, 0.1),
+                                                                            default_value=1E-4,
+                                                                            log=True),
         momentum: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="momentum",
                                                                         value_range=(0.0, 0.99),
                                                                         default_value=0.0),
diff --git a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py
index f39194477..7a2e6f746 100644
--- a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py
+++ b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py
@@ -262,10 +262,12 @@ def get_hyperparameter_search_space(
         dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None,
         batch_size: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="batch_size",
                                                                           value_range=(32, 320),
-                                                                          default_value=64)
+                                                                          default_value=64,
+                                                                          log=True)
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
         add_hyperparameter(cs, batch_size, UniformIntegerHyperparameter)
+
         return cs
 
     def __str__(self) -> str:
diff --git a/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py b/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py
index 166c85481..e8344844d 100644
--- a/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py
+++ b/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py
@@ -1,5 +1,5 @@
-import typing
 from copy import deepcopy
+from typing import Any, Callable, Dict, Optional, Tuple, Union
 
 from ConfigSpace.conditions import EqualsCondition
 from ConfigSpace.configuration_space import ConfigurationSpace
@@ -17,6 +17,7 @@
 from autoPyTorch.constants import CLASSIFICATION_TASKS, STRING_TO_TASK_TYPES
 from autoPyTorch.pipeline.components.training.trainer.base_trainer import BaseTrainerComponent
 from autoPyTorch.pipeline.components.training.trainer.utils import Lookahead
+from autoPyTorch.utils.common import HyperparameterSearchSpace, add_hyperparameter, get_hyperparameter
 
 
 class AdversarialTrainer(BaseTrainerComponent):
@@ -24,12 +25,12 @@ def __init__(
         self,
         epsilon: float,
         weighted_loss: bool = False,
-        random_state: typing.Optional[np.random.RandomState] = None,
+        random_state: Optional[np.random.RandomState] = None,
         use_stochastic_weight_averaging: bool = False,
         use_snapshot_ensemble: bool = False,
         se_lastk: int = 3,
         use_lookahead_optimizer: bool = True,
-        **lookahead_config: typing.Any
+        **lookahead_config: Any
     ):
         """
         This class handles the training of a network for a single given epoch.
@@ -48,7 +49,7 @@ def __init__(
         self.epsilon = epsilon
 
     def data_preparation(self, X: np.ndarray, y: np.ndarray,
-                         ) -> typing.Tuple[typing.Tuple[np.ndarray, np.ndarray], typing.Dict[str, np.ndarray]]:
+                         ) -> Tuple[Tuple[np.ndarray, np.ndarray], Dict[str, np.ndarray]]:
         """Generate adversarial examples from the original inputs.
 
         Args:
@@ -63,7 +64,7 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray,
         return (X, X_adversarial), {'y_a': y}
 
     def criterion_preparation(self, y_a: np.ndarray, y_b: np.ndarray = None, lam: float = 1.0
-                              ) -> typing.Callable:
+                              ) -> Callable:
         # Initial implementation, consider the adversarial loss and the normal network loss
         # equally.
         return lambda criterion, pred, adversarial_pred: 0.5 * criterion(pred, y_a) + \
@@ -142,8 +143,8 @@ def fgsm_attack(
         return adv_data
 
     @staticmethod
-    def get_properties(dataset_properties: typing.Optional[typing.Dict[str, typing.Any]] = None
-                       ) -> typing.Dict[str, typing.Union[str, bool]]:
+    def get_properties(dataset_properties: Optional[Dict[str, Any]] = None
+                       ) -> Dict[str, Union[str, bool]]:
         return {
             'shortname': 'AdversarialTrainer',
@@ -155,53 +156,67 @@ def get_properties(dataset_properties: typing.Optional[typing.Dict[str, typing.A
 
     @staticmethod
     def get_hyperparameter_search_space(
-        dataset_properties: typing.Optional[typing.Dict] = None,
-        weighted_loss: typing.Tuple[typing.Tuple, bool] = ((True, False), True),
-        use_stochastic_weight_averaging: typing.Tuple[typing.Tuple, bool] = ((True, False), True),
-        use_snapshot_ensemble: typing.Tuple[typing.Tuple, bool] = ((True, False), True),
-        se_lastk: typing.Tuple[typing.Tuple, int] = ((3,), 3),
-        use_lookahead_optimizer: typing.Tuple[typing.Tuple, bool] = ((True, False), True),
-        la_steps: typing.Tuple[typing.Tuple, int, bool] = ((5, 10), 6, False),
-        la_alpha: typing.Tuple[typing.Tuple, float, bool] = ((0.5, 0.8), 0.6, False),
-        epsilon: typing.Tuple[typing.Tuple[float, float], float] = ((0.05, 0.2), 0.2),
+        dataset_properties: Optional[Dict] = None,
+        weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace(
+            hyperparameter="weighted_loss",
+            value_range=(True, False),
+            default_value=True),
+        la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace(
+            hyperparameter="la_steps",
+            value_range=(5, 10),
+            default_value=6,
+            log=False),
+        la_alpha: HyperparameterSearchSpace = HyperparameterSearchSpace(
+            hyperparameter="la_alpha",
+            value_range=(0.5, 0.8),
+            default_value=0.6,
+            log=False),
+        use_lookahead_optimizer: HyperparameterSearchSpace = HyperparameterSearchSpace(
+            hyperparameter="use_lookahead_optimizer",
+            value_range=(True, False),
+            default_value=True),
+        use_stochastic_weight_averaging: HyperparameterSearchSpace = HyperparameterSearchSpace(
+            hyperparameter="use_stochastic_weight_averaging",
+            value_range=(True, False),
+            default_value=True),
+        use_snapshot_ensemble: HyperparameterSearchSpace = HyperparameterSearchSpace(
hyperparameter="use_snapshot_ensemble", + value_range=(True, False), + default_value=True), + se_lastk: HyperparameterSearchSpace = HyperparameterSearchSpace( + hyperparameter="se_lastk", + value_range=(3,), + default_value=3), + epsilon: HyperparameterSearchSpace = HyperparameterSearchSpace( + hyperparameter="epsilon", + value_range=(0.05, 0.2), + default_value=0.2), ) -> ConfigurationSpace: - epsilon = UniformFloatHyperparameter( - "epsilon", epsilon[0][0], epsilon[0][1], default_value=epsilon[1]) - weighted_loss = CategoricalHyperparameter("weighted_loss", choices=weighted_loss[0], - default_value=weighted_loss[1]) - - use_swa = CategoricalHyperparameter("use_stochastic_weight_averaging", - choices=use_stochastic_weight_averaging[0], - default_value=use_stochastic_weight_averaging[1]) - use_se = CategoricalHyperparameter("use_snapshot_ensemble", - choices=use_snapshot_ensemble[0], - default_value=use_snapshot_ensemble[1]) - - # Note, this is not easy to be considered as a hyperparameter. - # When used with cyclic learning rates, it depends on the number - # of restarts. - se_lastk = Constant('se_lastk', se_lastk[1]) - - use_lookahead_optimizer = CategoricalHyperparameter("use_lookahead_optimizer", - choices=use_lookahead_optimizer[0], - default_value=use_lookahead_optimizer[1]) - - config_space = Lookahead.get_hyperparameter_search_space(la_steps=la_steps, - la_alpha=la_alpha) - parent_hyperparameter = {'parent': use_lookahead_optimizer, 'value': True} - cs = ConfigurationSpace() - cs.add_hyperparameters([use_swa, use_se, se_lastk, use_lookahead_optimizer]) + + add_hyperparameter(cs, epsilon, UniformFloatHyperparameter) + + get_hyperparameter(se_lastk, Constant) + add_hyperparameter(cs, use_stochastic_weight_averaging, CategoricalHyperparameter) + use_snapshot_ensemble = get_hyperparameter(use_snapshot_ensemble, CategoricalHyperparameter) + se_lastk = get_hyperparameter(se_lastk, Constant) + cs.add_hyperparameters([use_snapshot_ensemble, se_lastk]) + cond = EqualsCondition(se_lastk, use_snapshot_ensemble, True) + cs.add_condition(cond) + + use_lookahead_optimizer = get_hyperparameter(use_lookahead_optimizer, CategoricalHyperparameter) + cs.add_hyperparameter(use_lookahead_optimizer) + la_config_space = Lookahead.get_hyperparameter_search_space(la_steps=la_steps, + la_alpha=la_alpha) + parent_hyperparameter = {'parent': use_lookahead_optimizer, 'value': True} cs.add_configuration_space( Lookahead.__name__, - config_space, + la_config_space, parent_hyperparameter=parent_hyperparameter ) - cond = EqualsCondition(se_lastk, use_se, True) - cs.add_condition(cond) - cs.add_hyperparameters([epsilon]) if dataset_properties is not None: if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS: - cs.add_hyperparameters([weighted_loss]) + add_hyperparameter(cs, weighted_loss, CategoricalHyperparameter) + return cs diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py index f801ed1e2..9ea9b9756 100644 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py @@ -29,7 +29,7 @@ from autoPyTorch.pipeline.components.training.trainer.utils import Lookahead from autoPyTorch.pipeline.components.training.metrics.utils import calculate_score from autoPyTorch.pipeline.components.training.trainer.utils import Lookahead, swa_average_function -from autoPyTorch.utils.common import FitRequirement +from 
+from autoPyTorch.utils.common import FitRequirement, HyperparameterSearchSpace, add_hyperparameter, get_hyperparameter
 from autoPyTorch.utils.implementations import get_loss_weight_strategy
 
 
@@ -570,49 +570,61 @@ def criterion_preparation(self, y_a: torch.Tensor, y_b: torch.Tensor = None, lam
         raise NotImplementedError()
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties: Optional[Dict] = None,
-                                        weighted_loss: Tuple[Tuple, bool] = ((True, False), True),
-                                        use_stochastic_weight_averaging: Tuple[Tuple, bool] = ((True, False), True),
-                                        use_snapshot_ensemble: Tuple[Tuple, bool] = ((True, False), True),
-                                        se_lastk: Tuple[Tuple, int] = ((3,), 3),
-                                        use_lookahead_optimizer: Tuple[Tuple, bool] = ((True, False), True),
-                                        la_steps: Tuple[Tuple, int, bool] = ((5, 10), 6, False),
-                                        la_alpha: Tuple[Tuple, float, bool] = ((0.5, 0.8), 0.6, False),
-                                        ) -> ConfigurationSpace:
-        weighted_loss = CategoricalHyperparameter("weighted_loss", choices=weighted_loss[0],
-                                                  default_value=weighted_loss[1])
-        use_swa = CategoricalHyperparameter("use_stochastic_weight_averaging",
-                                            choices=use_stochastic_weight_averaging[0],
-                                            default_value=use_stochastic_weight_averaging[1])
-        use_se = CategoricalHyperparameter("use_snapshot_ensemble",
-                                           choices=use_snapshot_ensemble[0],
-                                           default_value=use_snapshot_ensemble[1])
-
-        # Note, this is not easy to be considered as a hyperparameter.
-        # When used with cyclic learning rates, it depends on the number
-        # of restarts.
-        se_lastk = Constant('se_lastk', se_lastk[1])
-
-        use_lookahead_optimizer = CategoricalHyperparameter("use_lookahead_optimizer",
-                                                            choices=use_lookahead_optimizer[0],
-                                                            default_value=use_lookahead_optimizer[1])
-
-        config_space = Lookahead.get_hyperparameter_search_space(la_steps=la_steps,
-                                                                  la_alpha=la_alpha)
-        parent_hyperparameter = {'parent': use_lookahead_optimizer, 'value': True}
-
+    def get_hyperparameter_search_space(
+        dataset_properties: Optional[Dict] = None,
+        weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace(
+            hyperparameter="weighted_loss",
+            value_range=(True, False),
+            default_value=True),
+        la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace(
+            hyperparameter="la_steps",
+            value_range=(5, 10),
+            default_value=6,
+            log=False),
+        la_alpha: HyperparameterSearchSpace = HyperparameterSearchSpace(
+            hyperparameter="la_alpha",
+            value_range=(0.5, 0.8),
+            default_value=0.6,
+            log=False),
+        use_lookahead_optimizer: HyperparameterSearchSpace = HyperparameterSearchSpace(
+            hyperparameter="use_lookahead_optimizer",
+            value_range=(True, False),
+            default_value=True),
+        use_stochastic_weight_averaging: HyperparameterSearchSpace = HyperparameterSearchSpace(
+            hyperparameter="use_stochastic_weight_averaging",
+            value_range=(True, False),
+            default_value=True),
+        use_snapshot_ensemble: HyperparameterSearchSpace = HyperparameterSearchSpace(
+            hyperparameter="use_snapshot_ensemble",
+            value_range=(True, False),
+            default_value=True),
+        se_lastk: HyperparameterSearchSpace = HyperparameterSearchSpace(
+            hyperparameter="se_lastk",
+            value_range=(3,),
+            default_value=3),
+    ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
-        cs.add_hyperparameters([use_swa, use_se, se_lastk, use_lookahead_optimizer])
+
+        add_hyperparameter(cs, use_stochastic_weight_averaging, CategoricalHyperparameter)
+        use_snapshot_ensemble = get_hyperparameter(use_snapshot_ensemble, CategoricalHyperparameter)
+        se_lastk = get_hyperparameter(se_lastk, Constant)
+        cs.add_hyperparameters([use_snapshot_ensemble, se_lastk])
+        cond = EqualsCondition(se_lastk, use_snapshot_ensemble, True)
+        cs.add_condition(cond)
+
+        use_lookahead_optimizer = get_hyperparameter(use_lookahead_optimizer, CategoricalHyperparameter)
+        cs.add_hyperparameter(use_lookahead_optimizer)
+        la_config_space = Lookahead.get_hyperparameter_search_space(la_steps=la_steps,
+                                                                    la_alpha=la_alpha)
+        parent_hyperparameter = {'parent': use_lookahead_optimizer, 'value': True}
         cs.add_configuration_space(
             Lookahead.__name__,
-            config_space,
+            la_config_space,
             parent_hyperparameter=parent_hyperparameter
         )
-        cond = EqualsCondition(se_lastk, use_se, True)
-        cs.add_condition(cond)
         if dataset_properties is not None:
             if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
-                cs.add_hyperparameters([weighted_loss])
+                add_hyperparameter(cs, weighted_loss, CategoricalHyperparameter)
 
         return cs
diff --git a/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py b/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py
index e5ef2ee1d..74cc9f935 100644
--- a/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py
+++ b/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py
@@ -58,14 +58,6 @@ def get_hyperparameter_search_space(
             hyperparameter="weighted_loss",
             value_range=(True, False),
             default_value=True),
-        patch_ratio: HyperparameterSearchSpace = HyperparameterSearchSpace(
-            hyperparameter="patch_ratio",
-            value_range=(0, 1),
-            default_value=0.2),
-        cutout_prob: HyperparameterSearchSpace = HyperparameterSearchSpace(
-            hyperparameter="cutout_prob",
-            value_range=(0, 1),
-            default_value=0.2),
         la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace(
             hyperparameter="la_steps",
             value_range=(5, 10),
@@ -92,20 +84,29 @@ def get_hyperparameter_search_space(
             hyperparameter="se_lastk",
             value_range=(3, ),
             default_value=3),
+        patch_ratio: HyperparameterSearchSpace = HyperparameterSearchSpace(
+            hyperparameter="patch_ratio",
+            value_range=(0, 1),
+            default_value=0.2),
+        cutout_prob: HyperparameterSearchSpace = HyperparameterSearchSpace(
+            hyperparameter="cutout_prob",
+            value_range=(0, 1),
+            default_value=0.2),
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
         add_hyperparameter(cs, patch_ratio, UniformFloatHyperparameter)
         add_hyperparameter(cs, cutout_prob, UniformFloatHyperparameter)
-        add_hyperparameter(cs, se_lastk, Constant)
         add_hyperparameter(cs, use_stochastic_weight_averaging, CategoricalHyperparameter)
-        use_snapshot_ensemble = get_hyperparameter(cs, use_snapshot_ensemble, CategoricalHyperparameter)
-        cs.add_hyperparameter(use_snapshot_ensemble)
+        use_snapshot_ensemble = get_hyperparameter(use_snapshot_ensemble, CategoricalHyperparameter)
+        se_lastk = get_hyperparameter(se_lastk, Constant)
+        cs.add_hyperparameters([use_snapshot_ensemble, se_lastk])
         cond = EqualsCondition(se_lastk, use_snapshot_ensemble, True)
         cs.add_condition(cond)
 
-        add_hyperparameter(cs, use_lookahead_optimizer, CategoricalHyperparameter)
+        use_lookahead_optimizer = get_hyperparameter(use_lookahead_optimizer, CategoricalHyperparameter)
+        cs.add_hyperparameter(use_lookahead_optimizer)
         la_config_space = Lookahead.get_hyperparameter_search_space(la_steps=la_steps,
                                                                     la_alpha=la_alpha)
         parent_hyperparameter = {'parent': use_lookahead_optimizer, 'value': True}
diff --git a/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py b/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py
index 5b28b756f..f85474495 100644
--- a/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py
+++ b/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py
@@ -55,9 +55,6 @@ def get_hyperparameter_search_space(
         weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="weighted_loss",
                                                                              value_range=(True, False),
                                                                              default_value=True),
-        alpha: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="alpha",
-                                                                     value_range=(0, 1),
-                                                                     default_value=0.2),
         la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace(
             hyperparameter="la_steps",
             value_range=(5, 10),
@@ -84,18 +81,23 @@ def get_hyperparameter_search_space(
             hyperparameter="se_lastk",
             value_range=(3,),
             default_value=3),
+        alpha: HyperparameterSearchSpace = HyperparameterSearchSpace(
+            hyperparameter="alpha",
+            value_range=(0, 1),
+            default_value=0.2),
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
         add_hyperparameter(cs, alpha, UniformFloatHyperparameter)
-        add_hyperparameter(cs, se_lastk, Constant)
         add_hyperparameter(cs, use_stochastic_weight_averaging, CategoricalHyperparameter)
-        use_snapshot_ensemble = get_hyperparameter(cs, use_snapshot_ensemble, CategoricalHyperparameter)
-        cs.add_hyperparameter(use_snapshot_ensemble)
+        use_snapshot_ensemble = get_hyperparameter(use_snapshot_ensemble, CategoricalHyperparameter)
+        se_lastk = get_hyperparameter(se_lastk, Constant)
+        cs.add_hyperparameters([use_snapshot_ensemble, se_lastk])
         cond = EqualsCondition(se_lastk, use_snapshot_ensemble, True)
         cs.add_condition(cond)
 
-        add_hyperparameter(cs, use_lookahead_optimizer, CategoricalHyperparameter)
+        use_lookahead_optimizer = get_hyperparameter(use_lookahead_optimizer, CategoricalHyperparameter)
+        cs.add_hyperparameter(use_lookahead_optimizer)
         la_config_space = Lookahead.get_hyperparameter_search_space(la_steps=la_steps,
                                                                     la_alpha=la_alpha)
         parent_hyperparameter = {'parent': use_lookahead_optimizer, 'value': True}
diff --git a/test/conftest.py b/test/conftest.py
index 604d8f00e..da0b8f7b7 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -299,6 +299,7 @@ def get_fit_dictionary(X, y, validator, backend):
         'use_tensorboard_logger': True,
         'metrics_during_training': True,
         'split_id': 0,
+        'use_pynisher': False,
         'backend': backend,
         'logger_port': logging.handlers.DEFAULT_TCP_LOGGING_PORT,
     }