diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 531125bff..80d8bd51e 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -40,7 +40,12 @@ ) from autoPyTorch.data.base_validator import BaseInputValidator from autoPyTorch.datasets.base_dataset import BaseDataset, BaseDatasetPropertiesType -from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes +from autoPyTorch.datasets.resampling_strategy import ( + CrossValTypes, + HoldoutValTypes, + NoResamplingStrategyTypes, + ResamplingStrategies, +) from autoPyTorch.ensemble.ensemble_builder import EnsembleBuilderManager from autoPyTorch.ensemble.singlebest_ensemble import SingleBest from autoPyTorch.evaluation.abstract_evaluator import fit_and_suppress_warnings @@ -145,6 +150,13 @@ class BaseTask(ABC): name and Value is an Iterable of the names of the components to exclude. All except these components will be present in the search space. + resampling_strategy resampling_strategy (RESAMPLING_STRATEGIES), + (default=HoldoutValTypes.holdout_validation): + strategy to split the training data. + resampling_strategy_args (Optional[Dict[str, Any]]): arguments + required for the chosen resampling strategy. If None, uses + the default values provided in DEFAULT_RESAMPLING_PARAMETERS + in ```datasets/resampling_strategy.py```. search_space_updates (Optional[HyperparameterSearchSpaceUpdates]): Search space updates that can be used to modify the search space of particular components or choice modules of the pipeline @@ -166,11 +178,15 @@ def __init__( include_components: Optional[Dict[str, Any]] = None, exclude_components: Optional[Dict[str, Any]] = None, backend: Optional[Backend] = None, - resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation, + resampling_strategy: ResamplingStrategies = HoldoutValTypes.holdout_validation, resampling_strategy_args: Optional[Dict[str, Any]] = None, search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None, task_type: Optional[str] = None ) -> None: + + if isinstance(resampling_strategy, NoResamplingStrategyTypes) and ensemble_size != 0: + raise ValueError("`NoResamplingStrategy` cannot be used for ensemble construction") + self.seed = seed self.n_jobs = n_jobs self.n_threads = n_threads @@ -280,7 +296,7 @@ def _get_dataset_input_validator( y_train: Union[List, pd.DataFrame, np.ndarray], X_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, y_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, - resampling_strategy: Optional[Union[CrossValTypes, HoldoutValTypes]] = None, + resampling_strategy: Optional[ResamplingStrategies] = None, resampling_strategy_args: Optional[Dict[str, Any]] = None, dataset_name: Optional[str] = None, ) -> Tuple[BaseDataset, BaseInputValidator]: @@ -298,7 +314,7 @@ def _get_dataset_input_validator( Testing feature set y_test (Optional[Union[List, pd.DataFrame, np.ndarray]]): Testing target set - resampling_strategy (Optional[Union[CrossValTypes, HoldoutValTypes]]): + resampling_strategy (Optional[RESAMPLING_STRATEGIES]): Strategy to split the training data. if None, uses HoldoutValTypes.holdout_validation. 
resampling_strategy_args (Optional[Dict[str, Any]]): @@ -322,7 +338,7 @@ def get_dataset( y_train: Union[List, pd.DataFrame, np.ndarray], X_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, y_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, - resampling_strategy: Optional[Union[CrossValTypes, HoldoutValTypes]] = None, + resampling_strategy: Optional[ResamplingStrategies] = None, resampling_strategy_args: Optional[Dict[str, Any]] = None, dataset_name: Optional[str] = None, ) -> BaseDataset: @@ -338,7 +354,7 @@ def get_dataset( Testing feature set y_test (Optional[Union[List, pd.DataFrame, np.ndarray]]): Testing target set - resampling_strategy (Optional[Union[CrossValTypes, HoldoutValTypes]]): + resampling_strategy (Optional[RESAMPLING_STRATEGIES]): Strategy to split the training data. if None, uses HoldoutValTypes.holdout_validation. resampling_strategy_args (Optional[Dict[str, Any]]): @@ -1360,7 +1376,7 @@ def fit_pipeline( X_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, y_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, dataset_name: Optional[str] = None, - resampling_strategy: Optional[Union[HoldoutValTypes, CrossValTypes]] = None, + resampling_strategy: Optional[Union[HoldoutValTypes, CrossValTypes, NoResamplingStrategyTypes]] = None, resampling_strategy_args: Optional[Dict[str, Any]] = None, run_time_limit_secs: int = 60, memory_limit: Optional[int] = None, @@ -1395,7 +1411,7 @@ def fit_pipeline( be provided to track the generalization performance of each stage. dataset_name (Optional[str]): Name of the dataset, if None, random value is used. - resampling_strategy (Optional[Union[CrossValTypes, HoldoutValTypes]]): + resampling_strategy (Optional[RESAMPLING_STRATEGIES]): Strategy to split the training data. if None, uses HoldoutValTypes.holdout_validation. resampling_strategy_args (Optional[Dict[str, Any]]): @@ -1657,7 +1673,7 @@ def predict( # Mypy assert assert self.ensemble_ is not None, "Load models should error out if no ensemble" - if isinstance(self.resampling_strategy, HoldoutValTypes): + if isinstance(self.resampling_strategy, (HoldoutValTypes, NoResamplingStrategyTypes)): models = self.models_ elif isinstance(self.resampling_strategy, CrossValTypes): models = self.cv_models_ diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py index aeb69277c..03519bef8 100644 --- a/autoPyTorch/api/tabular_classification.py +++ b/autoPyTorch/api/tabular_classification.py @@ -13,8 +13,8 @@ from autoPyTorch.data.tabular_validator import TabularInputValidator from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType from autoPyTorch.datasets.resampling_strategy import ( - CrossValTypes, HoldoutValTypes, + ResamplingStrategies, ) from autoPyTorch.datasets.tabular_dataset import TabularDataset from autoPyTorch.evaluation.utils import DisableFileOutputParameters @@ -64,8 +64,15 @@ class TabularClassificationTask(BaseTask): name and Value is an Iterable of the names of the components to exclude. All except these components will be present in the search space. + resampling_strategy resampling_strategy (RESAMPLING_STRATEGIES), + (default=HoldoutValTypes.holdout_validation): + strategy to split the training data. + resampling_strategy_args (Optional[Dict[str, Any]]): arguments + required for the chosen resampling strategy. If None, uses + the default values provided in DEFAULT_RESAMPLING_PARAMETERS + in ```datasets/resampling_strategy.py```. 
search_space_updates (Optional[HyperparameterSearchSpaceUpdates]): - search space updates that can be used to modify the search + Search space updates that can be used to modify the search space of particular components or choice modules of the pipeline """ def __init__( @@ -83,7 +90,7 @@ def __init__( delete_output_folder_after_terminate: bool = True, include_components: Optional[Dict[str, Any]] = None, exclude_components: Optional[Dict[str, Any]] = None, - resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation, + resampling_strategy: ResamplingStrategies = HoldoutValTypes.holdout_validation, resampling_strategy_args: Optional[Dict[str, Any]] = None, backend: Optional[Backend] = None, search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None @@ -153,7 +160,7 @@ def _get_dataset_input_validator( y_train: Union[List, pd.DataFrame, np.ndarray], X_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, y_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, - resampling_strategy: Optional[Union[CrossValTypes, HoldoutValTypes]] = None, + resampling_strategy: Optional[ResamplingStrategies] = None, resampling_strategy_args: Optional[Dict[str, Any]] = None, dataset_name: Optional[str] = None, ) -> Tuple[TabularDataset, TabularInputValidator]: @@ -170,7 +177,7 @@ def _get_dataset_input_validator( Testing feature set y_test (Optional[Union[List, pd.DataFrame, np.ndarray]]): Testing target set - resampling_strategy (Optional[Union[CrossValTypes, HoldoutValTypes]]): + resampling_strategy (Optional[RESAMPLING_STRATEGIES]): Strategy to split the training data. if None, uses HoldoutValTypes.holdout_validation. resampling_strategy_args (Optional[Dict[str, Any]]): diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py index f429b210c..8c0637e39 100644 --- a/autoPyTorch/api/tabular_regression.py +++ b/autoPyTorch/api/tabular_regression.py @@ -13,8 +13,8 @@ from autoPyTorch.data.tabular_validator import TabularInputValidator from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType from autoPyTorch.datasets.resampling_strategy import ( - CrossValTypes, HoldoutValTypes, + ResamplingStrategies, ) from autoPyTorch.datasets.tabular_dataset import TabularDataset from autoPyTorch.evaluation.utils import DisableFileOutputParameters @@ -64,8 +64,15 @@ class TabularRegressionTask(BaseTask): name and Value is an Iterable of the names of the components to exclude. All except these components will be present in the search space. + resampling_strategy resampling_strategy (RESAMPLING_STRATEGIES), + (default=HoldoutValTypes.holdout_validation): + strategy to split the training data. + resampling_strategy_args (Optional[Dict[str, Any]]): arguments + required for the chosen resampling strategy. If None, uses + the default values provided in DEFAULT_RESAMPLING_PARAMETERS + in ```datasets/resampling_strategy.py```. 
search_space_updates (Optional[HyperparameterSearchSpaceUpdates]): - search space updates that can be used to modify the search + Search space updates that can be used to modify the search space of particular components or choice modules of the pipeline """ @@ -84,7 +91,7 @@ def __init__( delete_output_folder_after_terminate: bool = True, include_components: Optional[Dict[str, Any]] = None, exclude_components: Optional[Dict[str, Any]] = None, - resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation, + resampling_strategy: ResamplingStrategies = HoldoutValTypes.holdout_validation, resampling_strategy_args: Optional[Dict[str, Any]] = None, backend: Optional[Backend] = None, search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None @@ -154,7 +161,7 @@ def _get_dataset_input_validator( y_train: Union[List, pd.DataFrame, np.ndarray], X_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, y_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, - resampling_strategy: Optional[Union[CrossValTypes, HoldoutValTypes]] = None, + resampling_strategy: Optional[ResamplingStrategies] = None, resampling_strategy_args: Optional[Dict[str, Any]] = None, dataset_name: Optional[str] = None, ) -> Tuple[TabularDataset, TabularInputValidator]: @@ -171,7 +178,7 @@ def _get_dataset_input_validator( Testing feature set y_test (Optional[Union[List, pd.DataFrame, np.ndarray]]): Testing target set - resampling_strategy (Optional[Union[CrossValTypes, HoldoutValTypes]]): + resampling_strategy (Optional[RESAMPLING_STRATEGIES]): Strategy to split the training data. if None, uses HoldoutValTypes.holdout_validation. resampling_strategy_args (Optional[Dict[str, Any]]): diff --git a/autoPyTorch/datasets/base_dataset.py b/autoPyTorch/datasets/base_dataset.py index a3838007a..0f37e7938 100644 --- a/autoPyTorch/datasets/base_dataset.py +++ b/autoPyTorch/datasets/base_dataset.py @@ -21,7 +21,11 @@ DEFAULT_RESAMPLING_PARAMETERS, HoldOutFunc, HoldOutFuncs, - HoldoutValTypes + HoldoutValTypes, + NoResamplingFunc, + NoResamplingFuncs, + NoResamplingStrategyTypes, + ResamplingStrategies ) from autoPyTorch.utils.common import FitRequirement @@ -78,7 +82,7 @@ def __init__( dataset_name: Optional[str] = None, val_tensors: Optional[BaseDatasetInputType] = None, test_tensors: Optional[BaseDatasetInputType] = None, - resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation, + resampling_strategy: ResamplingStrategies = HoldoutValTypes.holdout_validation, resampling_strategy_args: Optional[Dict[str, Any]] = None, shuffle: Optional[bool] = True, seed: Optional[int] = 42, @@ -95,8 +99,7 @@ def __init__( validation data test_tensors (An optional tuple of objects that have a __len__ and a __getitem__ attribute): test data - resampling_strategy (Union[CrossValTypes, HoldoutValTypes]), - (default=HoldoutValTypes.holdout_validation): + resampling_strategy (RESAMPLING_STRATEGIES: default=HoldoutValTypes.holdout_validation): strategy to split the training data. resampling_strategy_args (Optional[Dict[str, Any]]): arguments required for the chosen resampling strategy. 
If None, uses @@ -109,16 +112,18 @@ def __init__( val_transforms (Optional[torchvision.transforms.Compose]): Additional Transforms to be applied to the validation/test data """ - self.dataset_name = dataset_name - if self.dataset_name is None: + if dataset_name is None: self.dataset_name = str(uuid.uuid1(clock_seq=os.getpid())) + else: + self.dataset_name = dataset_name if not hasattr(train_tensors[0], 'shape'): type_check(train_tensors, val_tensors) self.train_tensors, self.val_tensors, self.test_tensors = train_tensors, val_tensors, test_tensors self.cross_validators: Dict[str, CrossValFunc] = {} self.holdout_validators: Dict[str, HoldOutFunc] = {} + self.no_resampling_validators: Dict[str, NoResamplingFunc] = {} self.random_state = np.random.RandomState(seed=seed) self.shuffle = shuffle self.resampling_strategy = resampling_strategy @@ -143,6 +148,8 @@ def __init__( # Make sure cross validation splits are created once self.cross_validators = CrossValFuncs.get_cross_validators(*CrossValTypes) self.holdout_validators = HoldOutFuncs.get_holdout_validators(*HoldoutValTypes) + self.no_resampling_validators = NoResamplingFuncs.get_no_resampling_validators(*NoResamplingStrategyTypes) + self.splits = self.get_splits_from_resampling_strategy() # We also need to be able to transform the data, be it for pre-processing @@ -210,7 +217,7 @@ def __len__(self) -> int: def _get_indices(self) -> np.ndarray: return self.random_state.permutation(len(self)) if self.shuffle else np.arange(len(self)) - def get_splits_from_resampling_strategy(self) -> List[Tuple[List[int], List[int]]]: + def get_splits_from_resampling_strategy(self) -> List[Tuple[List[int], Optional[List[int]]]]: """ Creates a set of splits based on a resampling strategy provided @@ -241,6 +248,9 @@ def get_splits_from_resampling_strategy(self) -> List[Tuple[List[int], List[int] num_splits=cast(int, num_splits), ) ) + elif isinstance(self.resampling_strategy, NoResamplingStrategyTypes): + splits.append((self.no_resampling_validators[self.resampling_strategy.name](self.random_state, + self._get_indices()), None)) else: raise ValueError(f"Unsupported resampling strategy={self.resampling_strategy}") return splits @@ -312,7 +322,7 @@ def create_holdout_val_split( self.random_state, val_share, self._get_indices(), **kwargs) return train, val - def get_dataset_for_training(self, split_id: int) -> Tuple[Dataset, Dataset]: + def get_dataset(self, split_id: int, train: bool) -> Dataset: """ The above split methods employ the Subset to internally subsample the whole dataset. @@ -320,14 +330,21 @@ def get_dataset_for_training(self, split_id: int) -> Tuple[Dataset, Dataset]: to provide training data to fit a pipeline Args: - split (int): The desired subset of the dataset to split and use + split_id (int): which split id to get from the splits + train (bool): whether the dataset is required for training or evaluating. Returns: Dataset: the reduced dataset to be used for testing """ # Subset creates a dataset. 
Splits is a (train_indices, test_indices) tuple - return (TransformSubset(self, self.splits[split_id][0], train=True), - TransformSubset(self, self.splits[split_id][1], train=False)) + if split_id >= len(self.splits): # old version: split_id > len(self.splits) + raise IndexError(f"self.splits index out of range, got split_id={split_id}" + f" (>= num_splits={len(self.splits)})") + indices = self.splits[split_id][int(not train)] # 0: for training, 1: for evaluation + if indices is None: + raise ValueError("Specified fold (or subset) does not exist") + + return TransformSubset(self, indices, train=train) def replace_data(self, X_train: BaseDatasetInputType, X_test: Optional[BaseDatasetInputType]) -> 'BaseDataset': diff --git a/autoPyTorch/datasets/image_dataset.py b/autoPyTorch/datasets/image_dataset.py index 9da55ebc0..74b79db15 100644 --- a/autoPyTorch/datasets/image_dataset.py +++ b/autoPyTorch/datasets/image_dataset.py @@ -24,6 +24,7 @@ from autoPyTorch.datasets.resampling_strategy import ( CrossValTypes, HoldoutValTypes, + NoResamplingStrategyTypes ) IMAGE_DATASET_INPUT = Union[Dataset, Tuple[Union[np.ndarray, List[str]], np.ndarray]] @@ -39,7 +40,7 @@ class ImageDataset(BaseDataset): validation data test (Union[Dataset, Tuple[Union[np.ndarray, List[str]], np.ndarray]]): testing data - resampling_strategy (Union[CrossValTypes, HoldoutValTypes]), + resampling_strategy (Union[CrossValTypes, HoldoutValTypes, NoResamplingStrategyTypes]), (default=HoldoutValTypes.holdout_validation): strategy to split the training data. resampling_strategy_args (Optional[Dict[str, Any]]): arguments @@ -57,7 +58,9 @@ def __init__(self, train: IMAGE_DATASET_INPUT, val: Optional[IMAGE_DATASET_INPUT] = None, test: Optional[IMAGE_DATASET_INPUT] = None, - resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation, + resampling_strategy: Union[CrossValTypes, + HoldoutValTypes, + NoResamplingStrategyTypes] = HoldoutValTypes.holdout_validation, resampling_strategy_args: Optional[Dict[str, Any]] = None, shuffle: Optional[bool] = True, seed: Optional[int] = 42, diff --git a/autoPyTorch/datasets/resampling_strategy.py b/autoPyTorch/datasets/resampling_strategy.py index 86e0ec733..78447a04e 100644 --- a/autoPyTorch/datasets/resampling_strategy.py +++ b/autoPyTorch/datasets/resampling_strategy.py @@ -16,6 +16,13 @@ # Use callback protocol as workaround, since callable with function fields count 'self' as argument +class NoResamplingFunc(Protocol): + def __call__(self, + random_state: np.random.RandomState, + indices: np.ndarray) -> np.ndarray: + ... 
+ + class CrossValFunc(Protocol): def __call__(self, random_state: np.random.RandomState, @@ -76,10 +83,20 @@ def is_stratified(self) -> bool: return getattr(self, self.name) in stratified +class NoResamplingStrategyTypes(IntEnum): + no_resampling = 8 + + def is_stratified(self) -> bool: + return False + + # TODO: replace it with another way -RESAMPLING_STRATEGIES = [CrossValTypes, HoldoutValTypes] +ResamplingStrategies = Union[CrossValTypes, HoldoutValTypes, NoResamplingStrategyTypes] -DEFAULT_RESAMPLING_PARAMETERS: Dict[Union[HoldoutValTypes, CrossValTypes], Dict[str, Any]] = { +DEFAULT_RESAMPLING_PARAMETERS: Dict[ + ResamplingStrategies, + Dict[str, Any] +] = { HoldoutValTypes.holdout_validation: { 'val_share': 0.33, }, @@ -98,6 +115,7 @@ def is_stratified(self) -> bool: CrossValTypes.time_series_cross_validation: { 'num_splits': 5, }, + NoResamplingStrategyTypes.no_resampling: {} } @@ -225,3 +243,30 @@ def get_cross_validators(cls, *cross_val_types: CrossValTypes) -> Dict[str, Cros for cross_val_type in cross_val_types } return cross_validators + + +class NoResamplingFuncs(): + @classmethod + def get_no_resampling_validators(cls, *no_resampling_types: NoResamplingStrategyTypes + ) -> Dict[str, NoResamplingFunc]: + no_resampling_strategies: Dict[str, NoResamplingFunc] = { + no_resampling_type.name: getattr(cls, no_resampling_type.name) + for no_resampling_type in no_resampling_types + } + return no_resampling_strategies + + @staticmethod + def no_resampling(random_state: np.random.RandomState, + indices: np.ndarray) -> np.ndarray: + """ + Returns the indices without performing + any operation on them. To be used for + fitting on the whole dataset. + This strategy is not compatible with + HPO search. + Args: + indices: array of indices + Returns: + np.ndarray: array of indices + """ + return indices diff --git a/autoPyTorch/datasets/tabular_dataset.py b/autoPyTorch/datasets/tabular_dataset.py index 16335dfbb..96fcdeb86 100644 --- a/autoPyTorch/datasets/tabular_dataset.py +++ b/autoPyTorch/datasets/tabular_dataset.py @@ -21,6 +21,7 @@ from autoPyTorch.datasets.resampling_strategy import ( CrossValTypes, HoldoutValTypes, + NoResamplingStrategyTypes ) @@ -32,7 +33,7 @@ class TabularDataset(BaseDataset): Y (Union[np.ndarray, pd.Series]): training data targets. X_test (Optional[Union[np.ndarray, pd.DataFrame]]): input testing data. Y_test (Optional[Union[np.ndarray, pd.DataFrame]]): testing data targets - resampling_strategy (Union[CrossValTypes, HoldoutValTypes]), + resampling_strategy (Union[CrossValTypes, HoldoutValTypes, NoResamplingStrategyTypes]), (default=HoldoutValTypes.holdout_validation): strategy to split the training data. 
resampling_strategy_args (Optional[Dict[str, Any]]): @@ -55,7 +56,9 @@ def __init__(self, Y: Union[np.ndarray, pd.Series], X_test: Optional[Union[np.ndarray, pd.DataFrame]] = None, Y_test: Optional[Union[np.ndarray, pd.DataFrame]] = None, - resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation, + resampling_strategy: Union[CrossValTypes, + HoldoutValTypes, + NoResamplingStrategyTypes] = HoldoutValTypes.holdout_validation, resampling_strategy_args: Optional[Dict[str, Any]] = None, shuffle: Optional[bool] = True, seed: Optional[int] = 42, diff --git a/autoPyTorch/evaluation/tae.py b/autoPyTorch/evaluation/tae.py index 683870304..17c34df3a 100644 --- a/autoPyTorch/evaluation/tae.py +++ b/autoPyTorch/evaluation/tae.py @@ -22,8 +22,14 @@ from smac.tae import StatusType, TAEAbortException from smac.tae.execute_func import AbstractTAFunc -import autoPyTorch.evaluation.train_evaluator from autoPyTorch.automl_common.common.utils.backend import Backend +from autoPyTorch.datasets.resampling_strategy import ( + CrossValTypes, + HoldoutValTypes, + NoResamplingStrategyTypes +) +from autoPyTorch.evaluation.test_evaluator import eval_test_function +from autoPyTorch.evaluation.train_evaluator import eval_train_function from autoPyTorch.evaluation.utils import ( DisableFileOutputParameters, empty_queue, @@ -123,7 +129,27 @@ def __init__( search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None ): - eval_function = autoPyTorch.evaluation.train_evaluator.eval_function + self.backend = backend + + dm = self.backend.load_datamanager() + if dm.val_tensors is not None: + self._get_validation_loss = True + else: + self._get_validation_loss = False + if dm.test_tensors is not None: + self._get_test_loss = True + else: + self._get_test_loss = False + + self.resampling_strategy = dm.resampling_strategy + self.resampling_strategy_args = dm.resampling_strategy_args + + if isinstance(self.resampling_strategy, (HoldoutValTypes, CrossValTypes)): + eval_function = eval_train_function + self.output_y_hat_optimization = output_y_hat_optimization + elif isinstance(self.resampling_strategy, NoResamplingStrategyTypes): + eval_function = eval_test_function + self.output_y_hat_optimization = False self.worst_possible_result = cost_for_crash @@ -142,12 +168,10 @@ def __init__( abort_on_first_run_crash=abort_on_first_run_crash, ) - self.backend = backend self.pynisher_context = pynisher_context self.seed = seed self.initial_num_run = initial_num_run self.metric = metric - self.output_y_hat_optimization = output_y_hat_optimization self.include = include self.exclude = exclude self.disable_file_output = disable_file_output @@ -175,19 +199,6 @@ def __init__( memory_limit = int(math.ceil(memory_limit)) self.memory_limit = memory_limit - dm = self.backend.load_datamanager() - if dm.val_tensors is not None: - self._get_validation_loss = True - else: - self._get_validation_loss = False - if dm.test_tensors is not None: - self._get_test_loss = True - else: - self._get_test_loss = False - - self.resampling_strategy = dm.resampling_strategy - self.resampling_strategy_args = dm.resampling_strategy_args - self.search_space_updates = search_space_updates def run_wrapper( diff --git a/autoPyTorch/evaluation/test_evaluator.py b/autoPyTorch/evaluation/test_evaluator.py new file mode 100644 index 000000000..0c6da71a9 --- /dev/null +++ b/autoPyTorch/evaluation/test_evaluator.py @@ -0,0 +1,241 @@ +from multiprocessing.queues import Queue +from typing import Any, Dict, List, Optional, Tuple, 
Union + +from ConfigSpace.configuration_space import Configuration + +import numpy as np + +from smac.tae import StatusType + +from autoPyTorch.automl_common.common.utils.backend import Backend +from autoPyTorch.datasets.resampling_strategy import NoResamplingStrategyTypes +from autoPyTorch.evaluation.abstract_evaluator import ( + AbstractEvaluator, + fit_and_suppress_warnings +) +from autoPyTorch.evaluation.utils import DisableFileOutputParameters +from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric +from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates + + +__all__ = [ + 'eval_test_function', + 'TestEvaluator' +] + + +class TestEvaluator(AbstractEvaluator): + """ + This class builds a pipeline using the provided configuration. + A pipeline implementing the provided configuration is fitted + using the datamanager object retrieved from disc, via the backend. + After the pipeline is fitted, it is save to disc and the performance estimate + is communicated to the main process via a Queue. It is only compatible + with `NoResamplingStrategyTypes`, i.e, when the training data + is not split and the test set is used for SMBO optimisation. It can not + be used for building ensembles which is ensured by having + `output_y_hat_optimisation`=False + + Attributes: + backend (Backend): + An object to interface with the disk storage. In particular, allows to + access the train and test datasets + queue (Queue): + Each worker available will instantiate an evaluator, and after completion, + it will return the evaluation result via a multiprocessing queue + metric (autoPyTorchMetric): + A scorer object that is able to evaluate how good a pipeline was fit. It + is a wrapper on top of the actual score method (a wrapper on top of scikit + lean accuracy for example) that formats the predictions accordingly. + budget: (float): + The amount of epochs/time a configuration is allowed to run. + budget_type (str): + The budget type, which can be epochs or time + pipeline_config (Optional[Dict[str, Any]]): + Defines the content of the pipeline being evaluated. For example, it + contains pipeline specific settings like logging name, or whether or not + to use tensorboard. + configuration (Union[int, str, Configuration]): + Determines the pipeline to be constructed. A dummy estimator is created for + integer configurations, a traditional machine learning pipeline is created + for string based configuration, and NAS is performed when a configuration + object is passed. + seed (int): + A integer that allows for reproducibility of results + output_y_hat_optimization (bool): + Whether this worker should output the target predictions, so that they are + stored on disk. Fundamentally, the resampling strategy might shuffle the + Y_train targets, so we store the split in order to re-use them for ensemble + selection. + num_run (Optional[int]): + An identifier of the current configuration being fit. This number is unique per + configuration. + include (Optional[Dict[str, Any]]): + An optional dictionary to include components of the pipeline steps. + exclude (Optional[Dict[str, Any]]): + An optional dictionary to exclude components of the pipeline steps. + disable_file_output (Optional[List[Union[str, DisableFileOutputParameters]]]): + Used as a list to pass more fine-grained + information on what to save. Must be a member of `DisableFileOutputParameters`. 
+ Allowed elements in the list are: + + + `y_optimization`: + do not save the predictions for the optimization set, + which would later on be used to build an ensemble. Note that SMAC + optimizes a metric evaluated on the optimization set. + + `pipeline`: + do not save any individual pipeline files + + `pipelines`: + In case of cross validation, disables saving the joint model of the + pipelines fit on each fold. + + `y_test`: + do not save the predictions for the test set. + + `all`: + do not save any of the above. + For more information check `autoPyTorch.evaluation.utils.DisableFileOutputParameters`. + init_params (Optional[Dict[str, Any]]): + Optional argument that is passed to each pipeline step. It is the equivalent of + kwargs for the pipeline steps. + logger_port (Optional[int]): + Logging is performed using a socket-server scheme to be robust against many + parallel entities that want to write to the same file. This integer states the + socket port for the communication channel. If None is provided, a traditional + logger is used. + all_supported_metrics (bool): + Whether all supported metric should be calculated for every configuration. + search_space_updates (Optional[HyperparameterSearchSpaceUpdates]): + An object used to fine tune the hyperparameter search space of the pipeline + """ + def __init__( + self, + backend: Backend, queue: Queue, + metric: autoPyTorchMetric, + budget: float, + configuration: Union[int, str, Configuration], + budget_type: str = None, + pipeline_config: Optional[Dict[str, Any]] = None, + seed: int = 1, + output_y_hat_optimization: bool = False, + num_run: Optional[int] = None, + include: Optional[Dict[str, Any]] = None, + exclude: Optional[Dict[str, Any]] = None, + disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None, + init_params: Optional[Dict[str, Any]] = None, + logger_port: Optional[int] = None, + all_supported_metrics: bool = True, + search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None + ) -> None: + super().__init__( + backend=backend, + queue=queue, + configuration=configuration, + metric=metric, + seed=seed, + output_y_hat_optimization=output_y_hat_optimization, + num_run=num_run, + include=include, + exclude=exclude, + disable_file_output=disable_file_output, + init_params=init_params, + budget=budget, + budget_type=budget_type, + logger_port=logger_port, + all_supported_metrics=all_supported_metrics, + pipeline_config=pipeline_config, + search_space_updates=search_space_updates + ) + + if not isinstance(self.datamanager.resampling_strategy, (NoResamplingStrategyTypes)): + resampling_strategy = self.datamanager.resampling_strategy + raise ValueError( + f'resampling_strategy for TestEvaluator must be in ' + f'NoResamplingStrategyTypes, but got {resampling_strategy}' + ) + + self.splits = self.datamanager.splits + if self.splits is None: + raise AttributeError("create_splits must be called in {}".format(self.datamanager.__class__.__name__)) + + def fit_predict_and_loss(self) -> None: + + split_id = 0 + train_indices, test_indices = self.splits[split_id] + + self.pipeline = self._get_pipeline() + X = {'train_indices': train_indices, + 'val_indices': test_indices, + 'split_id': split_id, + 'num_run': self.num_run, + **self.fit_dictionary} # fit dictionary + y = None + fit_and_suppress_warnings(self.logger, self.pipeline, X, y) + train_loss, _ = self.predict_and_loss(train=True) + test_loss, test_pred = self.predict_and_loss() + self.Y_optimization = self.y_test + self.finish_up( + 
loss=test_loss, + train_loss=train_loss, + opt_pred=test_pred, + valid_pred=None, + test_pred=test_pred, + file_output=True, + additional_run_info=None, + status=StatusType.SUCCESS, + ) + + def predict_and_loss( + self, train: bool = False + ) -> Tuple[Dict[str, float], np.ndarray]: + labels = self.y_train if train else self.y_test + feats = self.X_train if train else self.X_test + preds = self.predict_function( + X=feats, + pipeline=self.pipeline, + Y_train=self.y_train # Need this as we need to know all the classes in train splits + ) + loss_dict = self._loss(labels, preds) + + return loss_dict, preds + + +# create closure for evaluating an algorithm +def eval_test_function( + backend: Backend, + queue: Queue, + metric: autoPyTorchMetric, + budget: float, + config: Optional[Configuration], + seed: int, + output_y_hat_optimization: bool, + num_run: int, + include: Optional[Dict[str, Any]], + exclude: Optional[Dict[str, Any]], + disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None, + pipeline_config: Optional[Dict[str, Any]] = None, + budget_type: str = None, + init_params: Optional[Dict[str, Any]] = None, + logger_port: Optional[int] = None, + all_supported_metrics: bool = True, + search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None, + instance: str = None, +) -> None: + evaluator = TestEvaluator( + backend=backend, + queue=queue, + metric=metric, + configuration=config, + seed=seed, + num_run=num_run, + output_y_hat_optimization=output_y_hat_optimization, + include=include, + exclude=exclude, + disable_file_output=disable_file_output, + init_params=init_params, + budget=budget, + budget_type=budget_type, + logger_port=logger_port, + all_supported_metrics=all_supported_metrics, + pipeline_config=pipeline_config, + search_space_updates=search_space_updates) + + evaluator.fit_predict_and_loss() diff --git a/autoPyTorch/evaluation/train_evaluator.py b/autoPyTorch/evaluation/train_evaluator.py index 1bf1bce4c..a9313ee9e 100644 --- a/autoPyTorch/evaluation/train_evaluator.py +++ b/autoPyTorch/evaluation/train_evaluator.py @@ -14,6 +14,7 @@ CLASSIFICATION_TASKS, MULTICLASSMULTIOUTPUT, ) +from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes from autoPyTorch.evaluation.abstract_evaluator import ( AbstractEvaluator, fit_and_suppress_warnings @@ -23,7 +24,7 @@ from autoPyTorch.utils.common import dict_repr, subsampler from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates -__all__ = ['TrainEvaluator', 'eval_function'] +__all__ = ['TrainEvaluator', 'eval_train_function'] def _get_y_array(y: np.ndarray, task_type: int) -> np.ndarray: @@ -40,7 +41,9 @@ class TrainEvaluator(AbstractEvaluator): A pipeline implementing the provided configuration is fitted using the datamanager object retrieved from disc, via the backend. After the pipeline is fitted, it is save to disc and the performance estimate - is communicated to the main process via a Queue. + is communicated to the main process via a Queue. It is only compatible + with `CrossValTypes`, `HoldoutValTypes`, i.e, when the training data + is split and the validation set is used for SMBO optimisation. 
Attributes: backend (Backend): @@ -149,6 +152,13 @@ def __init__(self, backend: Backend, queue: Queue, search_space_updates=search_space_updates ) + if not isinstance(self.datamanager.resampling_strategy, (CrossValTypes, HoldoutValTypes)): + resampling_strategy = self.datamanager.resampling_strategy + raise ValueError( + f'resampling_strategy for TrainEvaluator must be in ' + f'(CrossValTypes, HoldoutValTypes), but got {resampling_strategy}' + ) + self.splits = self.datamanager.splits if self.splits is None: raise AttributeError("Must have called create_splits on {}".format(self.datamanager.__class__.__name__)) @@ -402,25 +412,25 @@ def _predict(self, pipeline: BaseEstimator, # create closure for evaluating an algorithm -def eval_function( - backend: Backend, - queue: Queue, - metric: autoPyTorchMetric, - budget: float, - config: Optional[Configuration], - seed: int, - output_y_hat_optimization: bool, - num_run: int, - include: Optional[Dict[str, Any]], - exclude: Optional[Dict[str, Any]], - disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None, - pipeline_config: Optional[Dict[str, Any]] = None, - budget_type: str = None, - init_params: Optional[Dict[str, Any]] = None, - logger_port: Optional[int] = None, - all_supported_metrics: bool = True, - search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None, - instance: str = None, +def eval_train_function( + backend: Backend, + queue: Queue, + metric: autoPyTorchMetric, + budget: float, + config: Optional[Configuration], + seed: int, + output_y_hat_optimization: bool, + num_run: int, + include: Optional[Dict[str, Any]], + exclude: Optional[Dict[str, Any]], + disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None, + pipeline_config: Optional[Dict[str, Any]] = None, + budget_type: str = None, + init_params: Optional[Dict[str, Any]] = None, + logger_port: Optional[int] = None, + all_supported_metrics: bool = True, + search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None, + instance: str = None, ) -> None: """ This closure allows the communication between the ExecuteTaFuncWithQueue and the diff --git a/autoPyTorch/optimizer/smbo.py b/autoPyTorch/optimizer/smbo.py index aa444c782..d0bb4056c 100644 --- a/autoPyTorch/optimizer/smbo.py +++ b/autoPyTorch/optimizer/smbo.py @@ -23,6 +23,7 @@ CrossValTypes, DEFAULT_RESAMPLING_PARAMETERS, HoldoutValTypes, + NoResamplingStrategyTypes ) from autoPyTorch.ensemble.ensemble_builder import EnsembleBuilderManager from autoPyTorch.evaluation.tae import ExecuteTaFuncWithQueue, get_cost_of_crash @@ -98,7 +99,9 @@ def __init__(self, pipeline_config: Dict[str, Any], start_num_run: int = 1, seed: int = 1, - resampling_strategy: Union[HoldoutValTypes, CrossValTypes] = HoldoutValTypes.holdout_validation, + resampling_strategy: Union[HoldoutValTypes, + CrossValTypes, + NoResamplingStrategyTypes] = HoldoutValTypes.holdout_validation, resampling_strategy_args: Optional[Dict[str, Any]] = None, include: Optional[Dict[str, Any]] = None, exclude: Optional[Dict[str, Any]] = None, diff --git a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py index f39194477..365213bae 100644 --- a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py +++ b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py @@ -106,7 +106,8 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader: # This parameter 
indicates that the data has been pre-processed for speed # Overwrite the datamanager with the pre-processes data datamanager.replace_data(X['X_train'], X['X_test'] if 'X_test' in X else None) - train_dataset, val_dataset = datamanager.get_dataset_for_training(split_id=X['split_id']) + + train_dataset = datamanager.get_dataset(split_id=X['split_id'], train=True) self.train_data_loader = torch.utils.data.DataLoader( train_dataset, @@ -118,15 +119,17 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader: collate_fn=custom_collate_fn, ) - self.val_data_loader = torch.utils.data.DataLoader( - val_dataset, - batch_size=min(self.batch_size, len(val_dataset)), - shuffle=False, - num_workers=X.get('num_workers', 0), - pin_memory=X.get('pin_memory', True), - drop_last=X.get('drop_last', False), - collate_fn=custom_collate_fn, - ) + if X.get('val_indices', None) is not None: + val_dataset = datamanager.get_dataset(split_id=X['split_id'], train=False) + self.val_data_loader = torch.utils.data.DataLoader( + val_dataset, + batch_size=min(self.batch_size, len(val_dataset)), + shuffle=False, + num_workers=X.get('num_workers', 0), + pin_memory=X.get('pin_memory', True), + drop_last=X.get('drop_last', True), + collate_fn=custom_collate_fn, + ) if X.get('X_test', None) is not None: self.test_data_loader = self.get_loader(X=X['X_test'], @@ -184,7 +187,6 @@ def get_val_data_loader(self) -> torch.utils.data.DataLoader: Returns: torch.utils.data.DataLoader: A validation data loader """ - assert self.val_data_loader is not None, "No val data loader fitted" return self.val_data_loader def get_test_data_loader(self) -> torch.utils.data.DataLoader: diff --git a/autoPyTorch/pipeline/components/training/trainer/__init__.py b/autoPyTorch/pipeline/components/training/trainer/__init__.py index 1645c00cd..c1008b3ba 100755 --- a/autoPyTorch/pipeline/components/training/trainer/__init__.py +++ b/autoPyTorch/pipeline/components/training/trainer/__init__.py @@ -66,6 +66,7 @@ def __init__(self, random_state=random_state) self.run_summary: Optional[RunSummary] = None self.writer: Optional[SummaryWriter] = None + self.early_stopping_split_type: Optional[str] = None self._fit_requirements: Optional[List[FitRequirement]] = [ FitRequirement("lr_scheduler", (_LRScheduler,), user_defined=False, dataset_property=False), FitRequirement("num_run", (int,), user_defined=False, dataset_property=False), @@ -277,6 +278,11 @@ def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> 'TrainerChoic optimize_metric=None if not X['metrics_during_training'] else X.get('optimize_metric'), ) + if X['val_data_loader'] is not None: + self.early_stopping_split_type = 'val' + else: + self.early_stopping_split_type = 'train' + epoch = 1 while True: @@ -302,7 +308,8 @@ def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> 'TrainerChoic val_loss, val_metrics, test_loss, test_metrics = None, {}, None, {} if self.eval_valid_each_epoch(X): - val_loss, val_metrics = self.choice.evaluate(X['val_data_loader'], epoch, writer) + if X['val_data_loader']: + val_loss, val_metrics = self.choice.evaluate(X['val_data_loader'], epoch, writer) if 'test_data_loader' in X and X['test_data_loader']: test_loss, test_metrics = self.choice.evaluate(X['test_data_loader'], epoch, writer) @@ -346,7 +353,8 @@ def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> 'TrainerChoic # wrap up -- add score if not evaluating every epoch if not self.eval_valid_each_epoch(X): - val_loss, val_metrics = 
self.choice.evaluate(X['val_data_loader'], epoch, writer) + if X['val_data_loader']: + val_loss, val_metrics = self.choice.evaluate(X['val_data_loader'], epoch, writer) if 'test_data_loader' in X and X['val_data_loader']: test_loss, test_metrics = self.choice.evaluate(X['test_data_loader'], epoch, writer) self.run_summary.add_performance( @@ -382,14 +390,17 @@ def _load_best_weights_and_clean_checkpoints(self, X: Dict[str, Any]) -> None: """ assert self.checkpoint_dir is not None # mypy assert self.run_summary is not None # mypy + assert self.early_stopping_split_type is not None # mypy best_path = os.path.join(self.checkpoint_dir, 'best.pth') - self.logger.debug(f" Early stopped model {X['num_run']} on epoch {self.run_summary.get_best_epoch()}") + best_epoch = self.run_summary.get_best_epoch(split_type=self.early_stopping_split_type) + self.logger.debug(f" Early stopped model {X['num_run']} on epoch {best_epoch}") # We will stop the training. Load the last best performing weights X['network'].load_state_dict(torch.load(best_path)) # Clean the temp dir shutil.rmtree(self.checkpoint_dir) + self.checkpoint_dir = None def early_stop_handler(self, X: Dict[str, Any]) -> bool: """ @@ -404,6 +415,7 @@ def early_stop_handler(self, X: Dict[str, Any]) -> bool: bool: If true, training should be stopped """ assert self.run_summary is not None + assert self.early_stopping_split_type is not None # mypy # Allow to disable early stopping if X['early_stopping'] is None or X['early_stopping'] < 0: @@ -413,7 +425,9 @@ def early_stop_handler(self, X: Dict[str, Any]) -> bool: if self.checkpoint_dir is None: self.checkpoint_dir = tempfile.mkdtemp(dir=X['backend'].temporary_directory) - epochs_since_best = self.run_summary.get_last_epoch() - self.run_summary.get_best_epoch() + last_epoch = self.run_summary.get_last_epoch() + best_epoch = self.run_summary.get_best_epoch(split_type=self.early_stopping_split_type) + epochs_since_best = last_epoch - best_epoch # Save the checkpoint if there is a new best epoch best_path = os.path.join(self.checkpoint_dir, 'best.pth') diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py index 6be283ebb..4fe94ca4f 100644 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py @@ -119,10 +119,11 @@ def add_performance(self, self.performance_tracker['val_metrics'][epoch] = val_metrics self.performance_tracker['test_metrics'][epoch] = test_metrics - def get_best_epoch(self, loss_type: str = 'val_loss') -> int: - # If we compute validation scores, prefer the performance + def get_best_epoch(self, split_type: str = 'val') -> int: + # If we compute for optimization, prefer the performance # metric to the loss if self.optimize_metric is not None: + metrics_type = f"{split_type}_metrics" scorer = CLASSIFICATION_METRICS[ self.optimize_metric ] if self.optimize_metric in CLASSIFICATION_METRICS else REGRESSION_METRICS[ @@ -131,13 +132,12 @@ def get_best_epoch(self, loss_type: str = 'val_loss') -> int: # Some metrics maximize, other minimize! 
opt_func = np.argmax if scorer._sign > 0 else np.argmin return int(opt_func( - [self.performance_tracker['val_metrics'][e][self.optimize_metric] - for e in range(1, len(self.performance_tracker['val_metrics']) + 1)] + [metrics[self.optimize_metric] for metrics in self.performance_tracker[metrics_type].values()] )) + 1 # Epochs start at 1 else: + loss_type = f"{split_type}_loss" return int(np.argmin( - [self.performance_tracker[loss_type][e] - for e in range(1, len(self.performance_tracker[loss_type]) + 1)], + list(self.performance_tracker[loss_type].values()), )) + 1 # Epochs start at 1 def get_last_epoch(self) -> int: diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py index fda013612..e3603f668 100644 --- a/test/test_api/test_api.py +++ b/test/test_api/test_api.py @@ -4,7 +4,7 @@ import pickle import tempfile import unittest -from test.test_api.utils import dummy_do_dummy_prediction, dummy_eval_function +from test.test_api.utils import dummy_do_dummy_prediction, dummy_eval_train_function import ConfigSpace as CS from ConfigSpace.configuration_space import Configuration @@ -29,6 +29,7 @@ from autoPyTorch.datasets.resampling_strategy import ( CrossValTypes, HoldoutValTypes, + NoResamplingStrategyTypes, ) from autoPyTorch.optimizer.smbo import AutoMLSMBO from autoPyTorch.pipeline.base_pipeline import BasePipeline @@ -42,8 +43,8 @@ # Test # ==== -@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function', - new=dummy_eval_function) +@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_train_function', + new=dummy_eval_train_function) @pytest.mark.parametrize('openml_id', (40981, )) @pytest.mark.parametrize('resampling_strategy,resampling_strategy_args', ((HoldoutValTypes.holdout_validation, None), @@ -219,8 +220,8 @@ def test_tabular_classification(openml_id, resampling_strategy, backend, resampl @pytest.mark.parametrize('openml_name', ("boston", )) -@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function', - new=dummy_eval_function) +@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_train_function', + new=dummy_eval_train_function) @pytest.mark.parametrize('resampling_strategy,resampling_strategy_args', ((HoldoutValTypes.holdout_validation, None), (CrossValTypes.k_fold_cross_validation, {'num_splits': CV_NUM_SPLITS}) @@ -465,7 +466,7 @@ def test_do_dummy_prediction(dask_client, fit_dictionary_tabular): estimator._all_supported_metrics = False with pytest.raises(ValueError, match=r".*Dummy prediction failed with run state.*"): - with unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function') as dummy: + with unittest.mock.patch('autoPyTorch.evaluation.tae.eval_train_function') as dummy: dummy.side_effect = MemoryError estimator._do_dummy_prediction() @@ -496,8 +497,8 @@ def test_do_dummy_prediction(dask_client, fit_dictionary_tabular): del estimator -@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function', - new=dummy_eval_function) +@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_train_function', + new=dummy_eval_train_function) @pytest.mark.parametrize('openml_id', (40981, )) def test_portfolio_selection(openml_id, backend, n_samples): @@ -538,8 +539,8 @@ def test_portfolio_selection(openml_id, backend, n_samples): assert any(successful_config in portfolio_configs for successful_config in successful_configs) -@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function', - new=dummy_eval_function) 
+@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_train_function', + new=dummy_eval_train_function) @pytest.mark.parametrize('openml_id', (40981, )) def test_portfolio_selection_failure(openml_id, backend, n_samples): @@ -649,7 +650,8 @@ def test_build_pipeline(api_type, fit_dictionary_tabular): @pytest.mark.parametrize('openml_id', (40984,)) @pytest.mark.parametrize('resampling_strategy,resampling_strategy_args', ((HoldoutValTypes.holdout_validation, {'val_share': 0.8}), - (CrossValTypes.k_fold_cross_validation, {'num_splits': 2}) + (CrossValTypes.k_fold_cross_validation, {'num_splits': 2}), + (NoResamplingStrategyTypes.no_resampling, {}) ) ) @pytest.mark.parametrize("budget", [15, 20]) @@ -672,6 +674,7 @@ def test_pipeline_fit(openml_id, estimator = TabularClassificationTask( backend=backend, resampling_strategy=resampling_strategy, + ensemble_size=0 ) dataset = estimator.get_dataset(X_train=X_train, @@ -705,7 +708,7 @@ def test_pipeline_fit(openml_id, score = accuracy(dataset.test_tensors[1], preds) assert isinstance(score, float) - assert score > 0.7 + assert score > 0.65 else: assert isinstance(pipeline, BasePipeline) # To make sure we fitted the model, there should be a @@ -718,10 +721,10 @@ def test_pipeline_fit(openml_id, score = accuracy(dataset.test_tensors[1], preds) assert isinstance(score, float) - assert score > 0.7 + assert score > 0.65 else: assert pipeline is None - assert run_value.cost < 0.3 + assert run_value.cost < 0.35 # Make sure that the pipeline can be pickled dump_file = os.path.join(tempfile.gettempdir(), 'automl.dump.pkl') @@ -790,3 +793,114 @@ def test_pipeline_fit_error( assert 'TIMEOUT' in str(run_value.status) assert pipeline is None + + +@pytest.mark.parametrize('openml_id', (40981, )) +def test_tabular_classification_test_evaluator(openml_id, backend, n_samples): + + # Get the data and check that contents of data-manager make sense + X, y = sklearn.datasets.fetch_openml( + data_id=int(openml_id), + return_X_y=True, as_frame=True + ) + X, y = X.iloc[:n_samples], y.iloc[:n_samples] + + X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=42) + + # Search for a good configuration + estimator = TabularClassificationTask( + backend=backend, + resampling_strategy=NoResamplingStrategyTypes.no_resampling, + seed=42, + ensemble_size=0 + ) + + with unittest.mock.patch.object(estimator, '_do_dummy_prediction', new=dummy_do_dummy_prediction): + estimator.search( + X_train=X_train, y_train=y_train, + X_test=X_test, y_test=y_test, + optimize_metric='accuracy', + total_walltime_limit=50, + func_eval_time_limit_secs=20, + enable_traditional_pipeline=False, + ) + + # Internal dataset has expected settings + assert estimator.dataset.task_type == 'tabular_classification' + + assert estimator.resampling_strategy == NoResamplingStrategyTypes.no_resampling + assert estimator.dataset.resampling_strategy == NoResamplingStrategyTypes.no_resampling + # Check for the created files + tmp_dir = estimator._backend.temporary_directory + loaded_datamanager = estimator._backend.load_datamanager() + assert len(loaded_datamanager.train_tensors) == len(estimator.dataset.train_tensors) + + expected_files = [ + 'smac3-output/run_42/configspace.json', + 'smac3-output/run_42/runhistory.json', + 'smac3-output/run_42/scenario.txt', + 'smac3-output/run_42/stats.json', + 'smac3-output/run_42/train_insts.txt', + 'smac3-output/run_42/trajectory.json', + '.autoPyTorch/datamanager.pkl', + '.autoPyTorch/start_time_42', + ] + for expected_file 
in expected_files: + assert os.path.exists(os.path.join(tmp_dir, expected_file)), "{}/{}/{}".format( + tmp_dir, + [data for data in pathlib.Path(tmp_dir).glob('*')], + expected_file, + ) + + # Check that smac was able to find proper models + succesful_runs = [run_value.status for run_value in estimator.run_history.data.values( + ) if 'SUCCESS' in str(run_value.status)] + assert len(succesful_runs) > 1, [(k, v) for k, v in estimator.run_history.data.items()] + + # Search for an existing run key in disc. A individual model might have + # a timeout and hence was not written to disc + successful_num_run = None + SUCCESS = False + for i, (run_key, value) in enumerate(estimator.run_history.data.items()): + if 'SUCCESS' in str(value.status): + run_key_model_run_dir = estimator._backend.get_numrun_directory( + estimator.seed, run_key.config_id + 1, run_key.budget) + successful_num_run = run_key.config_id + 1 + if os.path.exists(run_key_model_run_dir): + # Runkey config id is different from the num_run + # more specifically num_run = config_id + 1(dummy) + SUCCESS = True + break + + assert SUCCESS, f"Successful run was not properly saved for num_run: {successful_num_run}" + + model_file = os.path.join(run_key_model_run_dir, + f"{estimator.seed}.{successful_num_run}.{run_key.budget}.model") + assert os.path.exists(model_file), model_file + + # Make sure that predictions on the test data are printed and make sense + test_prediction = os.path.join(run_key_model_run_dir, + estimator._backend.get_prediction_filename( + 'test', estimator.seed, successful_num_run, + run_key.budget)) + assert os.path.exists(test_prediction), test_prediction + assert np.shape(np.load(test_prediction, allow_pickle=True))[0] == np.shape(X_test)[0] + + y_pred = estimator.predict(X_test) + assert np.shape(y_pred)[0] == np.shape(X_test)[0] + + # Make sure that predict proba has the expected shape + probabilites = estimator.predict_proba(X_test) + assert np.shape(probabilites) == (np.shape(X_test)[0], 2) + + score = estimator.score(y_pred, y_test) + assert 'accuracy' in score + + # check incumbent config and results + incumbent_config, incumbent_results = estimator.get_incumbent_results() + assert isinstance(incumbent_config, Configuration) + assert isinstance(incumbent_results, dict) + assert 'opt_loss' in incumbent_results, "run history: {}, successful_num_run: {}".format(estimator.run_history.data, + successful_num_run) + assert 'train_loss' in incumbent_results diff --git a/test/test_api/test_base_api.py b/test/test_api/test_base_api.py index 3b379dbd6..f487ad5ea 100644 --- a/test/test_api/test_base_api.py +++ b/test/test_api/test_base_api.py @@ -12,6 +12,7 @@ from autoPyTorch.api.base_task import BaseTask, _pipeline_predict from autoPyTorch.constants import TABULAR_CLASSIFICATION, TABULAR_REGRESSION +from autoPyTorch.datasets.resampling_strategy import NoResamplingStrategyTypes from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline @@ -143,3 +144,19 @@ def test_pipeline_get_budget(fit_dictionary_tabular, min_budget, max_budget, bud assert list(smac_mock.call_args)[1]['ta_kwargs']['pipeline_config'] == default_pipeline_config assert list(smac_mock.call_args)[1]['max_budget'] == max_budget assert list(smac_mock.call_args)[1]['initial_budget'] == min_budget + + +def test_no_resampling_error(backend): + """ + Checks if an error is raised when trying to construct ensemble + using `NoResamplingStrategy`. 
+ """ + BaseTask.__abstractmethods__ = set() + + with pytest.raises(ValueError, match=r"`NoResamplingStrategy` cannot be used for ensemble construction"): + BaseTask( + backend=backend, + resampling_strategy=NoResamplingStrategyTypes.no_resampling, + seed=42, + ensemble_size=1 + ) diff --git a/test/test_api/utils.py b/test/test_api/utils.py index a8c258fe9..f8a11db88 100644 --- a/test/test_api/utils.py +++ b/test/test_api/utils.py @@ -69,7 +69,7 @@ def _fit_and_predict(self, pipeline, fold: int, train_indices, # create closure for evaluating an algorithm -def dummy_eval_function( +def dummy_eval_train_function( backend, queue, metric, diff --git a/test/test_datasets/test_tabular_dataset.py b/test/test_datasets/test_tabular_dataset.py index 409e6bdec..2ee8b608e 100644 --- a/test/test_datasets/test_tabular_dataset.py +++ b/test/test_datasets/test_tabular_dataset.py @@ -2,6 +2,9 @@ import pytest +from autoPyTorch.data.tabular_validator import TabularInputValidator +from autoPyTorch.datasets.base_dataset import TransformSubset +from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes, NoResamplingStrategyTypes from autoPyTorch.datasets.tabular_dataset import TabularDataset from autoPyTorch.utils.pipeline import get_dataset_requirements @@ -46,3 +49,34 @@ def test_get_dataset_properties(backend, fit_dictionary_tabular): def test_not_supported(): with pytest.raises(ValueError, match=r".*A feature validator is required to build.*"): TabularDataset(np.ones(10), np.ones(10)) + + +@pytest.mark.parametrize('resampling_strategy', + (HoldoutValTypes.holdout_validation, + CrossValTypes.k_fold_cross_validation, + NoResamplingStrategyTypes.no_resampling + )) +def test_get_dataset(resampling_strategy, n_samples): + """ + Checks the functionality of get_dataset function of the TabularDataset + gives an error when trying to get training and validation subset + """ + X = np.zeros(shape=(n_samples, 4)) + Y = np.ones(n_samples) + validator = TabularInputValidator(is_classification=True) + validator.fit(X, Y) + dataset = TabularDataset( + resampling_strategy=resampling_strategy, + X=X, + Y=Y, + validator=validator + ) + transform_subset = dataset.get_dataset(split_id=0, train=True) + assert isinstance(transform_subset, TransformSubset) + + if isinstance(resampling_strategy, NoResamplingStrategyTypes): + with pytest.raises(ValueError): + dataset.get_dataset(split_id=0, train=False) + else: + transform_subset = dataset.get_dataset(split_id=0, train=False) + assert isinstance(transform_subset, TransformSubset) diff --git a/test/test_evaluation/test_evaluation.py b/test/test_evaluation/test_evaluation.py index 222755b6e..9de1918a1 100644 --- a/test/test_evaluation/test_evaluation.py +++ b/test/test_evaluation/test_evaluation.py @@ -92,7 +92,7 @@ def run_over_time(): ############################################################################ # Test ExecuteTaFuncWithQueue.run_wrapper() - @unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function') + @unittest.mock.patch('autoPyTorch.evaluation.tae.eval_train_function') def test_eval_with_limits_holdout(self, pynisher_mock): pynisher_mock.side_effect = safe_eval_success_mock config = unittest.mock.Mock() @@ -106,7 +106,7 @@ def test_eval_with_limits_holdout(self, pynisher_mock): logger_port=self.logger_port, pynisher_context='fork', ) - info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, + info = ta.run_wrapper(RunInfo(config=config, cutoff=2000000, instance=None, instance_specific=None, seed=1, 
                                      capped=False))
         self.assertEqual(info[0].config.config_id, 198)
         self.assertEqual(info[1].status, StatusType.SUCCESS, info)
@@ -178,7 +178,7 @@ def test_zero_or_negative_cutoff(self, pynisher_mock):
                                           instance_specific=None, seed=1,
                                           capped=False))
         self.assertEqual(run_value.status, StatusType.STOP)
-    @unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function')
+    @unittest.mock.patch('autoPyTorch.evaluation.tae.eval_train_function')
     def test_eval_with_limits_holdout_fail_silent(self, pynisher_mock):
         pynisher_mock.return_value = None
         config = unittest.mock.Mock()
@@ -220,7 +220,7 @@ def test_eval_with_limits_holdout_fail_silent(self, pynisher_mock):
                           'subprocess_stdout': '',
                           'subprocess_stderr': ''})
-    @unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function')
+    @unittest.mock.patch('autoPyTorch.evaluation.tae.eval_train_function')
     def test_eval_with_limits_holdout_fail_memory_error(self, pynisher_mock):
         pynisher_mock.side_effect = MemoryError
         config = unittest.mock.Mock()
@@ -302,7 +302,7 @@ def side_effect(**kwargs):
         self.assertIsInstance(info[1].time, float)
         self.assertNotIn('exitcode', info[1].additional_info)
-    @unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function')
+    @unittest.mock.patch('autoPyTorch.evaluation.tae.eval_train_function')
     def test_eval_with_limits_holdout_2(self, eval_houldout_mock):
         config = unittest.mock.Mock()
         config.config_id = 198
@@ -331,7 +331,7 @@ def side_effect(*args, **kwargs):
         self.assertIn('configuration_origin', info[1].additional_info)
         self.assertEqual(info[1].additional_info['message'], "{'subsample': 30}")
-    @unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function')
+    @unittest.mock.patch('autoPyTorch.evaluation.tae.eval_train_function')
     def test_exception_in_target_function(self, eval_holdout_mock):
         config = unittest.mock.Mock()
         config.config_id = 198
diff --git a/test/test_evaluation/test_train_evaluator.py b/test/test_evaluation/test_evaluators.py
similarity index 65%
rename from test/test_evaluation/test_train_evaluator.py
rename to test/test_evaluation/test_evaluators.py
index a3ff067f1..2ca32af10 100644
--- a/test/test_evaluation/test_train_evaluator.py
+++ b/test/test_evaluation/test_evaluators.py
@@ -15,7 +15,8 @@
 from smac.tae import StatusType
 
 from autoPyTorch.automl_common.common.utils.backend import create
-from autoPyTorch.datasets.resampling_strategy import CrossValTypes
+from autoPyTorch.datasets.resampling_strategy import CrossValTypes, NoResamplingStrategyTypes
+from autoPyTorch.evaluation.test_evaluator import TestEvaluator
 from autoPyTorch.evaluation.train_evaluator import TrainEvaluator
 from autoPyTorch.evaluation.utils import read_queue
 from autoPyTorch.pipeline.base_pipeline import BasePipeline
@@ -294,3 +295,155 @@ def test_additional_metrics_during_training(self, pipeline_mock):
         self.assertIn('additional_run_info', result)
         self.assertIn('opt_loss', result['additional_run_info'])
         self.assertGreater(len(result['additional_run_info']['opt_loss'].keys()), 1)
+
+
+class TestTestEvaluator(BaseEvaluatorTest, unittest.TestCase):
+    _multiprocess_can_split_ = True
+
+    def setUp(self):
+        """
+        Creates a backend mock
+        """
+        tmp_dir_name = self.id()
+        self.ev_path = os.path.join(this_directory, '.tmp_evaluations', tmp_dir_name)
+        if os.path.exists(self.ev_path):
+            shutil.rmtree(self.ev_path)
+        os.makedirs(self.ev_path, exist_ok=False)
+        dummy_model_files = [os.path.join(self.ev_path, str(n)) for n in range(100)]
+        dummy_pred_files = [os.path.join(self.ev_path, str(n)) for n in range(100, 200)]
+        dummy_cv_model_files = [os.path.join(self.ev_path, str(n)) for n in range(200, 300)]
+        backend_mock = unittest.mock.Mock()
+        backend_mock.get_model_dir.return_value = self.ev_path
+        backend_mock.get_cv_model_dir.return_value = self.ev_path
+        backend_mock.get_model_path.side_effect = dummy_model_files
+        backend_mock.get_cv_model_path.side_effect = dummy_cv_model_files
+        backend_mock.get_prediction_output_path.side_effect = dummy_pred_files
+        backend_mock.temporary_directory = self.ev_path
+        self.backend_mock = backend_mock
+
+        self.tmp_dir = os.path.join(self.ev_path, 'tmp_dir')
+        self.output_dir = os.path.join(self.ev_path, 'out_dir')
+
+    def tearDown(self):
+        if os.path.exists(self.ev_path):
+            shutil.rmtree(self.ev_path)
+
+    @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline')
+    def test_no_resampling(self, pipeline_mock):
+        # Binary iris; contains 69 train samples and 31 test samples
+        D = get_binary_classification_datamanager(NoResamplingStrategyTypes.no_resampling)
+        pipeline_mock.predict_proba.side_effect = \
+            lambda X, batch_size=None: np.tile([0.6, 0.4], (len(X), 1))
+        pipeline_mock.side_effect = lambda **kwargs: pipeline_mock
+        pipeline_mock.get_additional_run_info.return_value = None
+        pipeline_mock.get_default_pipeline_options.return_value = {'budget_type': 'epochs', 'epochs': 10}
+
+        configuration = unittest.mock.Mock(spec=Configuration)
+        backend_api = create(self.tmp_dir, self.output_dir, 'autoPyTorch')
+        backend_api.load_datamanager = lambda: D
+        queue_ = multiprocessing.Queue()
+
+        evaluator = TestEvaluator(backend_api, queue_, configuration=configuration, metric=accuracy, budget=0)
+        evaluator.file_output = unittest.mock.Mock(spec=evaluator.file_output)
+        evaluator.file_output.return_value = (None, {})
+
+        evaluator.fit_predict_and_loss()
+
+        rval = read_queue(evaluator.queue)
+        self.assertEqual(len(rval), 1)
+        result = rval[0]['loss']
+        self.assertEqual(len(rval[0]), 3)
+        self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1)
+
+        self.assertEqual(evaluator.file_output.call_count, 1)
+        self.assertEqual(result, 0.5806451612903225)
+        self.assertEqual(pipeline_mock.fit.call_count, 1)
+        # Two predict_proba calls: one for the train set and one for the test set
+        self.assertEqual(pipeline_mock.predict_proba.call_count, 2)
+        self.assertEqual(evaluator.file_output.call_count, 1)
+        # Should be None as no validation predictions are produced
+        self.assertIsNone(evaluator.file_output.call_args[0][1])
+        # The number of test predictions should match the number of test targets
+        self.assertEqual(evaluator.file_output.call_args[0][0].shape[0],
+                         D.test_tensors[1].shape[0])
+        self.assertEqual(evaluator.pipeline.fit.call_count, 1)
+
+    @unittest.mock.patch.object(TestEvaluator, '_loss')
+    def test_file_output(self, loss_mock):
+
+        D = get_regression_datamanager(NoResamplingStrategyTypes.no_resampling)
+        D.name = 'test'
+        self.backend_mock.load_datamanager.return_value = D
+        configuration = unittest.mock.Mock(spec=Configuration)
+        queue_ = multiprocessing.Queue()
+        loss_mock.return_value = None
+
+        evaluator = TestEvaluator(self.backend_mock, queue_, configuration=configuration, metric=accuracy, budget=0)
+
+        self.backend_mock.get_model_dir.return_value = True
+        evaluator.pipeline = 'model'
+        evaluator.Y_optimization = D.train_tensors[1]
+        rval = evaluator.file_output(
+            D.train_tensors[1],
+            None,
+            D.test_tensors[1],
+        )
+
+        self.assertEqual(rval, (None, {}))
+        # These targets are not saved because the TestEvaluator is not used to build an ensemble
+        self.assertEqual(self.backend_mock.save_targets_ensemble.call_count, 0)
+        self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, 1)
+        self.assertEqual(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1].keys(),
+                         {'seed', 'idx', 'budget', 'model', 'cv_model',
+                          'ensemble_predictions', 'valid_predictions', 'test_predictions'})
+        self.assertIsNotNone(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['model'])
+        self.assertIsNone(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['cv_model'])
+
+        # Check that NaNs in the predictions are caught - the model must not
+        # predict nonsense for unseen data
+        D.test_tensors[1][0] = np.NaN
+        rval = evaluator.file_output(
+            D.train_tensors[1],
+            None,
+            D.test_tensors[1],
+        )
+        self.assertEqual(
+            rval,
+            (
+                1.0,
+                {
+                    'error':
+                    'Model predictions for test set contains NaNs.'
+                },
+            )
+        )
+
+    @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline')
+    def test_predict_proba_binary_classification(self, mock):
+        D = get_binary_classification_datamanager(NoResamplingStrategyTypes.no_resampling)
+        self.backend_mock.load_datamanager.return_value = D
+        mock.predict_proba.side_effect = lambda y, batch_size=None: np.array(
+            [[0.1, 0.9]] * y.shape[0]
+        )
+        mock.side_effect = lambda **kwargs: mock
+        mock.get_default_pipeline_options.return_value = {'budget_type': 'epochs', 'epochs': 10}
+        configuration = unittest.mock.Mock(spec=Configuration)
+        queue_ = multiprocessing.Queue()
+
+        evaluator = TestEvaluator(self.backend_mock, queue_, configuration=configuration, metric=accuracy, budget=0)
+
+        evaluator.fit_predict_and_loss()
+        Y_test_pred = self.backend_mock.save_numrun_to_dir.call_args_list[0][-1][
+            'ensemble_predictions']
+
+        for i in range(7):
+            self.assertEqual(0.9, Y_test_pred[i][1])
+
+    def test_get_results(self):
+        queue_ = multiprocessing.Queue()
+        for i in range(5):
+            queue_.put((i * 1, 1 - (i * 0.2), 0, "", StatusType.SUCCESS))
+        result = read_queue(queue_)
+        self.assertEqual(len(result), 5)
+        self.assertEqual(result[0][0], 0)
+        self.assertAlmostEqual(result[0][1], 1.0)
diff --git a/test/test_pipeline/components/setup/test_setup_preprocessing_node.py b/test/test_pipeline/components/setup/test_setup_preprocessing_node.py
index 0fc0bb4c0..1ec858864 100644
--- a/test/test_pipeline/components/setup/test_setup_preprocessing_node.py
+++ b/test/test_pipeline/components/setup/test_setup_preprocessing_node.py
@@ -23,7 +23,7 @@ def setUp(self):
         dataset = mock.MagicMock()
         dataset.__len__.return_value = 1
         datamanager = mock.MagicMock()
-        datamanager.get_dataset_for_training.return_value = (dataset, dataset)
+        datamanager.get_dataset.return_value = (dataset, dataset)
         datamanager.train_tensors = (np.random.random((10, 15)), np.random.random(10))
         datamanager.test_tensors = None
         self.backend.load_datamanager.return_value = datamanager
diff --git a/test/test_pipeline/components/training/test_training.py b/test/test_pipeline/components/training/test_training.py
index 98bb748c4..6b277d36d 100644
--- a/test/test_pipeline/components/training/test_training.py
+++ b/test/test_pipeline/components/training/test_training.py
@@ -108,7 +108,7 @@ def test_fit_transform(self):
         dataset = unittest.mock.MagicMock()
         dataset.__len__.return_value = 1
         datamanager = unittest.mock.MagicMock()
-        datamanager.get_dataset_for_training.return_value = (dataset, dataset)
+        datamanager.get_dataset.return_value = (dataset, dataset)
         fit_dictionary['backend'].load_datamanager.return_value = datamanager
 
         # Mock child classes requirements
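As a quick reference, the behaviour pinned down by `test_get_dataset` above can be sketched outside the test harness as follows. This is a minimal sketch, not part of the patch: the class and function names (`TabularInputValidator`, `TabularDataset`, `TransformSubset`, `NoResamplingStrategyTypes`, `get_dataset`) and the constructor arguments are taken from the diff itself, while the array shapes are placeholders chosen for illustration.

```python
import numpy as np

from autoPyTorch.data.tabular_validator import TabularInputValidator
from autoPyTorch.datasets.base_dataset import TransformSubset
from autoPyTorch.datasets.resampling_strategy import NoResamplingStrategyTypes
from autoPyTorch.datasets.tabular_dataset import TabularDataset

# Toy data; the shapes are placeholders, not values taken from the test suite.
X = np.zeros(shape=(100, 4))
Y = np.ones(100)

validator = TabularInputValidator(is_classification=True)
validator.fit(X, Y)

# With no resampling, the whole training set forms the single split.
dataset = TabularDataset(
    resampling_strategy=NoResamplingStrategyTypes.no_resampling,
    X=X,
    Y=Y,
    validator=validator,
)

# The training subset is always available as a TransformSubset.
train_subset = dataset.get_dataset(split_id=0, train=True)
assert isinstance(train_subset, TransformSubset)

# There is no held-out split, so asking for one raises a ValueError,
# which is exactly what test_get_dataset asserts for this strategy.
try:
    dataset.get_dataset(split_id=0, train=False)
except ValueError:
    pass
```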