[FIX] Tests after rebase of reg_cocktails (#359)

* update requirements
* update requirements
* resolve remaining conflicts and fix flake and mypy
* Fix remaining tests and examples
* fix failing checks
* fix flake
automl · Mar 9, 2022 · 9b350aa · 9b350aa
1 parent 3e50b27
commit 9b350aa
Show file tree

Hide file tree

Showing 38 changed files with 291 additions and 1,016 deletions.
diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py
@@ -941,18 +941,15 @@ def run_traditional_ml(
                 learning algorithm runs over the time limit.
         """
         assert self._logger is not None  # for mypy compliancy
-        if STRING_TO_TASK_TYPES[self.task_type] in REGRESSION_TASKS:
-            self._logger.warning("Traditional Pipeline is not enabled for regression. Skipping...")
-        else:
-            traditional_task_name = 'runTraditional'
-            self._stopwatch.start_task(traditional_task_name)
-            elapsed_time = self._stopwatch.wall_elapsed(current_task_name)
-            time_for_traditional = int(runtime_limit - elapsed_time)
-            self._do_traditional_prediction(
-                func_eval_time_limit_secs=func_eval_time_limit_secs,
-                time_left=time_for_traditional,
-            )
-            self._stopwatch.stop_task(traditional_task_name)
+        traditional_task_name = 'runTraditional'
+        self._stopwatch.start_task(traditional_task_name)
+        elapsed_time = self._stopwatch.wall_elapsed(current_task_name)
+        time_for_traditional = int(runtime_limit - elapsed_time)
+        self._do_traditional_prediction(
+            func_eval_time_limit_secs=func_eval_time_limit_secs,
+            time_left=time_for_traditional,
+        )
+        self._stopwatch.stop_task(traditional_task_name)
 
     def _search(
         self,
@@ -1322,22 +1319,7 @@ def _search(
         self._logger.info("Starting Shutdown")
 
         if proc_ensemble is not None:
-            self._results_manager.ensemble_performance_history = list(proc_ensemble.history)
-
-            if len(proc_ensemble.futures) > 0:
-                # Also add ensemble runs that did not finish within smac time
-                # and add them into the ensemble history
-                self._logger.info("Ensemble script still running, waiting for it to finish.")
-                result = proc_ensemble.futures.pop().result()
-                if result:
-                    ensemble_history, _, _, _ = result
-                    self._results_manager.ensemble_performance_history.extend(ensemble_history)
-                self._logger.info("Ensemble script finished, continue shutdown.")
-
-            # save the ensemble performance history file
-            if len(self.ensemble_performance_history) > 0:
-                pd.DataFrame(self.ensemble_performance_history).to_json(
-                    os.path.join(self._backend.internals_directory, 'ensemble_history.json'))
+            self._collect_results_ensemble(proc_ensemble)
 
         if load_models:
             self._logger.info("Loading models...")
@@ -1605,7 +1587,7 @@ def fit_pipeline(
             exclude=self.exclude_components,
             search_space_updates=self.search_space_updates)
         dataset_properties = dataset.get_dataset_properties(dataset_requirements)
-        self._backend.replace_datamanager(dataset)
+        self._backend.save_datamanager(dataset)
 
         if self._logger is None:
             self._logger = self._get_logger(dataset.dataset_name)
@@ -1796,7 +1778,7 @@ def fit_ensemble(
         ensemble_fit_task_name = 'EnsembleFit'
         self._stopwatch.start_task(ensemble_fit_task_name)
         if enable_traditional_pipeline:
-            if func_eval_time_limit_secs is None or func_eval_time_limit_secs > time_for_task:
+            if func_eval_time_limit_secs > time_for_task:
                 self._logger.warning(
                     'Time limit for a single run is higher than total time '
                     'limit. Capping the limit for a single run to the total '
@@ -1837,12 +1819,8 @@ def fit_ensemble(
         )
 
         manager.build_ensemble(self._dask_client)
-        future = manager.futures.pop()
-        result = future.result()
-        if result is None:
-            raise ValueError("Errors occurred while building the ensemble - please"
-                             " check the log file and command line output for error messages.")
-        self.ensemble_performance_history, _, _, _ = result
+        if manager is not None:
+            self._collect_results_ensemble(manager)
 
         if load_models:
             self._load_models()
@@ -1920,6 +1898,31 @@ def _init_ensemble_builder(
 
         return proc_ensemble
 
+    def _collect_results_ensemble(
+        self,
+        manager: EnsembleBuilderManager
+    ) -> None:
+
+        if self._logger is None:
+            raise ValueError("logger should be initialized to fit ensemble")
+
+        self._results_manager.ensemble_performance_history = list(manager.history)
+
+        if len(manager.futures) > 0:
+            # Also add ensemble runs that did not finish within smac time
+            # and add them into the ensemble history
+            self._logger.info("Ensemble script still running, waiting for it to finish.")
+            result = manager.futures.pop().result()
+            if result:
+                ensemble_history, _, _, _ = result
+                self._results_manager.ensemble_performance_history.extend(ensemble_history)
+            self._logger.info("Ensemble script finished, continue shutdown.")
+
+        # save the ensemble performance history file
+        if len(self.ensemble_performance_history) > 0:
+            pd.DataFrame(self.ensemble_performance_history).to_json(
+                os.path.join(self._backend.internals_directory, 'ensemble_history.json'))
+
     def predict(
         self,
         X_test: np.ndarray,

diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py
@@ -18,6 +18,7 @@
 from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
 from autoPyTorch.datasets.resampling_strategy import (
     HoldoutValTypes,
+    CrossValTypes,
     ResamplingStrategies,
 )
 from autoPyTorch.datasets.tabular_dataset import TabularDataset
@@ -437,6 +438,7 @@ def search(
 
         if self.dataset is None:
             raise ValueError("`dataset` in {} must be initialized, but got None".format(self.__class__.__name__))
+
         return self._search(
             dataset=self.dataset,
             optimize_metric=optimize_metric,
@@ -476,23 +478,23 @@ def predict(
             raise ValueError("predict() is only supported after calling search. Kindly call first "
                              "the estimator search() method.")
 
-        X_test = self.input_validator.feature_validator.transform(X_test)
+        X_test = self.InputValidator.feature_validator.transform(X_test)
         predicted_probabilities = super().predict(X_test, batch_size=batch_size,
                                                   n_jobs=n_jobs)
 
-        if self.input_validator.target_validator.is_single_column_target():
+        if self.InputValidator.target_validator.is_single_column_target():
             predicted_indexes = np.argmax(predicted_probabilities, axis=1)
         else:
             predicted_indexes = (predicted_probabilities > 0.5).astype(int)
 
         # Allow to predict in the original domain -- that is, the user is not interested
         # in our encoded values
-        return self.input_validator.target_validator.inverse_transform(predicted_indexes)
+        return self.InputValidator.target_validator.inverse_transform(predicted_indexes)
 
     def predict_proba(self,
                       X_test: Union[np.ndarray, pd.DataFrame, List],
                       batch_size: Optional[int] = None, n_jobs: int = 1) -> np.ndarray:
-        if self.input_validator is None or not self.input_validator._is_fitted:
+        if self.InputValidator is None or not self.InputValidator._is_fitted:
             raise ValueError("predict() is only supported after calling search. Kindly call first "
                              "the estimator search() method.")
         X_test = self.input_validator.feature_validator.transform(X_test)

diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py
@@ -18,6 +18,7 @@
 from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
 from autoPyTorch.datasets.resampling_strategy import (
     HoldoutValTypes,
+    CrossValTypes,
     ResamplingStrategies,
 )
 from autoPyTorch.datasets.tabular_dataset import TabularDataset
@@ -437,6 +438,7 @@ def search(
 
         if self.dataset is None:
             raise ValueError("`dataset` in {} must be initialized, but got None".format(self.__class__.__name__))
+
         return self._search(
             dataset=self.dataset,
             optimize_metric=optimize_metric,
@@ -462,14 +464,14 @@ def predict(
             batch_size: Optional[int] = None,
             n_jobs: int = 1
     ) -> np.ndarray:
-        if self.input_validator is None or not self.input_validator._is_fitted:
+        if self.InputValidator is None or not self.InputValidator._is_fitted:
             raise ValueError("predict() is only supported after calling search. Kindly call first "
                              "the estimator search() method.")
 
-        X_test = self.input_validator.feature_validator.transform(X_test)
+        X_test = self.InputValidator.feature_validator.transform(X_test)
         predicted_values = super().predict(X_test, batch_size=batch_size,
                                            n_jobs=n_jobs)
 
         # Allow to predict in the original domain -- that is, the user is not interested
         # in our encoded values
-        return self.input_validator.target_validator.inverse_transform(predicted_values)
+        return self.InputValidator.target_validator.inverse_transform(predicted_values)
diff --git a/autoPyTorch/data/base_target_validator.py b/autoPyTorch/data/base_target_validator.py
@@ -86,7 +86,6 @@ def fit(
                                      np.shape(y_test)
                                  ))
             if isinstance(y_train, pd.DataFrame):
-                y_train = cast(pd.DataFrame, y_train)
                 y_test = cast(pd.DataFrame, y_test)
                 if y_train.columns.tolist() != y_test.columns.tolist():
                     raise ValueError(

diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py
@@ -2,6 +2,7 @@
 from logging import Logger
 from typing import Dict, List, Optional, Tuple, Union, cast
 
+
 import numpy as np
 
 import pandas as pd
@@ -270,7 +271,7 @@ def transform(
         if isinstance(X, np.ndarray):
             X = self.numpy_to_pandas(X)
 
-        if hasattr(X, "iloc") and not issparse(X):
+        if ispandas(X) and not issparse(X):
             X = cast(pd.DataFrame, X)
 
         # Check the data here so we catch problems on new test data
@@ -400,9 +401,6 @@ def _get_columns_info(
                 Type of each column numerical/categorical
         """
 
-        if len(self.transformed_columns) > 0 and self.feat_type is not None:
-            return self.transformed_columns, self.feat_type
-
         # Register if a column needs encoding
         numerical_columns = []
         categorical_columns = []

diff --git a/autoPyTorch/data/tabular_target_validator.py b/autoPyTorch/data/tabular_target_validator.py
@@ -1,4 +1,4 @@
-from typing import List, Optional, Union, cast
+from typing import List, Optional, cast
 
 import numpy as np
 

diff --git a/autoPyTorch/evaluation/fit_evaluator.py b/autoPyTorch/evaluation/fit_evaluator.py
@@ -10,13 +10,13 @@
 
 from smac.tae import StatusType
 
+from autoPyTorch.automl_common.common.utils.backend import Backend
 from autoPyTorch.datasets.resampling_strategy import NoResamplingStrategyTypes
 from autoPyTorch.evaluation.abstract_evaluator import (
     AbstractEvaluator,
     fit_and_suppress_warnings
 )
 from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric
-from autoPyTorch.utils.backend import Backend
 from autoPyTorch.utils.common import subsampler
 from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
 

diff --git a/autoPyTorch/optimizer/smbo.py b/autoPyTorch/optimizer/smbo.py
@@ -104,7 +104,7 @@ def __init__(self,
                  resampling_strategy_args: Optional[Dict[str, Any]] = None,
                  include: Optional[Dict[str, Any]] = None,
                  exclude: Optional[Dict[str, Any]] = None,
-                 disable_file_output: List = [],
+                 disable_file_output: Union[bool, List[str]] = False,
                  smac_scenario_args: Optional[Dict[str, Any]] = None,
                  get_smac_object_callback: Optional[Callable] = None,
                  all_supported_metrics: bool = True,

diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py
@@ -83,11 +83,7 @@ class ShakeDropFunction(Function):
         Github URL: https://github.com/owruby/shake-drop_pytorch/blob/master/models/shakedrop.py
     """
     @staticmethod
-<<<<<<< HEAD
     def forward(ctx: Any,
-=======
-    def forward(ctx: typing.Any,
->>>>>>> Bug fixes (#249)
                 x: torch.Tensor,
                 alpha: torch.Tensor,
                 beta: torch.Tensor,
@@ -114,31 +110,20 @@ def backward(ctx: Any,
 shake_drop = ShakeDropFunction.apply
 
 
-<<<<<<< HEAD
-def shake_get_alpha_beta(is_training: bool, is_cuda: bool
-                         ) -> Tuple[torch.Tensor, torch.Tensor]:
-    """
-    The methods used in this function have been introduced in 'ShakeShake Regularisation'
-    Currently, this function supports `shake-shake`.
-=======
 def shake_get_alpha_beta(
     is_training: bool,
     is_cuda: bool,
     method: str
-) -> typing.Tuple[torch.Tensor, torch.Tensor]:
+) -> Tuple[torch.Tensor, torch.Tensor]:
     """
     The methods used in this function have been introduced in 'ShakeShake Regularisation'
     Each method name is available in the referred paper.
     Currently, this function supports `even-even`, `shake-even`, `shake-shake` and `M3`.
->>>>>>> Bug fixes (#249)
 
     Args:
         is_training (bool): Whether the computation for the training
         is_cuda (bool): Whether the tensor is on CUDA
-<<<<<<< HEAD
-=======
         method (str): The shake method either `even-even`, `shake-even`, `shake-shake` or `M3`
->>>>>>> Bug fixes (#249)
 
     Returns:
         alpha, beta (Tuple[float, float]):
@@ -150,14 +135,8 @@ def shake_get_alpha_beta(
         Author: Xavier Gastaldi
         URL: https://arxiv.org/abs/1705.07485
 
-<<<<<<< HEAD
-    Note:
-        The names have been taken from the paper as well.
-        Currently, this function supports `shake-shake`.
-=======
     The names have been taken from the paper as well.
     Currently, this function supports `even-even`, `shake-even`, `shake-shake` and `M3`.
->>>>>>> Bug fixes (#249)
     """
     if not is_training:
         result = (torch.FloatTensor([0.5]), torch.FloatTensor([0.5]))
@@ -187,27 +166,15 @@ def shake_get_alpha_beta(
 
 
 def shake_drop_get_bl(
-<<<<<<< HEAD
-        block_index: int,
-        min_prob_no_shake: float,
-        num_blocks: int,
-        is_training: bool,
-        is_cuda: bool
-=======
     block_index: int,
     min_prob_no_shake: float,
     num_blocks: int,
     is_training: bool,
     is_cuda: bool
->>>>>>> Bug fixes (#249)
 ) -> torch.Tensor:
     """
     The sampling of Bernoulli random variable
     based on Eq. (4) in the paper
-<<<<<<< HEAD
-
-=======
->>>>>>> Bug fixes (#249)
     Args:
         block_index (int): The index of the block from the input layer
         min_prob_no_shake (float): The initial shake probability
@@ -217,28 +184,16 @@ def shake_drop_get_bl(
 
     Returns:
         bl (torch.Tensor): a Bernoulli random variable in {0, 1}
-<<<<<<< HEAD
-
-=======
->>>>>>> Bug fixes (#249)
     Reference:
         ShakeDrop Regularization for Deep Residual Learning
         Yoshihiro Yamada et. al. (2020)
         paper: https://arxiv.org/pdf/1802.02375.pdf
         implementation: https://github.com/imenurok/ShakeDrop
     """
-<<<<<<< HEAD
-
-    pl = 1 - ((block_index + 1) / num_blocks) * (1 - min_prob_no_shake)
-
-    if is_training:
-        # Move to torch.rand(1) for reproducibility
-=======
     pl = 1 - ((block_index + 1) / num_blocks) * (1 - min_prob_no_shake)
 
     if is_training:
         # Move to torch.rand(1) for reproducibility
->>>>>>> Bug fixes (#249)
         bl = torch.as_tensor(1.0) if torch.rand(1) <= pl else torch.as_tensor(0.0)
     else:
         bl = torch.as_tensor(pl)