Update scikit-learn dependency (#115)
kiudee authored Jul 19, 2023
1 parent b806f49 commit 8f9b9c3
Showing 9 changed files with 2,266 additions and 1,914 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/release.yml
@@ -12,9 +12,9 @@ jobs:
        with:
          python-version: '3.9'
          architecture: x64
-      - run: pip install nox==2021.6.12
-      - run: pip install nox-poetry==0.8.6
-      - run: pip install poetry==1.1.6
+      - run: pip install nox==2023.4.22
+      - run: pip install nox-poetry==1.0.3
+      - run: pip install poetry==1.5.1
      - run: nox
      - run: poetry build
      - run: poetry publish --username=__token__ --password=${{ secrets.PYPI_TOKEN }}
7 changes: 3 additions & 4 deletions .github/workflows/tests.yml
@@ -10,7 +10,6 @@ jobs:
      matrix:
        include:
          - { python-version: 3.9, os: ubuntu-latest, session: "pre-commit" }
-          - { python-version: 3.7, os: ubuntu-latest, session: "tests" }
          - { python-version: 3.8, os: ubuntu-latest, session: "tests" }
          - { python-version: 3.9, os: ubuntu-latest, session: "tests" }
          - { python-version: 3.8, os: macos-latest, session: "tests" }
@@ -27,9 +26,9 @@ jobs:
        with:
          python-version: ${{ matrix.python-version }}
          architecture: x64
-      - run: pip install nox==2021.6.12
-      - run: pip install nox-poetry==0.8.6
-      - run: pip install poetry==1.1.4
+      - run: pip install nox==2023.4.22
+      - run: pip install nox-poetry==1.0.3
+      - run: pip install poetry==1.5.1
      - name: Compute pre-commit cache key
        if: matrix.session == 'pre-commit'
        id: pre-commit-cache
23 changes: 5 additions & 18 deletions bask/bayesgpr.py
@@ -4,7 +4,6 @@
import emcee as mc
import numpy as np
import scipy.stats as st
-import sklearn
from scipy.linalg import cho_solve, cholesky, solve_triangular
from sklearn.utils import check_random_state
from skopt.learning import GaussianProcessRegressor
@@ -460,18 +459,12 @@ def sample(
        if X is not None:
            if self.normalize_y:
                self._y_train_mean = np.mean(y, axis=0)
-                if int(sklearn.__version__[2:4]) >= 23:
-                    self._y_train_std = np.std(y, axis=0)
+                self._y_train_std = np.std(y, axis=0)
            else:
                self._y_train_mean = np.zeros(1)
-                if int(sklearn.__version__[2:4]) >= 23:
-                    self._y_train_std = 1
-            if int(sklearn.__version__[2:4]) >= 23:
-                self.y_train_std_ = self._y_train_std
-                self.y_train_mean_ = self._y_train_mean
-            else:
-                self.y_train_mean_ = self._y_train_mean
-                self.y_train_std_ = 1
+                self._y_train_std = 1
+            self.y_train_std_ = self._y_train_std
+            self.y_train_mean_ = self._y_train_mean
            y = (y - self.y_train_mean_) / self.y_train_std_

        if noise_vector is not None:
@@ -590,13 +583,7 @@ def fit(
"""
self.kernel = self._kernel
# In sklearn >= 23 the normalization includes scaling the output by the
# standard deviation. We need to scale the noise_vector accordingly here:
if (
int(sklearn.__version__[2:4]) >= 23
and self.normalize_y
and noise_vector is not None
):
if self.normalize_y and noise_vector is not None:
y_std = np.std(y, axis=0)
noise_vector = np.array(noise_vector) / np.power(y_std, 2)
self._apply_noise_vector(len(y), noise_vector)
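Editorial note on the scaling above (a minimal sketch, not from this commit): when targets are standardized as y' = (y - mean) / std, a per-point noise variance attached to y must shrink by std squared, since Var(y / std) = Var(y) / std**2.

import numpy as np

y = np.array([1.0, 2.0, 4.0])
noise_vector = np.array([0.1, 0.1, 0.1])  # per-point noise variances on y
y_std = np.std(y, axis=0)
# Matches the diff above: variances scale with the square of the rescaling
scaled_noise = np.array(noise_vector) / np.power(y_std, 2)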
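The version gate removed throughout this file was fragile by construction: int(sklearn.__version__[2:4]) slices the version string at fixed positions, which only works for 0.xx releases ("0.24.2"[2:4] == "24"), while scikit-learn 1.x yields "3." and int() raises ValueError. A sketch (not from this commit) of a robust alternative, had a check still been needed:

import sklearn
from packaging.version import Version

# Parse the version instead of slicing it; works for "0.24.2" and "1.3.0" alike
if Version(sklearn.__version__) >= Version("0.23"):
    pass  # normalize_y also scales y by its standard deviation from 0.23 on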
87 changes: 7 additions & 80 deletions bask/searchcv.py
@@ -1,15 +1,6 @@
-try:
-    from collections.abc import Iterable
-except ImportError:
-    from collections import Iterable
-
-import logging
-
import numpy as np
-from scipy.stats import rankdata
-from sklearn.utils.validation import check_is_fitted
from skopt import BayesSearchCV as BayesSearchCVSK
-from skopt.utils import create_result, dimensions_aslist, expected_minimum, point_asdict
+from skopt.utils import dimensions_aslist, point_asdict

from bask.optimizer import Optimizer
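The deleted try/except was a Python 2 fallback; on the interpreters this project now targets (3.8+), the ABC has lived in collections.abc since Python 3.3, and the legacy alias was removed from collections in Python 3.10, so the guard would soon have broken rather than helped. A minimal illustration:

from collections.abc import Iterable  # the only valid import on Python 3.10+

print(isinstance([1, 2], Iterable))  # True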

@@ -324,86 +315,22 @@ def _make_optimizer(self, params_space):

        return optimizer

-    @property
-    def best_params_(self):
-        check_is_fitted(self, "cv_results_")
-        if self.return_policy == "best_setting" or len(self.optimizers_) > 1:
-            if len(self.optimizers_) > 1:
-                logging.warning(
-                    "Return policy 'best_mean' is incompatible with multiple search"
-                    "spaces. Reverting to 'best_setting'."
-                )
-            return self.cv_results_["params"][self.best_index_]
-        if self.return_policy == "best_mean":
-            random_state = self.optimizer_kwargs_["random_state"]
-            # We construct a result object manually here, since in skopt versions up to
-            # 0.7.4 they were not saved yet:
-            opt = self.optimizers_[0]
-            result_object = create_result(
-                opt.Xi, opt.yi, space=opt.space, rng=random_state, models=[opt.gp]
-            )
-            point, _ = expected_minimum(
-                res=result_object, n_random_starts=100, random_state=random_state,
-            )
-            dict = point_asdict(self.search_spaces, point)
-            return dict
-
-    def _step(self, X, y, search_space, optimizer, groups=None, n_points=1):
+    def _step(self, search_space, optimizer, evaluate_candidates, n_points=1):
        """Generate n_jobs parameters and evaluate them in parallel."""

        # get parameter values to evaluate
        # TODO: Until n_points is supported, we will wrap the return value in a list
        params = [optimizer.ask(n_points=n_points)]

        # convert parameters to python native types
-        # in case we have any Iterable parameters, we want to
-        # stop numpy from coercing them into an np.array
-        def try_convert_to_np(item):
-            if isinstance(item, Iterable):
-                return item
-            try:
-                return np.array(item).item()
-            except ValueError:
-                return item
-
-        params = [[try_convert_to_np(v) for v in p] for p in params]
+        params = [[np.array(v).item() for v in p] for p in params]

        # make lists into dictionaries
        params_dict = [point_asdict(search_space, p) for p in params]

-        # HACK: self.cv_results_ is reset at every call to _fit, keep current
-        all_cv_results = self.cv_results_
-
-        # HACK: this adds compatibility with different versions of sklearn
-        refit = self.refit
-        self.refit = False
-        self._fit(X, y, groups, params_dict)
-        self.refit = refit
-
-        # merge existing and new cv_results_
-        for k in self.cv_results_:
-            all_cv_results[k].extend(self.cv_results_[k])
-
-        all_cv_results["rank_test_score"] = list(
-            np.asarray(
-                rankdata(-np.array(all_cv_results["mean_test_score"]), method="min"),
-                dtype=np.int32,
-            )
-        )
-        if self.return_train_score:
-            all_cv_results["rank_train_score"] = list(
-                np.asarray(
-                    rankdata(
-                        -np.array(all_cv_results["mean_train_score"]), method="min"
-                    ),
-                    dtype=np.int32,
-                )
-            )
-        self.cv_results_ = all_cv_results
-        self.best_index_ = np.argmax(self.cv_results_["mean_test_score"])
-
-        # feed the point and objective back into optimizer
-        local_results = self.cv_results_["mean_test_score"][-len(params) :]
+        all_results = evaluate_candidates(params_dict)
+        # Feed the point and objective value back into optimizer
+        # Optimizer minimizes objective, hence provide negative score
+        local_results = all_results["mean_test_score"][-len(params) :]

-        # optimizer minimizes objective, hence provide negative score
        return optimizer.tell(
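The rewritten _step delegates to the evaluate_candidates callback that scikit-learn's BaseSearchCV passes into _run_search: it cross-validates a batch of parameter dicts and returns the accumulated results dict (including "mean_test_score"), which is what makes the manual _fit call, cv_results_ merging, and re-ranking above unnecessary. A minimal sketch of that contract, with _propose and _observe as hypothetical helpers:

from sklearn.model_selection._search import BaseSearchCV


class SequentialSearchCV(BaseSearchCV):
    """Sketch only: evaluate candidates one batch at a time."""

    def _run_search(self, evaluate_candidates):
        for _ in range(10):
            params_dict = self._propose()  # hypothetical: returns a list of dicts
            all_results = evaluate_candidates(params_dict)
            # evaluate_candidates returns all results so far; slice off the tail
            scores = all_results["mean_test_score"][-len(params_dict):]
            self._observe(params_dict, scores)  # hypothetical bookkeeping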
2 changes: 1 addition & 1 deletion noxfile.py
@@ -7,7 +7,7 @@

locations = "bask", "noxfile.py"
nox.options.sessions = ("pre-commit", "tests")
-python_versions = ["3.7", "3.8", "3.9"]
+python_versions = ["3.8", "3.9"]


def activate_virtualenv_in_precommit_hooks(session):
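The pins bumped in the workflows above pair with this file. A minimal sketch of a session using the nox-poetry 1.x API (assumed layout, not copied from this repository):

from nox_poetry import Session, session

python_versions = ["3.8", "3.9"]


@session(python=python_versions)
def tests(s: Session) -> None:
    # nox-poetry installs the local package from a poetry-built wheel,
    # with dependencies constrained by poetry.lock
    s.install(".")
    s.install("pytest")
    s.run("pytest")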