Update scikit-learn dependency (#115)
kiudee authored Jul 19, 2023
1 parent b806f49 commit 8f9b9c3
Showing 9 changed files with 2,266 additions and 1,914 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/release.yml
@@ -12,9 +12,9 @@ jobs:
        with:
          python-version: '3.9'
          architecture: x64
-      - run: pip install nox==2021.6.12
-      - run: pip install nox-poetry==0.8.6
-      - run: pip install poetry==1.1.6
+      - run: pip install nox==2023.4.22
+      - run: pip install nox-poetry==1.0.3
+      - run: pip install poetry==1.5.1
      - run: nox
      - run: poetry build
      - run: poetry publish --username=__token__ --password=${{ secrets.PYPI_TOKEN }}
7 changes: 3 additions & 4 deletions .github/workflows/tests.yml
@@ -10,7 +10,6 @@ jobs:
      matrix:
        include:
          - { python-version: 3.9, os: ubuntu-latest, session: "pre-commit" }
-          - { python-version: 3.7, os: ubuntu-latest, session: "tests" }
          - { python-version: 3.8, os: ubuntu-latest, session: "tests" }
          - { python-version: 3.9, os: ubuntu-latest, session: "tests" }
          - { python-version: 3.8, os: macos-latest, session: "tests" }
@@ -27,9 +26,9 @@ jobs:
        with:
          python-version: ${{ matrix.python-version }}
          architecture: x64
-      - run: pip install nox==2021.6.12
-      - run: pip install nox-poetry==0.8.6
-      - run: pip install poetry==1.1.4
+      - run: pip install nox==2023.4.22
+      - run: pip install nox-poetry==1.0.3
+      - run: pip install poetry==1.5.1
      - name: Compute pre-commit cache key
        if: matrix.session == 'pre-commit'
        id: pre-commit-cache
23 changes: 5 additions & 18 deletions bask/bayesgpr.py
@@ -4,7 +4,6 @@
import emcee as mc
import numpy as np
import scipy.stats as st
-import sklearn
from scipy.linalg import cho_solve, cholesky, solve_triangular
from sklearn.utils import check_random_state
from skopt.learning import GaussianProcessRegressor
@@ -460,18 +459,12 @@ def sample(
        if X is not None:
            if self.normalize_y:
                self._y_train_mean = np.mean(y, axis=0)
-                if int(sklearn.__version__[2:4]) >= 23:
-                    self._y_train_std = np.std(y, axis=0)
+                self._y_train_std = np.std(y, axis=0)
            else:
                self._y_train_mean = np.zeros(1)
-                if int(sklearn.__version__[2:4]) >= 23:
-                    self._y_train_std = 1
-            if int(sklearn.__version__[2:4]) >= 23:
-                self.y_train_std_ = self._y_train_std
-                self.y_train_mean_ = self._y_train_mean
-            else:
-                self.y_train_mean_ = self._y_train_mean
-                self.y_train_std_ = 1
+                self._y_train_std = 1
+            self.y_train_std_ = self._y_train_std
+            self.y_train_mean_ = self._y_train_mean
            y = (y - self.y_train_mean_) / self.y_train_std_

        if noise_vector is not None:
@@ -590,13 +583,7 @@ def fit(
"""
self.kernel = self._kernel
# In sklearn >= 23 the normalization includes scaling the output by the
# standard deviation. We need to scale the noise_vector accordingly here:
if (
int(sklearn.__version__[2:4]) >= 23
and self.normalize_y
and noise_vector is not None
):
if self.normalize_y and noise_vector is not None:
y_std = np.std(y, axis=0)
noise_vector = np.array(noise_vector) / np.power(y_std, 2)
self._apply_noise_vector(len(y), noise_vector)
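Editorial note on the scaling above (a minimal sketch, not from this commit): when targets are standardized as y' = (y - mean) / std, a per-point noise variance attached to y must shrink by std squared, since Var(y / std) = Var(y) / std**2.

import numpy as np

y = np.array([1.0, 2.0, 4.0])
noise_vector = np.array([0.1, 0.1, 0.1])  # per-point noise variances on y
y_std = np.std(y, axis=0)
# Matches the diff above: variances scale with the square of the rescaling
scaled_noise = np.array(noise_vector) / np.power(y_std, 2)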
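The version gate removed throughout this file was fragile by construction: int(sklearn.__version__[2:4]) slices the version string at fixed positions, which only works for 0.xx releases ("0.24.2"[2:4] == "24"), while scikit-learn 1.x yields "3." and int() raises ValueError. A sketch (not from this commit) of a robust alternative, had a check still been needed:

import sklearn
from packaging.version import Version

# Parse the version instead of slicing it; works for "0.24.2" and "1.3.0" alike
if Version(sklearn.__version__) >= Version("0.23"):
    pass  # normalize_y also scales y by its standard deviation from 0.23 on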
87 changes: 7 additions & 80 deletions bask/searchcv.py
@@ -1,15 +1,6 @@
-try:
-    from collections.abc import Iterable
-except ImportError:
-    from collections import Iterable
-
-import logging
-
import numpy as np
-from scipy.stats import rankdata
-from sklearn.utils.validation import check_is_fitted
from skopt import BayesSearchCV as BayesSearchCVSK
-from skopt.utils import create_result, dimensions_aslist, expected_minimum, point_asdict
+from skopt.utils import dimensions_aslist, point_asdict

from bask.optimizer import Optimizer
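The deleted try/except was a Python 2 fallback; on the interpreters this project now targets (3.8+), the ABC has lived in collections.abc since Python 3.3, and the legacy alias was removed from collections in Python 3.10, so the guard would soon have broken rather than helped. A minimal illustration:

from collections.abc import Iterable  # the only valid import on Python 3.10+

print(isinstance([1, 2], Iterable))  # True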

@@ -324,86 +315,22 @@ def _make_optimizer(self, params_space):

        return optimizer

-    @property
-    def best_params_(self):
-        check_is_fitted(self, "cv_results_")
-        if self.return_policy == "best_setting" or len(self.optimizers_) > 1:
-            if len(self.optimizers_) > 1:
-                logging.warning(
-                    "Return policy 'best_mean' is incompatible with multiple search"
-                    "spaces. Reverting to 'best_setting'."
-                )
-            return self.cv_results_["params"][self.best_index_]
-        if self.return_policy == "best_mean":
-            random_state = self.optimizer_kwargs_["random_state"]
-            # We construct a result object manually here, since in skopt versions up to
-            # 0.7.4 they were not saved yet:
-            opt = self.optimizers_[0]
-            result_object = create_result(
-                opt.Xi, opt.yi, space=opt.space, rng=random_state, models=[opt.gp]
-            )
-            point, _ = expected_minimum(
-                res=result_object, n_random_starts=100, random_state=random_state,
-            )
-            dict = point_asdict(self.search_spaces, point)
-            return dict
-
-    def _step(self, X, y, search_space, optimizer, groups=None, n_points=1):
+    def _step(self, search_space, optimizer, evaluate_candidates, n_points=1):
        """Generate n_jobs parameters and evaluate them in parallel."""

        # get parameter values to evaluate
        # TODO: Until n_points is supported, we will wrap the return value in a list
        params = [optimizer.ask(n_points=n_points)]

        # convert parameters to python native types
-        # in case we have any Iterable parameters, we want to
-        # stop numpy from coercing them into an np.array
-        def try_convert_to_np(item):
-            if isinstance(item, Iterable):
-                return item
-            try:
-                return np.array(item).item()
-            except ValueError:
-                return item
-
-        params = [[try_convert_to_np(v) for v in p] for p in params]
+        params = [[np.array(v).item() for v in p] for p in params]

        # make lists into dictionaries
        params_dict = [point_asdict(search_space, p) for p in params]

-        # HACK: self.cv_results_ is reset at every call to _fit, keep current
-        all_cv_results = self.cv_results_
-
-        # HACK: this adds compatibility with different versions of sklearn
-        refit = self.refit
-        self.refit = False
-        self._fit(X, y, groups, params_dict)
-        self.refit = refit
-
-        # merge existing and new cv_results_
-        for k in self.cv_results_:
-            all_cv_results[k].extend(self.cv_results_[k])
-
-        all_cv_results["rank_test_score"] = list(
-            np.asarray(
-                rankdata(-np.array(all_cv_results["mean_test_score"]), method="min"),
-                dtype=np.int32,
-            )
-        )
-        if self.return_train_score:
-            all_cv_results["rank_train_score"] = list(
-                np.asarray(
-                    rankdata(
-                        -np.array(all_cv_results["mean_train_score"]), method="min"
-                    ),
-                    dtype=np.int32,
-                )
-            )
-        self.cv_results_ = all_cv_results
-        self.best_index_ = np.argmax(self.cv_results_["mean_test_score"])
-
-        # feed the point and objective back into optimizer
-        local_results = self.cv_results_["mean_test_score"][-len(params) :]
+        all_results = evaluate_candidates(params_dict)
+        # Feed the point and objective value back into optimizer
+        # Optimizer minimizes objective, hence provide negative score
+        local_results = all_results["mean_test_score"][-len(params) :]

-        # optimizer minimizes objective, hence provide negative score
        return optimizer.tell(
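The rewritten _step delegates to the evaluate_candidates callback that scikit-learn's BaseSearchCV passes into _run_search: it cross-validates a batch of parameter dicts and returns the accumulated results dict (including "mean_test_score"), which is what makes the manual _fit call, cv_results_ merging, and re-ranking above unnecessary. A minimal sketch of that contract, with _propose and _observe as hypothetical helpers:

from sklearn.model_selection._search import BaseSearchCV


class SequentialSearchCV(BaseSearchCV):
    """Sketch only: evaluate candidates one batch at a time."""

    def _run_search(self, evaluate_candidates):
        for _ in range(10):
            params_dict = self._propose()  # hypothetical: returns a list of dicts
            all_results = evaluate_candidates(params_dict)
            # evaluate_candidates returns all results so far; slice off the tail
            scores = all_results["mean_test_score"][-len(params_dict):]
            self._observe(params_dict, scores)  # hypothetical bookkeeping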
2 changes: 1 addition & 1 deletion noxfile.py
@@ -7,7 +7,7 @@

locations = "bask", "noxfile.py"
nox.options.sessions = ("pre-commit", "tests")
-python_versions = ["3.7", "3.8", "3.9"]
+python_versions = ["3.8", "3.9"]


def activate_virtualenv_in_precommit_hooks(session):
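The pins bumped in the workflows above pair with this file. A minimal sketch of a session using the nox-poetry 1.x API (assumed layout, not copied from this repository):

from nox_poetry import Session, session

python_versions = ["3.8", "3.9"]


@session(python=python_versions)
def tests(s: Session) -> None:
    # nox-poetry installs the local package from a poetry-built wheel,
    # with dependencies constrained by poetry.lock
    s.install(".")
    s.install("pytest")
    s.run("pytest")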