Merge pull request #461 from sebp/sklearn-1-5

Add support for scikit-learn 1.5
Author: sebp · Date: Jun 8, 2024
Commit: bceb53e (merge commit)
Parents: 10af97a + ce1f061

Showing 11 changed files with 37 additions and 29 deletions.
diff --git a/README.rst b/README.rst
@@ -39,7 +39,7 @@ Requirements
 - numpy
 - osqp
 - pandas 1.0.5 or later
-- scikit-learn 1.4
+- scikit-learn 1.4 or 1.5
 - scipy
 - C/C++ compiler
 

diff --git a/ci/appveyor/py311.ps1 b/ci/appveyor/py311.ps1
@@ -1,4 +1,4 @@
 $env:CI_PYTHON_VERSION="3.11.*"
 $env:CI_PANDAS_VERSION="2.0.*"
 $env:CI_NUMPY_VERSION="1.25.*"
-$env:CI_SKLEARN_VERSION="1.4.*"
+$env:CI_SKLEARN_VERSION="1.5.*"
diff --git a/ci/appveyor/py312.ps1 b/ci/appveyor/py312.ps1
@@ -1,4 +1,4 @@
 $env:CI_PYTHON_VERSION="3.12.*"
 $env:CI_PANDAS_VERSION="2.2.*"
 $env:CI_NUMPY_VERSION="1.26.*"
-$env:CI_SKLEARN_VERSION="1.4.*"
+$env:CI_SKLEARN_VERSION="1.5.*"
diff --git a/ci/deps/py311.sh b/ci/deps/py311.sh
@@ -2,5 +2,5 @@
 export CI_PYTHON_VERSION='3.11.*'
 export CI_PANDAS_VERSION='2.0.*'
 export CI_NUMPY_VERSION='1.25.*'
-export CI_SKLEARN_VERSION='1.4.*'
+export CI_SKLEARN_VERSION='1.5.*'
 export CI_NO_SLOW=true
diff --git a/ci/deps/py312.sh b/ci/deps/py312.sh
@@ -2,5 +2,5 @@
 export CI_PYTHON_VERSION='3.12.*'
 export CI_PANDAS_VERSION='2.2.*'
 export CI_NUMPY_VERSION='1.26.*'
-export CI_SKLEARN_VERSION='1.4.*'
+export CI_SKLEARN_VERSION='1.5.*'
 export CI_NO_SLOW=false
diff --git a/doc/install.rst b/doc/install.rst
@@ -91,6 +91,6 @@ The current minimum dependencies to run scikit-survival are:
 - numpy
 - osqp
 - pandas 1.0.5 or later
-- scikit-learn 1.4
+- scikit-learn 1.4 or 1.5
 - scipy
 - C/C++ compiler
diff --git a/doc/user_guide/00-introduction.ipynb b/doc/user_guide/00-introduction.ipynb
@@ -1187,15 +1187,15 @@
        "</div>"
       ],
       "text/plain": [
-       "  param_select__k            params  split0_test_score  split1_test_score  \\\n",
-       "4               5  {'select__k': 5}           0.716093           0.719862   \n",
-       "3               4  {'select__k': 4}           0.697368           0.722332   \n",
-       "7               8  {'select__k': 8}           0.706478           0.723320   \n",
-       "5               6  {'select__k': 6}           0.704453           0.719368   \n",
-       "6               7  {'select__k': 7}           0.700405           0.719368   \n",
-       "1               2  {'select__k': 2}           0.699393           0.717885   \n",
-       "0               1  {'select__k': 1}           0.698887           0.707510   \n",
-       "2               3  {'select__k': 3}           0.708502           0.714427   \n",
+       "   param_select__k            params  split0_test_score  split1_test_score  \\\n",
+       "4                5  {'select__k': 5}           0.716093           0.719862   \n",
+       "3                4  {'select__k': 4}           0.697368           0.722332   \n",
+       "7                8  {'select__k': 8}           0.706478           0.723320   \n",
+       "5                6  {'select__k': 6}           0.704453           0.719368   \n",
+       "6                7  {'select__k': 7}           0.700405           0.719368   \n",
+       "1                2  {'select__k': 2}           0.699393           0.717885   \n",
+       "0                1  {'select__k': 1}           0.698887           0.707510   \n",
+       "2                3  {'select__k': 3}           0.708502           0.714427   \n",
        "\n",
        "   split2_test_score  mean_test_score  std_test_score  rank_test_score  \\\n",
        "4           0.716685         0.717547        0.001655                1   \n",

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,9 +4,9 @@ requires = [
     "setuptools-scm>=8",
     "packaging",
     # same as scikit-learn
-    "Cython>=3.0.8",
+    "Cython>=3.0.10",
     # building against numpy 2.x is compatible with numpy 1.x
-    "numpy>=2.0.0rc1",
+    "numpy>=2.0.0rc2",
 
     # scikit-learn requirements
     "scikit-learn~=1.4.0; python_version<='3.12'",
@@ -50,7 +50,7 @@ dependencies = [
     "osqp !=0.6.0,!=0.6.1",
     "pandas >=1.0.5",
     "scipy >=1.3.2",
-    "scikit-learn >=1.4.0,<1.5",
+    "scikit-learn >=1.4.0,<1.6",
 ]
 dynamic = ["version"]
 
@@ -138,6 +138,8 @@ filterwarnings = [
     "ignore:np\\.find_common_type is deprecated.  Please use `np\\.result_type` or `np\\.promote_types`:DeprecationWarning",
     # deprecated since NumPy 2.0
     "ignore:`trapz` is deprecated\\. Use `trapezoid` instead.*:DeprecationWarning",
+    # deprecated since scikit-learn 1.5
+    "ignore:'multi_class' was deprecated in version 1\\.5 and will be removed in 1\\.7.*:FutureWarning",
 ]
 
 [tool.coverage.run]

diff --git a/sksurv/kernels/clinical.py b/sksurv/kernels/clinical.py
@@ -174,7 +174,7 @@ def _prepare_by_column_dtype(self, X):
         nominal_columns = []
         numeric_ranges = []
 
-        fit_data = np.empty_like(X)
+        fit_data = np.empty(X.shape, dtype=np.float64)
 
         for i, dt in enumerate(X.dtypes):
             col = X.iloc[:, i]
@@ -310,7 +310,10 @@ def pairwise_kernel(self, X, Y):
         """
         check_is_fitted(self, "X_fit_")
         if X.shape[0] != Y.shape[0]:
-            raise ValueError("X and Y have different number of features")
+            raise ValueError(
+                f"Incompatible dimension for X and Y matrices: X.shape[0] == {X.shape[0]} "
+                f"while Y.shape[0] == {Y.shape[0]}"
+            )
 
         val = pairwise_continuous_ordinal_kernel(
             X[self._numeric_columns], Y[self._numeric_columns], self._numeric_ranges

diff --git a/tests/test_clinical_kernel.py b/tests/test_clinical_kernel.py
@@ -228,7 +228,10 @@ def test_pairwise_x_and_y_error_shape(make_data):
         t = ClinicalKernelTransform()
         t.fit(data)
 
-        with pytest.raises(ValueError, match="X and Y have different number of features"):
+        with pytest.raises(
+            ValueError,
+            match=r"Incompatible dimension for X and Y matrices: X\.shape\[0\] == 4 while Y\.shape\[0\] == 2",
+        ):
             t.pairwise_kernel(data.iloc[0, :], data.iloc[1, :2])
 
     @staticmethod
@@ -269,9 +272,9 @@ def test_pairwise_feature_mismatch(make_data):
 
         with pytest.raises(
             ValueError,
-            match=r"Incompatible dimension for X and Y matrices: X.shape\[1\] == 4 while Y.shape\[1\] == 17",
+            match=r"Incompatible dimension for X and Y matrices: X\.shape\[[0-1]\] == 4 while Y\.shape\[[0-1]\] == 17",
         ):
-            pairwise_kernels(t.X_fit_, np.zeros((2, 17), dtype=float), metric=t.pairwise_kernel, n_jobs=1)
+            pairwise_kernels(t.X_fit_, np.zeros((5, 17), dtype=float), metric=t.pairwise_kernel, n_jobs=1)
 
     @staticmethod
     def test_prepare(make_data):

diff --git a/tests/test_stacking.py b/tests/test_stacking.py
@@ -42,13 +42,13 @@ def dummy_data():
 
 @pytest.fixture()
 def iris_data_with_estimator():
-    def _make_estimator(**params):
+    def _make_estimator():
         data = load_iris()
         x = data["data"]
         y = data["target"]
 
         meta = Stacking(
-            LogisticRegression(**params),
+            LogisticRegression(solver="lbfgs", multi_class="multinomial"),
             [
                 ("tree", DecisionTreeClassifier(max_depth=1, random_state=0)),
                 ("svm", SVC(probability=True, gamma="auto", random_state=0)),
@@ -104,7 +104,7 @@ def test_names_not_unique(dummy_data):
 
     @staticmethod
     def test_fit(iris_data_with_estimator):
-        x, y, meta = iris_data_with_estimator(solver="liblinear", multi_class="ovr")
+        x, y, meta = iris_data_with_estimator()
         assert 2 == len(meta)
         meta.fit(x, y)
 
@@ -115,7 +115,7 @@ def test_fit(iris_data_with_estimator):
 
     @staticmethod
     def test_fit_sample_weights(iris_data_with_estimator):
-        x, y, meta = iris_data_with_estimator(solver="liblinear", multi_class="ovr")
+        x, y, meta = iris_data_with_estimator()
 
         sample_weight = np.random.RandomState(0).uniform(size=x.shape[0])
         meta.fit(x, y, tree__sample_weight=sample_weight, svm__sample_weight=sample_weight)
@@ -147,7 +147,7 @@ def test_set_params():
 
     @staticmethod
     def test_predict(iris_data_with_estimator):
-        x, y, meta = iris_data_with_estimator(multi_class="multinomial", solver="lbfgs")
+        x, y, meta = iris_data_with_estimator()
         assert 2 == len(meta)
         meta.fit(x, y)
         p = meta.predict(x)
@@ -158,7 +158,7 @@ def test_predict(iris_data_with_estimator):
     @staticmethod
     @pytest.mark.parametrize("method", ["predict_proba", "predict_log_proba"])
     def test_predict_proba(iris_data_with_estimator, method):
-        x, y, meta = iris_data_with_estimator(multi_class="multinomial", solver="lbfgs")
+        x, y, meta = iris_data_with_estimator()
         meta.fit(x, y)
         p = getattr(meta, method)(x)