From 102de2d90a129d33c78f58597d590df1f0da3ad3 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 5 May 2024 22:38:41 +0200 Subject: [PATCH] feat: regularization for decision trees and random forests (#730) Closes #700 ### Summary of Changes Add regularization options for decision trees and random forests: * maximum depth * minimum number of samples in leaves --- .../classification/_decision_tree.py | 70 ++++++++++++++-- .../classification/_random_forest.py | 81 ++++++++++++++----- .../ml/classical/regression/_decision_tree.py | 70 ++++++++++++++-- .../ml/classical/regression/_random_forest.py | 76 +++++++++++++---- .../classification/test_decision_tree.py | 49 +++++++++++ .../classification/test_random_forest.py | 38 +++++++++ .../regression/test_decision_tree.py | 49 +++++++++++ .../regression/test_random_forest.py | 38 +++++++++ 8 files changed, 420 insertions(+), 51 deletions(-) create mode 100644 tests/safeds/ml/classical/classification/test_decision_tree.py create mode 100644 tests/safeds/ml/classical/regression/test_decision_tree.py diff --git a/src/safeds/ml/classical/classification/_decision_tree.py b/src/safeds/ml/classical/classification/_decision_tree.py index 0bb5fe014..ca7cd8d5b 100644 --- a/src/safeds/ml/classical/classification/_decision_tree.py +++ b/src/safeds/ml/classical/classification/_decision_tree.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING from safeds._utils import _structural_hash +from safeds.exceptions import ClosedBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict from ._classifier import Classifier @@ -16,17 +17,66 @@ class DecisionTreeClassifier(Classifier): - """Decision tree classification.""" + """ + Decision tree classification. + + Parameters + ---------- + maximum_depth: + The maximum depth of each tree. If None, the depth is not limited. Has to be greater than 0. + minimum_number_of_samples_in_leaves: + The minimum number of samples that must remain in the leaves of each tree. Has to be greater than 0. + + Raises + ------ + OutOfBoundsError + If `maximum_depth` is less than 1. + OutOfBoundsError + If `minimum_number_of_samples_in_leaves` is less than 1. + """ + + def __init__( + self, + *, + maximum_depth: int | None = None, + minimum_number_of_samples_in_leaves: int = 1, + ) -> None: + # Validation + if maximum_depth is not None and maximum_depth < 1: + raise OutOfBoundsError(maximum_depth, name="maximum_depth", lower_bound=ClosedBound(1)) + if minimum_number_of_samples_in_leaves < 1: + raise OutOfBoundsError( + minimum_number_of_samples_in_leaves, + name="minimum_number_of_samples_in_leaves", + lower_bound=ClosedBound(1), + ) + + # Hyperparameters + self._maximum_depth: int | None = maximum_depth + self._minimum_number_of_samples_in_leaves: int = minimum_number_of_samples_in_leaves - def __hash__(self) -> int: - return _structural_hash(Classifier.__hash__(self), self._target_name, self._feature_names) - - def __init__(self) -> None: # Internal state self._wrapped_classifier: sk_DecisionTreeClassifier | None = None self._feature_names: list[str] | None = None self._target_name: str | None = None + def __hash__(self) -> int: + return _structural_hash( + Classifier.__hash__(self), + self._feature_names, + self._target_name, + ) + + @property + def maximum_depth(self) -> int | None: + """The maximum depth of the tree.""" + return self._maximum_depth + + @property + def minimum_number_of_samples_in_leaves(self) -> int: + """The minimum number of samples that must remain in the leaves of the tree.""" + return self._minimum_number_of_samples_in_leaves + def fit(self, training_set: TabularDataset) -> DecisionTreeClassifier: """ Create a copy of this classifier and fit it with the given training data. @@ -59,7 +109,10 @@ def fit(self, training_set: TabularDataset) -> DecisionTreeClassifier: wrapped_classifier = self._get_sklearn_classifier() fit(wrapped_classifier, training_set) - result = DecisionTreeClassifier() + result = DecisionTreeClassifier( + maximum_depth=self._maximum_depth, + minimum_number_of_samples_in_leaves=self._minimum_number_of_samples_in_leaves, + ) result._wrapped_classifier = wrapped_classifier result._feature_names = training_set.features.column_names result._target_name = training_set.target.name @@ -105,4 +158,7 @@ def is_fitted(self) -> bool: def _get_sklearn_classifier(self) -> ClassifierMixin: from sklearn.tree import DecisionTreeClassifier as sk_DecisionTreeClassifier - return sk_DecisionTreeClassifier() + return sk_DecisionTreeClassifier( + max_depth=self._maximum_depth, + min_samples_leaf=self._minimum_number_of_samples_in_leaves, + ) diff --git a/src/safeds/ml/classical/classification/_random_forest.py b/src/safeds/ml/classical/classification/_random_forest.py index 8f6ba7247..567106c3d 100644 --- a/src/safeds/ml/classical/classification/_random_forest.py +++ b/src/safeds/ml/classical/classification/_random_forest.py @@ -17,52 +17,82 @@ class RandomForestClassifier(Classifier): - """Random forest classification. + """ + Random forest classification. Parameters ---------- number_of_trees: The number of trees to be used in the random forest. Has to be greater than 0. + maximum_depth: + The maximum depth of each tree. If None, the depth is not limited. Has to be greater than 0. + minimum_number_of_samples_in_leaves: + The minimum number of samples that must remain in the leaves of each tree. Has to be greater than 0. Raises ------ OutOfBoundsError If `number_of_trees` is less than 1. + OutOfBoundsError + If `maximum_depth` is less than 1. + OutOfBoundsError + If `minimum_number_of_samples_in_leaves` is less than 1. """ - def __hash__(self) -> int: - return _structural_hash( - Classifier.__hash__(self), - self._target_name, - self._feature_names, - self._number_of_trees, - ) - - def __init__(self, *, number_of_trees: int = 100) -> None: + def __init__( + self, + *, + number_of_trees: int = 100, + maximum_depth: int | None = None, + minimum_number_of_samples_in_leaves: int = 1, + ) -> None: # Validation if number_of_trees < 1: raise OutOfBoundsError(number_of_trees, name="number_of_trees", lower_bound=ClosedBound(1)) + if maximum_depth is not None and maximum_depth < 1: + raise OutOfBoundsError(maximum_depth, name="maximum_depth", lower_bound=ClosedBound(1)) + if minimum_number_of_samples_in_leaves < 1: + raise OutOfBoundsError( + minimum_number_of_samples_in_leaves, + name="minimum_number_of_samples_in_leaves", + lower_bound=ClosedBound(1), + ) # Hyperparameters - self._number_of_trees = number_of_trees + self._number_of_trees: int = number_of_trees + self._maximum_depth: int | None = maximum_depth + self._minimum_number_of_samples_in_leaves: int = minimum_number_of_samples_in_leaves # Internal state self._wrapped_classifier: sk_RandomForestClassifier | None = None self._feature_names: list[str] | None = None self._target_name: str | None = None + def __hash__(self) -> int: + return _structural_hash( + Classifier.__hash__(self), + self._feature_names, + self._target_name, + self._number_of_trees, + self._maximum_depth, + self._minimum_number_of_samples_in_leaves, + ) + @property def number_of_trees(self) -> int: - """ - Get the number of trees used in the random forest. - - Returns - ------- - result: - The number of trees. - """ + """The number of trees used in the random forest.""" return self._number_of_trees + @property + def maximum_depth(self) -> int | None: + """The maximum depth of each tree.""" + return self._maximum_depth + + @property + def minimum_number_of_samples_in_leaves(self) -> int: + """The minimum number of samples that must remain in the leaves of each tree.""" + return self._minimum_number_of_samples_in_leaves + def fit(self, training_set: TabularDataset) -> RandomForestClassifier: """ Create a copy of this classifier and fit it with the given training data. @@ -95,7 +125,11 @@ def fit(self, training_set: TabularDataset) -> RandomForestClassifier: wrapped_classifier = self._get_sklearn_classifier() fit(wrapped_classifier, training_set) - result = RandomForestClassifier(number_of_trees=self._number_of_trees) + result = RandomForestClassifier( + number_of_trees=self._number_of_trees, + maximum_depth=self._maximum_depth, + minimum_number_of_samples_in_leaves=self._minimum_number_of_samples_in_leaves, + ) result._wrapped_classifier = wrapped_classifier result._feature_names = training_set.features.column_names result._target_name = training_set.target.name @@ -149,4 +183,9 @@ def _get_sklearn_classifier(self) -> ClassifierMixin: """ from sklearn.ensemble import RandomForestClassifier as sk_RandomForestClassifier - return sk_RandomForestClassifier(self._number_of_trees, n_jobs=-1) + return sk_RandomForestClassifier( + n_estimators=self._number_of_trees, + max_depth=self._maximum_depth, + min_samples_leaf=self._minimum_number_of_samples_in_leaves, + n_jobs=-1, + ) diff --git a/src/safeds/ml/classical/regression/_decision_tree.py b/src/safeds/ml/classical/regression/_decision_tree.py index 2cd8066f9..d8a066973 100644 --- a/src/safeds/ml/classical/regression/_decision_tree.py +++ b/src/safeds/ml/classical/regression/_decision_tree.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING from safeds._utils import _structural_hash +from safeds.exceptions import ClosedBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict from ._regressor import Regressor @@ -16,17 +17,66 @@ class DecisionTreeRegressor(Regressor): - """Decision tree regression.""" + """ + Decision tree regression. + + Parameters + ---------- + maximum_depth: + The maximum depth of each tree. If None, the depth is not limited. Has to be greater than 0. + minimum_number_of_samples_in_leaves: + The minimum number of samples that must remain in the leaves of each tree. Has to be greater than 0. + + Raises + ------ + OutOfBoundsError + If `maximum_depth` is less than 1. + OutOfBoundsError + If `minimum_number_of_samples_in_leaves` is less than 1. + """ + + def __init__( + self, + *, + maximum_depth: int | None = None, + minimum_number_of_samples_in_leaves: int = 5, + ) -> None: + # Validation + if maximum_depth is not None and maximum_depth < 1: + raise OutOfBoundsError(maximum_depth, name="maximum_depth", lower_bound=ClosedBound(1)) + if minimum_number_of_samples_in_leaves < 1: + raise OutOfBoundsError( + minimum_number_of_samples_in_leaves, + name="minimum_number_of_samples_in_leaves", + lower_bound=ClosedBound(1), + ) + + # Hyperparameters + self._maximum_depth: int | None = maximum_depth + self._minimum_number_of_samples_in_leaves: int = minimum_number_of_samples_in_leaves - def __hash__(self) -> int: - return _structural_hash(Regressor.__hash__(self), self._target_name, self._feature_names) - - def __init__(self) -> None: # Internal state self._wrapped_regressor: sk_DecisionTreeRegressor | None = None self._feature_names: list[str] | None = None self._target_name: str | None = None + def __hash__(self) -> int: + return _structural_hash( + Regressor.__hash__(self), + self._feature_names, + self._target_name, + ) + + @property + def maximum_depth(self) -> int | None: + """The maximum depth of the tree.""" + return self._maximum_depth + + @property + def minimum_number_of_samples_in_leaves(self) -> int: + """The minimum number of samples that must remain in the leaves of the tree.""" + return self._minimum_number_of_samples_in_leaves + def fit(self, training_set: TabularDataset) -> DecisionTreeRegressor: """ Create a copy of this regressor and fit it with the given training data. @@ -59,7 +109,10 @@ def fit(self, training_set: TabularDataset) -> DecisionTreeRegressor: wrapped_regressor = self._get_sklearn_regressor() fit(wrapped_regressor, training_set) - result = DecisionTreeRegressor() + result = DecisionTreeRegressor( + maximum_depth=self._maximum_depth, + minimum_number_of_samples_in_leaves=self._minimum_number_of_samples_in_leaves, + ) result._wrapped_regressor = wrapped_regressor result._feature_names = training_set.features.column_names result._target_name = training_set.target.name @@ -113,4 +166,7 @@ def _get_sklearn_regressor(self) -> RegressorMixin: """ from sklearn.tree import DecisionTreeRegressor as sk_DecisionTreeRegressor - return sk_DecisionTreeRegressor() + return sk_DecisionTreeRegressor( + max_depth=self._maximum_depth, + min_samples_leaf=self._minimum_number_of_samples_in_leaves, + ) diff --git a/src/safeds/ml/classical/regression/_random_forest.py b/src/safeds/ml/classical/regression/_random_forest.py index c595c5e7d..1d807d3b9 100644 --- a/src/safeds/ml/classical/regression/_random_forest.py +++ b/src/safeds/ml/classical/regression/_random_forest.py @@ -17,47 +17,82 @@ class RandomForestRegressor(Regressor): - """Random forest regression. + """ + Random forest regression. Parameters ---------- number_of_trees: The number of trees to be used in the random forest. Has to be greater than 0. + maximum_depth: + The maximum depth of each tree. If None, the depth is not limited. Has to be greater than 0. + minimum_number_of_samples_in_leaves: + The minimum number of samples that must remain in the leaves of each tree. Has to be greater than 0. Raises ------ OutOfBoundsError If `number_of_trees` is less than 1. + OutOfBoundsError + If `maximum_depth` is less than 1. + OutOfBoundsError + If `minimum_number_of_samples_in_leaves` is less than 1. """ - def __hash__(self) -> int: - return _structural_hash(Regressor.__hash__(self), self._target_name, self._feature_names, self._number_of_trees) - - def __init__(self, *, number_of_trees: int = 100) -> None: + def __init__( + self, + *, + number_of_trees: int = 100, + maximum_depth: int | None = None, + minimum_number_of_samples_in_leaves: int = 5, + ) -> None: # Validation if number_of_trees < 1: raise OutOfBoundsError(number_of_trees, name="number_of_trees", lower_bound=ClosedBound(1)) + if maximum_depth is not None and maximum_depth < 1: + raise OutOfBoundsError(maximum_depth, name="maximum_depth", lower_bound=ClosedBound(1)) + if minimum_number_of_samples_in_leaves < 1: + raise OutOfBoundsError( + minimum_number_of_samples_in_leaves, + name="minimum_number_of_samples_in_leaves", + lower_bound=ClosedBound(1), + ) # Hyperparameters - self._number_of_trees = number_of_trees + self._number_of_trees: int = number_of_trees + self._maximum_depth: int | None = maximum_depth + self._minimum_number_of_samples_in_leaves: int = minimum_number_of_samples_in_leaves # Internal state self._wrapped_regressor: sk_RandomForestRegressor | None = None self._feature_names: list[str] | None = None self._target_name: str | None = None + def __hash__(self) -> int: + return _structural_hash( + Regressor.__hash__(self), + self._feature_names, + self._target_name, + self._number_of_trees, + self._maximum_depth, + self._minimum_number_of_samples_in_leaves, + ) + @property def number_of_trees(self) -> int: - """ - Get the number of trees used in the random forest. - - Returns - ------- - result: - The number of trees. - """ + """The number of trees used in the random forest.""" return self._number_of_trees + @property + def maximum_depth(self) -> int | None: + """The maximum depth of each tree.""" + return self._maximum_depth + + @property + def minimum_number_of_samples_in_leaves(self) -> int: + """The minimum number of samples that must remain in the leaves of each tree.""" + return self._minimum_number_of_samples_in_leaves + def fit(self, training_set: TabularDataset) -> RandomForestRegressor: """ Create a copy of this regressor and fit it with the given training data. @@ -90,7 +125,11 @@ def fit(self, training_set: TabularDataset) -> RandomForestRegressor: wrapped_regressor = self._get_sklearn_regressor() fit(wrapped_regressor, training_set) - result = RandomForestRegressor(number_of_trees=self._number_of_trees) + result = RandomForestRegressor( + number_of_trees=self._number_of_trees, + maximum_depth=self._maximum_depth, + minimum_number_of_samples_in_leaves=self._minimum_number_of_samples_in_leaves, + ) result._wrapped_regressor = wrapped_regressor result._feature_names = training_set.features.column_names result._target_name = training_set.target.name @@ -144,4 +183,9 @@ def _get_sklearn_regressor(self) -> RegressorMixin: """ from sklearn.ensemble import RandomForestRegressor as sk_RandomForestRegressor - return sk_RandomForestRegressor(self._number_of_trees, n_jobs=-1) + return sk_RandomForestRegressor( + n_estimators=self._number_of_trees, + max_depth=self._maximum_depth, + min_samples_leaf=self._minimum_number_of_samples_in_leaves, + n_jobs=-1, + ) diff --git a/tests/safeds/ml/classical/classification/test_decision_tree.py b/tests/safeds/ml/classical/classification/test_decision_tree.py new file mode 100644 index 000000000..c1ad02dae --- /dev/null +++ b/tests/safeds/ml/classical/classification/test_decision_tree.py @@ -0,0 +1,49 @@ +import pytest +from safeds.data.labeled.containers import TabularDataset +from safeds.data.tabular.containers import Table +from safeds.exceptions import OutOfBoundsError +from safeds.ml.classical.classification import DecisionTreeClassifier + + +@pytest.fixture() +def training_set() -> TabularDataset: + table = Table({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) + return table.to_tabular_dataset(target_name="col1") + + +class TestMaximumDepth: + def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> None: + fitted_model = DecisionTreeClassifier(maximum_depth=2).fit(training_set) + assert fitted_model.maximum_depth == 2 + + def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None: + fitted_model = DecisionTreeClassifier(maximum_depth=2).fit(training_set) + assert fitted_model._wrapped_classifier is not None + assert fitted_model._wrapped_classifier.max_depth == 2 + + @pytest.mark.parametrize("maximum_depth", [-1, 0], ids=["minus_one", "zero"]) + def test_should_raise_if_less_than_or_equal_to_0(self, maximum_depth: int) -> None: + with pytest.raises( + OutOfBoundsError, + match=rf"maximum_depth \(={maximum_depth}\) is not inside \[1, \u221e\)\.", + ): + DecisionTreeClassifier(maximum_depth=maximum_depth) + + +class TestMinimumNumberOfSamplesInLeaves: + def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> None: + fitted_model = DecisionTreeClassifier(minimum_number_of_samples_in_leaves=2).fit(training_set) + assert fitted_model.minimum_number_of_samples_in_leaves == 2 + + def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None: + fitted_model = DecisionTreeClassifier(minimum_number_of_samples_in_leaves=2).fit(training_set) + assert fitted_model._wrapped_classifier is not None + assert fitted_model._wrapped_classifier.min_samples_leaf == 2 + + @pytest.mark.parametrize("minimum_number_of_samples_in_leaves", [-1, 0], ids=["minus_one", "zero"]) + def test_should_raise_if_less_than_or_equal_to_0(self, minimum_number_of_samples_in_leaves: int) -> None: + with pytest.raises( + OutOfBoundsError, + match=rf"minimum_number_of_samples_in_leaves \(={minimum_number_of_samples_in_leaves}\) is not inside \[1, \u221e\)\.", + ): + DecisionTreeClassifier(minimum_number_of_samples_in_leaves=minimum_number_of_samples_in_leaves) diff --git a/tests/safeds/ml/classical/classification/test_random_forest.py b/tests/safeds/ml/classical/classification/test_random_forest.py index 14e87e6a0..ec44d657f 100644 --- a/tests/safeds/ml/classical/classification/test_random_forest.py +++ b/tests/safeds/ml/classical/classification/test_random_forest.py @@ -28,3 +28,41 @@ def test_should_raise_if_less_than_or_equal_to_0(self, number_of_trees: int) -> match=rf"number_of_trees \(={number_of_trees}\) is not inside \[1, \u221e\)\.", ): RandomForestClassifier(number_of_trees=number_of_trees) + + +class TestMaximumDepth: + def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> None: + fitted_model = RandomForestClassifier(maximum_depth=2).fit(training_set) + assert fitted_model.maximum_depth == 2 + + def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None: + fitted_model = RandomForestClassifier(maximum_depth=2).fit(training_set) + assert fitted_model._wrapped_classifier is not None + assert fitted_model._wrapped_classifier.max_depth == 2 + + @pytest.mark.parametrize("maximum_depth", [-1, 0], ids=["minus_one", "zero"]) + def test_should_raise_if_less_than_or_equal_to_0(self, maximum_depth: int) -> None: + with pytest.raises( + OutOfBoundsError, + match=rf"maximum_depth \(={maximum_depth}\) is not inside \[1, \u221e\)\.", + ): + RandomForestClassifier(maximum_depth=maximum_depth) + + +class TestMinimumNumberOfSamplesInLeaves: + def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> None: + fitted_model = RandomForestClassifier(minimum_number_of_samples_in_leaves=2).fit(training_set) + assert fitted_model.minimum_number_of_samples_in_leaves == 2 + + def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None: + fitted_model = RandomForestClassifier(minimum_number_of_samples_in_leaves=2).fit(training_set) + assert fitted_model._wrapped_classifier is not None + assert fitted_model._wrapped_classifier.min_samples_leaf == 2 + + @pytest.mark.parametrize("minimum_number_of_samples_in_leaves", [-1, 0], ids=["minus_one", "zero"]) + def test_should_raise_if_less_than_or_equal_to_0(self, minimum_number_of_samples_in_leaves: int) -> None: + with pytest.raises( + OutOfBoundsError, + match=rf"minimum_number_of_samples_in_leaves \(={minimum_number_of_samples_in_leaves}\) is not inside \[1, \u221e\)\.", + ): + RandomForestClassifier(minimum_number_of_samples_in_leaves=minimum_number_of_samples_in_leaves) diff --git a/tests/safeds/ml/classical/regression/test_decision_tree.py b/tests/safeds/ml/classical/regression/test_decision_tree.py new file mode 100644 index 000000000..883dc5107 --- /dev/null +++ b/tests/safeds/ml/classical/regression/test_decision_tree.py @@ -0,0 +1,49 @@ +import pytest +from safeds.data.labeled.containers import TabularDataset +from safeds.data.tabular.containers import Table +from safeds.exceptions import OutOfBoundsError +from safeds.ml.classical.regression import DecisionTreeRegressor + + +@pytest.fixture() +def training_set() -> TabularDataset: + table = Table({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) + return table.to_tabular_dataset(target_name="col1") + + +class TestMaximumDepth: + def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> None: + fitted_model = DecisionTreeRegressor(maximum_depth=2).fit(training_set) + assert fitted_model.maximum_depth == 2 + + def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None: + fitted_model = DecisionTreeRegressor(maximum_depth=2).fit(training_set) + assert fitted_model._wrapped_regressor is not None + assert fitted_model._wrapped_regressor.max_depth == 2 + + @pytest.mark.parametrize("maximum_depth", [-1, 0], ids=["minus_one", "zero"]) + def test_should_raise_if_less_than_or_equal_to_0(self, maximum_depth: int) -> None: + with pytest.raises( + OutOfBoundsError, + match=rf"maximum_depth \(={maximum_depth}\) is not inside \[1, \u221e\)\.", + ): + DecisionTreeRegressor(maximum_depth=maximum_depth) + + +class TestMinimumNumberOfSamplesInLeaves: + def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> None: + fitted_model = DecisionTreeRegressor(minimum_number_of_samples_in_leaves=2).fit(training_set) + assert fitted_model.minimum_number_of_samples_in_leaves == 2 + + def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None: + fitted_model = DecisionTreeRegressor(minimum_number_of_samples_in_leaves=2).fit(training_set) + assert fitted_model._wrapped_regressor is not None + assert fitted_model._wrapped_regressor.min_samples_leaf == 2 + + @pytest.mark.parametrize("minimum_number_of_samples_in_leaves", [-1, 0], ids=["minus_one", "zero"]) + def test_should_raise_if_less_than_or_equal_to_0(self, minimum_number_of_samples_in_leaves: int) -> None: + with pytest.raises( + OutOfBoundsError, + match=rf"minimum_number_of_samples_in_leaves \(={minimum_number_of_samples_in_leaves}\) is not inside \[1, \u221e\)\.", + ): + DecisionTreeRegressor(minimum_number_of_samples_in_leaves=minimum_number_of_samples_in_leaves) diff --git a/tests/safeds/ml/classical/regression/test_random_forest.py b/tests/safeds/ml/classical/regression/test_random_forest.py index 2f5f97579..a37e2d902 100644 --- a/tests/safeds/ml/classical/regression/test_random_forest.py +++ b/tests/safeds/ml/classical/regression/test_random_forest.py @@ -28,3 +28,41 @@ def test_should_raise_if_less_than_or_equal_to_0(self, number_of_trees: int) -> match=rf"number_of_trees \(={number_of_trees}\) is not inside \[1, \u221e\)\.", ): RandomForestRegressor(number_of_trees=number_of_trees) + + +class TestMaximumDepth: + def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> None: + fitted_model = RandomForestRegressor(maximum_depth=2).fit(training_set) + assert fitted_model.maximum_depth == 2 + + def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None: + fitted_model = RandomForestRegressor(maximum_depth=2).fit(training_set) + assert fitted_model._wrapped_regressor is not None + assert fitted_model._wrapped_regressor.max_depth == 2 + + @pytest.mark.parametrize("maximum_depth", [-1, 0], ids=["minus_one", "zero"]) + def test_should_raise_if_less_than_or_equal_to_0(self, maximum_depth: int) -> None: + with pytest.raises( + OutOfBoundsError, + match=rf"maximum_depth \(={maximum_depth}\) is not inside \[1, \u221e\)\.", + ): + RandomForestRegressor(maximum_depth=maximum_depth) + + +class TestMinimumNumberOfSamplesInLeaves: + def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> None: + fitted_model = RandomForestRegressor(minimum_number_of_samples_in_leaves=2).fit(training_set) + assert fitted_model.minimum_number_of_samples_in_leaves == 2 + + def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None: + fitted_model = RandomForestRegressor(minimum_number_of_samples_in_leaves=2).fit(training_set) + assert fitted_model._wrapped_regressor is not None + assert fitted_model._wrapped_regressor.min_samples_leaf == 2 + + @pytest.mark.parametrize("minimum_number_of_samples_in_leaves", [-1, 0], ids=["minus_one", "zero"]) + def test_should_raise_if_less_than_or_equal_to_0(self, minimum_number_of_samples_in_leaves: int) -> None: + with pytest.raises( + OutOfBoundsError, + match=rf"minimum_number_of_samples_in_leaves \(={minimum_number_of_samples_in_leaves}\) is not inside \[1, \u221e\)\.", + ): + RandomForestRegressor(minimum_number_of_samples_in_leaves=minimum_number_of_samples_in_leaves)