
feat: Regressor.summarize_metrics and Classifier.summarize_metrics #729

Merged
38 changes: 37 additions & 1 deletion src/safeds/ml/classical/classification/_classifier.py
@@ -92,7 +92,43 @@ def _get_sklearn_classifier(self) -> ClassifierMixin:
The sklearn Classifier.
"""

# noinspection PyProtectedMember
# ------------------------------------------------------------------------------------------------------------------
# Metrics
# ------------------------------------------------------------------------------------------------------------------

def summarize_metrics(self, validation_or_test_set: TabularDataset, positive_class: Any) -> Table:
"""
Summarize the classifier's metrics on the given data.

Parameters
----------
validation_or_test_set:
The validation or test set.
positive_class:
The class to be considered positive. All other classes are considered negative.

Returns
-------
metrics:
A table containing the classifier's metrics.

Raises
------
TypeError
If a table is passed instead of a tabular dataset.
"""
accuracy = self.accuracy(validation_or_test_set)
precision = self.precision(validation_or_test_set, positive_class)
recall = self.recall(validation_or_test_set, positive_class)
f1_score = self.f1_score(validation_or_test_set, positive_class)

return Table(
{
"metric": ["accuracy", "precision", "recall", "f1_score"],
"value": [accuracy, precision, recall, f1_score],
},
)

def accuracy(self, validation_or_test_set: TabularDataset) -> float:
"""
Compute the accuracy of the classifier on the given data.
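For context, here is a minimal usage sketch of the new `Classifier.summarize_metrics` method (not part of the diff). The `DecisionTreeClassifier` model and the toy data are assumptions for illustration; only `summarize_metrics` is introduced by this PR, while `Table`, `to_tabular_dataset`, and `fit` are pre-existing Safe-DS API.

```python
# Illustrative sketch only: DecisionTreeClassifier and the toy data are
# assumptions, not part of this PR.
from safeds.data.tabular.containers import Table
from safeds.ml.classical.classification import DecisionTreeClassifier

training_set = Table(
    {
        "feature": [1, 2, 3, 4],
        "target": [0, 0, 1, 1],
    },
).to_tabular_dataset(target_name="target")

test_set = Table(
    {
        "feature": [1, 4],
        "target": [0, 1],
    },
).to_tabular_dataset(target_name="target")

fitted_classifier = DecisionTreeClassifier().fit(training_set)

# One call collects accuracy, precision, recall, and f1_score (with 1 as the
# positive class) into a Table with "metric" and "value" columns.
metrics = fitted_classifier.summarize_metrics(test_set, positive_class=1)
```
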
58 changes: 45 additions & 13 deletions src/safeds/ml/classical/regression/_regressor.py
@@ -90,10 +90,13 @@ def _get_sklearn_regressor(self) -> RegressorMixin:
The sklearn Regressor.
"""

# noinspection PyProtectedMember
def mean_squared_error(self, validation_or_test_set: TabularDataset) -> float:
# ------------------------------------------------------------------------------------------------------------------
# Metrics
# ------------------------------------------------------------------------------------------------------------------

def summarize_metrics(self, validation_or_test_set: TabularDataset) -> Table:
"""
Compute the mean squared error (MSE) on the given data.
Summarize the regressor's metrics on the given data.

Parameters
----------
@@ -102,28 +105,57 @@ def mean_squared_error(self, validation_or_test_set: TabularDataset) -> float:

Returns
-------
mean_squared_error:
The calculated mean squared error (the average of the distance of each individual row squared).
metrics:
A table containing the regressor's metrics.

Raises
------
TypeError
If a table is passed instead of a tabular dataset.
"""
from sklearn.metrics import mean_squared_error as sk_mean_squared_error
mean_absolute_error = self.mean_absolute_error(validation_or_test_set)
mean_squared_error = self.mean_squared_error(validation_or_test_set)

return Table(
{
"metric": ["mean_absolute_error", "mean_squared_error"],
"value": [mean_absolute_error, mean_squared_error],
},
)

def mean_absolute_error(self, validation_or_test_set: TabularDataset) -> float:
"""
Compute the mean absolute error (MAE) of the regressor on the given data.

Parameters
----------
validation_or_test_set:
The validation or test set.

Returns
-------
mean_absolute_error:
The calculated mean absolute error (the average of the distance of each individual row).

Raises
------
TypeError
If a table is passed instead of a tabular dataset.
"""
from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error

if not isinstance(validation_or_test_set, TabularDataset) and isinstance(validation_or_test_set, Table):
raise PlainTableError
expected = validation_or_test_set.target
predicted = self.predict(validation_or_test_set.features).target

_check_metrics_preconditions(predicted, expected)
return sk_mean_squared_error(expected._data, predicted._data)
return sk_mean_absolute_error(expected._data, predicted._data)

# noinspection PyProtectedMember
def mean_absolute_error(self, validation_or_test_set: TabularDataset) -> float:
def mean_squared_error(self, validation_or_test_set: TabularDataset) -> float:
"""
Compute the mean absolute error (MAE) of the regressor on the given data.
Compute the mean squared error (MSE) on the given data.

Parameters
----------
@@ -132,23 +164,23 @@ def mean_absolute_error(self, validation_or_test_set: TabularDataset) -> float:

Returns
-------
mean_absolute_error:
The calculated mean absolute error (the average of the distance of each individual row).
mean_squared_error:
The calculated mean squared error (the average of the distance of each individual row squared).

Raises
------
TypeError
If a table is passed instead of a tabular dataset.
"""
from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error
from sklearn.metrics import mean_squared_error as sk_mean_squared_error

if not isinstance(validation_or_test_set, TabularDataset) and isinstance(validation_or_test_set, Table):
raise PlainTableError
expected = validation_or_test_set.target
predicted = self.predict(validation_or_test_set.features).target

_check_metrics_preconditions(predicted, expected)
return sk_mean_absolute_error(expected._data, predicted._data)
return sk_mean_squared_error(expected._data, predicted._data)


# noinspection PyProtectedMember
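And the regressor counterpart, again as a hedged sketch rather than part of the change: the `DecisionTreeRegressor` model and the data below are placeholders. `summarize_metrics` simply bundles the existing `mean_absolute_error` and `mean_squared_error` results into one Table.

```python
# Illustrative sketch only: DecisionTreeRegressor and the data are assumptions.
from safeds.data.tabular.containers import Table
from safeds.ml.classical.regression import DecisionTreeRegressor

train = Table(
    {
        "x": [1.0, 2.0, 3.0, 4.0],
        "y": [2.0, 4.0, 6.0, 8.0],
    },
).to_tabular_dataset(target_name="y")

test = Table(
    {
        "x": [5.0, 6.0],
        "y": [10.0, 12.0],
    },
).to_tabular_dataset(target_name="y")

fitted_regressor = DecisionTreeRegressor().fit(train)

# The summary Table holds the same values the individual metric methods return.
metrics = fitted_regressor.summarize_metrics(test)
mae = fitted_regressor.mean_absolute_error(test)
mse = fitted_regressor.mean_squared_error(test)
```
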
46 changes: 46 additions & 0 deletions tests/safeds/ml/classical/classification/test_classifier.py
@@ -336,6 +336,52 @@ def _get_sklearn_classifier(self) -> ClassifierMixin:
pass


class TestSummarizeMetrics:
@pytest.mark.parametrize(
("predicted", "expected", "result"),
[
(
[1, 2],
[1, 2],
Table(
{
"metric": ["accuracy", "precision", "recall", "f1_score"],
"value": [1.0, 1.0, 1.0, 1.0],
},
),
),
],
)
def test_valid_data(self, predicted: list[float], expected: list[float], result: Table) -> None:
table = Table(
{
"predicted": predicted,
"expected": expected,
},
).to_tabular_dataset(
target_name="expected",
)

assert DummyClassifier().summarize_metrics(table, 1) == result

@pytest.mark.parametrize(
"table",
[
Table(
{
"a": [1.0, 0.0, 0.0, 0.0],
"b": [0.0, 1.0, 1.0, 0.0],
"c": [0.0, 0.0, 0.0, 1.0],
},
),
],
ids=["table"],
)
def test_should_raise_if_given_normal_table(self, table: Table) -> None:
with pytest.raises(PlainTableError):
DummyClassifier().summarize_metrics(table, 1) # type: ignore[arg-type]


class TestAccuracy:
def test_with_same_type(self) -> None:
table = Table(
46 changes: 46 additions & 0 deletions tests/safeds/ml/classical/regression/test_regressor.py
@@ -343,6 +343,52 @@ def _get_sklearn_regressor(self) -> RegressorMixin:
pass


class TestSummarizeMetrics:
@pytest.mark.parametrize(
("predicted", "expected", "result"),
[
(
[1, 2],
[1, 2],
Table(
{
"metric": ["mean_absolute_error", "mean_squared_error"],
"value": [0.0, 0.0],
},
),
),
],
)
def test_valid_data(self, predicted: list[float], expected: list[float], result: Table) -> None:
table = Table(
{
"predicted": predicted,
"expected": expected,
},
).to_tabular_dataset(
target_name="expected",
)

assert DummyRegressor().summarize_metrics(table) == result

@pytest.mark.parametrize(
"table",
[
Table(
{
"a": [1.0, 0.0, 0.0, 0.0],
"b": [0.0, 1.0, 1.0, 0.0],
"c": [0.0, 0.0, 0.0, 1.0],
},
),
],
ids=["table"],
)
def test_should_raise_if_given_normal_table(self, table: Table) -> None:
with pytest.raises(PlainTableError):
DummyRegressor().summarize_metrics(table) # type: ignore[arg-type]


class TestMeanAbsoluteError:
@pytest.mark.parametrize(
("predicted", "expected", "result"),