Skip to content

Commit

Permalink
feat: add InvalidFitDataError (#824)
Browse files Browse the repository at this point in the history
Closes #655 

### Summary of Changes

Add InvalidFitDataError and tests

---------

Co-authored-by: megalinter-bot <[email protected]>
  • Loading branch information
sibre28 and megalinter-bot authored Jun 11, 2024
1 parent 06eab77 commit 487854c
Show file tree
Hide file tree
Showing 4 changed files with 126 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/safeds/exceptions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
DatasetMissesFeaturesError,
FeatureDataMismatchError,
InputSizeError,
InvalidFitDataError,
InvalidModelStructureError,
LearningError,
ModelNotFittedError,
Expand Down Expand Up @@ -69,6 +70,7 @@ class OutOfBoundsError(SafeDsError):
"DatasetMissesDataError",
"DatasetMissesFeaturesError",
"FeatureDataMismatchError",
"InvalidFitDataError",
"InputSizeError",
"InvalidModelStructureError",
"LearningError",
Expand Down
7 changes: 7 additions & 0 deletions src/safeds/exceptions/_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@ def __init__(self) -> None:
super().__init__("Dataset contains no rows")


class InvalidFitDataError(Exception):
    """
    Raised when a Neural Network is fitted on invalid data.

    Parameters
    ----------
    reason:
        Explanation of why the data is invalid. It is placed on a new line after the fixed
        prefix of the error message.
    """

    def __init__(self, reason: str) -> None:
        # Fixed prefix first, then the caller-supplied explanation on its own line.
        message = f"The given Fit Data is invalid:\n{reason}"
        super().__init__(message)


class LearningError(Exception):
"""
Raised when an error occurred while training a model.
Expand Down
19 changes: 19 additions & 0 deletions src/safeds/ml/nn/converters/_input_converter_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from safeds.data.labeled.containers import TabularDataset
from safeds.data.tabular.containers import Column, Table
from safeds.exceptions import InvalidFitDataError

from ._input_converter import InputConversion

Expand Down Expand Up @@ -43,6 +44,24 @@ def _is_fit_data_valid(self, input_data: TabularDataset) -> bool:
self._feature_names = input_data.features.column_names
self._target_name = input_data.target.name
self._first = False

columns_with_missing_values = []
columns_with_non_numerical_data = []

for col in input_data.features.add_columns([input_data.target]).to_columns():
if col.missing_value_count() > 0:
columns_with_missing_values.append(col.name)
if not col.type.is_numeric:
columns_with_non_numerical_data.append(col.name)

reason = ""
if len(columns_with_missing_values) > 0:
reason += f"The following Columns contain missing values: {columns_with_missing_values}\n"
if len(columns_with_non_numerical_data) > 0:
reason += f"The following Columns contain non-numerical data: {columns_with_non_numerical_data}"
if reason != "":
raise InvalidFitDataError(reason)

return (sorted(input_data.features.column_names)).__eq__(sorted(self._feature_names))

def _is_predict_data_valid(self, input_data: Table) -> bool:
Expand Down
98 changes: 98 additions & 0 deletions tests/safeds/ml/nn/test_model.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import pickle
import re

import pytest
from safeds.data.image.typing import ImageSize
from safeds.data.labeled.containers import TabularDataset
from safeds.data.tabular.containers import Table
from safeds.exceptions import (
FeatureDataMismatchError,
InvalidFitDataError,
InvalidModelStructureError,
ModelNotFittedError,
OutOfBoundsError,
Expand Down Expand Up @@ -231,6 +233,54 @@ def test_should_raise_if_train_features_mismatch(self, device: Device) -> None:
):
learned_model.fit(Table.from_dict({"k": [0.1, 0, 0.2], "l": [0, 0.15, 0.5]}).to_tabular_dataset("k"))

@pytest.mark.parametrize(
    ("table", "reason"),
    [
        # Each case pairs a dataset (target column "c") with the escaped message expected from fit().
        pytest.param(
            Table.from_dict({"a": [1, 2, 3], "b": [1, 2, None], "c": [0, 15, 5]}).to_tabular_dataset("c"),
            re.escape("The given Fit Data is invalid:\nThe following Columns contain missing values: ['b']\n"),
            id="missing value feature",
        ),
        pytest.param(
            Table.from_dict({"a": ["a", "b", "c"], "b": [1, 2, 3], "c": [0, 15, 5]}).to_tabular_dataset("c"),
            re.escape("The given Fit Data is invalid:\nThe following Columns contain non-numerical data: ['a']"),
            id="non-numerical feature",
        ),
        pytest.param(
            Table.from_dict({"a": ["a", "b", "c"], "b": [1, 2, None], "c": [0, 15, 5]}).to_tabular_dataset("c"),
            re.escape(
                "The given Fit Data is invalid:\nThe following Columns contain missing values: ['b']\nThe following Columns contain non-numerical data: ['a']",
            ),
            id="missing value and non-numerical features",
        ),
        pytest.param(
            Table.from_dict({"a": [1, 2, 3], "b": [1, 2, 3], "c": [0, None, 5]}).to_tabular_dataset("c"),
            re.escape(
                "The given Fit Data is invalid:\nThe following Columns contain missing values: ['c']\n",
            ),
            id="missing value target",
        ),
        pytest.param(
            Table.from_dict({"a": [1, 2, 3], "b": [1, 2, 3], "c": ["a", "b", "a"]}).to_tabular_dataset("c"),
            re.escape("The given Fit Data is invalid:\nThe following Columns contain non-numerical data: ['c']"),
            id="non-numerical target",
        ),
    ],
)
def test_should_catch_invalid_fit_data(self, device: Device, table: TabularDataset, reason: str) -> None:
    """Check that fitting a classifier on the given dataset raises InvalidFitDataError matching `reason`."""
    configure_test_with_device(device)
    converter = InputConversionTable()
    layers = [ForwardLayer(neuron_count=4), ForwardLayer(1)]
    model = NeuralNetworkClassifier(converter, layers)
    with pytest.raises(InvalidFitDataError, match=reason):
        model.fit(table)

# def test_should_raise_if_table_size_and_input_size_mismatch(self, device: Device) -> None:
# configure_test_with_device(device)
# model = NeuralNetworkClassifier(
Expand Down Expand Up @@ -609,6 +659,54 @@ def test_should_raise_if_train_features_mismatch(self, device: Device) -> None:
Table.from_dict({"k": [1, 0, 2], "l": [0, 15, 5]}).to_tabular_dataset("l"),
)

@pytest.mark.parametrize(
    ("table", "reason"),
    [
        # Each case pairs a dataset (target column "c") with the escaped message expected from fit().
        pytest.param(
            Table.from_dict({"a": [1, 2, 3], "b": [1, 2, None], "c": [0, 15, 5]}).to_tabular_dataset("c"),
            re.escape("The given Fit Data is invalid:\nThe following Columns contain missing values: ['b']\n"),
            id="missing value feature",
        ),
        pytest.param(
            Table.from_dict({"a": ["a", "b", "c"], "b": [1, 2, 3], "c": [0, 15, 5]}).to_tabular_dataset("c"),
            re.escape("The given Fit Data is invalid:\nThe following Columns contain non-numerical data: ['a']"),
            id="non-numerical feature",
        ),
        pytest.param(
            Table.from_dict({"a": ["a", "b", "c"], "b": [1, 2, None], "c": [0, 15, 5]}).to_tabular_dataset("c"),
            re.escape(
                "The given Fit Data is invalid:\nThe following Columns contain missing values: ['b']\nThe following Columns contain non-numerical data: ['a']",
            ),
            id="missing value and non-numerical features",
        ),
        pytest.param(
            Table.from_dict({"a": [1, 2, 3], "b": [1, 2, 3], "c": [0, None, 5]}).to_tabular_dataset("c"),
            re.escape(
                "The given Fit Data is invalid:\nThe following Columns contain missing values: ['c']\n",
            ),
            id="missing value target",
        ),
        pytest.param(
            Table.from_dict({"a": [1, 2, 3], "b": [1, 2, 3], "c": ["a", "b", "a"]}).to_tabular_dataset("c"),
            re.escape("The given Fit Data is invalid:\nThe following Columns contain non-numerical data: ['c']"),
            id="non-numerical target",
        ),
    ],
)
def test_should_catch_invalid_fit_data(self, device: Device, table: TabularDataset, reason: str) -> None:
    """Check that fitting a regressor on the given dataset raises InvalidFitDataError matching `reason`."""
    configure_test_with_device(device)
    converter = InputConversionTable()
    layers = [ForwardLayer(neuron_count=4), ForwardLayer(1)]
    model = NeuralNetworkRegressor(converter, layers)
    with pytest.raises(InvalidFitDataError, match=reason):
        model.fit(table)

# def test_should_raise_if_table_size_and_input_size_mismatch(self, device: Device) -> None:
# configure_test_with_device(device)
# model = NeuralNetworkRegressor(
Expand Down

0 comments on commit 487854c

Please sign in to comment.