Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Improve error handling of TaggedTable #450

Merged
merged 14 commits into from
Jul 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 16 additions & 7 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -861,7 +861,7 @@ def add_column(self, column: Column) -> Table:
DuplicateColumnNameError
If the new column already exists.
ColumnSizeError
If the size of the column does not match the amount of rows.
If the size of the column does not match the number of rows.

Examples
--------
Expand Down Expand Up @@ -902,10 +902,10 @@ def add_columns(self, columns: list[Column] | Table) -> Table:

Raises
------
ColumnSizeError
If at least one of the column sizes from the provided column list does not match the table.
DuplicateColumnNameError
If at least one column name from the provided column list already exists in the table.
ColumnSizeError
If at least one of the column sizes from the provided column list does not match the table.

Examples
--------
Expand Down Expand Up @@ -973,7 +973,12 @@ def add_row(self, row: Row) -> Table:
if self.number_of_columns == 0:
return Table.from_rows([row])
if len(set(self.column_names) - set(row.column_names)) > 0:
raise UnknownColumnNameError(list(set(self.column_names) - set(row.column_names)))
raise UnknownColumnNameError(
sorted(
set(self.column_names) - set(row.column_names),
key={val: ix for ix, val in enumerate(self.column_names)}.__getitem__,
),
)

if result.number_of_rows == 0:
int_columns = list(filter(lambda name: isinstance(row[name], int | np.int64 | np.int32), row.column_names))
Expand Down Expand Up @@ -1026,16 +1031,20 @@ def add_rows(self, rows: list[Row] | Table) -> Table:
"""
if isinstance(rows, Table):
rows = rows.to_rows()
result = self._copy()

if len(rows) == 0:
return self._copy()

different_column_names = set()
for row in rows:
different_column_names.update(set(rows[0].column_names) - set(row.column_names))
different_column_names.update(set(self.column_names) - set(row.column_names))
if len(different_column_names) > 0:
raise UnknownColumnNameError(list(different_column_names))
raise UnknownColumnNameError(
sorted(
different_column_names,
key={val: ix for ix, val in enumerate(self.column_names)}.__getitem__,
),
)

result = self._copy()

Expand Down
61 changes: 48 additions & 13 deletions src/safeds/data/tabular/containers/_tagged_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@
from typing import TYPE_CHECKING

from safeds.data.tabular.containers import Column, Row, Table
from safeds.exceptions import ColumnIsTargetError, IllegalSchemaModificationError, UnknownColumnNameError
from safeds.exceptions import (
ColumnIsTargetError,
IllegalSchemaModificationError,
UnknownColumnNameError,
)

if TYPE_CHECKING:
from collections.abc import Callable, Mapping, Sequence
Expand Down Expand Up @@ -167,10 +171,26 @@ def __init__(

@property
def features(self) -> Table:
    """
    Return the features of this tagged table.

    Returns
    -------
    Table
        A table that holds only the feature columns.
    """
    return self._features

@property
def target(self) -> Column:
    """
    Return the target of this tagged table.

    Returns
    -------
    Column
        The column that is tagged as the target.
    """
    return self._target

# ------------------------------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -198,6 +218,11 @@ def add_column_as_feature(self, column: Column) -> TaggedTable:

the original table is not modified.

Parameters
----------
column : Column
The column to be added.

Returns
-------
result : TaggedTable
Expand All @@ -208,7 +233,7 @@ def add_column_as_feature(self, column: Column) -> TaggedTable:
DuplicateColumnNameError
If the new column already exists.
ColumnSizeError
If the size of the column does not match the amount of rows.
If the size of the column does not match the number of rows.
"""
return TaggedTable._from_table(
super().add_column(column),
Expand All @@ -222,6 +247,11 @@ def add_columns_as_features(self, columns: list[Column] | Table) -> TaggedTable:

The original table is not modified.

Parameters
----------
columns : list[Column] | Table
The columns to be added as features.

Returns
-------
result : TaggedTable
Expand All @@ -230,9 +260,9 @@ def add_columns_as_features(self, columns: list[Column] | Table) -> TaggedTable:
Raises
------
DuplicateColumnNameError
If the new column already exists.
If any of the new feature columns already exist.
ColumnSizeError
If the size of the column does not match the amount of rows.
If the size of any feature column does not match the number of rows.
"""
return TaggedTable._from_table(
super().add_columns(columns),
Expand Down Expand Up @@ -270,6 +300,11 @@ def add_column(self, column: Column) -> TaggedTable:

The original table is not modified.

Parameters
----------
column : Column
The column to be added.

Returns
-------
result : TaggedTable
Expand All @@ -280,7 +315,7 @@ def add_column(self, column: Column) -> TaggedTable:
DuplicateColumnNameError
If the new column already exists.
ColumnSizeError
If the size of the column does not match the amount of rows.
If the size of the column does not match the number of rows.
"""
return TaggedTable._from_table(
super().add_column(column),
Expand All @@ -306,10 +341,10 @@ def add_columns(self, columns: list[Column] | Table) -> TaggedTable:

Raises
------
ColumnSizeError
If at least one of the column sizes from the provided column list does not match the table.
DuplicateColumnNameError
If at least one column name from the provided column list already exists in the table.
ColumnSizeError
If at least one of the column sizes from the provided column list does not match the table.
"""
return TaggedTable._from_table(
super().add_columns(columns),
Expand All @@ -335,8 +370,8 @@ def add_row(self, row: Row) -> TaggedTable:

Raises
------
SchemaMismatchError
If the schema of the row does not match the table schema.
UnknownColumnNameError
If the row has different column names than the table.
"""
return TaggedTable._from_table(super().add_row(row), target_name=self.target.name)

Expand All @@ -358,8 +393,8 @@ def add_rows(self, rows: list[Row] | Table) -> TaggedTable:

Raises
------
SchemaMismatchError
If the schema of on of the row does not match the table schema.
UnknownColumnNameError
If at least one of the rows has different column names than the table.
"""
return TaggedTable._from_table(super().add_rows(rows), target_name=self.target.name)

Expand Down Expand Up @@ -587,9 +622,9 @@ def rename_column(self, old_name: str, new_name: str) -> TaggedTable:
Parameters
----------
old_name : str
The old name of the target column
The old name of the target column.
new_name : str
The new name of the target column
The new name of the target column.

Returns
-------
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest
from safeds.data.tabular.containers import Column, Table, TaggedTable
from safeds.exceptions import ColumnSizeError, DuplicateColumnNameError

from tests.helpers import assert_that_tagged_tables_are_equal

Expand Down Expand Up @@ -29,9 +30,49 @@
],
ids=["new column as feature", "table contains a non feature/target column"],
)
def test_add_column_as_feature(
def test_should_add_column_as_feature(
tagged_table: TaggedTable,
column: Column,
tagged_table_with_new_column: TaggedTable,
) -> None:
assert_that_tagged_tables_are_equal(tagged_table.add_column_as_feature(column), tagged_table_with_new_column)


@pytest.mark.parametrize(
    ("tagged_table", "column", "error_msg"),
    [
        # The id is attached to the case itself so it cannot drift out of sync with a parallel list.
        pytest.param(
            TaggedTable({"A": [1, 2, 3], "B": [4, 5, 6]}, target_name="B", feature_names=["A"]),
            Column("A", [7, 8, 9]),
            r"Column 'A' already exists.",
            id="column_already_exists",
        ),
    ],
)
def test_should_raise_duplicate_column_name_if_column_already_exists(
    tagged_table: TaggedTable,
    column: Column,
    error_msg: str,
) -> None:
    """Adding a feature column whose name is already present must raise ``DuplicateColumnNameError``."""
    with pytest.raises(DuplicateColumnNameError, match=error_msg):
        tagged_table.add_column_as_feature(column)


@pytest.mark.parametrize(
    ("tagged_table", "column", "error_msg"),
    [
        # Table has 3 rows; the new column deliberately carries 4 values.
        pytest.param(
            TaggedTable({"A": [1, 2, 3], "B": [4, 5, 6]}, target_name="B", feature_names=["A"]),
            Column("C", [5, 7, 8, 9]),
            r"Expected a column of size 3 but got column of size 4.",
            id="column_is_oversize",
        ),
    ],
)
def test_should_raise_column_size_error_if_column_is_oversize(
    tagged_table: TaggedTable,
    column: Column,
    error_msg: str,
) -> None:
    """Adding a feature column with more values than the table has rows must raise ``ColumnSizeError``."""
    with pytest.raises(ColumnSizeError, match=error_msg):
        tagged_table.add_column_as_feature(column)
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest
from safeds.data.tabular.containers import Column, Table, TaggedTable
from safeds.exceptions import ColumnSizeError, DuplicateColumnNameError

from tests.helpers import assert_that_tagged_tables_are_equal

Expand Down Expand Up @@ -43,3 +44,43 @@ def test_add_columns_as_features(
tagged_table_with_new_columns: TaggedTable,
) -> None:
assert_that_tagged_tables_are_equal(tagged_table.add_columns_as_features(columns), tagged_table_with_new_columns)


@pytest.mark.parametrize(
    ("tagged_table", "columns", "error_msg"),
    [
        # Only the first of the two new columns ("A") collides with an existing name.
        pytest.param(
            TaggedTable({"A": [1, 2, 3], "B": [4, 5, 6]}, target_name="B", feature_names=["A"]),
            [Column("A", [7, 8, 9]), Column("D", [10, 11, 12])],
            r"Column 'A' already exists.",
            id="column_already_exist",
        ),
    ],
)
def test_add_columns_raise_duplicate_column_name_if_column_already_exist(
    tagged_table: TaggedTable,
    columns: list[Column] | Table,
    error_msg: str,
) -> None:
    """Adding feature columns must raise ``DuplicateColumnNameError`` when a name is already present."""
    with pytest.raises(DuplicateColumnNameError, match=error_msg):
        tagged_table.add_columns_as_features(columns)


@pytest.mark.parametrize(
    ("tagged_table", "columns", "error_msg"),
    [
        # Table has 3 rows; both new columns deliberately carry 4 values.
        pytest.param(
            TaggedTable({"A": [1, 2, 3], "B": [4, 5, 6]}, target_name="B", feature_names=["A"]),
            [Column("C", [5, 7, 8, 9]), Column("D", [4, 10, 11, 12])],
            r"Expected a column of size 3 but got column of size 4.",
            id="columns_are_oversize",
        ),
    ],
)
def test_should_raise_column_size_error_if_columns_are_oversize(
    tagged_table: TaggedTable,
    columns: list[Column] | Table,
    error_msg: str,
) -> None:
    """Adding feature columns with more values than the table has rows must raise ``ColumnSizeError``."""
    with pytest.raises(ColumnSizeError, match=error_msg):
        tagged_table.add_columns_as_features(columns)
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest
from safeds.data.tabular.containers import Row, TaggedTable
from safeds.exceptions import UnknownColumnNameError

from tests.helpers import assert_that_tagged_tables_are_equal

Expand Down Expand Up @@ -34,3 +35,42 @@
)
def test_should_add_row(table: TaggedTable, row: Row, expected: TaggedTable) -> None:
assert_that_tagged_tables_are_equal(table.add_row(row), expected)


@pytest.mark.parametrize(
    ("tagged_table", "row", "error_msg"),
    [
        # The row's column names ("feat", "targ") match neither column of the table.
        pytest.param(
            TaggedTable({"feature": [], "target": []}, "target", ["feature"]),
            Row({"feat": None, "targ": None}),
            r"Could not find column\(s\) 'feature, target'",
            id="columns_missing",
        ),
    ],
)
def test_should_raise_an_error_if_row_schema_invalid(
    tagged_table: TaggedTable,
    row: Row,
    error_msg: str,
) -> None:
    """Adding a row that lacks the table's columns must raise ``UnknownColumnNameError``."""
    with pytest.raises(UnknownColumnNameError, match=error_msg):
        tagged_table.add_row(row)


@pytest.mark.parametrize(
    ("tagged_table", "row", "expected_table"),
    [
        pytest.param(
            TaggedTable({"feature": [], "target": []}, "target"),
            Row({"feature": 2, "target": 5}),
            TaggedTable({"feature": [2], "target": [5]}, "target"),
            id="empty_feature_column",
        ),
    ],
)
def test_should_add_row_to_empty_table(
    tagged_table: TaggedTable,
    row: Row,
    expected_table: TaggedTable,
) -> None:
    """Adding a row to a tagged table without rows must yield a one-row tagged table."""
    actual_table = tagged_table.add_row(row)
    assert_that_tagged_tables_are_equal(actual_table, expected_table)
Loading