From c5da544554d1895aad274ba5a4775fd63257d65f Mon Sep 17 00:00:00 2001 From: Junior Atemebang <129027012+jxnior01@users.noreply.github.com> Date: Thu, 13 Jul 2023 23:12:07 +0200 Subject: [PATCH] feat: Improve error handling of TaggedTable (#450) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #150 ### Summary of Changes * feat: Validated inputs of functions * feat: Raised appropriate exceptions with appropriate messages * docs: Modified docs * tests: Tested all exceptions --------- Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Co-authored-by: Alexander Gréus Co-authored-by: Alexander <47296670+Marsmaennchen221@users.noreply.github.com> --- src/safeds/data/tabular/containers/_table.py | 23 ++++--- .../data/tabular/containers/_tagged_table.py | 61 +++++++++++++++---- .../test_add_column_as_feature.py | 43 ++++++++++++- .../test_add_columns_as_features.py | 41 +++++++++++++ .../_table/_tagged_table/test_add_row.py | 40 ++++++++++++ .../_table/_tagged_table/test_add_rows.py | 23 ++++++- .../_tagged_table/test_remove_columns.py | 7 ++- .../_tagged_table/test_rename_column.py | 2 +- .../tabular/containers/_table/test_add_row.py | 7 ++- .../containers/_table/test_add_rows.py | 9 ++- .../{test_rename.py => test_rename_column.py} | 0 11 files changed, 229 insertions(+), 27 deletions(-) rename tests/safeds/data/tabular/containers/_table/{test_rename.py => test_rename_column.py} (100%) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 1a8181978..63c9a7cff 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -861,7 +861,7 @@ def add_column(self, column: Column) -> Table: DuplicateColumnNameError If the new column already exists. ColumnSizeError - If the size of the column does not match the amount of rows. + If the size of the column does not match the number of rows. Examples -------- @@ -902,10 +902,10 @@ def add_columns(self, columns: list[Column] | Table) -> Table: Raises ------ - ColumnSizeError - If at least one of the column sizes from the provided column list does not match the table. DuplicateColumnNameError If at least one column name from the provided column list already exists in the table. + ColumnSizeError + If at least one of the column sizes from the provided column list does not match the table. Examples -------- @@ -973,7 +973,12 @@ def add_row(self, row: Row) -> Table: if self.number_of_columns == 0: return Table.from_rows([row]) if len(set(self.column_names) - set(row.column_names)) > 0: - raise UnknownColumnNameError(list(set(self.column_names) - set(row.column_names))) + raise UnknownColumnNameError( + sorted( + set(self.column_names) - set(row.column_names), + key={val: ix for ix, val in enumerate(self.column_names)}.__getitem__, + ), + ) if result.number_of_rows == 0: int_columns = list(filter(lambda name: isinstance(row[name], int | np.int64 | np.int32), row.column_names)) @@ -1026,16 +1031,20 @@ def add_rows(self, rows: list[Row] | Table) -> Table: """ if isinstance(rows, Table): rows = rows.to_rows() - result = self._copy() if len(rows) == 0: return self._copy() different_column_names = set() for row in rows: - different_column_names.update(set(rows[0].column_names) - set(row.column_names)) + different_column_names.update(set(self.column_names) - set(row.column_names)) if len(different_column_names) > 0: - raise UnknownColumnNameError(list(different_column_names)) + raise UnknownColumnNameError( + sorted( + different_column_names, + key={val: ix for ix, val in enumerate(self.column_names)}.__getitem__, + ), + ) result = self._copy() diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index a18c3dbb1..cc045a7ec 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -4,7 +4,11 @@ from typing import TYPE_CHECKING from safeds.data.tabular.containers import Column, Row, Table -from safeds.exceptions import ColumnIsTargetError, IllegalSchemaModificationError, UnknownColumnNameError +from safeds.exceptions import ( + ColumnIsTargetError, + IllegalSchemaModificationError, + UnknownColumnNameError, +) if TYPE_CHECKING: from collections.abc import Callable, Mapping, Sequence @@ -167,10 +171,26 @@ def __init__( @property def features(self) -> Table: + """ + Get the feature columns of the tagged table. + + Returns + ------- + Table + The table containing the feature columns. + """ return self._features @property def target(self) -> Column: + """ + Get the target column of the tagged table. + + Returns + ------- + Column + The target column. + """ return self._target # ------------------------------------------------------------------------------------------------------------------ @@ -198,6 +218,11 @@ def add_column_as_feature(self, column: Column) -> TaggedTable: the original table is not modified. + Parameters + ---------- + column : Column + The column to be added. + Returns ------- result : TaggedTable @@ -208,7 +233,7 @@ def add_column_as_feature(self, column: Column) -> TaggedTable: DuplicateColumnNameError If the new column already exists. ColumnSizeError - If the size of the column does not match the amount of rows. + If the size of the column does not match the number of rows. """ return TaggedTable._from_table( super().add_column(column), @@ -222,6 +247,11 @@ def add_columns_as_features(self, columns: list[Column] | Table) -> TaggedTable: The original table is not modified. + Parameters + ---------- + columns : list[Column] | Table + The columns to be added as features. + Returns ------- result : TaggedTable @@ -230,9 +260,9 @@ def add_columns_as_features(self, columns: list[Column] | Table) -> TaggedTable: Raises ------ DuplicateColumnNameError - If the new column already exists. + If any of the new feature columns already exist. ColumnSizeError - If the size of the column does not match the amount of rows. + If the size of any feature column does not match the number of rows. """ return TaggedTable._from_table( super().add_columns(columns), @@ -270,6 +300,11 @@ def add_column(self, column: Column) -> TaggedTable: The original table is not modified. + Parameters + ---------- + column : Column + The column to be added. + Returns ------- result : TaggedTable @@ -280,7 +315,7 @@ def add_column(self, column: Column) -> TaggedTable: DuplicateColumnNameError If the new column already exists. ColumnSizeError - If the size of the column does not match the amount of rows. + If the size of the column does not match the number of rows. """ return TaggedTable._from_table( super().add_column(column), @@ -306,10 +341,10 @@ def add_columns(self, columns: list[Column] | Table) -> TaggedTable: Raises ------ - ColumnSizeError - If at least one of the column sizes from the provided column list does not match the table. DuplicateColumnNameError If at least one column name from the provided column list already exists in the table. + ColumnSizeError + If at least one of the column sizes from the provided column list does not match the table. """ return TaggedTable._from_table( super().add_columns(columns), @@ -335,8 +370,8 @@ def add_row(self, row: Row) -> TaggedTable: Raises ------ - SchemaMismatchError - If the schema of the row does not match the table schema. + UnknownColumnNameError + If the row has different column names than the table. """ return TaggedTable._from_table(super().add_row(row), target_name=self.target.name) @@ -358,8 +393,8 @@ def add_rows(self, rows: list[Row] | Table) -> TaggedTable: Raises ------ - SchemaMismatchError - If the schema of on of the row does not match the table schema. + UnknownColumnNameError + If at least one of the rows have different column names than the table. """ return TaggedTable._from_table(super().add_rows(rows), target_name=self.target.name) @@ -587,9 +622,9 @@ def rename_column(self, old_name: str, new_name: str) -> TaggedTable: Parameters ---------- old_name : str - The old name of the target column + The old name of the target column. new_name : str - The new name of the target column + The new name of the target column. Returns ------- diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py index 325df54db..97566e34b 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py @@ -1,5 +1,6 @@ import pytest from safeds.data.tabular.containers import Column, Table, TaggedTable +from safeds.exceptions import ColumnSizeError, DuplicateColumnNameError from tests.helpers import assert_that_tagged_tables_are_equal @@ -29,9 +30,49 @@ ], ids=["new column as feature", "table contains a non feature/target column"], ) -def test_add_column_as_feature( +def test_should_add_column_as_feature( tagged_table: TaggedTable, column: Column, tagged_table_with_new_column: TaggedTable, ) -> None: assert_that_tagged_tables_are_equal(tagged_table.add_column_as_feature(column), tagged_table_with_new_column) + + +@pytest.mark.parametrize( + ("tagged_table", "column", "error_msg"), + [ + ( + TaggedTable({"A": [1, 2, 3], "B": [4, 5, 6]}, target_name="B", feature_names=["A"]), + Column("A", [7, 8, 9]), + r"Column 'A' already exists.", + ), + ], + ids=["column_already_exists"], +) +def test_should_raise_duplicate_column_name_if_column_already_exists( + tagged_table: TaggedTable, + column: Column, + error_msg: str, +) -> None: + with pytest.raises(DuplicateColumnNameError, match=error_msg): + tagged_table.add_column_as_feature(column) + + +@pytest.mark.parametrize( + ("tagged_table", "column", "error_msg"), + [ + ( + TaggedTable({"A": [1, 2, 3], "B": [4, 5, 6]}, target_name="B", feature_names=["A"]), + Column("C", [5, 7, 8, 9]), + r"Expected a column of size 3 but got column of size 4.", + ), + ], + ids=["column_is_oversize"], +) +def test_should_raise_column_size_error_if_column_is_oversize( + tagged_table: TaggedTable, + column: Column, + error_msg: str, +) -> None: + with pytest.raises(ColumnSizeError, match=error_msg): + tagged_table.add_column_as_feature(column) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py index f1e7716b8..a9f479a06 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py @@ -1,5 +1,6 @@ import pytest from safeds.data.tabular.containers import Column, Table, TaggedTable +from safeds.exceptions import ColumnSizeError, DuplicateColumnNameError from tests.helpers import assert_that_tagged_tables_are_equal @@ -43,3 +44,43 @@ def test_add_columns_as_features( tagged_table_with_new_columns: TaggedTable, ) -> None: assert_that_tagged_tables_are_equal(tagged_table.add_columns_as_features(columns), tagged_table_with_new_columns) + + +@pytest.mark.parametrize( + ("tagged_table", "columns", "error_msg"), + [ + ( + TaggedTable({"A": [1, 2, 3], "B": [4, 5, 6]}, target_name="B", feature_names=["A"]), + [Column("A", [7, 8, 9]), Column("D", [10, 11, 12])], + r"Column 'A' already exists.", + ), + ], + ids=["column_already_exist"], +) +def test_add_columns_raise_duplicate_column_name_if_column_already_exist( + tagged_table: TaggedTable, + columns: list[Column] | Table, + error_msg: str, +) -> None: + with pytest.raises(DuplicateColumnNameError, match=error_msg): + tagged_table.add_columns_as_features(columns) + + +@pytest.mark.parametrize( + ("tagged_table", "columns", "error_msg"), + [ + ( + TaggedTable({"A": [1, 2, 3], "B": [4, 5, 6]}, target_name="B", feature_names=["A"]), + [Column("C", [5, 7, 8, 9]), Column("D", [4, 10, 11, 12])], + r"Expected a column of size 3 but got column of size 4.", + ), + ], + ids=["columns_are_oversize"], +) +def test_should_raise_column_size_error_if_columns_are_oversize( + tagged_table: TaggedTable, + columns: list[Column] | Table, + error_msg: str, +) -> None: + with pytest.raises(ColumnSizeError, match=error_msg): + tagged_table.add_columns_as_features(columns) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py index 2badeec11..2161fcaa1 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py @@ -1,5 +1,6 @@ import pytest from safeds.data.tabular.containers import Row, TaggedTable +from safeds.exceptions import UnknownColumnNameError from tests.helpers import assert_that_tagged_tables_are_equal @@ -34,3 +35,42 @@ ) def test_should_add_row(table: TaggedTable, row: Row, expected: TaggedTable) -> None: assert_that_tagged_tables_are_equal(table.add_row(row), expected) + + +@pytest.mark.parametrize( + ("tagged_table", "row", "error_msg"), + [ + ( + TaggedTable({"feature": [], "target": []}, "target", ["feature"]), + Row({"feat": None, "targ": None}), + r"Could not find column\(s\) 'feature, target'", + ), + ], + ids=["columns_missing"], +) +def test_should_raise_an_error_if_row_schema_invalid( + tagged_table: TaggedTable, + row: Row, + error_msg: str, +) -> None: + with pytest.raises(UnknownColumnNameError, match=error_msg): + tagged_table.add_row(row) + + +@pytest.mark.parametrize( + ("tagged_table", "row", "expected_table"), + [ + ( + TaggedTable({"feature": [], "target": []}, "target"), + Row({"feature": 2, "target": 5}), + TaggedTable({"feature": [2], "target": [5]}, "target"), + ), + ], + ids=["empty_feature_column"], +) +def test_should_add_row_to_empty_table( + tagged_table: TaggedTable, + row: Row, + expected_table: TaggedTable, +) -> None: + assert_that_tagged_tables_are_equal(tagged_table.add_row(row), expected_table) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py index da8c37a5a..31b614776 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py @@ -1,5 +1,6 @@ import pytest -from safeds.data.tabular.containers import Row, TaggedTable +from safeds.data.tabular.containers import Row, Table, TaggedTable +from safeds.exceptions import UnknownColumnNameError from tests.helpers import assert_that_tagged_tables_are_equal @@ -37,3 +38,23 @@ ) def test_should_add_rows(table: TaggedTable, rows: list[Row], expected: TaggedTable) -> None: assert_that_tagged_tables_are_equal(table.add_rows(rows), expected) + + +@pytest.mark.parametrize( + ("tagged_table", "rows", "error_msg"), + [ + ( + TaggedTable({"feature": [], "target": []}, "target", ["feature"]), + [Row({"feat": None, "targ": None}), Row({"targ": None, "feat": None})], + r"Could not find column\(s\) 'feature, target'", + ), + ], + ids=["columns_missing"], +) +def test_should_raise_an_error_if_rows_schemas_are_invalid( + tagged_table: TaggedTable, + rows: list[Row] | Table, + error_msg: str, +) -> None: + with pytest.raises(UnknownColumnNameError, match=error_msg): + tagged_table.add_rows(rows) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py index 9e8435885..519e570f8 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py @@ -169,7 +169,12 @@ def test_should_remove_columns(table: TaggedTable, columns: list[str], expected: r"Illegal schema modification: You cannot remove every feature column.", ), ], - ids=["remove_only_target", "remove_non_feat_and_target", "remove_all_features", "remove_non_feat_and_all_features"], + ids=[ + "remove_only_target", + "remove_non_feat_and_target", + "remove_all_features", + "remove_non_feat_and_all_features", + ], ) def test_should_raise_in_remove_columns( table: TaggedTable, diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py index 051c7fb90..75ee76e34 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py @@ -76,7 +76,7 @@ ], ids=["rename_feature_column", "rename_target_column", "rename_non_feature_column"], ) -def test_should_add_column( +def test_should_rename_column( original_table: TaggedTable, old_column_name: str, new_column_name: str, diff --git a/tests/safeds/data/tabular/containers/_table/test_add_row.py b/tests/safeds/data/tabular/containers/_table/test_add_row.py index d64e47f41..ab5f81367 100644 --- a/tests/safeds/data/tabular/containers/_table/test_add_row.py +++ b/tests/safeds/data/tabular/containers/_table/test_add_row.py @@ -62,8 +62,13 @@ def test_should_add_row(table: Table, row: Row, expected: Table, expected_schema Row({"col1": 5, "col3": "Hallo"}), r"Could not find column\(s\) 'col2'", ), + ( + Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), + Row({"col3": 5, "col4": "Hallo"}), + r"Could not find column\(s\) 'col1, col2'", + ), ], - ids=["unknown column col2 in row"], + ids=["unknown column col2 in row", "multiple columns missing"], ) def test_should_raise_error_if_row_column_names_invalid(table: Table, row: Row, expected_error_msg: str) -> None: with raises(UnknownColumnNameError, match=expected_error_msg): diff --git a/tests/safeds/data/tabular/containers/_table/test_add_rows.py b/tests/safeds/data/tabular/containers/_table/test_add_rows.py index b5cd8742a..76b1f345f 100644 --- a/tests/safeds/data/tabular/containers/_table/test_add_rows.py +++ b/tests/safeds/data/tabular/containers/_table/test_add_rows.py @@ -73,10 +73,15 @@ def test_should_add_rows_from_table(table1: Table, table2: Table, expected: Tabl ( Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), [Row({"col1": 2, "col3": 4}), Row({"col1": 5, "col2": "Hallo"})], - r"Could not find column\(s\) 'col3'", + r"Could not find column\(s\) 'col2'", + ), + ( + Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), + [Row({"col1": 2, "col3": 4}), Row({"colA": 5, "col2": "Hallo"})], + r"Could not find column\(s\) 'col1, col2'", ), ], - ids=["column names do not match"], + ids=["column names do not match", "multiple columns missing"], ) def test_should_raise_error_if_row_column_names_invalid(table: Table, rows: list[Row], expected_error_msg: str) -> None: with pytest.raises(UnknownColumnNameError, match=expected_error_msg): diff --git a/tests/safeds/data/tabular/containers/_table/test_rename.py b/tests/safeds/data/tabular/containers/_table/test_rename_column.py similarity index 100% rename from tests/safeds/data/tabular/containers/_table/test_rename.py rename to tests/safeds/data/tabular/containers/_table/test_rename_column.py