From 0a5c611ab245a83b615628582ef3821c12b6cf5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Mon, 29 May 2023 13:13:42 +0200 Subject: [PATCH 001/149] Add exception classes --- src/safeds/exceptions/__init__.py | 4 ++++ src/safeds/exceptions/_data.py | 14 ++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index 48af59135..c1a527f61 100644 --- a/src/safeds/exceptions/__init__.py +++ b/src/safeds/exceptions/__init__.py @@ -11,6 +11,8 @@ UnknownColumnNameError, ValueNotPresentWhenFittedError, WrongFileExtensionError, + IllegalSchemaModificationError, + ColumnIsTaggedError, ) from safeds.exceptions._ml import ( DatasetContainsTargetError, @@ -33,6 +35,8 @@ "UnknownColumnNameError", "ValueNotPresentWhenFittedError", "WrongFileExtensionError", + "IllegalSchemaModificationError", + "ColumnIsTaggedError", # ML exceptions "DatasetContainsTargetError", "DatasetMissesFeaturesError", diff --git a/src/safeds/exceptions/_data.py b/src/safeds/exceptions/_data.py index 26e402c1c..0d6e31b49 100644 --- a/src/safeds/exceptions/_data.py +++ b/src/safeds/exceptions/_data.py @@ -112,3 +112,17 @@ def __init__(self, file: str | Path, file_extension: str | list[str]) -> None: f" {file_extension}" ), ) + + +class IllegalSchemaModificationError(Exception): + """Exception raised when modifying the schema in a way that is not consistent with the subclass's requirements.""" + + def __init__(self, msg: str) -> None: + super().__init__(f"Illegal schema modification: {msg}") + + +class ColumnIsTaggedError(IllegalSchemaModificationError): + """Exception raised in overriden methods of the Table class when removing tagged Columns from a TaggedTable.""" + + def __init__(self, column_name: str) -> None: + super().__init__(f'Column "{column_name}" is tagged and cannot be removed.') From 615d10a8109e6a64fdb1100ac1234acb80cd59be Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Mon, 29 May 2023 11:17:04 +0000 Subject: [PATCH 002/149] style: apply automated linter fixes --- src/safeds/exceptions/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index c1a527f61..da4c790ed 100644 --- a/src/safeds/exceptions/__init__.py +++ b/src/safeds/exceptions/__init__.py @@ -1,9 +1,11 @@ """Custom exceptions that can be raised by Safe-DS.""" from safeds.exceptions._data import ( + ColumnIsTaggedError, ColumnLengthMismatchError, ColumnSizeError, DuplicateColumnNameError, + IllegalSchemaModificationError, IndexOutOfBoundsError, NonNumericColumnError, SchemaMismatchError, @@ -11,8 +13,6 @@ UnknownColumnNameError, ValueNotPresentWhenFittedError, WrongFileExtensionError, - IllegalSchemaModificationError, - ColumnIsTaggedError, ) from safeds.exceptions._ml import ( DatasetContainsTargetError, From 2ab4c9359b88c4ed9a64c4444c1286b98b31d80f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Mon, 29 May 2023 13:28:40 +0200 Subject: [PATCH 003/149] Add implementation for add_column --- .../data/tabular/containers/_tagged_table.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index d06fa154b..df2ba0cc0 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -164,3 +164,29 @@ def features(self) -> Table: @property def target(self) -> Column: return self._target + + # ------------------------------------------------------------------------------------------------------------------ + # Overriden methods from Table class: + # ------------------------------------------------------------------------------------------------------------------ + + def add_column(self, column: Column) -> TaggedTable: + """ + Return the original table with the provided column attached at the end, as a feature column. + + This table is not modified. + + Returns + ------- + result : Table + The table with the column attached as a feature column. + + Raises + ------ + DuplicateColumnNameError + If the new column already exists. + + ColumnSizeError + If the size of the column does not match the amount of rows. + + """ + return TaggedTable._from_table(super().add_column(column), target_name=self._target.name, feature_names=None) From 1183c3ee8488f2a87201a687d66a985d335c6711 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Mon, 29 May 2023 13:43:47 +0200 Subject: [PATCH 004/149] Add test for overriden add_columns method --- .../_table/_tagged_table/test_add_column.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py new file mode 100644 index 000000000..eb0985101 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py @@ -0,0 +1,27 @@ +import pytest + +from safeds.data.tabular.containers import TaggedTable, Column + + +def test_should_add_column(): + table = TaggedTable( + { + "feature_1": [0, 1, 2], + "target": [3, 4, 5], + }, + "target", + None, + ) + col = Column("feature_2", [6, 7, 8]) + new_table = table.add_column(col) + expected = TaggedTable( + { + "feature_1": [0, 1, 2], + "target": [3, 4, 5], + "feature_2": [6, 7, 8], + }, + "target", + None, + ) + assert new_table.schema == expected.schema + assert new_table == expected From 09110cace5ac32b3fe60443576b780039d1d9a75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Mon, 29 May 2023 13:47:11 +0200 Subject: [PATCH 005/149] Add __init.py__ to _tagged_table test directory --- .../data/tabular/containers/_table/_tagged_table/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/__init__.py diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/__init__.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/__init__.py new file mode 100644 index 000000000..e69de29bb From ce347fec2e210d4b1edb5dfc491f03ba5971ebd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Mon, 29 May 2023 13:51:32 +0200 Subject: [PATCH 006/149] Add return type annotations --- .../tabular/containers/_table/_tagged_table/test_add_column.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py index eb0985101..615f6e40f 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py @@ -3,7 +3,7 @@ from safeds.data.tabular.containers import TaggedTable, Column -def test_should_add_column(): +def test_should_add_column() -> None: table = TaggedTable( { "feature_1": [0, 1, 2], From 24c6aef4e90e31a5597c78f25544fec3625cbcbb Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Mon, 29 May 2023 11:53:42 +0000 Subject: [PATCH 007/149] style: apply automated linter fixes --- .../tabular/containers/_table/_tagged_table/test_add_column.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py index 615f6e40f..47cc1ad5c 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py @@ -1,6 +1,5 @@ -import pytest -from safeds.data.tabular.containers import TaggedTable, Column +from safeds.data.tabular.containers import Column, TaggedTable def test_should_add_column() -> None: From dc6de5390b92dda6a95bde51c8f2b5a3579f1d5b Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Mon, 29 May 2023 11:55:30 +0000 Subject: [PATCH 008/149] style: apply automated linter fixes --- .../tabular/containers/_table/_tagged_table/test_add_column.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py index 47cc1ad5c..a32ae60f1 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py @@ -1,4 +1,3 @@ - from safeds.data.tabular.containers import Column, TaggedTable From fd5e6ab6ecebcc54bdd24114f80f326fb62accdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Mon, 29 May 2023 14:04:03 +0200 Subject: [PATCH 009/149] Add specific tests for features and target --- .../tabular/containers/_table/_tagged_table/test_add_column.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py index 47cc1ad5c..6b2daf6df 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py @@ -23,4 +23,6 @@ def test_should_add_column() -> None: None, ) assert new_table.schema == expected.schema + assert new_table.features == expected.features + assert new_table.target == expected.target assert new_table == expected From 5370f9fe79a726491285cb2830a8b6a1a46dff16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Mon, 29 May 2023 14:22:50 +0200 Subject: [PATCH 010/149] Add add_columns implementation --- .../data/tabular/containers/_tagged_table.py | 27 +++++++++++++++- .../_table/_tagged_table/test_add_columns.py | 31 +++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index df2ba0cc0..6e0b0672e 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -177,7 +177,7 @@ def add_column(self, column: Column) -> TaggedTable: Returns ------- - result : Table + result : TaggedTable The table with the column attached as a feature column. Raises @@ -190,3 +190,28 @@ def add_column(self, column: Column) -> TaggedTable: """ return TaggedTable._from_table(super().add_column(column), target_name=self._target.name, feature_names=None) + + def add_columns(self, columns: list[Column] | Table) -> TaggedTable: + """ + Add multiple columns to the table, as feature columns. + + This table is not modified. + + Parameters + ---------- + columns : list[Column] or Table + The columns to be added. + + Returns + ------- + result: TaggedTable + A new table combining the original table and the given columns as feature columns. + + Raises + ------ + ColumnSizeError + If at least one of the column sizes from the provided column list does not match the table. + DuplicateColumnNameError + If at least one column name from the provided column list already exists in the table. + """ + return TaggedTable._from_table(super().add_columns(columns), target_name=self._target.name, feature_names=None) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py new file mode 100644 index 000000000..f77efa6b3 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py @@ -0,0 +1,31 @@ +from safeds.data.tabular.containers import Column, TaggedTable + + +def test_should_add_columns() -> None: + table = TaggedTable( + { + "feature_1": [0, 1, 2], + "target": [3, 4, 5], + }, + "target", + None, + ) + cols = [ + Column("feature_2", [6, 7, 8]), + Column("feature_3", [9, 6, 3]), + ] + new_table = table.add_columns(cols) + expected = TaggedTable( + { + "feature_1": [0, 1, 2], + "target": [3, 4, 5], + "feature_2": [6, 7, 8], + "feature_3": [9, 6, 3], + }, + "target", + None, + ) + assert new_table.schema == expected.schema + assert new_table.features == expected.features + assert new_table.target == expected.target + assert new_table == expected From 6b2846b6fe50ed5f5a7c595f409406a26369f356 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Mon, 29 May 2023 14:59:17 +0200 Subject: [PATCH 011/149] Use public getter to access target column --- src/safeds/data/tabular/containers/_tagged_table.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 6e0b0672e..bb1eab605 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -189,7 +189,7 @@ def add_column(self, column: Column) -> TaggedTable: If the size of the column does not match the amount of rows. """ - return TaggedTable._from_table(super().add_column(column), target_name=self._target.name, feature_names=None) + return TaggedTable._from_table(super().add_column(column), target_name=self.target.name, feature_names=None) def add_columns(self, columns: list[Column] | Table) -> TaggedTable: """ @@ -214,4 +214,4 @@ def add_columns(self, columns: list[Column] | Table) -> TaggedTable: DuplicateColumnNameError If at least one column name from the provided column list already exists in the table. """ - return TaggedTable._from_table(super().add_columns(columns), target_name=self._target.name, feature_names=None) + return TaggedTable._from_table(super().add_columns(columns), target_name=self.target.name, feature_names=None) From 46aa81a969f37b0b2abb7393924cd6df4ca06745 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Mon, 29 May 2023 15:13:33 +0200 Subject: [PATCH 012/149] Add more implementations --- .../data/tabular/containers/_tagged_table.py | 92 ++++++++++++++++++- 1 file changed, 90 insertions(+), 2 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index bb1eab605..828db55fa 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -2,10 +2,10 @@ from typing import TYPE_CHECKING -from safeds.data.tabular.containers import Column, Table +from safeds.data.tabular.containers import Column, Table, Row if TYPE_CHECKING: - from collections.abc import Mapping, Sequence + from collections.abc import Callable, Mapping, Sequence from typing import Any @@ -215,3 +215,91 @@ def add_columns(self, columns: list[Column] | Table) -> TaggedTable: If at least one column name from the provided column list already exists in the table. """ return TaggedTable._from_table(super().add_columns(columns), target_name=self.target.name, feature_names=None) + + def add_row(self, row: Row) -> TaggedTable: + """ + Add a row to the table. + + This table is not modified. + + Parameters + ---------- + row : Row + The row to be added. + + Returns + ------- + table : TaggedTable + A new table with the added row at the end. + + Raises + ------ + SchemaMismatchError + If the schema of the row does not match the table schema. + """ + return TaggedTable._from_table(super().add_row(row), target_name=self.target.name, feature_names=None) + + def add_rows(self, rows: list[Row] | Table) -> TaggedTable: + """ + Add multiple rows to a table. + + This table is not modified. + + Parameters + ---------- + rows : list[Row] or Table + The rows to be added. + + Returns + ------- + result : TaggedTable + A new table which combines the original table and the given rows. + + Raises + ------ + SchemaMismatchError + If the schema of on of the row does not match the table schema. + """ + return TaggedTable._from_table(super().add_rows(rows), target_name=self.target.name, feature_names=None) + + def filter_rows(self, query: Callable[[Row], bool]) -> TaggedTable: + """ + Return a table with rows filtered by Callable (e.g. lambda function). + + This table is not modified. + + Parameters + ---------- + query : lambda function + A Callable that is applied to all rows. + + Returns + ------- + table : TaggedTable + A table containing only the rows filtered by the query. + """ + return TaggedTable._from_table(super().filter_rows(query), target_name=self.target.name, feature_names=None) + + def keep_only_columns(self, column_names: list[str]) -> Table: + """ + Return a table with only the given column(s). + + This table is not modified. + + Parameters + ---------- + column_names : list[str] + A list containing only the columns to be kept. + + Returns + ------- + table : Table + A table containing only the given column(s). + + Raises + ------ + UnknownColumnNameError + If any of the given columns does not exist. + """ + # TODO: Change return type to TaggedTable, throw exception if appropriate, fix pytest errors + return super().keep_only_columns(column_names) From f9f302c61b6a6c7623fe0f93df9b5d91e560e2da Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Mon, 29 May 2023 13:15:20 +0000 Subject: [PATCH 013/149] style: apply automated linter fixes --- src/safeds/data/tabular/containers/_tagged_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 828db55fa..9781f7757 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING -from safeds.data.tabular.containers import Column, Table, Row +from safeds.data.tabular.containers import Column, Row, Table if TYPE_CHECKING: from collections.abc import Callable, Mapping, Sequence From 18d37fb5172aed58942023d623054891ccabea90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Mon, 29 May 2023 15:44:22 +0200 Subject: [PATCH 014/149] Start keep_only_columns implementation --- src/safeds/data/tabular/containers/_tagged_table.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 9781f7757..b57f94bd4 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING from safeds.data.tabular.containers import Column, Row, Table +from safeds.exceptions import IllegalSchemaModificationError if TYPE_CHECKING: from collections.abc import Callable, Mapping, Sequence @@ -300,6 +301,11 @@ def keep_only_columns(self, column_names: list[str]) -> Table: ------ UnknownColumnNameError If any of the given columns does not exist. + IllegalSchemaModificationError + If none of the given columns is the target column. """ - # TODO: Change return type to TaggedTable, throw exception if appropriate, fix pytest errors + # TODO: Change return type to TaggedTable (2x in docstring, 1x in function definition), + # throw exception if appropriate, investigate and fix pytest errors + # if self.target.name not in column_names: + # raise IllegalSchemaModificationError(f'Must keep target column "{self.target.name}".') return super().keep_only_columns(column_names) From 30b85b60491cca8fcb4aa53b5885a096b6c2ae31 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Mon, 29 May 2023 13:46:35 +0000 Subject: [PATCH 015/149] style: apply automated linter fixes --- src/safeds/data/tabular/containers/_tagged_table.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index b57f94bd4..06d966d53 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -3,7 +3,6 @@ from typing import TYPE_CHECKING from safeds.data.tabular.containers import Column, Row, Table -from safeds.exceptions import IllegalSchemaModificationError if TYPE_CHECKING: from collections.abc import Callable, Mapping, Sequence @@ -307,5 +306,5 @@ def keep_only_columns(self, column_names: list[str]) -> Table: # TODO: Change return type to TaggedTable (2x in docstring, 1x in function definition), # throw exception if appropriate, investigate and fix pytest errors # if self.target.name not in column_names: - # raise IllegalSchemaModificationError(f'Must keep target column "{self.target.name}".') + # raise IllegalSchemaModificationError(f'Must keep target column "{self.target.name}".') return super().keep_only_columns(column_names) From 6a637a7c673f0eff276736a1e8d81355bf32f3ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Mon, 29 May 2023 16:00:52 +0200 Subject: [PATCH 016/149] Start remove_columns implementation --- .../data/tabular/containers/_tagged_table.py | 36 ++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 06d966d53..ec9204ad9 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -304,7 +304,41 @@ def keep_only_columns(self, column_names: list[str]) -> Table: If none of the given columns is the target column. """ # TODO: Change return type to TaggedTable (2x in docstring, 1x in function definition), - # throw exception if appropriate, investigate and fix pytest errors + # re-build TaggedTable before returning, + # throw exception if appropriate, + # investigate and fix pytest errors # if self.target.name not in column_names: # raise IllegalSchemaModificationError(f'Must keep target column "{self.target.name}".') return super().keep_only_columns(column_names) + + def remove_columns(self, column_names: list[str]) -> Table: + """ + Return a table without the given column(s). + + This table is not modified. + + Parameters + ---------- + column_names : list[str] + A list containing all columns to be dropped. + + Returns + ------- + table : Table + A table without the given columns. + + Raises + ------ + UnknownColumnNameError + If any of the given columns does not exist. + ColumnIsTaggedError + If any of the given columns is the target column. + """ + # TODO: Change return type to TaggedTable (2x in docstring, 1x in function definition), + # re-build TaggedTable before returning, + # throw exception if appropriate, + # investigate and fix pytest errors + if self.target.name in column_names: + # raise ColumnIsTaggedError({self.target.name}) + pass + return super().remove_columns(column_names) From 4f0fc8008602f68dfefe049b7c4a2605c7f1534c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Mon, 29 May 2023 16:10:41 +0200 Subject: [PATCH 017/149] Override remove_columns_with_missing_values --- .../data/tabular/containers/_tagged_table.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index ec9204ad9..033b08316 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING from safeds.data.tabular.containers import Column, Row, Table +from safeds.exceptions import ColumnIsTaggedError if TYPE_CHECKING: from collections.abc import Callable, Mapping, Sequence @@ -342,3 +343,26 @@ def remove_columns(self, column_names: list[str]) -> Table: # raise ColumnIsTaggedError({self.target.name}) pass return super().remove_columns(column_names) + + def remove_columns_with_missing_values(self) -> TaggedTable: + """ + Return a table without the columns that contain missing values. + + This table is not modified. + + Returns + ------- + table : TaggedTable + A table without the columns that contain missing values. + + Raises + ------ + ColumnIsTaggedError + If any of the columns to be removed is the target column. + """ + table = super().remove_columns_with_missing_values() + try: + tagged = TaggedTable._from_table(table, self.target.name, None) + except ValueError: # TODO: Check if this is actually the error that would e raised + raise ColumnIsTaggedError(self.target.name) from None + return tagged From c02a34ae1d6325b131cdb0c08db993cf5c9041fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Mon, 29 May 2023 16:14:25 +0200 Subject: [PATCH 018/149] Fix typo in todo comment --- src/safeds/data/tabular/containers/_tagged_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 033b08316..2ae371b2f 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -363,6 +363,6 @@ def remove_columns_with_missing_values(self) -> TaggedTable: table = super().remove_columns_with_missing_values() try: tagged = TaggedTable._from_table(table, self.target.name, None) - except ValueError: # TODO: Check if this is actually the error that would e raised + except ValueError: # TODO: Check if this is actually the error that would be raised raise ColumnIsTaggedError(self.target.name) from None return tagged From ab44cf80cbf387f23c33d107c8e0c9d53a688300 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Mon, 29 May 2023 17:01:14 +0200 Subject: [PATCH 019/149] Catch correct exception --- src/safeds/data/tabular/containers/_tagged_table.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 2ae371b2f..900dfb810 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING from safeds.data.tabular.containers import Column, Row, Table -from safeds.exceptions import ColumnIsTaggedError +from safeds.exceptions import ColumnIsTaggedError, UnknownColumnNameError if TYPE_CHECKING: from collections.abc import Callable, Mapping, Sequence @@ -363,6 +363,6 @@ def remove_columns_with_missing_values(self) -> TaggedTable: table = super().remove_columns_with_missing_values() try: tagged = TaggedTable._from_table(table, self.target.name, None) - except ValueError: # TODO: Check if this is actually the error that would be raised + except UnknownColumnNameError: raise ColumnIsTaggedError(self.target.name) from None return tagged From e6621f3740966ef740f63cf3bb845834ceba9a12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Mon, 29 May 2023 17:19:26 +0200 Subject: [PATCH 020/149] Add tests --- .../_table/_tagged_table/test_add_row.py | 29 +++++++++++++++ .../_table/_tagged_table/test_add_rows.py | 35 +++++++++++++++++++ ...test_remove_columns_with_missing_values.py | 26 ++++++++++++++ 3 files changed, 90 insertions(+) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py new file mode 100644 index 000000000..3ef4052d4 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py @@ -0,0 +1,29 @@ +from safeds.data.tabular.containers import TaggedTable, Row + + +def test_should_add_row() -> None: + table = TaggedTable( + { + "feature": [0, 1], + "target": [3, 4], + }, + "target", + None, + ) + row = Row({ + "feature": 2, + "target": 5, + }) + new_table = table.add_row(row) + expected = TaggedTable( + { + "feature": [0, 1, 2], + "target": [3, 4, 5], + }, + "target", + None, + ) + assert new_table.schema == expected.schema + assert new_table.features == expected.features + assert new_table.target == expected.target + assert new_table == expected diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py new file mode 100644 index 000000000..994c93ba3 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py @@ -0,0 +1,35 @@ +from safeds.data.tabular.containers import TaggedTable, Row + + +def test_should_add_rows() -> None: + table = TaggedTable( + { + "feature": [0, 1], + "target": [4, 5], + }, + "target", + None, + ) + rows = [ + Row({ + "feature": 2, + "target": 6, + }), + Row({ + "feature": 3, + "target": 7 + }) + ] + new_table = table.add_rows(rows) + expected = TaggedTable( + { + "feature": [0, 1, 2, 3], + "target": [4, 5, 6, 7], + }, + "target", + None, + ) + assert new_table.schema == expected.schema + assert new_table.features == expected.features + assert new_table.target == expected.target + assert new_table == expected diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py new file mode 100644 index 000000000..8fd27089a --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py @@ -0,0 +1,26 @@ +from safeds.data.tabular.containers import TaggedTable, Column + + +def test_should_remove_column() -> None: + table = TaggedTable( + { + "feature_complete": [0, 1, 2], + "feature_incomplete": [3, None, 5], + "target": [6, 7, 8], + }, + "target", + None, + ) + new_table = table.remove_columns_with_missing_values() + expected = TaggedTable( + { + "feature_complete": [0, 1, 2], + "target": [6, 7, 8], + }, + "target", + None, + ) + assert new_table.schema == expected.schema + assert new_table.features == expected.features + assert new_table.target == expected.target + assert new_table == expected From 973f90741cbc799d951da897ccaec6dec012c401 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Mon, 29 May 2023 15:21:11 +0000 Subject: [PATCH 021/149] style: apply automated linter fixes --- .../_table/_tagged_table/test_add_row.py | 12 +++++++----- .../_table/_tagged_table/test_add_rows.py | 17 ++++++++--------- .../test_remove_columns_with_missing_values.py | 2 +- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py index 3ef4052d4..66e3e7feb 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py @@ -1,4 +1,4 @@ -from safeds.data.tabular.containers import TaggedTable, Row +from safeds.data.tabular.containers import Row, TaggedTable def test_should_add_row() -> None: @@ -10,10 +10,12 @@ def test_should_add_row() -> None: "target", None, ) - row = Row({ - "feature": 2, - "target": 5, - }) + row = Row( + { + "feature": 2, + "target": 5, + }, + ) new_table = table.add_row(row) expected = TaggedTable( { diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py index 994c93ba3..834df50ce 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py @@ -1,4 +1,4 @@ -from safeds.data.tabular.containers import TaggedTable, Row +from safeds.data.tabular.containers import Row, TaggedTable def test_should_add_rows() -> None: @@ -11,14 +11,13 @@ def test_should_add_rows() -> None: None, ) rows = [ - Row({ - "feature": 2, - "target": 6, - }), - Row({ - "feature": 3, - "target": 7 - }) + Row( + { + "feature": 2, + "target": 6, + }, + ), + Row({"feature": 3, "target": 7}), ] new_table = table.add_rows(rows) expected = TaggedTable( diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py index 8fd27089a..37e676012 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py @@ -1,4 +1,4 @@ -from safeds.data.tabular.containers import TaggedTable, Column +from safeds.data.tabular.containers import TaggedTable def test_should_remove_column() -> None: From c8ecbefe6bcec7d0b4d794c2b6ce9b53114896df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Mon, 29 May 2023 17:34:58 +0200 Subject: [PATCH 022/149] Add test for ColumnIsTaggedError --- .../test_remove_columns_with_missing_values.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py index 37e676012..cf4070def 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py @@ -1,4 +1,6 @@ +import pytest from safeds.data.tabular.containers import TaggedTable +from safeds.exceptions import ColumnIsTaggedError def test_should_remove_column() -> None: @@ -24,3 +26,17 @@ def test_should_remove_column() -> None: assert new_table.features == expected.features assert new_table.target == expected.target assert new_table == expected + + +def test_should_throw_column_is_tagged() -> None: + table = TaggedTable( + { + "feature": [0, 1, 2], + "target": [3, None, 5], + }, + "target", + None, + ) + with pytest.raises(ColumnIsTaggedError, match='Illegal schema modification: Column "target" is tagged and cannot ' + 'be removed.'): + table.remove_columns_with_missing_values() From 9f9e03a302f731db7c82ad7cf551caede702a0b7 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Mon, 29 May 2023 15:36:44 +0000 Subject: [PATCH 023/149] style: apply automated linter fixes --- .../_tagged_table/test_remove_columns_with_missing_values.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py index cf4070def..07b211397 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py @@ -37,6 +37,7 @@ def test_should_throw_column_is_tagged() -> None: "target", None, ) - with pytest.raises(ColumnIsTaggedError, match='Illegal schema modification: Column "target" is tagged and cannot ' - 'be removed.'): + with pytest.raises( + ColumnIsTaggedError, match='Illegal schema modification: Column "target" is tagged and cannot be removed.', + ): table.remove_columns_with_missing_values() From 348ab7218974df3c15619ff9723f3ea7f0d3206d Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Mon, 29 May 2023 15:38:16 +0000 Subject: [PATCH 024/149] style: apply automated linter fixes --- .../_tagged_table/test_remove_columns_with_missing_values.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py index 07b211397..2baa52a6e 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py @@ -38,6 +38,7 @@ def test_should_throw_column_is_tagged() -> None: None, ) with pytest.raises( - ColumnIsTaggedError, match='Illegal schema modification: Column "target" is tagged and cannot be removed.', + ColumnIsTaggedError, + match='Illegal schema modification: Column "target" is tagged and cannot be removed.', ): table.remove_columns_with_missing_values() From f111adf8846dedbb9c5f622bf09ee1c261cdc9a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Mon, 29 May 2023 18:01:17 +0200 Subject: [PATCH 025/149] Use try-catch for remove_columns --- src/safeds/data/tabular/containers/_tagged_table.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 900dfb810..be53b6e01 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -313,6 +313,7 @@ def keep_only_columns(self, column_names: list[str]) -> Table: return super().keep_only_columns(column_names) def remove_columns(self, column_names: list[str]) -> Table: + # TODO: Change return type to TaggedTable (in function definition and in docstring). """ Return a table without the given column(s). @@ -335,14 +336,12 @@ def remove_columns(self, column_names: list[str]) -> Table: ColumnIsTaggedError If any of the given columns is the target column. """ - # TODO: Change return type to TaggedTable (2x in docstring, 1x in function definition), - # re-build TaggedTable before returning, - # throw exception if appropriate, - # investigate and fix pytest errors - if self.target.name in column_names: + try: + return TaggedTable._from_table(super().remove_columns(column_names), self.target.name, None) + except UnknownColumnNameError: + # TODO: Don't return; throw exception and handle it correctly in tests. # raise ColumnIsTaggedError({self.target.name}) - pass - return super().remove_columns(column_names) + return super().remove_columns(column_names) def remove_columns_with_missing_values(self) -> TaggedTable: """ From eae61a922e582136fa4a83cf7826d9293fdc03ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Mon, 29 May 2023 18:22:35 +0200 Subject: [PATCH 026/149] Implement remove_columns_with_non_numerical_values Also added tests. --- .../data/tabular/containers/_tagged_table.py | 32 ++++++++++++-- ...emove_columns_with_non_numerical_values.py | 44 +++++++++++++++++++ 2 files changed, 72 insertions(+), 4 deletions(-) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index be53b6e01..4e6a8ee24 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -282,6 +282,7 @@ def filter_rows(self, query: Callable[[Row], bool]) -> TaggedTable: return TaggedTable._from_table(super().filter_rows(query), target_name=self.target.name, feature_names=None) def keep_only_columns(self, column_names: list[str]) -> Table: + # TODO: Change return type to TaggedTable (in function definition and in docstring). """ Return a table with only the given column(s). @@ -304,10 +305,10 @@ def keep_only_columns(self, column_names: list[str]) -> Table: IllegalSchemaModificationError If none of the given columns is the target column. """ - # TODO: Change return type to TaggedTable (2x in docstring, 1x in function definition), - # re-build TaggedTable before returning, + # TODO: + # Re-build TaggedTable before returning, # throw exception if appropriate, - # investigate and fix pytest errors + # investigate and fix pytest errors. # if self.target.name not in column_names: # raise IllegalSchemaModificationError(f'Must keep target column "{self.target.name}".') return super().keep_only_columns(column_names) @@ -340,7 +341,7 @@ def remove_columns(self, column_names: list[str]) -> Table: return TaggedTable._from_table(super().remove_columns(column_names), self.target.name, None) except UnknownColumnNameError: # TODO: Don't return; throw exception and handle it correctly in tests. - # raise ColumnIsTaggedError({self.target.name}) + # raise ColumnIsTaggedError({self.target.name}) from None return super().remove_columns(column_names) def remove_columns_with_missing_values(self) -> TaggedTable: @@ -365,3 +366,26 @@ def remove_columns_with_missing_values(self) -> TaggedTable: except UnknownColumnNameError: raise ColumnIsTaggedError(self.target.name) from None return tagged + + def remove_columns_with_non_numerical_values(self) -> TaggedTable: + """ + Return a table without the columns that contain non-numerical values. + + This table is not modified. + + Returns + ------- + table : TaggedTable + A table without the columns that contain non-numerical values. + + Raises + ------ + ColumnIsTaggedError + If any of the columns to be removed is the target column. + """ + table = super().remove_columns_with_non_numerical_values() + try: + tagged = TaggedTable._from_table(table, self.target.name, None) + except UnknownColumnNameError: + raise ColumnIsTaggedError(self.target.name) from None + return tagged diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py new file mode 100644 index 000000000..61e3ecd3f --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py @@ -0,0 +1,44 @@ +import pytest +from safeds.data.tabular.containers import TaggedTable +from safeds.exceptions import ColumnIsTaggedError + + +def test_should_remove_column() -> None: + table = TaggedTable( + { + "feature_numerical": [0, 1, 2], + "feature_non_numerical": ["a", "b", "c"], + "target": [3, 4, 5], + }, + "target", + None, + ) + new_table = table.remove_columns_with_non_numerical_values() + expected = TaggedTable( + { + "feature_numerical": [0, 1, 2], + "target": [3, 4, 5], + }, + "target", + None, + ) + assert new_table.schema == expected.schema + assert new_table.features == expected.features + assert new_table.target == expected.target + assert new_table == expected + + +def test_should_throw_column_is_tagged() -> None: + table = TaggedTable( + { + "feature": [0, 1, 2], + "target": ["a", "b", "c"], + }, + "target", + None, + ) + with pytest.raises( + ColumnIsTaggedError, + match='Illegal schema modification: Column "target" is tagged and cannot be removed.', + ): + table.remove_columns_with_non_numerical_values() From 2a0fac8fa71598f920fc5902e00ddf0f42d677b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Mon, 29 May 2023 18:47:50 +0200 Subject: [PATCH 027/149] Implement more row removing functions Also added tests. --- .../data/tabular/containers/_tagged_table.py | 43 +++++++++++++++++++ .../test_remove_duplicate_rows.py | 25 +++++++++++ .../test_remove_rows_with_missing_values.py | 25 +++++++++++ .../test_remove_rows_with_outliers.py | 25 +++++++++++ 4 files changed, 118 insertions(+) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_duplicate_rows.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_missing_values.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 4e6a8ee24..a2d9fbe8a 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -389,3 +389,46 @@ def remove_columns_with_non_numerical_values(self) -> TaggedTable: except UnknownColumnNameError: raise ColumnIsTaggedError(self.target.name) from None return tagged + + def remove_duplicate_rows(self) -> TaggedTable: + """ + Return a copy of the table with every duplicate row removed. + + This table is not modified. + + Returns + ------- + result : TaggedTable + The table with the duplicate rows removed. + """ + return TaggedTable._from_table(super().remove_duplicate_rows(), self.target.name, None) + + def remove_rows_with_missing_values(self) -> TaggedTable: + """ + Return a table without the rows that contain missing values. + + This table is not modified. + + Returns + ------- + table : TaggedTable + A table without the rows that contain missing values. + """ + return TaggedTable._from_table(super().remove_rows_with_missing_values(), self.target.name, None) + + def remove_rows_with_outliers(self) -> TaggedTable: + """ + Remove all rows from the table that contain at least one outlier. + + We define an outlier as a value that has a distance of more than 3 standard deviations from the column mean. + Missing values are not considered outliers. They are also ignored during the calculation of the standard + deviation. + + This table is not modified. + + Returns + ------- + new_table : TaggedTable + A new table without rows containing outliers. + """ + return TaggedTable._from_table(super().remove_rows_with_outliers(), self.target.name, None) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_duplicate_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_duplicate_rows.py new file mode 100644 index 000000000..b1b0da57c --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_duplicate_rows.py @@ -0,0 +1,25 @@ +from safeds.data.tabular.containers import TaggedTable + + +def test_should_remove_row() -> None: + table = TaggedTable( + { + "feature": [0, 0, 1], + "target": [2, 2, 3], + }, + "target", + None, + ) + new_table = table.remove_duplicate_rows() + expected = TaggedTable( + { + "feature": [0, 1], + "target": [2, 3], + }, + "target", + None, + ) + assert new_table.schema == expected.schema + assert new_table.features == expected.features + assert new_table.target == expected.target + assert new_table == expected diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_missing_values.py new file mode 100644 index 000000000..70156644b --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_missing_values.py @@ -0,0 +1,25 @@ +from safeds.data.tabular.containers import TaggedTable + + +def test_should_remove_row() -> None: + table = TaggedTable( + { + "feature": [0.0, None, 2.0], + "target": [3.0, 4.0, 5.0], + }, + "target", + None, + ) + new_table = table.remove_rows_with_missing_values() + expected = TaggedTable( + { + "feature": [0.0, 2.0], + "target": [3.0, 5.0], + }, + "target", + None, + ) + assert new_table.schema == expected.schema + assert new_table.features == expected.features + assert new_table.target == expected.target + assert new_table == expected diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py new file mode 100644 index 000000000..1c843d2b3 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py @@ -0,0 +1,25 @@ +from safeds.data.tabular.containers import TaggedTable + + +def test_should_remove_row() -> None: + table = TaggedTable( + { + "feature": [1.0, 11.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + "target": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + }, + "target", + None, + ) + new_table = table.remove_rows_with_outliers() + expected = TaggedTable( + { + "feature": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + "target": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + }, + "target", + None, + ) + assert new_table.schema == expected.schema + assert new_table.features == expected.features + assert new_table.target == expected.target + assert new_table == expected From f691f5b64706f45ad985616ab0a0a7c773dd5018 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Mon, 29 May 2023 19:09:19 +0200 Subject: [PATCH 028/149] Implement rename_column and add tests --- .../data/tabular/containers/_tagged_table.py | 28 +++++++++++++++++++ .../_tagged_table/test_rename_column.py | 25 +++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index a2d9fbe8a..16fd4342a 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -432,3 +432,31 @@ def remove_rows_with_outliers(self) -> TaggedTable: A new table without rows containing outliers. """ return TaggedTable._from_table(super().remove_rows_with_outliers(), self.target.name, None) + + def rename_column(self, old_name: str, new_name: str) -> TaggedTable: + """ + Rename a single column. + + This table is not modified. + + Parameters + ---------- + old_name : str + The old name of the target column + new_name : str + The new name of the target column + + Returns + ------- + table : TaggedTable + The Table with the renamed column. + + Raises + ------ + UnknownColumnNameError + If the specified old target column name does not exist. + DuplicateColumnNameError + If the specified new target column name already exists. + """ + return TaggedTable._from_table(super().rename_column(old_name, new_name), + new_name if self.target.name == old_name else self.target.name, None) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py new file mode 100644 index 000000000..311701b78 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py @@ -0,0 +1,25 @@ +from safeds.data.tabular.containers import TaggedTable + + +def test_should_add_column() -> None: + table = TaggedTable( + { + "feature_1": [0, 1, 2], + "target": [3, 4, 5], + }, + "target", + None, + ) + new_table = table.rename_column("feature_1", "feature_2") + expected = TaggedTable( + { + "feature_2": [0, 1, 2], + "target": [3, 4, 5], + }, + "target", + None, + ) + assert new_table.schema == expected.schema + assert new_table.features == expected.features + assert new_table.target == expected.target + assert new_table == expected From 20b00ded4e78dc72932e4bdad188184b7d703596 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Mon, 29 May 2023 17:11:25 +0000 Subject: [PATCH 029/149] style: apply automated linter fixes --- src/safeds/data/tabular/containers/_tagged_table.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 16fd4342a..e01f27859 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -458,5 +458,8 @@ def rename_column(self, old_name: str, new_name: str) -> TaggedTable: DuplicateColumnNameError If the specified new target column name already exists. """ - return TaggedTable._from_table(super().rename_column(old_name, new_name), - new_name if self.target.name == old_name else self.target.name, None) + return TaggedTable._from_table( + super().rename_column(old_name, new_name), + new_name if self.target.name == old_name else self.target.name, + None, + ) From e4f9b19b4735d64ac141ae4afb046d7804377d0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Mon, 29 May 2023 20:55:29 +0200 Subject: [PATCH 030/149] Add test for filter_rows --- .../_table/_tagged_table/test_filter_rows.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py new file mode 100644 index 000000000..7e8c714c7 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py @@ -0,0 +1,27 @@ +from safeds.data.tabular.containers import TaggedTable + + +def test_should_remove_row() -> None: + table = TaggedTable( + { + "feature_1": [3, 9, 6], + "feature_2": [6, 12, 9], + "target": [1, 3, 2], + }, + "target", + None + ) + new_table = table.filter_rows(lambda row: all(row.get_value(col) < 10 for col in table.column_names)) + expected = TaggedTable( + { + "feature_1": [3, 6], + "feature_2": [6, 9], + "target": [1, 2], + }, + "target", + None + ) + assert new_table.schema == expected.schema + assert new_table.features == expected.features + assert new_table.target == expected.target + assert new_table == expected From 28474b8feb77c66cb7f1b1c96b97f052695441c7 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Mon, 29 May 2023 18:57:26 +0000 Subject: [PATCH 031/149] style: apply automated linter fixes --- .../containers/_table/_tagged_table/test_filter_rows.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py index 7e8c714c7..f3ef45b18 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py @@ -9,7 +9,7 @@ def test_should_remove_row() -> None: "target": [1, 3, 2], }, "target", - None + None, ) new_table = table.filter_rows(lambda row: all(row.get_value(col) < 10 for col in table.column_names)) expected = TaggedTable( @@ -19,7 +19,7 @@ def test_should_remove_row() -> None: "target": [1, 2], }, "target", - None + None, ) assert new_table.schema == expected.schema assert new_table.features == expected.features From d71b57213ef7056d8786d112bc3c19fa9a5f814a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Mon, 29 May 2023 23:12:20 +0200 Subject: [PATCH 032/149] Implement replace_column and parametrize tests --- .../data/tabular/containers/_tagged_table.py | 40 ++++++++++ .../_tagged_table/test_rename_column.py | 76 +++++++++++++------ .../_tagged_table/test_replace_column.py | 63 +++++++++++++++ 3 files changed, 157 insertions(+), 22 deletions(-) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index e01f27859..9f292b9be 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -463,3 +463,43 @@ def rename_column(self, old_name: str, new_name: str) -> TaggedTable: new_name if self.target.name == old_name else self.target.name, None, ) + + def replace_column(self, old_column_name: str, new_column: Column) -> TaggedTable: + """ + Return a copy of the table with the specified old column replaced by a new column. + + The order of columns is kept. + + The column to be replaced may be the target column. + + This table is not modified. + + Parameters + ---------- + old_column_name : str + The name of the column to be replaced. + + new_column : Column + The new column replacing the old column. + + Returns + ------- + result : TaggedTable + A table with the old column replaced by the new column. + + Raises + ------ + UnknownColumnNameError + If the old column does not exist. + + DuplicateColumnNameError + If the new column already exists and the existing column is not affected by the replacement. + + ColumnSizeError + If the size of the column does not match the amount of rows. + """ + return TaggedTable._from_table( + super().replace_column(old_column_name, new_column), + new_column.name if self.target.name == old_column_name else self.target.name, + None, + ) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py index 311701b78..6689b0ea8 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py @@ -1,25 +1,57 @@ +import pytest from safeds.data.tabular.containers import TaggedTable -def test_should_add_column() -> None: - table = TaggedTable( - { - "feature_1": [0, 1, 2], - "target": [3, 4, 5], - }, - "target", - None, - ) - new_table = table.rename_column("feature_1", "feature_2") - expected = TaggedTable( - { - "feature_2": [0, 1, 2], - "target": [3, 4, 5], - }, - "target", - None, - ) - assert new_table.schema == expected.schema - assert new_table.features == expected.features - assert new_table.target == expected.target - assert new_table == expected +@pytest.mark.parametrize( + ("original_table", "old_column_name", "new_column_name", "result_table"), + [ + ( + TaggedTable( + { + "feature_old": [0, 1, 2], + "target": [3, 4, 5], + }, + "target", + None, + ), + "feature_old", + "feature_new", + TaggedTable( + { + "feature_new": [0, 1, 2], + "target": [3, 4, 5], + }, + "target", + None, + ) + ), + ( + TaggedTable( + { + "feature": [0, 1, 2], + "target_old": [3, 4, 5], + }, + "target_old", + None, + ), + "target_old", + "target_new", + TaggedTable( + { + "feature": [0, 1, 2], + "target_new": [3, 4, 5], + }, + "target_new", + None, + ) + ), + ], + ids=["rename_feature_column", "rename_target_column"], +) +def test_should_add_column(original_table: TaggedTable, old_column_name: str, new_column_name: str, + result_table: TaggedTable) -> None: + new_table = original_table.rename_column(old_column_name, new_column_name) + assert new_table.schema == result_table.schema + assert new_table.features == result_table.features + assert new_table.target == result_table.target + assert new_table == result_table diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py new file mode 100644 index 000000000..bcbd5d52a --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py @@ -0,0 +1,63 @@ +import pytest +from safeds.data.tabular.containers import Column, TaggedTable + + +@pytest.mark.parametrize( + ("original_table", "new_column", "column_name_to_be_replaced", "result_table"), + [ + ( + TaggedTable( + { + "feature_old": [0, 1, 2], + "target_old": [3, 4, 5], + }, + "target_old", + None, + ), + Column( + "feature_new", + [2, 1, 0] + ), + "feature_old", + TaggedTable( + { + "feature_new": [2, 1, 0], + "target_old": [3, 4, 5], + }, + "target_old", + None + ) + ), + ( + TaggedTable( + { + "feature_old": [0, 1, 2], + "target_old": [3, 4, 5], + }, + "target_old", + None, + ), + Column( + "target_new", + [2, 1, 0] + ), + "target_old", + TaggedTable( + { + "feature_old": [0, 1, 2], + "target_new": [2, 1, 0], + }, + "target_new", + None + ) + ), + ], + ids=["replace_feature_column", "replace_target_column"], +) +def test_should_replace_column(original_table: TaggedTable, new_column: Column, column_name_to_be_replaced: str, + result_table: TaggedTable) -> None: + new_table = original_table.replace_column(column_name_to_be_replaced, new_column) + assert new_table.schema == result_table.schema + assert new_table.features == result_table.features + assert new_table.target == result_table.target + assert new_table == result_table From ce68cc9eace741fc9bbe74540ff49f08c6a2bfe5 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Mon, 29 May 2023 21:14:24 +0000 Subject: [PATCH 033/149] style: apply automated linter fixes --- .../_tagged_table/test_rename_column.py | 9 ++++---- .../_tagged_table/test_replace_column.py | 23 ++++++++----------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py index 6689b0ea8..16cd8e882 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py @@ -23,7 +23,7 @@ }, "target", None, - ) + ), ), ( TaggedTable( @@ -43,13 +43,14 @@ }, "target_new", None, - ) + ), ), ], ids=["rename_feature_column", "rename_target_column"], ) -def test_should_add_column(original_table: TaggedTable, old_column_name: str, new_column_name: str, - result_table: TaggedTable) -> None: +def test_should_add_column( + original_table: TaggedTable, old_column_name: str, new_column_name: str, result_table: TaggedTable, +) -> None: new_table = original_table.rename_column(old_column_name, new_column_name) assert new_table.schema == result_table.schema assert new_table.features == result_table.features diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py index bcbd5d52a..be464300e 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py @@ -14,10 +14,7 @@ "target_old", None, ), - Column( - "feature_new", - [2, 1, 0] - ), + Column("feature_new", [2, 1, 0]), "feature_old", TaggedTable( { @@ -25,8 +22,8 @@ "target_old": [3, 4, 5], }, "target_old", - None - ) + None, + ), ), ( TaggedTable( @@ -37,10 +34,7 @@ "target_old", None, ), - Column( - "target_new", - [2, 1, 0] - ), + Column("target_new", [2, 1, 0]), "target_old", TaggedTable( { @@ -48,14 +42,15 @@ "target_new": [2, 1, 0], }, "target_new", - None - ) + None, + ), ), ], ids=["replace_feature_column", "replace_target_column"], ) -def test_should_replace_column(original_table: TaggedTable, new_column: Column, column_name_to_be_replaced: str, - result_table: TaggedTable) -> None: +def test_should_replace_column( + original_table: TaggedTable, new_column: Column, column_name_to_be_replaced: str, result_table: TaggedTable, +) -> None: new_table = original_table.replace_column(column_name_to_be_replaced, new_column) assert new_table.schema == result_table.schema assert new_table.features == result_table.features From c59f552791cb66a65bb94d85d4c07dfeb82e011c Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Mon, 29 May 2023 21:15:58 +0000 Subject: [PATCH 034/149] style: apply automated linter fixes --- .../containers/_table/_tagged_table/test_rename_column.py | 5 ++++- .../containers/_table/_tagged_table/test_replace_column.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py index 16cd8e882..87e1096ee 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py @@ -49,7 +49,10 @@ ids=["rename_feature_column", "rename_target_column"], ) def test_should_add_column( - original_table: TaggedTable, old_column_name: str, new_column_name: str, result_table: TaggedTable, + original_table: TaggedTable, + old_column_name: str, + new_column_name: str, + result_table: TaggedTable, ) -> None: new_table = original_table.rename_column(old_column_name, new_column_name) assert new_table.schema == result_table.schema diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py index be464300e..793857ea5 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py @@ -49,7 +49,10 @@ ids=["replace_feature_column", "replace_target_column"], ) def test_should_replace_column( - original_table: TaggedTable, new_column: Column, column_name_to_be_replaced: str, result_table: TaggedTable, + original_table: TaggedTable, + new_column: Column, + column_name_to_be_replaced: str, + result_table: TaggedTable, ) -> None: new_table = original_table.replace_column(column_name_to_be_replaced, new_column) assert new_table.schema == result_table.schema From d42a9b47b7730b260ff30f9bd83541c76812674f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Tue, 30 May 2023 12:57:31 +0200 Subject: [PATCH 035/149] Implement shuffle_rows --- .../data/tabular/containers/_tagged_table.py | 14 +++++ .../_table/_tagged_table/test_shuffle_rows.py | 51 +++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 9f292b9be..9fee15923 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -503,3 +503,17 @@ def replace_column(self, old_column_name: str, new_column: Column) -> TaggedTabl new_column.name if self.target.name == old_column_name else self.target.name, None, ) + + def shuffle_rows(self) -> TaggedTable: + """ + Shuffle the table randomly. + + This table is not modified. + + Returns + ------- + result : TaggedTable + The shuffled Table. + + """ + return TaggedTable._from_table(super().shuffle_rows(), self.target.name, None) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py new file mode 100644 index 000000000..7da6138ac --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py @@ -0,0 +1,51 @@ +from safeds.data.tabular.containers import TaggedTable + + +def test_should_shuffle_rows() -> None: + table = TaggedTable( + { + "feature_a": [0, 1, 2], + "feature_b": [3, 4, 5], + "target": [6, 7, 8], + }, + "target", + None + ) + shuffled = table.shuffle_rows() + assert table.schema == shuffled.schema + assert table.features.column_names == shuffled.features.column_names + assert table.target.name == shuffled.target.name + # Use filter_rows to extract the individual rows and compare them one-by-one: + row_0 = shuffled.filter_rows(lambda row: any(row.get_value(col) == 0 for col in table.column_names)) + row_1 = shuffled.filter_rows(lambda row: any(row.get_value(col) == 1 for col in table.column_names)) + row_2 = shuffled.filter_rows(lambda row: any(row.get_value(col) == 2 for col in table.column_names)) + expected_0 = TaggedTable( + { + "feature_a": [0], + "feature_b": [3], + "target": [6], + }, + "target", + None + ) + expected_1 = TaggedTable( + { + "feature_a": [1], + "feature_b": [4], + "target": [7], + }, + "target", + None + ) + expected_2 = TaggedTable( + { + "feature_a": [2], + "feature_b": [5], + "target": [8], + }, + "target", + None + ) + assert row_0 == expected_0 + assert row_1 == expected_1 + assert row_2 == expected_2 From 4b0a71383179c22f5beb6a0bafdfa8b5dc533c1c Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Tue, 30 May 2023 10:59:23 +0000 Subject: [PATCH 036/149] style: apply automated linter fixes --- .../containers/_table/_tagged_table/test_shuffle_rows.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py index 7da6138ac..7662e3819 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py @@ -9,7 +9,7 @@ def test_should_shuffle_rows() -> None: "target": [6, 7, 8], }, "target", - None + None, ) shuffled = table.shuffle_rows() assert table.schema == shuffled.schema @@ -26,7 +26,7 @@ def test_should_shuffle_rows() -> None: "target": [6], }, "target", - None + None, ) expected_1 = TaggedTable( { @@ -35,7 +35,7 @@ def test_should_shuffle_rows() -> None: "target": [7], }, "target", - None + None, ) expected_2 = TaggedTable( { @@ -44,7 +44,7 @@ def test_should_shuffle_rows() -> None: "target": [8], }, "target", - None + None, ) assert row_0 == expected_0 assert row_1 == expected_1 From 1ccf1ddb4b490c05b5bac161aa4b68f22e304a0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Tue, 30 May 2023 13:16:06 +0200 Subject: [PATCH 037/149] Override slice_rows --- .../data/tabular/containers/_tagged_table.py | 32 ++++++++++++ .../_table/_tagged_table/test_slice_rows.py | 50 +++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_slice_rows.py diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 9fee15923..cba84a2d3 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -517,3 +517,35 @@ def shuffle_rows(self) -> TaggedTable: """ return TaggedTable._from_table(super().shuffle_rows(), self.target.name, None) + + def slice_rows( + self, + start: int | None = None, + end: int | None = None, + step: int = 1, + ) -> TaggedTable: + """ + Slice a part of the table into a new table. + + This table is not modified. + + Parameters + ---------- + start : int + The first index of the range to be copied into a new table, None by default. + end : int + The last index of the range to be copied into a new table, None by default. + step : int + The step size used to iterate through the table, 1 by default. + + Returns + ------- + result : TaggedTable + The resulting table. + + Raises + ------ + IndexOutOfBoundsError + If the index is out of bounds. + """ + return TaggedTable._from_table(super().slice_rows(start, end, step), self.target.name, None) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_slice_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_slice_rows.py new file mode 100644 index 000000000..9e882185e --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_slice_rows.py @@ -0,0 +1,50 @@ +import pytest +from _pytest.python_api import raises +from safeds.data.tabular.containers import TaggedTable +from safeds.exceptions import IndexOutOfBoundsError + + +@pytest.mark.parametrize( + ("table", "test_table", "second_test_table"), + [ + ( + TaggedTable({"feature": [1, 2, 1], "target": [1, 2, 4]}, "target", None), + TaggedTable({"feature": [1, 2], "target": [1, 2]}, "target", None), + TaggedTable({"feature": [1, 1], "target": [1, 4]}, "target", None), + ), + ], + ids=["Table with three rows"], +) +def test_should_slice_rows(table: TaggedTable, test_table: TaggedTable, second_test_table: TaggedTable) -> None: + new_table = table.slice_rows(0, 2, 1) + second_new_table = table.slice_rows(0, 3, 2) + third_new_table = table.slice_rows() + assert new_table.schema == test_table.schema + assert new_table.features == test_table.features + assert new_table.target == test_table.target + assert new_table == test_table + assert second_new_table.schema == second_test_table.schema + assert second_new_table.features == second_test_table.features + assert second_new_table.target == second_test_table.target + assert second_new_table == second_test_table + assert third_new_table.schema == table.schema + assert third_new_table.features == table.features + assert third_new_table.target == table.target + assert third_new_table == table + + +@pytest.mark.parametrize( + ("start", "end", "step", "error_message"), + [ + (3, 2, 1, r"There is no element in the range \[3, 2\]"), + (4, 0, 1, r"There is no element in the range \[4, 0\]"), + (0, 4, 1, r"There is no element at index '4'"), + (-4, 0, 1, r"There is no element at index '-4'"), + (0, -4, 1, r"There is no element in the range \[0, -4\]"), + ], +) +def test_should_raise_if_index_out_of_bounds(start: int, end: int, step: int, error_message: str) -> None: + table = TaggedTable({"feature": [1, 2, 1], "target": [1, 2, 4]}, "target", None) + + with raises(IndexOutOfBoundsError, match=error_message): + table.slice_rows(start, end, step) From 2d28696a32b3d49fdf12ad9f346109b68b4959b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Tue, 30 May 2023 13:57:27 +0200 Subject: [PATCH 038/149] Override sort_columns --- .../data/tabular/containers/_tagged_table.py | 29 ++++++++++ .../_table/_tagged_table/test_sort_columns.py | 55 +++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_columns.py diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index cba84a2d3..d69eb4faf 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -549,3 +549,32 @@ def slice_rows( If the index is out of bounds. """ return TaggedTable._from_table(super().slice_rows(start, end, step), self.target.name, None) + + def sort_columns( + self, + comparator: Callable[[Column, Column], int] = lambda col1, col2: (col1.name > col2.name) + - (col1.name < col2.name), + ) -> TaggedTable: + """ + Sort the columns of a `TaggedTable` with the given comparator and return a new `TaggedTable`. + + The original table is not modified. The comparator is a function that takes two columns `col1` and `col2` and + returns an integer: + + * If `col1` should be ordered before `col2`, the function should return a negative number. + * If `col1` should be ordered after `col2`, the function should return a positive number. + * If the original order of `col1` and `col2` should be kept, the function should return 0. + + If no comparator is given, the columns will be sorted alphabetically by their name. + + Parameters + ---------- + comparator : Callable[[Column, Column], int] + The function used to compare two columns. + + Returns + ------- + new_table : TaggedTable + A new table with sorted columns. + """ + return TaggedTable._from_table(super().sort_columns(comparator), self.target.name, None) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_columns.py new file mode 100644 index 000000000..576be33d7 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_columns.py @@ -0,0 +1,55 @@ +from collections.abc import Callable + +import pytest +from safeds.data.tabular.containers import Column, TaggedTable + + +@pytest.mark.parametrize( + ("query", "col1", "col2", "col3", "col4"), + [ + (None, 0, 1, 2, 3), + ( + lambda col1, col2: (col1.name < col2.name) - (col1.name > col2.name), + 3, + 2, + 1, + 0, + ), + ], + ids=["no query", "with query"], +) +def test_should_return_sorted_table( + query: Callable[[Column, Column], int], + col1: int, + col2: int, + col3: int, + col4: int, +) -> None: + columns = [ + Column("col1", ["A", "B", "C", "A", "D"]), + Column("col2", ["Test1", "Test1", "Test3", "Test1", "Test4"]), + Column("col3", [1, 2, 3, 4, 5]), + Column("col4", [2, 3, 1, 4, 6]), + ] + table1 = TaggedTable( + { + "col2": ["Test1", "Test1", "Test3", "Test1", "Test4"], + "col3": [1, 2, 3, 4, 5], + "col4": [2, 3, 1, 4, 6], + "col1": ["A", "B", "C", "A", "D"], + }, + "col1", + None + ) + if query is not None: + table_sorted = table1.sort_columns(query) + else: + table_sorted = table1.sort_columns() + table_sorted_columns = table_sorted.to_columns() + assert table_sorted.schema == table1.schema + assert table_sorted_columns[0] == columns[col1] + assert table_sorted_columns[1] == columns[col2] + assert table_sorted_columns[2] == columns[col3] + assert table_sorted_columns[3] == columns[col4] + assert table_sorted.features == table1.features + assert table_sorted.target == table1.target From 5df6fba87730064822b25d5097791dcdadf5c77f Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Tue, 30 May 2023 11:59:29 +0000 Subject: [PATCH 039/149] style: apply automated linter fixes --- .../containers/_table/_tagged_table/test_sort_columns.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_columns.py index 576be33d7..aaf86a4bb 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_columns.py @@ -39,7 +39,7 @@ def test_should_return_sorted_table( "col1": ["A", "B", "C", "A", "D"], }, "col1", - None + None, ) if query is not None: table_sorted = table1.sort_columns(query) From 34376ceeb081e7c4a0388ea0a9189aecb0e1c06d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Tue, 30 May 2023 14:36:02 +0200 Subject: [PATCH 040/149] Override sort_rows --- .../data/tabular/containers/_tagged_table.py | 23 +++++++ .../_table/_tagged_table/test_sort_rows.py | 62 +++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index d69eb4faf..aea45745b 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -578,3 +578,26 @@ def sort_columns( A new table with sorted columns. """ return TaggedTable._from_table(super().sort_columns(comparator), self.target.name, None) + + def sort_rows(self, comparator: Callable[[Row, Row], int]) -> TaggedTable: + """ + Sort the rows of a `TaggedTable` with the given comparator and return a new `TaggedTable`. + + The original table is not modified. The comparator is a function that takes two rows `row1` and `row2` and + returns an integer: + + * If `row1` should be ordered before `row2`, the function should return a negative number. + * If `row1` should be ordered after `row2`, the function should return a positive number. + * If the original order of `row1` and `row2` should be kept, the function should return 0. + + Parameters + ---------- + comparator : Callable[[Row, Row], int] + The function used to compare two rows. + + Returns + ------- + new_table : TaggedTable + A new table with sorted rows. + """ + return TaggedTable._from_table(super().sort_rows(comparator), self.target.name, None) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py new file mode 100644 index 000000000..a2b51eab1 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py @@ -0,0 +1,62 @@ +from collections.abc import Callable + +import pytest +from safeds.data.tabular.containers import Row, TaggedTable + + +@pytest.mark.parametrize( + ("table", "comparator", "expected"), + [ + # TODO: Check that it works with an empty table + ( + TaggedTable({"feature": [3, 2, 1], "target": [0, 0, 0]}, "target", None), + lambda row1, row2: row1["feature"] - row2["feature"], + TaggedTable({"feature": [1, 2, 3], "target": [0, 0, 0]}, "target", None), + ), + ( + TaggedTable({"feature": [1, 2, 3], "target": [0, 0, 0]}, "target", None), + lambda row1, row2: row1["feature"] - row2["feature"], + TaggedTable({"feature": [1, 2, 3], "target": [0, 0, 0]}, "target", None), + ), + ], + ids=["unsorted", "already_sorted"] +) +def test_should_sort_table( + table: TaggedTable, + comparator: Callable[[Row, Row], int], + expected: TaggedTable, +) -> None: + table_sorted = table.sort_rows(comparator) + assert table_sorted.schema == expected.schema + assert table_sorted.features == expected.features + assert table_sorted.target == expected.target + assert table_sorted == expected + + +@pytest.mark.parametrize( + ("table", "comparator", "table_copy"), + [ + # TODO: Check that it works with an empty table + ( + TaggedTable({"feature": [3, 2, 1], "target": [0, 0, 0]}, "target", None), + lambda row1, row2: row1["feature"] - row2["feature"], + TaggedTable({"feature": [3, 2, 1], "target": [0, 0, 0]}, "target", None), + ), + ( + TaggedTable({"feature": [1, 2, 3], "target": [0, 0, 0]}, "target", None), + lambda row1, row2: row1["feature"] - row2["feature"], + TaggedTable({"feature": [1, 2, 3], "target": [0, 0, 0]}, "target", None), + ), + ], + ids=["unsorted", "already_sorted"] +) +def test_should_not_modify_original_table( + table: TaggedTable, + comparator: Callable[[Row, Row], int], + table_copy: TaggedTable, +) -> None: + table.sort_rows(comparator) + assert table.schema == table_copy.schema + assert table.features == table_copy.features + assert table.target == table_copy.target + assert table == table_copy From 4f1b19fa72c129bbe84b9ef1fe3bc0022c0e7260 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Tue, 30 May 2023 12:37:39 +0000 Subject: [PATCH 041/149] style: apply automated linter fixes --- .../tabular/containers/_table/_tagged_table/test_sort_rows.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py index a2b51eab1..d9cb81a27 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py @@ -19,7 +19,7 @@ TaggedTable({"feature": [1, 2, 3], "target": [0, 0, 0]}, "target", None), ), ], - ids=["unsorted", "already_sorted"] + ids=["unsorted", "already_sorted"], ) def test_should_sort_table( table: TaggedTable, @@ -48,7 +48,7 @@ def test_should_sort_table( TaggedTable({"feature": [1, 2, 3], "target": [0, 0, 0]}, "target", None), ), ], - ids=["unsorted", "already_sorted"] + ids=["unsorted", "already_sorted"], ) def test_should_not_modify_original_table( table: TaggedTable, From 20a4856d586aca78b8aa90a0ab0dcd5020ebcdee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Tue, 30 May 2023 19:12:00 +0200 Subject: [PATCH 042/149] Override transform_column --- .../data/tabular/containers/_tagged_table.py | 19 +++++++++ .../_tagged_table/test_transform_column.py | 42 +++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_column.py diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index aea45745b..f26d0fc96 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -601,3 +601,22 @@ def sort_rows(self, comparator: Callable[[Row, Row], int]) -> TaggedTable: A new table with sorted rows. """ return TaggedTable._from_table(super().sort_rows(comparator), self.target.name, None) + + def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> TaggedTable: + """ + Transform provided column by calling provided transformer. + + This table is not modified. + + Returns + ------- + result : TaggedTable + The table with the transformed column. + + Raises + ------ + UnknownColumnNameError + If the column does not exist. + + """ + return TaggedTable._from_table(super().transform_column(name, transformer), self.target.name, None) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_column.py new file mode 100644 index 000000000..5c762e805 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_column.py @@ -0,0 +1,42 @@ +import pytest +from safeds.data.tabular.containers import TaggedTable +from safeds.exceptions import UnknownColumnNameError + + +@pytest.mark.parametrize( + ("table", "column_name", "table_transformed"), + [ + ( + TaggedTable({"feature_a": [1, 2, 3], "feature_b": [4, 5, 6], "target": [1, 2, 3]}, "target"), + "feature_a", + TaggedTable({"feature_a": [2, 4, 6], "feature_b": [4, 5, 6], "target": [1, 2, 3]}, "target"), + ), + ( + TaggedTable({"feature_a": [1, 2, 3], "feature_b": [4, 5, 6], "target": [1, 2, 3]}, "target"), + "target", + TaggedTable({"feature_a": [1, 2, 3], "feature_b": [4, 5, 6], "target": [2, 4, 6]}, "target"), + ), + ], + ids=["transform_feature_column", "transform_target_column"], +) +def test_should_transform_column(table: TaggedTable, column_name: str, table_transformed: TaggedTable) -> None: + result = table.transform_column(column_name, lambda row: row.get_value(column_name) * 2) + + assert result.schema == table_transformed.schema + assert result.features == table_transformed.features + assert result.target == table_transformed.target + assert result == table_transformed + + +def test_should_raise_if_column_not_found() -> None: + input_table = TaggedTable( + { + "A": [1, 2, 3], + "B": [4, 5, 6], + "C": ["a", "b", "c"], + }, + "C", + ) + + with pytest.raises(UnknownColumnNameError, match=r"Could not find column\(s\) 'D'"): + input_table.transform_column("D", lambda row: row.get_value("A") * 2) From 4ad330a7a4f29573b4c170c1efcd9867f64ad40f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Thu, 1 Jun 2023 14:06:34 +0200 Subject: [PATCH 043/149] style: Drop unused keyword parameters --- .../data/tabular/containers/_tagged_table.py | 32 +++++++++---------- .../_table/_tagged_table/test_add_column.py | 2 -- .../_table/_tagged_table/test_add_columns.py | 2 -- .../_table/_tagged_table/test_add_row.py | 2 -- .../_table/_tagged_table/test_add_rows.py | 2 -- .../_table/_tagged_table/test_filter_rows.py | 2 -- ...test_remove_columns_with_missing_values.py | 3 -- ...emove_columns_with_non_numerical_values.py | 3 -- .../test_remove_duplicate_rows.py | 2 -- .../test_remove_rows_with_missing_values.py | 2 -- .../test_remove_rows_with_outliers.py | 2 -- .../_tagged_table/test_rename_column.py | 4 --- .../_tagged_table/test_replace_column.py | 4 --- .../_table/_tagged_table/test_shuffle_rows.py | 4 --- .../_table/_tagged_table/test_slice_rows.py | 8 ++--- .../_table/_tagged_table/test_sort_columns.py | 1 - .../_table/_tagged_table/test_sort_rows.py | 16 +++++----- 17 files changed, 27 insertions(+), 64 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index f26d0fc96..fc59d94fd 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -190,7 +190,7 @@ def add_column(self, column: Column) -> TaggedTable: If the size of the column does not match the amount of rows. """ - return TaggedTable._from_table(super().add_column(column), target_name=self.target.name, feature_names=None) + return TaggedTable._from_table(super().add_column(column), target_name=self.target.name) def add_columns(self, columns: list[Column] | Table) -> TaggedTable: """ @@ -215,7 +215,7 @@ def add_columns(self, columns: list[Column] | Table) -> TaggedTable: DuplicateColumnNameError If at least one column name from the provided column list already exists in the table. """ - return TaggedTable._from_table(super().add_columns(columns), target_name=self.target.name, feature_names=None) + return TaggedTable._from_table(super().add_columns(columns), target_name=self.target.name) def add_row(self, row: Row) -> TaggedTable: """ @@ -238,7 +238,7 @@ def add_row(self, row: Row) -> TaggedTable: SchemaMismatchError If the schema of the row does not match the table schema. """ - return TaggedTable._from_table(super().add_row(row), target_name=self.target.name, feature_names=None) + return TaggedTable._from_table(super().add_row(row), target_name=self.target.name) def add_rows(self, rows: list[Row] | Table) -> TaggedTable: """ @@ -261,7 +261,7 @@ def add_rows(self, rows: list[Row] | Table) -> TaggedTable: SchemaMismatchError If the schema of on of the row does not match the table schema. """ - return TaggedTable._from_table(super().add_rows(rows), target_name=self.target.name, feature_names=None) + return TaggedTable._from_table(super().add_rows(rows), target_name=self.target.name) def filter_rows(self, query: Callable[[Row], bool]) -> TaggedTable: """ @@ -279,7 +279,7 @@ def filter_rows(self, query: Callable[[Row], bool]) -> TaggedTable: table : TaggedTable A table containing only the rows filtered by the query. """ - return TaggedTable._from_table(super().filter_rows(query), target_name=self.target.name, feature_names=None) + return TaggedTable._from_table(super().filter_rows(query), target_name=self.target.name) def keep_only_columns(self, column_names: list[str]) -> Table: # TODO: Change return type to TaggedTable (in function definition and in docstring). @@ -338,7 +338,7 @@ def remove_columns(self, column_names: list[str]) -> Table: If any of the given columns is the target column. """ try: - return TaggedTable._from_table(super().remove_columns(column_names), self.target.name, None) + return TaggedTable._from_table(super().remove_columns(column_names), self.target.name) except UnknownColumnNameError: # TODO: Don't return; throw exception and handle it correctly in tests. # raise ColumnIsTaggedError({self.target.name}) from None @@ -385,7 +385,7 @@ def remove_columns_with_non_numerical_values(self) -> TaggedTable: """ table = super().remove_columns_with_non_numerical_values() try: - tagged = TaggedTable._from_table(table, self.target.name, None) + tagged = TaggedTable._from_table(table, self.target.name) except UnknownColumnNameError: raise ColumnIsTaggedError(self.target.name) from None return tagged @@ -401,7 +401,7 @@ def remove_duplicate_rows(self) -> TaggedTable: result : TaggedTable The table with the duplicate rows removed. """ - return TaggedTable._from_table(super().remove_duplicate_rows(), self.target.name, None) + return TaggedTable._from_table(super().remove_duplicate_rows(), self.target.name) def remove_rows_with_missing_values(self) -> TaggedTable: """ @@ -414,7 +414,7 @@ def remove_rows_with_missing_values(self) -> TaggedTable: table : TaggedTable A table without the rows that contain missing values. """ - return TaggedTable._from_table(super().remove_rows_with_missing_values(), self.target.name, None) + return TaggedTable._from_table(super().remove_rows_with_missing_values(), self.target.name) def remove_rows_with_outliers(self) -> TaggedTable: """ @@ -431,7 +431,7 @@ def remove_rows_with_outliers(self) -> TaggedTable: new_table : TaggedTable A new table without rows containing outliers. """ - return TaggedTable._from_table(super().remove_rows_with_outliers(), self.target.name, None) + return TaggedTable._from_table(super().remove_rows_with_outliers(), self.target.name) def rename_column(self, old_name: str, new_name: str) -> TaggedTable: """ @@ -461,7 +461,6 @@ def rename_column(self, old_name: str, new_name: str) -> TaggedTable: return TaggedTable._from_table( super().rename_column(old_name, new_name), new_name if self.target.name == old_name else self.target.name, - None, ) def replace_column(self, old_column_name: str, new_column: Column) -> TaggedTable: @@ -501,7 +500,6 @@ def replace_column(self, old_column_name: str, new_column: Column) -> TaggedTabl return TaggedTable._from_table( super().replace_column(old_column_name, new_column), new_column.name if self.target.name == old_column_name else self.target.name, - None, ) def shuffle_rows(self) -> TaggedTable: @@ -516,7 +514,7 @@ def shuffle_rows(self) -> TaggedTable: The shuffled Table. """ - return TaggedTable._from_table(super().shuffle_rows(), self.target.name, None) + return TaggedTable._from_table(super().shuffle_rows(), self.target.name) def slice_rows( self, @@ -548,7 +546,7 @@ def slice_rows( IndexOutOfBoundsError If the index is out of bounds. """ - return TaggedTable._from_table(super().slice_rows(start, end, step), self.target.name, None) + return TaggedTable._from_table(super().slice_rows(start, end, step), self.target.name) def sort_columns( self, @@ -577,7 +575,7 @@ def sort_columns( new_table : TaggedTable A new table with sorted columns. """ - return TaggedTable._from_table(super().sort_columns(comparator), self.target.name, None) + return TaggedTable._from_table(super().sort_columns(comparator), self.target.name) def sort_rows(self, comparator: Callable[[Row, Row], int]) -> TaggedTable: """ @@ -600,7 +598,7 @@ def sort_rows(self, comparator: Callable[[Row, Row], int]) -> TaggedTable: new_table : TaggedTable A new table with sorted rows. """ - return TaggedTable._from_table(super().sort_rows(comparator), self.target.name, None) + return TaggedTable._from_table(super().sort_rows(comparator), self.target.name) def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> TaggedTable: """ @@ -619,4 +617,4 @@ def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Tagg If the column does not exist. """ - return TaggedTable._from_table(super().transform_column(name, transformer), self.target.name, None) + return TaggedTable._from_table(super().transform_column(name, transformer), self.target.name) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py index 3d7a6e336..df3270038 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py @@ -8,7 +8,6 @@ def test_should_add_column() -> None: "target": [3, 4, 5], }, "target", - None, ) col = Column("feature_2", [6, 7, 8]) new_table = table.add_column(col) @@ -19,7 +18,6 @@ def test_should_add_column() -> None: "feature_2": [6, 7, 8], }, "target", - None, ) assert new_table.schema == expected.schema assert new_table.features == expected.features diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py index f77efa6b3..20cbd78a1 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py @@ -8,7 +8,6 @@ def test_should_add_columns() -> None: "target": [3, 4, 5], }, "target", - None, ) cols = [ Column("feature_2", [6, 7, 8]), @@ -23,7 +22,6 @@ def test_should_add_columns() -> None: "feature_3": [9, 6, 3], }, "target", - None, ) assert new_table.schema == expected.schema assert new_table.features == expected.features diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py index 66e3e7feb..c70bf935b 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py @@ -8,7 +8,6 @@ def test_should_add_row() -> None: "target": [3, 4], }, "target", - None, ) row = Row( { @@ -23,7 +22,6 @@ def test_should_add_row() -> None: "target": [3, 4, 5], }, "target", - None, ) assert new_table.schema == expected.schema assert new_table.features == expected.features diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py index 834df50ce..56bb819ff 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py @@ -8,7 +8,6 @@ def test_should_add_rows() -> None: "target": [4, 5], }, "target", - None, ) rows = [ Row( @@ -26,7 +25,6 @@ def test_should_add_rows() -> None: "target": [4, 5, 6, 7], }, "target", - None, ) assert new_table.schema == expected.schema assert new_table.features == expected.features diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py index f3ef45b18..3c442ca76 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py @@ -9,7 +9,6 @@ def test_should_remove_row() -> None: "target": [1, 3, 2], }, "target", - None, ) new_table = table.filter_rows(lambda row: all(row.get_value(col) < 10 for col in table.column_names)) expected = TaggedTable( @@ -19,7 +18,6 @@ def test_should_remove_row() -> None: "target": [1, 2], }, "target", - None, ) assert new_table.schema == expected.schema assert new_table.features == expected.features diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py index 2baa52a6e..f4faee203 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py @@ -11,7 +11,6 @@ def test_should_remove_column() -> None: "target": [6, 7, 8], }, "target", - None, ) new_table = table.remove_columns_with_missing_values() expected = TaggedTable( @@ -20,7 +19,6 @@ def test_should_remove_column() -> None: "target": [6, 7, 8], }, "target", - None, ) assert new_table.schema == expected.schema assert new_table.features == expected.features @@ -35,7 +33,6 @@ def test_should_throw_column_is_tagged() -> None: "target": [3, None, 5], }, "target", - None, ) with pytest.raises( ColumnIsTaggedError, diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py index 61e3ecd3f..3e4f868d2 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py @@ -11,7 +11,6 @@ def test_should_remove_column() -> None: "target": [3, 4, 5], }, "target", - None, ) new_table = table.remove_columns_with_non_numerical_values() expected = TaggedTable( @@ -20,7 +19,6 @@ def test_should_remove_column() -> None: "target": [3, 4, 5], }, "target", - None, ) assert new_table.schema == expected.schema assert new_table.features == expected.features @@ -35,7 +33,6 @@ def test_should_throw_column_is_tagged() -> None: "target": ["a", "b", "c"], }, "target", - None, ) with pytest.raises( ColumnIsTaggedError, diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_duplicate_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_duplicate_rows.py index b1b0da57c..4446df3a9 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_duplicate_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_duplicate_rows.py @@ -8,7 +8,6 @@ def test_should_remove_row() -> None: "target": [2, 2, 3], }, "target", - None, ) new_table = table.remove_duplicate_rows() expected = TaggedTable( @@ -17,7 +16,6 @@ def test_should_remove_row() -> None: "target": [2, 3], }, "target", - None, ) assert new_table.schema == expected.schema assert new_table.features == expected.features diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_missing_values.py index 70156644b..434951381 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_missing_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_missing_values.py @@ -8,7 +8,6 @@ def test_should_remove_row() -> None: "target": [3.0, 4.0, 5.0], }, "target", - None, ) new_table = table.remove_rows_with_missing_values() expected = TaggedTable( @@ -17,7 +16,6 @@ def test_should_remove_row() -> None: "target": [3.0, 5.0], }, "target", - None, ) assert new_table.schema == expected.schema assert new_table.features == expected.features diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py index 1c843d2b3..451a0ea55 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py @@ -8,7 +8,6 @@ def test_should_remove_row() -> None: "target": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], }, "target", - None, ) new_table = table.remove_rows_with_outliers() expected = TaggedTable( @@ -17,7 +16,6 @@ def test_should_remove_row() -> None: "target": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], }, "target", - None, ) assert new_table.schema == expected.schema assert new_table.features == expected.features diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py index 87e1096ee..d3fc62247 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py @@ -12,7 +12,6 @@ "target": [3, 4, 5], }, "target", - None, ), "feature_old", "feature_new", @@ -22,7 +21,6 @@ "target": [3, 4, 5], }, "target", - None, ), ), ( @@ -32,7 +30,6 @@ "target_old": [3, 4, 5], }, "target_old", - None, ), "target_old", "target_new", @@ -42,7 +39,6 @@ "target_new": [3, 4, 5], }, "target_new", - None, ), ), ], diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py index 793857ea5..dfe6e773a 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py @@ -12,7 +12,6 @@ "target_old": [3, 4, 5], }, "target_old", - None, ), Column("feature_new", [2, 1, 0]), "feature_old", @@ -22,7 +21,6 @@ "target_old": [3, 4, 5], }, "target_old", - None, ), ), ( @@ -32,7 +30,6 @@ "target_old": [3, 4, 5], }, "target_old", - None, ), Column("target_new", [2, 1, 0]), "target_old", @@ -42,7 +39,6 @@ "target_new": [2, 1, 0], }, "target_new", - None, ), ), ], diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py index 7662e3819..70c8f56ea 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py @@ -9,7 +9,6 @@ def test_should_shuffle_rows() -> None: "target": [6, 7, 8], }, "target", - None, ) shuffled = table.shuffle_rows() assert table.schema == shuffled.schema @@ -26,7 +25,6 @@ def test_should_shuffle_rows() -> None: "target": [6], }, "target", - None, ) expected_1 = TaggedTable( { @@ -35,7 +33,6 @@ def test_should_shuffle_rows() -> None: "target": [7], }, "target", - None, ) expected_2 = TaggedTable( { @@ -44,7 +41,6 @@ def test_should_shuffle_rows() -> None: "target": [8], }, "target", - None, ) assert row_0 == expected_0 assert row_1 == expected_1 diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_slice_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_slice_rows.py index 9e882185e..47768b111 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_slice_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_slice_rows.py @@ -8,9 +8,9 @@ ("table", "test_table", "second_test_table"), [ ( - TaggedTable({"feature": [1, 2, 1], "target": [1, 2, 4]}, "target", None), - TaggedTable({"feature": [1, 2], "target": [1, 2]}, "target", None), - TaggedTable({"feature": [1, 1], "target": [1, 4]}, "target", None), + TaggedTable({"feature": [1, 2, 1], "target": [1, 2, 4]}, "target"), + TaggedTable({"feature": [1, 2], "target": [1, 2]}, "target"), + TaggedTable({"feature": [1, 1], "target": [1, 4]}, "target"), ), ], ids=["Table with three rows"], @@ -44,7 +44,7 @@ def test_should_slice_rows(table: TaggedTable, test_table: TaggedTable, second_t ], ) def test_should_raise_if_index_out_of_bounds(start: int, end: int, step: int, error_message: str) -> None: - table = TaggedTable({"feature": [1, 2, 1], "target": [1, 2, 4]}, "target", None) + table = TaggedTable({"feature": [1, 2, 1], "target": [1, 2, 4]}, "target") with raises(IndexOutOfBoundsError, match=error_message): table.slice_rows(start, end, step) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_columns.py index aaf86a4bb..948147bdf 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_columns.py @@ -39,7 +39,6 @@ def test_should_return_sorted_table( "col1": ["A", "B", "C", "A", "D"], }, "col1", - None, ) if query is not None: table_sorted = table1.sort_columns(query) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py index d9cb81a27..4a4b6f58d 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py @@ -9,14 +9,14 @@ [ # TODO: Check that it works with an empty table ( - TaggedTable({"feature": [3, 2, 1], "target": [0, 0, 0]}, "target", None), + TaggedTable({"feature": [3, 2, 1], "target": [0, 0, 0]}, "target"), lambda row1, row2: row1["feature"] - row2["feature"], - TaggedTable({"feature": [1, 2, 3], "target": [0, 0, 0]}, "target", None), + TaggedTable({"feature": [1, 2, 3], "target": [0, 0, 0]}, "target"), ), ( - TaggedTable({"feature": [1, 2, 3], "target": [0, 0, 0]}, "target", None), + TaggedTable({"feature": [1, 2, 3], "target": [0, 0, 0]}, "target"), lambda row1, row2: row1["feature"] - row2["feature"], - TaggedTable({"feature": [1, 2, 3], "target": [0, 0, 0]}, "target", None), + TaggedTable({"feature": [1, 2, 3], "target": [0, 0, 0]}, "target"), ), ], ids=["unsorted", "already_sorted"], @@ -38,14 +38,14 @@ def test_should_sort_table( [ # TODO: Check that it works with an empty table ( - TaggedTable({"feature": [3, 2, 1], "target": [0, 0, 0]}, "target", None), + TaggedTable({"feature": [3, 2, 1], "target": [0, 0, 0]}, "target"), lambda row1, row2: row1["feature"] - row2["feature"], - TaggedTable({"feature": [3, 2, 1], "target": [0, 0, 0]}, "target", None), + TaggedTable({"feature": [3, 2, 1], "target": [0, 0, 0]}, "target"), ), ( - TaggedTable({"feature": [1, 2, 3], "target": [0, 0, 0]}, "target", None), + TaggedTable({"feature": [1, 2, 3], "target": [0, 0, 0]}, "target"), lambda row1, row2: row1["feature"] - row2["feature"], - TaggedTable({"feature": [1, 2, 3], "target": [0, 0, 0]}, "target", None), + TaggedTable({"feature": [1, 2, 3], "target": [0, 0, 0]}, "target"), ), ], ids=["unsorted", "already_sorted"], From 319f44f8c4db492e4d32abbd57676c06a42a76ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Thu, 1 Jun 2023 14:53:05 +0200 Subject: [PATCH 044/149] feat: Override transform_table --- .../data/tabular/containers/_tagged_table.py | 41 +++++++++++++++++++ .../_tagged_table/test_transform_table.py | 34 +++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index fc59d94fd..6044d2781 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING from safeds.data.tabular.containers import Column, Row, Table +from safeds.data.tabular.transformation import TableTransformer from safeds.exceptions import ColumnIsTaggedError, UnknownColumnNameError if TYPE_CHECKING: @@ -618,3 +619,43 @@ def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Tagg """ return TaggedTable._from_table(super().transform_column(name, transformer), self.target.name) + + def transform_table(self, transformer: TableTransformer) -> TaggedTable: + """ + Apply a learned transformation onto this table. + + This table is not modified. + + Parameters + ---------- + transformer : TableTransformer + The transformer which transforms the given table. + + Returns + ------- + transformed_table : TaggedTable + The transformed table. + + Raises + ------ + TransformerNotFittedError + If the transformer has not been fitted yet. + ColunmIsTaggedError + If the transformer tries to remove or replace the target column. + + Examples + -------- + >>> from safeds.data.tabular.transformation import OneHotEncoder + >>> from safeds.data.tabular.containers import TaggedTable + >>> table = TaggedTable({"feat1": ["a", "b", "a"], "feat2": ["a", "b", "d"], "target": [1, 2, 3]},"target") + >>> transformer = OneHotEncoder().fit(table, table.features.column_names) + >>> table.transform_table(transformer) + feat1__a feat1__b feat2__a feat2__b feat2__d target + 0 1.0 0.0 1.0 0.0 0.0 1 + 1 0.0 1.0 0.0 1.0 0.0 2 + 2 1.0 0.0 0.0 0.0 1.0 3 + """ + transformed_table = transformer.transform(self) + if self.target.name in transformer.get_names_of_removed_columns(): + raise ColumnIsTaggedError(self.target.name) + return TaggedTable._from_table(transformed_table, self.target.name) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py new file mode 100644 index 000000000..80ed7e355 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py @@ -0,0 +1,34 @@ +import pytest +from safeds.data.tabular.containers import TaggedTable +from safeds.data.tabular.transformation import OneHotEncoder +from safeds.exceptions import ColumnIsTaggedError + + +def test_should_transform_table() -> None: + table = TaggedTable({"feat1": ["a", "b", "a"], "feat2": ["a", "b", "d"], "target": [1, 2, 3]}, "target") + transformer = OneHotEncoder().fit(table, table.features.column_names) + transformed_table = table.transform_table(transformer) + expected = TaggedTable( + { + "feat1__a": [1.0, 0.0, 1.0], + "feat1__b": [0.0, 1.0, 0.0], + "feat2__a": [1.0, 0.0, 0.0], + "feat2__b": [0.0, 1.0, 0.0], + "feat2__d": [0.0, 0.0, 1.0], + "target": [1, 2, 3] + }, + "target", + ) + assert transformed_table.schema == expected.schema + assert transformed_table.features == expected.features + assert transformed_table.target == expected.target + assert transformed_table == expected + + +def test_should_raise_column_is_tagged() -> None: + table = TaggedTable({"feat1": ["a", "b", "a"], "feat2": ["a", "b", "d"], "target": [1, 2, 3]}, "target") + transformer = OneHotEncoder().fit(table, None) + # Passing None means all columns get one-hot-encoded, i.e. also the target column! + with pytest.raises(ColumnIsTaggedError, match='Illegal schema modification: Column "target" is tagged and cannot ' + 'be removed.'): + table.transform_table(transformer) From 0a236382abed66de7f2531dd3573a12f7eae5668 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Thu, 1 Jun 2023 15:05:07 +0200 Subject: [PATCH 045/149] Move import to type checking block Because the linter said so. --- src/safeds/data/tabular/containers/_tagged_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 6044d2781..984a2bd92 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -3,11 +3,11 @@ from typing import TYPE_CHECKING from safeds.data.tabular.containers import Column, Row, Table -from safeds.data.tabular.transformation import TableTransformer from safeds.exceptions import ColumnIsTaggedError, UnknownColumnNameError if TYPE_CHECKING: from collections.abc import Callable, Mapping, Sequence + from safeds.data.tabular.transformation import TableTransformer from typing import Any From ab40aaad2340cd4e21b9c7557090ca2e9495d70c Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Thu, 1 Jun 2023 13:08:05 +0000 Subject: [PATCH 046/149] style: apply automated linter fixes --- src/safeds/data/tabular/containers/_tagged_table.py | 3 ++- .../_table/_tagged_table/test_transform_table.py | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 984a2bd92..75dfb2c9d 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -7,9 +7,10 @@ if TYPE_CHECKING: from collections.abc import Callable, Mapping, Sequence - from safeds.data.tabular.transformation import TableTransformer from typing import Any + from safeds.data.tabular.transformation import TableTransformer + class TaggedTable(Table): """ diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py index 80ed7e355..c4c89f197 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py @@ -15,7 +15,7 @@ def test_should_transform_table() -> None: "feat2__a": [1.0, 0.0, 0.0], "feat2__b": [0.0, 1.0, 0.0], "feat2__d": [0.0, 0.0, 1.0], - "target": [1, 2, 3] + "target": [1, 2, 3], }, "target", ) @@ -29,6 +29,7 @@ def test_should_raise_column_is_tagged() -> None: table = TaggedTable({"feat1": ["a", "b", "a"], "feat2": ["a", "b", "d"], "target": [1, 2, 3]}, "target") transformer = OneHotEncoder().fit(table, None) # Passing None means all columns get one-hot-encoded, i.e. also the target column! - with pytest.raises(ColumnIsTaggedError, match='Illegal schema modification: Column "target" is tagged and cannot ' - 'be removed.'): + with pytest.raises( + ColumnIsTaggedError, match='Illegal schema modification: Column "target" is tagged and cannot be removed.', + ): table.transform_table(transformer) From b42a8a9cd99f5d4f97c5e5b2bc216520b4fd774a Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Thu, 1 Jun 2023 13:09:43 +0000 Subject: [PATCH 047/149] style: apply automated linter fixes --- .../containers/_table/_tagged_table/test_transform_table.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py index c4c89f197..c766cd209 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py @@ -30,6 +30,7 @@ def test_should_raise_column_is_tagged() -> None: transformer = OneHotEncoder().fit(table, None) # Passing None means all columns get one-hot-encoded, i.e. also the target column! with pytest.raises( - ColumnIsTaggedError, match='Illegal schema modification: Column "target" is tagged and cannot be removed.', + ColumnIsTaggedError, + match='Illegal schema modification: Column "target" is tagged and cannot be removed.', ): table.transform_table(transformer) From 131bf7aa237f291c5c2186b1dc91f90ce029e836 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Thu, 1 Jun 2023 17:36:57 +0200 Subject: [PATCH 048/149] feat: add helper asserting TaggedTables are equal --- tests/helpers/__init__.py | 3 ++- tests/helpers/_assertions.py | 8 ++++++++ .../_table/_tagged_table/test_add_column.py | 7 +++---- .../_table/_tagged_table/test_add_columns.py | 7 +++---- .../_table/_tagged_table/test_add_row.py | 7 +++---- .../_table/_tagged_table/test_add_rows.py | 7 +++---- .../_table/_tagged_table/test_filter_rows.py | 7 +++---- .../test_remove_columns_with_missing_values.py | 7 +++---- ..._remove_columns_with_non_numerical_values.py | 7 +++---- .../_tagged_table/test_remove_duplicate_rows.py | 7 +++---- .../test_remove_rows_with_missing_values.py | 7 +++---- .../test_remove_rows_with_outliers.py | 7 +++---- .../_table/_tagged_table/test_rename_column.py | 7 +++---- .../_table/_tagged_table/test_replace_column.py | 7 +++---- .../_table/_tagged_table/test_slice_rows.py | 17 +++++------------ .../_table/_tagged_table/test_sort_rows.py | 12 ++++-------- .../_tagged_table/test_transform_column.py | 7 +++---- .../_tagged_table/test_transform_table.py | 7 +++---- 18 files changed, 61 insertions(+), 77 deletions(-) create mode 100644 tests/helpers/_assertions.py diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py index 019d5cbe7..9db7c834f 100644 --- a/tests/helpers/__init__.py +++ b/tests/helpers/__init__.py @@ -1,3 +1,4 @@ +from ._assertions import assert_that_tagged_tables_are_equal from ._resources import resolve_resource_path -__all__ = ["resolve_resource_path"] +__all__ = ["assert_that_tagged_tables_are_equal", "resolve_resource_path"] diff --git a/tests/helpers/_assertions.py b/tests/helpers/_assertions.py new file mode 100644 index 000000000..7335c19fb --- /dev/null +++ b/tests/helpers/_assertions.py @@ -0,0 +1,8 @@ +from safeds.data.tabular.containers import TaggedTable + + +def assert_that_tagged_tables_are_equal(table1: TaggedTable, table2: TaggedTable) -> None: + assert table1.schema == table2.schema + assert table1.features == table2.features + assert table1.target == table2.target + assert table1 == table2 diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py index df3270038..497a87723 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py @@ -1,5 +1,7 @@ from safeds.data.tabular.containers import Column, TaggedTable +from tests.helpers import assert_that_tagged_tables_are_equal + def test_should_add_column() -> None: table = TaggedTable( @@ -19,7 +21,4 @@ def test_should_add_column() -> None: }, "target", ) - assert new_table.schema == expected.schema - assert new_table.features == expected.features - assert new_table.target == expected.target - assert new_table == expected + assert_that_tagged_tables_are_equal(new_table, expected) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py index 20cbd78a1..b36d87e5e 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py @@ -1,5 +1,7 @@ from safeds.data.tabular.containers import Column, TaggedTable +from tests.helpers import assert_that_tagged_tables_are_equal + def test_should_add_columns() -> None: table = TaggedTable( @@ -23,7 +25,4 @@ def test_should_add_columns() -> None: }, "target", ) - assert new_table.schema == expected.schema - assert new_table.features == expected.features - assert new_table.target == expected.target - assert new_table == expected + assert_that_tagged_tables_are_equal(new_table, expected) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py index c70bf935b..1c5012955 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py @@ -1,5 +1,7 @@ from safeds.data.tabular.containers import Row, TaggedTable +from tests.helpers import assert_that_tagged_tables_are_equal + def test_should_add_row() -> None: table = TaggedTable( @@ -23,7 +25,4 @@ def test_should_add_row() -> None: }, "target", ) - assert new_table.schema == expected.schema - assert new_table.features == expected.features - assert new_table.target == expected.target - assert new_table == expected + assert_that_tagged_tables_are_equal(new_table, expected) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py index 56bb819ff..fea52761f 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py @@ -1,5 +1,7 @@ from safeds.data.tabular.containers import Row, TaggedTable +from tests.helpers import assert_that_tagged_tables_are_equal + def test_should_add_rows() -> None: table = TaggedTable( @@ -26,7 +28,4 @@ def test_should_add_rows() -> None: }, "target", ) - assert new_table.schema == expected.schema - assert new_table.features == expected.features - assert new_table.target == expected.target - assert new_table == expected + assert_that_tagged_tables_are_equal(new_table, expected) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py index 3c442ca76..8640d2acb 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py @@ -1,5 +1,7 @@ from safeds.data.tabular.containers import TaggedTable +from tests.helpers import assert_that_tagged_tables_are_equal + def test_should_remove_row() -> None: table = TaggedTable( @@ -19,7 +21,4 @@ def test_should_remove_row() -> None: }, "target", ) - assert new_table.schema == expected.schema - assert new_table.features == expected.features - assert new_table.target == expected.target - assert new_table == expected + assert_that_tagged_tables_are_equal(new_table, expected) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py index f4faee203..d276bcf74 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py @@ -2,6 +2,8 @@ from safeds.data.tabular.containers import TaggedTable from safeds.exceptions import ColumnIsTaggedError +from tests.helpers import assert_that_tagged_tables_are_equal + def test_should_remove_column() -> None: table = TaggedTable( @@ -20,10 +22,7 @@ def test_should_remove_column() -> None: }, "target", ) - assert new_table.schema == expected.schema - assert new_table.features == expected.features - assert new_table.target == expected.target - assert new_table == expected + assert_that_tagged_tables_are_equal(new_table, expected) def test_should_throw_column_is_tagged() -> None: diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py index 3e4f868d2..9153f4e79 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py @@ -2,6 +2,8 @@ from safeds.data.tabular.containers import TaggedTable from safeds.exceptions import ColumnIsTaggedError +from tests.helpers import assert_that_tagged_tables_are_equal + def test_should_remove_column() -> None: table = TaggedTable( @@ -20,10 +22,7 @@ def test_should_remove_column() -> None: }, "target", ) - assert new_table.schema == expected.schema - assert new_table.features == expected.features - assert new_table.target == expected.target - assert new_table == expected + assert_that_tagged_tables_are_equal(new_table, expected) def test_should_throw_column_is_tagged() -> None: diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_duplicate_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_duplicate_rows.py index 4446df3a9..4431bee34 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_duplicate_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_duplicate_rows.py @@ -1,5 +1,7 @@ from safeds.data.tabular.containers import TaggedTable +from tests.helpers import assert_that_tagged_tables_are_equal + def test_should_remove_row() -> None: table = TaggedTable( @@ -17,7 +19,4 @@ def test_should_remove_row() -> None: }, "target", ) - assert new_table.schema == expected.schema - assert new_table.features == expected.features - assert new_table.target == expected.target - assert new_table == expected + assert_that_tagged_tables_are_equal(new_table, expected) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_missing_values.py index 434951381..b47bbbe06 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_missing_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_missing_values.py @@ -1,5 +1,7 @@ from safeds.data.tabular.containers import TaggedTable +from tests.helpers import assert_that_tagged_tables_are_equal + def test_should_remove_row() -> None: table = TaggedTable( @@ -17,7 +19,4 @@ def test_should_remove_row() -> None: }, "target", ) - assert new_table.schema == expected.schema - assert new_table.features == expected.features - assert new_table.target == expected.target - assert new_table == expected + assert_that_tagged_tables_are_equal(new_table, expected) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py index 451a0ea55..20fbe40b1 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py @@ -1,5 +1,7 @@ from safeds.data.tabular.containers import TaggedTable +from tests.helpers import assert_that_tagged_tables_are_equal + def test_should_remove_row() -> None: table = TaggedTable( @@ -17,7 +19,4 @@ def test_should_remove_row() -> None: }, "target", ) - assert new_table.schema == expected.schema - assert new_table.features == expected.features - assert new_table.target == expected.target - assert new_table == expected + assert_that_tagged_tables_are_equal(new_table, expected) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py index d3fc62247..24d175318 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py @@ -1,6 +1,8 @@ import pytest from safeds.data.tabular.containers import TaggedTable +from tests.helpers import assert_that_tagged_tables_are_equal + @pytest.mark.parametrize( ("original_table", "old_column_name", "new_column_name", "result_table"), @@ -51,7 +53,4 @@ def test_should_add_column( result_table: TaggedTable, ) -> None: new_table = original_table.rename_column(old_column_name, new_column_name) - assert new_table.schema == result_table.schema - assert new_table.features == result_table.features - assert new_table.target == result_table.target - assert new_table == result_table + assert_that_tagged_tables_are_equal(new_table, result_table) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py index dfe6e773a..497e9b9f6 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py @@ -1,6 +1,8 @@ import pytest from safeds.data.tabular.containers import Column, TaggedTable +from tests.helpers import assert_that_tagged_tables_are_equal + @pytest.mark.parametrize( ("original_table", "new_column", "column_name_to_be_replaced", "result_table"), @@ -51,7 +53,4 @@ def test_should_replace_column( result_table: TaggedTable, ) -> None: new_table = original_table.replace_column(column_name_to_be_replaced, new_column) - assert new_table.schema == result_table.schema - assert new_table.features == result_table.features - assert new_table.target == result_table.target - assert new_table == result_table + assert_that_tagged_tables_are_equal(new_table, result_table) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_slice_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_slice_rows.py index 47768b111..189b3d908 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_slice_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_slice_rows.py @@ -3,6 +3,8 @@ from safeds.data.tabular.containers import TaggedTable from safeds.exceptions import IndexOutOfBoundsError +from tests.helpers import assert_that_tagged_tables_are_equal + @pytest.mark.parametrize( ("table", "test_table", "second_test_table"), @@ -19,18 +21,9 @@ def test_should_slice_rows(table: TaggedTable, test_table: TaggedTable, second_t new_table = table.slice_rows(0, 2, 1) second_new_table = table.slice_rows(0, 3, 2) third_new_table = table.slice_rows() - assert new_table.schema == test_table.schema - assert new_table.features == test_table.features - assert new_table.target == test_table.target - assert new_table == test_table - assert second_new_table.schema == second_test_table.schema - assert second_new_table.features == second_test_table.features - assert second_new_table.target == second_test_table.target - assert second_new_table == second_test_table - assert third_new_table.schema == table.schema - assert third_new_table.features == table.features - assert third_new_table.target == table.target - assert third_new_table == table + assert_that_tagged_tables_are_equal(new_table, test_table) + assert_that_tagged_tables_are_equal(second_new_table, second_test_table) + assert_that_tagged_tables_are_equal(third_new_table, table) @pytest.mark.parametrize( diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py index 4a4b6f58d..0d63af283 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py @@ -3,6 +3,8 @@ import pytest from safeds.data.tabular.containers import Row, TaggedTable +from tests.helpers import assert_that_tagged_tables_are_equal + @pytest.mark.parametrize( ("table", "comparator", "expected"), @@ -27,10 +29,7 @@ def test_should_sort_table( expected: TaggedTable, ) -> None: table_sorted = table.sort_rows(comparator) - assert table_sorted.schema == expected.schema - assert table_sorted.features == expected.features - assert table_sorted.target == expected.target - assert table_sorted == expected + assert_that_tagged_tables_are_equal(table_sorted, expected) @pytest.mark.parametrize( @@ -56,7 +55,4 @@ def test_should_not_modify_original_table( table_copy: TaggedTable, ) -> None: table.sort_rows(comparator) - assert table.schema == table_copy.schema - assert table.features == table_copy.features - assert table.target == table_copy.target - assert table == table_copy + assert_that_tagged_tables_are_equal(table, table_copy) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_column.py index 5c762e805..c02656440 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_column.py @@ -2,6 +2,8 @@ from safeds.data.tabular.containers import TaggedTable from safeds.exceptions import UnknownColumnNameError +from tests.helpers import assert_that_tagged_tables_are_equal + @pytest.mark.parametrize( ("table", "column_name", "table_transformed"), @@ -22,10 +24,7 @@ def test_should_transform_column(table: TaggedTable, column_name: str, table_transformed: TaggedTable) -> None: result = table.transform_column(column_name, lambda row: row.get_value(column_name) * 2) - assert result.schema == table_transformed.schema - assert result.features == table_transformed.features - assert result.target == table_transformed.target - assert result == table_transformed + assert_that_tagged_tables_are_equal(result, table_transformed) def test_should_raise_if_column_not_found() -> None: diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py index c766cd209..e38d3dad5 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py @@ -3,6 +3,8 @@ from safeds.data.tabular.transformation import OneHotEncoder from safeds.exceptions import ColumnIsTaggedError +from tests.helpers import assert_that_tagged_tables_are_equal + def test_should_transform_table() -> None: table = TaggedTable({"feat1": ["a", "b", "a"], "feat2": ["a", "b", "d"], "target": [1, 2, 3]}, "target") @@ -19,10 +21,7 @@ def test_should_transform_table() -> None: }, "target", ) - assert transformed_table.schema == expected.schema - assert transformed_table.features == expected.features - assert transformed_table.target == expected.target - assert transformed_table == expected + assert_that_tagged_tables_are_equal(transformed_table, expected) def test_should_raise_column_is_tagged() -> None: From 9b7919d112f29e9057d280181e802a2f3bbf5580 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Thu, 1 Jun 2023 18:16:40 +0200 Subject: [PATCH 049/149] feat: Override inverse_transform_table --- .../data/tabular/containers/_tagged_table.py | 53 ++++++++++++++++++- .../test_inverse_transform_table.py | 26 +++++++++ 2 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_inverse_transform_table.py diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 75dfb2c9d..378031b96 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -9,7 +9,7 @@ from collections.abc import Callable, Mapping, Sequence from typing import Any - from safeds.data.tabular.transformation import TableTransformer + from safeds.data.tabular.transformation import TableTransformer, InvertibleTableTransformer class TaggedTable(Table): @@ -649,6 +649,11 @@ def transform_table(self, transformer: TableTransformer) -> TaggedTable: >>> from safeds.data.tabular.transformation import OneHotEncoder >>> from safeds.data.tabular.containers import TaggedTable >>> table = TaggedTable({"feat1": ["a", "b", "a"], "feat2": ["a", "b", "d"], "target": [1, 2, 3]},"target") + >>> table + feat1 feat2 target + 0 "a" "a" 1 + 1 "b" "b" 2 + 2 "a" "d" 3 >>> transformer = OneHotEncoder().fit(table, table.features.column_names) >>> table.transform_table(transformer) feat1__a feat1__b feat2__a feat2__b feat2__d target @@ -660,3 +665,49 @@ def transform_table(self, transformer: TableTransformer) -> TaggedTable: if self.target.name in transformer.get_names_of_removed_columns(): raise ColumnIsTaggedError(self.target.name) return TaggedTable._from_table(transformed_table, self.target.name) + + def inverse_transform_table(self, transformer: InvertibleTableTransformer) -> TaggedTable: + """ + Invert the transformation applied by the given transformer. + + This table is not modified. + + Parameters + ---------- + transformer : InvertibleTableTransformer + The transformer that was used to create this table. + + Returns + ------- + table : TaggedTable + The original table. + + Raises + ------ + TransformerNotFittedError + If the transformer has not been fitted yet. + + Examples + -------- + >>> from safeds.data.tabular.transformation import OneHotEncoder + >>> from safeds.data.tabular.containers import TaggedTable + >>> table = TaggedTable({"feat1": ["a", "b", "a"], "feat2": ["a", "b", "d"], "target": [1, 2, 3]}, "target") + >>> table + feat1 feat2 target + 0 "a" "a" 1 + 1 "b" "b" 2 + 2 "a" "d" 3 + >>> transformer = OneHotEncoder().fit(table, table.features.column_names) + >>> transformed_table = table.transform_table(transformer) + >>> transformed_table + feat1__a feat1__b feat2__a feat2__b feat2__d target + 0 1.0 0.0 1.0 0.0 0.0 1 + 1 0.0 1.0 0.0 1.0 0.0 2 + 2 1.0 0.0 0.0 0.0 1.0 3 + >>> transformed_table.inverse_transform_table(transformer) + feat1 feat2 target + 0 "a" "a" 1 + 1 "b" "b" 2 + 2 "a" "d" 3 + """ + return TaggedTable._from_table(transformer.inverse_transform(self), self.target.name) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_inverse_transform_table.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_inverse_transform_table.py new file mode 100644 index 000000000..a8cc21562 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_inverse_transform_table.py @@ -0,0 +1,26 @@ +import pytest +from safeds.data.tabular.containers import TaggedTable +from safeds.data.tabular.transformation import OneHotEncoder +from safeds.exceptions import ColumnIsTaggedError + +from tests.helpers import assert_that_tagged_tables_are_equal + + +def test_should_inverse_transform_table() -> None: + table = TaggedTable({"feat1": ["a", "b", "a"], "feat2": ["a", "b", "d"], "target": [1, 2, 3]}, "target") + transformer = OneHotEncoder().fit(table, table.features.column_names) + transformed_table = table.transform_table(transformer) + expected_transformed = TaggedTable( + { + "feat1__a": [1.0, 0.0, 1.0], + "feat1__b": [0.0, 1.0, 0.0], + "feat2__a": [1.0, 0.0, 0.0], + "feat2__b": [0.0, 1.0, 0.0], + "feat2__d": [0.0, 0.0, 1.0], + "target": [1, 2, 3], + }, + "target", + ) + assert_that_tagged_tables_are_equal(transformed_table, expected_transformed) + inverse_transformed_table = transformed_table.inverse_transform_table(transformer) + assert_that_tagged_tables_are_equal(inverse_transformed_table, table) From 19847076e013999fa6218bb38b6b241693411b44 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Thu, 1 Jun 2023 16:18:59 +0000 Subject: [PATCH 050/149] style: apply automated linter fixes --- src/safeds/data/tabular/containers/_tagged_table.py | 2 +- .../_table/_tagged_table/test_inverse_transform_table.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 378031b96..f236a70c2 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -9,7 +9,7 @@ from collections.abc import Callable, Mapping, Sequence from typing import Any - from safeds.data.tabular.transformation import TableTransformer, InvertibleTableTransformer + from safeds.data.tabular.transformation import InvertibleTableTransformer, TableTransformer class TaggedTable(Table): diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_inverse_transform_table.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_inverse_transform_table.py index a8cc21562..ca7197f16 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_inverse_transform_table.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_inverse_transform_table.py @@ -1,7 +1,5 @@ -import pytest from safeds.data.tabular.containers import TaggedTable from safeds.data.tabular.transformation import OneHotEncoder -from safeds.exceptions import ColumnIsTaggedError from tests.helpers import assert_that_tagged_tables_are_equal From 8925a17a0e2c6378f915858d164e4b09e00bd205 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Thu, 1 Jun 2023 18:22:06 +0200 Subject: [PATCH 051/149] docs: remove quotes in console examples --- .../data/tabular/containers/_tagged_table.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 378031b96..2de2d0619 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -651,9 +651,9 @@ def transform_table(self, transformer: TableTransformer) -> TaggedTable: >>> table = TaggedTable({"feat1": ["a", "b", "a"], "feat2": ["a", "b", "d"], "target": [1, 2, 3]},"target") >>> table feat1 feat2 target - 0 "a" "a" 1 - 1 "b" "b" 2 - 2 "a" "d" 3 + 0 a a 1 + 1 b b 2 + 2 a d 3 >>> transformer = OneHotEncoder().fit(table, table.features.column_names) >>> table.transform_table(transformer) feat1__a feat1__b feat2__a feat2__b feat2__d target @@ -694,9 +694,9 @@ def inverse_transform_table(self, transformer: InvertibleTableTransformer) -> Ta >>> table = TaggedTable({"feat1": ["a", "b", "a"], "feat2": ["a", "b", "d"], "target": [1, 2, 3]}, "target") >>> table feat1 feat2 target - 0 "a" "a" 1 - 1 "b" "b" 2 - 2 "a" "d" 3 + 0 a a 1 + 1 b b 2 + 2 a d 3 >>> transformer = OneHotEncoder().fit(table, table.features.column_names) >>> transformed_table = table.transform_table(transformer) >>> transformed_table @@ -706,8 +706,8 @@ def inverse_transform_table(self, transformer: InvertibleTableTransformer) -> Ta 2 1.0 0.0 0.0 0.0 1.0 3 >>> transformed_table.inverse_transform_table(transformer) feat1 feat2 target - 0 "a" "a" 1 - 1 "b" "b" 2 - 2 "a" "d" 3 + 0 a a 1 + 1 b b 2 + 2 a d 3 """ return TaggedTable._from_table(transformer.inverse_transform(self), self.target.name) From 2bf77fa7ec906111c4355d2723a7f28d5f52a447 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Thu, 1 Jun 2023 18:33:56 +0200 Subject: [PATCH 052/149] docs: fix indentation in transform_table --- src/safeds/data/tabular/containers/_tagged_table.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 8c4aac91a..9f240ecc7 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -650,10 +650,10 @@ def transform_table(self, transformer: TableTransformer) -> TaggedTable: >>> from safeds.data.tabular.containers import TaggedTable >>> table = TaggedTable({"feat1": ["a", "b", "a"], "feat2": ["a", "b", "d"], "target": [1, 2, 3]},"target") >>> table - feat1 feat2 target - 0 a a 1 - 1 b b 2 - 2 a d 3 + feat1 feat2 target + 0 a a 1 + 1 b b 2 + 2 a d 3 >>> transformer = OneHotEncoder().fit(table, table.features.column_names) >>> table.transform_table(transformer) feat1__a feat1__b feat2__a feat2__b feat2__d target From ec37d5647ba3b057c767784857bed2af9034bcd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Thu, 1 Jun 2023 18:36:58 +0200 Subject: [PATCH 053/149] docs: fix indentation in console examples --- .../data/tabular/containers/_tagged_table.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 9f240ecc7..0bbe9aebd 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -650,10 +650,10 @@ def transform_table(self, transformer: TableTransformer) -> TaggedTable: >>> from safeds.data.tabular.containers import TaggedTable >>> table = TaggedTable({"feat1": ["a", "b", "a"], "feat2": ["a", "b", "d"], "target": [1, 2, 3]},"target") >>> table - feat1 feat2 target - 0 a a 1 - 1 b b 2 - 2 a d 3 + feat1 feat2 target + 0 a a 1 + 1 b b 2 + 2 a d 3 >>> transformer = OneHotEncoder().fit(table, table.features.column_names) >>> table.transform_table(transformer) feat1__a feat1__b feat2__a feat2__b feat2__d target @@ -693,10 +693,10 @@ def inverse_transform_table(self, transformer: InvertibleTableTransformer) -> Ta >>> from safeds.data.tabular.containers import TaggedTable >>> table = TaggedTable({"feat1": ["a", "b", "a"], "feat2": ["a", "b", "d"], "target": [1, 2, 3]}, "target") >>> table - feat1 feat2 target - 0 a a 1 - 1 b b 2 - 2 a d 3 + feat1 feat2 target + 0 a a 1 + 1 b b 2 + 2 a d 3 >>> transformer = OneHotEncoder().fit(table, table.features.column_names) >>> transformed_table = table.transform_table(transformer) >>> transformed_table @@ -705,9 +705,9 @@ def inverse_transform_table(self, transformer: InvertibleTableTransformer) -> Ta 1 0.0 1.0 0.0 1.0 0.0 2 2 1.0 0.0 0.0 0.0 1.0 3 >>> transformed_table.inverse_transform_table(transformer) - feat1 feat2 target - 0 a a 1 - 1 b b 2 - 2 a d 3 + feat1 feat2 target + 0 a a 1 + 1 b b 2 + 2 a d 3 """ return TaggedTable._from_table(transformer.inverse_transform(self), self.target.name) From 55456212ba36951772537d90bf026bc0b44c8efb Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Thu, 8 Jun 2023 15:03:32 +0000 Subject: [PATCH 054/149] style: apply automated linter fixes --- tests/helpers/_assertions.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/helpers/_assertions.py b/tests/helpers/_assertions.py index c302536a2..2dcbcd1e0 100644 --- a/tests/helpers/_assertions.py +++ b/tests/helpers/_assertions.py @@ -1,6 +1,5 @@ import pytest -from safeds.data.tabular.containers import Table -from safeds.data.tabular.containers import TaggedTable +from safeds.data.tabular.containers import Table, TaggedTable def assert_that_tables_are_close(table1: Table, table2: Table) -> None: From 3f00be607f6fbf658fb7799f3a98bf1fe62e9d61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 9 Jun 2023 10:28:05 +0200 Subject: [PATCH 055/149] Rename ColumnIsTaggedError to ColumnIsTargetError --- .../data/tabular/containers/_tagged_table.py | 18 +++++++++--------- src/safeds/exceptions/__init__.py | 4 ++-- src/safeds/exceptions/_data.py | 4 ++-- .../test_remove_columns_with_missing_values.py | 8 ++++---- ...remove_columns_with_non_numerical_values.py | 8 ++++---- .../_tagged_table/test_transform_table.py | 8 ++++---- 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 631af4e2c..6c6486021 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING from safeds.data.tabular.containers import Column, Row, Table -from safeds.exceptions import ColumnIsTaggedError, UnknownColumnNameError +from safeds.exceptions import ColumnIsTargetError, UnknownColumnNameError if TYPE_CHECKING: from collections.abc import Callable, Mapping, Sequence @@ -340,14 +340,14 @@ def remove_columns(self, column_names: list[str]) -> Table: ------ UnknownColumnNameError If any of the given columns does not exist. - ColumnIsTaggedError + ColumnIsTargetError If any of the given columns is the target column. """ try: return TaggedTable._from_table(super().remove_columns(column_names), self.target.name) except UnknownColumnNameError: # TODO: Don't return; throw exception and handle it correctly in tests. - # raise ColumnIsTaggedError({self.target.name}) from None + # raise ColumnIsTargetError({self.target.name}) from None return super().remove_columns(column_names) def remove_columns_with_missing_values(self) -> TaggedTable: @@ -363,14 +363,14 @@ def remove_columns_with_missing_values(self) -> TaggedTable: Raises ------ - ColumnIsTaggedError + ColumnIsTargetError If any of the columns to be removed is the target column. """ table = super().remove_columns_with_missing_values() try: tagged = TaggedTable._from_table(table, self.target.name, None) except UnknownColumnNameError: - raise ColumnIsTaggedError(self.target.name) from None + raise ColumnIsTargetError(self.target.name) from None return tagged def remove_columns_with_non_numerical_values(self) -> TaggedTable: @@ -386,14 +386,14 @@ def remove_columns_with_non_numerical_values(self) -> TaggedTable: Raises ------ - ColumnIsTaggedError + ColumnIsTargetError If any of the columns to be removed is the target column. """ table = super().remove_columns_with_non_numerical_values() try: tagged = TaggedTable._from_table(table, self.target.name) except UnknownColumnNameError: - raise ColumnIsTaggedError(self.target.name) from None + raise ColumnIsTargetError(self.target.name) from None return tagged def remove_duplicate_rows(self) -> TaggedTable: @@ -645,7 +645,7 @@ def transform_table(self, transformer: TableTransformer) -> TaggedTable: ------ TransformerNotFittedError If the transformer has not been fitted yet. - ColunmIsTaggedError + ColunmIsTargetError If the transformer tries to remove or replace the target column. Examples @@ -667,7 +667,7 @@ def transform_table(self, transformer: TableTransformer) -> TaggedTable: """ transformed_table = transformer.transform(self) if self.target.name in transformer.get_names_of_removed_columns(): - raise ColumnIsTaggedError(self.target.name) + raise ColumnIsTargetError(self.target.name) return TaggedTable._from_table(transformed_table, self.target.name) def inverse_transform_table(self, transformer: InvertibleTableTransformer) -> TaggedTable: diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index da4c790ed..cc86466f5 100644 --- a/src/safeds/exceptions/__init__.py +++ b/src/safeds/exceptions/__init__.py @@ -1,7 +1,7 @@ """Custom exceptions that can be raised by Safe-DS.""" from safeds.exceptions._data import ( - ColumnIsTaggedError, + ColumnIsTargetError, ColumnLengthMismatchError, ColumnSizeError, DuplicateColumnNameError, @@ -36,7 +36,7 @@ "ValueNotPresentWhenFittedError", "WrongFileExtensionError", "IllegalSchemaModificationError", - "ColumnIsTaggedError", + "ColumnIsTargetError", # ML exceptions "DatasetContainsTargetError", "DatasetMissesFeaturesError", diff --git a/src/safeds/exceptions/_data.py b/src/safeds/exceptions/_data.py index 0d6e31b49..db60e7baf 100644 --- a/src/safeds/exceptions/_data.py +++ b/src/safeds/exceptions/_data.py @@ -121,8 +121,8 @@ def __init__(self, msg: str) -> None: super().__init__(f"Illegal schema modification: {msg}") -class ColumnIsTaggedError(IllegalSchemaModificationError): +class ColumnIsTargetError(IllegalSchemaModificationError): """Exception raised in overriden methods of the Table class when removing tagged Columns from a TaggedTable.""" def __init__(self, column_name: str) -> None: - super().__init__(f'Column "{column_name}" is tagged and cannot be removed.') + super().__init__(f'Column "{column_name}" is the target column and cannot be removed.') diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py index d276bcf74..119a82d56 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py @@ -1,6 +1,6 @@ import pytest from safeds.data.tabular.containers import TaggedTable -from safeds.exceptions import ColumnIsTaggedError +from safeds.exceptions import ColumnIsTargetError from tests.helpers import assert_that_tagged_tables_are_equal @@ -25,7 +25,7 @@ def test_should_remove_column() -> None: assert_that_tagged_tables_are_equal(new_table, expected) -def test_should_throw_column_is_tagged() -> None: +def test_should_throw_column_is_target() -> None: table = TaggedTable( { "feature": [0, 1, 2], @@ -34,7 +34,7 @@ def test_should_throw_column_is_tagged() -> None: "target", ) with pytest.raises( - ColumnIsTaggedError, - match='Illegal schema modification: Column "target" is tagged and cannot be removed.', + ColumnIsTargetError, + match='Illegal schema modification: Column "target" is the target column and cannot be removed.', ): table.remove_columns_with_missing_values() diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py index 9153f4e79..3d6719746 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py @@ -1,6 +1,6 @@ import pytest from safeds.data.tabular.containers import TaggedTable -from safeds.exceptions import ColumnIsTaggedError +from safeds.exceptions import ColumnIsTargetError from tests.helpers import assert_that_tagged_tables_are_equal @@ -25,7 +25,7 @@ def test_should_remove_column() -> None: assert_that_tagged_tables_are_equal(new_table, expected) -def test_should_throw_column_is_tagged() -> None: +def test_should_throw_column_is_target() -> None: table = TaggedTable( { "feature": [0, 1, 2], @@ -34,7 +34,7 @@ def test_should_throw_column_is_tagged() -> None: "target", ) with pytest.raises( - ColumnIsTaggedError, - match='Illegal schema modification: Column "target" is tagged and cannot be removed.', + ColumnIsTargetError, + match='Illegal schema modification: Column "target" is the target column and cannot be removed.', ): table.remove_columns_with_non_numerical_values() diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py index e38d3dad5..189334c63 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py @@ -1,7 +1,7 @@ import pytest from safeds.data.tabular.containers import TaggedTable from safeds.data.tabular.transformation import OneHotEncoder -from safeds.exceptions import ColumnIsTaggedError +from safeds.exceptions import ColumnIsTargetError from tests.helpers import assert_that_tagged_tables_are_equal @@ -24,12 +24,12 @@ def test_should_transform_table() -> None: assert_that_tagged_tables_are_equal(transformed_table, expected) -def test_should_raise_column_is_tagged() -> None: +def test_should_raise_column_is_target() -> None: table = TaggedTable({"feat1": ["a", "b", "a"], "feat2": ["a", "b", "d"], "target": [1, 2, 3]}, "target") transformer = OneHotEncoder().fit(table, None) # Passing None means all columns get one-hot-encoded, i.e. also the target column! with pytest.raises( - ColumnIsTaggedError, - match='Illegal schema modification: Column "target" is tagged and cannot be removed.', + ColumnIsTargetError, + match='Illegal schema modification: Column "target" is the target column and cannot be removed.', ): table.transform_table(transformer) From e5936b8ae17e8de17fdb8765838f16c97ae883b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 9 Jun 2023 10:33:37 +0200 Subject: [PATCH 056/149] Document exceptions in parent class (Not actually thrown yet, as the methods are not yet overriden.) --- src/safeds/data/tabular/containers/_table.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 2293fc727..8c7d5ca43 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -841,6 +841,8 @@ def keep_only_columns(self, column_names: list[str]) -> Table: ------ UnknownColumnNameError If any of the given columns does not exist. + IllegalSchemaModificationError + If removing the columns would violate an invariant in the subclass. """ invalid_columns = [] for name in column_names: @@ -873,6 +875,8 @@ def remove_columns(self, column_names: list[str]) -> Table: ------ UnknownColumnNameError If any of the given columns does not exist. + IllegalSchemaModificationError + If removing the columns would violate an invariant in the subclass. """ invalid_columns = [] for name in column_names: From 0d231f0d9f32b9a6acd85e7cb0500a51ca9aa59c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 9 Jun 2023 10:49:21 +0200 Subject: [PATCH 057/149] Override remove_columns --- src/safeds/data/tabular/containers/_tagged_table.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 6c6486021..96cb72478 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -346,9 +346,7 @@ def remove_columns(self, column_names: list[str]) -> Table: try: return TaggedTable._from_table(super().remove_columns(column_names), self.target.name) except UnknownColumnNameError: - # TODO: Don't return; throw exception and handle it correctly in tests. - # raise ColumnIsTargetError({self.target.name}) from None - return super().remove_columns(column_names) + raise ColumnIsTargetError(self.target.name) from None def remove_columns_with_missing_values(self) -> TaggedTable: """ From 7d5a5746f91a2ef443940709d1a8ff5d05efbb55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 9 Jun 2023 14:25:55 +0200 Subject: [PATCH 058/149] Adapt tests to new replace_column signature --- .../data/tabular/containers/_tagged_table.py | 35 ++++++++++++------- .../_tagged_table/test_replace_column.py | 11 +++--- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 96cb72478..1fd2cf22b 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING from safeds.data.tabular.containers import Column, Row, Table -from safeds.exceptions import ColumnIsTargetError, UnknownColumnNameError +from safeds.exceptions import ColumnIsTargetError, UnknownColumnNameError, IllegalSchemaModificationError if TYPE_CHECKING: from collections.abc import Callable, Mapping, Sequence @@ -346,7 +346,9 @@ def remove_columns(self, column_names: list[str]) -> Table: try: return TaggedTable._from_table(super().remove_columns(column_names), self.target.name) except UnknownColumnNameError: - raise ColumnIsTargetError(self.target.name) from None + # raise ColumnIsTargetError(self.target.name) from None + # TODO: Revert this again + return super().remove_columns(column_names) def remove_columns_with_missing_values(self) -> TaggedTable: """ @@ -467,23 +469,23 @@ def rename_column(self, old_name: str, new_name: str) -> TaggedTable: new_name if self.target.name == old_name else self.target.name, ) - def replace_column(self, old_column_name: str, new_column: Column) -> TaggedTable: + def replace_column(self, old_column_name: str, new_columns: list[Column]) -> TaggedTable: """ - Return a copy of the table with the specified old column replaced by a new column. + Return a copy of the table with the specified column replaced by new columns. The order of columns is kept. - The column to be replaced may be the target column. + If the column to be replaced is the target column, it must be replaced by exactly one column. - This table is not modified. + The original is not modified. Parameters ---------- old_column_name : str The name of the column to be replaced. - new_column : Column - The new column replacing the old column. + new_columns : list[Column] + The new columns replacing the old column. Returns ------- @@ -500,11 +502,18 @@ def replace_column(self, old_column_name: str, new_column: Column) -> TaggedTabl ColumnSizeError If the size of the column does not match the amount of rows. - """ - return TaggedTable._from_table( - super().replace_column(old_column_name, new_column), - new_column.name if self.target.name == old_column_name else self.target.name, - ) + + IllegalSchemaModificationError + If the target column would be removed or replaced by more than one column. + """ + if old_column_name == self.target.name: + if len(new_columns) != 1: + raise IllegalSchemaModificationError(f"Column {self.target.name} can only be replaced by exactly one " + f"new column.") + else: + return TaggedTable._from_table(super().replace_column(old_column_name, new_columns), new_columns[0].name) + else: + return TaggedTable._from_table(super().replace_column(old_column_name, new_columns), self.target.name) def shuffle_rows(self) -> TaggedTable: """ diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py index 497e9b9f6..7c404d9c3 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py @@ -5,7 +5,8 @@ @pytest.mark.parametrize( - ("original_table", "new_column", "column_name_to_be_replaced", "result_table"), + ("original_table", "new_columns", "column_name_to_be_replaced", "result_table"), + # TODO: Add multicolumn cases, add illegal cases [ ( TaggedTable( @@ -15,7 +16,7 @@ }, "target_old", ), - Column("feature_new", [2, 1, 0]), + [Column("feature_new", [2, 1, 0])], "feature_old", TaggedTable( { @@ -33,7 +34,7 @@ }, "target_old", ), - Column("target_new", [2, 1, 0]), + [Column("target_new", [2, 1, 0])], "target_old", TaggedTable( { @@ -48,9 +49,9 @@ ) def test_should_replace_column( original_table: TaggedTable, - new_column: Column, + new_columns: list[Column], column_name_to_be_replaced: str, result_table: TaggedTable, ) -> None: - new_table = original_table.replace_column(column_name_to_be_replaced, new_column) + new_table = original_table.replace_column(column_name_to_be_replaced, new_columns) assert_that_tagged_tables_are_equal(new_table, result_table) From d84688567d1763ae5293e31d929679701b3ca283 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 9 Jun 2023 12:28:19 +0000 Subject: [PATCH 059/149] style: apply automated linter fixes --- src/safeds/data/tabular/containers/_tagged_table.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 1fd2cf22b..caf20e388 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING from safeds.data.tabular.containers import Column, Row, Table -from safeds.exceptions import ColumnIsTargetError, UnknownColumnNameError, IllegalSchemaModificationError +from safeds.exceptions import ColumnIsTargetError, IllegalSchemaModificationError, UnknownColumnNameError if TYPE_CHECKING: from collections.abc import Callable, Mapping, Sequence @@ -508,10 +508,13 @@ def replace_column(self, old_column_name: str, new_columns: list[Column]) -> Tag """ if old_column_name == self.target.name: if len(new_columns) != 1: - raise IllegalSchemaModificationError(f"Column {self.target.name} can only be replaced by exactly one " - f"new column.") + raise IllegalSchemaModificationError( + f"Column {self.target.name} can only be replaced by exactly one new column.", + ) else: - return TaggedTable._from_table(super().replace_column(old_column_name, new_columns), new_columns[0].name) + return TaggedTable._from_table( + super().replace_column(old_column_name, new_columns), new_columns[0].name, + ) else: return TaggedTable._from_table(super().replace_column(old_column_name, new_columns), self.target.name) From 23cb5f87cc2fb4e67513fdc3da1c6889195556de Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 9 Jun 2023 12:29:55 +0000 Subject: [PATCH 060/149] style: apply automated linter fixes --- src/safeds/data/tabular/containers/_tagged_table.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index caf20e388..e45da1772 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -513,7 +513,8 @@ def replace_column(self, old_column_name: str, new_columns: list[Column]) -> Tag ) else: return TaggedTable._from_table( - super().replace_column(old_column_name, new_columns), new_columns[0].name, + super().replace_column(old_column_name, new_columns), + new_columns[0].name, ) else: return TaggedTable._from_table(super().replace_column(old_column_name, new_columns), self.target.name) From e3bcac2ba91e96e8d8c6688f513657210fb1527d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 9 Jun 2023 14:42:57 +0200 Subject: [PATCH 061/149] Add replace_column testcases that would throw --- .../data/tabular/containers/_tagged_table.py | 2 +- .../_tagged_table/test_replace_column.py | 60 ++++++++++++++++++- 2 files changed, 59 insertions(+), 3 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index caf20e388..7303d8f2d 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -509,7 +509,7 @@ def replace_column(self, old_column_name: str, new_columns: list[Column]) -> Tag if old_column_name == self.target.name: if len(new_columns) != 1: raise IllegalSchemaModificationError( - f"Column {self.target.name} can only be replaced by exactly one new column.", + f'Target column "{self.target.name}" can only be replaced by exactly one new column.', ) else: return TaggedTable._from_table( diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py index 7c404d9c3..768fa2cd8 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py @@ -1,12 +1,12 @@ import pytest from safeds.data.tabular.containers import Column, TaggedTable +from safeds.exceptions import IllegalSchemaModificationError from tests.helpers import assert_that_tagged_tables_are_equal @pytest.mark.parametrize( ("original_table", "new_columns", "column_name_to_be_replaced", "result_table"), - # TODO: Add multicolumn cases, add illegal cases [ ( TaggedTable( @@ -26,6 +26,25 @@ "target_old", ), ), + ( + TaggedTable( + { + "feature_old": [0, 1, 2], + "target_old": [3, 4, 5], + }, + "target_old", + ), + [Column("feature_new_a", [2, 1, 0]), Column("feature_new_b", [4, 2, 0])], + "feature_old", + TaggedTable( + { + "feature_new_a": [2, 1, 0], + "feature_new_b": [4, 2, 0], + "target_old": [3, 4, 5], + }, + "target_old", + ), + ), ( TaggedTable( { @@ -45,7 +64,7 @@ ), ), ], - ids=["replace_feature_column", "replace_target_column"], + ids=["replace_feature_column_with_one", "replace_feature_column_with_multiple", "replace_target_column"], ) def test_should_replace_column( original_table: TaggedTable, @@ -55,3 +74,40 @@ def test_should_replace_column( ) -> None: new_table = original_table.replace_column(column_name_to_be_replaced, new_columns) assert_that_tagged_tables_are_equal(new_table, result_table) + +@pytest.mark.parametrize( + ("original_table", "new_columns", "column_name_to_be_replaced"), + [ + ( + TaggedTable( + { + "feature_old": [0, 1, 2], + "target_old": [3, 4, 5], + }, + "target_old", + ), + [], + "target_old", + ), + ( + TaggedTable( + { + "feature_old": [0, 1, 2], + "target_old": [3, 4, 5], + }, + "target_old", + ), + [Column("target_new_a", [2, 1, 0]), Column("target_new_b"), [4, 2, 0]], + "target_old", + ), + ], + ids=["zero_columns", "multiple_columns"] +) +def test_should_throw_illegal_schema_modification( + original_table: TaggedTable, + new_columns: list[Column], + column_name_to_be_replaced: str +) -> None: + with pytest.raises(IllegalSchemaModificationError, match='Target column "target_old" can only be replaced by ' + 'exactly one new column.'): + original_table.replace_column(column_name_to_be_replaced, new_columns) From 11fd2f6e7da96e6609a47bfde0669753a0760d71 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 9 Jun 2023 12:46:48 +0000 Subject: [PATCH 062/149] style: apply automated linter fixes --- .../_table/_tagged_table/test_replace_column.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py index 768fa2cd8..39810d3fa 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py @@ -75,6 +75,7 @@ def test_should_replace_column( new_table = original_table.replace_column(column_name_to_be_replaced, new_columns) assert_that_tagged_tables_are_equal(new_table, result_table) + @pytest.mark.parametrize( ("original_table", "new_columns", "column_name_to_be_replaced"), [ @@ -101,13 +102,13 @@ def test_should_replace_column( "target_old", ), ], - ids=["zero_columns", "multiple_columns"] + ids=["zero_columns", "multiple_columns"], ) def test_should_throw_illegal_schema_modification( - original_table: TaggedTable, - new_columns: list[Column], - column_name_to_be_replaced: str + original_table: TaggedTable, new_columns: list[Column], column_name_to_be_replaced: str, ) -> None: - with pytest.raises(IllegalSchemaModificationError, match='Target column "target_old" can only be replaced by ' - 'exactly one new column.'): + with pytest.raises( + IllegalSchemaModificationError, + match='Target column "target_old" can only be replaced by exactly one new column.', + ): original_table.replace_column(column_name_to_be_replaced, new_columns) From d219a4fd701369dbe74e5236cf162be5c10162d2 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 9 Jun 2023 12:48:33 +0000 Subject: [PATCH 063/149] style: apply automated linter fixes --- .../containers/_table/_tagged_table/test_replace_column.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py index 39810d3fa..02793de82 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py @@ -105,7 +105,9 @@ def test_should_replace_column( ids=["zero_columns", "multiple_columns"], ) def test_should_throw_illegal_schema_modification( - original_table: TaggedTable, new_columns: list[Column], column_name_to_be_replaced: str, + original_table: TaggedTable, + new_columns: list[Column], + column_name_to_be_replaced: str, ) -> None: with pytest.raises( IllegalSchemaModificationError, From eb09acce6c087150c3ad4c1174b56620f62be3a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 9 Jun 2023 15:03:56 +0200 Subject: [PATCH 064/149] feat: add remove_target_column to TaggedTable Also added test. --- .../data/tabular/containers/_tagged_table.py | 20 +++++++++++- .../test_remove_target_column.py | 32 +++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_target_column.py diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index deb4828f3..0c8252d0f 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -316,7 +316,7 @@ def keep_only_columns(self, column_names: list[str]) -> Table: # throw exception if appropriate, # investigate and fix pytest errors. # if self.target.name not in column_names: - # raise IllegalSchemaModificationError(f'Must keep target column "{self.target.name}".') + # raise IllegalSchemaModificationError(f'Must keep target column "self.target.name".') return super().keep_only_columns(column_names) def remove_columns(self, column_names: list[str]) -> Table: @@ -726,3 +726,21 @@ def inverse_transform_table(self, transformer: InvertibleTableTransformer) -> Ta 2 a d 3 """ return TaggedTable._from_table(transformer.inverse_transform(self), self.target.name) + + # ------------------------------------------------------------------------------------------------------------------ + # New methods specific to `TaggedTable`s: + # ------------------------------------------------------------------------------------------------------------------ + def remove_target_column(self) -> Table: + """ + Return a new table consisting of only the feature columns. + + The original TaggedTable is not modified. + + Returns + ------- + table : Table + The table, without the target column. + + """ + return super().remove_columns([self.target.name]) + diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_target_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_target_column.py new file mode 100644 index 000000000..cc989ec4c --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_target_column.py @@ -0,0 +1,32 @@ +import pytest +from safeds.data.tabular.containers import TaggedTable, Table + + +@pytest.mark.parametrize( + ("table", "expected"), + [ + ( + TaggedTable._from_table( + Table( + { + "feature_a": [0, 1, 2], + "feature_b": [3, 4, 5], + "target": [6, 7, 8], + } + ), + "target" + ), + Table( + { + "feature_a": [0, 1, 2], + "feature_b": [3, 4, 5], + } + ) + ), + ], + ids=["normal"], +) +def test_should_remove_target_column(table: TaggedTable, expected: Table) -> None: + new_table = table.remove_target_column() + assert new_table.schema == expected.schema + assert new_table == expected From 1809eb527a10ae5b5bb06d8c7ea1f6a09b4d02a7 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 9 Jun 2023 13:05:54 +0000 Subject: [PATCH 065/149] style: apply automated linter fixes --- src/safeds/data/tabular/containers/_tagged_table.py | 1 - .../_table/_tagged_table/test_remove_target_column.py | 10 +++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 0c8252d0f..a8c878c9e 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -743,4 +743,3 @@ def remove_target_column(self) -> Table: """ return super().remove_columns([self.target.name]) - diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_target_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_target_column.py index cc989ec4c..6f9c01025 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_target_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_target_column.py @@ -1,5 +1,5 @@ import pytest -from safeds.data.tabular.containers import TaggedTable, Table +from safeds.data.tabular.containers import Table, TaggedTable @pytest.mark.parametrize( @@ -12,16 +12,16 @@ "feature_a": [0, 1, 2], "feature_b": [3, 4, 5], "target": [6, 7, 8], - } + }, ), - "target" + "target", ), Table( { "feature_a": [0, 1, 2], "feature_b": [3, 4, 5], - } - ) + }, + ), ), ], ids=["normal"], From 925c100c7f0e0fd1a4d8448b58d1c27f93db96f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 9 Jun 2023 15:11:41 +0200 Subject: [PATCH 066/149] Make test_classifier.py use remove_target_column --- tests/safeds/ml/classical/classification/test_classifier.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index c198c46ac..22b0ab26b 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -122,8 +122,8 @@ def test_should_include_features_of_input_table(self, classifier: Classifier, va def test_should_include_complete_input_table(self, classifier: Classifier, valid_data: TaggedTable) -> None: fitted_regressor = classifier.fit(valid_data) - prediction = fitted_regressor.predict(valid_data.remove_columns(["target"])) - assert prediction.remove_columns(["target"]) == valid_data.remove_columns(["target"]) + prediction = fitted_regressor.predict(valid_data.remove_target_column()) + assert prediction.remove_target_column() == valid_data.remove_target_column() def test_should_set_correct_target_name(self, classifier: Classifier, valid_data: TaggedTable) -> None: fitted_classifier = classifier.fit(valid_data) @@ -149,7 +149,7 @@ def test_should_raise_if_dataset_contains_target(self, classifier: Classifier, v def test_should_raise_if_dataset_misses_features(self, classifier: Classifier, valid_data: TaggedTable) -> None: fitted_classifier = classifier.fit(valid_data) with pytest.raises(DatasetMissesFeaturesError, match="[feat1, feat2]"): - fitted_classifier.predict(valid_data.remove_columns(["feat1", "feat2", "target"])) + fitted_classifier.predict(valid_data.remove_target_column().remove_columns(["feat1", "feat2"])) def test_should_raise_on_invalid_data( self, From b44e8a45aa29ed3cae197d9cde47df811d7eb2cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 9 Jun 2023 15:15:34 +0200 Subject: [PATCH 067/149] Make test_regressor.py use remove_target_column --- tests/safeds/ml/classical/regression/test_regressor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index 21aa4816d..08777fb5e 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -133,8 +133,8 @@ def test_should_include_features_of_input_table(self, regressor: Regressor, vali def test_should_include_complete_input_table(self, regressor: Regressor, valid_data: TaggedTable) -> None: fitted_regressor = regressor.fit(valid_data) - prediction = fitted_regressor.predict(valid_data.remove_columns(["target"])) - assert prediction.remove_columns(["target"]) == valid_data.remove_columns(["target"]) + prediction = fitted_regressor.predict(valid_data.remove_target_column()) + assert prediction.remove_target_column() == valid_data.remove_target_column() def test_should_set_correct_target_name(self, regressor: Regressor, valid_data: TaggedTable) -> None: fitted_regressor = regressor.fit(valid_data) @@ -160,7 +160,7 @@ def test_should_raise_if_dataset_contains_target(self, regressor: Regressor, val def test_should_raise_if_dataset_misses_features(self, regressor: Regressor, valid_data: TaggedTable) -> None: fitted_regressor = regressor.fit(valid_data) with pytest.raises(DatasetMissesFeaturesError, match="[feat1, feat2]"): - fitted_regressor.predict(valid_data.remove_columns(["feat1", "feat2", "target"])) + fitted_regressor.predict(valid_data.remove_target_column().remove_columns(["feat1", "feat2"])) def test_should_raise_on_invalid_data( self, From 3ce32f70b7183dc32b1e59e2462c088c4dcbe459 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 9 Jun 2023 15:20:40 +0200 Subject: [PATCH 068/149] Override remove_column --- src/safeds/data/tabular/containers/_tagged_table.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index a8c878c9e..b8fdeaecf 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -319,8 +319,7 @@ def keep_only_columns(self, column_names: list[str]) -> Table: # raise IllegalSchemaModificationError(f'Must keep target column "self.target.name".') return super().keep_only_columns(column_names) - def remove_columns(self, column_names: list[str]) -> Table: - # TODO: Change return type to TaggedTable (in function definition and in docstring). + def remove_columns(self, column_names: list[str]) -> TaggedTable: """ Return a table without the given column(s). @@ -333,7 +332,7 @@ def remove_columns(self, column_names: list[str]) -> Table: Returns ------- - table : Table + table : TaggedTable A table without the given columns. Raises @@ -346,9 +345,7 @@ def remove_columns(self, column_names: list[str]) -> Table: try: return TaggedTable._from_table(super().remove_columns(column_names), self.target.name) except UnknownColumnNameError: - # raise ColumnIsTargetError(self.target.name) from None - # TODO: Revert this again - return super().remove_columns(column_names) + raise ColumnIsTargetError(self.target.name) from None def remove_columns_with_missing_values(self) -> TaggedTable: """ From ffe64f5aae8cdd5989bb47ff85d6a069e01d2dec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 9 Jun 2023 15:39:26 +0200 Subject: [PATCH 069/149] Comment-out currently unreachable lines --- src/safeds/data/tabular/containers/_tagged_table.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index b8fdeaecf..84aa39dc0 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -673,9 +673,14 @@ def transform_table(self, transformer: TableTransformer) -> TaggedTable: 1 0.0 1.0 0.0 1.0 0.0 2 2 1.0 0.0 0.0 0.0 1.0 3 """ - transformed_table = transformer.transform(self) - if self.target.name in transformer.get_names_of_removed_columns(): - raise ColumnIsTargetError(self.target.name) + try: + transformed_table = transformer.transform(self) + except ColumnIsTargetError as e: # can happen for example with OneHotEncoder + raise ColumnIsTargetError(self.target.name) from e # Re-throw for shorter stacktrace + # For future transformers, it may also happen that they remove the target column without throwing. + # If this ever happens, comment-in these lines (currently out-commented b/c of code coverage): + # if self.target.name in transformer.get_names_of_removed_columns(): + # raise ColumnIsTargetError(self.target.name) return TaggedTable._from_table(transformed_table, self.target.name) def inverse_transform_table(self, transformer: InvertibleTableTransformer) -> TaggedTable: From de0dc5791ccc309eadd01f5cb7f841a6f9bc95f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 9 Jun 2023 15:48:13 +0200 Subject: [PATCH 070/149] Adapt signature of keep_only_columns --- src/safeds/data/tabular/containers/_tagged_table.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 84aa39dc0..a2c243f0b 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -287,7 +287,7 @@ def filter_rows(self, query: Callable[[Row], bool]) -> TaggedTable: """ return TaggedTable._from_table(super().filter_rows(query), target_name=self.target.name) - def keep_only_columns(self, column_names: list[str]) -> Table: + def keep_only_columns(self, column_names: list[str]) -> TaggedTable: # TODO: Change return type to TaggedTable (in function definition and in docstring). """ Return a table with only the given column(s). @@ -301,7 +301,7 @@ def keep_only_columns(self, column_names: list[str]) -> Table: Returns ------- - table : Table + table : TaggedTable A table containing only the given column(s). Raises @@ -316,7 +316,7 @@ def keep_only_columns(self, column_names: list[str]) -> Table: # throw exception if appropriate, # investigate and fix pytest errors. # if self.target.name not in column_names: - # raise IllegalSchemaModificationError(f'Must keep target column "self.target.name".') + # raise IllegalSchemaModificationError(f'Must keep target column and at least one feature column.') return super().keep_only_columns(column_names) def remove_columns(self, column_names: list[str]) -> TaggedTable: From a1d7fb1bcda7c38265c7bfd5b763ebda6b17ceab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 9 Jun 2023 15:50:34 +0200 Subject: [PATCH 071/149] style: Drop solved todo comment --- src/safeds/data/tabular/containers/_tagged_table.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index a2c243f0b..8556da7ab 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -288,7 +288,6 @@ def filter_rows(self, query: Callable[[Row], bool]) -> TaggedTable: return TaggedTable._from_table(super().filter_rows(query), target_name=self.target.name) def keep_only_columns(self, column_names: list[str]) -> TaggedTable: - # TODO: Change return type to TaggedTable (in function definition and in docstring). """ Return a table with only the given column(s). From adbb9bde88b69a8d36b1c7a26d66def2594237f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 9 Jun 2023 15:51:38 +0200 Subject: [PATCH 072/149] style: drop unneeded format string --- src/safeds/data/tabular/containers/_tagged_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 8556da7ab..b5c079a73 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -315,7 +315,7 @@ def keep_only_columns(self, column_names: list[str]) -> TaggedTable: # throw exception if appropriate, # investigate and fix pytest errors. # if self.target.name not in column_names: - # raise IllegalSchemaModificationError(f'Must keep target column and at least one feature column.') + # raise IllegalSchemaModificationError("Must keep target column and at least one feature column.") return super().keep_only_columns(column_names) def remove_columns(self, column_names: list[str]) -> TaggedTable: From 1dd8256da18a38cb21b8f149c3d9fa813458095d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 9 Jun 2023 15:57:31 +0200 Subject: [PATCH 073/149] Fix comments --- src/safeds/data/tabular/containers/_tagged_table.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index b5c079a73..55f0237d1 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -287,7 +287,8 @@ def filter_rows(self, query: Callable[[Row], bool]) -> TaggedTable: """ return TaggedTable._from_table(super().filter_rows(query), target_name=self.target.name) - def keep_only_columns(self, column_names: list[str]) -> TaggedTable: + def keep_only_columns(self, column_names: list[str]) -> Table: + # TODO: Change return type in signature and docstring. """ Return a table with only the given column(s). @@ -300,7 +301,7 @@ def keep_only_columns(self, column_names: list[str]) -> TaggedTable: Returns ------- - table : TaggedTable + table : Table A table containing only the given column(s). Raises @@ -310,13 +311,14 @@ def keep_only_columns(self, column_names: list[str]) -> TaggedTable: IllegalSchemaModificationError If none of the given columns is the target column. """ + return super().keep_only_columns(column_names) # TODO: # Re-build TaggedTable before returning, # throw exception if appropriate, # investigate and fix pytest errors. # if self.target.name not in column_names: - # raise IllegalSchemaModificationError("Must keep target column and at least one feature column.") - return super().keep_only_columns(column_names) + # raise IllegalSchemaModificationError("Must keep target column and at least one feature column.") + # return TaggedTable._from_table(super().keep_only_columns(column_names), self.target.name) def remove_columns(self, column_names: list[str]) -> TaggedTable: """ From a3f431b36952ab7fd483000ce7262a11e694633f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 9 Jun 2023 16:24:14 +0200 Subject: [PATCH 074/149] Add instanceof check to util_sklearn methods --- src/safeds/ml/classical/_util_sklearn.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/safeds/ml/classical/_util_sklearn.py b/src/safeds/ml/classical/_util_sklearn.py index 6b581c3d8..5991e4847 100644 --- a/src/safeds/ml/classical/_util_sklearn.py +++ b/src/safeds/ml/classical/_util_sklearn.py @@ -83,6 +83,8 @@ def predict(model: Any, dataset: Table, feature_names: list[str] | None, target_ if missing_feature_names: raise DatasetMissesFeaturesError(missing_feature_names) + if isinstance(dataset, TaggedTable): + dataset = dataset.remove_target_column() dataset_df = dataset.keep_only_columns(feature_names)._data dataset_df.columns = feature_names From f08a8af00973cd6fd183aae89659a1b0bcfe161e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 9 Jun 2023 16:33:17 +0200 Subject: [PATCH 075/149] Do type check as early as possible --- src/safeds/ml/classical/_util_sklearn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds/ml/classical/_util_sklearn.py b/src/safeds/ml/classical/_util_sklearn.py index 5991e4847..3c23fa5b6 100644 --- a/src/safeds/ml/classical/_util_sklearn.py +++ b/src/safeds/ml/classical/_util_sklearn.py @@ -82,9 +82,9 @@ def predict(model: Any, dataset: Table, feature_names: list[str] | None, target_ missing_feature_names = [feature_name for feature_name in feature_names if not dataset.has_column(feature_name)] if missing_feature_names: raise DatasetMissesFeaturesError(missing_feature_names) - if isinstance(dataset, TaggedTable): - dataset = dataset.remove_target_column() + dataset = dataset.remove_target_column() # Cast to Table type, so Python will call the right methods... + dataset_df = dataset.keep_only_columns(feature_names)._data dataset_df.columns = feature_names From 65078249d3d4ce2fa2950d878eac3f54320b594d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 23 Jun 2023 09:37:26 +0200 Subject: [PATCH 076/149] Add to_table method to TaggedTable class --- .../data/tabular/containers/_tagged_table.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 55f0237d1..e3c1705c3 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -107,6 +107,26 @@ def _from_table( return result + @staticmethod + def to_table(table: TaggedTable) -> Table: + """ + Remove the tagging from a TaggedTable. + + The original TaggedTable is not modified. + + Parameters + ---------- + table: TaggedTable + The TaggedTable. + + Returns + ------- + table: Table + The table as an untagged Table, i.e. without the information about which columns are features or target. + + """ + return table.features.add_column(table.target) + # ------------------------------------------------------------------------------------------------------------------ # Dunder methods # ------------------------------------------------------------------------------------------------------------------ From 76ed6ad86f10e2807492019ecf3ee538d3a95355 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 23 Jun 2023 09:57:57 +0200 Subject: [PATCH 077/149] Drop remove_target_column method from TaggedTable The `features` property already does exactly the same. --- .../data/tabular/containers/_tagged_table.py | 17 ---------- src/safeds/ml/classical/_util_sklearn.py | 2 +- .../test_remove_target_column.py | 32 ------------------- .../classification/test_classifier.py | 6 ++-- .../ml/classical/regression/test_regressor.py | 6 ++-- 5 files changed, 7 insertions(+), 56 deletions(-) delete mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_target_column.py diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index e3c1705c3..fed8a5c58 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -749,20 +749,3 @@ def inverse_transform_table(self, transformer: InvertibleTableTransformer) -> Ta 2 a d 3 """ return TaggedTable._from_table(transformer.inverse_transform(self), self.target.name) - - # ------------------------------------------------------------------------------------------------------------------ - # New methods specific to `TaggedTable`s: - # ------------------------------------------------------------------------------------------------------------------ - def remove_target_column(self) -> Table: - """ - Return a new table consisting of only the feature columns. - - The original TaggedTable is not modified. - - Returns - ------- - table : Table - The table, without the target column. - - """ - return super().remove_columns([self.target.name]) diff --git a/src/safeds/ml/classical/_util_sklearn.py b/src/safeds/ml/classical/_util_sklearn.py index 2355f050f..8cd2c0229 100644 --- a/src/safeds/ml/classical/_util_sklearn.py +++ b/src/safeds/ml/classical/_util_sklearn.py @@ -127,7 +127,7 @@ def predict(model: Any, dataset: Table, feature_names: list[str] | None, target_ if missing_feature_names: raise DatasetMissesFeaturesError(missing_feature_names) if isinstance(dataset, TaggedTable): - dataset = dataset.remove_target_column() # Cast to Table type, so Python will call the right methods... + dataset = dataset.features # Cast to Table type, so Python will call the right methods... if dataset.number_of_rows == 0: raise DatasetMissesDataError diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_target_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_target_column.py deleted file mode 100644 index 6f9c01025..000000000 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_target_column.py +++ /dev/null @@ -1,32 +0,0 @@ -import pytest -from safeds.data.tabular.containers import Table, TaggedTable - - -@pytest.mark.parametrize( - ("table", "expected"), - [ - ( - TaggedTable._from_table( - Table( - { - "feature_a": [0, 1, 2], - "feature_b": [3, 4, 5], - "target": [6, 7, 8], - }, - ), - "target", - ), - Table( - { - "feature_a": [0, 1, 2], - "feature_b": [3, 4, 5], - }, - ), - ), - ], - ids=["normal"], -) -def test_should_remove_target_column(table: TaggedTable, expected: Table) -> None: - new_table = table.remove_target_column() - assert new_table.schema == expected.schema - assert new_table == expected diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index ed58fe6f8..aae25aade 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -169,8 +169,8 @@ def test_should_include_features_of_input_table(self, classifier: Classifier, va def test_should_include_complete_input_table(self, classifier: Classifier, valid_data: TaggedTable) -> None: fitted_regressor = classifier.fit(valid_data) - prediction = fitted_regressor.predict(valid_data.remove_target_column()) - assert prediction.remove_target_column() == valid_data.remove_target_column() + prediction = fitted_regressor.predict(valid_data.features) + assert prediction.features == valid_data.features def test_should_set_correct_target_name(self, classifier: Classifier, valid_data: TaggedTable) -> None: fitted_classifier = classifier.fit(valid_data) @@ -196,7 +196,7 @@ def test_should_raise_if_dataset_contains_target(self, classifier: Classifier, v def test_should_raise_if_dataset_misses_features(self, classifier: Classifier, valid_data: TaggedTable) -> None: fitted_classifier = classifier.fit(valid_data) with pytest.raises(DatasetMissesFeaturesError, match="[feat1, feat2]"): - fitted_classifier.predict(valid_data.remove_target_column().remove_columns(["feat1", "feat2"])) + fitted_classifier.predict(valid_data.features.remove_columns(["feat1", "feat2"])) @pytest.mark.parametrize( ("invalid_data", "expected_error", "expected_error_msg"), diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index 078fe8ad7..cfa5a08c3 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -170,8 +170,8 @@ def test_should_include_features_of_input_table(self, regressor: Regressor, vali def test_should_include_complete_input_table(self, regressor: Regressor, valid_data: TaggedTable) -> None: fitted_regressor = regressor.fit(valid_data) - prediction = fitted_regressor.predict(valid_data.remove_target_column()) - assert prediction.remove_target_column() == valid_data.remove_target_column() + prediction = fitted_regressor.predict(valid_data.features) + assert prediction.features == valid_data.features def test_should_set_correct_target_name(self, regressor: Regressor, valid_data: TaggedTable) -> None: fitted_regressor = regressor.fit(valid_data) @@ -197,7 +197,7 @@ def test_should_raise_if_dataset_contains_target(self, regressor: Regressor, val def test_should_raise_if_dataset_misses_features(self, regressor: Regressor, valid_data: TaggedTable) -> None: fitted_regressor = regressor.fit(valid_data) with pytest.raises(DatasetMissesFeaturesError, match="[feat1, feat2]"): - fitted_regressor.predict(valid_data.remove_target_column().remove_columns(["feat1", "feat2"])) + fitted_regressor.predict(valid_data.features.remove_columns(["feat1", "feat2"])) @pytest.mark.parametrize( ("invalid_data", "expected_error", "expected_error_msg"), From c4185c9f1da5d99e8b22080849ff675482fd79d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 23 Jun 2023 10:03:51 +0200 Subject: [PATCH 078/149] Add test for to_table --- .../_table/_tagged_table/test_to_table.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_to_table.py diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_to_table.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_to_table.py new file mode 100644 index 000000000..cc1888595 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_to_table.py @@ -0,0 +1,22 @@ +from safeds.data.tabular.containers import TaggedTable, Table + + +def test_should_return_table() -> None: + tagged_table = TaggedTable( + { + "feature_1": [3, 9, 6], + "feature_2": [6, 12, 9], + "target": [1, 3, 2], + }, + "target", + ) + expected = Table( + { + "feature_1": [3, 9, 6], + "feature_2": [6, 12, 9], + "target": [1, 3, 2], + }, + ) + table = TaggedTable.to_table(tagged_table) + assert table.schema == expected.schema + assert table == expected From 5696f9140fb3d4220d9f8d775e528f486261a3fc Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 23 Jun 2023 08:05:49 +0000 Subject: [PATCH 079/149] style: apply automated linter fixes --- .../tabular/containers/_table/_tagged_table/test_to_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_to_table.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_to_table.py index cc1888595..e9c968d5e 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_to_table.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_to_table.py @@ -1,4 +1,4 @@ -from safeds.data.tabular.containers import TaggedTable, Table +from safeds.data.tabular.containers import Table, TaggedTable def test_should_return_table() -> None: From 68dfc0017da8afaf4716a5b8e0de23c9e3763fc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 23 Jun 2023 10:10:02 +0200 Subject: [PATCH 080/149] Make to_table an instance method --- .../data/tabular/containers/_tagged_table.py | 43 ++++++++++--------- .../_table/_tagged_table/test_to_table.py | 2 +- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index fed8a5c58..2adbf503f 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -107,26 +107,6 @@ def _from_table( return result - @staticmethod - def to_table(table: TaggedTable) -> Table: - """ - Remove the tagging from a TaggedTable. - - The original TaggedTable is not modified. - - Parameters - ---------- - table: TaggedTable - The TaggedTable. - - Returns - ------- - table: Table - The table as an untagged Table, i.e. without the information about which columns are features or target. - - """ - return table.features.add_column(table.target) - # ------------------------------------------------------------------------------------------------------------------ # Dunder methods # ------------------------------------------------------------------------------------------------------------------ @@ -192,6 +172,29 @@ def features(self) -> Table: def target(self) -> Column: return self._target + # ------------------------------------------------------------------------------------------------------------------ + # Conversion back to table + # ------------------------------------------------------------------------------------------------------------------ + + def to_table(self: TaggedTable) -> Table: + """ + Remove the tagging from a TaggedTable. + + The original TaggedTable is not modified. + + Parameters + ---------- + self: TaggedTable + The TaggedTable. + + Returns + ------- + table: Table + The table as an untagged Table, i.e. without the information about which columns are features or target. + + """ + return self.features.add_column(self.target) + # ------------------------------------------------------------------------------------------------------------------ # Overriden methods from Table class: # ------------------------------------------------------------------------------------------------------------------ diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_to_table.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_to_table.py index cc1888595..932f1a586 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_to_table.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_to_table.py @@ -17,6 +17,6 @@ def test_should_return_table() -> None: "target": [1, 3, 2], }, ) - table = TaggedTable.to_table(tagged_table) + table = tagged_table.to_table() assert table.schema == expected.schema assert table == expected From 8af11535d2dc243cc0a07ecd28461d19be66e402 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 23 Jun 2023 10:24:35 +0200 Subject: [PATCH 081/149] Remove method calls on semi-initialized table --- src/safeds/data/tabular/containers/_tagged_table.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 2adbf503f..29449dd41 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -102,8 +102,8 @@ def _from_table( result._data = table._data result._schema = table.schema - result._features = result.keep_only_columns(feature_names) - result._target = result.get_column(target_name) + result._features = table.keep_only_columns(feature_names) + result._target = table.get_column(target_name) return result @@ -157,8 +157,8 @@ def __init__( if len(feature_names) == 0: raise ValueError("At least one feature column must be specified.") - self._features: Table = self.keep_only_columns(feature_names) - self._target: Column = self.get_column(target_name) + self._features: Table = super().keep_only_columns(feature_names) + self._target: Column = super().get_column(target_name) # ------------------------------------------------------------------------------------------------------------------ # Properties From 94b0a1881272d4092e8807168baa3481ff4ab566 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 23 Jun 2023 11:12:12 +0200 Subject: [PATCH 082/149] Add type check to from_table method --- src/safeds/data/tabular/containers/_tagged_table.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 29449dd41..a933dd756 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -83,6 +83,9 @@ def _from_table( >>> table = Table({"col1": ["a", "b", "c", "a"], "col2": [1, 2, 3, 4]}) >>> tagged_table = TaggedTable._from_table(table, "col2", ["col1"]) """ + # Cast to normal Table if necessary: + if isinstance(table, TaggedTable): + table = table.to_table() if target_name not in table.column_names: raise UnknownColumnNameError([target_name]) From 6cb25c59cbaf4f74638c765837a04b27e1544ae1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 23 Jun 2023 11:21:17 +0200 Subject: [PATCH 083/149] Implement keep_only_columns --- .../data/tabular/containers/_tagged_table.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index a933dd756..92b641fab 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -313,8 +313,7 @@ def filter_rows(self, query: Callable[[Row], bool]) -> TaggedTable: """ return TaggedTable._from_table(super().filter_rows(query), target_name=self.target.name) - def keep_only_columns(self, column_names: list[str]) -> Table: - # TODO: Change return type in signature and docstring. + def keep_only_columns(self, column_names: list[str]) -> TaggedTable: """ Return a table with only the given column(s). @@ -327,7 +326,7 @@ def keep_only_columns(self, column_names: list[str]) -> Table: Returns ------- - table : Table + table : TaggedTable A table containing only the given column(s). Raises @@ -337,14 +336,9 @@ def keep_only_columns(self, column_names: list[str]) -> Table: IllegalSchemaModificationError If none of the given columns is the target column. """ - return super().keep_only_columns(column_names) - # TODO: - # Re-build TaggedTable before returning, - # throw exception if appropriate, - # investigate and fix pytest errors. - # if self.target.name not in column_names: - # raise IllegalSchemaModificationError("Must keep target column and at least one feature column.") - # return TaggedTable._from_table(super().keep_only_columns(column_names), self.target.name) + if self.target.name not in column_names: + raise IllegalSchemaModificationError("Must keep target column and at least one feature column.") + return TaggedTable._from_table(super().keep_only_columns(column_names), self.target.name) def remove_columns(self, column_names: list[str]) -> TaggedTable: """ From 636a4d0fd139e836eb2e31c86a4dcc9a6c958784 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 23 Jun 2023 13:23:48 +0200 Subject: [PATCH 084/149] Add as_table method --- src/safeds/data/tabular/containers/_table.py | 14 +++++++++++++ .../containers/_table/test_as_table.py | 21 +++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 tests/safeds/data/tabular/containers/_table/test_as_table.py diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index ad6861b4b..bfabef36c 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -612,6 +612,20 @@ def summary(self) -> Table: # Transformations # ------------------------------------------------------------------------------------------------------------------ + # This method is meant as a way to "cast" instances of subclasses of `Table` to a proper `Table`, dropping any + # additional constraints that might have to hold in the subclass. + # Override accordingly in subclasses. + def _as_table(self: Table) -> Table: + """ + Transform the table to an instance of the Table class. + + Returns + ------- + table: Table + The table, as an instance of the Table class. + """ + return self + def add_column(self, column: Column) -> Table: """ Return the original table with the provided column attached at the end. diff --git a/tests/safeds/data/tabular/containers/_table/test_as_table.py b/tests/safeds/data/tabular/containers/_table/test_as_table.py new file mode 100644 index 000000000..1cf7abd69 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/test_as_table.py @@ -0,0 +1,21 @@ +import pytest +from safeds.data.tabular.containers import Table + + +@pytest.mark.parametrize( + "table", + [ + Table( + { + "col1": [1, 2], + "col2:": [3, 4] + } + ), + Table() + ], + ids=["table", "empty"] +) +def should_return_table(table: Table) -> None: + new_table = table._as_table() + assert table.schema == new_table.schema + assert table == new_table From 0e0e8f6603cde854b78a5cd0f6bbde99fade7e51 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 23 Jun 2023 11:25:35 +0000 Subject: [PATCH 085/149] style: apply automated linter fixes --- .../tabular/containers/_table/test_as_table.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/test_as_table.py b/tests/safeds/data/tabular/containers/_table/test_as_table.py index 1cf7abd69..7f72e76e8 100644 --- a/tests/safeds/data/tabular/containers/_table/test_as_table.py +++ b/tests/safeds/data/tabular/containers/_table/test_as_table.py @@ -2,19 +2,7 @@ from safeds.data.tabular.containers import Table -@pytest.mark.parametrize( - "table", - [ - Table( - { - "col1": [1, 2], - "col2:": [3, 4] - } - ), - Table() - ], - ids=["table", "empty"] -) +@pytest.mark.parametrize("table", [Table({"col1": [1, 2], "col2:": [3, 4]}), Table()], ids=["table", "empty"]) def should_return_table(table: Table) -> None: new_table = table._as_table() assert table.schema == new_table.schema From 67f694cd2ed781d6feb4985f351cc58c77172c95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 23 Jun 2023 13:31:39 +0200 Subject: [PATCH 086/149] Rename to _as_table --- src/safeds/data/tabular/containers/_table.py | 2 ++ src/safeds/data/tabular/containers/_tagged_table.py | 12 +++--------- .../{test_to_table.py => test_as_table.py} | 2 +- 3 files changed, 6 insertions(+), 10 deletions(-) rename tests/safeds/data/tabular/containers/_table/_tagged_table/{test_to_table.py => test_as_table.py} (93%) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index bfabef36c..6ed96e449 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -619,6 +619,8 @@ def _as_table(self: Table) -> Table: """ Transform the table to an instance of the Table class. + The original table is not modified. + Returns ------- table: Table diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 92b641fab..17031dad2 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -83,9 +83,7 @@ def _from_table( >>> table = Table({"col1": ["a", "b", "c", "a"], "col2": [1, 2, 3, 4]}) >>> tagged_table = TaggedTable._from_table(table, "col2", ["col1"]) """ - # Cast to normal Table if necessary: - if isinstance(table, TaggedTable): - table = table.to_table() + table = table._as_table() if target_name not in table.column_names: raise UnknownColumnNameError([target_name]) @@ -176,10 +174,10 @@ def target(self) -> Column: return self._target # ------------------------------------------------------------------------------------------------------------------ - # Conversion back to table + # Overriden methods from Table class: # ------------------------------------------------------------------------------------------------------------------ - def to_table(self: TaggedTable) -> Table: + def _as_table(self: TaggedTable) -> Table: """ Remove the tagging from a TaggedTable. @@ -198,10 +196,6 @@ def to_table(self: TaggedTable) -> Table: """ return self.features.add_column(self.target) - # ------------------------------------------------------------------------------------------------------------------ - # Overriden methods from Table class: - # ------------------------------------------------------------------------------------------------------------------ - def add_column(self, column: Column) -> TaggedTable: """ Return the original table with the provided column attached at the end, as a feature column. diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_to_table.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_as_table.py similarity index 93% rename from tests/safeds/data/tabular/containers/_table/_tagged_table/test_to_table.py rename to tests/safeds/data/tabular/containers/_table/_tagged_table/test_as_table.py index c7309a1ac..a1e255135 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_to_table.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_as_table.py @@ -17,6 +17,6 @@ def test_should_return_table() -> None: "target": [1, 3, 2], }, ) - table = tagged_table.to_table() + table = tagged_table._as_table() assert table.schema == expected.schema assert table == expected From 813ca3f2067799cec2b1621c5e23ba41301e4b67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 23 Jun 2023 13:34:45 +0200 Subject: [PATCH 087/149] Fix call in OneHotEncoder --- src/safeds/data/tabular/transformation/_one_hot_encoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/transformation/_one_hot_encoder.py b/src/safeds/data/tabular/transformation/_one_hot_encoder.py index 1070478fe..7c4ed1f63 100644 --- a/src/safeds/data/tabular/transformation/_one_hot_encoder.py +++ b/src/safeds/data/tabular/transformation/_one_hot_encoder.py @@ -103,7 +103,7 @@ def fit(self, table: Table, column_names: list[str] | None) -> OneHotEncoder: if table.number_of_rows == 0: raise ValueError("The OneHotEncoder cannot be fitted because the table contains 0 rows") - if table.keep_only_columns(column_names).remove_columns_with_non_numerical_values().number_of_columns > 0: + if table._as_table().keep_only_columns(column_names).remove_columns_with_non_numerical_values().number_of_columns > 0: warnings.warn( ( "The columns" From cbea9c3bb8a94057d6aba483b0894ddfbc52e19c Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 23 Jun 2023 11:37:01 +0000 Subject: [PATCH 088/149] style: apply automated linter fixes --- .../data/tabular/transformation/_one_hot_encoder.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/transformation/_one_hot_encoder.py b/src/safeds/data/tabular/transformation/_one_hot_encoder.py index 7c4ed1f63..81df7d4c3 100644 --- a/src/safeds/data/tabular/transformation/_one_hot_encoder.py +++ b/src/safeds/data/tabular/transformation/_one_hot_encoder.py @@ -103,7 +103,13 @@ def fit(self, table: Table, column_names: list[str] | None) -> OneHotEncoder: if table.number_of_rows == 0: raise ValueError("The OneHotEncoder cannot be fitted because the table contains 0 rows") - if table._as_table().keep_only_columns(column_names).remove_columns_with_non_numerical_values().number_of_columns > 0: + if ( + table._as_table() + .keep_only_columns(column_names) + .remove_columns_with_non_numerical_values() + .number_of_columns + > 0 + ): warnings.warn( ( "The columns" From 0d3eae1a9036c479fc6ce6bf5f2e54f5fc8229b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 23 Jun 2023 13:37:24 +0200 Subject: [PATCH 089/149] Fix next call in OneHotEncoder --- src/safeds/data/tabular/transformation/_one_hot_encoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/transformation/_one_hot_encoder.py b/src/safeds/data/tabular/transformation/_one_hot_encoder.py index 7c4ed1f63..f39e8859b 100644 --- a/src/safeds/data/tabular/transformation/_one_hot_encoder.py +++ b/src/safeds/data/tabular/transformation/_one_hot_encoder.py @@ -271,7 +271,7 @@ def inverse_transform(self, transformed_table: Table) -> Table: if len(missing_columns) > 0: raise UnknownColumnNameError(missing_columns) - if transformed_table.keep_only_columns( + if transformed_table._as_table().keep_only_columns( _transformed_column_names, ).remove_columns_with_non_numerical_values().number_of_columns < len(_transformed_column_names): raise NonNumericColumnError( From ddeebd317189b9421e9d0ee6b89145e80d76134a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 23 Jun 2023 13:42:37 +0200 Subject: [PATCH 090/149] Fix last call in OneHotEncoder --- src/safeds/data/tabular/transformation/_one_hot_encoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/transformation/_one_hot_encoder.py b/src/safeds/data/tabular/transformation/_one_hot_encoder.py index ce7e87ab9..9a198ea0e 100644 --- a/src/safeds/data/tabular/transformation/_one_hot_encoder.py +++ b/src/safeds/data/tabular/transformation/_one_hot_encoder.py @@ -113,7 +113,7 @@ def fit(self, table: Table, column_names: list[str] | None) -> OneHotEncoder: warnings.warn( ( "The columns" - f" {table.keep_only_columns(column_names).remove_columns_with_non_numerical_values().column_names} contain" + f" {table._as_table().keep_only_columns(column_names).remove_columns_with_non_numerical_values().column_names} contain" " numerical data. The OneHotEncoder is designed to encode non-numerical values into numerical" " values" ), From 1a9ece78fd6190222b9d4ae690841e8a6690544e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 23 Jun 2023 13:51:09 +0200 Subject: [PATCH 091/149] Add generic exceptions in docstrings of superclass --- src/safeds/data/tabular/containers/_table.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 6ed96e449..1df722690 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -913,6 +913,11 @@ def remove_columns_with_missing_values(self) -> Table: ------- table : Table A table without the columns that contain missing values. + + Raises + ------ + IllegalSchemaModificationError + If removing the columns would violate an invariant in the subclass. """ return Table.from_columns([column for column in self.to_columns() if not column.has_missing_values()]) @@ -927,6 +932,10 @@ def remove_columns_with_non_numerical_values(self) -> Table: table : Table A table without the columns that contain non-numerical values. + Raises + ------ + IllegalSchemaModificationError + If removing the columns would violate an invariant in the subclass. """ return Table.from_columns([column for column in self.to_columns() if column.type.is_numeric()]) @@ -1048,6 +1057,9 @@ def replace_column(self, old_column_name: str, new_columns: list[Column]) -> Tab ColumnSizeError If the size of at least one of the new columns does not match the amount of rows. + + IllegalSchemaModificationError + If replacing the column would violate an invariant in the subclass. """ if old_column_name not in self._schema.column_names: raise UnknownColumnNameError([old_column_name]) @@ -1263,7 +1275,6 @@ def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Tabl ------ UnknownColumnNameError If the column does not exist. - """ if self.has_column(name): items: list = [transformer(item) for item in self.to_rows()] @@ -1291,6 +1302,8 @@ def transform_table(self, transformer: TableTransformer) -> Table: ------ TransformerNotFittedError If the transformer has not been fitted yet. + IllegalSchemaModificationError + If replacing the column would violate an invariant in the subclass. Examples -------- From 256ba64a6708e764501e0b88091fc50bc6e1b84b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 23 Jun 2023 14:11:27 +0200 Subject: [PATCH 092/149] Add tests for keep_only_columns --- .../_tagged_table/test_keep_only_columns.py | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py new file mode 100644 index 000000000..1804f86cb --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py @@ -0,0 +1,62 @@ +import pytest +from safeds.data.tabular.containers import Table, TaggedTable +from safeds.exceptions import IllegalSchemaModificationError + +from tests.helpers import assert_that_tagged_tables_are_equal + + +@pytest.mark.parametrize( + ("table", "column_names", "expected"), + [ + ( + TaggedTable._from_table( + Table( + { + "feat1": [1, 2, 3], + "feat2": [4, 5, 6], + "target": [7, 8, 9], + } + ), + "target" + ), + ["feat1", "target"], + TaggedTable._from_table( + Table( + { + "feat1": [1, 2, 3], + "target": [7, 8, 9], + }, + ), + "target" + ) + ) + ], + ids=["table"], +) +def test_should_return_table(table: TaggedTable, column_names: list[str], expected: TaggedTable) -> None: + new_table = table.keep_only_columns(column_names) + assert_that_tagged_tables_are_equal(new_table, expected) + + +@pytest.mark.parametrize( + ("table", "column_names"), + [ + ( + TaggedTable._from_table( + Table( + { + "feat1": [1, 2, 3], + "feat2": [4, 5, 6], + "target": [7, 8, 9], + } + ), + "target" + ), + ["feat1", "feat2"], + ) + ], + ids=["table"], +) +def should_raise_illegal_schema_modification(table: TaggedTable, column_names: list[str]) -> None: + with pytest.raises(IllegalSchemaModificationError, match="Must keep target column and at least one feature column."): + table.keep_only_columns(column_names) From 85125d7ee975e11031fd64d7d6e4ac1e448902d1 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 23 Jun 2023 12:13:13 +0000 Subject: [PATCH 093/149] style: apply automated linter fixes --- .../_tagged_table/test_keep_only_columns.py | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py index 1804f86cb..a66726570 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py @@ -15,9 +15,9 @@ "feat1": [1, 2, 3], "feat2": [4, 5, 6], "target": [7, 8, 9], - } + }, ), - "target" + "target", ), ["feat1", "target"], TaggedTable._from_table( @@ -27,9 +27,9 @@ "target": [7, 8, 9], }, ), - "target" - ) - ) + "target", + ), + ), ], ids=["table"], ) @@ -48,15 +48,17 @@ def test_should_return_table(table: TaggedTable, column_names: list[str], expect "feat1": [1, 2, 3], "feat2": [4, 5, 6], "target": [7, 8, 9], - } + }, ), - "target" + "target", ), ["feat1", "feat2"], - ) + ), ], ids=["table"], ) def should_raise_illegal_schema_modification(table: TaggedTable, column_names: list[str]) -> None: - with pytest.raises(IllegalSchemaModificationError, match="Must keep target column and at least one feature column."): + with pytest.raises( + IllegalSchemaModificationError, match="Must keep target column and at least one feature column.", + ): table.keep_only_columns(column_names) From 871460ba1bb5464467d19ba795fc177f34a5f839 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 23 Jun 2023 12:14:50 +0000 Subject: [PATCH 094/149] style: apply automated linter fixes --- .../containers/_table/_tagged_table/test_keep_only_columns.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py index a66726570..95be30e05 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py @@ -59,6 +59,7 @@ def test_should_return_table(table: TaggedTable, column_names: list[str], expect ) def should_raise_illegal_schema_modification(table: TaggedTable, column_names: list[str]) -> None: with pytest.raises( - IllegalSchemaModificationError, match="Must keep target column and at least one feature column.", + IllegalSchemaModificationError, + match="Must keep target column and at least one feature column.", ): table.keep_only_columns(column_names) From 66bcf8e3e6744e230f8731cdbf2de2317b56eedb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 23 Jun 2023 14:29:00 +0200 Subject: [PATCH 095/149] Fix name of test function --- .../containers/_table/_tagged_table/test_keep_only_columns.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py index 95be30e05..a51790e3e 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py @@ -57,7 +57,7 @@ def test_should_return_table(table: TaggedTable, column_names: list[str], expect ], ids=["table"], ) -def should_raise_illegal_schema_modification(table: TaggedTable, column_names: list[str]) -> None: +def test_should_raise_illegal_schema_modification(table: TaggedTable, column_names: list[str]) -> None: with pytest.raises( IllegalSchemaModificationError, match="Must keep target column and at least one feature column.", From e0f758775b22ab4a33e855c8ae6502348e421bb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Fri, 23 Jun 2023 15:43:34 +0200 Subject: [PATCH 096/149] feat: Added methods `TaggedTable.add_column_as_feature` and `Taggedtable.add_columns_as_features` feat: Added specific `target_names` to each method in `TaggedTable` --- .../data/tabular/containers/_tagged_table.py | 200 ++++++------------ .../test_add_column_as_feature.py | 19 ++ .../test_add_columns_as_features.py | 23 ++ 3 files changed, 111 insertions(+), 131 deletions(-) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 29449dd41..84ba9a5a3 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -196,10 +196,10 @@ def to_table(self: TaggedTable) -> Table: return self.features.add_column(self.target) # ------------------------------------------------------------------------------------------------------------------ - # Overriden methods from Table class: + # Specific methods from TaggedTable class: # ------------------------------------------------------------------------------------------------------------------ - def add_column(self, column: Column) -> TaggedTable: + def add_column_as_feature(self, column: Column) -> TaggedTable: """ Return the original table with the provided column attached at the end, as a feature column. @@ -214,16 +214,58 @@ def add_column(self, column: Column) -> TaggedTable: ------ DuplicateColumnNameError If the new column already exists. + ColumnSizeError + If the size of the column does not match the amount of rows. + """ + return TaggedTable._from_table(super().add_column(column), target_name=self.target.name, feature_names=self.features.column_names + [column.name]) + def add_columns_as_features(self, columns: list[Column] | Table) -> TaggedTable: + """ + Return the original table with the provided column attached at the end, as feature columns. + + This table is not modified. + + Returns + ------- + result : TaggedTable + The table with the column attached as feature columns. + + Raises + ------ + DuplicateColumnNameError + If the new column already exists. ColumnSizeError If the size of the column does not match the amount of rows. + """ + return TaggedTable._from_table(super().add_columns(columns), target_name=self.target.name, feature_names=self.features.column_names + list(map(lambda col: col.name, columns.to_columns() if isinstance(columns, Table) else columns))) + # ------------------------------------------------------------------------------------------------------------------ + # Overriden methods from Table class: + # ------------------------------------------------------------------------------------------------------------------ + + def add_column(self, column: Column) -> TaggedTable: + """ + Return the original table with the provided column attached at the end, as neither target nor feature column. + + This table is not modified. + + Returns + ------- + result : TaggedTable + The table with the column attached as neither target nor feature column. + + Raises + ------ + DuplicateColumnNameError + If the new column already exists. + ColumnSizeError + If the size of the column does not match the amount of rows. """ - return TaggedTable._from_table(super().add_column(column), target_name=self.target.name) + return TaggedTable._from_table(super().add_column(column), target_name=self.target.name, feature_names=self.features.column_names) def add_columns(self, columns: list[Column] | Table) -> TaggedTable: """ - Add multiple columns to the table, as feature columns. + Add multiple columns to the table, as neither target nor feature columns. This table is not modified. @@ -235,7 +277,7 @@ def add_columns(self, columns: list[Column] | Table) -> TaggedTable: Returns ------- result: TaggedTable - A new table combining the original table and the given columns as feature columns. + A new table combining the original table and the given columns as neither target nor feature columns. Raises ------ @@ -244,7 +286,7 @@ def add_columns(self, columns: list[Column] | Table) -> TaggedTable: DuplicateColumnNameError If at least one column name from the provided column list already exists in the table. """ - return TaggedTable._from_table(super().add_columns(columns), target_name=self.target.name) + return TaggedTable._from_table(super().add_columns(columns), target_name=self.target.name, feature_names=self.features.column_names) def add_row(self, row: Row) -> TaggedTable: """ @@ -366,10 +408,9 @@ def remove_columns(self, column_names: list[str]) -> TaggedTable: ColumnIsTargetError If any of the given columns is the target column. """ - try: - return TaggedTable._from_table(super().remove_columns(column_names), self.target.name) - except UnknownColumnNameError: - raise ColumnIsTargetError(self.target.name) from None + if self.target.name in column_names: + raise ColumnIsTargetError(self.target.name) + return TaggedTable._from_table(super().remove_columns(column_names), target_name=self.target.name, feature_names=sorted(set(self.features.column_names) - set(column_names), key={val: ix for ix, val in enumerate(self.features.column_names)}.get)) def remove_columns_with_missing_values(self) -> TaggedTable: """ @@ -388,11 +429,9 @@ def remove_columns_with_missing_values(self) -> TaggedTable: If any of the columns to be removed is the target column. """ table = super().remove_columns_with_missing_values() - try: - tagged = TaggedTable._from_table(table, self.target.name, None) - except UnknownColumnNameError: - raise ColumnIsTargetError(self.target.name) from None - return tagged + if self.target.name not in table.column_names: + raise ColumnIsTargetError(self.target.name) + return TaggedTable._from_table(table, self.target.name, feature_names=sorted(set(self.features.column_names).intersection(set(table.column_names)), key={val: ix for ix, val in enumerate(self.features.column_names)}.get)) def remove_columns_with_non_numerical_values(self) -> TaggedTable: """ @@ -411,11 +450,9 @@ def remove_columns_with_non_numerical_values(self) -> TaggedTable: If any of the columns to be removed is the target column. """ table = super().remove_columns_with_non_numerical_values() - try: - tagged = TaggedTable._from_table(table, self.target.name) - except UnknownColumnNameError: - raise ColumnIsTargetError(self.target.name) from None - return tagged + if self.target.name not in table.column_names: + raise ColumnIsTargetError(self.target.name) + return TaggedTable._from_table(table, self.target.name, feature_names=sorted(set(self.features.column_names).intersection(set(table.column_names)), key={val: ix for ix, val in enumerate(self.features.column_names)}.get)) def remove_duplicate_rows(self) -> TaggedTable: """ @@ -428,7 +465,7 @@ def remove_duplicate_rows(self) -> TaggedTable: result : TaggedTable The table with the duplicate rows removed. """ - return TaggedTable._from_table(super().remove_duplicate_rows(), self.target.name) + return TaggedTable._from_table(super().remove_duplicate_rows(), target_name=self.target.name, feature_names=self.features.column_names) def remove_rows_with_missing_values(self) -> TaggedTable: """ @@ -441,7 +478,7 @@ def remove_rows_with_missing_values(self) -> TaggedTable: table : TaggedTable A table without the rows that contain missing values. """ - return TaggedTable._from_table(super().remove_rows_with_missing_values(), self.target.name) + return TaggedTable._from_table(super().remove_rows_with_missing_values(), target_name=self.target.name, feature_names=self.features.column_names) def remove_rows_with_outliers(self) -> TaggedTable: """ @@ -458,7 +495,7 @@ def remove_rows_with_outliers(self) -> TaggedTable: new_table : TaggedTable A new table without rows containing outliers. """ - return TaggedTable._from_table(super().remove_rows_with_outliers(), self.target.name) + return TaggedTable._from_table(super().remove_rows_with_outliers(), target_name=self.target.name, feature_names=self.features.column_names) def rename_column(self, old_name: str, new_name: str) -> TaggedTable: """ @@ -487,7 +524,7 @@ def rename_column(self, old_name: str, new_name: str) -> TaggedTable: """ return TaggedTable._from_table( super().rename_column(old_name, new_name), - new_name if self.target.name == old_name else self.target.name, + target_name=new_name if self.target.name == old_name else self.target.name, feature_names=self.features.column_names if old_name not in self.features.column_names else [column_name if column_name != old_name else new_name for column_name in self.features.column_names] ) def replace_column(self, old_column_name: str, new_columns: list[Column]) -> TaggedTable: @@ -504,7 +541,6 @@ def replace_column(self, old_column_name: str, new_columns: list[Column]) -> Tag ---------- old_column_name : str The name of the column to be replaced. - new_columns : list[Column] The new columns replacing the old column. @@ -517,13 +553,10 @@ def replace_column(self, old_column_name: str, new_columns: list[Column]) -> Tag ------ UnknownColumnNameError If the old column does not exist. - DuplicateColumnNameError If the new column already exists and the existing column is not affected by the replacement. - ColumnSizeError If the size of the column does not match the amount of rows. - IllegalSchemaModificationError If the target column would be removed or replaced by more than one column. """ @@ -535,10 +568,10 @@ def replace_column(self, old_column_name: str, new_columns: list[Column]) -> Tag else: return TaggedTable._from_table( super().replace_column(old_column_name, new_columns), - new_columns[0].name, + target_name=new_columns[0].name, feature_names=self.features.column_names ) else: - return TaggedTable._from_table(super().replace_column(old_column_name, new_columns), self.target.name) + return TaggedTable._from_table(super().replace_column(old_column_name, new_columns), target_name=self.target.name, feature_names=self.features.column_names if old_column_name not in self.features.column_names else self.features.column_names[:self.features.column_names.index(old_column_name)] + list(map(lambda col: col.name, new_columns)) + self.features.column_names[self.features.column_names.index(old_column_name):]) def shuffle_rows(self) -> TaggedTable: """ @@ -552,7 +585,7 @@ def shuffle_rows(self) -> TaggedTable: The shuffled Table. """ - return TaggedTable._from_table(super().shuffle_rows(), self.target.name) + return TaggedTable._from_table(super().shuffle_rows(), target_name=self.target.name, feature_names=self.features.column_names) def slice_rows( self, @@ -584,7 +617,7 @@ def slice_rows( IndexOutOfBoundsError If the index is out of bounds. """ - return TaggedTable._from_table(super().slice_rows(start, end, step), self.target.name) + return TaggedTable._from_table(super().slice_rows(start, end, step), target_name=self.target.name, feature_names=self.features.column_names) def sort_columns( self, @@ -613,7 +646,8 @@ def sort_columns( new_table : TaggedTable A new table with sorted columns. """ - return TaggedTable._from_table(super().sort_columns(comparator), self.target.name) + sorted_table = super().sort_columns(comparator) + return TaggedTable._from_table(sorted_table, target_name=self.target.name, feature_names=sorted(set(sorted_table.column_names).intersection(self.features.column_names), key={val: ix for ix, val in enumerate(sorted_table.column_names)}.get)) def sort_rows(self, comparator: Callable[[Row, Row], int]) -> TaggedTable: """ @@ -636,7 +670,7 @@ def sort_rows(self, comparator: Callable[[Row, Row], int]) -> TaggedTable: new_table : TaggedTable A new table with sorted rows. """ - return TaggedTable._from_table(super().sort_rows(comparator), self.target.name) + return TaggedTable._from_table(super().sort_rows(comparator), target_name=self.target.name, feature_names=self.features.column_names) def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> TaggedTable: """ @@ -653,102 +687,6 @@ def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Tagg ------ UnknownColumnNameError If the column does not exist. - """ - return TaggedTable._from_table(super().transform_column(name, transformer), self.target.name) + return TaggedTable._from_table(super().transform_column(name, transformer), target_name=self.target.name, feature_names=self.features.column_names) - def transform_table(self, transformer: TableTransformer) -> TaggedTable: - """ - Apply a learned transformation onto this table. - - This table is not modified. - - Parameters - ---------- - transformer : TableTransformer - The transformer which transforms the given table. - - Returns - ------- - transformed_table : TaggedTable - The transformed table. - - Raises - ------ - TransformerNotFittedError - If the transformer has not been fitted yet. - ColunmIsTargetError - If the transformer tries to remove or replace the target column. - - Examples - -------- - >>> from safeds.data.tabular.transformation import OneHotEncoder - >>> from safeds.data.tabular.containers import TaggedTable - >>> table = TaggedTable({"feat1": ["a", "b", "a"], "feat2": ["a", "b", "d"], "target": [1, 2, 3]},"target") - >>> table - feat1 feat2 target - 0 a a 1 - 1 b b 2 - 2 a d 3 - >>> transformer = OneHotEncoder().fit(table, table.features.column_names) - >>> table.transform_table(transformer) - feat1__a feat1__b feat2__a feat2__b feat2__d target - 0 1.0 0.0 1.0 0.0 0.0 1 - 1 0.0 1.0 0.0 1.0 0.0 2 - 2 1.0 0.0 0.0 0.0 1.0 3 - """ - try: - transformed_table = transformer.transform(self) - except ColumnIsTargetError as e: # can happen for example with OneHotEncoder - raise ColumnIsTargetError(self.target.name) from e # Re-throw for shorter stacktrace - # For future transformers, it may also happen that they remove the target column without throwing. - # If this ever happens, comment-in these lines (currently out-commented b/c of code coverage): - # if self.target.name in transformer.get_names_of_removed_columns(): - # raise ColumnIsTargetError(self.target.name) - return TaggedTable._from_table(transformed_table, self.target.name) - - def inverse_transform_table(self, transformer: InvertibleTableTransformer) -> TaggedTable: - """ - Invert the transformation applied by the given transformer. - - This table is not modified. - - Parameters - ---------- - transformer : InvertibleTableTransformer - The transformer that was used to create this table. - - Returns - ------- - table : TaggedTable - The original table. - - Raises - ------ - TransformerNotFittedError - If the transformer has not been fitted yet. - - Examples - -------- - >>> from safeds.data.tabular.transformation import OneHotEncoder - >>> from safeds.data.tabular.containers import TaggedTable - >>> table = TaggedTable({"feat1": ["a", "b", "a"], "feat2": ["a", "b", "d"], "target": [1, 2, 3]}, "target") - >>> table - feat1 feat2 target - 0 a a 1 - 1 b b 2 - 2 a d 3 - >>> transformer = OneHotEncoder().fit(table, table.features.column_names) - >>> transformed_table = table.transform_table(transformer) - >>> transformed_table - feat1__a feat1__b feat2__a feat2__b feat2__d target - 0 1.0 0.0 1.0 0.0 0.0 1 - 1 0.0 1.0 0.0 1.0 0.0 2 - 2 1.0 0.0 0.0 0.0 1.0 3 - >>> transformed_table.inverse_transform_table(transformer) - feat1 feat2 target - 0 a a 1 - 1 b b 2 - 2 a d 3 - """ - return TaggedTable._from_table(transformer.inverse_transform(self), self.target.name) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py new file mode 100644 index 000000000..22f650666 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py @@ -0,0 +1,19 @@ +import pytest + +from safeds.data.tabular.containers import TaggedTable, Column, Table + + +@pytest.mark.parametrize( + ("tagged_table", "column", "tagged_table_with_new_column"), + [( + Table({"f1": [1, 2], "target": [2, 3]}).tag_columns(target_name="target", feature_names=["f1"]), + Column("f2", [4, 5]), + Table({"f1": [1, 2], "target": [2, 3], "f2": [4, 5]}).tag_columns(target_name="target", feature_names=["f1", "f2"]) + ),( + Table({"f1": [1, 2], "target": [2, 3], "other": [0, -1]}).tag_columns(target_name="target", feature_names=["f1"]), + Column("f2", [4, 5]), + Table({"f1": [1, 2], "target": [2, 3], "other": [0, -1], "f2": [4, 5]}).tag_columns(target_name="target", feature_names=["f1", "f2"]) + )], ids=["new column as feature", "table contains a non feature/target column"] +) +def test_add_column_as_feature(tagged_table: TaggedTable, column: Column, tagged_table_with_new_column) -> None: + assert tagged_table.add_column_as_feature(column) == tagged_table_with_new_column diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py new file mode 100644 index 000000000..5880207cc --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py @@ -0,0 +1,23 @@ +import pytest + +from safeds.data.tabular.containers import TaggedTable, Column, Table + + +@pytest.mark.parametrize( + ("tagged_table", "columns", "tagged_table_with_new_columns"), + [( + Table({"f1": [1, 2], "target": [2, 3]}).tag_columns(target_name="target", feature_names=["f1"]), + [Column("f2", [4, 5]), Column("f3", [6, 7])], + Table({"f1": [1, 2], "target": [2, 3], "f2": [4, 5], "f3": [6, 7]}).tag_columns(target_name="target", feature_names=["f1", "f2", "f3"]) + ),( + Table({"f1": [1, 2], "target": [2, 3]}).tag_columns(target_name="target", feature_names=["f1"]), + Table.from_columns([Column("f2", [4, 5]), Column("f3", [6, 7])]), + Table({"f1": [1, 2], "target": [2, 3], "f2": [4, 5], "f3": [6, 7]}).tag_columns(target_name="target", feature_names=["f1", "f2", "f3"]) + ),( + Table({"f1": [1, 2], "target": [2, 3], "other": [0, -1]}).tag_columns(target_name="target", feature_names=["f1"]), + Table.from_columns([Column("f2", [4, 5]), Column("f3", [6, 7])]), + Table({"f1": [1, 2], "target": [2, 3], "other": [0, -1], "f2": [4, 5], "f3": [6, 7]}).tag_columns(target_name="target", feature_names=["f1", "f2", "f3"]) + )], ids=["new columns as feature", "table added as features", "table contains a non feature/target column"] +) +def test_add_columns_as_features(tagged_table: TaggedTable, columns: list[Column] | Table, tagged_table_with_new_columns) -> None: + assert tagged_table.add_columns_as_features(columns) == tagged_table_with_new_columns From 8e25c86a3ce6a6dc49f509d1134e18354470d5d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Fri, 23 Jun 2023 16:03:59 +0200 Subject: [PATCH 097/149] test: Corrected tests after merge --- .../data/tabular/containers/_tagged_table.py | 2 +- .../_table/_tagged_table/test_add_column.py | 5 +-- .../_table/_tagged_table/test_add_columns.py | 9 ++--- .../test_inverse_transform_table.py | 24 ------------- .../_tagged_table/test_transform_table.py | 35 ------------------- 5 files changed, 9 insertions(+), 66 deletions(-) delete mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_inverse_transform_table.py delete mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 9cb84cbd7..f8ef1c9d9 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -563,7 +563,7 @@ def replace_column(self, old_column_name: str, new_columns: list[Column]) -> Tag target_name=new_columns[0].name, feature_names=self.features.column_names ) else: - return TaggedTable._from_table(super().replace_column(old_column_name, new_columns), target_name=self.target.name, feature_names=self.features.column_names if old_column_name not in self.features.column_names else self.features.column_names[:self.features.column_names.index(old_column_name)] + list(map(lambda col: col.name, new_columns)) + self.features.column_names[self.features.column_names.index(old_column_name):]) + return TaggedTable._from_table(super().replace_column(old_column_name, new_columns), target_name=self.target.name, feature_names=self.features.column_names if old_column_name not in self.features.column_names else self.features.column_names[:self.features.column_names.index(old_column_name)] + list(map(lambda col: col.name, new_columns)) + self.features.column_names[self.features.column_names.index(old_column_name) + 1:]) def shuffle_rows(self) -> TaggedTable: """ diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py index 497a87723..f02e68456 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py @@ -11,14 +11,15 @@ def test_should_add_column() -> None: }, "target", ) - col = Column("feature_2", [6, 7, 8]) + col = Column("other", [6, 7, 8]) new_table = table.add_column(col) expected = TaggedTable( { "feature_1": [0, 1, 2], "target": [3, 4, 5], - "feature_2": [6, 7, 8], + "other": [6, 7, 8], }, "target", + ["feature_1"] ) assert_that_tagged_tables_are_equal(new_table, expected) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py index b36d87e5e..351e17788 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py @@ -12,17 +12,18 @@ def test_should_add_columns() -> None: "target", ) cols = [ - Column("feature_2", [6, 7, 8]), - Column("feature_3", [9, 6, 3]), + Column("other", [6, 7, 8]), + Column("other2", [9, 6, 3]), ] new_table = table.add_columns(cols) expected = TaggedTable( { "feature_1": [0, 1, 2], "target": [3, 4, 5], - "feature_2": [6, 7, 8], - "feature_3": [9, 6, 3], + "other": [6, 7, 8], + "other2": [9, 6, 3], }, "target", + ["feature_1"] ) assert_that_tagged_tables_are_equal(new_table, expected) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_inverse_transform_table.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_inverse_transform_table.py deleted file mode 100644 index ca7197f16..000000000 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_inverse_transform_table.py +++ /dev/null @@ -1,24 +0,0 @@ -from safeds.data.tabular.containers import TaggedTable -from safeds.data.tabular.transformation import OneHotEncoder - -from tests.helpers import assert_that_tagged_tables_are_equal - - -def test_should_inverse_transform_table() -> None: - table = TaggedTable({"feat1": ["a", "b", "a"], "feat2": ["a", "b", "d"], "target": [1, 2, 3]}, "target") - transformer = OneHotEncoder().fit(table, table.features.column_names) - transformed_table = table.transform_table(transformer) - expected_transformed = TaggedTable( - { - "feat1__a": [1.0, 0.0, 1.0], - "feat1__b": [0.0, 1.0, 0.0], - "feat2__a": [1.0, 0.0, 0.0], - "feat2__b": [0.0, 1.0, 0.0], - "feat2__d": [0.0, 0.0, 1.0], - "target": [1, 2, 3], - }, - "target", - ) - assert_that_tagged_tables_are_equal(transformed_table, expected_transformed) - inverse_transformed_table = transformed_table.inverse_transform_table(transformer) - assert_that_tagged_tables_are_equal(inverse_transformed_table, table) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py deleted file mode 100644 index 189334c63..000000000 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_table.py +++ /dev/null @@ -1,35 +0,0 @@ -import pytest -from safeds.data.tabular.containers import TaggedTable -from safeds.data.tabular.transformation import OneHotEncoder -from safeds.exceptions import ColumnIsTargetError - -from tests.helpers import assert_that_tagged_tables_are_equal - - -def test_should_transform_table() -> None: - table = TaggedTable({"feat1": ["a", "b", "a"], "feat2": ["a", "b", "d"], "target": [1, 2, 3]}, "target") - transformer = OneHotEncoder().fit(table, table.features.column_names) - transformed_table = table.transform_table(transformer) - expected = TaggedTable( - { - "feat1__a": [1.0, 0.0, 1.0], - "feat1__b": [0.0, 1.0, 0.0], - "feat2__a": [1.0, 0.0, 0.0], - "feat2__b": [0.0, 1.0, 0.0], - "feat2__d": [0.0, 0.0, 1.0], - "target": [1, 2, 3], - }, - "target", - ) - assert_that_tagged_tables_are_equal(transformed_table, expected) - - -def test_should_raise_column_is_target() -> None: - table = TaggedTable({"feat1": ["a", "b", "a"], "feat2": ["a", "b", "d"], "target": [1, 2, 3]}, "target") - transformer = OneHotEncoder().fit(table, None) - # Passing None means all columns get one-hot-encoded, i.e. also the target column! - with pytest.raises( - ColumnIsTargetError, - match='Illegal schema modification: Column "target" is the target column and cannot be removed.', - ): - table.transform_table(transformer) From 1ca61c76093de6483414dd689c66a9d726c9fca3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 23 Jun 2023 16:08:17 +0200 Subject: [PATCH 098/149] Apply suggestions from code review Co-authored-by: Alexander <47296670+Marsmaennchen221@users.noreply.github.com> --- src/safeds/data/tabular/containers/_tagged_table.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index f8ef1c9d9..7970caa40 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -230,12 +230,12 @@ def _as_table(self: TaggedTable) -> Table: Parameters ---------- self: TaggedTable - The TaggedTable. + The TaggedTable. Returns ------- table: Table - The table as an untagged Table, i.e. without the information about which columns are features or target. + The table as an untagged Table, i.e. without the information about which columns are features or target. """ return self.features.add_column(self.target) From b29be631c9a02b4ceeac0c6b7d4767c943d47ca3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Fri, 23 Jun 2023 16:18:40 +0200 Subject: [PATCH 099/149] refactor: Added changes for linters --- src/safeds/data/tabular/containers/_tagged_table.py | 8 +++----- .../_table/_tagged_table/test_add_column_as_feature.py | 2 +- .../_table/_tagged_table/test_add_columns_as_features.py | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 7970caa40..144d24e3e 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -9,8 +9,6 @@ from collections.abc import Callable, Mapping, Sequence from typing import Any - from safeds.data.tabular.transformation import InvertibleTableTransformer, TableTransformer - class TaggedTable(Table): """ @@ -195,7 +193,7 @@ def add_column_as_feature(self, column: Column) -> TaggedTable: ColumnSizeError If the size of the column does not match the amount of rows. """ - return TaggedTable._from_table(super().add_column(column), target_name=self.target.name, feature_names=self.features.column_names + [column.name]) + return TaggedTable._from_table(super().add_column(column), target_name=self.target.name, feature_names=[*self.features.column_names, column.name]) def add_columns_as_features(self, columns: list[Column] | Table) -> TaggedTable: """ @@ -215,7 +213,7 @@ def add_columns_as_features(self, columns: list[Column] | Table) -> TaggedTable: ColumnSizeError If the size of the column does not match the amount of rows. """ - return TaggedTable._from_table(super().add_columns(columns), target_name=self.target.name, feature_names=self.features.column_names + list(map(lambda col: col.name, columns.to_columns() if isinstance(columns, Table) else columns))) + return TaggedTable._from_table(super().add_columns(columns), target_name=self.target.name, feature_names=self.features.column_names + [col.name for col in (columns.to_columns() if isinstance(columns, Table) else columns)]) # ------------------------------------------------------------------------------------------------------------------ # Overriden methods from Table class: @@ -563,7 +561,7 @@ def replace_column(self, old_column_name: str, new_columns: list[Column]) -> Tag target_name=new_columns[0].name, feature_names=self.features.column_names ) else: - return TaggedTable._from_table(super().replace_column(old_column_name, new_columns), target_name=self.target.name, feature_names=self.features.column_names if old_column_name not in self.features.column_names else self.features.column_names[:self.features.column_names.index(old_column_name)] + list(map(lambda col: col.name, new_columns)) + self.features.column_names[self.features.column_names.index(old_column_name) + 1:]) + return TaggedTable._from_table(super().replace_column(old_column_name, new_columns), target_name=self.target.name, feature_names=self.features.column_names if old_column_name not in self.features.column_names else self.features.column_names[:self.features.column_names.index(old_column_name)] + [col.name for col in new_columns] + self.features.column_names[self.features.column_names.index(old_column_name) + 1:]) def shuffle_rows(self) -> TaggedTable: """ diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py index 22f650666..a18d6f78e 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py @@ -15,5 +15,5 @@ Table({"f1": [1, 2], "target": [2, 3], "other": [0, -1], "f2": [4, 5]}).tag_columns(target_name="target", feature_names=["f1", "f2"]) )], ids=["new column as feature", "table contains a non feature/target column"] ) -def test_add_column_as_feature(tagged_table: TaggedTable, column: Column, tagged_table_with_new_column) -> None: +def test_add_column_as_feature(tagged_table: TaggedTable, column: Column, tagged_table_with_new_column: TaggedTable) -> None: assert tagged_table.add_column_as_feature(column) == tagged_table_with_new_column diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py index 5880207cc..15adc68c8 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py @@ -19,5 +19,5 @@ Table({"f1": [1, 2], "target": [2, 3], "other": [0, -1], "f2": [4, 5], "f3": [6, 7]}).tag_columns(target_name="target", feature_names=["f1", "f2", "f3"]) )], ids=["new columns as feature", "table added as features", "table contains a non feature/target column"] ) -def test_add_columns_as_features(tagged_table: TaggedTable, columns: list[Column] | Table, tagged_table_with_new_columns) -> None: +def test_add_columns_as_features(tagged_table: TaggedTable, columns: list[Column] | Table, tagged_table_with_new_columns: TaggedTable) -> None: assert tagged_table.add_columns_as_features(columns) == tagged_table_with_new_columns From 8d335a31d7853cc93f0c54de7971bd3bc8ba9361 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Fri, 23 Jun 2023 16:23:56 +0200 Subject: [PATCH 100/149] refactor: Just testing if the linter will accept this --- src/safeds/data/tabular/containers/_tagged_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 144d24e3e..f6b96a1fb 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -637,7 +637,7 @@ def sort_columns( A new table with sorted columns. """ sorted_table = super().sort_columns(comparator) - return TaggedTable._from_table(sorted_table, target_name=self.target.name, feature_names=sorted(set(sorted_table.column_names).intersection(self.features.column_names), key={val: ix for ix, val in enumerate(sorted_table.column_names)}.get)) + return TaggedTable._from_table(sorted_table, target_name=self.target.name, feature_names=sorted(set(sorted_table.column_names).intersection(self.features.column_names), key={val: ix for ix, val in enumerate(sorted_table.column_names)}.__getitem__)) def sort_rows(self, comparator: Callable[[Row, Row], int]) -> TaggedTable: """ From 65e0e722f5f369cae573e08c1a469b9d0896ccaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Fri, 23 Jun 2023 16:28:39 +0200 Subject: [PATCH 101/149] refactor: The linter should be happy now --- src/safeds/data/tabular/containers/_tagged_table.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index f6b96a1fb..2a99fae70 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -373,7 +373,7 @@ def keep_only_columns(self, column_names: list[str]) -> TaggedTable: if self.target.name not in column_names: raise IllegalSchemaModificationError("Must keep target column and at least one feature column.") table = super().keep_only_columns(column_names) - return TaggedTable._from_table(table, target_name=self.target.name, feature_names=sorted(set(self.features.column_names).intersection(set(table.column_names)), key={val: ix for ix, val in enumerate(self.features.column_names)}.get)) + return TaggedTable._from_table(table, target_name=self.target.name, feature_names=sorted(set(self.features.column_names).intersection(set(table.column_names)), key={val: ix for ix, val in enumerate(self.features.column_names)}.__getitem__)) def remove_columns(self, column_names: list[str]) -> TaggedTable: """ @@ -400,7 +400,7 @@ def remove_columns(self, column_names: list[str]) -> TaggedTable: """ if self.target.name in column_names: raise ColumnIsTargetError(self.target.name) - return TaggedTable._from_table(super().remove_columns(column_names), target_name=self.target.name, feature_names=sorted(set(self.features.column_names) - set(column_names), key={val: ix for ix, val in enumerate(self.features.column_names)}.get)) + return TaggedTable._from_table(super().remove_columns(column_names), target_name=self.target.name, feature_names=sorted(set(self.features.column_names) - set(column_names), key={val: ix for ix, val in enumerate(self.features.column_names)}.__getitem__)) def remove_columns_with_missing_values(self) -> TaggedTable: """ @@ -421,7 +421,7 @@ def remove_columns_with_missing_values(self) -> TaggedTable: table = super().remove_columns_with_missing_values() if self.target.name not in table.column_names: raise ColumnIsTargetError(self.target.name) - return TaggedTable._from_table(table, self.target.name, feature_names=sorted(set(self.features.column_names).intersection(set(table.column_names)), key={val: ix for ix, val in enumerate(self.features.column_names)}.get)) + return TaggedTable._from_table(table, self.target.name, feature_names=sorted(set(self.features.column_names).intersection(set(table.column_names)), key={val: ix for ix, val in enumerate(self.features.column_names)}.__getitem__)) def remove_columns_with_non_numerical_values(self) -> TaggedTable: """ @@ -442,7 +442,7 @@ def remove_columns_with_non_numerical_values(self) -> TaggedTable: table = super().remove_columns_with_non_numerical_values() if self.target.name not in table.column_names: raise ColumnIsTargetError(self.target.name) - return TaggedTable._from_table(table, self.target.name, feature_names=sorted(set(self.features.column_names).intersection(set(table.column_names)), key={val: ix for ix, val in enumerate(self.features.column_names)}.get)) + return TaggedTable._from_table(table, self.target.name, feature_names=sorted(set(self.features.column_names).intersection(set(table.column_names)), key={val: ix for ix, val in enumerate(self.features.column_names)}.__getitem__)) def remove_duplicate_rows(self) -> TaggedTable: """ From bf42687a0f83ff4b1a79a7b37469a69cde468315 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 23 Jun 2023 14:30:31 +0000 Subject: [PATCH 102/149] style: apply automated linter fixes --- .../data/tabular/containers/_tagged_table.py | 123 +++++++++++++++--- .../_table/_tagged_table/test_add_column.py | 2 +- .../test_add_column_as_feature.py | 35 +++-- .../_table/_tagged_table/test_add_columns.py | 2 +- .../test_add_columns_as_features.py | 46 ++++--- 5 files changed, 158 insertions(+), 50 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 2a99fae70..ef83121f5 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -193,7 +193,11 @@ def add_column_as_feature(self, column: Column) -> TaggedTable: ColumnSizeError If the size of the column does not match the amount of rows. """ - return TaggedTable._from_table(super().add_column(column), target_name=self.target.name, feature_names=[*self.features.column_names, column.name]) + return TaggedTable._from_table( + super().add_column(column), + target_name=self.target.name, + feature_names=[*self.features.column_names, column.name], + ) def add_columns_as_features(self, columns: list[Column] | Table) -> TaggedTable: """ @@ -213,7 +217,12 @@ def add_columns_as_features(self, columns: list[Column] | Table) -> TaggedTable: ColumnSizeError If the size of the column does not match the amount of rows. """ - return TaggedTable._from_table(super().add_columns(columns), target_name=self.target.name, feature_names=self.features.column_names + [col.name for col in (columns.to_columns() if isinstance(columns, Table) else columns)]) + return TaggedTable._from_table( + super().add_columns(columns), + target_name=self.target.name, + feature_names=self.features.column_names + + [col.name for col in (columns.to_columns() if isinstance(columns, Table) else columns)], + ) # ------------------------------------------------------------------------------------------------------------------ # Overriden methods from Table class: @@ -256,7 +265,9 @@ def add_column(self, column: Column) -> TaggedTable: ColumnSizeError If the size of the column does not match the amount of rows. """ - return TaggedTable._from_table(super().add_column(column), target_name=self.target.name, feature_names=self.features.column_names) + return TaggedTable._from_table( + super().add_column(column), target_name=self.target.name, feature_names=self.features.column_names, + ) def add_columns(self, columns: list[Column] | Table) -> TaggedTable: """ @@ -281,7 +292,9 @@ def add_columns(self, columns: list[Column] | Table) -> TaggedTable: DuplicateColumnNameError If at least one column name from the provided column list already exists in the table. """ - return TaggedTable._from_table(super().add_columns(columns), target_name=self.target.name, feature_names=self.features.column_names) + return TaggedTable._from_table( + super().add_columns(columns), target_name=self.target.name, feature_names=self.features.column_names, + ) def add_row(self, row: Row) -> TaggedTable: """ @@ -373,7 +386,14 @@ def keep_only_columns(self, column_names: list[str]) -> TaggedTable: if self.target.name not in column_names: raise IllegalSchemaModificationError("Must keep target column and at least one feature column.") table = super().keep_only_columns(column_names) - return TaggedTable._from_table(table, target_name=self.target.name, feature_names=sorted(set(self.features.column_names).intersection(set(table.column_names)), key={val: ix for ix, val in enumerate(self.features.column_names)}.__getitem__)) + return TaggedTable._from_table( + table, + target_name=self.target.name, + feature_names=sorted( + set(self.features.column_names).intersection(set(table.column_names)), + key={val: ix for ix, val in enumerate(self.features.column_names)}.__getitem__, + ), + ) def remove_columns(self, column_names: list[str]) -> TaggedTable: """ @@ -400,7 +420,14 @@ def remove_columns(self, column_names: list[str]) -> TaggedTable: """ if self.target.name in column_names: raise ColumnIsTargetError(self.target.name) - return TaggedTable._from_table(super().remove_columns(column_names), target_name=self.target.name, feature_names=sorted(set(self.features.column_names) - set(column_names), key={val: ix for ix, val in enumerate(self.features.column_names)}.__getitem__)) + return TaggedTable._from_table( + super().remove_columns(column_names), + target_name=self.target.name, + feature_names=sorted( + set(self.features.column_names) - set(column_names), + key={val: ix for ix, val in enumerate(self.features.column_names)}.__getitem__, + ), + ) def remove_columns_with_missing_values(self) -> TaggedTable: """ @@ -421,7 +448,14 @@ def remove_columns_with_missing_values(self) -> TaggedTable: table = super().remove_columns_with_missing_values() if self.target.name not in table.column_names: raise ColumnIsTargetError(self.target.name) - return TaggedTable._from_table(table, self.target.name, feature_names=sorted(set(self.features.column_names).intersection(set(table.column_names)), key={val: ix for ix, val in enumerate(self.features.column_names)}.__getitem__)) + return TaggedTable._from_table( + table, + self.target.name, + feature_names=sorted( + set(self.features.column_names).intersection(set(table.column_names)), + key={val: ix for ix, val in enumerate(self.features.column_names)}.__getitem__, + ), + ) def remove_columns_with_non_numerical_values(self) -> TaggedTable: """ @@ -442,7 +476,14 @@ def remove_columns_with_non_numerical_values(self) -> TaggedTable: table = super().remove_columns_with_non_numerical_values() if self.target.name not in table.column_names: raise ColumnIsTargetError(self.target.name) - return TaggedTable._from_table(table, self.target.name, feature_names=sorted(set(self.features.column_names).intersection(set(table.column_names)), key={val: ix for ix, val in enumerate(self.features.column_names)}.__getitem__)) + return TaggedTable._from_table( + table, + self.target.name, + feature_names=sorted( + set(self.features.column_names).intersection(set(table.column_names)), + key={val: ix for ix, val in enumerate(self.features.column_names)}.__getitem__, + ), + ) def remove_duplicate_rows(self) -> TaggedTable: """ @@ -455,7 +496,9 @@ def remove_duplicate_rows(self) -> TaggedTable: result : TaggedTable The table with the duplicate rows removed. """ - return TaggedTable._from_table(super().remove_duplicate_rows(), target_name=self.target.name, feature_names=self.features.column_names) + return TaggedTable._from_table( + super().remove_duplicate_rows(), target_name=self.target.name, feature_names=self.features.column_names, + ) def remove_rows_with_missing_values(self) -> TaggedTable: """ @@ -468,7 +511,11 @@ def remove_rows_with_missing_values(self) -> TaggedTable: table : TaggedTable A table without the rows that contain missing values. """ - return TaggedTable._from_table(super().remove_rows_with_missing_values(), target_name=self.target.name, feature_names=self.features.column_names) + return TaggedTable._from_table( + super().remove_rows_with_missing_values(), + target_name=self.target.name, + feature_names=self.features.column_names, + ) def remove_rows_with_outliers(self) -> TaggedTable: """ @@ -485,7 +532,9 @@ def remove_rows_with_outliers(self) -> TaggedTable: new_table : TaggedTable A new table without rows containing outliers. """ - return TaggedTable._from_table(super().remove_rows_with_outliers(), target_name=self.target.name, feature_names=self.features.column_names) + return TaggedTable._from_table( + super().remove_rows_with_outliers(), target_name=self.target.name, feature_names=self.features.column_names, + ) def rename_column(self, old_name: str, new_name: str) -> TaggedTable: """ @@ -514,7 +563,14 @@ def rename_column(self, old_name: str, new_name: str) -> TaggedTable: """ return TaggedTable._from_table( super().rename_column(old_name, new_name), - target_name=new_name if self.target.name == old_name else self.target.name, feature_names=self.features.column_names if old_name not in self.features.column_names else [column_name if column_name != old_name else new_name for column_name in self.features.column_names] + target_name=new_name if self.target.name == old_name else self.target.name, + feature_names=( + self.features.column_names + if old_name not in self.features.column_names + else [ + column_name if column_name != old_name else new_name for column_name in self.features.column_names + ] + ), ) def replace_column(self, old_column_name: str, new_columns: list[Column]) -> TaggedTable: @@ -558,10 +614,21 @@ def replace_column(self, old_column_name: str, new_columns: list[Column]) -> Tag else: return TaggedTable._from_table( super().replace_column(old_column_name, new_columns), - target_name=new_columns[0].name, feature_names=self.features.column_names + target_name=new_columns[0].name, + feature_names=self.features.column_names, ) else: - return TaggedTable._from_table(super().replace_column(old_column_name, new_columns), target_name=self.target.name, feature_names=self.features.column_names if old_column_name not in self.features.column_names else self.features.column_names[:self.features.column_names.index(old_column_name)] + [col.name for col in new_columns] + self.features.column_names[self.features.column_names.index(old_column_name) + 1:]) + return TaggedTable._from_table( + super().replace_column(old_column_name, new_columns), + target_name=self.target.name, + feature_names=( + self.features.column_names + if old_column_name not in self.features.column_names + else self.features.column_names[: self.features.column_names.index(old_column_name)] + + [col.name for col in new_columns] + + self.features.column_names[self.features.column_names.index(old_column_name) + 1 :] + ), + ) def shuffle_rows(self) -> TaggedTable: """ @@ -575,7 +642,9 @@ def shuffle_rows(self) -> TaggedTable: The shuffled Table. """ - return TaggedTable._from_table(super().shuffle_rows(), target_name=self.target.name, feature_names=self.features.column_names) + return TaggedTable._from_table( + super().shuffle_rows(), target_name=self.target.name, feature_names=self.features.column_names, + ) def slice_rows( self, @@ -607,7 +676,9 @@ def slice_rows( IndexOutOfBoundsError If the index is out of bounds. """ - return TaggedTable._from_table(super().slice_rows(start, end, step), target_name=self.target.name, feature_names=self.features.column_names) + return TaggedTable._from_table( + super().slice_rows(start, end, step), target_name=self.target.name, feature_names=self.features.column_names, + ) def sort_columns( self, @@ -637,7 +708,14 @@ def sort_columns( A new table with sorted columns. """ sorted_table = super().sort_columns(comparator) - return TaggedTable._from_table(sorted_table, target_name=self.target.name, feature_names=sorted(set(sorted_table.column_names).intersection(self.features.column_names), key={val: ix for ix, val in enumerate(sorted_table.column_names)}.__getitem__)) + return TaggedTable._from_table( + sorted_table, + target_name=self.target.name, + feature_names=sorted( + set(sorted_table.column_names).intersection(self.features.column_names), + key={val: ix for ix, val in enumerate(sorted_table.column_names)}.__getitem__, + ), + ) def sort_rows(self, comparator: Callable[[Row, Row], int]) -> TaggedTable: """ @@ -660,7 +738,9 @@ def sort_rows(self, comparator: Callable[[Row, Row], int]) -> TaggedTable: new_table : TaggedTable A new table with sorted rows. """ - return TaggedTable._from_table(super().sort_rows(comparator), target_name=self.target.name, feature_names=self.features.column_names) + return TaggedTable._from_table( + super().sort_rows(comparator), target_name=self.target.name, feature_names=self.features.column_names, + ) def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> TaggedTable: """ @@ -678,5 +758,8 @@ def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Tagg UnknownColumnNameError If the column does not exist. """ - return TaggedTable._from_table(super().transform_column(name, transformer), target_name=self.target.name, feature_names=self.features.column_names) - + return TaggedTable._from_table( + super().transform_column(name, transformer), + target_name=self.target.name, + feature_names=self.features.column_names, + ) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py index f02e68456..ef5382f47 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py @@ -20,6 +20,6 @@ def test_should_add_column() -> None: "other": [6, 7, 8], }, "target", - ["feature_1"] + ["feature_1"], ) assert_that_tagged_tables_are_equal(new_table, expected) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py index a18d6f78e..b9a20fd56 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py @@ -1,19 +1,30 @@ import pytest - -from safeds.data.tabular.containers import TaggedTable, Column, Table +from safeds.data.tabular.containers import Column, Table, TaggedTable @pytest.mark.parametrize( ("tagged_table", "column", "tagged_table_with_new_column"), - [( - Table({"f1": [1, 2], "target": [2, 3]}).tag_columns(target_name="target", feature_names=["f1"]), - Column("f2", [4, 5]), - Table({"f1": [1, 2], "target": [2, 3], "f2": [4, 5]}).tag_columns(target_name="target", feature_names=["f1", "f2"]) - ),( - Table({"f1": [1, 2], "target": [2, 3], "other": [0, -1]}).tag_columns(target_name="target", feature_names=["f1"]), - Column("f2", [4, 5]), - Table({"f1": [1, 2], "target": [2, 3], "other": [0, -1], "f2": [4, 5]}).tag_columns(target_name="target", feature_names=["f1", "f2"]) - )], ids=["new column as feature", "table contains a non feature/target column"] + [ + ( + Table({"f1": [1, 2], "target": [2, 3]}).tag_columns(target_name="target", feature_names=["f1"]), + Column("f2", [4, 5]), + Table({"f1": [1, 2], "target": [2, 3], "f2": [4, 5]}).tag_columns( + target_name="target", feature_names=["f1", "f2"], + ), + ), + ( + Table({"f1": [1, 2], "target": [2, 3], "other": [0, -1]}).tag_columns( + target_name="target", feature_names=["f1"], + ), + Column("f2", [4, 5]), + Table({"f1": [1, 2], "target": [2, 3], "other": [0, -1], "f2": [4, 5]}).tag_columns( + target_name="target", feature_names=["f1", "f2"], + ), + ), + ], + ids=["new column as feature", "table contains a non feature/target column"], ) -def test_add_column_as_feature(tagged_table: TaggedTable, column: Column, tagged_table_with_new_column: TaggedTable) -> None: +def test_add_column_as_feature( + tagged_table: TaggedTable, column: Column, tagged_table_with_new_column: TaggedTable, +) -> None: assert tagged_table.add_column_as_feature(column) == tagged_table_with_new_column diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py index 351e17788..cb513c832 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py @@ -24,6 +24,6 @@ def test_should_add_columns() -> None: "other2": [9, 6, 3], }, "target", - ["feature_1"] + ["feature_1"], ) assert_that_tagged_tables_are_equal(new_table, expected) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py index 15adc68c8..725b2a955 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py @@ -1,23 +1,37 @@ import pytest - -from safeds.data.tabular.containers import TaggedTable, Column, Table +from safeds.data.tabular.containers import Column, Table, TaggedTable @pytest.mark.parametrize( ("tagged_table", "columns", "tagged_table_with_new_columns"), - [( - Table({"f1": [1, 2], "target": [2, 3]}).tag_columns(target_name="target", feature_names=["f1"]), - [Column("f2", [4, 5]), Column("f3", [6, 7])], - Table({"f1": [1, 2], "target": [2, 3], "f2": [4, 5], "f3": [6, 7]}).tag_columns(target_name="target", feature_names=["f1", "f2", "f3"]) - ),( - Table({"f1": [1, 2], "target": [2, 3]}).tag_columns(target_name="target", feature_names=["f1"]), - Table.from_columns([Column("f2", [4, 5]), Column("f3", [6, 7])]), - Table({"f1": [1, 2], "target": [2, 3], "f2": [4, 5], "f3": [6, 7]}).tag_columns(target_name="target", feature_names=["f1", "f2", "f3"]) - ),( - Table({"f1": [1, 2], "target": [2, 3], "other": [0, -1]}).tag_columns(target_name="target", feature_names=["f1"]), - Table.from_columns([Column("f2", [4, 5]), Column("f3", [6, 7])]), - Table({"f1": [1, 2], "target": [2, 3], "other": [0, -1], "f2": [4, 5], "f3": [6, 7]}).tag_columns(target_name="target", feature_names=["f1", "f2", "f3"]) - )], ids=["new columns as feature", "table added as features", "table contains a non feature/target column"] + [ + ( + Table({"f1": [1, 2], "target": [2, 3]}).tag_columns(target_name="target", feature_names=["f1"]), + [Column("f2", [4, 5]), Column("f3", [6, 7])], + Table({"f1": [1, 2], "target": [2, 3], "f2": [4, 5], "f3": [6, 7]}).tag_columns( + target_name="target", feature_names=["f1", "f2", "f3"], + ), + ), + ( + Table({"f1": [1, 2], "target": [2, 3]}).tag_columns(target_name="target", feature_names=["f1"]), + Table.from_columns([Column("f2", [4, 5]), Column("f3", [6, 7])]), + Table({"f1": [1, 2], "target": [2, 3], "f2": [4, 5], "f3": [6, 7]}).tag_columns( + target_name="target", feature_names=["f1", "f2", "f3"], + ), + ), + ( + Table({"f1": [1, 2], "target": [2, 3], "other": [0, -1]}).tag_columns( + target_name="target", feature_names=["f1"], + ), + Table.from_columns([Column("f2", [4, 5]), Column("f3", [6, 7])]), + Table({"f1": [1, 2], "target": [2, 3], "other": [0, -1], "f2": [4, 5], "f3": [6, 7]}).tag_columns( + target_name="target", feature_names=["f1", "f2", "f3"], + ), + ), + ], + ids=["new columns as feature", "table added as features", "table contains a non feature/target column"], ) -def test_add_columns_as_features(tagged_table: TaggedTable, columns: list[Column] | Table, tagged_table_with_new_columns: TaggedTable) -> None: +def test_add_columns_as_features( + tagged_table: TaggedTable, columns: list[Column] | Table, tagged_table_with_new_columns: TaggedTable, +) -> None: assert tagged_table.add_columns_as_features(columns) == tagged_table_with_new_columns From da93e494be0f89d2f22246e68d8c1e08dc9a3e70 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 23 Jun 2023 14:32:26 +0000 Subject: [PATCH 103/149] style: apply automated linter fixes --- .../data/tabular/containers/_tagged_table.py | 28 ++++++++++++++----- .../test_add_column_as_feature.py | 13 ++++++--- .../test_add_columns_as_features.py | 16 +++++++---- 3 files changed, 41 insertions(+), 16 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index ef83121f5..120fc88b2 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -266,7 +266,9 @@ def add_column(self, column: Column) -> TaggedTable: If the size of the column does not match the amount of rows. """ return TaggedTable._from_table( - super().add_column(column), target_name=self.target.name, feature_names=self.features.column_names, + super().add_column(column), + target_name=self.target.name, + feature_names=self.features.column_names, ) def add_columns(self, columns: list[Column] | Table) -> TaggedTable: @@ -293,7 +295,9 @@ def add_columns(self, columns: list[Column] | Table) -> TaggedTable: If at least one column name from the provided column list already exists in the table. """ return TaggedTable._from_table( - super().add_columns(columns), target_name=self.target.name, feature_names=self.features.column_names, + super().add_columns(columns), + target_name=self.target.name, + feature_names=self.features.column_names, ) def add_row(self, row: Row) -> TaggedTable: @@ -497,7 +501,9 @@ def remove_duplicate_rows(self) -> TaggedTable: The table with the duplicate rows removed. """ return TaggedTable._from_table( - super().remove_duplicate_rows(), target_name=self.target.name, feature_names=self.features.column_names, + super().remove_duplicate_rows(), + target_name=self.target.name, + feature_names=self.features.column_names, ) def remove_rows_with_missing_values(self) -> TaggedTable: @@ -533,7 +539,9 @@ def remove_rows_with_outliers(self) -> TaggedTable: A new table without rows containing outliers. """ return TaggedTable._from_table( - super().remove_rows_with_outliers(), target_name=self.target.name, feature_names=self.features.column_names, + super().remove_rows_with_outliers(), + target_name=self.target.name, + feature_names=self.features.column_names, ) def rename_column(self, old_name: str, new_name: str) -> TaggedTable: @@ -643,7 +651,9 @@ def shuffle_rows(self) -> TaggedTable: """ return TaggedTable._from_table( - super().shuffle_rows(), target_name=self.target.name, feature_names=self.features.column_names, + super().shuffle_rows(), + target_name=self.target.name, + feature_names=self.features.column_names, ) def slice_rows( @@ -677,7 +687,9 @@ def slice_rows( If the index is out of bounds. """ return TaggedTable._from_table( - super().slice_rows(start, end, step), target_name=self.target.name, feature_names=self.features.column_names, + super().slice_rows(start, end, step), + target_name=self.target.name, + feature_names=self.features.column_names, ) def sort_columns( @@ -739,7 +751,9 @@ def sort_rows(self, comparator: Callable[[Row, Row], int]) -> TaggedTable: A new table with sorted rows. """ return TaggedTable._from_table( - super().sort_rows(comparator), target_name=self.target.name, feature_names=self.features.column_names, + super().sort_rows(comparator), + target_name=self.target.name, + feature_names=self.features.column_names, ) def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> TaggedTable: diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py index b9a20fd56..8ab706903 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py @@ -9,22 +9,27 @@ Table({"f1": [1, 2], "target": [2, 3]}).tag_columns(target_name="target", feature_names=["f1"]), Column("f2", [4, 5]), Table({"f1": [1, 2], "target": [2, 3], "f2": [4, 5]}).tag_columns( - target_name="target", feature_names=["f1", "f2"], + target_name="target", + feature_names=["f1", "f2"], ), ), ( Table({"f1": [1, 2], "target": [2, 3], "other": [0, -1]}).tag_columns( - target_name="target", feature_names=["f1"], + target_name="target", + feature_names=["f1"], ), Column("f2", [4, 5]), Table({"f1": [1, 2], "target": [2, 3], "other": [0, -1], "f2": [4, 5]}).tag_columns( - target_name="target", feature_names=["f1", "f2"], + target_name="target", + feature_names=["f1", "f2"], ), ), ], ids=["new column as feature", "table contains a non feature/target column"], ) def test_add_column_as_feature( - tagged_table: TaggedTable, column: Column, tagged_table_with_new_column: TaggedTable, + tagged_table: TaggedTable, + column: Column, + tagged_table_with_new_column: TaggedTable, ) -> None: assert tagged_table.add_column_as_feature(column) == tagged_table_with_new_column diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py index 725b2a955..2bd14f005 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py @@ -9,29 +9,35 @@ Table({"f1": [1, 2], "target": [2, 3]}).tag_columns(target_name="target", feature_names=["f1"]), [Column("f2", [4, 5]), Column("f3", [6, 7])], Table({"f1": [1, 2], "target": [2, 3], "f2": [4, 5], "f3": [6, 7]}).tag_columns( - target_name="target", feature_names=["f1", "f2", "f3"], + target_name="target", + feature_names=["f1", "f2", "f3"], ), ), ( Table({"f1": [1, 2], "target": [2, 3]}).tag_columns(target_name="target", feature_names=["f1"]), Table.from_columns([Column("f2", [4, 5]), Column("f3", [6, 7])]), Table({"f1": [1, 2], "target": [2, 3], "f2": [4, 5], "f3": [6, 7]}).tag_columns( - target_name="target", feature_names=["f1", "f2", "f3"], + target_name="target", + feature_names=["f1", "f2", "f3"], ), ), ( Table({"f1": [1, 2], "target": [2, 3], "other": [0, -1]}).tag_columns( - target_name="target", feature_names=["f1"], + target_name="target", + feature_names=["f1"], ), Table.from_columns([Column("f2", [4, 5]), Column("f3", [6, 7])]), Table({"f1": [1, 2], "target": [2, 3], "other": [0, -1], "f2": [4, 5], "f3": [6, 7]}).tag_columns( - target_name="target", feature_names=["f1", "f2", "f3"], + target_name="target", + feature_names=["f1", "f2", "f3"], ), ), ], ids=["new columns as feature", "table added as features", "table contains a non feature/target column"], ) def test_add_columns_as_features( - tagged_table: TaggedTable, columns: list[Column] | Table, tagged_table_with_new_columns: TaggedTable, + tagged_table: TaggedTable, + columns: list[Column] | Table, + tagged_table_with_new_columns: TaggedTable, ) -> None: assert tagged_table.add_columns_as_features(columns) == tagged_table_with_new_columns From e3a8ce5b5eacfbade39ac3cff7b99dbd214d27d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 23 Jun 2023 16:33:37 +0200 Subject: [PATCH 104/149] Add tests for remove_column --- .../_tagged_table/test_remove_columns.py | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py new file mode 100644 index 000000000..b98cd7dea --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py @@ -0,0 +1,61 @@ +import pytest +from safeds.data.tabular.containers import Table, TaggedTable +from safeds.exceptions import ColumnIsTargetError + +from tests.helpers import assert_that_tagged_tables_are_equal + + +@pytest.mark.parametrize( + ("table", "columns", "expected"), + [ + ( + TaggedTable._from_table( + Table( + { + "feat1": [1, 2, 3], + "feat2": [4, 5, 6], + "target": [7, 8, 9] + } + ), + "target" + ), + ["feat2"], + TaggedTable._from_table( + Table( + { + "feat1": [1, 2, 3], + "target": [7, 8, 9] + } + ), + "target" + ) + ), + ], + ids=["only_features_remove_feature"] +) +def test_should_remove_columns(table: TaggedTable, columns: list[str], expected: TaggedTable) -> None: + new_table = table.remove_columns(columns) + assert_that_tagged_tables_are_equal(new_table, expected) + + +@pytest.mark.parametrize( + ("table", "columns"), + [ + ( + TaggedTable._from_table( + Table( + { + "feat": [1, 2, 3], + "target": [4, 5, 6] + } + ), + "target" + ), + ["target"] + ) + ], + ids=["only_features_and_target"] +) +def test_should_raise_column_is_target_error(table: TaggedTable, columns: list[str]) -> None: + with pytest.raises(ColumnIsTargetError, match=r'Illegal schema modification: Column "target" is the target column and cannot be removed.'): + table.remove_columns(columns) From 9c2c75081b2c8862c85046e6fb12cd1adb3bfd2e Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 23 Jun 2023 14:35:58 +0000 Subject: [PATCH 105/149] style: apply automated linter fixes --- .../_tagged_table/test_remove_columns.py | 45 ++++--------------- 1 file changed, 9 insertions(+), 36 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py index b98cd7dea..52ee725d6 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py @@ -9,29 +9,12 @@ ("table", "columns", "expected"), [ ( - TaggedTable._from_table( - Table( - { - "feat1": [1, 2, 3], - "feat2": [4, 5, 6], - "target": [7, 8, 9] - } - ), - "target" - ), + TaggedTable._from_table(Table({"feat1": [1, 2, 3], "feat2": [4, 5, 6], "target": [7, 8, 9]}), "target"), ["feat2"], - TaggedTable._from_table( - Table( - { - "feat1": [1, 2, 3], - "target": [7, 8, 9] - } - ), - "target" - ) + TaggedTable._from_table(Table({"feat1": [1, 2, 3], "target": [7, 8, 9]}), "target"), ), ], - ids=["only_features_remove_feature"] + ids=["only_features_remove_feature"], ) def test_should_remove_columns(table: TaggedTable, columns: list[str], expected: TaggedTable) -> None: new_table = table.remove_columns(columns) @@ -40,22 +23,12 @@ def test_should_remove_columns(table: TaggedTable, columns: list[str], expected: @pytest.mark.parametrize( ("table", "columns"), - [ - ( - TaggedTable._from_table( - Table( - { - "feat": [1, 2, 3], - "target": [4, 5, 6] - } - ), - "target" - ), - ["target"] - ) - ], - ids=["only_features_and_target"] + [(TaggedTable._from_table(Table({"feat": [1, 2, 3], "target": [4, 5, 6]}), "target"), ["target"])], + ids=["only_features_and_target"], ) def test_should_raise_column_is_target_error(table: TaggedTable, columns: list[str]) -> None: - with pytest.raises(ColumnIsTargetError, match=r'Illegal schema modification: Column "target" is the target column and cannot be removed.'): + with pytest.raises( + ColumnIsTargetError, + match=r'Illegal schema modification: Column "target" is the target column and cannot be removed.', + ): table.remove_columns(columns) From 7437be152a33fde10ee64dcfa1b785a29e53517f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Fri, 30 Jun 2023 10:12:17 +0200 Subject: [PATCH 106/149] docs: Removed unnecessary empty lines in comment --- src/safeds/data/tabular/containers/_table.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 022e09c21..b3429b4eb 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -1377,13 +1377,10 @@ def replace_column(self, old_column_name: str, new_columns: list[Column]) -> Tab ------ UnknownColumnNameError If the old column does not exist. - DuplicateColumnNameError If at least one of the new columns already exists and the existing column is not affected by the replacement. - ColumnSizeError If the size of at least one of the new columns does not match the amount of rows. - IllegalSchemaModificationError If replacing the column would violate an invariant in the subclass. From 75b3a23fe6fdd4614878f60532b58b90bee87791 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Fri, 30 Jun 2023 10:54:30 +0200 Subject: [PATCH 107/149] feat: Using a table instead of super in `TaggedTable.__init__` because of problems with deepcopy --- src/safeds/data/tabular/containers/_tagged_table.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 120fc88b2..6e76e081b 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -143,10 +143,11 @@ def __init__( >>> table = TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]) """ super().__init__(data) + _data = Table(data) # If no feature names are specified, use all columns except the target column if feature_names is None: - feature_names = self.column_names + feature_names = _data.column_names if target_name in feature_names: feature_names.remove(target_name) @@ -156,8 +157,8 @@ def __init__( if len(feature_names) == 0: raise ValueError("At least one feature column must be specified.") - self._features: Table = super().keep_only_columns(feature_names) - self._target: Column = super().get_column(target_name) + self._features: Table = _data.keep_only_columns(feature_names) + self._target: Column = _data.get_column(target_name) # ------------------------------------------------------------------------------------------------------------------ # Properties From 7c366dcfd38766a2c4007b72adfbd0da79562c98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 30 Jun 2023 13:17:07 +0200 Subject: [PATCH 108/149] Fix _as_table --- .../data/tabular/containers/_tagged_table.py | 2 +- .../_tagged_table/test_transform_column.py | 52 ++++++++++++++----- 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 6e76e081b..3dcee30bf 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -246,7 +246,7 @@ def _as_table(self: TaggedTable) -> Table: The table as an untagged Table, i.e. without the information about which columns are features or target. """ - return self.features.add_column(self.target) + return Table.from_columns(super().to_columns()) def add_column(self, column: Column) -> TaggedTable: """ diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_column.py index c02656440..2243414ed 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_column.py @@ -18,24 +18,48 @@ "target", TaggedTable({"feature_a": [1, 2, 3], "feature_b": [4, 5, 6], "target": [2, 4, 6]}, "target"), ), + ( + TaggedTable({"feature_a": [1, 2, 3], "b": [4, 5, 6], "target": [1, 2, 3]}, target_name="target", feature_names=["feature_a"]), + "b", + TaggedTable({"feature_a": [1, 2, 3], "b": [8, 10, 12], "target": [1, 2, 3]}, target_name="target", feature_names=["feature_a"]), + ), ], - ids=["transform_feature_column", "transform_target_column"], + ids=["transform_feature_column", "transform_target_column", "transform_column_that_is_neither"], ) def test_should_transform_column(table: TaggedTable, column_name: str, table_transformed: TaggedTable) -> None: result = table.transform_column(column_name, lambda row: row.get_value(column_name) * 2) - assert_that_tagged_tables_are_equal(result, table_transformed) -def test_should_raise_if_column_not_found() -> None: - input_table = TaggedTable( - { - "A": [1, 2, 3], - "B": [4, 5, 6], - "C": ["a", "b", "c"], - }, - "C", - ) - - with pytest.raises(UnknownColumnNameError, match=r"Could not find column\(s\) 'D'"): - input_table.transform_column("D", lambda row: row.get_value("A") * 2) +@pytest.mark.parametrize( + ("table", "column_name"), + [ + ( + TaggedTable( + { + "A": [1, 2, 3], + "B": [4, 5, 6], + "C": ["a", "b", "c"], + }, + "C", + ), + "D", + ), + ( + TaggedTable( + { + "A": [1, 2, 3], + "B": [4, 5, 6], + "C": ["a", "b", "c"], + }, + target_name="C", + feature_names=["A"], + ), + "D" + ) + ], + ids=["has_only_features_and_target", "has_columns_that_are_neither"] +) +def test_should_raise_if_column_not_found(table: TaggedTable, column_name: str) -> None: + with pytest.raises(UnknownColumnNameError, match=fr"Could not find column\(s\) '{column_name}'"): + table.transform_column(column_name, lambda row: row.get_value("A") * 2) From a31769d2b776cae772dcf5e242e59007ceec91a1 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 30 Jun 2023 11:19:04 +0000 Subject: [PATCH 109/149] style: apply automated linter fixes --- .../_tagged_table/test_transform_column.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_column.py index 2243414ed..efcc6bc1c 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_transform_column.py @@ -19,9 +19,17 @@ TaggedTable({"feature_a": [1, 2, 3], "feature_b": [4, 5, 6], "target": [2, 4, 6]}, "target"), ), ( - TaggedTable({"feature_a": [1, 2, 3], "b": [4, 5, 6], "target": [1, 2, 3]}, target_name="target", feature_names=["feature_a"]), + TaggedTable( + {"feature_a": [1, 2, 3], "b": [4, 5, 6], "target": [1, 2, 3]}, + target_name="target", + feature_names=["feature_a"], + ), "b", - TaggedTable({"feature_a": [1, 2, 3], "b": [8, 10, 12], "target": [1, 2, 3]}, target_name="target", feature_names=["feature_a"]), + TaggedTable( + {"feature_a": [1, 2, 3], "b": [8, 10, 12], "target": [1, 2, 3]}, + target_name="target", + feature_names=["feature_a"], + ), ), ], ids=["transform_feature_column", "transform_target_column", "transform_column_that_is_neither"], @@ -55,11 +63,11 @@ def test_should_transform_column(table: TaggedTable, column_name: str, table_tra target_name="C", feature_names=["A"], ), - "D" - ) + "D", + ), ], - ids=["has_only_features_and_target", "has_columns_that_are_neither"] + ids=["has_only_features_and_target", "has_columns_that_are_neither"], ) def test_should_raise_if_column_not_found(table: TaggedTable, column_name: str) -> None: - with pytest.raises(UnknownColumnNameError, match=fr"Could not find column\(s\) '{column_name}'"): + with pytest.raises(UnknownColumnNameError, match=rf"Could not find column\(s\) '{column_name}'"): table.transform_column(column_name, lambda row: row.get_value("A") * 2) From f7219d51b979c6585a91eaf0f9124d411119e72e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 30 Jun 2023 14:48:05 +0200 Subject: [PATCH 110/149] Update tests for sort_rows --- .../containers/_table/_tagged_table/test_sort_rows.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py index 0d63af283..5d04eaf28 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py @@ -9,16 +9,15 @@ @pytest.mark.parametrize( ("table", "comparator", "expected"), [ - # TODO: Check that it works with an empty table ( - TaggedTable({"feature": [3, 2, 1], "target": [0, 0, 0]}, "target"), + TaggedTable({"feature": [3, 2, 1], "non_feature": [1, 1, 1], "target": [0, 0, 0]}, "target"), lambda row1, row2: row1["feature"] - row2["feature"], - TaggedTable({"feature": [1, 2, 3], "target": [0, 0, 0]}, "target"), + TaggedTable({"feature": [1, 2, 3], "non_feature": [1, 1, 1], "target": [0, 0, 0]}, "target"), ), ( - TaggedTable({"feature": [1, 2, 3], "target": [0, 0, 0]}, "target"), + TaggedTable({"feature": [1, 2, 3], "non_feature": [1, 1, 1], "target": [0, 0, 0]}, "target"), lambda row1, row2: row1["feature"] - row2["feature"], - TaggedTable({"feature": [1, 2, 3], "target": [0, 0, 0]}, "target"), + TaggedTable({"feature": [1, 2, 3], "non_feature": [1, 1, 1], "target": [0, 0, 0]}, "target"), ), ], ids=["unsorted", "already_sorted"], @@ -35,7 +34,6 @@ def test_should_sort_table( @pytest.mark.parametrize( ("table", "comparator", "table_copy"), [ - # TODO: Check that it works with an empty table ( TaggedTable({"feature": [3, 2, 1], "target": [0, 0, 0]}, "target"), lambda row1, row2: row1["feature"] - row2["feature"], From c01f208904e72c8b5d58bf8cf5d9f8d2d4f5d95a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 30 Jun 2023 14:49:58 +0200 Subject: [PATCH 111/149] Update inplace test --- .../containers/_table/_tagged_table/test_sort_rows.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py index 5d04eaf28..1cd86c6b4 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_rows.py @@ -35,14 +35,14 @@ def test_should_sort_table( ("table", "comparator", "table_copy"), [ ( - TaggedTable({"feature": [3, 2, 1], "target": [0, 0, 0]}, "target"), + TaggedTable({"feature": [3, 2, 1], "non_feature": [1, 1, 1], "target": [0, 0, 0]}, "target"), lambda row1, row2: row1["feature"] - row2["feature"], - TaggedTable({"feature": [3, 2, 1], "target": [0, 0, 0]}, "target"), + TaggedTable({"feature": [3, 2, 1], "non_feature": [1, 1, 1], "target": [0, 0, 0]}, "target"), ), ( - TaggedTable({"feature": [1, 2, 3], "target": [0, 0, 0]}, "target"), + TaggedTable({"feature": [1, 2, 3], "non_feature": [1, 1, 1], "target": [0, 0, 0]}, "target"), lambda row1, row2: row1["feature"] - row2["feature"], - TaggedTable({"feature": [1, 2, 3], "target": [0, 0, 0]}, "target"), + TaggedTable({"feature": [1, 2, 3], "non_feature": [1, 1, 1], "target": [0, 0, 0]}, "target"), ), ], ids=["unsorted", "already_sorted"], From 1ffe1c944ec985e59b10610abeaa012e8a1b37bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 30 Jun 2023 16:10:02 +0200 Subject: [PATCH 112/149] Update test for shuffle_rows --- .../_table/_tagged_table/test_shuffle_rows.py | 78 +++++++++---------- .../_table/_tagged_table/test_slice_rows.py | 6 +- .../_table/_tagged_table/test_sort_columns.py | 3 +- 3 files changed, 42 insertions(+), 45 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py index 70c8f56ea..f1b015ee1 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py @@ -1,47 +1,43 @@ -from safeds.data.tabular.containers import TaggedTable +import pytest +from safeds.data.tabular.containers import Row, Table, TaggedTable +from tests.helpers import assert_that_tagged_tables_are_equal -def test_should_shuffle_rows() -> None: - table = TaggedTable( - { - "feature_a": [0, 1, 2], - "feature_b": [3, 4, 5], - "target": [6, 7, 8], - }, - "target", - ) + +@pytest.mark.parametrize( + ("rows", "target_name", "feature_names"), + [ + ( + [ + Row( + {"feature_a": 0, "feature_b": 3, "no_feature": 6, "target": 9} + ), + Row( + {"feature_a": 1, "feature_b": 4, "no_feature": 7, "target": 10} + ), + Row( + {"feature_a": 2, "feature_b": 5, "no_feature": 8, "target": 11} + ), + ], + "target", + ["feature_a", "feature_b"], + ), + ], + ids=["table"] +) +def test_should_shuffle_rows(rows: list[Row], target_name: str, feature_names: list[str]) -> None: + table = TaggedTable._from_table(Table.from_rows(rows), target_name=target_name, feature_names=feature_names) shuffled = table.shuffle_rows() assert table.schema == shuffled.schema assert table.features.column_names == shuffled.features.column_names assert table.target.name == shuffled.target.name - # Use filter_rows to extract the individual rows and compare them one-by-one: - row_0 = shuffled.filter_rows(lambda row: any(row.get_value(col) == 0 for col in table.column_names)) - row_1 = shuffled.filter_rows(lambda row: any(row.get_value(col) == 1 for col in table.column_names)) - row_2 = shuffled.filter_rows(lambda row: any(row.get_value(col) == 2 for col in table.column_names)) - expected_0 = TaggedTable( - { - "feature_a": [0], - "feature_b": [3], - "target": [6], - }, - "target", - ) - expected_1 = TaggedTable( - { - "feature_a": [1], - "feature_b": [4], - "target": [7], - }, - "target", - ) - expected_2 = TaggedTable( - { - "feature_a": [2], - "feature_b": [5], - "target": [8], - }, - "target", - ) - assert row_0 == expected_0 - assert row_1 == expected_1 - assert row_2 == expected_2 + + # Check that shuffled contains the original rows: + for i in range(table.number_of_rows): + assert shuffled.get_row(i) in rows + + # Assert that table and shuffled are equal after sorting: + def comparator(r1: Row, r2: Row) -> int: + return 1 if r1.__repr__() < r2.__repr__() else -1 + + assert_that_tagged_tables_are_equal(table.sort_rows(comparator), shuffled.sort_rows(comparator)) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_slice_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_slice_rows.py index 189b3d908..f3521e18c 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_slice_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_slice_rows.py @@ -10,9 +10,9 @@ ("table", "test_table", "second_test_table"), [ ( - TaggedTable({"feature": [1, 2, 1], "target": [1, 2, 4]}, "target"), - TaggedTable({"feature": [1, 2], "target": [1, 2]}, "target"), - TaggedTable({"feature": [1, 1], "target": [1, 4]}, "target"), + TaggedTable({"feature": [1, 2, 1], "non_feature": [0, 2, 4], "target": [1, 2, 4]}, target_name="target", feature_names=["non_feature"]), + TaggedTable({"feature": [1, 2], "non_feature": [0, 2], "target": [1, 2]}, target_name="target", feature_names=["non_feature"]), + TaggedTable({"feature": [1, 1], "non_feature": [0, 4], "target": [1, 4]}, target_name="target", feature_names=["non_feature"]), ), ], ids=["Table with three rows"], diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_columns.py index 948147bdf..4ecdb78a7 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_columns.py @@ -38,7 +38,8 @@ def test_should_return_sorted_table( "col4": [2, 3, 1, 4, 6], "col1": ["A", "B", "C", "A", "D"], }, - "col1", + target_name="col1", + feature_names=["col4", "col3"], ) if query is not None: table_sorted = table1.sort_columns(query) From 0af1b01df33b892fa1ae379ab27d54699894c63d Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 30 Jun 2023 14:12:16 +0000 Subject: [PATCH 113/149] style: apply automated linter fixes --- src/safeds/data/tabular/containers/_row.py | 2 +- .../_table/_tagged_table/test_shuffle_rows.py | 14 ++++---------- .../_table/_tagged_table/test_slice_rows.py | 18 +++++++++++++++--- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/safeds/data/tabular/containers/_row.py b/src/safeds/data/tabular/containers/_row.py index 0e338caed..f70b2195d 100644 --- a/src/safeds/data/tabular/containers/_row.py +++ b/src/safeds/data/tabular/containers/_row.py @@ -263,7 +263,7 @@ def __repr__(self) -> str: >>> repr(row) "Row({'a': 1})" """ - return f"Row({str(self)})" + return f"Row({self!s})" def __str__(self) -> str: """ diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py index f1b015ee1..45e52f481 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py @@ -9,21 +9,15 @@ [ ( [ - Row( - {"feature_a": 0, "feature_b": 3, "no_feature": 6, "target": 9} - ), - Row( - {"feature_a": 1, "feature_b": 4, "no_feature": 7, "target": 10} - ), - Row( - {"feature_a": 2, "feature_b": 5, "no_feature": 8, "target": 11} - ), + Row({"feature_a": 0, "feature_b": 3, "no_feature": 6, "target": 9}), + Row({"feature_a": 1, "feature_b": 4, "no_feature": 7, "target": 10}), + Row({"feature_a": 2, "feature_b": 5, "no_feature": 8, "target": 11}), ], "target", ["feature_a", "feature_b"], ), ], - ids=["table"] + ids=["table"], ) def test_should_shuffle_rows(rows: list[Row], target_name: str, feature_names: list[str]) -> None: table = TaggedTable._from_table(Table.from_rows(rows), target_name=target_name, feature_names=feature_names) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_slice_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_slice_rows.py index f3521e18c..c4dcfeee2 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_slice_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_slice_rows.py @@ -10,9 +10,21 @@ ("table", "test_table", "second_test_table"), [ ( - TaggedTable({"feature": [1, 2, 1], "non_feature": [0, 2, 4], "target": [1, 2, 4]}, target_name="target", feature_names=["non_feature"]), - TaggedTable({"feature": [1, 2], "non_feature": [0, 2], "target": [1, 2]}, target_name="target", feature_names=["non_feature"]), - TaggedTable({"feature": [1, 1], "non_feature": [0, 4], "target": [1, 4]}, target_name="target", feature_names=["non_feature"]), + TaggedTable( + {"feature": [1, 2, 1], "non_feature": [0, 2, 4], "target": [1, 2, 4]}, + target_name="target", + feature_names=["non_feature"], + ), + TaggedTable( + {"feature": [1, 2], "non_feature": [0, 2], "target": [1, 2]}, + target_name="target", + feature_names=["non_feature"], + ), + TaggedTable( + {"feature": [1, 1], "non_feature": [0, 4], "target": [1, 4]}, + target_name="target", + feature_names=["non_feature"], + ), ), ], ids=["Table with three rows"], From f38ccdcd1f3ca484cf74645c4399a86e590aeedf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 30 Jun 2023 16:31:43 +0200 Subject: [PATCH 114/149] Update tests for replace_column --- .../_tagged_table/test_replace_column.py | 63 ++++++++++++++++++- .../_table/_tagged_table/test_shuffle_rows.py | 2 +- 2 files changed, 63 insertions(+), 2 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py index 02793de82..f67015788 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py @@ -12,27 +12,33 @@ TaggedTable( { "feature_old": [0, 1, 2], + "no_feature_old": [2, 3, 4], "target_old": [3, 4, 5], }, "target_old", + ["feature_old"], ), [Column("feature_new", [2, 1, 0])], "feature_old", TaggedTable( { "feature_new": [2, 1, 0], + "no_feature_old": [2, 3, 4], "target_old": [3, 4, 5], }, "target_old", + ["feature_new"], ), ), ( TaggedTable( { "feature_old": [0, 1, 2], + "no_feature_old": [2, 3, 4], "target_old": [3, 4, 5], }, "target_old", + ["feature_old"], ), [Column("feature_new_a", [2, 1, 0]), Column("feature_new_b", [4, 2, 0])], "feature_old", @@ -40,31 +46,86 @@ { "feature_new_a": [2, 1, 0], "feature_new_b": [4, 2, 0], + "no_feature_old": [2, 3, 4], "target_old": [3, 4, 5], }, "target_old", + ["feature_new_a", "feature_new_b"], ), ), ( TaggedTable( { "feature_old": [0, 1, 2], + "no_feature_old": [2, 3, 4], "target_old": [3, 4, 5], }, "target_old", + ["feature_old"], + ), + [Column("no_feature_new", [2, 1, 0])], + "no_feature_old", + TaggedTable( + { + "feature_old": [0, 1, 2], + "no_feature_new": [2, 1, 0], + "target_old": [3, 4, 5], + }, + "target_old", + ["feature_old"], + ), + ), + ( + TaggedTable( + { + "feature_old": [0, 1, 2], + "no_feature_old": [2, 3, 4], + "target_old": [3, 4, 5], + }, + "target_old", + ["feature_old"], + ), + [Column("no_feature_new_a", [2, 1, 0]), Column("no_feature_new_b", [4, 2, 0])], + "no_feature_old", + TaggedTable( + { + "feature_old": [0, 1, 2], + "no_feature_new_a": [2, 1, 0], + "no_feature_new_b": [4, 2, 0], + "target_old": [3, 4, 5], + }, + "target_old", + ["feature_old"], + ), + ), + ( + TaggedTable( + { + "feature_old": [0, 1, 2], + "no_feature_old": [2, 3, 4], + "target_old": [3, 4, 5], + }, + "target_old", + ["feature_old"], ), [Column("target_new", [2, 1, 0])], "target_old", TaggedTable( { "feature_old": [0, 1, 2], + "no_feature_old": [2, 3, 4], "target_new": [2, 1, 0], }, "target_new", + ["feature_old"], ), ), ], - ids=["replace_feature_column_with_one", "replace_feature_column_with_multiple", "replace_target_column"], + ids=["replace_feature_column_with_one", + "replace_feature_column_with_multiple", + "replace_non_feature_column_with_one", + "replace_non_feature_column_with_multiple", + "replace_target_column"], ) def test_should_replace_column( original_table: TaggedTable, diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py index f1b015ee1..4b8cdf2ea 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_shuffle_rows.py @@ -36,7 +36,7 @@ def test_should_shuffle_rows(rows: list[Row], target_name: str, feature_names: l for i in range(table.number_of_rows): assert shuffled.get_row(i) in rows - # Assert that table and shuffled are equal after sorting: + # Assert that table and shuffled are equal again after sorting: def comparator(r1: Row, r2: Row) -> int: return 1 if r1.__repr__() < r2.__repr__() else -1 From e49ff3d3153137f7acbbbca543d32d6b44d39746 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Fri, 30 Jun 2023 16:34:19 +0200 Subject: [PATCH 115/149] test: Moved tests from `test_tagged_table.py` to their own files in _tagged_table test: parametrized tests for `TaggedTable.add_column`, `TaggedTable.add_column_as_feature`, `TaggedTable.add_columns`, `TaggedTable.add_columns_as_features` --- .../_table/_tagged_table/test_add_column.py | 49 ++++++----- .../test_add_column_as_feature.py | 3 +- .../_table/_tagged_table/test_add_columns.py | 57 +++++++----- .../test_add_columns_as_features.py | 3 +- .../_table/_tagged_table/test_features.py | 31 +++++++ .../_table/_tagged_table/test_from_table.py | 71 +++++++++++++++ .../_table/_tagged_table/test_init.py | 75 ++++++++++++++++ .../_table/_tagged_table/test_target.py | 22 +++++ .../tabular/containers/test_tagged_table.py | 87 ------------------- 9 files changed, 265 insertions(+), 133 deletions(-) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_features.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_from_table.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_target.py delete mode 100644 tests/safeds/data/tabular/containers/test_tagged_table.py diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py index ef5382f47..ac2661d71 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py @@ -1,25 +1,34 @@ +import pytest + from safeds.data.tabular.containers import Column, TaggedTable from tests.helpers import assert_that_tagged_tables_are_equal -def test_should_add_column() -> None: - table = TaggedTable( - { - "feature_1": [0, 1, 2], - "target": [3, 4, 5], - }, - "target", - ) - col = Column("other", [6, 7, 8]) - new_table = table.add_column(col) - expected = TaggedTable( - { - "feature_1": [0, 1, 2], - "target": [3, 4, 5], - "other": [6, 7, 8], - }, - "target", - ["feature_1"], - ) - assert_that_tagged_tables_are_equal(new_table, expected) +@pytest.mark.parametrize( + ("tagged_table", "column", "expected_tagged_table"), + [ + ( + TaggedTable( + { + "feature_1": [0, 1, 2], + "target": [3, 4, 5], + }, + "target", None + ), + Column("other", [6, 7, 8]), + TaggedTable( + { + "feature_1": [0, 1, 2], + "target": [3, 4, 5], + "other": [6, 7, 8], + }, + "target", + ["feature_1"], + ) + ) + ], + ids=["add_column_as_non_feature"] +) +def test_should_add_column(tagged_table: TaggedTable, column: Column, expected_tagged_table: TaggedTable) -> None: + assert_that_tagged_tables_are_equal(tagged_table.add_column(column), expected_tagged_table) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py index 8ab706903..d2e31a753 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py @@ -1,5 +1,6 @@ import pytest from safeds.data.tabular.containers import Column, Table, TaggedTable +from tests.helpers import assert_that_tagged_tables_are_equal @pytest.mark.parametrize( @@ -32,4 +33,4 @@ def test_add_column_as_feature( column: Column, tagged_table_with_new_column: TaggedTable, ) -> None: - assert tagged_table.add_column_as_feature(column) == tagged_table_with_new_column + assert_that_tagged_tables_are_equal(tagged_table.add_column_as_feature(column), tagged_table_with_new_column) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py index cb513c832..48a255213 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py @@ -1,29 +1,38 @@ +import pytest + from safeds.data.tabular.containers import Column, TaggedTable from tests.helpers import assert_that_tagged_tables_are_equal -def test_should_add_columns() -> None: - table = TaggedTable( - { - "feature_1": [0, 1, 2], - "target": [3, 4, 5], - }, - "target", - ) - cols = [ - Column("other", [6, 7, 8]), - Column("other2", [9, 6, 3]), - ] - new_table = table.add_columns(cols) - expected = TaggedTable( - { - "feature_1": [0, 1, 2], - "target": [3, 4, 5], - "other": [6, 7, 8], - "other2": [9, 6, 3], - }, - "target", - ["feature_1"], - ) - assert_that_tagged_tables_are_equal(new_table, expected) +@pytest.mark.parametrize( + ("tagged_table", "columns", "expected_tagged_table"), + [ + ( + TaggedTable( + { + "feature_1": [0, 1, 2], + "target": [3, 4, 5], + }, + "target", None + ), + [ + Column("other", [6, 7, 8]), + Column("other2", [9, 6, 3]) + ], + TaggedTable( + { + "feature_1": [0, 1, 2], + "target": [3, 4, 5], + "other": [6, 7, 8], + "other2": [9, 6, 3], + }, + "target", + ["feature_1"], + ) + ) + ], + ids=["add_columns_as_non_feature"] +) +def test_should_add_columns(tagged_table: TaggedTable, columns: list[Column], expected_tagged_table: TaggedTable) -> None: + assert_that_tagged_tables_are_equal(tagged_table.add_columns(columns), expected_tagged_table) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py index 2bd14f005..db0eed488 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py @@ -1,5 +1,6 @@ import pytest from safeds.data.tabular.containers import Column, Table, TaggedTable +from tests.helpers import assert_that_tagged_tables_are_equal @pytest.mark.parametrize( @@ -40,4 +41,4 @@ def test_add_columns_as_features( columns: list[Column] | Table, tagged_table_with_new_columns: TaggedTable, ) -> None: - assert tagged_table.add_columns_as_features(columns) == tagged_table_with_new_columns + assert_that_tagged_tables_are_equal(tagged_table.add_columns_as_features(columns), tagged_table_with_new_columns) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_features.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_features.py new file mode 100644 index 000000000..a5bff98af --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_features.py @@ -0,0 +1,31 @@ +import pytest + +from safeds.data.tabular.containers import TaggedTable, Table + + +@pytest.mark.parametrize( + ("tagged_table", "features"), + [ + ( + TaggedTable({ + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, target_name="T"), + Table({"A": [1, 4], "B": [2, 5], "C": [3, 6]}) + ), + ( + TaggedTable({ + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, target_name="T", feature_names=["A", "C"]), + Table({"A": [1, 4], "C": [3, 6]}) + ) + ], + ids=["all_columns_are_feature", "not_all_columns_are_features"] +) +def test_should_return_features(tagged_table: TaggedTable, features: Table) -> None: + assert tagged_table.features == features diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_from_table.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_from_table.py new file mode 100644 index 000000000..17e9a18af --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_from_table.py @@ -0,0 +1,71 @@ +import pytest + +from safeds.data.tabular.containers import Table, TaggedTable +from safeds.exceptions import UnknownColumnNameError + + +@pytest.mark.parametrize( + ("table", "target_name", "feature_names", "error", "error_msg"), + [ + ( + Table({ + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }), "T", ["A", "B", "C", "D", "E"], UnknownColumnNameError, r"Could not find column\(s\) 'D, E'" + ),( + Table({ + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }), "D", ["A", "B", "C"], UnknownColumnNameError, r"Could not find column\(s\) 'D'" + ),( + Table({ + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }), "A", ["A", "B", "C"], ValueError, r"Column 'A' cannot be both feature and target." + ),( + Table({ + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }), "A", [], ValueError, r"At least one feature column must be specified." + ),( + Table({ + "A": [1, 4], + }), "A", None, ValueError, r"At least one feature column must be specified." + ), + ], + ids=["feature_does_not_exist", "target_does_not_exist", "target_and_feature_overlap", "features_are_empty-explicitly", "features_are_empty_implicitly"] +) +def test_should_raise_error(table: Table, target_name: str, feature_names: list[str] | None, error: type[Exception], error_msg: str) -> None: + with pytest.raises(error, match=error_msg): + TaggedTable._from_table(table, target_name=target_name, feature_names=feature_names) + + +@pytest.mark.parametrize( + ("table", "target_name", "feature_names"), + [ + ( + Table({ + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }), "T", ["A", "B", "C"] + ) + ], + ids=["create_tagged_table"] +) +def test_should_create_a_tagged_table(table: Table, target_name: str, feature_names: list[str]) -> None: + tagged_table = TaggedTable._from_table(table, target_name=target_name, feature_names=feature_names) + assert isinstance(tagged_table, TaggedTable) + assert tagged_table._features.column_names == feature_names + assert tagged_table._target.name == target_name + assert tagged_table._features == table.keep_only_columns(feature_names) + assert tagged_table._target == table.get_column(target_name) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py new file mode 100644 index 000000000..a331c3983 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py @@ -0,0 +1,75 @@ +import pytest + +from safeds.data.tabular.containers import TaggedTable, Table +from safeds.exceptions import UnknownColumnNameError + + +@pytest.mark.parametrize( + ("data", "target_name", "feature_names", "error", "error_msg"), + [ + ( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, "T", ["A", "B", "C", "D", "E"], UnknownColumnNameError, r"Could not find column\(s\) 'D, E'" + ), + ( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, "D", ["A", "B", "C"], UnknownColumnNameError, r"Could not find column\(s\) 'D'" + ), + ( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, "A", ["A", "B", "C"], ValueError, r"Column 'A' cannot be both feature and target." + ), + ( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, "D", [], ValueError, r"At least one feature column must be specified." + ), + ( + { + "A": [1, 4], + }, "A", None, ValueError, r"At least one feature column must be specified." + ) + ], + ids=["feature_does_not_exist", "target_does_not_exist", "target_and_feature_overlap", "features_are_empty-explicitly", "features_are_empty_implicitly"] +) +def test_should_raise_error(data: dict[str, list[int]], target_name: str, feature_names: list[str] | None, error: type[Exception], error_msg: str) -> None: + with pytest.raises(error, match=error_msg): + TaggedTable(data, target_name=target_name, feature_names=feature_names) + + +@pytest.mark.parametrize( + ("data", "target_name", "feature_names"), + [ + ( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, "T", ["A", "B", "C"] + ) + ], + ids=["create_tagged_table"] +) +def test_should_create_a_tagged_table(data: dict[str, list[int]], target_name: str, feature_names: list[str]) -> None: + tagged_table = TaggedTable(data, target_name=target_name, feature_names=feature_names) + assert isinstance(tagged_table, TaggedTable) + assert tagged_table._features.column_names == feature_names + assert tagged_table._target.name == target_name + assert tagged_table._features == Table(data).keep_only_columns(feature_names) + assert tagged_table._target == Table(data).get_column(target_name) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_target.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_target.py new file mode 100644 index 000000000..416dd7f91 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_target.py @@ -0,0 +1,22 @@ +import pytest + +from safeds.data.tabular.containers import TaggedTable, Column + + +@pytest.mark.parametrize( + ("tagged_table", "target_column"), + [ + ( + TaggedTable({ + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, target_name="T"), + Column("T", [0, 1]) + ) + ], + ids=["target"] +) +def test_should_return_target(tagged_table: TaggedTable, target_column: Column) -> None: + assert tagged_table.target == target_column diff --git a/tests/safeds/data/tabular/containers/test_tagged_table.py b/tests/safeds/data/tabular/containers/test_tagged_table.py deleted file mode 100644 index 9a1ae7210..000000000 --- a/tests/safeds/data/tabular/containers/test_tagged_table.py +++ /dev/null @@ -1,87 +0,0 @@ -import pytest -from safeds.data.tabular.containers import Column, Table, TaggedTable -from safeds.exceptions import UnknownColumnNameError - - -@pytest.fixture() -def data() -> dict[str, list[int]]: - return { - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - } - - -@pytest.fixture() -def table(data: dict[str, list[int]]) -> Table: - return Table(data) - - -@pytest.fixture() -def tagged_table(table: Table) -> TaggedTable: - return table.tag_columns(target_name="T") - - -class TestFromTable: - def test_should_raise_if_a_feature_does_not_exist(self, table: Table) -> None: - with pytest.raises(UnknownColumnNameError): - TaggedTable._from_table(table, target_name="T", feature_names=["A", "B", "C", "D"]) - - def test_should_raise_if_target_does_not_exist(self, table: Table) -> None: - with pytest.raises(UnknownColumnNameError): - TaggedTable._from_table(table, target_name="D") - - def test_should_raise_if_features_and_target_overlap(self, table: Table) -> None: - with pytest.raises(ValueError, match="Column 'A' cannot be both feature and target."): - TaggedTable._from_table(table, target_name="A", feature_names=["A", "B", "C"]) - - def test_should_raise_if_features_are_empty_explicitly(self, table: Table) -> None: - with pytest.raises(ValueError, match="At least one feature column must be specified."): - TaggedTable._from_table(table, target_name="A", feature_names=[]) - - def test_should_raise_if_features_are_empty_implicitly(self) -> None: - table = Table({"A": [1, 4]}) - - with pytest.raises(ValueError, match="At least one feature column must be specified."): - TaggedTable._from_table(table, target_name="A") - - -class TestInit: - def test_should_raise_if_a_feature_does_not_exist(self, data: dict[str, list[int]]) -> None: - with pytest.raises(UnknownColumnNameError): - TaggedTable(data, target_name="T", feature_names=["A", "B", "C", "D"]) - - def test_should_raise_if_target_does_not_exist(self, data: dict[str, list[int]]) -> None: - with pytest.raises(UnknownColumnNameError): - TaggedTable(data, target_name="D") - - def test_should_raise_if_features_and_target_overlap(self, data: dict[str, list[int]]) -> None: - with pytest.raises(ValueError, match="Column 'A' cannot be both feature and target."): - TaggedTable(data, target_name="A", feature_names=["A", "B", "C"]) - - def test_should_raise_if_features_are_empty_explicitly(self, data: dict[str, list[int]]) -> None: - with pytest.raises(ValueError, match="At least one feature column must be specified."): - TaggedTable(data, target_name="A", feature_names=[]) - - def test_should_raise_if_features_are_empty_implicitly(self) -> None: - data = {"A": [1, 4]} - - with pytest.raises(ValueError, match="At least one feature column must be specified."): - TaggedTable(data, target_name="A") - - -class TestFeatures: - def test_should_return_features(self, tagged_table: TaggedTable) -> None: - assert tagged_table.features == Table( - { - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - }, - ) - - -class TestTarget: - def test_should_return_target(self, tagged_table: TaggedTable) -> None: - assert tagged_table.target == Column("T", [0, 1]) From d50a943555d1df9dbb1793cfddfbcd66aa15720d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 30 Jun 2023 18:38:25 +0200 Subject: [PATCH 116/149] Update test_rename_column.py --- .../_tagged_table/test_rename_column.py | 40 ++++++++++++++++--- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py index 24d175318..3c84c219f 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py @@ -11,40 +11,70 @@ TaggedTable( { "feature_old": [0, 1, 2], + "no_feature": [2, 3, 4], "target": [3, 4, 5], }, - "target", + target_name="target", + feature_names=["feature_old"] ), "feature_old", "feature_new", TaggedTable( { "feature_new": [0, 1, 2], + "no_feature": [2, 3, 4], "target": [3, 4, 5], }, - "target", + target_name="target", + feature_names=["feature_new"] ), ), ( TaggedTable( { "feature": [0, 1, 2], + "no_feature": [2, 3, 4], "target_old": [3, 4, 5], }, - "target_old", + target_name="target_old", + feature_names=["feature"] ), "target_old", "target_new", TaggedTable( { "feature": [0, 1, 2], + "no_feature": [2, 3, 4], "target_new": [3, 4, 5], }, - "target_new", + target_name="target_new", + feature_names=["feature"] + ), + ), + ( + TaggedTable( + { + "feature": [0, 1, 2], + "no_feature_old": [2, 3, 4], + "target": [3, 4, 5], + }, + target_name="target", + feature_names=["feature"] + ), + "no_feature_old", + "no_feature_new", + TaggedTable( + { + "feature": [0, 1, 2], + "no_feature_new": [2, 3, 4], + "target": [3, 4, 5], + }, + target_name="target", + feature_names=["feature"] ), ), ], - ids=["rename_feature_column", "rename_target_column"], + ids=["rename_feature_column", "rename_target_column", "rename_non_feature_column"], ) def test_should_add_column( original_table: TaggedTable, From cc26bdf116f36a1a3cd8240acb6d3c5305de3e1b Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 30 Jun 2023 17:13:56 +0000 Subject: [PATCH 117/149] style: apply automated linter fixes --- .../_table/_tagged_table/test_add_column.py | 10 +- .../test_add_column_as_feature.py | 1 + .../_table/_tagged_table/test_add_columns.py | 19 ++- .../test_add_columns_as_features.py | 1 + .../_table/_tagged_table/test_features.py | 42 +++--- .../_table/_tagged_table/test_from_table.py | 129 ++++++++++++------ .../_table/_tagged_table/test_init.py | 59 ++++++-- .../_tagged_table/test_rename_column.py | 12 +- .../_tagged_table/test_replace_column.py | 12 +- .../_table/_tagged_table/test_target.py | 24 ++-- 10 files changed, 199 insertions(+), 110 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py index ac2661d71..22bd87605 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py @@ -1,5 +1,4 @@ import pytest - from safeds.data.tabular.containers import Column, TaggedTable from tests.helpers import assert_that_tagged_tables_are_equal @@ -14,7 +13,8 @@ "feature_1": [0, 1, 2], "target": [3, 4, 5], }, - "target", None + "target", + None, ), Column("other", [6, 7, 8]), TaggedTable( @@ -25,10 +25,10 @@ }, "target", ["feature_1"], - ) - ) + ), + ), ], - ids=["add_column_as_non_feature"] + ids=["add_column_as_non_feature"], ) def test_should_add_column(tagged_table: TaggedTable, column: Column, expected_tagged_table: TaggedTable) -> None: assert_that_tagged_tables_are_equal(tagged_table.add_column(column), expected_tagged_table) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py index d2e31a753..325df54db 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py @@ -1,5 +1,6 @@ import pytest from safeds.data.tabular.containers import Column, Table, TaggedTable + from tests.helpers import assert_that_tagged_tables_are_equal diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py index 48a255213..2ca4e5fb1 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py @@ -1,5 +1,4 @@ import pytest - from safeds.data.tabular.containers import Column, TaggedTable from tests.helpers import assert_that_tagged_tables_are_equal @@ -14,12 +13,10 @@ "feature_1": [0, 1, 2], "target": [3, 4, 5], }, - "target", None + "target", + None, ), - [ - Column("other", [6, 7, 8]), - Column("other2", [9, 6, 3]) - ], + [Column("other", [6, 7, 8]), Column("other2", [9, 6, 3])], TaggedTable( { "feature_1": [0, 1, 2], @@ -29,10 +26,12 @@ }, "target", ["feature_1"], - ) - ) + ), + ), ], - ids=["add_columns_as_non_feature"] + ids=["add_columns_as_non_feature"], ) -def test_should_add_columns(tagged_table: TaggedTable, columns: list[Column], expected_tagged_table: TaggedTable) -> None: +def test_should_add_columns( + tagged_table: TaggedTable, columns: list[Column], expected_tagged_table: TaggedTable, +) -> None: assert_that_tagged_tables_are_equal(tagged_table.add_columns(columns), expected_tagged_table) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py index db0eed488..f1e7716b8 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py @@ -1,5 +1,6 @@ import pytest from safeds.data.tabular.containers import Column, Table, TaggedTable + from tests.helpers import assert_that_tagged_tables_are_equal diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_features.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_features.py index a5bff98af..54a327227 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_features.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_features.py @@ -1,31 +1,37 @@ import pytest - -from safeds.data.tabular.containers import TaggedTable, Table +from safeds.data.tabular.containers import Table, TaggedTable @pytest.mark.parametrize( ("tagged_table", "features"), [ ( - TaggedTable({ - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }, target_name="T"), - Table({"A": [1, 4], "B": [2, 5], "C": [3, 6]}) + TaggedTable( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + target_name="T", + ), + Table({"A": [1, 4], "B": [2, 5], "C": [3, 6]}), ), ( - TaggedTable({ - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }, target_name="T", feature_names=["A", "C"]), - Table({"A": [1, 4], "C": [3, 6]}) - ) + TaggedTable( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + target_name="T", + feature_names=["A", "C"], + ), + Table({"A": [1, 4], "C": [3, 6]}), + ), ], - ids=["all_columns_are_feature", "not_all_columns_are_features"] + ids=["all_columns_are_feature", "not_all_columns_are_features"], ) def test_should_return_features(tagged_table: TaggedTable, features: Table) -> None: assert tagged_table.features == features diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_from_table.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_from_table.py index 17e9a18af..3ba2ba7ea 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_from_table.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_from_table.py @@ -1,5 +1,4 @@ import pytest - from safeds.data.tabular.containers import Table, TaggedTable from safeds.exceptions import UnknownColumnNameError @@ -8,42 +7,84 @@ ("table", "target_name", "feature_names", "error", "error_msg"), [ ( - Table({ - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }), "T", ["A", "B", "C", "D", "E"], UnknownColumnNameError, r"Could not find column\(s\) 'D, E'" - ),( - Table({ - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }), "D", ["A", "B", "C"], UnknownColumnNameError, r"Could not find column\(s\) 'D'" - ),( - Table({ - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }), "A", ["A", "B", "C"], ValueError, r"Column 'A' cannot be both feature and target." - ),( - Table({ - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }), "A", [], ValueError, r"At least one feature column must be specified." - ),( - Table({ - "A": [1, 4], - }), "A", None, ValueError, r"At least one feature column must be specified." + Table( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + ), + "T", + ["A", "B", "C", "D", "E"], + UnknownColumnNameError, + r"Could not find column\(s\) 'D, E'", + ), + ( + Table( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + ), + "D", + ["A", "B", "C"], + UnknownColumnNameError, + r"Could not find column\(s\) 'D'", + ), + ( + Table( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + ), + "A", + ["A", "B", "C"], + ValueError, + r"Column 'A' cannot be both feature and target.", + ), + ( + Table( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + ), + "A", + [], + ValueError, + r"At least one feature column must be specified.", + ), + ( + Table( + { + "A": [1, 4], + }, + ), + "A", + None, + ValueError, + r"At least one feature column must be specified.", ), ], - ids=["feature_does_not_exist", "target_does_not_exist", "target_and_feature_overlap", "features_are_empty-explicitly", "features_are_empty_implicitly"] + ids=[ + "feature_does_not_exist", + "target_does_not_exist", + "target_and_feature_overlap", + "features_are_empty-explicitly", + "features_are_empty_implicitly", + ], ) -def test_should_raise_error(table: Table, target_name: str, feature_names: list[str] | None, error: type[Exception], error_msg: str) -> None: +def test_should_raise_error( + table: Table, target_name: str, feature_names: list[str] | None, error: type[Exception], error_msg: str, +) -> None: with pytest.raises(error, match=error_msg): TaggedTable._from_table(table, target_name=target_name, feature_names=feature_names) @@ -52,15 +93,19 @@ def test_should_raise_error(table: Table, target_name: str, feature_names: list[ ("table", "target_name", "feature_names"), [ ( - Table({ - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }), "T", ["A", "B", "C"] - ) + Table( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + ), + "T", + ["A", "B", "C"], + ), ], - ids=["create_tagged_table"] + ids=["create_tagged_table"], ) def test_should_create_a_tagged_table(table: Table, target_name: str, feature_names: list[str]) -> None: tagged_table = TaggedTable._from_table(table, target_name=target_name, feature_names=feature_names) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py index a331c3983..071bb0ffb 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py @@ -1,6 +1,5 @@ import pytest - -from safeds.data.tabular.containers import TaggedTable, Table +from safeds.data.tabular.containers import Table, TaggedTable from safeds.exceptions import UnknownColumnNameError @@ -13,7 +12,11 @@ "B": [2, 5], "C": [3, 6], "T": [0, 1], - }, "T", ["A", "B", "C", "D", "E"], UnknownColumnNameError, r"Could not find column\(s\) 'D, E'" + }, + "T", + ["A", "B", "C", "D", "E"], + UnknownColumnNameError, + r"Could not find column\(s\) 'D, E'", ), ( { @@ -21,7 +24,11 @@ "B": [2, 5], "C": [3, 6], "T": [0, 1], - }, "D", ["A", "B", "C"], UnknownColumnNameError, r"Could not find column\(s\) 'D'" + }, + "D", + ["A", "B", "C"], + UnknownColumnNameError, + r"Could not find column\(s\) 'D'", ), ( { @@ -29,7 +36,11 @@ "B": [2, 5], "C": [3, 6], "T": [0, 1], - }, "A", ["A", "B", "C"], ValueError, r"Column 'A' cannot be both feature and target." + }, + "A", + ["A", "B", "C"], + ValueError, + r"Column 'A' cannot be both feature and target.", ), ( { @@ -37,17 +48,37 @@ "B": [2, 5], "C": [3, 6], "T": [0, 1], - }, "D", [], ValueError, r"At least one feature column must be specified." + }, + "D", + [], + ValueError, + r"At least one feature column must be specified.", ), ( { "A": [1, 4], - }, "A", None, ValueError, r"At least one feature column must be specified." - ) + }, + "A", + None, + ValueError, + r"At least one feature column must be specified.", + ), + ], + ids=[ + "feature_does_not_exist", + "target_does_not_exist", + "target_and_feature_overlap", + "features_are_empty-explicitly", + "features_are_empty_implicitly", ], - ids=["feature_does_not_exist", "target_does_not_exist", "target_and_feature_overlap", "features_are_empty-explicitly", "features_are_empty_implicitly"] ) -def test_should_raise_error(data: dict[str, list[int]], target_name: str, feature_names: list[str] | None, error: type[Exception], error_msg: str) -> None: +def test_should_raise_error( + data: dict[str, list[int]], + target_name: str, + feature_names: list[str] | None, + error: type[Exception], + error_msg: str, +) -> None: with pytest.raises(error, match=error_msg): TaggedTable(data, target_name=target_name, feature_names=feature_names) @@ -61,10 +92,12 @@ def test_should_raise_error(data: dict[str, list[int]], target_name: str, featur "B": [2, 5], "C": [3, 6], "T": [0, 1], - }, "T", ["A", "B", "C"] - ) + }, + "T", + ["A", "B", "C"], + ), ], - ids=["create_tagged_table"] + ids=["create_tagged_table"], ) def test_should_create_a_tagged_table(data: dict[str, list[int]], target_name: str, feature_names: list[str]) -> None: tagged_table = TaggedTable(data, target_name=target_name, feature_names=feature_names) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py index 3c84c219f..051c7fb90 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py @@ -15,7 +15,7 @@ "target": [3, 4, 5], }, target_name="target", - feature_names=["feature_old"] + feature_names=["feature_old"], ), "feature_old", "feature_new", @@ -26,7 +26,7 @@ "target": [3, 4, 5], }, target_name="target", - feature_names=["feature_new"] + feature_names=["feature_new"], ), ), ( @@ -37,7 +37,7 @@ "target_old": [3, 4, 5], }, target_name="target_old", - feature_names=["feature"] + feature_names=["feature"], ), "target_old", "target_new", @@ -48,7 +48,7 @@ "target_new": [3, 4, 5], }, target_name="target_new", - feature_names=["feature"] + feature_names=["feature"], ), ), ( @@ -59,7 +59,7 @@ "target": [3, 4, 5], }, target_name="target", - feature_names=["feature"] + feature_names=["feature"], ), "no_feature_old", "no_feature_new", @@ -70,7 +70,7 @@ "target": [3, 4, 5], }, target_name="target", - feature_names=["feature"] + feature_names=["feature"], ), ), ], diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py index f67015788..72b773adc 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py @@ -121,11 +121,13 @@ ), ), ], - ids=["replace_feature_column_with_one", - "replace_feature_column_with_multiple", - "replace_non_feature_column_with_one", - "replace_non_feature_column_with_multiple", - "replace_target_column"], + ids=[ + "replace_feature_column_with_one", + "replace_feature_column_with_multiple", + "replace_non_feature_column_with_one", + "replace_non_feature_column_with_multiple", + "replace_target_column", + ], ) def test_should_replace_column( original_table: TaggedTable, diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_target.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_target.py index 416dd7f91..755721123 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_target.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_target.py @@ -1,22 +1,24 @@ import pytest - -from safeds.data.tabular.containers import TaggedTable, Column +from safeds.data.tabular.containers import Column, TaggedTable @pytest.mark.parametrize( ("tagged_table", "target_column"), [ ( - TaggedTable({ - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }, target_name="T"), - Column("T", [0, 1]) - ) + TaggedTable( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + target_name="T", + ), + Column("T", [0, 1]), + ), ], - ids=["target"] + ids=["target"], ) def test_should_return_target(tagged_table: TaggedTable, target_column: Column) -> None: assert tagged_table.target == target_column From 4c834910f6c9f0f8751b5c3c7b1a5736428809e3 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 30 Jun 2023 17:15:33 +0000 Subject: [PATCH 118/149] style: apply automated linter fixes --- .../containers/_table/_tagged_table/test_add_columns.py | 4 +++- .../containers/_table/_tagged_table/test_from_table.py | 6 +++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py index 2ca4e5fb1..8773e3695 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py @@ -32,6 +32,8 @@ ids=["add_columns_as_non_feature"], ) def test_should_add_columns( - tagged_table: TaggedTable, columns: list[Column], expected_tagged_table: TaggedTable, + tagged_table: TaggedTable, + columns: list[Column], + expected_tagged_table: TaggedTable, ) -> None: assert_that_tagged_tables_are_equal(tagged_table.add_columns(columns), expected_tagged_table) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_from_table.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_from_table.py index 3ba2ba7ea..b15b5a117 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_from_table.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_from_table.py @@ -83,7 +83,11 @@ ], ) def test_should_raise_error( - table: Table, target_name: str, feature_names: list[str] | None, error: type[Exception], error_msg: str, + table: Table, + target_name: str, + feature_names: list[str] | None, + error: type[Exception], + error_msg: str, ) -> None: with pytest.raises(error, match=error_msg): TaggedTable._from_table(table, target_name=target_name, feature_names=feature_names) From 447dd82533d4fb047f7ef28be96bbfa69ad5ede5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 7 Jul 2023 09:36:52 +0200 Subject: [PATCH 119/149] Update test_remove_rows_with_outliers.py --- .../test_remove_rows_with_outliers.py | 38 +++++++++++-------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py index 20fbe40b1..e5832f403 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py @@ -1,22 +1,30 @@ +import pytest from safeds.data.tabular.containers import TaggedTable from tests.helpers import assert_that_tagged_tables_are_equal -def test_should_remove_row() -> None: - table = TaggedTable( - { - "feature": [1.0, 11.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], - "target": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], - }, - "target", - ) +@pytest.mark.parametrize( + ("table", "expected"), + [ + ( + TaggedTable( + { + "feature": [1.0, 11.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + "target": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + }, + "target", + ), + TaggedTable( + { + "feature": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + "target": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + }, + "target", + ) + ), + ] +) +def test_should_remove_row(table: TaggedTable, expected: TaggedTable) -> None: new_table = table.remove_rows_with_outliers() - expected = TaggedTable( - { - "feature": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], - "target": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], - }, - "target", - ) assert_that_tagged_tables_are_equal(new_table, expected) From 1016eefc134c307bc8c7e1e9f317368ac795ea86 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 7 Jul 2023 07:38:50 +0000 Subject: [PATCH 120/149] style: apply automated linter fixes --- .../_table/_tagged_table/test_remove_rows_with_outliers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py index e5832f403..e7ba1fe5d 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py @@ -21,9 +21,9 @@ "target": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], }, "target", - ) + ), ), - ] + ], ) def test_should_remove_row(table: TaggedTable, expected: TaggedTable) -> None: new_table = table.remove_rows_with_outliers() From 31c83f3190601a60b9090a609c4c43f082646c33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 7 Jul 2023 09:44:59 +0200 Subject: [PATCH 121/149] Add new testcase to test_remove_rows_with_outliers --- .../test_remove_rows_with_outliers.py | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py index e5832f403..728fc6550 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py @@ -23,8 +23,25 @@ "target", ) ), - ] + ( + TaggedTable( + { + "feature": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + "target": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + }, + "target", + ), + TaggedTable( + { + "feature": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + "target": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + }, + "target", + ) + ), + ], + ids=["with_outliers", "no_outliers"] ) -def test_should_remove_row(table: TaggedTable, expected: TaggedTable) -> None: +def test_should_remove_rows_with_outliers(table: TaggedTable, expected: TaggedTable) -> None: new_table = table.remove_rows_with_outliers() assert_that_tagged_tables_are_equal(new_table, expected) From ae7a1051217da4e512b3851f7f62f6cfae0dec88 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 7 Jul 2023 07:48:29 +0000 Subject: [PATCH 122/149] style: apply automated linter fixes --- .../_table/_tagged_table/test_remove_rows_with_outliers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py index c7a09cdff..59a5704f0 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_outliers.py @@ -37,10 +37,10 @@ "target": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], }, "target", - ) + ), ), ], - ids=["with_outliers", "no_outliers"] + ids=["with_outliers", "no_outliers"], ) def test_should_remove_rows_with_outliers(table: TaggedTable, expected: TaggedTable) -> None: new_table = table.remove_rows_with_outliers() From 5e3c1d4842f2b2b344a7bef514bac8af3b9e5bfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 7 Jul 2023 09:51:15 +0200 Subject: [PATCH 123/149] Update test_remove_rows_with_missing_values.py --- .../test_remove_rows_with_missing_values.py | 55 ++++++++++++++----- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_missing_values.py index b47bbbe06..160bc14cf 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_missing_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_missing_values.py @@ -1,22 +1,47 @@ +import pytest from safeds.data.tabular.containers import TaggedTable from tests.helpers import assert_that_tagged_tables_are_equal -def test_should_remove_row() -> None: - table = TaggedTable( - { - "feature": [0.0, None, 2.0], - "target": [3.0, 4.0, 5.0], - }, - "target", - ) +@pytest.mark.parametrize( + ("table" ,"expected"), + [ + ( + TaggedTable( + { + "feature": [0.0, None, 2.0], + "target": [3.0, 4.0, 5.0], + }, + "target", + ), + TaggedTable( + { + "feature": [0.0, 2.0], + "target": [3.0, 5.0], + }, + "target", + ) + ), + ( + TaggedTable( + { + "feature": [0.0, 1.0, 2.0], + "target": [3.0, 4.0, 5.0], + }, + "target", + ), + TaggedTable( + { + "feature": [0.0, 1.0, 2.0], + "target": [3.0, 4.0, 5.0], + }, + "target", + ) + ), + ], + ids=["with_missing_values", "without_missing_values"] +) +def test_should_remove_rows_with_missing_values(table: TaggedTable, expected: TaggedTable) -> None: new_table = table.remove_rows_with_missing_values() - expected = TaggedTable( - { - "feature": [0.0, 2.0], - "target": [3.0, 5.0], - }, - "target", - ) assert_that_tagged_tables_are_equal(new_table, expected) From b352d53bfe9cdd598da6da5564f811a1353dfca3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 7 Jul 2023 09:54:51 +0200 Subject: [PATCH 124/149] Update test_remove_duplicate_rows.py --- .../test_remove_duplicate_rows.py | 55 ++++++++++++++----- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_duplicate_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_duplicate_rows.py index 4431bee34..00818edbc 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_duplicate_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_duplicate_rows.py @@ -1,22 +1,47 @@ +import pytest from safeds.data.tabular.containers import TaggedTable from tests.helpers import assert_that_tagged_tables_are_equal -def test_should_remove_row() -> None: - table = TaggedTable( - { - "feature": [0, 0, 1], - "target": [2, 2, 3], - }, - "target", - ) +@pytest.mark.parametrize( + ("table", "expected"), + [ + ( + TaggedTable( + { + "feature": [0, 0, 1], + "target": [2, 2, 3], + }, + "target", + ), + TaggedTable( + { + "feature": [0, 1], + "target": [2, 3], + }, + "target", + ) + ), + ( + TaggedTable( + { + "feature": [0, 1, 2], + "target": [2, 2, 3], + }, + "target", + ), + TaggedTable( + { + "feature": [0, 1, 2], + "target": [2, 2, 3], + }, + "target", + ) + ), + ], + ids=["with_duplicate_rows", "without_duplicate_rows"] +) +def test_should_remove_duplicate_rows(table: TaggedTable, expected: TaggedTable) -> None: new_table = table.remove_duplicate_rows() - expected = TaggedTable( - { - "feature": [0, 1], - "target": [2, 3], - }, - "target", - ) assert_that_tagged_tables_are_equal(new_table, expected) From 8ef1442938f5dc2710793322bba682e9d69a6dcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 7 Jul 2023 10:03:05 +0200 Subject: [PATCH 125/149] Parametrize non_numerical tests --- ...emove_columns_with_non_numerical_values.py | 64 ++++++++++++------- 1 file changed, 40 insertions(+), 24 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py index 3d6719746..e0a8e29ef 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py @@ -5,34 +5,50 @@ from tests.helpers import assert_that_tagged_tables_are_equal -def test_should_remove_column() -> None: - table = TaggedTable( - { - "feature_numerical": [0, 1, 2], - "feature_non_numerical": ["a", "b", "c"], - "target": [3, 4, 5], - }, - "target", - ) +@pytest.mark.parametrize( + ("table", "expected"), + [ + ( + TaggedTable( + { + "feature_numerical": [0, 1, 2], + "feature_non_numerical": ["a", "b", "c"], + "target": [3, 4, 5], + }, + "target", + ), + TaggedTable( + { + "feature_numerical": [0, 1, 2], + "target": [3, 4, 5], + }, + "target", + ) + ), + ], + ids=["with_non_numerical"] +) +def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, expected: TaggedTable) -> None: new_table = table.remove_columns_with_non_numerical_values() - expected = TaggedTable( - { - "feature_numerical": [0, 1, 2], - "target": [3, 4, 5], - }, - "target", - ) assert_that_tagged_tables_are_equal(new_table, expected) -def test_should_throw_column_is_target() -> None: - table = TaggedTable( - { - "feature": [0, 1, 2], - "target": ["a", "b", "c"], - }, - "target", - ) +@pytest.mark.parametrize( + "table", + [ + ( + TaggedTable( + { + "feature": [0, 1, 2], + "target": ["a", "b", "c"], + }, + "target", + ) + ), + ], + ids=["only_target_has_non_numerical"] +) +def test_should_throw_column_is_target(table: TaggedTable) -> None: with pytest.raises( ColumnIsTargetError, match='Illegal schema modification: Column "target" is the target column and cannot be removed.', From 6cffec3fe0b1c0bf341b72f93d60e5de60100d55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 7 Jul 2023 10:10:16 +0200 Subject: [PATCH 126/149] Add testcases for non_numerical tests --- ...emove_columns_with_non_numerical_values.py | 51 +++++++++++++++++-- 1 file changed, 48 insertions(+), 3 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py index e0a8e29ef..2d8d5f322 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py @@ -13,20 +13,65 @@ { "feature_numerical": [0, 1, 2], "feature_non_numerical": ["a", "b", "c"], + "non_feature_numerical": [7, 8, 9], "target": [3, 4, 5], }, "target", + ["feature_numerical", "feature_non_numerical"], ), TaggedTable( { "feature_numerical": [0, 1, 2], + "non_feature_numerical": [7, 8, 9], "target": [3, 4, 5], }, "target", - ) - ), + ["feature_numerical"], + ) + ), + ( + TaggedTable( + { + "feature_numerical": [0, 1, 2], + "non_feature_numerical": [7, 8, 9], + "non_feature_non_numerical": ["a", "b", "c"], + "target": [3, 4, 5], + }, + "target", + ["feature_numerical"], + ), + TaggedTable( + { + "feature_numerical": [0, 1, 2], + "non_feature_numerical": [7, 8, 9], + "target": [3, 4, 5], + }, + "target", + ["feature_numerical"] + ) + ), + ( + TaggedTable( + { + "feature_numerical": [0, 1, 2], + "non_feature_numerical": [7, 8, 9], + "target": [3, 4, 5], + }, + "target", + ["feature_numerical"], + ), + TaggedTable( + { + "feature_numerical": [0, 1, 2], + "non_feature_numerical": [7, 8, 9], + "target": [3, 4, 5], + }, + "target", + ["feature_numerical"] + ) + ), ], - ids=["with_non_numerical"] + ids=["non_numerical_feature", "non_numerical_non_feature", "all_numerical"] ) def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, expected: TaggedTable) -> None: new_table = table.remove_columns_with_non_numerical_values() From 9212532235b02a9ea3fc356ad1b257097b47f189 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 7 Jul 2023 10:13:17 +0200 Subject: [PATCH 127/149] Update error test for non_numerical --- ...emove_columns_with_non_numerical_values.py | 42 ++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py index 2d8d5f322..3841e7b7d 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py @@ -85,13 +85,53 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp TaggedTable( { "feature": [0, 1, 2], + "non_feature": [1, 2, 3], "target": ["a", "b", "c"], }, "target", + ["feature"], + ) + ), + ( + TaggedTable( + { + "feature": [0, "x", 2], + "non_feature": [1, 2, 3], + "target": ["a", "b", "c"], + }, + "target", + ["feature"], + ) + ), + ( + TaggedTable( + { + "feature": [0, 1, 2], + "non_feature": [1, "x", 3], + "target": ["a", "b", "c"], + }, + "target", + ["feature"], + ) + ), + ( + TaggedTable( + { + "feature": [0, "x", 2], + "non_feature": [1, "x", 3], + "target": ["a", "b", "c"], + }, + "target", + ["feature"], ) ), ], - ids=["only_target_has_non_numerical"] + ids=[ + "only_target_non_numerical", + "also_feature_non_numerical", + "also_non_feature_non_numerical", + "all_non_numerical", + ] ) def test_should_throw_column_is_target(table: TaggedTable) -> None: with pytest.raises( From 6ea0147fa8b15d12e94baca0707019ba8135c074 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 7 Jul 2023 10:21:01 +0200 Subject: [PATCH 128/149] Update test_remove_columns_with_missing_values.py --- ...test_remove_columns_with_missing_values.py | 149 +++++++++++++++--- 1 file changed, 125 insertions(+), 24 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py index 119a82d56..1ccd4f43a 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py @@ -5,34 +5,135 @@ from tests.helpers import assert_that_tagged_tables_are_equal -def test_should_remove_column() -> None: - table = TaggedTable( - { - "feature_complete": [0, 1, 2], - "feature_incomplete": [3, None, 5], - "target": [6, 7, 8], - }, - "target", - ) +@pytest.mark.parametrize( + ("table", "expected"), + [ + ( + TaggedTable( + { + "feature_complete": [0, 1, 2], + "feature_incomplete": [3, None, 5], + "non_feature_complete": [7, 8, 9], + "target": [3, 4, 5], + }, + "target", + ["feature_complete", "feature_incomplete"], + ), + TaggedTable( + { + "feature_complete": [0, 1, 2], + "non_feature_complete": [7, 8, 9], + "target": [3, 4, 5], + }, + "target", + ["feature_complete"], + ) + ), + ( + TaggedTable( + { + "feature_complete": [0, 1, 2], + "non_feature_complete": [7, 8, 9], + "non_feature_incomplete": [3, None, 5], + "target": [3, 4, 5], + }, + "target", + ["feature_complete"], + ), + TaggedTable( + { + "feature_complete": [0, 1, 2], + "non_feature_complete": [7, 8, 9], + "target": [3, 4, 5], + }, + "target", + ["feature_complete"] + ) + ), + ( + TaggedTable( + { + "feature_complete": [0, 1, 2], + "non_feature_complete": [7, 8, 9], + "target": [3, 4, 5], + }, + "target", + ["feature_complete"], + ), + TaggedTable( + { + "feature_complete": [0, 1, 2], + "non_feature_complete": [7, 8, 9], + "target": [3, 4, 5], + }, + "target", + ["feature_complete"] + ) + ), + ], + ids=["incomplete_feature", "incomplete_non_feature", "all_complete"] +) +def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, expected: TaggedTable) -> None: new_table = table.remove_columns_with_missing_values() - expected = TaggedTable( - { - "feature_complete": [0, 1, 2], - "target": [6, 7, 8], - }, - "target", - ) assert_that_tagged_tables_are_equal(new_table, expected) -def test_should_throw_column_is_target() -> None: - table = TaggedTable( - { - "feature": [0, 1, 2], - "target": [3, None, 5], - }, - "target", - ) +@pytest.mark.parametrize( + "table", + [ + ( + TaggedTable( + { + "feature": [0, 1, 2], + "non_feature": [1, 2, 3], + "target": [3, None, 5], + }, + "target", + ["feature"], + ) + ), + ( + TaggedTable( + { + "feature": [0, None, 2], + "non_feature": [1, 2, 3], + "target": [None, 4, 5], + }, + "target", + ["feature"], + ) + ), + ( + TaggedTable( + { + "feature": [0, 1, 2], + "non_feature": [1, None, 3], + "target": [3, 4, None], + }, + "target", + ["feature"], + ) + ), + ( + TaggedTable( + { + "feature": [0, None, 2], + "non_feature": [1, None, 3], + "target": [3, None, 5], + }, + "target", + ["feature"], + ) + ), + ], + ids=[ + "only_target_incomplete", + "also_feature_incomplete", + "also_non_feature_incomplete", + "all_incomplete", + ] +) +def test_should_throw_column_is_target(table: TaggedTable) -> None: with pytest.raises( ColumnIsTargetError, match='Illegal schema modification: Column "target" is the target column and cannot be removed.', From 70f9e9b79ab3ce84f4f91eaf866f43e114b7ae2e Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 7 Jul 2023 08:37:08 +0000 Subject: [PATCH 129/149] style: apply automated linter fixes --- .../test_remove_columns_with_missing_values.py | 14 +++++++------- ...est_remove_columns_with_non_numerical_values.py | 14 +++++++------- .../_tagged_table/test_remove_duplicate_rows.py | 6 +++--- .../test_remove_rows_with_missing_values.py | 8 ++++---- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py index 1ccd4f43a..4a7117ae4 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py @@ -27,7 +27,7 @@ }, "target", ["feature_complete"], - ) + ), ), ( TaggedTable( @@ -47,8 +47,8 @@ "target": [3, 4, 5], }, "target", - ["feature_complete"] - ) + ["feature_complete"], + ), ), ( TaggedTable( @@ -67,11 +67,11 @@ "target": [3, 4, 5], }, "target", - ["feature_complete"] - ) + ["feature_complete"], + ), ), ], - ids=["incomplete_feature", "incomplete_non_feature", "all_complete"] + ids=["incomplete_feature", "incomplete_non_feature", "all_complete"], ) def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, expected: TaggedTable) -> None: new_table = table.remove_columns_with_missing_values() @@ -131,7 +131,7 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp "also_feature_incomplete", "also_non_feature_incomplete", "all_incomplete", - ] + ], ) def test_should_throw_column_is_target(table: TaggedTable) -> None: with pytest.raises( diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py index 3841e7b7d..b374d0435 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py @@ -27,7 +27,7 @@ }, "target", ["feature_numerical"], - ) + ), ), ( TaggedTable( @@ -47,8 +47,8 @@ "target": [3, 4, 5], }, "target", - ["feature_numerical"] - ) + ["feature_numerical"], + ), ), ( TaggedTable( @@ -67,11 +67,11 @@ "target": [3, 4, 5], }, "target", - ["feature_numerical"] - ) + ["feature_numerical"], + ), ), ], - ids=["non_numerical_feature", "non_numerical_non_feature", "all_numerical"] + ids=["non_numerical_feature", "non_numerical_non_feature", "all_numerical"], ) def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, expected: TaggedTable) -> None: new_table = table.remove_columns_with_non_numerical_values() @@ -131,7 +131,7 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp "also_feature_non_numerical", "also_non_feature_non_numerical", "all_non_numerical", - ] + ], ) def test_should_throw_column_is_target(table: TaggedTable) -> None: with pytest.raises( diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_duplicate_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_duplicate_rows.py index 00818edbc..1cc6936e3 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_duplicate_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_duplicate_rows.py @@ -21,7 +21,7 @@ "target": [2, 3], }, "target", - ) + ), ), ( TaggedTable( @@ -37,10 +37,10 @@ "target": [2, 2, 3], }, "target", - ) + ), ), ], - ids=["with_duplicate_rows", "without_duplicate_rows"] + ids=["with_duplicate_rows", "without_duplicate_rows"], ) def test_should_remove_duplicate_rows(table: TaggedTable, expected: TaggedTable) -> None: new_table = table.remove_duplicate_rows() diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_missing_values.py index 160bc14cf..2f22f1489 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_missing_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_rows_with_missing_values.py @@ -5,7 +5,7 @@ @pytest.mark.parametrize( - ("table" ,"expected"), + ("table", "expected"), [ ( TaggedTable( @@ -21,7 +21,7 @@ "target": [3.0, 5.0], }, "target", - ) + ), ), ( TaggedTable( @@ -37,10 +37,10 @@ "target": [3.0, 4.0, 5.0], }, "target", - ) + ), ), ], - ids=["with_missing_values", "without_missing_values"] + ids=["with_missing_values", "without_missing_values"], ) def test_should_remove_rows_with_missing_values(table: TaggedTable, expected: TaggedTable) -> None: new_table = table.remove_rows_with_missing_values() From 09bbadb6400e1ccceb2d54af8b4b420eddc7cfd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 7 Jul 2023 10:45:24 +0200 Subject: [PATCH 130/149] Add testcases for remove_columns --- .../_tagged_table/test_remove_columns.py | 146 +++++++++++++++++- 1 file changed, 142 insertions(+), 4 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py index 52ee725d6..7df8ae6ea 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py @@ -9,12 +9,150 @@ ("table", "columns", "expected"), [ ( - TaggedTable._from_table(Table({"feat1": [1, 2, 3], "feat2": [4, 5, 6], "target": [7, 8, 9]}), "target"), - ["feat2"], - TaggedTable._from_table(Table({"feat1": [1, 2, 3], "target": [7, 8, 9]}), "target"), + TaggedTable._from_table( + Table( + { + "feat_1": [1, 2, 3], + "feat_2": [4, 5, 6], + "non_feat_1": [2, 4, 6], + "non_feat_2": [3, 6, 9], + "target": [7, 8, 9] + } + ), + "target", + ["feat_1", "feat_2"], + ), + ["feat_2"], + TaggedTable._from_table( + Table( + { + "feat_1": [1, 2, 3], + "non_feat_1": [2, 4, 6], + "non_feat_2": [3, 6, 9], + "target": [7, 8, 9] + } + ), + "target", + ["feat_1"], + ), ), + ( + TaggedTable._from_table( + Table( + { + "feat_1": [1, 2, 3], + "feat_2": [4, 5, 6], + "non_feat_1": [2, 4, 6], + "non_feat_2": [3, 6, 9], + "target": [7, 8, 9] + } + ), + "target", + ["feat_1", "feat_2"], + ), + ["non_feat_2"], + TaggedTable._from_table( + Table( + { + "feat_1": [1, 2, 3], + "feat_2": [4, 5, 6], + "non_feat_1": [2, 4, 6], + "target": [7, 8, 9] + } + ), + "target", + ["feat_1", "feat_2"], + ), + ), + ( + TaggedTable._from_table( + Table( + { + "feat_1": [1, 2, 3], + "feat_2": [4, 5, 6], + "non_feat_1": [2, 4, 6], + "non_feat_2": [3, 6, 9], + "target": [7, 8, 9] + } + ), + "target", + ["feat_1", "feat_2"], + ), + ["non_feat_1", "non_feat_2"], + TaggedTable._from_table( + Table( + { + "feat_1": [1, 2, 3], + "feat_2": [4, 5, 6], + "target": [7, 8, 9] + } + ), + "target", + ["feat_1", "feat_2"], + ), + ), + ( + TaggedTable._from_table( + Table( + { + "feat_1": [1, 2, 3], + "feat_2": [4, 5, 6], + "non_feat_1": [2, 4, 6], + "non_feat_2": [3, 6, 9], + "target": [7, 8, 9] + } + ), + "target", + ["feat_1", "feat_2"], + ), + ["feat_2", "non_feat_2"], + TaggedTable._from_table( + Table( + { + "feat_1": [1, 2, 3], + "non_feat_1": [2, 4, 6], + "target": [7, 8, 9] + } + ), + "target", + ["feat_1"], + ), + ), + ( + TaggedTable._from_table( + Table( + { + "feat_1": [1, 2, 3], + "feat_2": [4, 5, 6], + "non_feat_1": [2, 4, 6], + "non_feat_2": [3, 6, 9], + "target": [7, 8, 9] + } + ), + "target", + ["feat_1", "feat_2"], + ), + ["feat_2", "non_feat_2"], + TaggedTable._from_table( + Table( + { + "feat_1": [1, 2, 3], + "non_feat_1": [2, 4, 6], + "target": [7, 8, 9] + } + ), + "target", + ["feat_1"], + ), + ), + ], + ids=[ + "remove_feature", + "remove_non_feature", + "remove_all_non_features", + "remove_some_feat_and_some_non_feat", + "remove_nothing" ], - ids=["only_features_remove_feature"], ) def test_should_remove_columns(table: TaggedTable, columns: list[str], expected: TaggedTable) -> None: new_table = table.remove_columns(columns) From aecf49ac9b78b8c7186be8fea7218df6519c5a5c Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 7 Jul 2023 08:47:33 +0000 Subject: [PATCH 131/149] style: apply automated linter fixes --- .../_tagged_table/test_remove_columns.py | 64 +++++-------------- 1 file changed, 16 insertions(+), 48 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py index 7df8ae6ea..36bc225b0 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py @@ -16,22 +16,15 @@ "feat_2": [4, 5, 6], "non_feat_1": [2, 4, 6], "non_feat_2": [3, 6, 9], - "target": [7, 8, 9] - } + "target": [7, 8, 9], + }, ), "target", ["feat_1", "feat_2"], ), ["feat_2"], TaggedTable._from_table( - Table( - { - "feat_1": [1, 2, 3], - "non_feat_1": [2, 4, 6], - "non_feat_2": [3, 6, 9], - "target": [7, 8, 9] - } - ), + Table({"feat_1": [1, 2, 3], "non_feat_1": [2, 4, 6], "non_feat_2": [3, 6, 9], "target": [7, 8, 9]}), "target", ["feat_1"], ), @@ -44,22 +37,15 @@ "feat_2": [4, 5, 6], "non_feat_1": [2, 4, 6], "non_feat_2": [3, 6, 9], - "target": [7, 8, 9] - } + "target": [7, 8, 9], + }, ), "target", ["feat_1", "feat_2"], ), ["non_feat_2"], TaggedTable._from_table( - Table( - { - "feat_1": [1, 2, 3], - "feat_2": [4, 5, 6], - "non_feat_1": [2, 4, 6], - "target": [7, 8, 9] - } - ), + Table({"feat_1": [1, 2, 3], "feat_2": [4, 5, 6], "non_feat_1": [2, 4, 6], "target": [7, 8, 9]}), "target", ["feat_1", "feat_2"], ), @@ -72,21 +58,15 @@ "feat_2": [4, 5, 6], "non_feat_1": [2, 4, 6], "non_feat_2": [3, 6, 9], - "target": [7, 8, 9] - } + "target": [7, 8, 9], + }, ), "target", ["feat_1", "feat_2"], ), ["non_feat_1", "non_feat_2"], TaggedTable._from_table( - Table( - { - "feat_1": [1, 2, 3], - "feat_2": [4, 5, 6], - "target": [7, 8, 9] - } - ), + Table({"feat_1": [1, 2, 3], "feat_2": [4, 5, 6], "target": [7, 8, 9]}), "target", ["feat_1", "feat_2"], ), @@ -99,21 +79,15 @@ "feat_2": [4, 5, 6], "non_feat_1": [2, 4, 6], "non_feat_2": [3, 6, 9], - "target": [7, 8, 9] - } + "target": [7, 8, 9], + }, ), "target", ["feat_1", "feat_2"], ), ["feat_2", "non_feat_2"], TaggedTable._from_table( - Table( - { - "feat_1": [1, 2, 3], - "non_feat_1": [2, 4, 6], - "target": [7, 8, 9] - } - ), + Table({"feat_1": [1, 2, 3], "non_feat_1": [2, 4, 6], "target": [7, 8, 9]}), "target", ["feat_1"], ), @@ -126,21 +100,15 @@ "feat_2": [4, 5, 6], "non_feat_1": [2, 4, 6], "non_feat_2": [3, 6, 9], - "target": [7, 8, 9] - } + "target": [7, 8, 9], + }, ), "target", ["feat_1", "feat_2"], ), ["feat_2", "non_feat_2"], TaggedTable._from_table( - Table( - { - "feat_1": [1, 2, 3], - "non_feat_1": [2, 4, 6], - "target": [7, 8, 9] - } - ), + Table({"feat_1": [1, 2, 3], "non_feat_1": [2, 4, 6], "target": [7, 8, 9]}), "target", ["feat_1"], ), @@ -151,7 +119,7 @@ "remove_non_feature", "remove_all_non_features", "remove_some_feat_and_some_non_feat", - "remove_nothing" + "remove_nothing", ], ) def test_should_remove_columns(table: TaggedTable, columns: list[str], expected: TaggedTable) -> None: From 03efb3c58c71eb279eba687223b2b3babace090b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 7 Jul 2023 10:49:00 +0200 Subject: [PATCH 132/149] Add testcases for error in remove_columns --- .../_tagged_table/test_remove_columns.py | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py index 7df8ae6ea..40a78c9a2 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py @@ -161,8 +161,37 @@ def test_should_remove_columns(table: TaggedTable, columns: list[str], expected: @pytest.mark.parametrize( ("table", "columns"), - [(TaggedTable._from_table(Table({"feat": [1, 2, 3], "target": [4, 5, 6]}), "target"), ["target"])], - ids=["only_features_and_target"], + [ + ( + TaggedTable._from_table( + Table( + { + "feat": [1, 2, 3], + "non_feat": [1, 2, 3], + "target": [4, 5, 6] + } + ), + "target", + ["feat"], + ), + ["target"], + ), + ( + TaggedTable._from_table( + Table( + { + "feat": [1, 2, 3], + "non_feat": [1, 2, 3], + "target": [4, 5, 6] + } + ), + "target", + ["feat"], + ), + ["non_feat", "target"], + ), + ], + ids=["remove_only_target", "remove_non_feat_and_target"], ) def test_should_raise_column_is_target_error(table: TaggedTable, columns: list[str]) -> None: with pytest.raises( From 8e7f3658a6d8c89c187620ce706eb1b6a86929c6 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 7 Jul 2023 08:51:07 +0000 Subject: [PATCH 133/149] style: apply automated linter fixes --- .../_table/_tagged_table/test_remove_columns.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py index 48a36d966..5b24b9a2c 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py @@ -132,13 +132,7 @@ def test_should_remove_columns(table: TaggedTable, columns: list[str], expected: [ ( TaggedTable._from_table( - Table( - { - "feat": [1, 2, 3], - "non_feat": [1, 2, 3], - "target": [4, 5, 6] - } - ), + Table({"feat": [1, 2, 3], "non_feat": [1, 2, 3], "target": [4, 5, 6]}), "target", ["feat"], ), @@ -146,13 +140,7 @@ def test_should_remove_columns(table: TaggedTable, columns: list[str], expected: ), ( TaggedTable._from_table( - Table( - { - "feat": [1, 2, 3], - "non_feat": [1, 2, 3], - "target": [4, 5, 6] - } - ), + Table({"feat": [1, 2, 3], "non_feat": [1, 2, 3], "target": [4, 5, 6]}), "target", ["feat"], ), From bffa9748008cbad7bbbdb64478296ddc17d621dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 7 Jul 2023 10:59:55 +0200 Subject: [PATCH 134/149] Fix empty list case in test_remove_columns.py --- .../_table/_tagged_table/test_remove_columns.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py index 48a36d966..f29094880 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py @@ -97,18 +97,22 @@ Table( { "feat_1": [1, 2, 3], - "feat_2": [4, 5, 6], "non_feat_1": [2, 4, 6], - "non_feat_2": [3, 6, 9], "target": [7, 8, 9], }, ), "target", - ["feat_1", "feat_2"], + ["feat_1"], ), - ["feat_2", "non_feat_2"], + [], TaggedTable._from_table( - Table({"feat_1": [1, 2, 3], "non_feat_1": [2, 4, 6], "target": [7, 8, 9]}), + Table( + { + "feat_1": [1, 2, 3], + "non_feat_1": [2, 4, 6], + "target": [7, 8, 9] + } + ), "target", ["feat_1"], ), From 7fb14a873ddcb38970ad05ae2aaab72406223d8b Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 7 Jul 2023 09:04:03 +0000 Subject: [PATCH 135/149] style: apply automated linter fixes --- .../_table/_tagged_table/test_remove_columns.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py index 5767ac075..7d48095d5 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py @@ -106,13 +106,7 @@ ), [], TaggedTable._from_table( - Table( - { - "feat_1": [1, 2, 3], - "non_feat_1": [2, 4, 6], - "target": [7, 8, 9] - } - ), + Table({"feat_1": [1, 2, 3], "non_feat_1": [2, 4, 6], "target": [7, 8, 9]}), "target", ["feat_1"], ), From af7a716033feed5dbe6244df7511e88e36c711f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Fri, 7 Jul 2023 11:04:38 +0200 Subject: [PATCH 136/149] test: added parametrization to tests for `TaggedTable` and finished the first tests fix: Fixed bug for `TaggedTable.filter_rows` feat: Improved error messages for `Table.keep_only_columns` --- .../data/tabular/containers/_tagged_table.py | 8 +- .../_table/_tagged_table/test_add_row.py | 53 ++++---- .../_table/_tagged_table/test_add_rows.py | 53 ++++---- .../_table/_tagged_table/test_as_table.py | 63 +++++++--- .../_table/_tagged_table/test_copy.py | 19 +++ .../_table/_tagged_table/test_filter_rows.py | 117 +++++++++++++++--- .../_table/_tagged_table/test_from_table.py | 29 ++++- .../_table/_tagged_table/test_init.py | 27 +++- .../_tagged_table/test_keep_only_columns.py | 75 ++++++++++- .../tabular/containers/test_tagged_table.py | 17 --- 10 files changed, 349 insertions(+), 112 deletions(-) create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_copy.py delete mode 100644 tests/safeds/data/tabular/containers/test_tagged_table.py diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 841081bea..c4422f1ee 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -379,7 +379,7 @@ def filter_rows(self, query: Callable[[Row], bool]) -> TaggedTable: table : TaggedTable A table containing only the rows filtered by the query. """ - return TaggedTable._from_table(super().filter_rows(query), target_name=self.target.name) + return TaggedTable._from_table(super().filter_rows(query), target_name=self.target.name, feature_names=self.features.column_names) def keep_only_columns(self, column_names: list[str]) -> TaggedTable: """ @@ -402,10 +402,12 @@ def keep_only_columns(self, column_names: list[str]) -> TaggedTable: UnknownColumnNameError If any of the given columns does not exist. IllegalSchemaModificationError - If none of the given columns is the target column. + If none of the given columns is the target column or any of the feature columns. """ if self.target.name not in column_names: - raise IllegalSchemaModificationError("Must keep target column and at least one feature column.") + raise IllegalSchemaModificationError("Must keep the target column.") + if len(set(self.features.column_names).intersection(set(column_names))) == 0: + raise IllegalSchemaModificationError("Must keep at least one feature column.") table = super().keep_only_columns(column_names) return TaggedTable._from_table( table, diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py index 1c5012955..95ea8cceb 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py @@ -1,28 +1,35 @@ +import pytest + from safeds.data.tabular.containers import Row, TaggedTable from tests.helpers import assert_that_tagged_tables_are_equal -def test_should_add_row() -> None: - table = TaggedTable( - { - "feature": [0, 1], - "target": [3, 4], - }, - "target", - ) - row = Row( - { - "feature": 2, - "target": 5, - }, - ) - new_table = table.add_row(row) - expected = TaggedTable( - { - "feature": [0, 1, 2], - "target": [3, 4, 5], - }, - "target", - ) - assert_that_tagged_tables_are_equal(new_table, expected) +@pytest.mark.parametrize( + ("table", "row", "expected"), + [( + TaggedTable( + { + "feature": [0, 1], + "target": [3, 4], + }, + "target", + ), + Row( + { + "feature": 2, + "target": 5, + }, + ), + TaggedTable( + { + "feature": [0, 1, 2], + "target": [3, 4, 5], + }, + "target", + ) + )], + ids=["add_row"] +) +def test_should_add_row(table: TaggedTable, row: Row, expected: TaggedTable) -> None: + assert_that_tagged_tables_are_equal(table.add_row(row), expected) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py index fea52761f..ab403c64d 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py @@ -1,31 +1,38 @@ +import pytest + from safeds.data.tabular.containers import Row, TaggedTable from tests.helpers import assert_that_tagged_tables_are_equal -def test_should_add_rows() -> None: - table = TaggedTable( - { - "feature": [0, 1], - "target": [4, 5], - }, - "target", - ) - rows = [ - Row( +@pytest.mark.parametrize( + ("table", "rows", "expected"), + [( + TaggedTable( { - "feature": 2, - "target": 6, + "feature": [0, 1], + "target": [4, 5], }, + "target", ), - Row({"feature": 3, "target": 7}), - ] - new_table = table.add_rows(rows) - expected = TaggedTable( - { - "feature": [0, 1, 2, 3], - "target": [4, 5, 6, 7], - }, - "target", - ) - assert_that_tagged_tables_are_equal(new_table, expected) + [ + Row( + { + "feature": 2, + "target": 6, + }, + ), + Row({"feature": 3, "target": 7}), + ], + TaggedTable( + { + "feature": [0, 1, 2, 3], + "target": [4, 5, 6, 7], + }, + "target", + ) + )], + ids=["add_rows"] +) +def test_should_add_rows(table: TaggedTable, rows: list[Row], expected: TaggedTable) -> None: + assert_that_tagged_tables_are_equal(table.add_rows(rows), expected) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_as_table.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_as_table.py index a1e255135..c979301f4 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_as_table.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_as_table.py @@ -1,22 +1,53 @@ +import pytest + from safeds.data.tabular.containers import Table, TaggedTable -def test_should_return_table() -> None: - tagged_table = TaggedTable( - { - "feature_1": [3, 9, 6], - "feature_2": [6, 12, 9], - "target": [1, 3, 2], - }, - "target", - ) - expected = Table( - { - "feature_1": [3, 9, 6], - "feature_2": [6, 12, 9], - "target": [1, 3, 2], - }, - ) +@pytest.mark.parametrize( + ("tagged_table", "expected"), + [ + ( + TaggedTable( + { + "feature_1": [3, 9, 6], + "feature_2": [6, 12, 9], + "target": [1, 3, 2], + }, + "target", + ["feature_1", "feature_2"] + ), + Table( + { + "feature_1": [3, 9, 6], + "feature_2": [6, 12, 9], + "target": [1, 3, 2], + }, + ) + ), + ( + TaggedTable( + { + "feature_1": [3, 9, 6], + "feature_2": [6, 12, 9], + "other": [3, 9, 12], + "target": [1, 3, 2], + }, + "target", + ["feature_1", "feature_2"] + ), + Table( + { + "feature_1": [3, 9, 6], + "feature_2": [6, 12, 9], + "other": [3, 9, 12], + "target": [1, 3, 2], + }, + ) + ) + ], + ids=["normal", "table_with_column_as_non_feature"] +) +def test_should_return_table(tagged_table: TaggedTable, expected: Table) -> None: table = tagged_table._as_table() assert table.schema == expected.schema assert table == expected diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_copy.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_copy.py new file mode 100644 index 000000000..43bfcd943 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_copy.py @@ -0,0 +1,19 @@ +import pytest + +from safeds.data.tabular.containers import TaggedTable + + +@pytest.mark.parametrize( + "tagged_table", + [ + TaggedTable({"a": [], "b": []}, target_name="b", feature_names=["a"]), + TaggedTable({"a": ["a", 3, 0.1], "b": [True, False, None]}, target_name="b", feature_names=["a"]), + TaggedTable({"a": ["a", 3, 0.1], "b": [True, False, None], "c": ["a", "b", "c"]}, target_name="b", feature_names=["a"]), + TaggedTable({"a": [], "b": [], "c": []}, target_name="b", feature_names=["a"]), + ], + ids=["empty-rows", "normal", "column_as_non_feature", "column_as_non_feature_with_empty_rows"], +) +def test_should_copy_tagged_table(tagged_table: TaggedTable) -> None: + copied = tagged_table._copy() + assert copied == tagged_table + assert copied is not tagged_table diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py index 8640d2acb..1bb13a4a4 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py @@ -1,24 +1,101 @@ -from safeds.data.tabular.containers import TaggedTable +from typing import Callable + +import pytest + +from safeds.data.tabular.containers import TaggedTable, Row from tests.helpers import assert_that_tagged_tables_are_equal -def test_should_remove_row() -> None: - table = TaggedTable( - { - "feature_1": [3, 9, 6], - "feature_2": [6, 12, 9], - "target": [1, 3, 2], - }, - "target", - ) - new_table = table.filter_rows(lambda row: all(row.get_value(col) < 10 for col in table.column_names)) - expected = TaggedTable( - { - "feature_1": [3, 6], - "feature_2": [6, 9], - "target": [1, 2], - }, - "target", - ) - assert_that_tagged_tables_are_equal(new_table, expected) +@pytest.mark.parametrize( + ("table", "expected", "query"), + [ + ( + TaggedTable( + { + "feature_1": [3, 9, 6], + "feature_2": [6, 12, 9], + "target": [1, 3, 2], + }, + "target", + ), + TaggedTable( + { + "feature_1": [3, 6], + "feature_2": [6, 9], + "target": [1, 2], + }, + "target", + ), + lambda row: all(row.get_value(col) < 10 for col in row.column_names) + ), + ( + TaggedTable( + { + "feature_1": [3, 9, 6, 2], + "feature_2": [6, 12, 9, 3], + "other": [1, 2, 3, 10], + "target": [1, 3, 2, 4], + }, + "target", + ["feature_1", "feature_2"] + ), + TaggedTable( + { + "feature_1": [3, 6], + "feature_2": [6, 9], + "other": [1, 3], + "target": [1, 2], + }, + "target", + ["feature_1", "feature_2"] + ), + lambda row: all(row.get_value(col) < 10 for col in row.column_names) + ), +( + TaggedTable( + { + "feature_1": [3, 9, 6], + "feature_2": [6, 12, 9], + "target": [1, 3, 2], + }, + "target", + ), + TaggedTable( + { + "feature_1": [3, 9, 6], + "feature_2": [6, 12, 9], + "target": [1, 3, 2], + }, + "target", + ), + lambda row: all(row.get_value(col) < 20 for col in row.column_names) + ), + ( + TaggedTable( + { + "feature_1": [3, 9, 6, 2], + "feature_2": [6, 12, 9, 3], + "other": [1, 2, 3, 10], + "target": [1, 3, 2, 4], + }, + "target", + ["feature_1", "feature_2"] + ), + TaggedTable( + { + "feature_1": [3, 9, 6, 2], + "feature_2": [6, 12, 9, 3], + "other": [1, 2, 3, 10], + "target": [1, 3, 2, 4], + }, + "target", + ["feature_1", "feature_2"] + ), + lambda row: all(row.get_value(col) < 20 for col in row.column_names) + ) + ], + ids=["remove_rows_with_values_greater_9", "remove_rows_with_values_greater_9_non_feature_columns", "remove_no_rows", "remove_no_rows_non_feature_columns"] +) +def test_should_filter_rows(table: TaggedTable, expected: TaggedTable, query: Callable[[Row], bool]) -> None: + assert_that_tagged_tables_are_equal(table.filter_rows(query), expected) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_from_table.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_from_table.py index b15b5a117..fc99ff58d 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_from_table.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_from_table.py @@ -108,11 +108,36 @@ def test_should_raise_error( "T", ["A", "B", "C"], ), + ( + Table( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + ), + "T", + ["A", "C"], + ), + ( + Table( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + ), + "T", + None, + ), ], - ids=["create_tagged_table"], + ids=["create_tagged_table", "tagged_table_not_all_columns_are_features", "tagged_table_with_feature_names_as_None"], ) -def test_should_create_a_tagged_table(table: Table, target_name: str, feature_names: list[str]) -> None: +def test_should_create_a_tagged_table(table: Table, target_name: str, feature_names: list[str] | None) -> None: tagged_table = TaggedTable._from_table(table, target_name=target_name, feature_names=feature_names) + feature_names = feature_names if feature_names is not None else table.remove_columns([target_name]).column_names assert isinstance(tagged_table, TaggedTable) assert tagged_table._features.column_names == feature_names assert tagged_table._target.name == target_name diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py index 071bb0ffb..12306c187 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py @@ -96,11 +96,34 @@ def test_should_raise_error( "T", ["A", "B", "C"], ), + ( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + "T", + ["A", "C"], + ), + ( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + "T", + None, + ), ], - ids=["create_tagged_table"], + ids=["create_tagged_table", "tagged_table_not_all_columns_are_features", "tagged_table_with_feature_names_as_None"], ) -def test_should_create_a_tagged_table(data: dict[str, list[int]], target_name: str, feature_names: list[str]) -> None: +def test_should_create_a_tagged_table(data: dict[str, list[int]], target_name: str, feature_names: list[str] | None) -> None: tagged_table = TaggedTable(data, target_name=target_name, feature_names=feature_names) + if feature_names is None: + feature_names = list(data.keys()) + feature_names.remove(target_name) assert isinstance(tagged_table, TaggedTable) assert tagged_table._features.column_names == feature_names assert tagged_table._target.name == target_name diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py index a51790e3e..0f44ab48b 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py @@ -30,8 +30,55 @@ "target", ), ), + ( + TaggedTable._from_table( + Table( + { + "feat1": [1, 2, 3], + "feat2": [4, 5, 6], + "other": [3, 4, 5], + "target": [7, 8, 9], + }, + ), + "target", + ), + ["feat1", "other", "target"], + TaggedTable._from_table( + Table( + { + "feat1": [1, 2, 3], + "other": [3, 4, 5], + "target": [7, 8, 9], + }, + ), + "target", + ), + ), + ( + TaggedTable._from_table( + Table( + { + "feat1": [1, 2, 3], + "feat2": [4, 5, 6], + "other": [3, 4, 5], + "target": [7, 8, 9], + }, + ), + "target", + ), + ["feat1", "target"], + TaggedTable._from_table( + Table( + { + "feat1": [1, 2, 3], + "target": [7, 8, 9], + }, + ), + "target", + ), + ), ], - ids=["table"], + ids=["table", "table_keep_non_feature_column", "table_not_keep_non_feature_column"], ) def test_should_return_table(table: TaggedTable, column_names: list[str], expected: TaggedTable) -> None: new_table = table.keep_only_columns(column_names) @@ -39,7 +86,7 @@ def test_should_return_table(table: TaggedTable, column_names: list[str], expect @pytest.mark.parametrize( - ("table", "column_names"), + ("table", "column_names", "error_msg"), [ ( TaggedTable._from_table( @@ -47,19 +94,35 @@ def test_should_return_table(table: TaggedTable, column_names: list[str], expect { "feat1": [1, 2, 3], "feat2": [4, 5, 6], + "other": [3, 5, 7], "target": [7, 8, 9], }, ), - "target", + "target", ["feat1", "feat2"] ), ["feat1", "feat2"], + r"Illegal schema modification: Must keep the target column." + ),( + TaggedTable._from_table( + Table( + { + "feat1": [1, 2, 3], + "feat2": [4, 5, 6], + "other": [3, 5, 7], + "target": [7, 8, 9], + }, + ), + "target", ["feat1", "feat2"] + ), + ["target", "other"], + r"Illegal schema modification: Must keep at least one feature column." ), ], - ids=["table"], + ids=["table_remove_target", "table_remove_all_features"], ) -def test_should_raise_illegal_schema_modification(table: TaggedTable, column_names: list[str]) -> None: +def test_should_raise_illegal_schema_modification(table: TaggedTable, column_names: list[str], error_msg: str) -> None: with pytest.raises( IllegalSchemaModificationError, - match="Must keep target column and at least one feature column.", + match=error_msg, ): table.keep_only_columns(column_names) diff --git a/tests/safeds/data/tabular/containers/test_tagged_table.py b/tests/safeds/data/tabular/containers/test_tagged_table.py deleted file mode 100644 index 5287e5a00..000000000 --- a/tests/safeds/data/tabular/containers/test_tagged_table.py +++ /dev/null @@ -1,17 +0,0 @@ -import pytest -from safeds.data.tabular.containers import TaggedTable - - -class TestCopy: - @pytest.mark.parametrize( - "tagged_table", - [ - TaggedTable({"a": [], "b": []}, target_name="b", feature_names=["a"]), - TaggedTable({"a": ["a", 3, 0.1], "b": [True, False, None]}, target_name="b", feature_names=["a"]), - ], - ids=["empty-rows", "normal"], - ) - def test_should_copy_tagged_table(self, tagged_table: TaggedTable) -> None: - copied = tagged_table._copy() - assert copied == tagged_table - assert copied is not tagged_table From afbefab2fc232b8be7ee5a1bd2531ff275ca3005 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 7 Jul 2023 09:07:34 +0000 Subject: [PATCH 137/149] style: apply automated linter fixes --- .../data/tabular/containers/_tagged_table.py | 4 +- .../_table/_tagged_table/test_add_row.py | 47 +++++++++--------- .../_table/_tagged_table/test_add_rows.py | 49 ++++++++++--------- .../_table/_tagged_table/test_as_table.py | 13 +++-- .../_table/_tagged_table/test_copy.py | 5 +- .../_table/_tagged_table/test_filter_rows.py | 32 ++++++------ .../_table/_tagged_table/test_init.py | 4 +- .../_tagged_table/test_keep_only_columns.py | 13 +++-- 8 files changed, 90 insertions(+), 77 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index c4422f1ee..2ee75f9e2 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -379,7 +379,9 @@ def filter_rows(self, query: Callable[[Row], bool]) -> TaggedTable: table : TaggedTable A table containing only the rows filtered by the query. """ - return TaggedTable._from_table(super().filter_rows(query), target_name=self.target.name, feature_names=self.features.column_names) + return TaggedTable._from_table( + super().filter_rows(query), target_name=self.target.name, feature_names=self.features.column_names, + ) def keep_only_columns(self, column_names: list[str]) -> TaggedTable: """ diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py index 95ea8cceb..2badeec11 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py @@ -1,5 +1,4 @@ import pytest - from safeds.data.tabular.containers import Row, TaggedTable from tests.helpers import assert_that_tagged_tables_are_equal @@ -7,29 +6,31 @@ @pytest.mark.parametrize( ("table", "row", "expected"), - [( - TaggedTable( - { - "feature": [0, 1], - "target": [3, 4], - }, - "target", - ), - Row( - { - "feature": 2, - "target": 5, - }, + [ + ( + TaggedTable( + { + "feature": [0, 1], + "target": [3, 4], + }, + "target", + ), + Row( + { + "feature": 2, + "target": 5, + }, + ), + TaggedTable( + { + "feature": [0, 1, 2], + "target": [3, 4, 5], + }, + "target", + ), ), - TaggedTable( - { - "feature": [0, 1, 2], - "target": [3, 4, 5], - }, - "target", - ) - )], - ids=["add_row"] + ], + ids=["add_row"], ) def test_should_add_row(table: TaggedTable, row: Row, expected: TaggedTable) -> None: assert_that_tagged_tables_are_equal(table.add_row(row), expected) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py index ab403c64d..da8c37a5a 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py @@ -1,5 +1,4 @@ import pytest - from safeds.data.tabular.containers import Row, TaggedTable from tests.helpers import assert_that_tagged_tables_are_equal @@ -7,32 +6,34 @@ @pytest.mark.parametrize( ("table", "rows", "expected"), - [( - TaggedTable( - { - "feature": [0, 1], - "target": [4, 5], - }, - "target", - ), - [ - Row( + [ + ( + TaggedTable( + { + "feature": [0, 1], + "target": [4, 5], + }, + "target", + ), + [ + Row( + { + "feature": 2, + "target": 6, + }, + ), + Row({"feature": 3, "target": 7}), + ], + TaggedTable( { - "feature": 2, - "target": 6, + "feature": [0, 1, 2, 3], + "target": [4, 5, 6, 7], }, + "target", ), - Row({"feature": 3, "target": 7}), - ], - TaggedTable( - { - "feature": [0, 1, 2, 3], - "target": [4, 5, 6, 7], - }, - "target", - ) - )], - ids=["add_rows"] + ), + ], + ids=["add_rows"], ) def test_should_add_rows(table: TaggedTable, rows: list[Row], expected: TaggedTable) -> None: assert_that_tagged_tables_are_equal(table.add_rows(rows), expected) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_as_table.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_as_table.py index c979301f4..6c9480671 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_as_table.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_as_table.py @@ -1,5 +1,4 @@ import pytest - from safeds.data.tabular.containers import Table, TaggedTable @@ -14,7 +13,7 @@ "target": [1, 3, 2], }, "target", - ["feature_1", "feature_2"] + ["feature_1", "feature_2"], ), Table( { @@ -22,7 +21,7 @@ "feature_2": [6, 12, 9], "target": [1, 3, 2], }, - ) + ), ), ( TaggedTable( @@ -33,7 +32,7 @@ "target": [1, 3, 2], }, "target", - ["feature_1", "feature_2"] + ["feature_1", "feature_2"], ), Table( { @@ -42,10 +41,10 @@ "other": [3, 9, 12], "target": [1, 3, 2], }, - ) - ) + ), + ), ], - ids=["normal", "table_with_column_as_non_feature"] + ids=["normal", "table_with_column_as_non_feature"], ) def test_should_return_table(tagged_table: TaggedTable, expected: Table) -> None: table = tagged_table._as_table() diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_copy.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_copy.py index 43bfcd943..2ce92ce7f 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_copy.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_copy.py @@ -1,5 +1,4 @@ import pytest - from safeds.data.tabular.containers import TaggedTable @@ -8,7 +7,9 @@ [ TaggedTable({"a": [], "b": []}, target_name="b", feature_names=["a"]), TaggedTable({"a": ["a", 3, 0.1], "b": [True, False, None]}, target_name="b", feature_names=["a"]), - TaggedTable({"a": ["a", 3, 0.1], "b": [True, False, None], "c": ["a", "b", "c"]}, target_name="b", feature_names=["a"]), + TaggedTable( + {"a": ["a", 3, 0.1], "b": [True, False, None], "c": ["a", "b", "c"]}, target_name="b", feature_names=["a"], + ), TaggedTable({"a": [], "b": [], "c": []}, target_name="b", feature_names=["a"]), ], ids=["empty-rows", "normal", "column_as_non_feature", "column_as_non_feature_with_empty_rows"], diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py index 1bb13a4a4..98ba1bba6 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_filter_rows.py @@ -1,8 +1,7 @@ -from typing import Callable +from collections.abc import Callable import pytest - -from safeds.data.tabular.containers import TaggedTable, Row +from safeds.data.tabular.containers import Row, TaggedTable from tests.helpers import assert_that_tagged_tables_are_equal @@ -27,7 +26,7 @@ }, "target", ), - lambda row: all(row.get_value(col) < 10 for col in row.column_names) + lambda row: all(row.get_value(col) < 10 for col in row.column_names), ), ( TaggedTable( @@ -38,7 +37,7 @@ "target": [1, 3, 2, 4], }, "target", - ["feature_1", "feature_2"] + ["feature_1", "feature_2"], ), TaggedTable( { @@ -48,11 +47,11 @@ "target": [1, 2], }, "target", - ["feature_1", "feature_2"] + ["feature_1", "feature_2"], ), - lambda row: all(row.get_value(col) < 10 for col in row.column_names) + lambda row: all(row.get_value(col) < 10 for col in row.column_names), ), -( + ( TaggedTable( { "feature_1": [3, 9, 6], @@ -69,7 +68,7 @@ }, "target", ), - lambda row: all(row.get_value(col) < 20 for col in row.column_names) + lambda row: all(row.get_value(col) < 20 for col in row.column_names), ), ( TaggedTable( @@ -80,7 +79,7 @@ "target": [1, 3, 2, 4], }, "target", - ["feature_1", "feature_2"] + ["feature_1", "feature_2"], ), TaggedTable( { @@ -90,12 +89,17 @@ "target": [1, 3, 2, 4], }, "target", - ["feature_1", "feature_2"] + ["feature_1", "feature_2"], ), - lambda row: all(row.get_value(col) < 20 for col in row.column_names) - ) + lambda row: all(row.get_value(col) < 20 for col in row.column_names), + ), + ], + ids=[ + "remove_rows_with_values_greater_9", + "remove_rows_with_values_greater_9_non_feature_columns", + "remove_no_rows", + "remove_no_rows_non_feature_columns", ], - ids=["remove_rows_with_values_greater_9", "remove_rows_with_values_greater_9_non_feature_columns", "remove_no_rows", "remove_no_rows_non_feature_columns"] ) def test_should_filter_rows(table: TaggedTable, expected: TaggedTable, query: Callable[[Row], bool]) -> None: assert_that_tagged_tables_are_equal(table.filter_rows(query), expected) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py index 12306c187..7dea7eb96 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py @@ -119,7 +119,9 @@ def test_should_raise_error( ], ids=["create_tagged_table", "tagged_table_not_all_columns_are_features", "tagged_table_with_feature_names_as_None"], ) -def test_should_create_a_tagged_table(data: dict[str, list[int]], target_name: str, feature_names: list[str] | None) -> None: +def test_should_create_a_tagged_table( + data: dict[str, list[int]], target_name: str, feature_names: list[str] | None, +) -> None: tagged_table = TaggedTable(data, target_name=target_name, feature_names=feature_names) if feature_names is None: feature_names = list(data.keys()) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py index 0f44ab48b..1a7fbd37c 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py @@ -98,11 +98,13 @@ def test_should_return_table(table: TaggedTable, column_names: list[str], expect "target": [7, 8, 9], }, ), - "target", ["feat1", "feat2"] + "target", + ["feat1", "feat2"], ), ["feat1", "feat2"], - r"Illegal schema modification: Must keep the target column." - ),( + r"Illegal schema modification: Must keep the target column.", + ), + ( TaggedTable._from_table( Table( { @@ -112,10 +114,11 @@ def test_should_return_table(table: TaggedTable, column_names: list[str], expect "target": [7, 8, 9], }, ), - "target", ["feat1", "feat2"] + "target", + ["feat1", "feat2"], ), ["target", "other"], - r"Illegal schema modification: Must keep at least one feature column." + r"Illegal schema modification: Must keep at least one feature column.", ), ], ids=["table_remove_target", "table_remove_all_features"], From 3c6feb0bb903d932f62e83fd9244e6f255d94336 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 7 Jul 2023 09:09:17 +0000 Subject: [PATCH 138/149] style: apply automated linter fixes --- src/safeds/data/tabular/containers/_tagged_table.py | 4 +++- .../data/tabular/containers/_table/_tagged_table/test_copy.py | 4 +++- .../data/tabular/containers/_table/_tagged_table/test_init.py | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 2ee75f9e2..a804b4587 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -380,7 +380,9 @@ def filter_rows(self, query: Callable[[Row], bool]) -> TaggedTable: A table containing only the rows filtered by the query. """ return TaggedTable._from_table( - super().filter_rows(query), target_name=self.target.name, feature_names=self.features.column_names, + super().filter_rows(query), + target_name=self.target.name, + feature_names=self.features.column_names, ) def keep_only_columns(self, column_names: list[str]) -> TaggedTable: diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_copy.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_copy.py index 2ce92ce7f..8819aff05 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_copy.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_copy.py @@ -8,7 +8,9 @@ TaggedTable({"a": [], "b": []}, target_name="b", feature_names=["a"]), TaggedTable({"a": ["a", 3, 0.1], "b": [True, False, None]}, target_name="b", feature_names=["a"]), TaggedTable( - {"a": ["a", 3, 0.1], "b": [True, False, None], "c": ["a", "b", "c"]}, target_name="b", feature_names=["a"], + {"a": ["a", 3, 0.1], "b": [True, False, None], "c": ["a", "b", "c"]}, + target_name="b", + feature_names=["a"], ), TaggedTable({"a": [], "b": [], "c": []}, target_name="b", feature_names=["a"]), ], diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py index 7dea7eb96..723dfc990 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py @@ -120,7 +120,9 @@ def test_should_raise_error( ids=["create_tagged_table", "tagged_table_not_all_columns_are_features", "tagged_table_with_feature_names_as_None"], ) def test_should_create_a_tagged_table( - data: dict[str, list[int]], target_name: str, feature_names: list[str] | None, + data: dict[str, list[int]], + target_name: str, + feature_names: list[str] | None, ) -> None: tagged_table = TaggedTable(data, target_name=target_name, feature_names=feature_names) if feature_names is None: From a00f440d1467e5f65630fff9ee355d53c4184b85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Fri, 7 Jul 2023 11:44:19 +0200 Subject: [PATCH 139/149] feat: Improved error messages for `TaggedTable.remove_columns`, `TaggedTable.remove_columns_with_missing_values` and `TaggedTable.remove_columns_with_non_numerical_values` --- src/safeds/data/tabular/containers/_table.py | 13 +++-- .../data/tabular/containers/_tagged_table.py | 25 +++++++-- .../_tagged_table/test_remove_columns.py | 37 ++++++++++--- ...test_remove_columns_with_missing_values.py | 54 +++++++++++++++--- ...emove_columns_with_non_numerical_values.py | 55 +++++++++++++++---- 5 files changed, 146 insertions(+), 38 deletions(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 985fe29c5..176585411 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -809,7 +809,7 @@ def _as_table(self: Table) -> Table: Returns ------- table: Table - The table, as an instance of the Table class. + The table, as an instance of the Table class. """ return self @@ -904,8 +904,9 @@ def add_row(self, row: Row) -> Table: """ Add a row to the table. + If the table happens to be empty beforehand, respective columns will be added automatically. + This table is not modified. - If the table happens to be empty beforehand, respective features will be added automatically. Parameters ---------- @@ -1361,7 +1362,9 @@ def rename_column(self, old_name: str, new_name: str) -> Table: def replace_column(self, old_column_name: str, new_columns: list[Column]) -> Table: """ - Return a copy of the table with the specified old column replaced by a list of new columns. Keeps the order of columns. + Return a copy of the table with the specified old column replaced by a list of new columns. + + The order of columns is kept. This table is not modified. @@ -1505,7 +1508,7 @@ def sort_columns( """ Sort the columns of a `Table` with the given comparator and return a new `Table`. - The original table is not modified. The comparator is a function that takes two columns `col1` and `col2` and + The comparator is a function that takes two columns `col1` and `col2` and returns an integer: * If `col1` should be ordered before `col2`, the function should return a negative number. @@ -1549,7 +1552,7 @@ def sort_rows(self, comparator: Callable[[Row, Row], int]) -> Table: """ Sort the rows of a `Table` with the given comparator and return a new `Table`. - The original table is not modified. The comparator is a function that takes two rows `row1` and `row2` and + The comparator is a function that takes two rows `row1` and `row2` and returns an integer: * If `row1` should be ordered before `row2`, the function should return a negative number. diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index c4422f1ee..24428b56f 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -440,9 +440,13 @@ def remove_columns(self, column_names: list[str]) -> TaggedTable: If any of the given columns does not exist. ColumnIsTargetError If any of the given columns is the target column. + IllegalSchemaModificationError + If the given columns contain all the feature columns. """ if self.target.name in column_names: raise ColumnIsTargetError(self.target.name) + if len(set(self.features.column_names) - set(column_names)) == 0: + raise IllegalSchemaModificationError("You cannot remove every feature column.") return TaggedTable._from_table( super().remove_columns(column_names), target_name=self.target.name, @@ -467,10 +471,14 @@ def remove_columns_with_missing_values(self) -> TaggedTable: ------ ColumnIsTargetError If any of the columns to be removed is the target column. + IllegalSchemaModificationError + If the columns to remove contain all the feature columns. """ table = super().remove_columns_with_missing_values() if self.target.name not in table.column_names: raise ColumnIsTargetError(self.target.name) + if len(set(self.features.column_names).intersection(set(table.column_names))) == 0: + raise IllegalSchemaModificationError("You cannot remove every feature column.") return TaggedTable._from_table( table, self.target.name, @@ -495,10 +503,14 @@ def remove_columns_with_non_numerical_values(self) -> TaggedTable: ------ ColumnIsTargetError If any of the columns to be removed is the target column. + IllegalSchemaModificationError + If the columns to remove contain all the feature columns. """ table = super().remove_columns_with_non_numerical_values() if self.target.name not in table.column_names: raise ColumnIsTargetError(self.target.name) + if len(set(self.features.column_names).intersection(set(table.column_names))) == 0: + raise IllegalSchemaModificationError("You cannot remove every feature column.") return TaggedTable._from_table( table, self.target.name, @@ -602,13 +614,13 @@ def rename_column(self, old_name: str, new_name: str) -> TaggedTable: def replace_column(self, old_column_name: str, new_columns: list[Column]) -> TaggedTable: """ - Return a copy of the table with the specified column replaced by new columns. + Return a copy of the table with the specified old column replaced by a list of new columns. The order of columns is kept. If the column to be replaced is the target column, it must be replaced by exactly one column. - The original is not modified. + This table is not modified. Parameters ---------- @@ -667,7 +679,6 @@ def shuffle_rows(self) -> TaggedTable: ------- result : TaggedTable The shuffled Table. - """ return TaggedTable._from_table( super().shuffle_rows(), @@ -719,7 +730,7 @@ def sort_columns( """ Sort the columns of a `TaggedTable` with the given comparator and return a new `TaggedTable`. - The original table is not modified. The comparator is a function that takes two columns `col1` and `col2` and + The comparator is a function that takes two columns `col1` and `col2` and returns an integer: * If `col1` should be ordered before `col2`, the function should return a negative number. @@ -728,6 +739,8 @@ def sort_columns( If no comparator is given, the columns will be sorted alphabetically by their name. + This table is not modified. + Parameters ---------- comparator : Callable[[Column, Column], int] @@ -752,13 +765,15 @@ def sort_rows(self, comparator: Callable[[Row, Row], int]) -> TaggedTable: """ Sort the rows of a `TaggedTable` with the given comparator and return a new `TaggedTable`. - The original table is not modified. The comparator is a function that takes two rows `row1` and `row2` and + The comparator is a function that takes two rows `row1` and `row2` and returns an integer: * If `row1` should be ordered before `row2`, the function should return a negative number. * If `row1` should be ordered after `row2`, the function should return a positive number. * If the original order of `row1` and `row2` should be kept, the function should return 0. + This table is not modified. + Parameters ---------- comparator : Callable[[Row, Row], int] diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py index 7d48095d5..5ce86c2f5 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py @@ -1,6 +1,6 @@ import pytest from safeds.data.tabular.containers import Table, TaggedTable -from safeds.exceptions import ColumnIsTargetError +from safeds.exceptions import ColumnIsTargetError, IllegalSchemaModificationError from tests.helpers import assert_that_tagged_tables_are_equal @@ -126,7 +126,7 @@ def test_should_remove_columns(table: TaggedTable, columns: list[str], expected: @pytest.mark.parametrize( - ("table", "columns"), + ("table", "columns", "error", "error_msg"), [ ( TaggedTable._from_table( @@ -135,6 +135,8 @@ def test_should_remove_columns(table: TaggedTable, columns: list[str], expected: ["feat"], ), ["target"], + ColumnIsTargetError, + r'Illegal schema modification: Column "target" is the target column and cannot be removed.' ), ( TaggedTable._from_table( @@ -143,13 +145,32 @@ def test_should_remove_columns(table: TaggedTable, columns: list[str], expected: ["feat"], ), ["non_feat", "target"], + ColumnIsTargetError, + r'Illegal schema modification: Column "target" is the target column and cannot be removed.' + ), +( + TaggedTable._from_table( + Table({"feat": [1, 2, 3], "non_feat": [1, 2, 3], "target": [4, 5, 6]}), + "target", + ["feat"], + ), + ["feat"], + IllegalSchemaModificationError, + r'Illegal schema modification: You cannot remove every feature column.' + ), + ( + TaggedTable._from_table( + Table({"feat": [1, 2, 3], "non_feat": [1, 2, 3], "target": [4, 5, 6]}), + "target", + ["feat"], + ), + ["feat", "non_feat"], + IllegalSchemaModificationError, + r'Illegal schema modification: You cannot remove every feature column.' ), ], - ids=["remove_only_target", "remove_non_feat_and_target"], + ids=["remove_only_target", "remove_non_feat_and_target", "remove_all_features", "remove_non_feat_and_all_features"], ) -def test_should_raise_column_is_target_error(table: TaggedTable, columns: list[str]) -> None: - with pytest.raises( - ColumnIsTargetError, - match=r'Illegal schema modification: Column "target" is the target column and cannot be removed.', - ): +def test_should_raise_in_remove_columns(table: TaggedTable, columns: list[str], error: type[Exception], error_msg: str) -> None: + with pytest.raises(error, match=error_msg): table.remove_columns(columns) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py index 4a7117ae4..38fab4c92 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py @@ -1,6 +1,6 @@ import pytest from safeds.data.tabular.containers import TaggedTable -from safeds.exceptions import ColumnIsTargetError +from safeds.exceptions import ColumnIsTargetError, IllegalSchemaModificationError from tests.helpers import assert_that_tagged_tables_are_equal @@ -79,7 +79,7 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp @pytest.mark.parametrize( - "table", + ("table", "error", "error_msg"), [ ( TaggedTable( @@ -90,7 +90,9 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp }, "target", ["feature"], - ) + ), + ColumnIsTargetError, + 'Illegal schema modification: Column "target" is the target column and cannot be removed.' ), ( TaggedTable( @@ -101,7 +103,9 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp }, "target", ["feature"], - ) + ), + ColumnIsTargetError, + 'Illegal schema modification: Column "target" is the target column and cannot be removed.' ), ( TaggedTable( @@ -112,7 +116,9 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp }, "target", ["feature"], - ) + ), + ColumnIsTargetError, + 'Illegal schema modification: Column "target" is the target column and cannot be removed.' ), ( TaggedTable( @@ -123,7 +129,35 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp }, "target", ["feature"], - ) + ), + ColumnIsTargetError, + 'Illegal schema modification: Column "target" is the target column and cannot be removed.' + ), + ( + TaggedTable( + { + "feature": [0, None, 2], + "non_feature": [1, 2, 3], + "target": [3, 2, 5], + }, + "target", + ["feature"], + ), + IllegalSchemaModificationError, + 'Illegal schema modification: You cannot remove every feature column.' + ), + ( + TaggedTable( + { + "feature": [0, None, 2], + "non_feature": [1, None, 3], + "target": [3, 2, 5], + }, + "target", + ["feature"], + ), + IllegalSchemaModificationError, + 'Illegal schema modification: You cannot remove every feature column.' ), ], ids=[ @@ -131,11 +165,13 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp "also_feature_incomplete", "also_non_feature_incomplete", "all_incomplete", + "all_features_incomplete", + "all_features_and_non_feature_incomplete" ], ) -def test_should_throw_column_is_target(table: TaggedTable) -> None: +def test_should_raise_in_remove_columns_with_missing_values(table: TaggedTable, error: type[Exception], error_msg: str) -> None: with pytest.raises( - ColumnIsTargetError, - match='Illegal schema modification: Column "target" is the target column and cannot be removed.', + error, + match=error_msg, ): table.remove_columns_with_missing_values() diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py index b374d0435..a829e75a7 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py @@ -1,6 +1,6 @@ import pytest from safeds.data.tabular.containers import TaggedTable -from safeds.exceptions import ColumnIsTargetError +from safeds.exceptions import ColumnIsTargetError, IllegalSchemaModificationError from tests.helpers import assert_that_tagged_tables_are_equal @@ -79,7 +79,7 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp @pytest.mark.parametrize( - "table", + ("table", "error", "error_msg"), [ ( TaggedTable( @@ -90,7 +90,9 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp }, "target", ["feature"], - ) + ), + ColumnIsTargetError, + r'Illegal schema modification: Column "target" is the target column and cannot be removed.' ), ( TaggedTable( @@ -101,7 +103,9 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp }, "target", ["feature"], - ) + ), + ColumnIsTargetError, + r'Illegal schema modification: Column "target" is the target column and cannot be removed.' ), ( TaggedTable( @@ -112,7 +116,9 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp }, "target", ["feature"], - ) + ), + ColumnIsTargetError, + r'Illegal schema modification: Column "target" is the target column and cannot be removed.' ), ( TaggedTable( @@ -123,7 +129,35 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp }, "target", ["feature"], - ) + ), + ColumnIsTargetError, + r'Illegal schema modification: Column "target" is the target column and cannot be removed.' + ), + ( + TaggedTable( + { + "feature": [0, "a", 2], + "non_feature": [1, 2, 3], + "target": [3, 2, 5], + }, + "target", + ["feature"], + ), + IllegalSchemaModificationError, + r'Illegal schema modification: You cannot remove every feature column.' + ), + ( + TaggedTable( + { + "feature": [0, "a", 2], + "non_feature": [1, "b", 3], + "target": [3, 2, 5], + }, + "target", + ["feature"], + ), + IllegalSchemaModificationError, + r'Illegal schema modification: You cannot remove every feature column.' ), ], ids=[ @@ -131,11 +165,10 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp "also_feature_non_numerical", "also_non_feature_non_numerical", "all_non_numerical", + "all_features_incomplete", + "all_features_and_non_feature_incomplete" ], ) -def test_should_throw_column_is_target(table: TaggedTable) -> None: - with pytest.raises( - ColumnIsTargetError, - match='Illegal schema modification: Column "target" is the target column and cannot be removed.', - ): +def test_should_raise_in_remove_columns_with_non_numerical_values(table: TaggedTable, error: type[Exception], error_msg: str) -> None: + with pytest.raises(error, match=error_msg): table.remove_columns_with_non_numerical_values() From c3bf71df0738c9745f528f95cc3be1fbb3bc2716 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 7 Jul 2023 09:46:35 +0000 Subject: [PATCH 140/149] style: apply automated linter fixes --- .../_tagged_table/test_remove_columns.py | 14 ++++++++------ .../test_remove_columns_with_missing_values.py | 18 ++++++++++-------- ...remove_columns_with_non_numerical_values.py | 18 ++++++++++-------- 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py index 5ce86c2f5..0a1554e24 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py @@ -136,7 +136,7 @@ def test_should_remove_columns(table: TaggedTable, columns: list[str], expected: ), ["target"], ColumnIsTargetError, - r'Illegal schema modification: Column "target" is the target column and cannot be removed.' + r'Illegal schema modification: Column "target" is the target column and cannot be removed.', ), ( TaggedTable._from_table( @@ -146,9 +146,9 @@ def test_should_remove_columns(table: TaggedTable, columns: list[str], expected: ), ["non_feat", "target"], ColumnIsTargetError, - r'Illegal schema modification: Column "target" is the target column and cannot be removed.' + r'Illegal schema modification: Column "target" is the target column and cannot be removed.', ), -( + ( TaggedTable._from_table( Table({"feat": [1, 2, 3], "non_feat": [1, 2, 3], "target": [4, 5, 6]}), "target", @@ -156,7 +156,7 @@ def test_should_remove_columns(table: TaggedTable, columns: list[str], expected: ), ["feat"], IllegalSchemaModificationError, - r'Illegal schema modification: You cannot remove every feature column.' + r"Illegal schema modification: You cannot remove every feature column.", ), ( TaggedTable._from_table( @@ -166,11 +166,13 @@ def test_should_remove_columns(table: TaggedTable, columns: list[str], expected: ), ["feat", "non_feat"], IllegalSchemaModificationError, - r'Illegal schema modification: You cannot remove every feature column.' + r"Illegal schema modification: You cannot remove every feature column.", ), ], ids=["remove_only_target", "remove_non_feat_and_target", "remove_all_features", "remove_non_feat_and_all_features"], ) -def test_should_raise_in_remove_columns(table: TaggedTable, columns: list[str], error: type[Exception], error_msg: str) -> None: +def test_should_raise_in_remove_columns( + table: TaggedTable, columns: list[str], error: type[Exception], error_msg: str, +) -> None: with pytest.raises(error, match=error_msg): table.remove_columns(columns) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py index 38fab4c92..32758003f 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py @@ -92,7 +92,7 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp ["feature"], ), ColumnIsTargetError, - 'Illegal schema modification: Column "target" is the target column and cannot be removed.' + 'Illegal schema modification: Column "target" is the target column and cannot be removed.', ), ( TaggedTable( @@ -105,7 +105,7 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp ["feature"], ), ColumnIsTargetError, - 'Illegal schema modification: Column "target" is the target column and cannot be removed.' + 'Illegal schema modification: Column "target" is the target column and cannot be removed.', ), ( TaggedTable( @@ -118,7 +118,7 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp ["feature"], ), ColumnIsTargetError, - 'Illegal schema modification: Column "target" is the target column and cannot be removed.' + 'Illegal schema modification: Column "target" is the target column and cannot be removed.', ), ( TaggedTable( @@ -131,7 +131,7 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp ["feature"], ), ColumnIsTargetError, - 'Illegal schema modification: Column "target" is the target column and cannot be removed.' + 'Illegal schema modification: Column "target" is the target column and cannot be removed.', ), ( TaggedTable( @@ -144,7 +144,7 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp ["feature"], ), IllegalSchemaModificationError, - 'Illegal schema modification: You cannot remove every feature column.' + "Illegal schema modification: You cannot remove every feature column.", ), ( TaggedTable( @@ -157,7 +157,7 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp ["feature"], ), IllegalSchemaModificationError, - 'Illegal schema modification: You cannot remove every feature column.' + "Illegal schema modification: You cannot remove every feature column.", ), ], ids=[ @@ -166,10 +166,12 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp "also_non_feature_incomplete", "all_incomplete", "all_features_incomplete", - "all_features_and_non_feature_incomplete" + "all_features_and_non_feature_incomplete", ], ) -def test_should_raise_in_remove_columns_with_missing_values(table: TaggedTable, error: type[Exception], error_msg: str) -> None: +def test_should_raise_in_remove_columns_with_missing_values( + table: TaggedTable, error: type[Exception], error_msg: str, +) -> None: with pytest.raises( error, match=error_msg, diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py index a829e75a7..ce86c117f 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py @@ -92,7 +92,7 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp ["feature"], ), ColumnIsTargetError, - r'Illegal schema modification: Column "target" is the target column and cannot be removed.' + r'Illegal schema modification: Column "target" is the target column and cannot be removed.', ), ( TaggedTable( @@ -105,7 +105,7 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp ["feature"], ), ColumnIsTargetError, - r'Illegal schema modification: Column "target" is the target column and cannot be removed.' + r'Illegal schema modification: Column "target" is the target column and cannot be removed.', ), ( TaggedTable( @@ -118,7 +118,7 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp ["feature"], ), ColumnIsTargetError, - r'Illegal schema modification: Column "target" is the target column and cannot be removed.' + r'Illegal schema modification: Column "target" is the target column and cannot be removed.', ), ( TaggedTable( @@ -131,7 +131,7 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp ["feature"], ), ColumnIsTargetError, - r'Illegal schema modification: Column "target" is the target column and cannot be removed.' + r'Illegal schema modification: Column "target" is the target column and cannot be removed.', ), ( TaggedTable( @@ -144,7 +144,7 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp ["feature"], ), IllegalSchemaModificationError, - r'Illegal schema modification: You cannot remove every feature column.' + r"Illegal schema modification: You cannot remove every feature column.", ), ( TaggedTable( @@ -157,7 +157,7 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp ["feature"], ), IllegalSchemaModificationError, - r'Illegal schema modification: You cannot remove every feature column.' + r"Illegal schema modification: You cannot remove every feature column.", ), ], ids=[ @@ -166,9 +166,11 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp "also_non_feature_non_numerical", "all_non_numerical", "all_features_incomplete", - "all_features_and_non_feature_incomplete" + "all_features_and_non_feature_incomplete", ], ) -def test_should_raise_in_remove_columns_with_non_numerical_values(table: TaggedTable, error: type[Exception], error_msg: str) -> None: +def test_should_raise_in_remove_columns_with_non_numerical_values( + table: TaggedTable, error: type[Exception], error_msg: str, +) -> None: with pytest.raises(error, match=error_msg): table.remove_columns_with_non_numerical_values() From f7a8c6f15e4b368f79bcd15b3f31eade23cab715 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 7 Jul 2023 09:48:13 +0000 Subject: [PATCH 141/149] style: apply automated linter fixes --- .../containers/_table/_tagged_table/test_remove_columns.py | 5 ++++- .../_tagged_table/test_remove_columns_with_missing_values.py | 4 +++- .../test_remove_columns_with_non_numerical_values.py | 4 +++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py index 0a1554e24..9e8435885 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py @@ -172,7 +172,10 @@ def test_should_remove_columns(table: TaggedTable, columns: list[str], expected: ids=["remove_only_target", "remove_non_feat_and_target", "remove_all_features", "remove_non_feat_and_all_features"], ) def test_should_raise_in_remove_columns( - table: TaggedTable, columns: list[str], error: type[Exception], error_msg: str, + table: TaggedTable, + columns: list[str], + error: type[Exception], + error_msg: str, ) -> None: with pytest.raises(error, match=error_msg): table.remove_columns(columns) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py index 32758003f..b442fe0d0 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_missing_values.py @@ -170,7 +170,9 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp ], ) def test_should_raise_in_remove_columns_with_missing_values( - table: TaggedTable, error: type[Exception], error_msg: str, + table: TaggedTable, + error: type[Exception], + error_msg: str, ) -> None: with pytest.raises( error, diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py index ce86c117f..5d2b809a9 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py @@ -170,7 +170,9 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp ], ) def test_should_raise_in_remove_columns_with_non_numerical_values( - table: TaggedTable, error: type[Exception], error_msg: str, + table: TaggedTable, + error: type[Exception], + error_msg: str, ) -> None: with pytest.raises(error, match=error_msg): table.remove_columns_with_non_numerical_values() From 0d159c2b33fd07c200105507ba62c5297146a2c0 Mon Sep 17 00:00:00 2001 From: Alexander <47296670+Marsmaennchen221@users.noreply.github.com> Date: Fri, 7 Jul 2023 14:33:35 +0200 Subject: [PATCH 142/149] Apply suggestions from code review Co-authored-by: patrikguempel --- .../data/tabular/containers/_tagged_table.py | 36 +++++++++---------- src/safeds/exceptions/_data.py | 2 +- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 23c70a211..5fdddbf98 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -201,7 +201,7 @@ def add_column_as_feature(self, column: Column) -> TaggedTable: Returns ------- result : TaggedTable - The table with the column attached as a feature column. + The table with the attached feature column. Raises ------ @@ -218,14 +218,14 @@ def add_column_as_feature(self, column: Column) -> TaggedTable: def add_columns_as_features(self, columns: list[Column] | Table) -> TaggedTable: """ - Return the original table with the provided column attached at the end, as feature columns. + Return the original table with the provided columns attached at the end, as feature columns. This table is not modified. Returns ------- result : TaggedTable - The table with the column attached as feature columns. + The table with the attached feature columns. Raises ------ @@ -342,7 +342,7 @@ def add_row(self, row: Row) -> TaggedTable: def add_rows(self, rows: list[Row] | Table) -> TaggedTable: """ - Add multiple rows to a table. + Add multiple rows to the table. This table is not modified. @@ -365,7 +365,7 @@ def add_rows(self, rows: list[Row] | Table) -> TaggedTable: def filter_rows(self, query: Callable[[Row], bool]) -> TaggedTable: """ - Return a table with rows filtered by Callable (e.g. lambda function). + Return a table containing only rows that match the given Callable (e.g. lambda function). This table is not modified. @@ -377,7 +377,7 @@ def filter_rows(self, query: Callable[[Row], bool]) -> TaggedTable: Returns ------- table : TaggedTable - A table containing only the rows filtered by the query. + A table containing only the rows to match the query. """ return TaggedTable._from_table( super().filter_rows(query), @@ -424,14 +424,14 @@ def keep_only_columns(self, column_names: list[str]) -> TaggedTable: def remove_columns(self, column_names: list[str]) -> TaggedTable: """ - Return a table without the given column(s). + Remove the given column(s) from the table. This table is not modified. Parameters ---------- column_names : list[str] - A list containing all columns to be dropped. + The names of all columns to be dropped. Returns ------- @@ -462,7 +462,7 @@ def remove_columns(self, column_names: list[str]) -> TaggedTable: def remove_columns_with_missing_values(self) -> TaggedTable: """ - Return a table without the columns that contain missing values. + Remove every column that misses values. This table is not modified. @@ -494,7 +494,7 @@ def remove_columns_with_missing_values(self) -> TaggedTable: def remove_columns_with_non_numerical_values(self) -> TaggedTable: """ - Return a table without the columns that contain non-numerical values. + Remove every column that contains non-numerical values. This table is not modified. @@ -526,7 +526,7 @@ def remove_columns_with_non_numerical_values(self) -> TaggedTable: def remove_duplicate_rows(self) -> TaggedTable: """ - Return a copy of the table with every duplicate row removed. + Remove all row duplicates. This table is not modified. @@ -618,7 +618,7 @@ def rename_column(self, old_name: str, new_name: str) -> TaggedTable: def replace_column(self, old_column_name: str, new_columns: list[Column]) -> TaggedTable: """ - Return a copy of the table with the specified old column replaced by a list of new columns. + Replace the specified old column by a list of new columns. The order of columns is kept. @@ -737,9 +737,9 @@ def sort_columns( The comparator is a function that takes two columns `col1` and `col2` and returns an integer: - * If `col1` should be ordered before `col2`, the function should return a negative number. - * If `col1` should be ordered after `col2`, the function should return a positive number. - * If the original order of `col1` and `col2` should be kept, the function should return 0. + * If the function returns a negative number, `col1` will be ordered before `col2`. + * If the function returns a positive number, `col1` will be ordered after `col2`. + * If the function returns 0, the original order of `col1` and `col2` will be kept. If no comparator is given, the columns will be sorted alphabetically by their name. @@ -772,9 +772,9 @@ def sort_rows(self, comparator: Callable[[Row, Row], int]) -> TaggedTable: The comparator is a function that takes two rows `row1` and `row2` and returns an integer: - * If `row1` should be ordered before `row2`, the function should return a negative number. - * If `row1` should be ordered after `row2`, the function should return a positive number. - * If the original order of `row1` and `row2` should be kept, the function should return 0. + * If the function returns a negative number, `row1` will be ordered before `row2`. + * If the function returns a positive number, `row1` will be ordered after `row2`. + * If the function returns 0, the original order of `row1` and `row2` will be kept. This table is not modified. diff --git a/src/safeds/exceptions/_data.py b/src/safeds/exceptions/_data.py index bd413534e..f11c7a334 100644 --- a/src/safeds/exceptions/_data.py +++ b/src/safeds/exceptions/_data.py @@ -141,7 +141,7 @@ def __init__(self, file: str | Path, file_extension: str | list[str]) -> None: class IllegalSchemaModificationError(Exception): - """Exception raised when modifying the schema in a way that is not consistent with the subclass's requirements.""" + """Exception raised when modifying a schema in a way that is inconsistent with the subclass's requirements.""" def __init__(self, msg: str) -> None: super().__init__(f"Illegal schema modification: {msg}") From 6e65b1218af4f1065d7693ac75c900be6b8c9056 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Sat, 8 Jul 2023 15:10:26 +0200 Subject: [PATCH 143/149] Update src/safeds/data/tabular/containers/_tagged_table.py Co-authored-by: Simon Breuer <86068340+sibre28@users.noreply.github.com> --- src/safeds/data/tabular/containers/_tagged_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 5fdddbf98..30dd5d168 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -622,7 +622,7 @@ def replace_column(self, old_column_name: str, new_columns: list[Column]) -> Tag The order of columns is kept. - If the column to be replaced is the target column, it must be replaced by exactly one column. + If the column to be replaced is the target column, it must be replaced by exactly one column. That column becomes the new target column. This table is not modified. From d5908240a2342d7f67d7d6ce4f92f04f059da026 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Sat, 8 Jul 2023 15:19:55 +0200 Subject: [PATCH 144/149] Update tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py Co-authored-by: Simon Breuer <86068340+sibre28@users.noreply.github.com> --- .../test_remove_columns_with_non_numerical_values.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py index 5d2b809a9..5a6251d20 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns_with_non_numerical_values.py @@ -165,8 +165,8 @@ def test_should_remove_columns_with_non_numerical_values(table: TaggedTable, exp "also_feature_non_numerical", "also_non_feature_non_numerical", "all_non_numerical", - "all_features_incomplete", - "all_features_and_non_feature_incomplete", + "all_features_non_numerical", + "all_features_and_non_feature_non_numerical", ], ) def test_should_raise_in_remove_columns_with_non_numerical_values( From fd2e1e3260865e4fb4d04f8c64a72806d425e972 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Sat, 8 Jul 2023 15:22:23 +0200 Subject: [PATCH 145/149] Update tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py Co-authored-by: Simon Breuer <86068340+sibre28@users.noreply.github.com> --- .../containers/_table/_tagged_table/test_keep_only_columns.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py index 1a7fbd37c..975dca9e9 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py @@ -78,7 +78,7 @@ ), ), ], - ids=["table", "table_keep_non_feature_column", "table_not_keep_non_feature_column"], + ids=["keep_feature_and_target_column", "keep_non_feature_column", "don't_keep_non_feature_column"], ) def test_should_return_table(table: TaggedTable, column_names: list[str], expected: TaggedTable) -> None: new_table = table.keep_only_columns(column_names) From d0f2451fb2c2fa4f5511a1d2b80ad8d66aa01f93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Sat, 8 Jul 2023 15:24:48 +0200 Subject: [PATCH 146/149] Improve testcase naming in test_features.py --- .../tabular/containers/_table/_tagged_table/test_features.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_features.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_features.py index 54a327227..2af13d37c 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_features.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_features.py @@ -31,7 +31,7 @@ Table({"A": [1, 4], "C": [3, 6]}), ), ], - ids=["all_columns_are_feature", "not_all_columns_are_features"], + ids=["only_target_and_features", "target_features_and_other"], ) def test_should_return_features(tagged_table: TaggedTable, features: Table) -> None: assert tagged_table.features == features From ea1a46cf5dd7c4ec6e1df114ced6082343c9227f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Sat, 8 Jul 2023 15:28:10 +0200 Subject: [PATCH 147/149] Update src/safeds/data/tabular/containers/_tagged_table.py Co-authored-by: Simon Breuer <86068340+sibre28@users.noreply.github.com> --- src/safeds/data/tabular/containers/_tagged_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 30dd5d168..2df8ef0a8 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -623,7 +623,7 @@ def replace_column(self, old_column_name: str, new_columns: list[Column]) -> Tag The order of columns is kept. If the column to be replaced is the target column, it must be replaced by exactly one column. That column becomes the new target column. - +If the column to be replaced is a feature column, the new columns that replace it all become feature columns. This table is not modified. Parameters From f37586bc47bc2139e7740dc943376c6bb7dbfc3f Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Sat, 8 Jul 2023 13:29:56 +0000 Subject: [PATCH 148/149] style: apply automated linter fixes --- .../data/tabular/containers/_tagged_table.py | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 2df8ef0a8..74125b5c3 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -618,36 +618,36 @@ def rename_column(self, old_name: str, new_name: str) -> TaggedTable: def replace_column(self, old_column_name: str, new_columns: list[Column]) -> TaggedTable: """ - Replace the specified old column by a list of new columns. + Replace the specified old column by a list of new columns. - The order of columns is kept. + The order of columns is kept. - If the column to be replaced is the target column, it must be replaced by exactly one column. That column becomes the new target column. -If the column to be replaced is a feature column, the new columns that replace it all become feature columns. - This table is not modified. + If the column to be replaced is the target column, it must be replaced by exactly one column. That column becomes the new target column. + If the column to be replaced is a feature column, the new columns that replace it all become feature columns. + This table is not modified. Parameters ---------- - old_column_name : str - The name of the column to be replaced. - new_columns : list[Column] - The new columns replacing the old column. + old_column_name : str + The name of the column to be replaced. + new_columns : list[Column] + The new columns replacing the old column. Returns ------- - result : TaggedTable - A table with the old column replaced by the new column. + result : TaggedTable + A table with the old column replaced by the new column. Raises ------ - UnknownColumnNameError - If the old column does not exist. - DuplicateColumnNameError - If the new column already exists and the existing column is not affected by the replacement. - ColumnSizeError - If the size of the column does not match the amount of rows. - IllegalSchemaModificationError - If the target column would be removed or replaced by more than one column. + UnknownColumnNameError + If the old column does not exist. + DuplicateColumnNameError + If the new column already exists and the existing column is not affected by the replacement. + ColumnSizeError + If the size of the column does not match the amount of rows. + IllegalSchemaModificationError + If the target column would be removed or replaced by more than one column. """ if old_column_name == self.target.name: if len(new_columns) != 1: From ff3782bb561c996a101f662d24609398ebed466c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Sat, 8 Jul 2023 15:38:05 +0200 Subject: [PATCH 149/149] Update src/safeds/data/tabular/containers/_tagged_table.py Co-authored-by: Simon Breuer <86068340+sibre28@users.noreply.github.com> --- src/safeds/data/tabular/containers/_tagged_table.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 74125b5c3..0f3c6e674 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -412,12 +412,11 @@ def keep_only_columns(self, column_names: list[str]) -> TaggedTable: raise IllegalSchemaModificationError("Must keep the target column.") if len(set(self.features.column_names).intersection(set(column_names))) == 0: raise IllegalSchemaModificationError("Must keep at least one feature column.") - table = super().keep_only_columns(column_names) return TaggedTable._from_table( - table, + super().keep_only_columns(column_names), target_name=self.target.name, feature_names=sorted( - set(self.features.column_names).intersection(set(table.column_names)), + set(self.features.column_names).intersection(set(column_names)), key={val: ix for ix, val in enumerate(self.features.column_names)}.__getitem__, ), )