feat: Table.keep_only_rows (#721)

### Summary of Changes

* New method `Table.keep_only_rows`. This is consistent with `Table.keep_only_columns`.
* Deprecate `Table.filter_rows`, which does the same thing. It will be removed in a future version.
Safe-DS · May 4, 2024 · 923a6c2 · 923a6c2
1 parent a1cdaef
commit 923a6c2
Show file tree

Hide file tree

Showing 3 changed files with 90 additions and 13 deletions.
diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py
@@ -1130,6 +1130,9 @@ def filter_rows(self, query: Callable[[Row], bool]) -> Table:
 
         The original table is not modified.
 
+        !!! warning "Deprecated"
+            Use [keep_only_rows][safeds.data.tabular.containers._table.Table.keep_only_rows] instead.
+
         Parameters
         ----------
         query:
@@ -1142,7 +1145,7 @@ def filter_rows(self, query: Callable[[Row], bool]) -> Table:
 
         See Also
         --------
-        remove_rows:
+        [remove_rows][safeds.data.tabular.containers._table.Table.remove_rows]:
             Remove rows that satisfy a query.
 
         Examples
@@ -1153,14 +1156,12 @@ def filter_rows(self, query: Callable[[Row], bool]) -> Table:
            a  b
         0  1  2
         """
-        import pandas as pd
-
-        rows: list[Row] = [row for row in self.to_rows() if query(row)]
-        if len(rows) == 0:
-            result_table = Table._from_pandas_dataframe(pd.DataFrame(), self._schema)
-        else:
-            result_table = self.from_rows(rows)
-        return result_table
+        warnings.warn(
+            "This method is deprecated and will be removed in a future version. Use `Table.keep_only_rows` instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self.keep_only_rows(query)
 
     _T = TypeVar("_T")
 
@@ -1342,6 +1343,44 @@ def remove_columns_with_non_numerical_values(self) -> Table:
         """
         return Table.from_columns([column for column in self.to_columns() if column.type.is_numeric()])
 
+    def keep_only_rows(self, query: Callable[[Row], bool]) -> Table:
+        """
+        Return a new table containing only the rows that satisfy the query.
+
+        The original table is not modified.
+
+        Parameters
+        ----------
+        query:
+            A callable that returns True if a row should be included in the new table.
+
+        Returns
+        -------
+        table:
+            A table containing only the rows that satisfy the query.
+
+        See Also
+        --------
+        [remove_rows][safeds.data.tabular.containers._table.Table.remove_rows]:
+            Remove rows that satisfy a query.
+
+        Examples
+        --------
+        >>> from safeds.data.tabular.containers import Table
+        >>> table = Table.from_dict({"a": [1, 3], "b": [2, 4]})
+        >>> table.keep_only_rows(lambda x: x["a"] < 2)
+           a  b
+        0  1  2
+        """
+        import pandas as pd
+
+        rows: list[Row] = [row for row in self.to_rows() if query(row)]
+        if len(rows) == 0:
+            result_table = Table._from_pandas_dataframe(pd.DataFrame(), self._schema)
+        else:
+            result_table = self.from_rows(rows)
+        return result_table
+
     def remove_rows(self, query: Callable[[Row], bool]) -> Table:
         """
         Return a new table without the rows that satisfy the query.
@@ -1360,7 +1399,7 @@ def remove_rows(self, query: Callable[[Row], bool]) -> Table:
 
         See Also
         --------
-        filter_rows:
+        [keep_only_rows][safeds.data.tabular.containers._table.Table.keep_only_rows]:
             Create a table containing only the rows that satisfy a query.
 
         Examples

diff --git a/tests/safeds/data/tabular/containers/_table/test_keep_only_rows.py b/tests/safeds/data/tabular/containers/_table/test_keep_only_rows.py
@@ -0,0 +1,38 @@
+import pandas as pd
+import pytest
+from safeds.data.tabular.containers import Table
+from safeds.data.tabular.typing import ColumnType, Integer, Schema
+
+
+@pytest.mark.parametrize(
+    ("table1", "filter_column", "filter_value", "table2"),
+    [
+        (
+            Table(),
+            "col1",
+            1,
+            Table._from_pandas_dataframe(pd.DataFrame(), Schema({})),
+        ),
+        (
+            Table({"col1": [3, 2, 4], "col2": [1, 2, 4]}),
+            "col1",
+            1,
+            Table._from_pandas_dataframe(pd.DataFrame(), Schema({"col1": Integer(), "col2": Integer()})),
+        ),
+        (
+            Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
+            "col1",
+            1,
+            Table({"col1": [1, 1], "col2": [1, 4]}),
+        ),
+    ],
+    ids=[
+        "empty table",
+        "no matches",
+        "matches",
+    ],
+)
+def test_should_keep_only_rows(table1: Table, filter_column: str, filter_value: ColumnType, table2: Table) -> None:
+    table1 = table1.keep_only_rows(lambda row: row.get_value(filter_column) == filter_value)
+    assert table1.schema == table2.schema
+    assert table2 == table1
diff --git a/tests/safeds/data/tabular/containers/_table/test_remove_rows.py b/tests/safeds/data/tabular/containers/_table/test_remove_rows.py
@@ -5,7 +5,7 @@
 
 
 @pytest.mark.parametrize(
-    ("table1", "remove_column", "remove_value", "table2"),
+    ("table1", "filter_column", "filter_value", "table2"),
     [
         (
             Table(),
@@ -32,7 +32,7 @@
         "matches",
     ],
 )
-def test_should_remove_rows(table1: Table, remove_column: str, remove_value: ColumnType, table2: Table) -> None:
-    table1 = table1.remove_rows(lambda row: row.get_value(remove_column) == remove_value)
+def test_should_remove_rows(table1: Table, filter_column: str, filter_value: ColumnType, table2: Table) -> None:
+    table1 = table1.remove_rows(lambda row: row.get_value(filter_column) == filter_value)
     assert table1.schema == table2.schema
     assert table2 == table1