Skip to content

Commit

Permalink
feat: Table.keep_only_rows (#721)
Browse files Browse the repository at this point in the history
### Summary of Changes

* New method `Table.keep_only_rows`. This is consistent with
`Table.keep_only_columns`.
* Deprecate `Table.filter_rows`, which does the same thing. It will be
removed in a future version.
  • Loading branch information
lars-reimann authored May 4, 2024
1 parent a1cdaef commit 923a6c2
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 13 deletions.
59 changes: 49 additions & 10 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1130,6 +1130,9 @@ def filter_rows(self, query: Callable[[Row], bool]) -> Table:
The original table is not modified.
!!! warning "Deprecated"
Use [keep_only_rows][safeds.data.tabular.containers._table.Table.keep_only_rows] instead.
Parameters
----------
query:
Expand All @@ -1142,7 +1145,7 @@ def filter_rows(self, query: Callable[[Row], bool]) -> Table:
See Also
--------
remove_rows:
[remove_rows][safeds.data.tabular.containers._table.Table.remove_rows]:
Remove rows that satisfy a query.
Examples
Expand All @@ -1153,14 +1156,12 @@ def filter_rows(self, query: Callable[[Row], bool]) -> Table:
a b
0 1 2
"""
import pandas as pd

rows: list[Row] = [row for row in self.to_rows() if query(row)]
if len(rows) == 0:
result_table = Table._from_pandas_dataframe(pd.DataFrame(), self._schema)
else:
result_table = self.from_rows(rows)
return result_table
warnings.warn(
"This method is deprecated and will be removed in a future version. Use `Table.keep_only_rows` instead.",
DeprecationWarning,
stacklevel=2,
)
return self.keep_only_rows(query)

_T = TypeVar("_T")

Expand Down Expand Up @@ -1342,6 +1343,44 @@ def remove_columns_with_non_numerical_values(self) -> Table:
"""
return Table.from_columns([column for column in self.to_columns() if column.type.is_numeric()])

def keep_only_rows(self, query: Callable[[Row], bool]) -> Table:
    """
    Build a new table made up solely of the rows for which the query holds.

    The original table is not modified.

    Parameters
    ----------
    query:
        A callable that returns True if a row should be included in the new table.

    Returns
    -------
    table:
        A table containing only the rows that satisfy the query.

    See Also
    --------
    [remove_rows][safeds.data.tabular.containers._table.Table.remove_rows]:
        Remove rows that satisfy a query.

    Examples
    --------
    >>> from safeds.data.tabular.containers import Table
    >>> table = Table.from_dict({"a": [1, 3], "b": [2, 4]})
    >>> table.keep_only_rows(lambda x: x["a"] < 2)
       a  b
    0  1  2
    """
    import pandas as pd

    # Evaluate the query once per row and keep the rows it accepts.
    matching_rows: list[Row] = list(filter(query, self.to_rows()))

    if matching_rows:
        return self.from_rows(matching_rows)

    # Nothing matched: pair an empty data frame with this table's schema
    # instead of going through from_rows with an empty list.
    return Table._from_pandas_dataframe(pd.DataFrame(), self._schema)

def remove_rows(self, query: Callable[[Row], bool]) -> Table:
"""
Return a new table without the rows that satisfy the query.
Expand All @@ -1360,7 +1399,7 @@ def remove_rows(self, query: Callable[[Row], bool]) -> Table:
See Also
--------
filter_rows:
[keep_only_rows][safeds.data.tabular.containers._table.Table.keep_only_rows]:
Create a table containing only the rows that satisfy a query.
Examples
Expand Down
38 changes: 38 additions & 0 deletions tests/safeds/data/tabular/containers/_table/test_keep_only_rows.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pandas as pd
import pytest
from safeds.data.tabular.containers import Table
from safeds.data.tabular.typing import ColumnType, Integer, Schema


@pytest.mark.parametrize(
    ("table1", "filter_column", "filter_value", "table2"),
    [
        pytest.param(
            Table(),
            "col1",
            1,
            Table._from_pandas_dataframe(pd.DataFrame(), Schema({})),
            id="empty table",
        ),
        pytest.param(
            Table({"col1": [3, 2, 4], "col2": [1, 2, 4]}),
            "col1",
            1,
            Table._from_pandas_dataframe(pd.DataFrame(), Schema({"col1": Integer(), "col2": Integer()})),
            id="no matches",
        ),
        pytest.param(
            Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
            "col1",
            1,
            Table({"col1": [1, 1], "col2": [1, 4]}),
            id="matches",
        ),
    ],
)
def test_should_keep_only_rows(table1: Table, filter_column: str, filter_value: ColumnType, table2: Table) -> None:
    """Check that keep_only_rows yields the expected schema and rows for each case."""
    # Keep exactly the rows whose `filter_column` equals `filter_value`.
    kept = table1.keep_only_rows(lambda row: row.get_value(filter_column) == filter_value)

    # The schema is compared separately from the table equality check below.
    assert kept.schema == table2.schema
    assert table2 == kept
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


@pytest.mark.parametrize(
("table1", "remove_column", "remove_value", "table2"),
("table1", "filter_column", "filter_value", "table2"),
[
(
Table(),
Expand All @@ -32,7 +32,7 @@
"matches",
],
)
def test_should_remove_rows(table1: Table, remove_column: str, remove_value: ColumnType, table2: Table) -> None:
table1 = table1.remove_rows(lambda row: row.get_value(remove_column) == remove_value)
def test_should_remove_rows(table1: Table, filter_column: str, filter_value: ColumnType, table2: Table) -> None:
table1 = table1.remove_rows(lambda row: row.get_value(filter_column) == filter_value)
assert table1.schema == table2.schema
assert table2 == table1

0 comments on commit 923a6c2

Please sign in to comment.