Skip to content

Commit

Permalink
feat: rename several Table methods for consistency (#445)
Browse files (browse the repository at this point in the history)
Closes #439

### Summary of Changes

* Rename `group_by` to `group_rows_by`
* Rename `split` to `split_rows`
* Rename `summary` to `summarize_statistics`
  • Loading branch information
lars-reimann authored Jul 11, 2023
1 parent 1f37e4a commit 9954986
Show file tree
Hide file tree
Showing 6 changed files with 16 additions and 16 deletions.
4 changes: 2 additions & 2 deletions docs/development/project_guidelines.md
Original file line number Diff line number Diff line change
Expand Up @@ -265,13 +265,13 @@ Passing values that are commonly used together around separately is tedious, ver
!!! success "**DO** (client code):"

```py
training_data, validation_data = split(full_data)
training_data, validation_data = split_rows(full_data)
```

!!! failure "**DON'T** (client code):"

```py
training_feature_vectors, validation_feature_vectors, training_target_values, validation_target_values = split(feature_vectors, target_values)
training_feature_vectors, validation_feature_vectors, training_target_values, validation_target_values = split_rows(feature_vectors, target_values)
```

## Docstrings
Expand Down
2 changes: 1 addition & 1 deletion docs/tutorials/getting_started.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
"execution_count": 2,
"outputs": [],
"source": [
"split_tuple = titanic.split(0.60)\n",
"split_tuple = titanic.split_rows(0.60)\n",
"\n",
"train_table = split_tuple[0]\n",
"test_table = split_tuple[1]\n",
Expand Down
10 changes: 5 additions & 5 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -695,7 +695,7 @@ def get_row(self, index: int) -> Row:
# Information
# ------------------------------------------------------------------------------------------------------------------

def summary(self) -> Table:
def summarize_statistics(self) -> Table:
"""
Return a table with a number of statistical key values.
Expand All @@ -710,7 +710,7 @@ def summary(self) -> Table:
--------
>>> from safeds.data.tabular.containers import Table
>>> table = Table.from_dict({"a": [1, 3], "b": [2, 4]})
>>> table.summary()
>>> table.summarize_statistics()
metrics a b
0 maximum 3 4
1 minimum 1 2
Expand Down Expand Up @@ -1048,7 +1048,7 @@ def filter_rows(self, query: Callable[[Row], bool]) -> Table:

_T = TypeVar("_T")

def group_by(self, key_selector: Callable[[Row], _T]) -> dict[_T, Table]:
def group_rows_by(self, key_selector: Callable[[Row], _T]) -> dict[_T, Table]:
"""
Return a dictionary with the output tables as values and the keys from the key_selector.
Expand Down Expand Up @@ -1595,7 +1595,7 @@ def sort_rows(self, comparator: Callable[[Row, Row], int]) -> Table:
rows.sort(key=functools.cmp_to_key(comparator))
return Table.from_rows(rows)

def split(self, percentage_in_first: float) -> tuple[Table, Table]:
def split_rows(self, percentage_in_first: float) -> tuple[Table, Table]:
"""
Split the table into two new tables.
Expand All @@ -1621,7 +1621,7 @@ def split(self, percentage_in_first: float) -> tuple[Table, Table]:
--------
>>> from safeds.data.tabular.containers import Table
>>> table = Table.from_dict({"temperature": [10, 15, 20, 25, 30], "sales": [54, 74, 90, 206, 210]})
>>> slices = table.split(0.4)
>>> slices = table.split_rows(0.4)
>>> slices[0]
temperature sales
0 10 54
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,6 @@
],
ids=["select by row1", "different types in column", "empty table", "table with no rows"],
)
def test_group_by(table: Table, selector: Callable, expected: dict) -> None:
out = table.group_by(selector)
def test_should_group_rows(table: Table, selector: Callable, expected: dict) -> None:
out = table.group_rows_by(selector)
assert out == expected
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def test_should_split_table(
result_train_table: Table,
percentage_in_first: int,
) -> None:
train_table, test_table = table.split(percentage_in_first)
train_table, test_table = table.split_rows(percentage_in_first)
assert result_test_table == test_table
assert result_train_table.schema == train_table.schema
assert result_train_table == train_table
Expand All @@ -52,10 +52,10 @@ def test_should_raise_if_value_not_in_range(percentage_in_first: float) -> None:
table = Table({"col1": [1, 2, 1], "col2": [1, 2, 4]})

with pytest.raises(ValueError, match=r"The given percentage is not between 0 and 1"):
table.split(percentage_in_first)
table.split_rows(percentage_in_first)


def test_should_split_empty_table() -> None:
t1, t2 = Table().split(0.4)
t1, t2 = Table().split_rows(0.4)
assert t1.number_of_rows == 0
assert t2.number_of_rows == 0
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,6 @@
],
ids=["Column of integers and Column of characters", "empty", "empty with columns", "Column of None"],
)
def test_should_make_summary(table: Table, expected: Table) -> None:
assert expected.schema == table.summary().schema
assert expected == table.summary()
def test_should_summarize_statistics(table: Table, expected: Table) -> None:
assert expected.schema == table.summarize_statistics().schema
assert expected == table.summarize_statistics()

0 comments on commit 9954986

Please sign in to comment.