Skip to content

Commit

Permalink
feat: improve table.summary. Catch ValueError thrown by `column.s…
Browse files Browse the repository at this point in the history
…tability` (#390)

Closes #320 

### Summary of Changes

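The stability of a column that contains only null values is undefined:
`Column.stability` now raises a `ValueError` in that case, and `Table.summary`
catches it and displays a dash (`-`) for that metric in the returned table.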

<!-- Please provide a summary of changes in this pull request, ensuring
all changes are explained. -->

---------

Co-authored-by: patrikguempel <[email protected]>
Co-authored-by: megalinter-bot <[email protected]>
  • Loading branch information
3 people authored Jun 23, 2023
1 parent d97d32a commit dbbe0e3
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 4 deletions.
6 changes: 6 additions & 0 deletions src/safeds/data/tabular/containers/_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,8 @@ def stability(self) -> float:
\frac{\text{number of occurrences of most common non-null value}}{\text{number of non-null values}}
$$
The stability cannot be calculated for a column with only null values.
Returns
-------
stability : float
Expand All @@ -516,6 +518,10 @@ def stability(self) -> float:
"""
if self._data.size == 0:
raise ColumnSizeError("> 0", "0")

if self.all(lambda x: x is None):
raise ValueError("Stability cannot be calculated for a column with only null values.")

return self._data.value_counts()[self.mode()[0]] / self._data.count()

def standard_deviation(self) -> float:
Expand Down
2 changes: 1 addition & 1 deletion src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,7 +600,7 @@ def summary(self) -> Table:
for function in statistics.values():
try:
values.append(str(function()))
except NonNumericColumnError:
except (NonNumericColumnError, ValueError):
values.append("-")

result = pd.concat([result, pd.DataFrame(values)], axis=1)
Expand Down
10 changes: 8 additions & 2 deletions tests/safeds/data/tabular/containers/_column/test_stability.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,13 @@ def test_should_return_stability_of_column(values: list[Any], expected: float) -
assert column.stability() == expected


def test_should_raise_if_column_is_empty() -> None:
def test_should_raise_column_size_error_if_column_is_empty() -> None:
column: Column[Any] = Column("A", [])
with pytest.raises(ColumnSizeError):
with pytest.raises(ColumnSizeError, match="Expected a column of size > 0 but got column of size 0."):
column.stability()


def test_should_raise_value_error_if_column_contains_only_none() -> None:
column: Column[Any] = Column("A", [None, None])
with pytest.raises(ValueError, match="Stability cannot be calculated for a column with only null values."):
column.stability()
22 changes: 21 additions & 1 deletion tests/safeds/data/tabular/containers/_table/test_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,28 @@
},
),
),
(
Table({"col": [None, None]}),
Table(
{
"metrics": [
"maximum",
"minimum",
"mean",
"mode",
"median",
"sum",
"variance",
"standard deviation",
"idness",
"stability",
],
"col": ["-", "-", "-", "[]", "-", "-", "-", "-", "0.0", "-"],
},
),
),
],
ids=["Column of integers and Column of characters", "empty", "empty with columns"],
ids=["Column of integers and Column of characters", "empty", "empty with columns", "Column of None"],
)
def test_should_make_summary(table: Table, expected: Table) -> None:
assert expected.schema == table.summary().schema
Expand Down

0 comments on commit dbbe0e3

Please sign in to comment.