Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: remove operations without replacement from tabular containers #747

Merged
merged 15 commits into from
May 10, 2024
Merged
59 changes: 4 additions & 55 deletions docs/tutorials/data_processing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -66,29 +66,7 @@
},
{
"cell_type": "markdown",
"source": [
"3. Extract a `Row` from your `Table`:"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"titanic_slice.get_row(0)"
],
"metadata": {
"collapsed": false
},
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"4. Extract a `Column` from your `Table`:"
],
"source": "3. Extract a `Column` from your `Table`:",
"metadata": {
"collapsed": false
}
Expand All @@ -106,32 +84,7 @@
},
{
"cell_type": "markdown",
"source": [
"5. Combine a list of `Row`s to a `Table` (make sure the `Row`s have the same columns):"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"Table.from_rows([\n",
" titanic_slice.get_row(0),\n",
" titanic_slice.get_row(1)\n",
"])"
],
"metadata": {
"collapsed": false
},
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"6. Combine a list of `Column`s to a `Table` (make sure the `Column`s have the same amount of rows):"
],
"source": "4. Combine a list of `Column`s to a `Table` (make sure the `Column`s have the same amount of rows):",
"metadata": {
"collapsed": false
}
Expand All @@ -152,9 +105,7 @@
},
{
"cell_type": "markdown",
"source": [
"7. Drop columns from a `Table`:"
],
"source": "5. Drop columns from a `Table`:",
"metadata": {
"collapsed": false
}
Expand All @@ -179,9 +130,7 @@
},
{
"cell_type": "markdown",
"source": [
"8. Keep only specified columns of a `Table`:"
],
"source": "6. Keep only specified columns of a `Table`:",
"metadata": {
"collapsed": false
}
Expand Down
30 changes: 2 additions & 28 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ python = "^3.11,<3.13"
apipkg = "^3.0.2"
levenshtein = ">=0.21.1,<0.26.0"
matplotlib = "^3.6.3"
openpyxl = "^3.1.2"
pandas = "^2.0.0"
pillow = ">=9.5,<11.0"
polars = {extras = ["numpy", "pyarrow"], version = "^0.20.25"}
Expand Down
25 changes: 0 additions & 25 deletions src/safeds/data/tabular/containers/_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -916,31 +916,6 @@ def standard_deviation(self) -> float:
raise NonNumericColumnError(f"{self.name} is of type {self._type}.")
return self._data.std()

def sum(self) -> float:
    """
    Add up all values of the column. Only numerical columns are supported.

    Returns
    -------
    sum:
        The total of all values in the column.

    Raises
    ------
    NonNumericColumnError
        If the column type is not numeric.

    Examples
    --------
    >>> from safeds.data.tabular.containers import Column
    >>> column = Column("test", [1, 2, 3])
    >>> column.sum()
    6
    """
    # Happy path first: a numeric column delegates straight to the underlying data.
    if self.type.is_numeric():
        return self._data.sum()

    raise NonNumericColumnError(f"{self.name} is of type {self._type}.")

def variance(self) -> float:
"""
Return the variance of the column. The column has to be numerical.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,7 @@ def correlation_with(self, other: ExperimentalColumn) -> float:
Calculate the Pearson correlation between this column and another column.

The Pearson correlation is a value between -1 and 1 that indicates how much the two columns are linearly related:

* A correlation of -1 indicates a perfect negative linear relationship.
* A correlation of 0 indicates no linear relationship.
* A correlation of 1 indicates a perfect positive linear relationship.
Expand Down
71 changes: 1 addition & 70 deletions src/safeds/data/tabular/containers/_row.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from __future__ import annotations

import functools
import sys
from collections.abc import Callable, Mapping
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any

from safeds._utils import _structural_hash
Expand Down Expand Up @@ -470,57 +469,6 @@ def get_column_type(self, column_name: str) -> ColumnType:
"""
return self._schema.get_column_type(column_name)

# ------------------------------------------------------------------------------------------------------------------
# Transformations
# ------------------------------------------------------------------------------------------------------------------

def sort_columns(
    self,
    comparator: Callable[[str, Any, str, Any], int] = lambda name_1, _value_1, name_2, _value_2: (
        # Compare the complete names. Comparing only the first character would leave
        # names that share an initial letter unsorted and fail on empty names.
        (name_1 > name_2) - (name_1 < name_2)
    ),
) -> Row:
    """
    Sort the columns of a `Row` with the given comparator and return a new `Row`.

    The original row is not modified. The comparator is a function with four parameters:

    * `name_1` is the name of the first column.
    * `value_1` is the value of the first column.
    * `name_2` is the name of the second column.
    * `value_2` is the value of the second column.

    It should return an integer, indicating the desired order of the columns:

    * If the first column should be ordered before the second column, the function should return a negative number.
    * If the first column should be ordered after the second column, the function should return a positive number.
    * If the original order of the two columns should be kept, the function should return 0.

    If no comparator is given, the columns will be sorted by their full name, using the ordinary string comparison
    (`<` / `>`) of Python.

    Parameters
    ----------
    comparator:
        The function used to compare two columns, each given as a name and a value.

    Returns
    -------
    new_row:
        A new row with sorted columns.
    """

    def compare_items(item_1: tuple[str, Any], item_2: tuple[str, Any]) -> int:
        # Adapt the (name, value) pairs produced by `dict.items()` to the four-argument comparator.
        return comparator(item_1[0], item_1[1], item_2[0], item_2[1])

    # `sorted` is stable, so columns for which the comparator returns 0 keep their original order.
    sorted_items = sorted(
        self.to_dict().items(),
        key=functools.cmp_to_key(compare_items),
    )
    return Row.from_dict(dict(sorted_items))

# ------------------------------------------------------------------------------------------------------------------
# Conversion
# ------------------------------------------------------------------------------------------------------------------
Expand All @@ -543,23 +491,6 @@ def to_dict(self) -> dict[str, Any]:
"""
return {column_name: self.get_value(column_name) for column_name in self.column_names}

def to_html(self) -> str:
    """
    Render the row as HTML.

    Returns
    -------
    output:
        The generated HTML.

    Examples
    --------
    >>> from safeds.data.tabular.containers import Row
    >>> row = Row({"a": 1, "b": 2})
    >>> html = row.to_html()
    """
    frame = self._data
    # Pass the full column count so that no column is cut from the output.
    column_count = frame.shape[1]
    return frame.to_html(max_rows=1, max_cols=column_count)

# ------------------------------------------------------------------------------------------------------------------
# IPython integration
# ------------------------------------------------------------------------------------------------------------------
Expand Down
Loading