Skip to content

Commit

Permalink
feat: rename drop_XY methods of Table to remove_XY (#122)
Browse files Browse the repository at this point in the history
### Summary of Changes

Rename the `drop_XY` methods of `Table` to `remove_XY`. When users want
to get rid of columns, they will probably look for "remove", particularly
since the names of methods to include new columns and rows start with
"add".

---------

Co-authored-by: lars-reimann <[email protected]>
  • Loading branch information
lars-reimann and lars-reimann authored Mar 30, 2023
1 parent 76a7112 commit 98d76a4
Show file tree
Hide file tree
Showing 24 changed files with 121 additions and 129 deletions.
2 changes: 1 addition & 1 deletion docs/tutorials/data_processing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@
"execution_count": null,
"outputs": [],
"source": [
"titanic_slice.drop_columns([\n",
"titanic_slice.remove_columns([\n",
" \"id\",\n",
" \"name\",\n",
" \"ticket\",\n",
Expand Down
2 changes: 1 addition & 1 deletion docs/tutorials/data_visualization.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
"execution_count": null,
"outputs": [],
"source": [
"titanic_numerical = titanic.drop_columns(\n",
"titanic_numerical = titanic.remove_columns(\n",
" [\"id\", \"name\", \"sex\", \"ticket\", \"cabin\", \"port_embarked\"]\n",
")"
],
Expand Down
122 changes: 61 additions & 61 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,7 +532,60 @@ def add_rows(self, rows: Union[list[Row], Table]) -> Table:
result.columns = self._schema.get_column_names()
return Table(result)

def drop_columns(self, column_names: list[str]) -> Table:
def filter_rows(self, query: Callable[[Row], bool]) -> Table:
    """
    Return a table with rows filtered by Callable (e.g. lambda function).

    Parameters
    ----------
    query : lambda function
        A Callable that is applied to all rows.

    Returns
    -------
    table : Table
        A table containing only the rows filtered by the query.
    """
    # Keep only the rows for which the predicate is truthy.
    matching: list[Row] = [candidate for candidate in self.to_rows() if query(candidate)]
    if not matching:
        # No row matched: return an empty table that still carries the schema.
        return Table([], self._schema)
    return self.from_rows(matching)

def keep_only_columns(self, column_names: list[str]) -> Table:
    """
    Return a table with only the given column(s).

    Parameters
    ----------
    column_names : list[str]
        A list containing only the columns to be kept.

    Returns
    -------
    table : Table
        A table containing only the given column(s).

    Raises
    ------
    UnknownColumnNameError
        If any of the given columns do not exist.
    """
    # Collect all unknown names before raising so the error reports every
    # invalid column at once instead of only the first one.
    invalid_columns = []
    column_indices = []
    for name in column_names:
        if not self._schema.has_column(name):
            invalid_columns.append(name)
        else:
            column_indices.append(self._schema._get_column_index_by_name(name))
    if len(invalid_columns) != 0:
        raise UnknownColumnNameError(invalid_columns)
    # NOTE(review): the data is selected in column_names order, but the labels
    # below are assigned in schema order — if column_names is not given in
    # schema order the labels may not match their data; confirm intended behavior.
    transformed_data = self._data[column_indices]
    transformed_data.columns = list(name for name in self._schema.get_column_names() if name in column_names)
    return Table(transformed_data)

def remove_columns(self, column_names: list[str]) -> Table:
"""
Return a table without the given column(s).
Expand Down Expand Up @@ -564,7 +617,7 @@ def drop_columns(self, column_names: list[str]) -> Table:
transformed_data.columns = list(name for name in self._schema.get_column_names() if name not in column_names)
return Table(transformed_data)

def drop_columns_with_missing_values(self) -> Table:
def remove_columns_with_missing_values(self) -> Table:
"""
Return a table without the columns that contain missing values.
Expand All @@ -575,7 +628,7 @@ def drop_columns_with_missing_values(self) -> Table:
"""
return Table.from_columns([column for column in self.to_columns() if not column.has_missing_values()])

def drop_columns_with_non_numerical_values(self) -> Table:
def remove_columns_with_non_numerical_values(self) -> Table:
"""
Return a table without the columns that contain non-numerical values.
Expand All @@ -587,7 +640,7 @@ def drop_columns_with_non_numerical_values(self) -> Table:
"""
return Table.from_columns([column for column in self.to_columns() if column.type.is_numeric()])

def drop_duplicate_rows(self) -> Table:
def remove_duplicate_rows(self) -> Table:
"""
Return a copy of the table with every duplicate row removed.
Expand All @@ -600,7 +653,7 @@ def drop_duplicate_rows(self) -> Table:
df.columns = self._schema.get_column_names()
return Table(df)

def drop_rows_with_missing_values(self) -> Table:
def remove_rows_with_missing_values(self) -> Table:
"""
Return a table without the rows that contain missing values.
Expand All @@ -613,7 +666,7 @@ def drop_rows_with_missing_values(self) -> Table:
result = result.dropna(axis="index")
return Table(result, self._schema)

def drop_rows_with_outliers(self) -> Table:
def remove_rows_with_outliers(self) -> Table:
"""
Remove all rows from the table that contain at least one outlier.
Expand All @@ -628,65 +681,12 @@ def drop_rows_with_outliers(self) -> Table:
"""
copy = self._data.copy(deep=True)

table_without_nonnumericals = self.drop_columns_with_non_numerical_values()
table_without_nonnumericals = self.remove_columns_with_non_numerical_values()
z_scores = np.absolute(stats.zscore(table_without_nonnumericals._data, nan_policy="omit"))
filter_ = ((z_scores < 3) | np.isnan(z_scores)).all(axis=1)

return Table(copy[filter_], self._schema)

def filter_rows(self, query: Callable[[Row], bool]) -> Table:
    """
    Return a table with rows filtered by Callable (e.g. lambda function).

    Parameters
    ----------
    query : lambda function
        A Callable that is applied to all rows.

    Returns
    -------
    table : Table
        A table containing only the rows filtered by the query.
    """
    # Materialize the matching rows once so the empty case can be detected.
    rows: list[Row] = [row for row in self.to_rows() if query(row)]
    if len(rows) == 0:
        # Presumably from_rows cannot build a table from zero rows, so the
        # schema is passed along explicitly here — TODO confirm.
        result_table = Table([], self._schema)
    else:
        result_table = self.from_rows(rows)
    return result_table

def keep_only_columns(self, column_names: list[str]) -> Table:
    """
    Return a table with only the given column(s).

    Parameters
    ----------
    column_names : list[str]
        A list containing only the columns to be kept.

    Returns
    -------
    table : Table
        A table containing only the given column(s).

    Raises
    ------
    UnknownColumnNameError
        If any of the given columns do not exist.
    """
    # Gather every unknown name first so a single error can list them all.
    invalid_columns = []
    column_indices = []
    for name in column_names:
        if not self._schema.has_column(name):
            invalid_columns.append(name)
        else:
            column_indices.append(self._schema._get_column_index_by_name(name))
    if len(invalid_columns) != 0:
        raise UnknownColumnNameError(invalid_columns)
    # NOTE(review): columns are selected in column_names order while labels are
    # reassigned in schema order; these can disagree when column_names is not in
    # schema order — verify against callers.
    transformed_data = self._data[column_indices]
    transformed_data.columns = list(name for name in self._schema.get_column_names() if name in column_names)
    return Table(transformed_data)

def rename_column(self, old_name: str, new_name: str) -> Table:
"""
Rename a single column.
Expand Down Expand Up @@ -955,7 +955,7 @@ def correlation_heatmap(self) -> None:
"""
Plot a correlation heatmap for all numerical columns of this `Table`.
"""
only_numerical = self.drop_columns_with_non_numerical_values()
only_numerical = self.remove_columns_with_non_numerical_values()

sns.heatmap(
data=only_numerical._data.corr(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
from tests.helpers import resolve_resource_path


def test_table_column_drop() -> None:
def test_table_remove_columns() -> None:
table = Table.from_csv_file(resolve_resource_path("test_table_from_csv_file.csv"))
transformed_table = table.drop_columns(["A"])
transformed_table = table.remove_columns(["A"])
assert transformed_table.schema.has_column("B") and not transformed_table.schema.has_column("A")


def test_table_column_drop_warning() -> None:
def test_table_remove_columns_warning() -> None:
table = Table.from_csv_file(resolve_resource_path("test_table_from_csv_file.csv"))
with pytest.raises(UnknownColumnNameError):
table.drop_columns(["C"])
table.remove_columns(["C"])
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from safeds.data.tabular.typing import ColumnType, TableSchema


def test_drop_columns_with_missing_values_valid() -> None:
def test_remove_columns_with_missing_values_valid() -> None:
table = Table(
pd.DataFrame(
data={
Expand All @@ -15,13 +15,11 @@ def test_drop_columns_with_missing_values_valid() -> None:
}
)
)
updated_table = table.drop_columns_with_missing_values()
updated_table = table.remove_columns_with_missing_values()
assert updated_table.get_column_names() == ["col3", "col4"]


def test_drop_columns_with_missing_values_empty() -> None:
table = Table(
[], TableSchema({"col1": ColumnType.from_numpy_dtype(np.dtype(float))})
)
updated_table = table.drop_columns_with_missing_values()
def test_remove_columns_with_missing_values_empty() -> None:
table = Table([], TableSchema({"col1": ColumnType.from_numpy_dtype(np.dtype(float))}))
updated_table = table.remove_columns_with_missing_values()
assert updated_table.get_column_names() == ["col1"]
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from safeds.data.tabular.typing import ColumnType, TableSchema


def test_drop_columns_with_non_numerical_values_valid() -> None:
def test_remove_columns_with_non_numerical_values_valid() -> None:
table = Table(
pd.DataFrame(
data={
Expand All @@ -15,13 +15,11 @@ def test_drop_columns_with_non_numerical_values_valid() -> None:
}
)
)
updated_table = table.drop_columns_with_non_numerical_values()
updated_table = table.remove_columns_with_non_numerical_values()
assert updated_table.get_column_names() == ["col3", "col4"]


def test_drop_columns_with_non_numerical_values_empty() -> None:
table = Table(
[], TableSchema({"col1": ColumnType.from_numpy_dtype(np.dtype(float))})
)
updated_table = table.drop_columns_with_non_numerical_values()
def test_remove_columns_with_non_numerical_values_empty() -> None:
table = Table([], TableSchema({"col1": ColumnType.from_numpy_dtype(np.dtype(float))}))
updated_table = table.remove_columns_with_non_numerical_values()
assert updated_table.get_column_names() == ["col1"]
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
"test_table_duplicate_rows_no_duplicates.csv",
],
)
def test_drop_duplicate_rows(path: str) -> None:
def test_remove_duplicate_rows(path: str) -> None:
expected_table = Table(pd.DataFrame(data={"A": [1, 4], "B": [2, 5]}))
table = Table.from_csv_file(resolve_resource_path(path))
result_table = table.drop_duplicate_rows()
result_table = table.remove_duplicate_rows()
assert expected_table == result_table
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from safeds.data.tabular.typing import ColumnType, TableSchema


def test_drop_rows_with_missing_values_valid() -> None:
def test_remove_rows_with_missing_values_valid() -> None:
table = Table(
pd.DataFrame(
data={
Expand All @@ -15,13 +15,11 @@ def test_drop_rows_with_missing_values_valid() -> None:
}
)
)
updated_table = table.drop_rows_with_missing_values()
updated_table = table.remove_rows_with_missing_values()
assert updated_table.count_rows() == 2


def test_drop_rows_with_missing_values_empty() -> None:
table = Table(
[], TableSchema({"col1": ColumnType.from_numpy_dtype(np.dtype(float))})
)
updated_table = table.drop_rows_with_missing_values()
def test_remove_rows_with_missing_values_empty() -> None:
table = Table([], TableSchema({"col1": ColumnType.from_numpy_dtype(np.dtype(float))}))
updated_table = table.remove_rows_with_missing_values()
assert updated_table.get_column_names() == ["col1"]
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from safeds.data.tabular.typing import ColumnType, TableSchema


def test_drop_rows_with_outliers_no_outliers() -> None:
def test_remove_rows_with_outliers_no_outliers() -> None:
table = Table(
pd.DataFrame(
data={
Expand All @@ -15,13 +15,13 @@ def test_drop_rows_with_outliers_no_outliers() -> None:
)
)
names = table.get_column_names()
result = table.drop_rows_with_outliers()
result = table.remove_rows_with_outliers()
assert result.count_rows() == 3
assert result.count_columns() == 3
assert names == table.get_column_names()


def test_drop_rows_with_outliers_with_outliers() -> None:
def test_remove_rows_with_outliers_with_outliers() -> None:
input_ = Table(
pd.DataFrame(
data={
Expand All @@ -44,7 +44,7 @@ def test_drop_rows_with_outliers_with_outliers() -> None:
}
)
)
result = input_.drop_rows_with_outliers()
result = input_.remove_rows_with_outliers()

expected = Table(
pd.DataFrame(
Expand All @@ -59,10 +59,8 @@ def test_drop_rows_with_outliers_with_outliers() -> None:
assert result == expected


def test_drop_rows_with_outliers_no_rows() -> None:
table = Table(
[], TableSchema({"col1": ColumnType.from_numpy_dtype(np.dtype(float))})
)
result = table.drop_rows_with_outliers()
def test_remove_rows_with_outliers_no_rows() -> None:
table = Table([], TableSchema({"col1": ColumnType.from_numpy_dtype(np.dtype(float))}))
result = table.remove_rows_with_outliers()
assert result.count_rows() == 0
assert result.count_columns() == 1
4 changes: 2 additions & 2 deletions tests/safeds/ml/classification/test_ada_boost.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ def test_should_include_features_of_prediction_input(self, classifier: Classifie

def test_should_include_complete_prediction_input(self, classifier: Classifier, valid_data: TaggedTable) -> None:
fitted_regressor = classifier.fit(valid_data)
prediction = fitted_regressor.predict(valid_data.drop_columns(["target"]))
assert prediction.drop_columns(["target"]) == valid_data.drop_columns(["target"])
prediction = fitted_regressor.predict(valid_data.remove_columns(["target"]))
assert prediction.remove_columns(["target"]) == valid_data.remove_columns(["target"])

def test_should_set_correct_target_name(self, classifier: Classifier, valid_data: TaggedTable) -> None:
fitted_classifier = classifier.fit(valid_data)
Expand Down
4 changes: 2 additions & 2 deletions tests/safeds/ml/classification/test_decision_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ def test_should_include_features_of_prediction_input(self, classifier: Classifie

def test_should_include_complete_prediction_input(self, classifier: Classifier, valid_data: TaggedTable) -> None:
fitted_regressor = classifier.fit(valid_data)
prediction = fitted_regressor.predict(valid_data.drop_columns(["target"]))
assert prediction.drop_columns(["target"]) == valid_data.drop_columns(["target"])
prediction = fitted_regressor.predict(valid_data.remove_columns(["target"]))
assert prediction.remove_columns(["target"]) == valid_data.remove_columns(["target"])

def test_should_set_correct_target_name(self, classifier: Classifier, valid_data: TaggedTable) -> None:
fitted_classifier = classifier.fit(valid_data)
Expand Down
4 changes: 2 additions & 2 deletions tests/safeds/ml/classification/test_gradient_boosting.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ def test_should_include_features_of_prediction_input(self, classifier: Classifie

def test_should_include_complete_prediction_input(self, classifier: Classifier, valid_data: TaggedTable) -> None:
fitted_regressor = classifier.fit(valid_data)
prediction = fitted_regressor.predict(valid_data.drop_columns(["target"]))
assert prediction.drop_columns(["target"]) == valid_data.drop_columns(["target"])
prediction = fitted_regressor.predict(valid_data.remove_columns(["target"]))
assert prediction.remove_columns(["target"]) == valid_data.remove_columns(["target"])

def test_should_set_correct_target_name(self, classifier: Classifier, valid_data: TaggedTable) -> None:
fitted_classifier = classifier.fit(valid_data)
Expand Down
Loading

0 comments on commit 98d76a4

Please sign in to comment.