From a2efcb76aa61dc1dcea152da018994d4ed04b917 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 14 May 2024 08:50:40 +0200 Subject: [PATCH 01/21] style: format with ruff --- .../data/image/containers/_image_list.py | 2 - .../data/labeled/containers/_image_dataset.py | 2 - .../transformation/_table_transformer.py | 4 +- .../ml/classical/_bases/_ada_boost_base.py | 1 - .../classical/_bases/_decision_tree_base.py | 1 - .../_bases/_gradient_boosting_base.py | 1 - .../_bases/_k_nearest_neighbors_base.py | 1 - .../classical/_bases/_random_forest_base.py | 1 - .../_bases/_support_vector_machine_base.py | 4 - src/safeds/ml/nn/_model.py | 1 - .../nn/converters/_output_converter_image.py | 4 - .../ml/nn/layers/_convolutional2d_layer.py | 1 - src/safeds/ml/nn/layers/_pooling2d_layer.py | 2 - .../data/image/containers/test_image.py | 11 +-- .../data/image/containers/test_image_list.py | 26 ------ .../data/image/typing/test_image_size.py | 7 -- .../labeled/containers/test_image_dataset.py | 83 ++++++++++++++----- .../tabular/containers/_table/test_hash.py | 6 +- .../transformation/test_one_hot_encoder.py | 1 - .../converters/test_input_converter_image.py | 4 - .../converters/test_output_converter_image.py | 6 -- .../test_output_converter_time_series.py | 3 - .../nn/layers/test_convolutional2d_layer.py | 4 - .../safeds/ml/nn/layers/test_flatten_layer.py | 4 - tests/safeds/ml/nn/layers/test_lstm_layer.py | 1 - .../ml/nn/layers/test_pooling2d_layer.py | 4 - tests/safeds/ml/nn/test_cnn_workflow.py | 3 - 27 files changed, 73 insertions(+), 115 deletions(-) diff --git a/src/safeds/data/image/containers/_image_list.py b/src/safeds/data/image/containers/_image_list.py index 635958613..b35d00e99 100644 --- a/src/safeds/data/image/containers/_image_list.py +++ b/src/safeds/data/image/containers/_image_list.py @@ -283,7 +283,6 @@ def from_files( return image_list class _FromFileThreadPackage: - def __init__( self, im_files: list[str], @@ -323,7 +322,6 @@ def __len__(self) -> int: return len(self._im_files) class _FromImageThread(Thread): - def __init__(self, packages: list[ImageList._FromFileThreadPackage]) -> None: super().__init__() self._packages = packages diff --git a/src/safeds/data/labeled/containers/_image_dataset.py b/src/safeds/data/labeled/containers/_image_dataset.py index 41af7f9f6..2beb0fbe5 100644 --- a/src/safeds/data/labeled/containers/_image_dataset.py +++ b/src/safeds/data/labeled/containers/_image_dataset.py @@ -289,7 +289,6 @@ def shuffle(self) -> ImageDataset[T]: class _TableAsTensor: - def __init__(self, table: Table) -> None: import torch @@ -345,7 +344,6 @@ def _to_table(self) -> Table: class _ColumnAsTensor: - def __init__(self, column: Column) -> None: import torch diff --git a/src/safeds/data/tabular/transformation/_table_transformer.py b/src/safeds/data/tabular/transformation/_table_transformer.py index 2968df41f..822baf6ef 100644 --- a/src/safeds/data/tabular/transformation/_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_table_transformer.py @@ -139,7 +139,9 @@ def get_names_of_removed_columns(self) -> list[str]: """ def fit_and_transform( - self, table: Table, column_names: list[str] | None = None, + self, + table: Table, + column_names: list[str] | None = None, ) -> tuple[Self, Table]: """ Learn a transformation for a set of columns in a table and apply the learned transformation to the same table. diff --git a/src/safeds/ml/classical/_bases/_ada_boost_base.py b/src/safeds/ml/classical/_bases/_ada_boost_base.py index e6492a564..5264bc3a0 100644 --- a/src/safeds/ml/classical/_bases/_ada_boost_base.py +++ b/src/safeds/ml/classical/_bases/_ada_boost_base.py @@ -11,7 +11,6 @@ class _AdaBoostBase(ABC): - # ------------------------------------------------------------------------------------------------------------------ # Dunder methods # ------------------------------------------------------------------------------------------------------------------ diff --git a/src/safeds/ml/classical/_bases/_decision_tree_base.py b/src/safeds/ml/classical/_bases/_decision_tree_base.py index 6dd19d058..f71823696 100644 --- a/src/safeds/ml/classical/_bases/_decision_tree_base.py +++ b/src/safeds/ml/classical/_bases/_decision_tree_base.py @@ -7,7 +7,6 @@ class _DecisionTreeBase(ABC): - # ------------------------------------------------------------------------------------------------------------------ # Dunder methods # ------------------------------------------------------------------------------------------------------------------ diff --git a/src/safeds/ml/classical/_bases/_gradient_boosting_base.py b/src/safeds/ml/classical/_bases/_gradient_boosting_base.py index 51b4f9913..357577aeb 100644 --- a/src/safeds/ml/classical/_bases/_gradient_boosting_base.py +++ b/src/safeds/ml/classical/_bases/_gradient_boosting_base.py @@ -7,7 +7,6 @@ class _GradientBoostingBase(ABC): - # ------------------------------------------------------------------------------------------------------------------ # Dunder methods # ------------------------------------------------------------------------------------------------------------------ diff --git a/src/safeds/ml/classical/_bases/_k_nearest_neighbors_base.py b/src/safeds/ml/classical/_bases/_k_nearest_neighbors_base.py index ecb722238..70022b65a 100644 --- a/src/safeds/ml/classical/_bases/_k_nearest_neighbors_base.py +++ b/src/safeds/ml/classical/_bases/_k_nearest_neighbors_base.py @@ -7,7 +7,6 @@ class _KNearestNeighborsBase(ABC): - # ------------------------------------------------------------------------------------------------------------------ # Dunder methods # ------------------------------------------------------------------------------------------------------------------ diff --git a/src/safeds/ml/classical/_bases/_random_forest_base.py b/src/safeds/ml/classical/_bases/_random_forest_base.py index 94122e5d4..42c397b30 100644 --- a/src/safeds/ml/classical/_bases/_random_forest_base.py +++ b/src/safeds/ml/classical/_bases/_random_forest_base.py @@ -7,7 +7,6 @@ class _RandomForestBase(ABC): - # ------------------------------------------------------------------------------------------------------------------ # Dunder methods # ------------------------------------------------------------------------------------------------------------------ diff --git a/src/safeds/ml/classical/_bases/_support_vector_machine_base.py b/src/safeds/ml/classical/_bases/_support_vector_machine_base.py index 09e5577a1..fc85a4b58 100644 --- a/src/safeds/ml/classical/_bases/_support_vector_machine_base.py +++ b/src/safeds/ml/classical/_bases/_support_vector_machine_base.py @@ -117,7 +117,6 @@ def kernel(self) -> _SupportVectorMachineBase.Kernel: class _Linear(_SupportVectorMachineBase.Kernel): - # ------------------------------------------------------------------------------------------------------------------ # Dunder methods # ------------------------------------------------------------------------------------------------------------------ @@ -142,7 +141,6 @@ def _apply(self, model: SklearnSVC) -> None: class _Polynomial(_SupportVectorMachineBase.Kernel): - # ------------------------------------------------------------------------------------------------------------------ # Dunder methods # ------------------------------------------------------------------------------------------------------------------ @@ -188,7 +186,6 @@ def _apply(self, model: SklearnSVC) -> None: class _RadialBasisFunction(_SupportVectorMachineBase.Kernel): - # ------------------------------------------------------------------------------------------------------------------ # Dunder methods # ------------------------------------------------------------------------------------------------------------------ @@ -213,7 +210,6 @@ def _apply(self, model: SklearnSVC) -> None: class _Sigmoid(_SupportVectorMachineBase.Kernel): - # ------------------------------------------------------------------------------------------------------------------ # Dunder methods # ------------------------------------------------------------------------------------------------------------------ diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 6cdea9df0..3954606a3 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -493,7 +493,6 @@ def _create_internal_model( class _InternalModel(nn.Module): def __init__(self, layers: list[Layer], is_for_classification: bool) -> None: - super().__init__() self._layer_list = layers internal_layers = [] diff --git a/src/safeds/ml/nn/converters/_output_converter_image.py b/src/safeds/ml/nn/converters/_output_converter_image.py index e435a7543..959fe2caf 100644 --- a/src/safeds/ml/nn/converters/_output_converter_image.py +++ b/src/safeds/ml/nn/converters/_output_converter_image.py @@ -19,7 +19,6 @@ class _OutputConversionImage(OutputConversion[ImageList, ImageDataset], ABC): - @abstractmethod def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Any) -> ImageDataset: pass # pragma: no cover @@ -66,7 +65,6 @@ def __sizeof__(self) -> int: class OutputConversionImageToColumn(_OutputConversionImage): - def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Any) -> ImageDataset[Column]: import torch @@ -100,7 +98,6 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: class OutputConversionImageToTable(_OutputConversionImage): - def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Any) -> ImageDataset[Table]: import torch @@ -133,7 +130,6 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: class OutputConversionImageToImage(_OutputConversionImage): - def _data_conversion( self, input_data: ImageList, diff --git a/src/safeds/ml/nn/layers/_convolutional2d_layer.py b/src/safeds/ml/nn/layers/_convolutional2d_layer.py index 3adbf0d8e..70b717487 100644 --- a/src/safeds/ml/nn/layers/_convolutional2d_layer.py +++ b/src/safeds/ml/nn/layers/_convolutional2d_layer.py @@ -246,7 +246,6 @@ def __sizeof__(self) -> int: class ConvolutionalTranspose2DLayer(Convolutional2DLayer): - def __init__( self, output_channel: int, diff --git a/src/safeds/ml/nn/layers/_pooling2d_layer.py b/src/safeds/ml/nn/layers/_pooling2d_layer.py index 9a615b376..ffd6c2f9d 100644 --- a/src/safeds/ml/nn/layers/_pooling2d_layer.py +++ b/src/safeds/ml/nn/layers/_pooling2d_layer.py @@ -177,7 +177,6 @@ def __sizeof__(self) -> int: class MaxPooling2DLayer(_Pooling2DLayer): - def __init__(self, kernel_size: int, *, stride: int = -1, padding: int = 0) -> None: """ Create a maximum Pooling 2D Layer. @@ -195,7 +194,6 @@ def __init__(self, kernel_size: int, *, stride: int = -1, padding: int = 0) -> N class AveragePooling2DLayer(_Pooling2DLayer): - def __init__(self, kernel_size: int, *, stride: int = -1, padding: int = 0) -> None: """ Create a average Pooling 2D Layer. diff --git a/tests/safeds/data/image/containers/test_image.py b/tests/safeds/data/image/containers/test_image.py index 5b1e51672..ad3ae820b 100644 --- a/tests/safeds/data/image/containers/test_image.py +++ b/tests/safeds/data/image/containers/test_image.py @@ -109,7 +109,6 @@ def test_should_write_and_load_bytes_png(self, resource_path: str | Path, device @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestToNumpyArray: - @pytest.mark.parametrize( "resource_path", images_all(), @@ -192,9 +191,12 @@ def test_should_raise_if_image_has_alpha_channel(self, resource_path: str | Path image = Image.from_file(resolve_resource_path(resource_path)) with NamedTemporaryFile(suffix=".jpg") as tmp_jpeg_file: tmp_jpeg_file.close() - with Path(tmp_jpeg_file.name).open("w", encoding="utf-8") as tmp_file, pytest.raises( - IllegalFormatError, - match=r"This format is illegal. Use one of the following formats: png", + with ( + Path(tmp_jpeg_file.name).open("w", encoding="utf-8") as tmp_file, + pytest.raises( + IllegalFormatError, + match=r"This format is illegal. Use one of the following formats: png", + ), ): image.to_jpeg_file(tmp_file.name) @@ -1025,7 +1027,6 @@ def test_should_return_edges_of_image( class TestFilterEdgesKernel: - def test_should_kernel_change_device(self) -> None: assert Image._filter_edges_kernel().device == _get_device() configure_test_with_device(device_cpu) diff --git a/tests/safeds/data/image/containers/test_image_list.py b/tests/safeds/data/image/containers/test_image_list.py index 5e8f8fb96..c547742ca 100644 --- a/tests/safeds/data/image/containers/test_image_list.py +++ b/tests/safeds/data/image/containers/test_image_list.py @@ -42,7 +42,6 @@ @pytest.mark.parametrize("resource_path1", images_all(), ids=images_all_ids()) @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestAllImageCombinations: - def test_from_files(self, resource_path1: str, resource_path2: str, resource_path3: str, device: Device) -> None: # Setup configure_test_with_device(device) @@ -442,7 +441,6 @@ def test_from_files(self, resource_path1: str, resource_path2: str, resource_pat @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestFromFiles: - @pytest.mark.parametrize( "resource_path", [ @@ -569,7 +567,6 @@ def test_create_from_single_sized_image_lists( @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestToImages: - @pytest.mark.parametrize( "resource_path", [images_all(), [plane_png_path, plane_jpg_path] * 2], @@ -600,7 +597,6 @@ def test_from_files_creation(self, resource_path: list[str], device: Device) -> @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestToJpegFiles: - @pytest.mark.parametrize( "resource_path", [images_all(), [plane_png_path, plane_jpg_path]], @@ -757,7 +753,6 @@ def test_should_save_images_in_files(self, resource_path: list[str], device: Dev @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestToPngFiles: - @pytest.mark.parametrize( "resource_path", [images_all(), [plane_png_path, plane_jpg_path], [grayscale_png_path, grayscale_png_path]], @@ -842,7 +837,6 @@ def test_should_save_images_in_files(self, resource_path: list[str], device: Dev @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestShuffleImages: - @pytest.mark.parametrize( "resource_path", [images_all(), [plane_png_path, plane_jpg_path] * 2], @@ -871,7 +865,6 @@ def test_shuffle_images( @pytest.mark.parametrize("resource_path1", images_all_channel(), ids=images_all_channel_ids()) @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestTransformsEqualImageTransforms: - @pytest.mark.parametrize( ("method", "attributes"), [ @@ -1016,9 +1009,7 @@ def test_change_channel_of_tensor(self, channel_in: int, channel_out: int, devic ) @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestErrorsAndWarningsWithoutEmptyImageList: - class TestAddImageTensor: - def test_should_raise(self, resource_path: list[str], device: Device) -> None: configure_test_with_device(device) image_list = ImageList.from_files(resolve_resource_path(resource_path)) @@ -1026,14 +1017,12 @@ def test_should_raise(self, resource_path: list[str], device: Device) -> None: image_list._add_image_tensor(image_list.to_images([0])[0]._image_tensor, 0) class TestEquals: - def test_should_raise(self, resource_path: list[str], device: Device) -> None: configure_test_with_device(device) image_list_original = ImageList.from_files(resolve_resource_path(resource_path)) assert (image_list_original.__eq__(image_list_original.to_images([0]))) is NotImplemented class TestCrop: - @pytest.mark.parametrize( ("new_x", "new_y"), [(10000, 1), (1, 10000), (10000, 10000)], @@ -1057,7 +1046,6 @@ def test_should_warn_if_coordinates_outsize_image( assert torch.all(torch.eq(cropped_image_list._as_single_size_image_list()._tensor, image_blank_tensor)) class TestAdjustColorBalance: - def test_should_not_adjust_color_balance_channel_1( self, resource_path: list[str], @@ -1084,9 +1072,7 @@ def test_should_not_adjust_color_balance_channel_1( ) @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestErrorsAndWarningsWithEmptyImageList: - class TestChangeChannel: - @pytest.mark.parametrize( "channel", [-1, 0, 2, 5], @@ -1102,7 +1088,6 @@ def test_should_raise(self, resource_path: list[str], channel: int, device: Devi image_list.change_channel(channel) class TestRemoveImageByIndex: - def test_should_raise_invalid_index(self, resource_path: list[str], device: Device) -> None: configure_test_with_device(device) image_list = ImageList.from_files(resolve_resource_path(resource_path)) @@ -1112,7 +1097,6 @@ def test_should_raise_invalid_index(self, resource_path: list[str], device: Devi image_list.remove_image_by_index(len(image_list)) class TestRemoveImagesWithSize: - @pytest.mark.parametrize( ("width", "height"), [(-10, 10), (10, -10), (-10, -10)], @@ -1127,7 +1111,6 @@ def test_should_raise_negative_size( image_list.remove_images_with_size(width, height) class TestResize: - @pytest.mark.parametrize( ("new_width", "new_height"), [(-10, 10), (10, -10), (-10, -10)], @@ -1142,7 +1125,6 @@ def test_should_raise_new_size( image_list.resize(new_width, new_height) class TestCrop: - @pytest.mark.parametrize( ("new_width", "new_height"), [(-10, 1), (1, -10), (-10, -1)], @@ -1170,7 +1152,6 @@ def test_should_raise_invalid_coordinates( image_list.crop(new_x, new_y, 100, 100) class TestAddNoise: - @pytest.mark.parametrize( "standard_deviation", [-1], @@ -1187,7 +1168,6 @@ def test_should_raise_standard_deviation( assert image_list_original == image_list_clone class TestAdjustBrightness: - @pytest.mark.parametrize( "factor", [-1], @@ -1223,7 +1203,6 @@ def test_should_not_brighten( assert image_list_original == image_list_clone class TestAdjustContrast: - @pytest.mark.parametrize( "factor", [-1], @@ -1259,7 +1238,6 @@ def test_should_not_adjust( assert image_list_original == image_list_clone class TestAdjustColorBalance: - @pytest.mark.parametrize( "factor", [-1], @@ -1295,7 +1273,6 @@ def test_should_not_adjust_color_balance_factor_1( assert image_list_original == image_list_clone class TestBlur: - def test_should_raise_radius_out_of_bounds(self, resource_path: str, device: Device) -> None: configure_test_with_device(device) image_list_original = ImageList.from_files(resolve_resource_path(resource_path)) @@ -1325,7 +1302,6 @@ def test_should_not_blur(self, resource_path: str, device: Device) -> None: assert image_list_original == image_list_clone class TestSharpen: - @pytest.mark.parametrize( "factor", [-1], @@ -1363,7 +1339,6 @@ def test_should_not_adjust( @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestSingleSizeImageList: - @pytest.mark.parametrize( "tensor", [ @@ -1467,7 +1442,6 @@ def test_get_batch_and_iterate_4_dim(self, tensor: Tensor, device: Device) -> No @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestEmptyImageList: - def test_warn_empty_image_list(self, device: Device) -> None: configure_test_with_device(device) with pytest.warns( diff --git a/tests/safeds/data/image/typing/test_image_size.py b/tests/safeds/data/image/typing/test_image_size.py index 88cfe0b8e..e1e4cb0c3 100644 --- a/tests/safeds/data/image/typing/test_image_size.py +++ b/tests/safeds/data/image/typing/test_image_size.py @@ -19,7 +19,6 @@ class TestFromImage: - @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) @pytest.mark.parametrize("resource_path", images_all(), ids=images_all_ids()) def test_should_create(self, resource_path: str, device: Device) -> None: @@ -30,7 +29,6 @@ def test_should_create(self, resource_path: str, device: Device) -> None: class TestEq: - @pytest.mark.parametrize(("image_size", "width", "height", "channel"), [(ImageSize(1, 2, 3), 1, 2, 3)]) def test_should_be_equal(self, image_size: ImageSize, width: int, height: int, channel: int) -> None: assert image_size == ImageSize(width, height, channel) @@ -52,7 +50,6 @@ def test_should_be_not_implemented(self, image_size: ImageSize, other: Any) -> N class TestHash: - @pytest.mark.parametrize( "resource_path", images_all(), @@ -68,21 +65,18 @@ def test_hash_should_not_be_equal(self) -> None: class TestSizeOf: - @pytest.mark.parametrize("image_size", [ImageSize(1, 2, 3)]) def test_should_size_be_greater_than_normal_object(self, image_size: ImageSize) -> None: assert sys.getsizeof(image_size) >= sys.getsizeof(0) * 3 class TestStr: - @pytest.mark.parametrize("image_size", [ImageSize(1, 2, 3)]) def test_should_size_be_greater_than_normal_object(self, image_size: ImageSize) -> None: assert str(image_size) == f"{image_size.width}x{image_size.height}x{image_size.channel} (WxHxC)" class TestProperties: - @pytest.mark.parametrize("width", list(range(1, 5))) @pytest.mark.parametrize("height", list(range(1, 5))) @pytest.mark.parametrize("channel", [1, 3, 4]) @@ -98,7 +92,6 @@ def test_should_ignore_invalid_channel(self, channel: int) -> None: class TestErrors: - @pytest.mark.parametrize("width", [-1, 0]) def test_should_raise_invalid_width(self, width: int) -> None: with pytest.raises(OutOfBoundsError): diff --git a/tests/safeds/data/labeled/containers/test_image_dataset.py b/tests/safeds/data/labeled/containers/test_image_dataset.py index 120e18d80..51052488a 100644 --- a/tests/safeds/data/labeled/containers/test_image_dataset.py +++ b/tests/safeds/data/labeled/containers/test_image_dataset.py @@ -38,7 +38,6 @@ @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestImageDatasetInit: - @pytest.mark.parametrize( ("input_data", "output_data", "error", "error_msg"), [ @@ -100,7 +99,12 @@ class TestImageDatasetInit: ], ) def test_should_raise_with_invalid_data( - self, input_data: ImageList, output_data: T, error: type[Exception], error_msg: str, device: Device, + self, + input_data: ImageList, + output_data: T, + error: type[Exception], + error_msg: str, + device: Device, ) -> None: configure_test_with_device(device) with pytest.raises(error, match=error_msg): @@ -109,7 +113,6 @@ def test_should_raise_with_invalid_data( @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestLength: - def test_should_return_length(self, device: Device) -> None: configure_test_with_device(device) image_dataset = ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [1])) @@ -120,7 +123,6 @@ def test_should_return_length(self, device: Device) -> None: @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestEq: - @pytest.mark.parametrize( "image_dataset_output", [ @@ -131,8 +133,18 @@ class TestEq: ) def test_should_be_equal(self, image_dataset_output: str | Column | Table, device: Device) -> None: configure_test_with_device(device) - image_dataset1 = ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(image_dataset_output)) if isinstance(image_dataset_output, str) else image_dataset_output) # type: ignore[type-var] - image_dataset2 = ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(image_dataset_output)) if isinstance(image_dataset_output, str) else image_dataset_output) # type: ignore[type-var] + image_dataset1 = ImageDataset( + ImageList.from_files(resolve_resource_path(plane_png_path)), + ImageList.from_files(resolve_resource_path(image_dataset_output)) + if isinstance(image_dataset_output, str) + else image_dataset_output, + ) # type: ignore[type-var] + image_dataset2 = ImageDataset( + ImageList.from_files(resolve_resource_path(plane_png_path)), + ImageList.from_files(resolve_resource_path(image_dataset_output)) + if isinstance(image_dataset_output, str) + else image_dataset_output, + ) # type: ignore[type-var] assert image_dataset1 is not image_dataset2 assert image_dataset1 == image_dataset2 assert image_dataset1._input._tensor.device == _get_device() @@ -169,8 +181,18 @@ def test_should_not_be_equal( device: Device, ) -> None: configure_test_with_device(device) - image_dataset1 = ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(image_dataset1_output)) if isinstance(image_dataset1_output, str) else image_dataset1_output) # type: ignore[type-var] - image_dataset2 = ImageDataset(ImageList.from_files(resolve_resource_path(image_dataset2_input)), ImageList.from_files(resolve_resource_path(image_dataset2_output)) if isinstance(image_dataset2_output, str) else image_dataset2_output) # type: ignore[type-var] + image_dataset1 = ImageDataset( + ImageList.from_files(resolve_resource_path(plane_png_path)), + ImageList.from_files(resolve_resource_path(image_dataset1_output)) + if isinstance(image_dataset1_output, str) + else image_dataset1_output, + ) # type: ignore[type-var] + image_dataset2 = ImageDataset( + ImageList.from_files(resolve_resource_path(image_dataset2_input)), + ImageList.from_files(resolve_resource_path(image_dataset2_output)) + if isinstance(image_dataset2_output, str) + else image_dataset2_output, + ) # type: ignore[type-var] assert image_dataset1 != image_dataset2 assert image_dataset1._input._tensor.device == _get_device() assert image_dataset1._output._tensor.device == _get_device() @@ -186,7 +208,6 @@ def test_should_be_not_implemented(self, device: Device) -> None: @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestHash: - @pytest.mark.parametrize( "image_dataset_output", [ @@ -197,8 +218,18 @@ class TestHash: ) def test_hash_should_be_equal(self, image_dataset_output: str | Column | Table, device: Device) -> None: configure_test_with_device(device) - image_dataset1 = ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(image_dataset_output)) if isinstance(image_dataset_output, str) else image_dataset_output) # type: ignore[type-var] - image_dataset2 = ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(image_dataset_output)) if isinstance(image_dataset_output, str) else image_dataset_output) # type: ignore[type-var] + image_dataset1 = ImageDataset( + ImageList.from_files(resolve_resource_path(plane_png_path)), + ImageList.from_files(resolve_resource_path(image_dataset_output)) + if isinstance(image_dataset_output, str) + else image_dataset_output, + ) # type: ignore[type-var] + image_dataset2 = ImageDataset( + ImageList.from_files(resolve_resource_path(plane_png_path)), + ImageList.from_files(resolve_resource_path(image_dataset_output)) + if isinstance(image_dataset_output, str) + else image_dataset_output, + ) # type: ignore[type-var] assert image_dataset1 is not image_dataset2 assert hash(image_dataset1) == hash(image_dataset2) assert image_dataset1._input._tensor.device == _get_device() @@ -235,8 +266,18 @@ def test_hash_should_not_be_equal( device: Device, ) -> None: configure_test_with_device(device) - image_dataset1 = ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(image_dataset1_output)) if isinstance(image_dataset1_output, str) else image_dataset1_output) # type: ignore[type-var] - image_dataset2 = ImageDataset(ImageList.from_files(resolve_resource_path(image_dataset2_input)), ImageList.from_files(resolve_resource_path(image_dataset2_output)) if isinstance(image_dataset2_output, str) else image_dataset2_output) # type: ignore[type-var] + image_dataset1 = ImageDataset( + ImageList.from_files(resolve_resource_path(plane_png_path)), + ImageList.from_files(resolve_resource_path(image_dataset1_output)) + if isinstance(image_dataset1_output, str) + else image_dataset1_output, + ) # type: ignore[type-var] + image_dataset2 = ImageDataset( + ImageList.from_files(resolve_resource_path(image_dataset2_input)), + ImageList.from_files(resolve_resource_path(image_dataset2_output)) + if isinstance(image_dataset2_output, str) + else image_dataset2_output, + ) # type: ignore[type-var] assert hash(image_dataset1) != hash(image_dataset2) assert image_dataset1._input._tensor.device == _get_device() assert image_dataset1._output._tensor.device == _get_device() @@ -246,7 +287,6 @@ def test_hash_should_not_be_equal( @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestSizeOf: - @pytest.mark.parametrize( "image_dataset_output", [ @@ -256,10 +296,17 @@ class TestSizeOf: ], ) def test_should_size_be_greater_than_normal_object( - self, image_dataset_output: str | Column | Table, device: Device, + self, + image_dataset_output: str | Column | Table, + device: Device, ) -> None: configure_test_with_device(device) - image_dataset = ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(image_dataset_output)) if isinstance(image_dataset_output, str) else image_dataset_output) # type: ignore[type-var] + image_dataset = ImageDataset( + ImageList.from_files(resolve_resource_path(plane_png_path)), + ImageList.from_files(resolve_resource_path(image_dataset_output)) + if isinstance(image_dataset_output, str) + else image_dataset_output, + ) # type: ignore[type-var] assert sys.getsizeof(image_dataset) > sys.getsizeof(object()) assert image_dataset._input._tensor.device == _get_device() assert image_dataset._output._tensor.device == _get_device() @@ -267,7 +314,6 @@ def test_should_size_be_greater_than_normal_object( @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestShuffle: - def test_should_be_different_order(self, device: Device) -> None: configure_test_with_device(device) torch.manual_seed(1234) @@ -284,7 +330,6 @@ def test_should_be_different_order(self, device: Device) -> None: @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestBatch: - @pytest.mark.parametrize( ("batch_number", "batch_size"), [ @@ -320,7 +365,6 @@ def test_get_batch_device(self, device: Device) -> None: @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestTableAsTensor: - def test_should_raise_if_not_one_hot_encoded(self, device: Device) -> None: configure_test_with_device(device) with pytest.raises( @@ -350,7 +394,6 @@ def test_eq_should_be_not_implemented(self, device: Device) -> None: @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestColumnAsTensor: - @pytest.mark.parametrize( ("tensor", "one_hot_encoder", "error", "error_msg"), [ diff --git a/tests/safeds/data/tabular/containers/_table/test_hash.py b/tests/safeds/data/tabular/containers/_table/test_hash.py index 1a5144053..33db60600 100644 --- a/tests/safeds/data/tabular/containers/_table/test_hash.py +++ b/tests/safeds/data/tabular/containers/_table/test_hash.py @@ -30,11 +30,7 @@ def test_should_return_same_hash_for_equal_tables(table1: Table, table2: Table) (Table({"col1": [1, 2, 3]}), Table({"col1": ["1", "2", "3"]})), (Table({"col1": [1, 2, 3]}), Table({"col1": [1, 2, 3, 4]})), ], - ids=[ - "different column names", - "different types", - "different number of rows" - ], + ids=["different column names", "different types", "different number of rows"], ) def test_should_return_different_hash_for_unequal_tables(table1: Table, table2: Table) -> None: assert hash(table1) != hash(table2) diff --git a/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py b/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py index 0eec11bee..5b0ec5a9c 100644 --- a/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py +++ b/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py @@ -12,7 +12,6 @@ class TestEq: - def test_should_be_not_implemented(self) -> None: assert OneHotEncoder().__eq__(Table()) is NotImplemented diff --git a/tests/safeds/ml/nn/converters/test_input_converter_image.py b/tests/safeds/ml/nn/converters/test_input_converter_image.py index 15eba9a50..46822d0d0 100644 --- a/tests/safeds/ml/nn/converters/test_input_converter_image.py +++ b/tests/safeds/ml/nn/converters/test_input_converter_image.py @@ -13,7 +13,6 @@ class TestIsFitDataValid: - @pytest.mark.parametrize( ("image_dataset_valid", "image_dataset_invalid"), [ @@ -78,7 +77,6 @@ def test_should_return_false_if_fit_data_is_invalid( class TestEq: - @pytest.mark.parametrize( ("input_conversion_image1", "input_conversion_image2"), [(InputConversionImage(ImageSize(1, 2, 3)), InputConversionImage(ImageSize(1, 2, 3)))], @@ -114,7 +112,6 @@ def test_should_be_not_implemented(self) -> None: class TestHash: - @pytest.mark.parametrize( ("input_conversion_image1", "input_conversion_image2"), [(InputConversionImage(ImageSize(1, 2, 3)), InputConversionImage(ImageSize(1, 2, 3)))], @@ -145,7 +142,6 @@ def test_hash_should_not_be_equal( class TestSizeOf: - @pytest.mark.parametrize("input_conversion_image", [InputConversionImage(ImageSize(1, 2, 3))]) def test_should_size_be_greater_than_normal_object(self, input_conversion_image: InputConversionImage) -> None: assert sys.getsizeof(input_conversion_image) > sys.getsizeof(object()) diff --git a/tests/safeds/ml/nn/converters/test_output_converter_image.py b/tests/safeds/ml/nn/converters/test_output_converter_image.py index be751a1ca..584eae4cc 100644 --- a/tests/safeds/ml/nn/converters/test_output_converter_image.py +++ b/tests/safeds/ml/nn/converters/test_output_converter_image.py @@ -15,7 +15,6 @@ class TestDataConversionImage: - @pytest.mark.parametrize( ("output_conversion", "kwargs"), [ @@ -33,7 +32,6 @@ def test_should_raise_if_input_data_is_multi_size( output_conversion._data_conversion(input_data=_MultiSizeImageList(), output_data=torch.empty(1), **kwargs) class TestEq: - @pytest.mark.parametrize( ("output_conversion_image1", "output_conversion_image2"), [ @@ -65,7 +63,6 @@ def test_should_be_not_implemented(self) -> None: assert output_conversion_image_to_column.__eq__(output_conversion_image_to_image) is NotImplemented class TestHash: - @pytest.mark.parametrize( ("output_conversion_image1", "output_conversion_image2"), [ @@ -90,7 +87,6 @@ def test_hash_should_not_be_equal(self) -> None: assert hash(output_conversion_image_to_table) != hash(output_conversion_image_to_column) class TestSizeOf: - @pytest.mark.parametrize( "output_conversion_image", [ @@ -107,7 +103,6 @@ def test_should_size_be_greater_than_normal_object( class TestOutputConversionImageToColumn: - def test_should_raise_if_column_name_not_set(self) -> None: with pytest.raises( ValueError, @@ -132,7 +127,6 @@ def test_should_raise_if_one_hot_encoder_not_set(self) -> None: class TestOutputConversionImageToTable: - def test_should_raise_if_column_names_not_set(self) -> None: with pytest.raises( ValueError, diff --git a/tests/safeds/ml/nn/converters/test_output_converter_time_series.py b/tests/safeds/ml/nn/converters/test_output_converter_time_series.py index f85266d40..131c5c369 100644 --- a/tests/safeds/ml/nn/converters/test_output_converter_time_series.py +++ b/tests/safeds/ml/nn/converters/test_output_converter_time_series.py @@ -38,7 +38,6 @@ def test_output_conversion_time_series_2() -> None: class TestEq: - @pytest.mark.parametrize( ("output_conversion_ts1", "output_conversion_ts2"), [ @@ -70,7 +69,6 @@ def test_should_not_be_equal( class TestHash: - @pytest.mark.parametrize( ("output_conversion_ts1", "output_conversion_ts2"), [ @@ -96,7 +94,6 @@ def test_hash_should_not_be_equal(self) -> None: class TestSizeOf: - @pytest.mark.parametrize( "output_conversion_ts", [ diff --git a/tests/safeds/ml/nn/layers/test_convolutional2d_layer.py b/tests/safeds/ml/nn/layers/test_convolutional2d_layer.py index fdc234a52..4b8ac362e 100644 --- a/tests/safeds/ml/nn/layers/test_convolutional2d_layer.py +++ b/tests/safeds/ml/nn/layers/test_convolutional2d_layer.py @@ -8,7 +8,6 @@ class TestConvolutional2DLayer: - @pytest.mark.parametrize( ("activation_function", "activation_layer"), [("sigmoid", nn.Sigmoid), ("relu", nn.ReLU), ("softmax", nn.Softmax)], @@ -159,7 +158,6 @@ def test_should_raise_if_input_size_is_set_with_int( layer._set_input_size(1) class TestEq: - @pytest.mark.parametrize( ("conv2dlayer1", "conv2dlayer2"), [ @@ -214,7 +212,6 @@ def test_should_be_not_implemented(self) -> None: assert convtranspose2dlayer.__eq__(conv2dlayer) is NotImplemented class TestHash: - @pytest.mark.parametrize( ("conv2dlayer1", "conv2dlayer2"), [ @@ -265,7 +262,6 @@ def test_hash_should_not_be_equal( assert hash(conv2dlayer1) != hash(conv2dlayer2) class TestSizeOf: - @pytest.mark.parametrize( "conv2dlayer", [ diff --git a/tests/safeds/ml/nn/layers/test_flatten_layer.py b/tests/safeds/ml/nn/layers/test_flatten_layer.py index 9e998ddc1..b034a7620 100644 --- a/tests/safeds/ml/nn/layers/test_flatten_layer.py +++ b/tests/safeds/ml/nn/layers/test_flatten_layer.py @@ -8,7 +8,6 @@ class TestFlattenLayer: - def test_should_create_flatten_layer(self) -> None: layer = FlattenLayer() input_size = ImageSize(10, 20, 30, _ignore_invalid_channel=True) @@ -33,7 +32,6 @@ def test_should_raise_if_input_size_is_set_with_int(self) -> None: layer._set_input_size(1) class TestEq: - def test_should_be_equal(self) -> None: assert FlattenLayer() == FlattenLayer() @@ -41,11 +39,9 @@ def test_should_be_not_implemented(self) -> None: assert FlattenLayer().__eq__(Table()) is NotImplemented class TestHash: - def test_hash_should_be_equal(self) -> None: assert hash(FlattenLayer()) == hash(FlattenLayer()) class TestSizeOf: - def test_should_size_be_greater_than_normal_object(self) -> None: assert sys.getsizeof(FlattenLayer()) > sys.getsizeof(object()) diff --git a/tests/safeds/ml/nn/layers/test_lstm_layer.py b/tests/safeds/ml/nn/layers/test_lstm_layer.py index 386cfd9db..dd23d3191 100644 --- a/tests/safeds/ml/nn/layers/test_lstm_layer.py +++ b/tests/safeds/ml/nn/layers/test_lstm_layer.py @@ -29,7 +29,6 @@ def test_should_raise_if_input_size_out_of_bounds(input_size: int) -> None: ids=["one", "twenty"], ) def test_should_raise_if_input_size_doesnt_match(input_size: int) -> None: - assert LSTMLayer(output_size=1, input_size=input_size).input_size == input_size diff --git a/tests/safeds/ml/nn/layers/test_pooling2d_layer.py b/tests/safeds/ml/nn/layers/test_pooling2d_layer.py index e5dc243f5..4c5fcb6e3 100644 --- a/tests/safeds/ml/nn/layers/test_pooling2d_layer.py +++ b/tests/safeds/ml/nn/layers/test_pooling2d_layer.py @@ -10,7 +10,6 @@ class TestPooling2DLayer: - @pytest.mark.parametrize( ("strategy", "torch_layer"), [ @@ -58,7 +57,6 @@ def test_should_raise_if_input_size_is_set_with_int(self, strategy: Literal["max layer._set_input_size(1) class TestEq: - @pytest.mark.parametrize( ("pooling_2d_layer_1", "pooling_2d_layer_2"), [ @@ -114,7 +112,6 @@ def test_should_be_not_implemented(self) -> None: assert avg_pooling_2d_layer.__eq__(max_pooling_2d_layer) is NotImplemented class TestHash: - @pytest.mark.parametrize( ("pooling_2d_layer_1", "pooling_2d_layer_2"), [ @@ -161,7 +158,6 @@ def test_hash_should_not_be_equal( assert hash(pooling_2d_layer_1) != hash(pooling_2d_layer_2) class TestSizeOf: - @pytest.mark.parametrize( "pooling_2d_layer", [ diff --git a/tests/safeds/ml/nn/test_cnn_workflow.py b/tests/safeds/ml/nn/test_cnn_workflow.py index c4e581a3e..f927cd88b 100644 --- a/tests/safeds/ml/nn/test_cnn_workflow.py +++ b/tests/safeds/ml/nn/test_cnn_workflow.py @@ -36,7 +36,6 @@ class TestImageToTableClassifier: - @pytest.mark.parametrize( ("seed", "device", "prediction_label"), [ @@ -104,7 +103,6 @@ def test_should_train_and_predict_model( class TestImageToColumnClassifier: - @pytest.mark.parametrize( ("seed", "device", "prediction_label"), [ @@ -171,7 +169,6 @@ def test_should_train_and_predict_model( class TestImageToImageRegressor: - @pytest.mark.parametrize( ("seed", "device"), [ From 3aaedba9524154fbab57f6e087bb47a8d4806f24 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 14 May 2024 09:13:54 +0200 Subject: [PATCH 02/21] test: fix some failing tests --- src/safeds/data/tabular/containers/_table.py | 5 +++++ .../tabular/containers/_table/test_remove_duplicate_rows.py | 3 ++- .../containers/_table/test_remove_rows_with_outliers.py | 2 -- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index d05c433cf..8c1d13825 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -1033,6 +1033,9 @@ def remove_duplicate_rows(self) -> Table: | 2 | 5 | +-----+-----+ """ + if self.number_of_columns == 0: + return self # Workaround for https://github.com/pola-rs/polars/issues/16207 + return Table._from_polars_lazy_frame( self._lazy_frame.unique(maintain_order=True), ) @@ -1221,6 +1224,8 @@ def remove_rows_with_outliers( | null | 8 | +------+-----+ """ + if self.number_of_rows == 0: + return self # polars raises a ComputeError for tables without rows if column_names is None: column_names = self.column_names diff --git a/tests/safeds/data/tabular/containers/_table/test_remove_duplicate_rows.py b/tests/safeds/data/tabular/containers/_table/test_remove_duplicate_rows.py index 3a3470cd8..186563154 100644 --- a/tests/safeds/data/tabular/containers/_table/test_remove_duplicate_rows.py +++ b/tests/safeds/data/tabular/containers/_table/test_remove_duplicate_rows.py @@ -15,8 +15,9 @@ Table({"A": [1, 4], "B": [2, 5]}), ), (Table(), Table()), + (Table({"col1": []}), Table({"col1": []})), ], - ids=["duplicate rows", "empty"], + ids=["duplicate rows", "empty", "no rows"], ) def test_should_remove_duplicate_rows(table: Table, expected: Table) -> None: result_table = table.remove_duplicate_rows() diff --git a/tests/safeds/data/tabular/containers/_table/test_remove_rows_with_outliers.py b/tests/safeds/data/tabular/containers/_table/test_remove_rows_with_outliers.py index 9ebf37167..70642e501 100644 --- a/tests/safeds/data/tabular/containers/_table/test_remove_rows_with_outliers.py +++ b/tests/safeds/data/tabular/containers/_table/test_remove_rows_with_outliers.py @@ -261,6 +261,4 @@ ) def test_should_remove_rows_with_outliers(table: Table, expected: Table) -> None: updated_table = table.remove_rows_with_outliers() - assert updated_table.schema == expected.schema - assert updated_table.number_of_rows == expected.number_of_rows assert updated_table == expected From 503af37480b0be810b2ffc7a5fc12e089502dc14 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 14 May 2024 09:24:49 +0200 Subject: [PATCH 03/21] docs: split_rows shuffles by default --- src/safeds/data/tabular/containers/_table.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 8c1d13825..dad1ab991 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -1445,7 +1445,10 @@ def split_rows( The first table contains a percentage of the rows specified by `percentage_in_first`, and the second table contains the remaining rows. - **Note:** The original table is not modified. + **Notes:** + + - The original table is not modified. + - By default, the rows are shuffled before splitting. You can disable this by setting `shuffle` to False. Parameters ---------- From 213a109d07120a20e28d72caa9b4381897afc57e Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 14 May 2024 10:39:09 +0200 Subject: [PATCH 04/21] WIP --- src/safeds/data/tabular/containers/_table.py | 2 +- .../transformation/_table_transformer.py | 128 +++++++++--------- src/safeds/ml/classical/_supervised_model.py | 2 +- 3 files changed, 64 insertions(+), 68 deletions(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index dad1ab991..d5c590c0f 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -323,7 +323,7 @@ def __init__(self, data: Mapping[str, Sequence[Any]] | None = None) -> None: # Implementation self._lazy_frame: pl.LazyFrame = pl.LazyFrame(data) - self.__data_frame_cache: pl.DataFrame | None = None + self.__data_frame_cache: pl.DataFrame | None = None # Scramble the name to prevent access from outside def __eq__(self, other: object) -> bool: if not isinstance(other, Table): diff --git a/src/safeds/data/tabular/transformation/_table_transformer.py b/src/safeds/data/tabular/transformation/_table_transformer.py index 822baf6ef..fdfa4026f 100644 --- a/src/safeds/data/tabular/transformation/_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_table_transformer.py @@ -7,6 +7,7 @@ if TYPE_CHECKING: from safeds.data.tabular.containers import Table + from safeds.data.tabular.typing import Schema class TableTransformer(ABC): @@ -16,31 +17,43 @@ class TableTransformer(ABC): # Dunder methods # ------------------------------------------------------------------------------------------------------------------ - def __hash__(self) -> int: - """ - Return a deterministic hash value for a table transformer. + # The decorator is needed so the class really cannot be instantiated + @abstractmethod + def __init__(self) -> None: + # Schema of input table + self._input: Schema | None = None - Returns - ------- - hash: - The hash value. - """ - added = self.get_names_of_added_columns() if self.is_fitted else [] - changed = self.get_names_of_changed_columns() if self.is_fitted else [] - removed = self.get_names_of_removed_columns() if self.is_fitted else [] - return _structural_hash(self.__class__.__qualname__, self.is_fitted, added, changed, removed) + # Schema of added columns + self._added: Schema | None = None + + # Map of column names to the schema of their replacements + self._replaced: dict[str, Schema] | None = None + + # Names of columns that were removed + self._removed: list[str] | None = None + + # The decorator ensures that the method is overridden in all subclasses + @abstractmethod + def __hash__(self) -> int: + return _structural_hash( + self.__class__.__qualname__, + self._input, + self._added, + self._replaced, + self._removed, + ) # ------------------------------------------------------------------------------------------------------------------ # Properties # ------------------------------------------------------------------------------------------------------------------ @property - @abstractmethod def is_fitted(self) -> bool: """Whether the transformer is fitted.""" + return None not in (self._input, self._added, self._replaced, self._removed) # ------------------------------------------------------------------------------------------------------------------ - # Methods + # Learning and transformation # ------------------------------------------------------------------------------------------------------------------ @abstractmethod @@ -86,58 +99,6 @@ def transform(self, table: Table) -> Table: If the transformer has not been fitted yet. """ - # ------------------------------------------------------------------------------------------------------------------ - # Introspection - # ------------------------------------------------------------------------------------------------------------------ - - @abstractmethod - def get_names_of_added_columns(self) -> list[str]: - """ - Get the names of all new columns that have been added by the transformer. - - Returns - ------- - added_columns: - A list of names of the added columns, ordered as they will appear in the table. - - Raises - ------ - TransformerNotFittedError - If the transformer has not been fitted yet. - """ - - @abstractmethod - def get_names_of_changed_columns(self) -> list[str]: - """ - Get the names of all columns that have been changed by the transformer. - - Returns - ------- - changed_columns: - A list of names of changed columns, ordered as they appear in the table. - - Raises - ------ - TransformerNotFittedError - If the transformer has not been fitted yet. - """ - - @abstractmethod - def get_names_of_removed_columns(self) -> list[str]: - """ - Get the names of all columns that have been removed by the transformer. - - Returns - ------- - removed_columns: - A list of names of the removed columns, ordered as they appear in the table the transformer was fitted on. - - Raises - ------ - TransformerNotFittedError - If the transformer has not been fitted yet. - """ - def fit_and_transform( self, table: Table, @@ -165,3 +126,38 @@ def fit_and_transform( fitted_transformer = self.fit(table, column_names) transformed_table = fitted_transformer.transform(table) return fitted_transformer, transformed_table + + # ------------------------------------------------------------------------------------------------------------------ + # Template methods + # ------------------------------------------------------------------------------------------------------------------ + + def _check_additional_fit_preconditions(self, table: Table) -> None: # noqa: B027 + """ + Check additional preconditions for fitting the transformer and raise an error if any are violated. + + Parameters + ---------- + table: + The table used to fit the transformer. + """ + + def _check_additional_transform_preconditions(self, table: Table) -> None: # noqa: B027 + """ + Check additional preconditions for transforming with the transformer and raise an error if any are violated. + + Parameters + ---------- + table: + The table to which the learned transformation is applied. + """ + + @abstractmethod + def _clone(self) -> Self: + """ + Return a new instance of this transformer with the same settings. + + Returns + ------- + clone: + A new instance of this transformer. + """ diff --git a/src/safeds/ml/classical/_supervised_model.py b/src/safeds/ml/classical/_supervised_model.py index 7824cf9e9..c4658bfc1 100644 --- a/src/safeds/ml/classical/_supervised_model.py +++ b/src/safeds/ml/classical/_supervised_model.py @@ -59,7 +59,7 @@ def is_fitted(self) -> bool: return None not in (self._feature_schema, self._target_name, self._target_type, self._wrapped_model) # ------------------------------------------------------------------------------------------------------------------ - # Machine learning + # Learning and prediction # ------------------------------------------------------------------------------------------------------------------ def fit(self, training_set: TabularDataset) -> Self: From 85c34530e3fe84b8721a142e6d14e91181413f8b Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 14 May 2024 12:54:36 +0200 Subject: [PATCH 05/21] feat: simplify interface We can later add introspection again once we know what we actually want. --- .../transformation/_table_transformer.py | 58 +------------------ 1 file changed, 2 insertions(+), 56 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_table_transformer.py b/src/safeds/data/tabular/transformation/_table_transformer.py index fdfa4026f..366187d96 100644 --- a/src/safeds/data/tabular/transformation/_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_table_transformer.py @@ -7,7 +7,6 @@ if TYPE_CHECKING: from safeds.data.tabular.containers import Table - from safeds.data.tabular.typing import Schema class TableTransformer(ABC): @@ -17,30 +16,12 @@ class TableTransformer(ABC): # Dunder methods # ------------------------------------------------------------------------------------------------------------------ - # The decorator is needed so the class really cannot be instantiated - @abstractmethod - def __init__(self) -> None: - # Schema of input table - self._input: Schema | None = None - - # Schema of added columns - self._added: Schema | None = None - - # Map of column names to the schema of their replacements - self._replaced: dict[str, Schema] | None = None - - # Names of columns that were removed - self._removed: list[str] | None = None - # The decorator ensures that the method is overridden in all subclasses @abstractmethod def __hash__(self) -> int: return _structural_hash( self.__class__.__qualname__, - self._input, - self._added, - self._replaced, - self._removed, + self.is_fitted, ) # ------------------------------------------------------------------------------------------------------------------ @@ -48,9 +29,9 @@ def __hash__(self) -> int: # ------------------------------------------------------------------------------------------------------------------ @property + @abstractmethod def is_fitted(self) -> bool: """Whether the transformer is fitted.""" - return None not in (self._input, self._added, self._replaced, self._removed) # ------------------------------------------------------------------------------------------------------------------ # Learning and transformation @@ -126,38 +107,3 @@ def fit_and_transform( fitted_transformer = self.fit(table, column_names) transformed_table = fitted_transformer.transform(table) return fitted_transformer, transformed_table - - # ------------------------------------------------------------------------------------------------------------------ - # Template methods - # ------------------------------------------------------------------------------------------------------------------ - - def _check_additional_fit_preconditions(self, table: Table) -> None: # noqa: B027 - """ - Check additional preconditions for fitting the transformer and raise an error if any are violated. - - Parameters - ---------- - table: - The table used to fit the transformer. - """ - - def _check_additional_transform_preconditions(self, table: Table) -> None: # noqa: B027 - """ - Check additional preconditions for transforming with the transformer and raise an error if any are violated. - - Parameters - ---------- - table: - The table to which the learned transformation is applied. - """ - - @abstractmethod - def _clone(self) -> Self: - """ - Return a new instance of this transformer with the same settings. - - Returns - ------- - clone: - A new instance of this transformer. - """ From 0cb2fa84dde8788cf3acc7d4372800898b065fe2 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 14 May 2024 15:01:06 +0200 Subject: [PATCH 06/21] WIP --- .../tabular/transformation/_discretizer.py | 74 ++++--------------- .../tabular/transformation/_label_encoder.py | 63 +--------------- .../transformation/_one_hot_encoder.py | 66 +---------------- .../tabular/transformation/_range_scaler.py | 74 ++----------------- .../tabular/transformation/_simple_imputer.py | 65 ++-------------- .../transformation/_standard_scaler.py | 57 +------------- .../transformation/_table_transformer.py | 9 ++- 7 files changed, 41 insertions(+), 367 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_discretizer.py b/src/safeds/data/tabular/transformation/_discretizer.py index 614a8cbff..e82ec2590 100644 --- a/src/safeds/data/tabular/transformation/_discretizer.py +++ b/src/safeds/data/tabular/transformation/_discretizer.py @@ -30,13 +30,26 @@ class Discretizer(TableTransformer): If the given number_of_bins is less than 2. """ + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + def __init__(self, number_of_bins: int = 5): + super().__init__() + _check_bounds("number_of_bins", number_of_bins, lower_bound=_ClosedBound(2)) - self._column_names: list[str] | None = None self._wrapped_transformer: sk_KBinsDiscretizer | None = None self._number_of_bins = number_of_bins + # ------------------------------------------------------------------------------------------------------------------ + # Properties + # ------------------------------------------------------------------------------------------------------------------ + + @property + def number_of_bins(self) -> int: + return self._number_of_bins + def fit(self, table: Table, column_names: list[str] | None) -> Discretizer: """ Learn a transformation for a set of columns in a table. @@ -137,62 +150,3 @@ def transform(self, table: Table) -> Table: return Table._from_polars_lazy_frame( table._lazy_frame.update(new_data.lazy()), ) - - @property - def is_fitted(self) -> bool: - """Whether the transformer is fitted.""" - return self._wrapped_transformer is not None - - def get_names_of_added_columns(self) -> list[str]: - """ - Get the names of all new columns that have been added by the Discretizer. - - Returns - ------- - added_columns: - A list of names of the added columns, ordered as they will appear in the table. - - Raises - ------ - TransformerNotFittedError - If the transformer has not been fitted yet. - """ - if not self.is_fitted: - raise TransformerNotFittedError - return [] - - def get_names_of_changed_columns(self) -> list[str]: - """ - Get the names of all columns that may have been changed by the Discretizer. - - Returns - ------- - changed_columns: - The list of (potentially) changed column names, as passed to fit. - - Raises - ------ - TransformerNotFittedError - If the transformer has not been fitted yet. - """ - if self._column_names is None: - raise TransformerNotFittedError - return self._column_names - - def get_names_of_removed_columns(self) -> list[str]: - """ - Get the names of all columns that have been removed by the Discretizer. - - Returns - ------- - removed_columns: - A list of names of the removed columns, ordered as they appear in the table the Discretizer was fitted on. - - Raises - ------ - TransformerNotFittedError - If the transformer has not been fitted yet. - """ - if not self.is_fitted: - raise TransformerNotFittedError - return [] diff --git a/src/safeds/data/tabular/transformation/_label_encoder.py b/src/safeds/data/tabular/transformation/_label_encoder.py index 11f788dbb..75627c5ac 100644 --- a/src/safeds/data/tabular/transformation/_label_encoder.py +++ b/src/safeds/data/tabular/transformation/_label_encoder.py @@ -17,8 +17,9 @@ class LabelEncoder(InvertibleTableTransformer): """The LabelEncoder encodes one or more given columns into labels.""" def __init__(self) -> None: + super().__init__() + self._wrapped_transformer: sk_OrdinalEncoder | None = None - self._column_names: list[str] | None = None def fit(self, table: Table, column_names: list[str] | None) -> LabelEncoder: """ @@ -179,63 +180,3 @@ def inverse_transform(self, transformed_table: Table) -> Table: return Table._from_polars_lazy_frame( transformed_table._lazy_frame.update(new_data.lazy()), ) - - @property - def is_fitted(self) -> bool: - """Whether the transformer is fitted.""" - return self._wrapped_transformer is not None - - def get_names_of_added_columns(self) -> list[str]: - """ - Get the names of all new columns that have been added by the LabelEncoder. - - Returns - ------- - added_columns: - A list of names of the added columns, ordered as they will appear in the table. - - Raises - ------ - TransformerNotFittedError - If the transformer has not been fitted yet. - """ - if not self.is_fitted: - raise TransformerNotFittedError - return [] - - # (Must implement abstract method, cannot instantiate class otherwise.) - def get_names_of_changed_columns(self) -> list[str]: - """ - Get the names of all columns that may have been changed by the LabelEncoder. - - Returns - ------- - changed_columns: - The list of (potentially) changed column names, as passed to fit. - - Raises - ------ - TransformerNotFittedError - If the transformer has not been fitted yet. - """ - if self._column_names is None: - raise TransformerNotFittedError - return self._column_names - - def get_names_of_removed_columns(self) -> list[str]: - """ - Get the names of all columns that have been removed by the LabelEncoder. - - Returns - ------- - removed_columns: - A list of names of the removed columns, ordered as they appear in the table the LabelEncoder was fitted on. - - Raises - ------ - TransformerNotFittedError - If the transformer has not been fitted yet. - """ - if not self.is_fitted: - raise TransformerNotFittedError - return [] diff --git a/src/safeds/data/tabular/transformation/_one_hot_encoder.py b/src/safeds/data/tabular/transformation/_one_hot_encoder.py index 8b56fda61..f43a8425c 100644 --- a/src/safeds/data/tabular/transformation/_one_hot_encoder.py +++ b/src/safeds/data/tabular/transformation/_one_hot_encoder.py @@ -57,6 +57,8 @@ class OneHotEncoder(InvertibleTableTransformer): """ def __init__(self) -> None: + super().__init__() + # Maps each old column to (list of) new columns created from it: self._column_names: dict[str, list[str]] | None = None # Maps concrete values (tuples of old column and value) to corresponding new column names: @@ -310,67 +312,3 @@ def inverse_transform(self, transformed_table: Table) -> Table: # Drop old column names: table = table.remove_columns(list(self._value_to_column.values())) return table.remove_columns(list(self._value_to_column_nans.values())) - - @property - def is_fitted(self) -> bool: - """Whether the transformer is fitted.""" - return ( - self._column_names is not None - and self._value_to_column is not None - and self._value_to_column_nans is not None - ) - - def get_names_of_added_columns(self) -> list[str]: - """ - Get the names of all new columns that have been added by the OneHotEncoder. - - Returns - ------- - added_columns: - A list of names of the added columns, ordered as they will appear in the table. - - Raises - ------ - TransformerNotFittedError - If the transformer has not been fitted yet. - """ - if self._column_names is None: - raise TransformerNotFittedError - return [name for column_names in self._column_names.values() for name in column_names] - - # (Must implement abstract method, cannot instantiate class otherwise.) - def get_names_of_changed_columns(self) -> list[str]: - """ - Get the names of all columns that have been changed by the OneHotEncoder (none). - - Returns - ------- - changed_columns: - The empty list. - - Raises - ------ - TransformerNotFittedError - If the transformer has not been fitted yet. - """ - if not self.is_fitted: - raise TransformerNotFittedError - return [] - - def get_names_of_removed_columns(self) -> list[str]: - """ - Get the names of all columns that have been removed by the OneHotEncoder. - - Returns - ------- - removed_columns: - A list of names of the removed columns, ordered as they appear in the table the OneHotEncoder was fitted on. - - Raises - ------ - TransformerNotFittedError - If the transformer has not been fitted yet. - """ - if self._column_names is None: - raise TransformerNotFittedError - return list(self._column_names.keys()) diff --git a/src/safeds/data/tabular/transformation/_range_scaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py index 0a0d0a1a2..7e3eec5f3 100644 --- a/src/safeds/data/tabular/transformation/_range_scaler.py +++ b/src/safeds/data/tabular/transformation/_range_scaler.py @@ -1,6 +1,5 @@ from __future__ import annotations -from typing import TYPE_CHECKING from safeds._validation import _check_columns_exist from safeds.data.tabular.containers import Table @@ -8,8 +7,6 @@ from ._invertible_table_transformer import InvertibleTableTransformer -if TYPE_CHECKING: - from sklearn.preprocessing import MinMaxScaler as sk_MinMaxScaler class RangeScaler(InvertibleTableTransformer): @@ -29,11 +26,16 @@ class RangeScaler(InvertibleTableTransformer): If the given minimum is greater or equal to the given maximum """ + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + def __init__(self, min_: float = 0.0, max_: float = 1.0): - self._column_names: list[str] | None = None - self._wrapped_transformer: sk_MinMaxScaler | None = None + super().__init__() + if min_ >= max_: - raise ValueError('Parameter "maximum" must be higher than parameter "minimum".') + raise ValueError('Parameter "max_" must be greate than parameter "min_".') + self._minimum = min_ self._maximum = max_ @@ -228,63 +230,3 @@ def inverse_transform(self, transformed_table: Table) -> Table: return Table._from_polars_data_frame( transformed_table._data_frame.update(new_data), ) - - @property - def is_fitted(self) -> bool: - """Whether the transformer is fitted.""" - return self._wrapped_transformer is not None - - def get_names_of_added_columns(self) -> list[str]: - """ - Get the names of all new columns that have been added by the RangeScaler. - - Returns - ------- - added_columns: - A list of names of the added columns, ordered as they will appear in the table. - - Raises - ------ - TransformerNotFittedError - If the transformer has not been fitted yet. - """ - if not self.is_fitted: - raise TransformerNotFittedError - return [] - - # (Must implement abstract method, cannot instantiate class otherwise.) - def get_names_of_changed_columns(self) -> list[str]: - """ - Get the names of all columns that may have been changed by the RangeScaler. - - Returns - ------- - changed_columns: - The list of (potentially) changed column names, as passed to fit. - - Raises - ------ - TransformerNotFittedError - If the transformer has not been fitted yet. - """ - if self._column_names is None: - raise TransformerNotFittedError - return self._column_names - - def get_names_of_removed_columns(self) -> list[str]: - """ - Get the names of all columns that have been removed by the RangeScaler. - - Returns - ------- - removed_columns: - A list of names of the removed columns, ordered as they appear in the table the RangeScaler was fitted on. - - Raises - ------ - TransformerNotFittedError - If the transformer has not been fitted yet. - """ - if not self.is_fitted: - raise TransformerNotFittedError - return [] diff --git a/src/safeds/data/tabular/transformation/_simple_imputer.py b/src/safeds/data/tabular/transformation/_simple_imputer.py index ac0e1ea93..79dcc5fdd 100644 --- a/src/safeds/data/tabular/transformation/_simple_imputer.py +++ b/src/safeds/data/tabular/transformation/_simple_imputer.py @@ -88,6 +88,8 @@ def Mode() -> SimpleImputer.Strategy: # noqa: N802 return _Mode() def __init__(self, strategy: SimpleImputer.Strategy, *, value_to_replace: float | str | None = None): + super().__init__() + if value_to_replace is None: value_to_replace = pd.NA @@ -97,6 +99,10 @@ def __init__(self, strategy: SimpleImputer.Strategy, *, value_to_replace: float self._wrapped_transformer: sk_SimpleImputer | None = None self._column_names: list[str] | None = None + @property + def is_fitted(self) -> bool: + return self._wrapped_transformer is not None + @property def strategy(self) -> SimpleImputer.Strategy: """The strategy used to replace missing values.""" @@ -107,11 +113,6 @@ def value_to_replace(self) -> Any: """The value that should be replaced.""" return self._value_to_replace - @property - def is_fitted(self) -> bool: - """Whether the transformer is fitted.""" - return self._wrapped_transformer is not None - def fit(self, table: Table, column_names: list[str] | None) -> SimpleImputer: """ Learn a transformation for a set of columns in a table. @@ -233,60 +234,6 @@ def transform(self, table: Table) -> Table: table._lazy_frame.update(new_data.lazy()), ) - def get_names_of_added_columns(self) -> list[str]: - """ - Get the names of all new columns that have been added by the Imputer. - - Returns - ------- - added_columns: - A list of names of the added columns, ordered as they will appear in the table. - - Raises - ------ - TransformerNotFittedError - If the transformer has not been fitted yet. - """ - if not self.is_fitted: - raise TransformerNotFittedError - return [] - - def get_names_of_changed_columns(self) -> list[str]: - """ - Get the names of all columns that may have been changed by the Imputer. - - Returns - ------- - changed_columns: - The list of (potentially) changed column names, as passed to fit. - - Raises - ------ - TransformerNotFittedError - If the transformer has not been fitted yet. - """ - if self._column_names is None: - raise TransformerNotFittedError - return self._column_names - - def get_names_of_removed_columns(self) -> list[str]: - """ - Get the names of all columns that have been removed by the Imputer. - - Returns - ------- - removed_columns: - A list of names of the removed columns, ordered as they appear in the table the Imputer was fitted on. - - Raises - ------ - TransformerNotFittedError - If the transformer has not been fitted yet. - """ - if not self.is_fitted: - raise TransformerNotFittedError - return [] - # ---------------------------------------------------------------------------------------------------------------------- # Imputation strategies diff --git a/src/safeds/data/tabular/transformation/_standard_scaler.py b/src/safeds/data/tabular/transformation/_standard_scaler.py index 8a8122eb1..668b7f295 100644 --- a/src/safeds/data/tabular/transformation/_standard_scaler.py +++ b/src/safeds/data/tabular/transformation/_standard_scaler.py @@ -16,6 +16,8 @@ class StandardScaler(InvertibleTableTransformer): """The StandardScaler transforms column values to a range by removing the mean and scaling to unit variance.""" def __init__(self) -> None: + super().__init__() + self._column_names: list[str] | None = None self._wrapped_transformer: sk_StandardScaler | None = None @@ -204,59 +206,4 @@ def inverse_transform(self, transformed_table: Table) -> Table: @property def is_fitted(self) -> bool: - """Whether the transformer is fitted.""" return self._wrapped_transformer is not None - - def get_names_of_added_columns(self) -> list[str]: - """ - Get the names of all new columns that have been added by the StandardScaler. - - Returns - ------- - added_columns: - A list of names of the added columns, ordered as they will appear in the table. - - Raises - ------ - TransformerNotFittedError - If the transformer has not been fitted yet. - """ - if not self.is_fitted: - raise TransformerNotFittedError - return [] - - def get_names_of_changed_columns(self) -> list[str]: - """ - Get the names of all columns that may have been changed by the StandardScaler. - - Returns - ------- - changed_columns: - The list of (potentially) changed column names, as passed to fit. - - Raises - ------ - TransformerNotFittedError - If the transformer has not been fitted yet. - """ - if self._column_names is None: - raise TransformerNotFittedError - return self._column_names - - def get_names_of_removed_columns(self) -> list[str]: - """ - Get the names of all columns that have been removed by the StandardScaler. - - Returns - ------- - removed_columns: - A list of names of the removed columns, ordered as they appear in the table the StandardScaler was fitted on. - - Raises - ------ - TransformerNotFittedError - If the transformer has not been fitted yet. - """ - if not self.is_fitted: - raise TransformerNotFittedError - return [] diff --git a/src/safeds/data/tabular/transformation/_table_transformer.py b/src/safeds/data/tabular/transformation/_table_transformer.py index 366187d96..714fc503f 100644 --- a/src/safeds/data/tabular/transformation/_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_table_transformer.py @@ -16,12 +16,17 @@ class TableTransformer(ABC): # Dunder methods # ------------------------------------------------------------------------------------------------------------------ + # The decorator is needed so the class really cannot be instantiated + @abstractmethod + def __init__(self) -> None: + self._column_names: list[str] | None = None + # The decorator ensures that the method is overridden in all subclasses @abstractmethod def __hash__(self) -> int: return _structural_hash( self.__class__.__qualname__, - self.is_fitted, + self._column_names, ) # ------------------------------------------------------------------------------------------------------------------ @@ -29,9 +34,9 @@ def __hash__(self) -> int: # ------------------------------------------------------------------------------------------------------------------ @property - @abstractmethod def is_fitted(self) -> bool: """Whether the transformer is fitted.""" + return self._column_names is not None # ------------------------------------------------------------------------------------------------------------------ # Learning and transformation From 42a05f95265fc37c4d98edce4c16cfed50684073 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 14 May 2024 17:32:19 +0200 Subject: [PATCH 07/21] style: fix ruff errors --- src/safeds/data/image/containers/_image.py | 3 +- .../containers/_multi_size_image_list.py | 4 +- .../containers/_single_size_image_list.py | 4 +- .../tabular/transformation/_label_encoder.py | 4 ++ .../transformation/_one_hot_encoder.py | 4 ++ .../tabular/transformation/_range_scaler.py | 2 - .../tabular/transformation/_simple_imputer.py | 39 ++++++++----- .../_gradient_boosting_classifier.py | 3 - .../_support_vector_classifier.py | 8 --- .../data/image/containers/test_image_list.py | 37 ++++++------ .../data/image/typing/test_image_size.py | 2 +- .../_tabular_dataset/test_into_dataloader.py | 5 +- .../tabular/containers/_column/test_repr.py | 6 +- .../tabular/containers/_column/test_str.py | 6 +- .../tabular/containers/_table/test_hash.py | 3 +- .../_table/test_number_of_columns.py | 3 +- .../containers/_table/test_number_of_rows.py | 3 +- .../containers/_table/test_plot_histograms.py | 2 +- .../containers/_table/test_transform_table.py | 2 +- .../transformation/test_discretizer.py | 2 +- .../transformation/test_label_encoder.py | 2 +- .../transformation/test_one_hot_encoder.py | 2 +- .../transformation/test_range_scaler.py | 2 +- .../transformation/test_simple_imputer.py | 56 +++++++++---------- .../transformation/test_standard_scaler.py | 2 +- .../transformation/test_table_transformer.py | 4 +- 26 files changed, 105 insertions(+), 105 deletions(-) diff --git a/src/safeds/data/image/containers/_image.py b/src/safeds/data/image/containers/_image.py index fb104ce8d..8e8dda7c6 100644 --- a/src/safeds/data/image/containers/_image.py +++ b/src/safeds/data/image/containers/_image.py @@ -1,7 +1,6 @@ from __future__ import annotations import io -import os.path import sys import warnings from pathlib import Path @@ -81,7 +80,7 @@ def from_file(path: str | Path) -> Image: _init_default_device() - if not os.path.isfile(path): + if not path.is_file(): raise FileNotFoundError(f"No such file or directory: '{path}'") return Image(image_tensor=read_image(str(path)).to(_get_device())) diff --git a/src/safeds/data/image/containers/_multi_size_image_list.py b/src/safeds/data/image/containers/_multi_size_image_list.py index be1ca1789..5fcaceaca 100644 --- a/src/safeds/data/image/containers/_multi_size_image_list.py +++ b/src/safeds/data/image/containers/_multi_size_image_list.py @@ -66,7 +66,7 @@ def _create_from_single_sized_image_lists(single_size_image_lists: list[_SingleS single_size_image_list._indices_to_tensor_positions.keys(), [image_size] * len(single_size_image_list), strict=False, - ) + ), ) if max_channel is None: max_channel = single_size_image_list.channel @@ -80,7 +80,7 @@ def _create_from_single_sized_image_lists(single_size_image_lists: list[_SingleS for size in image_list._image_list_dict: if max_channel is not None and image_list._image_list_dict[size].channel != max_channel: image_list._image_list_dict[size] = image_list._image_list_dict[size].change_channel( - int(max_channel) + int(max_channel), ) return image_list diff --git a/src/safeds/data/image/containers/_single_size_image_list.py b/src/safeds/data/image/containers/_single_size_image_list.py index fea3c3529..1d31d7887 100644 --- a/src/safeds/data/image/containers/_single_size_image_list.py +++ b/src/safeds/data/image/containers/_single_size_image_list.py @@ -6,7 +6,7 @@ from pathlib import Path from typing import TYPE_CHECKING -from safeds._config import _init_default_device, _get_device +from safeds._config import _get_device, _init_default_device from safeds._utils import _structural_hash from safeds.data.image._utils._image_transformation_error_and_warning_checks import ( _check_add_noise_errors, @@ -82,7 +82,7 @@ def _create_image_list_from_files( image_list = _SingleSizeImageList() images_tensor = torch.empty( - number_of_images, max_channel, height, width, dtype=torch.uint8, device=_get_device() + number_of_images, max_channel, height, width, dtype=torch.uint8, device=_get_device(), ) thread_packages: list[ImageList._FromFileThreadPackage] = [] diff --git a/src/safeds/data/tabular/transformation/_label_encoder.py b/src/safeds/data/tabular/transformation/_label_encoder.py index 75627c5ac..70b4ccbcc 100644 --- a/src/safeds/data/tabular/transformation/_label_encoder.py +++ b/src/safeds/data/tabular/transformation/_label_encoder.py @@ -16,6 +16,10 @@ class LabelEncoder(InvertibleTableTransformer): """The LabelEncoder encodes one or more given columns into labels.""" + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + def __init__(self) -> None: super().__init__() diff --git a/src/safeds/data/tabular/transformation/_one_hot_encoder.py b/src/safeds/data/tabular/transformation/_one_hot_encoder.py index f43a8425c..763c4358d 100644 --- a/src/safeds/data/tabular/transformation/_one_hot_encoder.py +++ b/src/safeds/data/tabular/transformation/_one_hot_encoder.py @@ -56,6 +56,10 @@ class OneHotEncoder(InvertibleTableTransformer): 3 1.0 0.0 0.0 """ + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + def __init__(self) -> None: super().__init__() diff --git a/src/safeds/data/tabular/transformation/_range_scaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py index 7e3eec5f3..9943e7b40 100644 --- a/src/safeds/data/tabular/transformation/_range_scaler.py +++ b/src/safeds/data/tabular/transformation/_range_scaler.py @@ -1,6 +1,5 @@ from __future__ import annotations - from safeds._validation import _check_columns_exist from safeds.data.tabular.containers import Table from safeds.exceptions import NonNumericColumnError, TransformerNotFittedError @@ -8,7 +7,6 @@ from ._invertible_table_transformer import InvertibleTableTransformer - class RangeScaler(InvertibleTableTransformer): """ The RangeScaler transforms column values by scaling each value to a given range. diff --git a/src/safeds/data/tabular/transformation/_simple_imputer.py b/src/safeds/data/tabular/transformation/_simple_imputer.py index 79dcc5fdd..62a625644 100644 --- a/src/safeds/data/tabular/transformation/_simple_imputer.py +++ b/src/safeds/data/tabular/transformation/_simple_imputer.py @@ -40,12 +40,16 @@ class SimpleImputer(TableTransformer): ... Column("b", [None, 2, 3]), ... ], ... ) - >>> transformer = SimpleImputer(SimpleImputer.Strategy.Constant(0)) + >>> transformer = SimpleImputer(SimpleImputer.Strategy.constant(0)) >>> transformed_table = transformer.fit_and_transform(table) """ class Strategy(ABC): - """Various strategies to replace missing values. Use the inner classes to create instances of this class.""" + """ + Various strategies to replace missing values. + + Use the static factory methods to create instances of this class. + """ @abstractmethod def __eq__(self, other: object) -> bool: ... @@ -61,7 +65,7 @@ def _apply(self, imputer: sk_SimpleImputer) -> None: """Set the imputer strategy of the given imputer.""" @staticmethod - def Constant(value: Any) -> SimpleImputer.Strategy: # noqa: N802 + def constant(value: Any) -> SimpleImputer.Strategy: """ Replace missing values with the given constant value. @@ -73,20 +77,24 @@ def Constant(value: Any) -> SimpleImputer.Strategy: # noqa: N802 return _Constant(value) @staticmethod - def Mean() -> SimpleImputer.Strategy: # noqa: N802 + def mean() -> SimpleImputer.Strategy: """Replace missing values with the mean of each column.""" return _Mean() @staticmethod - def Median() -> SimpleImputer.Strategy: # noqa: N802 + def median() -> SimpleImputer.Strategy: """Replace missing values with the median of each column.""" return _Median() @staticmethod - def Mode() -> SimpleImputer.Strategy: # noqa: N802 + def mode() -> SimpleImputer.Strategy: """Replace missing values with the mode of each column.""" return _Mode() + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + def __init__(self, strategy: SimpleImputer.Strategy, *, value_to_replace: float | str | None = None): super().__init__() @@ -97,11 +105,10 @@ def __init__(self, strategy: SimpleImputer.Strategy, *, value_to_replace: float self._value_to_replace = value_to_replace self._wrapped_transformer: sk_SimpleImputer | None = None - self._column_names: list[str] | None = None - @property - def is_fitted(self) -> bool: - return self._wrapped_transformer is not None + # ------------------------------------------------------------------------------------------------------------------ + # Properties + # ------------------------------------------------------------------------------------------------------------------ @property def strategy(self) -> SimpleImputer.Strategy: @@ -113,6 +120,10 @@ def value_to_replace(self) -> Any: """The value that should be replaced.""" return self._value_to_replace + # ------------------------------------------------------------------------------------------------------------------ + # Learning and transformation + # ------------------------------------------------------------------------------------------------------------------ + def fit(self, table: Table, column_names: list[str] | None) -> SimpleImputer: """ Learn a transformation for a set of columns in a table. @@ -319,7 +330,7 @@ def _apply(self, imputer: sk_SimpleImputer) -> None: # Override the methods with classes, so they can be used in `isinstance` calls. Unlike methods, classes define a type. # This is needed for the DSL, where imputer strategies are variants of an enum. -SimpleImputer.Strategy.Constant = _Constant # type: ignore[method-assign] -SimpleImputer.Strategy.Mean = _Mean # type: ignore[method-assign] -SimpleImputer.Strategy.Median = _Median # type: ignore[method-assign] -SimpleImputer.Strategy.Mode = _Mode # type: ignore[method-assign] +SimpleImputer.Strategy.constant = _Constant # type: ignore[method-assign] +SimpleImputer.Strategy.mean = _Mean # type: ignore[method-assign] +SimpleImputer.Strategy.median = _Median # type: ignore[method-assign] +SimpleImputer.Strategy.mode = _Mode # type: ignore[method-assign] diff --git a/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py b/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py index 0d3c6ace2..73294108e 100644 --- a/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py +++ b/src/safeds/ml/classical/classification/_gradient_boosting_classifier.py @@ -10,9 +10,6 @@ if TYPE_CHECKING: from sklearn.base import ClassifierMixin - from safeds.data.labeled.containers import TabularDataset - from safeds.data.tabular.containers import Table - class GradientBoostingClassifier(Classifier, _GradientBoostingBase): """ diff --git a/src/safeds/ml/classical/classification/_support_vector_classifier.py b/src/safeds/ml/classical/classification/_support_vector_classifier.py index 509503765..407c8f97a 100644 --- a/src/safeds/ml/classical/classification/_support_vector_classifier.py +++ b/src/safeds/ml/classical/classification/_support_vector_classifier.py @@ -71,14 +71,6 @@ def _clone(self) -> SupportVectorClassifier: ) def _get_sklearn_model(self) -> ClassifierMixin: - """ - Return a new wrapped Classifier from sklearn. - - Returns - ------- - wrapped_classifier: - The sklearn Classifier. - """ from sklearn.svm import SVC as SklearnSVC # noqa: N811 result = SklearnSVC( diff --git a/tests/safeds/data/image/containers/test_image_list.py b/tests/safeds/data/image/containers/test_image_list.py index c547742ca..4f0a46776 100644 --- a/tests/safeds/data/image/containers/test_image_list.py +++ b/tests/safeds/data/image/containers/test_image_list.py @@ -6,8 +6,6 @@ import pytest import torch -from torch.types import Device - from safeds.data.image.containers import Image, ImageList from safeds.data.image.containers._empty_image_list import _EmptyImageList from safeds.data.image.containers._multi_size_image_list import _MultiSizeImageList @@ -16,8 +14,12 @@ from safeds.exceptions import DuplicateIndexError, IllegalFormatError, IndexOutOfBoundsError, OutOfBoundsError from syrupy import SnapshotAssertion from torch import Tensor +from torch.types import Device from tests.helpers import ( + configure_test_with_device, + get_devices, + get_devices_ids, grayscale_jpg_path, grayscale_png_path, images_all, @@ -31,9 +33,6 @@ skip_if_os, test_images_folder, white_square_jpg_path, - get_devices, - get_devices_ids, - configure_test_with_device, ) @@ -461,7 +460,7 @@ class TestFromFiles: ], ) def test_from_files_creation_return_filenames( - self, resource_path: str | Path, snapshot_png_image_list: SnapshotAssertion, device: Device + self, resource_path: str | Path, snapshot_png_image_list: SnapshotAssertion, device: Device, ) -> None: configure_test_with_device(device) image_list = ImageList.from_files(resolve_resource_path(resource_path)) @@ -493,7 +492,7 @@ def test_from_files_creation_return_filenames( ], ) def test_from_files_creation_load_percentage( - self, resource_path: str | Path, snapshot_png_image_list: SnapshotAssertion, device: Device + self, resource_path: str | Path, snapshot_png_image_list: SnapshotAssertion, device: Device, ) -> None: random.seed(420) configure_test_with_device(device) @@ -532,7 +531,7 @@ def test_should_raise_if_one_file_or_directory_not_found(self, resource_path: st [-1.0, 2.0], ) def test_should_raise_if_load_percentage_out_of_bounds( - self, resource_path: str | Path, load_percentage: float, device: Device + self, resource_path: str | Path, load_percentage: float, device: Device, ) -> None: configure_test_with_device(device) with pytest.raises(OutOfBoundsError): @@ -542,7 +541,7 @@ def test_create_from_single_sized_image_lists_one_image_list(self, device: Devic configure_test_with_device(device) assert isinstance( _MultiSizeImageList()._create_from_single_sized_image_lists( - [ImageList.from_files(resolve_resource_path(plane_png_path))._as_single_size_image_list()] + [ImageList.from_files(resolve_resource_path(plane_png_path))._as_single_size_image_list()], ), _SingleSizeImageList, ) @@ -553,7 +552,7 @@ def test_create_from_single_sized_image_lists_one_image_list(self, device: Devic ids=["all-images"], ) def test_create_from_single_sized_image_lists( - self, resource_path: str | Path, snapshot_png_image_list: SnapshotAssertion, device: Device + self, resource_path: str | Path, snapshot_png_image_list: SnapshotAssertion, device: Device, ) -> None: configure_test_with_device(device) image_lists = ImageList.from_files(resolve_resource_path(resource_path)) @@ -688,7 +687,7 @@ def test_should_save_images_in_directory(self, resource_path: list[str], device: ids=["all-jpg-images", "jpg-planes", "jpg-grayscale"], ) def test_should_save_images_in_directories_for_different_sizes( - self, resource_path: list[str], device: Device + self, resource_path: list[str], device: Device, ) -> None: configure_test_with_device(device) image_list = ImageList.from_files(resolve_resource_path(resource_path)) @@ -789,7 +788,7 @@ def test_should_save_images_in_directory(self, resource_path: list[str], device: ids=["all-images", "planes", "grayscale"], ) def test_should_save_images_in_directories_for_different_sizes( - self, resource_path: list[str], device: Device + self, resource_path: list[str], device: Device, ) -> None: configure_test_with_device(device) image_list = ImageList.from_files(resolve_resource_path(resource_path)) @@ -843,7 +842,7 @@ class TestShuffleImages: ids=["all-images", "planes"], ) def test_shuffle_images( - self, resource_path: list[str], snapshot_png_image_list: SnapshotAssertion, device: Device + self, resource_path: list[str], snapshot_png_image_list: SnapshotAssertion, device: Device, ) -> None: configure_test_with_device(device) image_list_original = ImageList.from_files(resolve_resource_path(resource_path)) @@ -1103,7 +1102,7 @@ class TestRemoveImagesWithSize: ids=["invalid width", "invalid height", "invalid width and height"], ) def test_should_raise_negative_size( - self, resource_path: list[str], width: int, height: int, device: Device + self, resource_path: list[str], width: int, height: int, device: Device, ) -> None: configure_test_with_device(device) image_list = ImageList.from_files(resolve_resource_path(resource_path)) @@ -1117,7 +1116,7 @@ class TestResize: ids=["invalid width", "invalid height", "invalid width and height"], ) def test_should_raise_new_size( - self, resource_path: list[str], new_width: int, new_height: int, device: Device + self, resource_path: list[str], new_width: int, new_height: int, device: Device, ) -> None: configure_test_with_device(device) image_list = ImageList.from_files(resolve_resource_path(resource_path)) @@ -1131,7 +1130,7 @@ class TestCrop: ids=["invalid width", "invalid height", "invalid width and height"], ) def test_should_raise_invalid_size( - self, resource_path: list[str], new_width: int, new_height: int, device: Device + self, resource_path: list[str], new_width: int, new_height: int, device: Device, ) -> None: configure_test_with_device(device) image_list = ImageList.from_files(resolve_resource_path(resource_path)) @@ -1144,7 +1143,7 @@ def test_should_raise_invalid_size( ids=["invalid x", "invalid y", "invalid x and y"], ) def test_should_raise_invalid_coordinates( - self, resource_path: list[str], new_x: int, new_y: int, device: Device + self, resource_path: list[str], new_x: int, new_y: int, device: Device, ) -> None: configure_test_with_device(device) image_list = ImageList.from_files(resolve_resource_path(resource_path)) @@ -1158,7 +1157,7 @@ class TestAddNoise: ids=["sigma below zero"], ) def test_should_raise_standard_deviation( - self, resource_path: list[str], standard_deviation: float, device: Device + self, resource_path: list[str], standard_deviation: float, device: Device, ) -> None: configure_test_with_device(device) image_list_original = ImageList.from_files(resolve_resource_path(resource_path)) @@ -1467,7 +1466,7 @@ def test_create_image_list(self, image_list: ImageList, device: Device) -> None: def test_create_image_list_from_files(self, device: Device) -> None: configure_test_with_device(device) assert isinstance( - _SingleSizeImageList()._create_image_list_from_files({}, 0, 4, 1, 1, {}, 5)[0], _EmptyImageList + _SingleSizeImageList()._create_image_list_from_files({}, 0, 4, 1, 1, {}, 5)[0], _EmptyImageList, ) def test_create_from_single_sized_image_lists(self, device: Device) -> None: diff --git a/tests/safeds/data/image/typing/test_image_size.py b/tests/safeds/data/image/typing/test_image_size.py index e1e4cb0c3..f8622c2de 100644 --- a/tests/safeds/data/image/typing/test_image_size.py +++ b/tests/safeds/data/image/typing/test_image_size.py @@ -8,13 +8,13 @@ from torch.types import Device from tests.helpers import ( + configure_test_with_device, get_devices, get_devices_ids, images_all, images_all_ids, plane_png_path, resolve_resource_path, - configure_test_with_device, ) diff --git a/tests/safeds/data/labeled/containers/_tabular_dataset/test_into_dataloader.py b/tests/safeds/data/labeled/containers/_tabular_dataset/test_into_dataloader.py index 5ff34f91c..512cc1b14 100644 --- a/tests/safeds/data/labeled/containers/_tabular_dataset/test_into_dataloader.py +++ b/tests/safeds/data/labeled/containers/_tabular_dataset/test_into_dataloader.py @@ -1,11 +1,10 @@ import pytest -from torch.types import Device - from safeds._config import _get_device from safeds.data.tabular.containers import Table +from torch.types import Device from torch.utils.data import DataLoader -from tests.helpers import get_devices, get_devices_ids, configure_test_with_device +from tests.helpers import configure_test_with_device, get_devices, get_devices_ids @pytest.mark.parametrize( diff --git a/tests/safeds/data/tabular/containers/_column/test_repr.py b/tests/safeds/data/tabular/containers/_column/test_repr.py index 0482372a2..7b3b6dee3 100644 --- a/tests/safeds/data/tabular/containers/_column/test_repr.py +++ b/tests/safeds/data/tabular/containers/_column/test_repr.py @@ -7,15 +7,15 @@ [ ( Column("a", []), - "+------+\n" "| a |\n" "| --- |\n" "| null |\n" "+======+\n" "+------+", + "+------+\n| a |\n| --- |\n| null |\n+======+\n+------+", ), ( Column("a", [0]), - "+-----+\n" "| a |\n" "| --- |\n" "| i64 |\n" "+=====+\n" "| 0 |\n" "+-----+", + "+-----+\n| a |\n| --- |\n| i64 |\n+=====+\n| 0 |\n+-----+", ), ( Column("a", [0, "1"]), - "+------+\n" "| a |\n" "| --- |\n" "| str |\n" "+======+\n" "| null |\n" "| 1 |\n" "+------+", + "+------+\n| a |\n| --- |\n| str |\n+======+\n| null |\n| 1 |\n+------+", ), ], ids=[ diff --git a/tests/safeds/data/tabular/containers/_column/test_str.py b/tests/safeds/data/tabular/containers/_column/test_str.py index 0482372a2..7b3b6dee3 100644 --- a/tests/safeds/data/tabular/containers/_column/test_str.py +++ b/tests/safeds/data/tabular/containers/_column/test_str.py @@ -7,15 +7,15 @@ [ ( Column("a", []), - "+------+\n" "| a |\n" "| --- |\n" "| null |\n" "+======+\n" "+------+", + "+------+\n| a |\n| --- |\n| null |\n+======+\n+------+", ), ( Column("a", [0]), - "+-----+\n" "| a |\n" "| --- |\n" "| i64 |\n" "+=====+\n" "| 0 |\n" "+-----+", + "+-----+\n| a |\n| --- |\n| i64 |\n+=====+\n| 0 |\n+-----+", ), ( Column("a", [0, "1"]), - "+------+\n" "| a |\n" "| --- |\n" "| str |\n" "+======+\n" "| null |\n" "| 1 |\n" "+------+", + "+------+\n| a |\n| --- |\n| str |\n+======+\n| null |\n| 1 |\n+------+", ), ], ids=[ diff --git a/tests/safeds/data/tabular/containers/_table/test_hash.py b/tests/safeds/data/tabular/containers/_table/test_hash.py index 33db60600..08eefc4d6 100644 --- a/tests/safeds/data/tabular/containers/_table/test_hash.py +++ b/tests/safeds/data/tabular/containers/_table/test_hash.py @@ -1,7 +1,6 @@ -from typing import Any import pytest -from safeds.data.tabular.containers import Row, Table +from safeds.data.tabular.containers import Table @pytest.mark.parametrize( diff --git a/tests/safeds/data/tabular/containers/_table/test_number_of_columns.py b/tests/safeds/data/tabular/containers/_table/test_number_of_columns.py index 66be91460..347e18dd8 100644 --- a/tests/safeds/data/tabular/containers/_table/test_number_of_columns.py +++ b/tests/safeds/data/tabular/containers/_table/test_number_of_columns.py @@ -5,12 +5,11 @@ @pytest.mark.parametrize( ("table", "expected"), [ - (Table(), 0), (Table(), 0), (Table({"col1": []}), 1), (Table({"col1": [], "col2": []}), 2), ], - ids=["empty", "empty 2", "a column", "2 columns"], + ids=["empty", "a column", "2 columns"], ) def test_should_return_number_of_columns(table: Table, expected: int) -> None: assert table.number_of_columns == expected diff --git a/tests/safeds/data/tabular/containers/_table/test_number_of_rows.py b/tests/safeds/data/tabular/containers/_table/test_number_of_rows.py index db26ebc90..0c3e91214 100644 --- a/tests/safeds/data/tabular/containers/_table/test_number_of_rows.py +++ b/tests/safeds/data/tabular/containers/_table/test_number_of_rows.py @@ -5,12 +5,11 @@ @pytest.mark.parametrize( ("table", "expected"), [ - (Table(), 0), (Table(), 0), (Table({"col1": [1]}), 1), (Table({"col1": [1, 2]}), 2), ], - ids=["empty", "empty 2", "a row", "2 rows"], + ids=["empty", "a row", "2 rows"], ) def test_should_return_number_of_rows(table: Table, expected: int) -> None: assert table.number_of_rows == expected diff --git a/tests/safeds/data/tabular/containers/_table/test_plot_histograms.py b/tests/safeds/data/tabular/containers/_table/test_plot_histograms.py index 2ba38d7c1..2b50b775f 100644 --- a/tests/safeds/data/tabular/containers/_table/test_plot_histograms.py +++ b/tests/safeds/data/tabular/containers/_table/test_plot_histograms.py @@ -61,7 +61,7 @@ "g", "a", ], - } + }, ), ], ids=["one column", "four columns", "two columns with compressed visualization"], diff --git a/tests/safeds/data/tabular/containers/_table/test_transform_table.py b/tests/safeds/data/tabular/containers/_table/test_transform_table.py index 5335c9f3a..065ebf457 100644 --- a/tests/safeds/data/tabular/containers/_table/test_transform_table.py +++ b/tests/safeds/data/tabular/containers/_table/test_transform_table.py @@ -1,7 +1,7 @@ import pytest from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation import OneHotEncoder -from safeds.exceptions import TransformerNotFittedError, ColumnNotFoundError +from safeds.exceptions import ColumnNotFoundError, TransformerNotFittedError @pytest.mark.parametrize( diff --git a/tests/safeds/data/tabular/transformation/test_discretizer.py b/tests/safeds/data/tabular/transformation/test_discretizer.py index 11d03718b..32974d712 100644 --- a/tests/safeds/data/tabular/transformation/test_discretizer.py +++ b/tests/safeds/data/tabular/transformation/test_discretizer.py @@ -1,7 +1,7 @@ import pytest from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation import Discretizer -from safeds.exceptions import NonNumericColumnError, OutOfBoundsError, TransformerNotFittedError, ColumnNotFoundError +from safeds.exceptions import ColumnNotFoundError, NonNumericColumnError, OutOfBoundsError, TransformerNotFittedError class TestInit: diff --git a/tests/safeds/data/tabular/transformation/test_label_encoder.py b/tests/safeds/data/tabular/transformation/test_label_encoder.py index 787c73b10..dcbb59404 100644 --- a/tests/safeds/data/tabular/transformation/test_label_encoder.py +++ b/tests/safeds/data/tabular/transformation/test_label_encoder.py @@ -1,7 +1,7 @@ import pytest from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation import LabelEncoder -from safeds.exceptions import NonNumericColumnError, TransformerNotFittedError, ColumnNotFoundError +from safeds.exceptions import ColumnNotFoundError, NonNumericColumnError, TransformerNotFittedError class TestFit: diff --git a/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py b/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py index 5b0ec5a9c..c6bfd0b3f 100644 --- a/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py +++ b/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py @@ -4,9 +4,9 @@ from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation import OneHotEncoder from safeds.exceptions import ( + ColumnNotFoundError, NonNumericColumnError, TransformerNotFittedError, - ColumnNotFoundError, ValueNotPresentWhenFittedError, ) diff --git a/tests/safeds/data/tabular/transformation/test_range_scaler.py b/tests/safeds/data/tabular/transformation/test_range_scaler.py index 1229e12b3..631b7e817 100644 --- a/tests/safeds/data/tabular/transformation/test_range_scaler.py +++ b/tests/safeds/data/tabular/transformation/test_range_scaler.py @@ -1,7 +1,7 @@ import pytest from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation import RangeScaler -from safeds.exceptions import NonNumericColumnError, TransformerNotFittedError, ColumnNotFoundError +from safeds.exceptions import ColumnNotFoundError, NonNumericColumnError, TransformerNotFittedError class TestInit: diff --git a/tests/safeds/data/tabular/transformation/test_simple_imputer.py b/tests/safeds/data/tabular/transformation/test_simple_imputer.py index 03c0b7e23..312cd50b1 100644 --- a/tests/safeds/data/tabular/transformation/test_simple_imputer.py +++ b/tests/safeds/data/tabular/transformation/test_simple_imputer.py @@ -5,7 +5,7 @@ from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation import SimpleImputer from safeds.data.tabular.transformation._simple_imputer import _Mode -from safeds.exceptions import NonNumericColumnError, TransformerNotFittedError, ColumnNotFoundError +from safeds.exceptions import ColumnNotFoundError, NonNumericColumnError, TransformerNotFittedError def strategies() -> list[SimpleImputer.Strategy]: @@ -21,25 +21,25 @@ def strategies() -> list[SimpleImputer.Strategy]: The list of classifiers to test. """ return [ - SimpleImputer.Strategy.Constant(2), - SimpleImputer.Strategy.Mean(), - SimpleImputer.Strategy.Median(), - SimpleImputer.Strategy.Mode(), + SimpleImputer.Strategy.constant(2), + SimpleImputer.Strategy.mean(), + SimpleImputer.Strategy.median(), + SimpleImputer.Strategy.mode(), ] class TestStrategyClass: def test_should_be_able_to_get_value_of_constant_strategy(self) -> None: - assert SimpleImputer.Strategy.Constant(1).value == 1 # type: ignore[attr-defined] + assert SimpleImputer.Strategy.constant(1).value == 1 # type: ignore[attr-defined] @pytest.mark.parametrize( ("strategy", "type_", "expected"), [ - (SimpleImputer.Strategy.Constant(0), SimpleImputer.Strategy.Constant, True), - (SimpleImputer.Strategy.Mean(), SimpleImputer.Strategy.Mean, True), - (SimpleImputer.Strategy.Median(), SimpleImputer.Strategy.Median, True), - (SimpleImputer.Strategy.Mode(), SimpleImputer.Strategy.Mode, True), - (SimpleImputer.Strategy.Mode(), SimpleImputer.Strategy.Mean, False), + (SimpleImputer.Strategy.constant(0), SimpleImputer.Strategy.constant, True), + (SimpleImputer.Strategy.mean(), SimpleImputer.Strategy.mean, True), + (SimpleImputer.Strategy.median(), SimpleImputer.Strategy.median, True), + (SimpleImputer.Strategy.mode(), SimpleImputer.Strategy.mode, True), + (SimpleImputer.Strategy.mode(), SimpleImputer.Strategy.mean, False), ], ) def test_should_be_able_to_use_strategy_in_isinstance( @@ -114,7 +114,7 @@ def test_should_return_different_hash_for_unequal_strategy( class TestSizeof: @pytest.mark.parametrize( "strategy", - ([SimpleImputer.Strategy.Constant(1)]), + ([SimpleImputer.Strategy.constant(1)]), ids=lambda x: x.__class__.__name__, ) def test_sizeof_strategy( @@ -127,15 +127,15 @@ class TestStr: @pytest.mark.parametrize( ("strategy", "expected"), [ - (SimpleImputer.Strategy.Constant(0), "Constant(0)"), - (SimpleImputer.Strategy.Mean(), "Mean"), - (SimpleImputer.Strategy.Median(), "Median"), - (SimpleImputer.Strategy.Mode(), "Mode"), + (SimpleImputer.Strategy.constant(0), "Constant(0)"), + (SimpleImputer.Strategy.mean(), "Mean"), + (SimpleImputer.Strategy.median(), "Median"), + (SimpleImputer.Strategy.mode(), "Mode"), ], ids=lambda x: x.__class__.__name__, ) def test_should_return_correct_string_representation( - self, strategy: SimpleImputer.Strategy, expected: str + self, strategy: SimpleImputer.Strategy, expected: str, ) -> None: assert str(strategy) == expected @@ -157,7 +157,7 @@ class TestValueToReplaceProperty: ) def test_should_return_correct_value_to_replace(self, value_to_replace: float | str | None) -> None: assert ( - SimpleImputer(SimpleImputer.Strategy.Mode(), value_to_replace=value_to_replace).value_to_replace + SimpleImputer(SimpleImputer.Strategy.mode(), value_to_replace=value_to_replace).value_to_replace == value_to_replace ) @@ -182,8 +182,8 @@ def test_should_raise_if_table_contains_no_rows(self, strategy: SimpleImputer.St @pytest.mark.parametrize( ("table", "col_names", "strategy"), [ - (Table({"col1": [1, None, "ok"], "col2": [1, 2, "3"]}), ["col1", "col2"], SimpleImputer.Strategy.Mean()), - (Table({"col1": [1, None, "ok"], "col2": [1, 2, "3"]}), ["col1", "col2"], SimpleImputer.Strategy.Median()), + (Table({"col1": [1, None, "ok"], "col2": [1, 2, "3"]}), ["col1", "col2"], SimpleImputer.Strategy.mean()), + (Table({"col1": [1, None, "ok"], "col2": [1, 2, "3"]}), ["col1", "col2"], SimpleImputer.Strategy.median()), ], ids=["Strategy Mean", "Strategy Median"], ) @@ -220,7 +220,7 @@ def test_should_warn_if_multiple_mode_values(self, table: Table, most_frequent: rf" values:\n{most_frequent}" ), ): - SimpleImputer(SimpleImputer.Strategy.Mode()).fit(table, None) + SimpleImputer(SimpleImputer.Strategy.mode()).fit(table, None) @pytest.mark.parametrize("strategy", strategies(), ids=lambda x: x.__class__.__name__) def test_should_not_change_original_transformer(self, strategy: SimpleImputer.Strategy) -> None: @@ -316,7 +316,7 @@ class TestFitAndTransform: }, ), None, - SimpleImputer.Strategy.Constant(0.0), + SimpleImputer.Strategy.constant(0.0), None, Table( { @@ -331,7 +331,7 @@ class TestFitAndTransform: }, ), None, - SimpleImputer.Strategy.Mean(), + SimpleImputer.Strategy.mean(), None, Table( { @@ -346,7 +346,7 @@ class TestFitAndTransform: }, ), None, - SimpleImputer.Strategy.Median(), + SimpleImputer.Strategy.median(), None, Table( { @@ -361,7 +361,7 @@ class TestFitAndTransform: }, ), None, - SimpleImputer.Strategy.Mode(), + SimpleImputer.Strategy.mode(), None, Table( { @@ -377,7 +377,7 @@ class TestFitAndTransform: }, ), ["a"], - SimpleImputer.Strategy.Constant(0.0), + SimpleImputer.Strategy.constant(0.0), None, Table( { @@ -393,7 +393,7 @@ class TestFitAndTransform: }, ), ["a"], - SimpleImputer.Strategy.Mode(), + SimpleImputer.Strategy.mode(), None, Table({"a": [1.0, 1.0, 2.0, 2.0, 1.0]}), ), @@ -404,7 +404,7 @@ class TestFitAndTransform: }, ), None, - SimpleImputer.Strategy.Constant(1.0), + SimpleImputer.Strategy.constant(1.0), 0.0, Table( { diff --git a/tests/safeds/data/tabular/transformation/test_standard_scaler.py b/tests/safeds/data/tabular/transformation/test_standard_scaler.py index 7d745f46e..e692cd5a2 100644 --- a/tests/safeds/data/tabular/transformation/test_standard_scaler.py +++ b/tests/safeds/data/tabular/transformation/test_standard_scaler.py @@ -1,7 +1,7 @@ import pytest from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation import StandardScaler -from safeds.exceptions import NonNumericColumnError, TransformerNotFittedError, ColumnNotFoundError +from safeds.exceptions import ColumnNotFoundError, NonNumericColumnError, TransformerNotFittedError from tests.helpers import assert_that_tables_are_close diff --git a/tests/safeds/data/tabular/transformation/test_table_transformer.py b/tests/safeds/data/tabular/transformation/test_table_transformer.py index fb4640119..ff80701a2 100644 --- a/tests/safeds/data/tabular/transformation/test_table_transformer.py +++ b/tests/safeds/data/tabular/transformation/test_table_transformer.py @@ -66,7 +66,7 @@ def transformers() -> list[TableTransformer]: transformers_numeric() + transformers_non_numeric() + [ - SimpleImputer(strategy=SimpleImputer.Strategy.Mode()), + SimpleImputer(strategy=SimpleImputer.Strategy.mode()), ] ) @@ -175,6 +175,6 @@ def test_should_return_different_hash_for_imputer_fit( transformer2: TableTransformer, valid_data_imputer: Table, ) -> None: - transformer1 = SimpleImputer(strategy=SimpleImputer.Strategy.Mode()) + transformer1 = SimpleImputer(strategy=SimpleImputer.Strategy.mode()) transformer1_fit = transformer1.fit(valid_data_imputer, ["col1"]) assert hash(transformer2) != hash(transformer1_fit) From 3cfd7c2f4ae9a093bde09598bfc7489bc0aa63cb Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 14 May 2024 17:52:23 +0200 Subject: [PATCH 08/21] fix: some mypy errors --- src/safeds/data/image/containers/_image.py | 3 ++ .../data/labeled/containers/_image_dataset.py | 2 + .../tabular/transformation/_discretizer.py | 15 ++++++- .../tabular/transformation/_label_encoder.py | 9 +++- .../transformation/_one_hot_encoder.py | 10 +++-- .../tabular/transformation/_range_scaler.py | 38 +++++++++++++--- .../tabular/transformation/_simple_imputer.py | 11 ++++- .../transformation/_standard_scaler.py | 17 ++++++- .../transformation/test_discretizer.py | 38 ---------------- .../transformation/test_label_encoder.py | 38 ---------------- .../transformation/test_one_hot_encoder.py | 39 ---------------- .../transformation/test_range_scaler.py | 38 ---------------- .../transformation/test_simple_imputer.py | 44 ------------------- .../transformation/test_standard_scaler.py | 38 ---------------- 14 files changed, 89 insertions(+), 251 deletions(-) diff --git a/src/safeds/data/image/containers/_image.py b/src/safeds/data/image/containers/_image.py index 8e8dda7c6..c8a316812 100644 --- a/src/safeds/data/image/containers/_image.py +++ b/src/safeds/data/image/containers/_image.py @@ -78,6 +78,9 @@ def from_file(path: str | Path) -> Image: """ from torchvision.io import read_image + if isinstance(path, str): + path = Path(path) + _init_default_device() if not path.is_file(): diff --git a/src/safeds/data/labeled/containers/_image_dataset.py b/src/safeds/data/labeled/containers/_image_dataset.py index 2beb0fbe5..4f4abe86a 100644 --- a/src/safeds/data/labeled/containers/_image_dataset.py +++ b/src/safeds/data/labeled/containers/_image_dataset.py @@ -357,6 +357,8 @@ def __init__(self, column: Column) -> None: message=rf"The columns \['{self._column_name}'\] contain numerical data. The OneHotEncoder is designed to encode non-numerical values into numerical values", category=UserWarning, ) + # TODO: should not one-hot-encode the target. label encoding without order is sufficient. should also not + # be done automatically? self._one_hot_encoder = OneHotEncoder().fit(column_as_table, [self._column_name]) self._tensor = torch.Tensor(self._one_hot_encoder.transform(column_as_table)._data_frame.to_torch()).to( _get_device(), diff --git a/src/safeds/data/tabular/transformation/_discretizer.py b/src/safeds/data/tabular/transformation/_discretizer.py index e82ec2590..f55809dd6 100644 --- a/src/safeds/data/tabular/transformation/_discretizer.py +++ b/src/safeds/data/tabular/transformation/_discretizer.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING +from safeds._utils import _structural_hash from safeds._validation import _check_bounds, _check_columns_exist, _ClosedBound from safeds.data.tabular.containers import Table from safeds.exceptions import ( @@ -34,14 +35,20 @@ class Discretizer(TableTransformer): # Dunder methods # ------------------------------------------------------------------------------------------------------------------ - def __init__(self, number_of_bins: int = 5): - super().__init__() + def __init__(self, number_of_bins: int = 5) -> None: + TableTransformer.__init__(self) _check_bounds("number_of_bins", number_of_bins, lower_bound=_ClosedBound(2)) self._wrapped_transformer: sk_KBinsDiscretizer | None = None self._number_of_bins = number_of_bins + def __hash__(self) -> int: + return _structural_hash( + TableTransformer.__hash__(self), + self._number_of_bins, + ) + # ------------------------------------------------------------------------------------------------------------------ # Properties # ------------------------------------------------------------------------------------------------------------------ @@ -50,6 +57,10 @@ def __init__(self, number_of_bins: int = 5): def number_of_bins(self) -> int: return self._number_of_bins + # ------------------------------------------------------------------------------------------------------------------ + # Learning and transformation + # ------------------------------------------------------------------------------------------------------------------ + def fit(self, table: Table, column_names: list[str] | None) -> Discretizer: """ Learn a transformation for a set of columns in a table. diff --git a/src/safeds/data/tabular/transformation/_label_encoder.py b/src/safeds/data/tabular/transformation/_label_encoder.py index 70b4ccbcc..c32715106 100644 --- a/src/safeds/data/tabular/transformation/_label_encoder.py +++ b/src/safeds/data/tabular/transformation/_label_encoder.py @@ -21,10 +21,17 @@ class LabelEncoder(InvertibleTableTransformer): # ------------------------------------------------------------------------------------------------------------------ def __init__(self) -> None: - super().__init__() + InvertibleTableTransformer.__init__(self) self._wrapped_transformer: sk_OrdinalEncoder | None = None + def __hash__(self) -> int: + return super().__hash__() + + # ------------------------------------------------------------------------------------------------------------------ + # Learning and transformation + # ------------------------------------------------------------------------------------------------------------------ + def fit(self, table: Table, column_names: list[str] | None) -> LabelEncoder: """ Learn a transformation for a set of columns in a table. diff --git a/src/safeds/data/tabular/transformation/_one_hot_encoder.py b/src/safeds/data/tabular/transformation/_one_hot_encoder.py index 763c4358d..cb0c95e01 100644 --- a/src/safeds/data/tabular/transformation/_one_hot_encoder.py +++ b/src/safeds/data/tabular/transformation/_one_hot_encoder.py @@ -70,9 +70,6 @@ def __init__(self) -> None: # Maps nan values (str of old column) to corresponding new column name self._value_to_column_nans: dict[str, str] | None = None - def __hash__(self) -> int: - return super().__hash__() - def __eq__(self, other: object) -> bool: if not isinstance(other, OneHotEncoder): return NotImplemented @@ -82,6 +79,13 @@ def __eq__(self, other: object) -> bool: and self._value_to_column_nans == other._value_to_column_nans ) + def __hash__(self) -> int: + return super().__hash__() + + # ------------------------------------------------------------------------------------------------------------------ + # Learning and transformation + # ------------------------------------------------------------------------------------------------------------------ + def fit(self, table: Table, column_names: list[str] | None) -> OneHotEncoder: """ Learn a transformation for a set of columns in a table. diff --git a/src/safeds/data/tabular/transformation/_range_scaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py index 9943e7b40..c9b52fd55 100644 --- a/src/safeds/data/tabular/transformation/_range_scaler.py +++ b/src/safeds/data/tabular/transformation/_range_scaler.py @@ -1,5 +1,6 @@ from __future__ import annotations +from safeds._utils import _structural_hash from safeds._validation import _check_columns_exist from safeds.data.tabular.containers import Table from safeds.exceptions import NonNumericColumnError, TransformerNotFittedError @@ -28,14 +29,39 @@ class RangeScaler(InvertibleTableTransformer): # Dunder methods # ------------------------------------------------------------------------------------------------------------------ - def __init__(self, min_: float = 0.0, max_: float = 1.0): - super().__init__() + def __init__(self, min_: float = 0.0, max_: float = 1.0) -> None: + InvertibleTableTransformer.__init__(self) if min_ >= max_: - raise ValueError('Parameter "max_" must be greate than parameter "min_".') + raise ValueError('Parameter "max_" must be greater than parameter "min_".') - self._minimum = min_ - self._maximum = max_ + self._min = min_ + self._max = max_ + + def __hash__(self) -> int: + return _structural_hash( + InvertibleTableTransformer.__hash__(self), + self._min, + self._max, + ) + + # ------------------------------------------------------------------------------------------------------------------ + # Properties + # ------------------------------------------------------------------------------------------------------------------ + + @property + def min(self) -> float: + """The minimum of the new range after the transformation.""" + return self._min + + @property + def max(self) -> float: + """The maximum of the new range after the transformation.""" + return self._max + + # ------------------------------------------------------------------------------------------------------------------ + # Learning and transformation + # ------------------------------------------------------------------------------------------------------------------ def fit(self, table: Table, column_names: list[str] | None) -> RangeScaler: """ @@ -89,7 +115,7 @@ def fit(self, table: Table, column_names: list[str] | None) -> RangeScaler: ), ) - wrapped_transformer = sk_MinMaxScaler((self._minimum, self._maximum)) + wrapped_transformer = sk_MinMaxScaler((self._min, self._max)) wrapped_transformer.set_output(transform="polars") wrapped_transformer.fit( table.remove_columns_except(column_names)._data_frame, diff --git a/src/safeds/data/tabular/transformation/_simple_imputer.py b/src/safeds/data/tabular/transformation/_simple_imputer.py index 62a625644..bb53e38e6 100644 --- a/src/safeds/data/tabular/transformation/_simple_imputer.py +++ b/src/safeds/data/tabular/transformation/_simple_imputer.py @@ -95,8 +95,8 @@ def mode() -> SimpleImputer.Strategy: # Dunder methods # ------------------------------------------------------------------------------------------------------------------ - def __init__(self, strategy: SimpleImputer.Strategy, *, value_to_replace: float | str | None = None): - super().__init__() + def __init__(self, strategy: SimpleImputer.Strategy, *, value_to_replace: float | str | None = None) -> None: + TableTransformer.__init__(self) if value_to_replace is None: value_to_replace = pd.NA @@ -106,6 +106,13 @@ def __init__(self, strategy: SimpleImputer.Strategy, *, value_to_replace: float self._wrapped_transformer: sk_SimpleImputer | None = None + def __hash__(self) -> int: + return _structural_hash( + SimpleImputer.__hash__(self), + self._strategy, + self._value_to_replace, + ) + # ------------------------------------------------------------------------------------------------------------------ # Properties # ------------------------------------------------------------------------------------------------------------------ diff --git a/src/safeds/data/tabular/transformation/_standard_scaler.py b/src/safeds/data/tabular/transformation/_standard_scaler.py index 668b7f295..2b9fd2055 100644 --- a/src/safeds/data/tabular/transformation/_standard_scaler.py +++ b/src/safeds/data/tabular/transformation/_standard_scaler.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING +from safeds._utils import _structural_hash from safeds._validation import _check_columns_exist from safeds.data.tabular.containers import Table from safeds.exceptions import NonNumericColumnError, TransformerNotFittedError @@ -15,12 +16,24 @@ class StandardScaler(InvertibleTableTransformer): """The StandardScaler transforms column values to a range by removing the mean and scaling to unit variance.""" + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + def __init__(self) -> None: - super().__init__() + InvertibleTableTransformer.__init__(self) - self._column_names: list[str] | None = None self._wrapped_transformer: sk_StandardScaler | None = None + def __hash__(self) -> int: + return _structural_hash( + InvertibleTableTransformer.__hash__(self), + ) + + # ------------------------------------------------------------------------------------------------------------------ + # Learning and transformation + # ------------------------------------------------------------------------------------------------------------------ + def fit(self, table: Table, column_names: list[str] | None) -> StandardScaler: """ Learn a transformation for a set of columns in a table. diff --git a/tests/safeds/data/tabular/transformation/test_discretizer.py b/tests/safeds/data/tabular/transformation/test_discretizer.py index 32974d712..63b50a1a8 100644 --- a/tests/safeds/data/tabular/transformation/test_discretizer.py +++ b/tests/safeds/data/tabular/transformation/test_discretizer.py @@ -265,41 +265,3 @@ def test_should_not_change_original_table(self) -> None: ) assert table == expected - - def test_get_names_of_added_columns(self) -> None: - transformer = Discretizer() - with pytest.raises(TransformerNotFittedError, match=r"The transformer has not been fitted yet."): - transformer.get_names_of_added_columns() - - table = Table( - { - "a": [0.0], - }, - ) - transformer = transformer.fit(table, None) - assert transformer.get_names_of_added_columns() == [] - - def test_get_names_of_changed_columns(self) -> None: - transformer = Discretizer() - with pytest.raises(TransformerNotFittedError, match=r"The transformer has not been fitted yet."): - transformer.get_names_of_changed_columns() - table = Table( - { - "a": [0.0], - }, - ) - transformer = transformer.fit(table, None) - assert transformer.get_names_of_changed_columns() == ["a"] - - def test_get_names_of_removed_columns(self) -> None: - transformer = Discretizer() - with pytest.raises(TransformerNotFittedError, match=r"The transformer has not been fitted yet."): - transformer.get_names_of_removed_columns() - - table = Table( - { - "a": [0.0], - }, - ) - transformer = transformer.fit(table, None) - assert transformer.get_names_of_removed_columns() == [] diff --git a/tests/safeds/data/tabular/transformation/test_label_encoder.py b/tests/safeds/data/tabular/transformation/test_label_encoder.py index dcbb59404..410f7a8d2 100644 --- a/tests/safeds/data/tabular/transformation/test_label_encoder.py +++ b/tests/safeds/data/tabular/transformation/test_label_encoder.py @@ -159,44 +159,6 @@ def test_should_not_change_original_table(self) -> None: assert table == expected - def test_get_names_of_added_columns(self) -> None: - transformer = LabelEncoder() - with pytest.raises(TransformerNotFittedError): - transformer.get_names_of_added_columns() - - table = Table( - { - "a": ["b"], - }, - ) - transformer = transformer.fit(table, None) - assert transformer.get_names_of_added_columns() == [] - - def test_get_names_of_changed_columns(self) -> None: - transformer = LabelEncoder() - with pytest.raises(TransformerNotFittedError): - transformer.get_names_of_changed_columns() - table = Table( - { - "a": ["b"], - }, - ) - transformer = transformer.fit(table, None) - assert transformer.get_names_of_changed_columns() == ["a"] - - def test_get_names_of_removed_columns(self) -> None: - transformer = LabelEncoder() - with pytest.raises(TransformerNotFittedError): - transformer.get_names_of_removed_columns() - - table = Table( - { - "a": ["b"], - }, - ) - transformer = transformer.fit(table, None) - assert transformer.get_names_of_removed_columns() == [] - class TestInverseTransform: @pytest.mark.parametrize( diff --git a/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py b/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py index c6bfd0b3f..d95bd97d2 100644 --- a/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py +++ b/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py @@ -310,45 +310,6 @@ def test_should_not_change_original_table(self) -> None: assert table == expected - def test_get_names_of_added_columns(self) -> None: - transformer = OneHotEncoder() - with pytest.raises(TransformerNotFittedError): - transformer.get_names_of_added_columns() - - table = Table( - {"a__b": ["c", "d"], "a": ["b__c", "d"], "b": ["a", float("nan")]}, - ) - added_columns = ["a__b__c", "a__b__d", "a__b__c#2", "a__d", "b__a", "b__nan"] - - transformer = transformer.fit(table, None) - assert transformer.get_names_of_added_columns() == added_columns - - def test_get_names_of_changed_columns(self) -> None: - transformer = OneHotEncoder() - with pytest.raises(TransformerNotFittedError): - transformer.get_names_of_changed_columns() - - table = Table( - { - "a": ["b"], - }, - ) - transformer = transformer.fit(table, None) - assert transformer.get_names_of_changed_columns() == [] - - def test_get_names_of_removed_columns(self) -> None: - transformer = OneHotEncoder() - with pytest.raises(TransformerNotFittedError): - transformer.get_names_of_removed_columns() - - table = Table( - { - "a": ["b"], - }, - ) - transformer = transformer.fit(table, None) - assert transformer.get_names_of_removed_columns() == ["a"] - class TestInverseTransform: @pytest.mark.parametrize( diff --git a/tests/safeds/data/tabular/transformation/test_range_scaler.py b/tests/safeds/data/tabular/transformation/test_range_scaler.py index 631b7e817..c37e813a6 100644 --- a/tests/safeds/data/tabular/transformation/test_range_scaler.py +++ b/tests/safeds/data/tabular/transformation/test_range_scaler.py @@ -218,44 +218,6 @@ def test_should_not_change_original_table(self) -> None: assert table == expected - def test_get_names_of_added_columns(self) -> None: - transformer = RangeScaler() - with pytest.raises(TransformerNotFittedError): - transformer.get_names_of_added_columns() - - table = Table( - { - "a": [0.0], - }, - ) - transformer = transformer.fit(table, None) - assert transformer.get_names_of_added_columns() == [] - - def test_get_names_of_changed_columns(self) -> None: - transformer = RangeScaler() - with pytest.raises(TransformerNotFittedError): - transformer.get_names_of_changed_columns() - table = Table( - { - "a": [0.0], - }, - ) - transformer = transformer.fit(table, None) - assert transformer.get_names_of_changed_columns() == ["a"] - - def test_get_names_of_removed_columns(self) -> None: - transformer = RangeScaler() - with pytest.raises(TransformerNotFittedError): - transformer.get_names_of_removed_columns() - - table = Table( - { - "a": [0.0], - }, - ) - transformer = transformer.fit(table, None) - assert transformer.get_names_of_removed_columns() == [] - class TestInverseTransform: @pytest.mark.parametrize( diff --git a/tests/safeds/data/tabular/transformation/test_simple_imputer.py b/tests/safeds/data/tabular/transformation/test_simple_imputer.py index 312cd50b1..27060d5a2 100644 --- a/tests/safeds/data/tabular/transformation/test_simple_imputer.py +++ b/tests/safeds/data/tabular/transformation/test_simple_imputer.py @@ -462,47 +462,3 @@ def test_should_not_change_original_table(self, strategy: SimpleImputer.Strategy ) assert table == expected - - @pytest.mark.parametrize("strategy", strategies(), ids=lambda x: x.__class__.__name__) - def test_get_names_of_added_columns(self, strategy: SimpleImputer.Strategy) -> None: - transformer = SimpleImputer(strategy=strategy) - with pytest.raises(TransformerNotFittedError): - transformer.get_names_of_added_columns() - - table = Table( - { - "a": [1, None], - "b": [1, 1], - }, - ) - transformer = transformer.fit(table, None) - assert transformer.get_names_of_added_columns() == [] - - @pytest.mark.parametrize("strategy", strategies(), ids=lambda x: x.__class__.__name__) - def test_get_names_of_changed_columns(self, strategy: SimpleImputer.Strategy) -> None: - transformer = SimpleImputer(strategy=strategy) - with pytest.raises(TransformerNotFittedError): - transformer.get_names_of_changed_columns() - table = Table( - { - "a": [1, None], - "b": [1, 1], - }, - ) - transformer = transformer.fit(table, None) - assert transformer.get_names_of_changed_columns() == ["a", "b"] - - @pytest.mark.parametrize("strategy", strategies(), ids=lambda x: x.__class__.__name__) - def test_get_names_of_removed_columns(self, strategy: SimpleImputer.Strategy) -> None: - transformer = SimpleImputer(strategy=strategy) - with pytest.raises(TransformerNotFittedError): - transformer.get_names_of_removed_columns() - - table = Table( - { - "a": [1, None], - "b": [1, 1], - }, - ) - transformer = transformer.fit(table, None) - assert transformer.get_names_of_removed_columns() == [] diff --git a/tests/safeds/data/tabular/transformation/test_standard_scaler.py b/tests/safeds/data/tabular/transformation/test_standard_scaler.py index e692cd5a2..5350026d5 100644 --- a/tests/safeds/data/tabular/transformation/test_standard_scaler.py +++ b/tests/safeds/data/tabular/transformation/test_standard_scaler.py @@ -160,44 +160,6 @@ def test_should_not_change_original_table(self) -> None: assert table == expected - def test_get_names_of_added_columns(self) -> None: - transformer = StandardScaler() - with pytest.raises(TransformerNotFittedError): - transformer.get_names_of_added_columns() - - table = Table( - { - "a": [0.0], - }, - ) - transformer = transformer.fit(table, None) - assert transformer.get_names_of_added_columns() == [] - - def test_get_names_of_changed_columns(self) -> None: - transformer = StandardScaler() - with pytest.raises(TransformerNotFittedError): - transformer.get_names_of_changed_columns() - table = Table( - { - "a": [0.0], - }, - ) - transformer = transformer.fit(table, None) - assert transformer.get_names_of_changed_columns() == ["a"] - - def test_get_names_of_removed_columns(self) -> None: - transformer = StandardScaler() - with pytest.raises(TransformerNotFittedError): - transformer.get_names_of_removed_columns() - - table = Table( - { - "a": [0.0], - }, - ) - transformer = transformer.fit(table, None) - assert transformer.get_names_of_removed_columns() == [] - class TestInverseTransform: @pytest.mark.parametrize( From 61121a2fe77679864a2e14f7701903a219c42c34 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 14 May 2024 20:12:58 +0200 Subject: [PATCH 09/21] perf: range scaler with polars --- .../_validation/_check_columns_are_numeric.py | 63 +++++++ .../tabular/transformation/_range_scaler.py | 159 ++++++++---------- src/safeds/exceptions/__init__.py | 5 + .../transformation/test_range_scaler.py | 32 +--- 4 files changed, 143 insertions(+), 116 deletions(-) create mode 100644 src/safeds/_validation/_check_columns_are_numeric.py diff --git a/src/safeds/_validation/_check_columns_are_numeric.py b/src/safeds/_validation/_check_columns_are_numeric.py new file mode 100644 index 000000000..9123e1178 --- /dev/null +++ b/src/safeds/_validation/_check_columns_are_numeric.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from safeds.exceptions import ColumnTypeError + +if TYPE_CHECKING: + from collections.abc import Container + + from safeds.data.tabular.containers import Table + from safeds.data.tabular.typing import Schema + + +def _check_columns_are_numeric( + table_or_schema: Table | Schema, + column_names: str | list[str], + *, + operation: str = "do a numeric operation", +) -> None: + """ + Check if the columns with the specified names are numeric and raise an error if they are not. + + Missing columns are ignored. Use `_check_columns_exist` to check for missing columns. + + Parameters + ---------- + table_or_schema: + The table or schema to check. + column_names: + The column names to check. + operation: + The operation that is performed on the columns. This is used in the error message. + + Raises + ------ + ColumnTypeError + If a column exists but is not numeric. + """ + from safeds.data.tabular.containers import Table # circular import + + if isinstance(table_or_schema, Table): + table_or_schema = table_or_schema.schema + if isinstance(column_names, str): + column_names = [column_names] + + if len(column_names) > 1: + # Create a set for faster containment checks + known_names: Container = set(table_or_schema.column_names) + else: + known_names = table_or_schema.column_names + + non_numeric_names = [ + name + for name in column_names + if name in known_names and not table_or_schema.get_column_type(name).is_numeric + ] + if non_numeric_names: + message = _build_error_message( non_numeric_names, operation) + raise ColumnTypeError(message) + + +def _build_error_message(non_numeric_names: list[str], operation: str) -> str: + return f"Tried to {operation} on non-numeric columns {non_numeric_names}." diff --git a/src/safeds/data/tabular/transformation/_range_scaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py index c9b52fd55..0b4f0ab52 100644 --- a/src/safeds/data/tabular/transformation/_range_scaler.py +++ b/src/safeds/data/tabular/transformation/_range_scaler.py @@ -1,12 +1,18 @@ from __future__ import annotations +from typing import TYPE_CHECKING + from safeds._utils import _structural_hash from safeds._validation import _check_columns_exist +from safeds._validation._check_columns_are_numeric import _check_columns_are_numeric from safeds.data.tabular.containers import Table -from safeds.exceptions import NonNumericColumnError, TransformerNotFittedError +from safeds.exceptions import TransformerNotFittedError from ._invertible_table_transformer import InvertibleTableTransformer +if TYPE_CHECKING: + import polars as pl + class RangeScaler(InvertibleTableTransformer): """ @@ -35,14 +41,21 @@ def __init__(self, min_: float = 0.0, max_: float = 1.0) -> None: if min_ >= max_: raise ValueError('Parameter "max_" must be greater than parameter "min_".') - self._min = min_ - self._max = max_ + # Parameters + self._min: float = min_ + self._max: float = max_ + + # Internal state + self._data_min: pl.DataFrame | None = None + self._data_max: pl.DataFrame | None = None def __hash__(self) -> int: return _structural_hash( InvertibleTableTransformer.__hash__(self), self._min, self._max, + self._data_min, + self._data_max, ) # ------------------------------------------------------------------------------------------------------------------ @@ -74,7 +87,7 @@ def fit(self, table: Table, column_names: list[str] | None) -> RangeScaler: table: The table used to fit the transformer. column_names: - The list of columns from the table used to fit the transformer. If `None`, all columns are used. + The list of columns from the table used to fit the transformer. If None, all numeric columns are used. Returns ------- @@ -85,45 +98,33 @@ def fit(self, table: Table, column_names: list[str] | None) -> RangeScaler: ------ ColumnNotFoundError If column_names contain a column name that is missing in the table. - NonNumericColumnError - If at least one of the specified columns in the table contains non-numerical data. + ColumnTypeError + If at least one of the specified columns in the table is not numeric. ValueError If the table contains 0 rows. """ - from sklearn.preprocessing import MinMaxScaler as sk_MinMaxScaler - if column_names is None: - column_names = table.column_names + column_names = [ + name + for name in table.column_names + if table.get_column_type(name).is_numeric + ] else: _check_columns_exist(table, column_names) + _check_columns_are_numeric(table, column_names, operation="fit a RangeScaler") if table.number_of_rows == 0: raise ValueError("The RangeScaler cannot be fitted because the table contains 0 rows") - if ( - table.remove_columns_except(column_names).remove_non_numeric_columns().number_of_columns - < table.remove_columns_except(column_names).number_of_columns - ): - raise NonNumericColumnError( - str( - sorted( - set(table.remove_columns_except(column_names).column_names) - - set( - table.remove_columns_except(column_names).remove_non_numeric_columns().column_names, - ), - ), - ), - ) + # Learn the transformation + _data_min = table._lazy_frame.select(column_names).min().collect() + _data_max = table._lazy_frame.select(column_names).max().collect() - wrapped_transformer = sk_MinMaxScaler((self._min, self._max)) - wrapped_transformer.set_output(transform="polars") - wrapped_transformer.fit( - table.remove_columns_except(column_names)._data_frame, - ) - - result = RangeScaler() - result._wrapped_transformer = wrapped_transformer + # Create a copy with the learned transformation + result = RangeScaler(min_=self._min, max_=self._max) result._column_names = column_names + result._data_min = _data_min + result._data_max = _data_max return result @@ -149,41 +150,30 @@ def transform(self, table: Table) -> Table: If the transformer has not been fitted yet. ColumnNotFoundError If the input table does not contain all columns used to fit the transformer. - NonNumericColumnError + ColumnTypeError If at least one of the columns in the input table that is used to fit contains non-numerical data. - ValueError - If the table contains 0 rows. """ - # Transformer has not been fitted yet - if self._wrapped_transformer is None or self._column_names is None: + import polars as pl + + # Used in favor of is_fitted, so the type checker is happy + if self._column_names is None or self._data_min is None or self._data_max is None: raise TransformerNotFittedError - # Input table does not contain all columns used to fit the transformer _check_columns_exist(table, self._column_names) - - if table.number_of_rows == 0: - raise ValueError("The RangeScaler cannot transform the table because it contains 0 rows") - - if ( - table.remove_columns_except(self._column_names).remove_non_numeric_columns().number_of_columns - < table.remove_columns_except(self._column_names).number_of_columns - ): - raise NonNumericColumnError( - str( - sorted( - set(table.remove_columns_except(self._column_names).column_names) - - set( - table.remove_columns_except(self._column_names).remove_non_numeric_columns().column_names, - ), - ), - ), + _check_columns_are_numeric(table, self._column_names, operation="transform with a RangeScaler") + + columns = [ + ( + (pl.col(name) - self._data_min.get_column(name)) + / (self._data_max.get_column(name) - self._data_min.get_column(name)) + * (self._max - self._min) + + self._min ) + for name in self._column_names + ] - new_data = self._wrapped_transformer.transform( - table.remove_columns_except(self._column_names)._data_frame, - ) return Table._from_polars_lazy_frame( - table._lazy_frame.update(new_data.lazy()), + table._lazy_frame.with_columns(columns), ) def inverse_transform(self, transformed_table: Table) -> Table: @@ -213,44 +203,29 @@ def inverse_transform(self, transformed_table: Table) -> Table: ValueError If the table contains 0 rows. """ - # Transformer has not been fitted yet - if self._wrapped_transformer is None or self._column_names is None: + import polars as pl + + # Used in favor of is_fitted, so the type checker is happy + if self._column_names is None or self._data_min is None or self._data_max is None: raise TransformerNotFittedError _check_columns_exist(transformed_table, self._column_names) - - if transformed_table.number_of_rows == 0: - raise ValueError("The RangeScaler cannot transform the table because it contains 0 rows") - - if ( - transformed_table.remove_columns_except(self._column_names).remove_non_numeric_columns().number_of_columns - < transformed_table.remove_columns_except(self._column_names).number_of_columns - ): - raise NonNumericColumnError( - str( - sorted( - set(transformed_table.remove_columns_except(self._column_names).column_names) - - set( - transformed_table.remove_columns_except(self._column_names) - .remove_non_numeric_columns() - .column_names, - ), - ), - ), - ) - - import polars as pl - - new_data = pl.DataFrame( - self._wrapped_transformer.inverse_transform( - transformed_table.remove_columns_except(self._column_names)._data_frame, - ), + _check_columns_are_numeric( + transformed_table, + self._column_names, + operation="inverse-transform with a RangeScaler", ) - name_mapping = dict(zip(new_data.columns, self._column_names, strict=True)) - - new_data = new_data.rename(name_mapping) + columns = [ + ( + (pl.col(name) - self._min) + / (self._max - self._min) + * (self._data_max.get_column(name) - self._data_min.get_column(name)) + + self._data_min.get_column(name) + ) + for name in self._column_names + ] - return Table._from_polars_data_frame( - transformed_table._data_frame.update(new_data), + return Table._from_polars_lazy_frame( + transformed_table._lazy_frame.with_columns(columns), ) diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index 7592a1972..8f1e9de6d 100644 --- a/src/safeds/exceptions/__init__.py +++ b/src/safeds/exceptions/__init__.py @@ -34,6 +34,10 @@ class ColumnNotFoundError(SafeDsError): """Exception raised when trying to access an invalid column name.""" +class ColumnTypeError(SafeDsError): + """Exception raised when a column has the wrong type.""" + + class FileExtensionError(SafeDsError): """Exception raised when a path has the wrong file extension.""" @@ -45,6 +49,7 @@ class OutOfBoundsError(SafeDsError): __all__ = [ "SafeDsError", "ColumnNotFoundError", + "ColumnTypeError", "FileExtensionError", "OutOfBoundsError", # TODO diff --git a/tests/safeds/data/tabular/transformation/test_range_scaler.py b/tests/safeds/data/tabular/transformation/test_range_scaler.py index c37e813a6..2204e93fa 100644 --- a/tests/safeds/data/tabular/transformation/test_range_scaler.py +++ b/tests/safeds/data/tabular/transformation/test_range_scaler.py @@ -1,12 +1,12 @@ import pytest from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation import RangeScaler -from safeds.exceptions import ColumnNotFoundError, NonNumericColumnError, TransformerNotFittedError +from safeds.exceptions import ColumnNotFoundError, ColumnTypeError, TransformerNotFittedError class TestInit: def test_should_raise_value_error(self) -> None: - with pytest.raises(ValueError, match='Parameter "maximum" must be higher than parameter "minimum".'): + with pytest.raises(ValueError, match='Parameter "max_" must be greater than parameter "min_".'): _ = RangeScaler(min_=10, max_=0) @@ -22,15 +22,12 @@ def test_should_raise_if_column_not_found(self) -> None: RangeScaler().fit(table, ["col2", "col3"]) def test_should_raise_if_table_contains_non_numerical_data(self) -> None: - with pytest.raises( - NonNumericColumnError, - match=r"Tried to do a numerical operation on one or multiple non-numerical columns: \n\['col1', 'col2'\]", - ): + with pytest.raises(ColumnTypeError): RangeScaler().fit(Table({"col1": ["a", "b"], "col2": [1, "c"]}), ["col1", "col2"]) def test_should_raise_if_table_contains_no_rows(self) -> None: with pytest.raises(ValueError, match=r"The RangeScaler cannot be fitted because the table contains 0 rows"): - RangeScaler().fit(Table({"col1": []}), ["col1"]) + RangeScaler().fit(Table({"col1": []}), None) def test_should_not_change_original_transformer(self) -> None: table = Table( @@ -42,8 +39,9 @@ def test_should_not_change_original_transformer(self) -> None: transformer = RangeScaler() transformer.fit(table, None) - assert transformer._wrapped_transformer is None assert transformer._column_names is None + assert transformer._data_min is None + assert transformer._data_max is None class TestTransform: @@ -79,18 +77,11 @@ def test_should_raise_if_not_fitted(self) -> None: transformer.transform(table) def test_should_raise_if_table_contains_non_numerical_data(self) -> None: - with pytest.raises( - NonNumericColumnError, - match=r"Tried to do a numerical operation on one or multiple non-numerical columns: \n\['col1', 'col2'\]", - ): + with pytest.raises(ColumnTypeError): RangeScaler().fit(Table({"col1": [1, 2, 3], "col2": [2, 3, 4]}), ["col1", "col2"]).transform( Table({"col1": ["a", "b", "c"], "col2": ["c", "d", "e"]}), ) - def test_should_raise_if_table_contains_no_rows(self) -> None: - with pytest.raises(ValueError, match=r"The RangeScaler cannot transform the table because it contains 0 rows"): - RangeScaler().fit(Table({"col1": [1, 2, 3]}), ["col1"]).transform(Table({"col1": []})) - class TestIsFitted: def test_should_return_false_before_fitting(self) -> None: @@ -273,14 +264,7 @@ def test_should_raise_if_column_not_found(self) -> None: ) def test_should_raise_if_table_contains_non_numerical_data(self) -> None: - with pytest.raises( - NonNumericColumnError, - match=r"Tried to do a numerical operation on one or multiple non-numerical columns: \n\['col1', 'col2'\]", - ): + with pytest.raises(ColumnTypeError): RangeScaler().fit(Table({"col1": [1, 2, 3], "col2": [2, 3, 4]}), ["col1", "col2"]).inverse_transform( Table({"col1": ["1", "2", "three"], "col2": [1, 2, "four"]}), ) - - def test_should_raise_if_table_contains_no_rows(self) -> None: - with pytest.raises(ValueError, match=r"The RangeScaler cannot transform the table because it contains 0 rows"): - RangeScaler().fit(Table({"col1": [1, 2, 3]}), ["col1"]).inverse_transform(Table({"col1": []})) From 99f243271fda008f5c0738e84dd0613985a2367e Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 14 May 2024 20:19:41 +0200 Subject: [PATCH 10/21] fix: mypy errors --- .../data/labeled/containers/_image_dataset.py | 8 ++--- .../transformation/_one_hot_encoder.py | 36 +++++++++++-------- .../nn/converters/_output_converter_image.py | 2 +- 3 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/safeds/data/labeled/containers/_image_dataset.py b/src/safeds/data/labeled/containers/_image_dataset.py index 4f4abe86a..32430554a 100644 --- a/src/safeds/data/labeled/containers/_image_dataset.py +++ b/src/safeds/data/labeled/containers/_image_dataset.py @@ -89,7 +89,7 @@ def __init__(self, input_data: ImageList, output_data: T, batch_size: int = 1, s _output_size: int | ImageSize = output_data.number_of_columns elif isinstance(output_data, Column): _column_as_tensor = _ColumnAsTensor(output_data) - _output_size = len(_column_as_tensor._one_hot_encoder.get_names_of_added_columns()) + _output_size = len(_column_as_tensor._one_hot_encoder._get_names_of_added_columns()) _output = _column_as_tensor elif isinstance(output_data, _SingleSizeImageList): _output = output_data._clone()._as_single_size_image_list() @@ -394,9 +394,9 @@ def _from_tensor(tensor: Tensor, column_name: str, one_hot_encoder: OneHotEncode raise ValueError(f"Tensor has an invalid amount of dimensions. Needed 2 dimensions but got {tensor.dim()}.") if not one_hot_encoder.is_fitted: raise TransformerNotFittedError - if tensor.size(dim=1) != len(one_hot_encoder.get_names_of_added_columns()): + if tensor.size(dim=1) != len(one_hot_encoder._get_names_of_added_columns()): raise ValueError( - f"Tensor and one_hot_encoder have different amounts of classes ({tensor.size(dim=1)}!={len(one_hot_encoder.get_names_of_added_columns())}).", + f"Tensor and one_hot_encoder have different amounts of classes ({tensor.size(dim=1)}!={len(one_hot_encoder._get_names_of_added_columns())}).", ) table_as_tensor = _ColumnAsTensor.__new__(_ColumnAsTensor) table_as_tensor._tensor = tensor @@ -406,6 +406,6 @@ def _from_tensor(tensor: Tensor, column_name: str, one_hot_encoder: OneHotEncode def _to_column(self) -> Column: table = Table( - dict(zip(self._one_hot_encoder.get_names_of_added_columns(), self._tensor.T.tolist(), strict=False)), + dict(zip(self._one_hot_encoder._get_names_of_added_columns(), self._tensor.T.tolist(), strict=False)), ) return self._one_hot_encoder.inverse_transform(table).get_column(self._column_name) diff --git a/src/safeds/data/tabular/transformation/_one_hot_encoder.py b/src/safeds/data/tabular/transformation/_one_hot_encoder.py index cb0c95e01..3556393f2 100644 --- a/src/safeds/data/tabular/transformation/_one_hot_encoder.py +++ b/src/safeds/data/tabular/transformation/_one_hot_encoder.py @@ -64,7 +64,7 @@ def __init__(self) -> None: super().__init__() # Maps each old column to (list of) new columns created from it: - self._column_names: dict[str, list[str]] | None = None + self._column_map: dict[str, list[str]] | None = None # Maps concrete values (tuples of old column and value) to corresponding new column names: self._value_to_column: dict[tuple[str, Any], str] | None = None # Maps nan values (str of old column) to corresponding new column name @@ -74,7 +74,7 @@ def __eq__(self, other: object) -> bool: if not isinstance(other, OneHotEncoder): return NotImplemented return ( - self._column_names == other._column_names + self._column_map == other._column_map and self._value_to_column == other._value_to_column and self._value_to_column_nans == other._value_to_column_nans ) @@ -131,8 +131,8 @@ def fit(self, table: Table, column_names: list[str] | None) -> OneHotEncoder: ) result = OneHotEncoder() - - result._column_names = {} + result._column_names = column_names + result._column_map = {} result._value_to_column = {} result._value_to_column_nans = {} @@ -142,7 +142,7 @@ def fit(self, table: Table, column_names: list[str] | None) -> OneHotEncoder: # Iterate through all columns to-be-changed: for column in column_names: - result._column_names[column] = [] + result._column_map[column] = [] for element in table.get_column(column).get_distinct_values(): base_name = f"{column}__{element}" name_counter[base_name] += 1 @@ -151,7 +151,7 @@ def fit(self, table: Table, column_names: list[str] | None) -> OneHotEncoder: if name_counter[base_name] > 1: new_column_name += f"#{name_counter[base_name]}" # Update dictionary entries: - result._column_names[column] += [new_column_name] + result._column_map[column] += [new_column_name] if isinstance(element, float) and np.isnan(element): result._value_to_column_nans[column] = new_column_name else: @@ -190,11 +190,11 @@ def transform(self, table: Table) -> Table: import numpy as np # Transformer has not been fitted yet - if self._column_names is None or self._value_to_column is None or self._value_to_column_nans is None: + if self._column_map is None or self._value_to_column is None or self._value_to_column_nans is None: raise TransformerNotFittedError # Input table does not contain all columns used to fit the transformer - _check_columns_exist(table, list(self._column_names.keys())) + _check_columns_exist(table, list(self._column_map.keys())) if table.number_of_rows == 0: raise ValueError("The LabelEncoder cannot transform the table because it contains 0 rows") @@ -206,7 +206,7 @@ def transform(self, table: Table) -> Table: encoded_values[new_column_name] = [0.0 for _ in range(table.number_of_rows)] values_not_present_when_fitted = [] - for old_column_name in self._column_names: + for old_column_name in self._column_map: for i in range(table.number_of_rows): value = table.get_column(old_column_name).get_value(i) try: @@ -220,7 +220,7 @@ def transform(self, table: Table) -> Table: # already present in the table the OneHotEncoder was fitted on. values_not_present_when_fitted.append((value, old_column_name)) - for new_column in self._column_names[old_column_name]: + for new_column in self._column_map[old_column_name]: table = table.add_columns([Column(new_column, encoded_values[new_column])]) if len(values_not_present_when_fitted) > 0: @@ -229,7 +229,7 @@ def transform(self, table: Table) -> Table: # New columns may not be sorted: column_names = [] for name in table.column_names: - if name not in self._column_names: + if name not in self._column_map: column_names.append(name) else: column_names.extend( @@ -240,7 +240,7 @@ def transform(self, table: Table) -> Table: # Drop old, non-encoded columns: # (Don't do this earlier - we need the old column nams for sorting, # plus we need to prevent the table from possibly having 0 columns temporarily.) - return table.remove_columns(list(self._column_names.keys())) + return table.remove_columns(list(self._column_map.keys())) def inverse_transform(self, transformed_table: Table) -> Table: """ @@ -270,10 +270,10 @@ def inverse_transform(self, transformed_table: Table) -> Table: If the table contains 0 rows. """ # Transformer has not been fitted yet - if self._column_names is None or self._value_to_column is None or self._value_to_column_nans is None: + if self._column_map is None or self._value_to_column is None or self._value_to_column_nans is None: raise TransformerNotFittedError - _transformed_column_names = [item for sublist in self._column_names.values() for item in sublist] + _transformed_column_names = [item for sublist in self._column_map.values() for item in sublist] _check_columns_exist(transformed_table, _transformed_column_names) @@ -297,7 +297,7 @@ def inverse_transform(self, transformed_table: Table) -> Table: ) original_columns = {} - for original_column_name in self._column_names: + for original_column_name in self._column_map: original_columns[original_column_name] = [None for _ in range(transformed_table.number_of_rows)] for original_column_name, value in self._value_to_column: @@ -320,3 +320,9 @@ def inverse_transform(self, transformed_table: Table) -> Table: # Drop old column names: table = table.remove_columns(list(self._value_to_column.values())) return table.remove_columns(list(self._value_to_column_nans.values())) + + # TODO: remove / replace with consistent introspection methods across all transformers + def _get_names_of_added_columns(self) -> list[str]: + if self._column_map is None: + raise TransformerNotFittedError + return [name for column_names in self._column_map.values() for name in column_names] diff --git a/src/safeds/ml/nn/converters/_output_converter_image.py b/src/safeds/ml/nn/converters/_output_converter_image.py index 959fe2caf..5aad88599 100644 --- a/src/safeds/ml/nn/converters/_output_converter_image.py +++ b/src/safeds/ml/nn/converters/_output_converter_image.py @@ -83,7 +83,7 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: one_hot_encoder: OneHotEncoder = kwargs["one_hot_encoder"] column_name: str = kwargs["column_name"] - output = torch.zeros(len(input_data), len(one_hot_encoder.get_names_of_added_columns())) + output = torch.zeros(len(input_data), len(one_hot_encoder._get_names_of_added_columns())) output[torch.arange(len(input_data)), output_data] = 1 im_dataset: ImageDataset[Column] = ImageDataset[Column].__new__(ImageDataset) From b88ec0c89ca50ce3e50811e07fa6b570dafe34c5 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 14 May 2024 20:33:17 +0200 Subject: [PATCH 11/21] fix: failing test --- .../data/tabular/transformation/_one_hot_encoder.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_one_hot_encoder.py b/src/safeds/data/tabular/transformation/_one_hot_encoder.py index 3556393f2..bad344949 100644 --- a/src/safeds/data/tabular/transformation/_one_hot_encoder.py +++ b/src/safeds/data/tabular/transformation/_one_hot_encoder.py @@ -4,6 +4,7 @@ from collections import Counter from typing import Any +from safeds._utils import _structural_hash from safeds._validation import _check_columns_exist from safeds.data.tabular.containers import Column, Table from safeds.exceptions import ( @@ -61,7 +62,7 @@ class OneHotEncoder(InvertibleTableTransformer): # ------------------------------------------------------------------------------------------------------------------ def __init__(self) -> None: - super().__init__() + InvertibleTableTransformer.__init__(self) # Maps each old column to (list of) new columns created from it: self._column_map: dict[str, list[str]] | None = None @@ -80,7 +81,12 @@ def __eq__(self, other: object) -> bool: ) def __hash__(self) -> int: - return super().__hash__() + return _structural_hash( + InvertibleTableTransformer.__hash__(self), + self._column_map, + self._value_to_column, + self._value_to_column_nans, + ) # ------------------------------------------------------------------------------------------------------------------ # Learning and transformation From 1ad6771151622e2c59dbab602190e877172a80ec Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 14 May 2024 20:56:10 +0200 Subject: [PATCH 12/21] build: short tracebacks when running pytest --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a5314c608..beb354ae3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,7 +78,7 @@ omit = [ ] [tool.pytest.ini_options] -addopts = "--snapshot-warn-unused" +addopts = "--snapshot-warn-unused --tb=short" filterwarnings = [ "ignore:Deprecated call to `pkg_resources.declare_namespace", "ignore:Jupyter is migrating its paths to use standard platformdirs" From e2e8ac135904c488cc6bf9969d66c2ccd857aaee Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 14 May 2024 21:12:25 +0200 Subject: [PATCH 13/21] perf: standard scaler with polars --- .../tabular/transformation/_range_scaler.py | 2 - .../transformation/_standard_scaler.py | 128 ++++++------------ tests/helpers/__init__.py | 4 +- tests/helpers/_assertions.py | 14 +- .../transformation/test_standard_scaler.py | 42 ++---- 5 files changed, 60 insertions(+), 130 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_range_scaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py index 0b4f0ab52..aac4c868a 100644 --- a/src/safeds/data/tabular/transformation/_range_scaler.py +++ b/src/safeds/data/tabular/transformation/_range_scaler.py @@ -54,8 +54,6 @@ def __hash__(self) -> int: InvertibleTableTransformer.__hash__(self), self._min, self._max, - self._data_min, - self._data_max, ) # ------------------------------------------------------------------------------------------------------------------ diff --git a/src/safeds/data/tabular/transformation/_standard_scaler.py b/src/safeds/data/tabular/transformation/_standard_scaler.py index 2b9fd2055..e2c704d73 100644 --- a/src/safeds/data/tabular/transformation/_standard_scaler.py +++ b/src/safeds/data/tabular/transformation/_standard_scaler.py @@ -4,13 +4,14 @@ from safeds._utils import _structural_hash from safeds._validation import _check_columns_exist +from safeds._validation._check_columns_are_numeric import _check_columns_are_numeric from safeds.data.tabular.containers import Table -from safeds.exceptions import NonNumericColumnError, TransformerNotFittedError +from safeds.exceptions import TransformerNotFittedError from ._invertible_table_transformer import InvertibleTableTransformer if TYPE_CHECKING: - from sklearn.preprocessing import StandardScaler as sk_StandardScaler + import polars as pl class StandardScaler(InvertibleTableTransformer): @@ -23,7 +24,9 @@ class StandardScaler(InvertibleTableTransformer): def __init__(self) -> None: InvertibleTableTransformer.__init__(self) - self._wrapped_transformer: sk_StandardScaler | None = None + # Internal state + self._data_mean: pl.DataFrame | None = None + self._data_standard_deviation: pl.DataFrame | None = None def __hash__(self) -> int: return _structural_hash( @@ -61,40 +64,28 @@ def fit(self, table: Table, column_names: list[str] | None) -> StandardScaler: ValueError If the table contains 0 rows. """ - from sklearn.preprocessing import StandardScaler as sk_StandardScaler - if column_names is None: - column_names = table.column_names + column_names = [ + name + for name in table.column_names + if table.get_column_type(name).is_numeric + ] else: _check_columns_exist(table, column_names) + _check_columns_are_numeric(table, column_names, operation="fit a StandardScaler") if table.number_of_rows == 0: raise ValueError("The StandardScaler cannot be fitted because the table contains 0 rows") - if ( - table.remove_columns_except(column_names).remove_non_numeric_columns().number_of_columns - < table.remove_columns_except(column_names).number_of_columns - ): - raise NonNumericColumnError( - str( - sorted( - set(table.remove_columns_except(column_names).column_names) - - set( - table.remove_columns_except(column_names).remove_non_numeric_columns().column_names, - ), - ), - ), - ) - - wrapped_transformer = sk_StandardScaler() - wrapped_transformer.set_output(transform="polars") - wrapped_transformer.fit( - table.remove_columns_except(column_names)._data_frame, - ) + # Learn the transformation (ddof=0 is used to match the behavior of scikit-learn) + _data_mean = table._lazy_frame.select(column_names).mean().collect() + _data_standard_deviation = table._lazy_frame.select(column_names).std(ddof=0).collect() + # Create a copy with the learned transformation result = StandardScaler() - result._wrapped_transformer = wrapped_transformer result._column_names = column_names + result._data_mean = _data_mean + result._data_standard_deviation = _data_standard_deviation return result @@ -125,36 +116,22 @@ def transform(self, table: Table) -> Table: ValueError If the table contains 0 rows. """ - # Transformer has not been fitted yet - if self._wrapped_transformer is None or self._column_names is None: + import polars as pl + + # Used in favor of is_fitted, so the type checker is happy + if self._column_names is None or self._data_mean is None or self._data_standard_deviation is None: raise TransformerNotFittedError - # Input table does not contain all columns used to fit the transformer _check_columns_exist(table, self._column_names) + _check_columns_are_numeric(table, self._column_names, operation="transform with a StandardScaler") + + columns = [ + (pl.col(name) - self._data_mean.get_column(name)) / self._data_standard_deviation.get_column(name) + for name in self._column_names + ] - if table.number_of_rows == 0: - raise ValueError("The StandardScaler cannot transform the table because it contains 0 rows") - - if ( - table.remove_columns_except(self._column_names).remove_non_numeric_columns().number_of_columns - < table.remove_columns_except(self._column_names).number_of_columns - ): - raise NonNumericColumnError( - str( - sorted( - set(table.remove_columns_except(self._column_names).column_names) - - set( - table.remove_columns_except(self._column_names).remove_non_numeric_columns().column_names, - ), - ), - ), - ) - - new_data = self._wrapped_transformer.transform( - table.remove_columns_except(self._column_names)._data_frame, - ) return Table._from_polars_lazy_frame( - table._lazy_frame.update(new_data.lazy()), + table._lazy_frame.with_columns(columns), ) def inverse_transform(self, transformed_table: Table) -> Table: @@ -184,39 +161,24 @@ def inverse_transform(self, transformed_table: Table) -> Table: ValueError If the table contains 0 rows. """ - # Transformer has not been fitted yet - if self._wrapped_transformer is None or self._column_names is None: + import polars as pl + + # Used in favor of is_fitted, so the type checker is happy + if self._column_names is None or self._data_mean is None or self._data_standard_deviation is None: raise TransformerNotFittedError _check_columns_exist(transformed_table, self._column_names) - - if transformed_table.number_of_rows == 0: - raise ValueError("The StandardScaler cannot transform the table because it contains 0 rows") - - if ( - transformed_table.remove_columns_except(self._column_names).remove_non_numeric_columns().number_of_columns - < transformed_table.remove_columns_except(self._column_names).number_of_columns - ): - raise NonNumericColumnError( - str( - sorted( - set(transformed_table.remove_columns_except(self._column_names).column_names) - - set( - transformed_table.remove_columns_except(self._column_names) - .remove_non_numeric_columns() - .column_names, - ), - ), - ), - ) - - new_data = self._wrapped_transformer.inverse_transform( - transformed_table.remove_columns_except(self._column_names)._data_frame, - ) - return Table._from_polars_data_frame( - transformed_table._data_frame.update(new_data), + _check_columns_are_numeric( + transformed_table, + self._column_names, + operation="inverse-transform with a StandardScaler", ) - @property - def is_fitted(self) -> bool: - return self._wrapped_transformer is not None + columns = [ + pl.col(name) * self._data_standard_deviation.get_column(name) + self._data_mean.get_column(name) + for name in self._column_names + ] + + return Table._from_polars_lazy_frame( + transformed_table._lazy_frame.with_columns(columns), + ) diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py index 883650a52..20a9ac339 100644 --- a/tests/helpers/__init__.py +++ b/tests/helpers/__init__.py @@ -1,5 +1,5 @@ from ._assertions import ( - assert_that_tables_are_close, + assert_tables_equal, assert_that_tabular_datasets_are_equal, ) from ._devices import ( @@ -36,7 +36,7 @@ from ._resources import resolve_resource_path __all__ = [ - "assert_that_tables_are_close", + "assert_tables_equal", "assert_that_tabular_datasets_are_equal", "configure_test_with_device", "device_cpu", diff --git a/tests/helpers/_assertions.py b/tests/helpers/_assertions.py index be13f978a..8976fc063 100644 --- a/tests/helpers/_assertions.py +++ b/tests/helpers/_assertions.py @@ -1,9 +1,9 @@ -import pytest +from polars.testing import assert_frame_equal from safeds.data.labeled.containers import TabularDataset from safeds.data.tabular.containers import Table -def assert_that_tables_are_close(table1: Table, table2: Table) -> None: +def assert_tables_equal(table1: Table, table2: Table) -> None: """ Assert that two tables are almost equal. @@ -14,15 +14,7 @@ def assert_that_tables_are_close(table1: Table, table2: Table) -> None: table2: Table The table to compare the first table to. """ - assert table1.schema == table2.schema - for column_name in table1.column_names: - assert table1.get_column(column_name).type == table2.get_column(column_name).type - assert table1.get_column(column_name).type.is_numeric - assert table2.get_column(column_name).type.is_numeric - for i in range(table1.number_of_rows): - entry_1 = table1.get_column(column_name).get_value(i) - entry_2 = table2.get_column(column_name).get_value(i) - assert entry_1 == pytest.approx(entry_2) + assert_frame_equal(table1._data_frame, table2._data_frame) def assert_that_tabular_datasets_are_equal(table1: TabularDataset, table2: TabularDataset) -> None: diff --git a/tests/safeds/data/tabular/transformation/test_standard_scaler.py b/tests/safeds/data/tabular/transformation/test_standard_scaler.py index 5350026d5..911e82bca 100644 --- a/tests/safeds/data/tabular/transformation/test_standard_scaler.py +++ b/tests/safeds/data/tabular/transformation/test_standard_scaler.py @@ -1,9 +1,9 @@ import pytest from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation import StandardScaler -from safeds.exceptions import ColumnNotFoundError, NonNumericColumnError, TransformerNotFittedError +from safeds.exceptions import ColumnNotFoundError, ColumnTypeError, TransformerNotFittedError -from tests.helpers import assert_that_tables_are_close +from tests.helpers import assert_tables_equal class TestFit: @@ -18,10 +18,7 @@ def test_should_raise_if_column_not_found(self) -> None: StandardScaler().fit(table, ["col2", "col3"]) def test_should_raise_if_table_contains_non_numerical_data(self) -> None: - with pytest.raises( - NonNumericColumnError, - match=r"Tried to do a numerical operation on one or multiple non-numerical columns: \n\['col1', 'col2'\]", - ): + with pytest.raises(ColumnTypeError): StandardScaler().fit( Table({"col1": ["one", "two", "apple"], "col2": ["three", "four", "banana"]}), ["col1", "col2"], @@ -29,7 +26,7 @@ def test_should_raise_if_table_contains_non_numerical_data(self) -> None: def test_should_raise_if_table_contains_no_rows(self) -> None: with pytest.raises(ValueError, match=r"The StandardScaler cannot be fitted because the table contains 0 rows"): - StandardScaler().fit(Table({"col1": []}), ["col1"]) + StandardScaler().fit(Table({"col1": []}), None) def test_should_not_change_original_transformer(self) -> None: table = Table( @@ -41,8 +38,9 @@ def test_should_not_change_original_transformer(self) -> None: transformer = StandardScaler() transformer.fit(table, None) - assert transformer._wrapped_transformer is None assert transformer._column_names is None + assert transformer._data_mean is None + assert transformer._data_standard_deviation is None class TestTransform: @@ -78,21 +76,11 @@ def test_should_raise_if_not_fitted(self) -> None: transformer.transform(table) def test_should_raise_if_table_contains_non_numerical_data(self) -> None: - with pytest.raises( - NonNumericColumnError, - match=r"Tried to do a numerical operation on one or multiple non-numerical columns: \n\['col1', 'col2'\]", - ): + with pytest.raises(ColumnTypeError): StandardScaler().fit(Table({"col1": [1, 2, 3], "col2": [2, 3, 4]}), ["col1", "col2"]).transform( Table({"col1": ["a", "b", "c"], "col2": ["b", "c", "e"]}), ) - def test_should_raise_if_table_contains_no_rows(self) -> None: - with pytest.raises( - ValueError, - match=r"The StandardScaler cannot transform the table because it contains 0 rows", - ): - StandardScaler().fit(Table({"col1": [1, 2, 3]}), ["col1"]).transform(Table({"col1": []})) - class TestIsFitted: def test_should_return_false_before_fitting(self) -> None: @@ -141,7 +129,7 @@ def test_should_return_fitted_transformer_and_transformed_table( ) -> None: fitted_transformer, transformed_table = StandardScaler().fit_and_transform(table, column_names) assert fitted_transformer.is_fitted - assert_that_tables_are_close(transformed_table, expected) + assert_tables_equal(transformed_table, expected) def test_should_not_change_original_table(self) -> None: table = Table( @@ -195,7 +183,7 @@ def test_should_not_change_transformed_table(self) -> None: }, ) - assert_that_tables_are_close(transformed_table, expected) + assert_tables_equal(transformed_table, expected) def test_should_raise_if_not_fitted(self) -> None: table = Table( @@ -216,17 +204,7 @@ def test_should_raise_if_column_not_found(self) -> None: ) def test_should_raise_if_table_contains_non_numerical_data(self) -> None: - with pytest.raises( - NonNumericColumnError, - match=r"Tried to do a numerical operation on one or multiple non-numerical columns: \n\['col1', 'col2'\]", - ): + with pytest.raises(ColumnTypeError): StandardScaler().fit(Table({"col1": [1, 2, 4], "col2": [2, 3, 4]}), ["col1", "col2"]).inverse_transform( Table({"col1": ["one", "two", "apple"], "col2": ["three", "four", "banana"]}), ) - - def test_should_raise_if_table_contains_no_rows(self) -> None: - with pytest.raises( - ValueError, - match=r"The StandardScaler cannot transform the table because it contains 0 rows", - ): - StandardScaler().fit(Table({"col1": [1, 2, 4]}), ["col1"]).inverse_transform(Table({"col1": []})) From 2fd9209386b261e27f5bb5b66661ead320620c66 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 14 May 2024 21:14:53 +0200 Subject: [PATCH 14/21] fix: endless recursion --- src/safeds/data/tabular/transformation/_simple_imputer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/transformation/_simple_imputer.py b/src/safeds/data/tabular/transformation/_simple_imputer.py index bb53e38e6..a62f67a1e 100644 --- a/src/safeds/data/tabular/transformation/_simple_imputer.py +++ b/src/safeds/data/tabular/transformation/_simple_imputer.py @@ -108,7 +108,7 @@ def __init__(self, strategy: SimpleImputer.Strategy, *, value_to_replace: float def __hash__(self) -> int: return _structural_hash( - SimpleImputer.__hash__(self), + TableTransformer.__hash__(self), self._strategy, self._value_to_replace, ) From 14c37181b3c942d973954c9acf737051ff176353 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 14 May 2024 21:28:20 +0200 Subject: [PATCH 15/21] refactor: some cleanup --- src/safeds/data/tabular/transformation/_label_encoder.py | 2 +- src/safeds/data/tabular/transformation/_one_hot_encoder.py | 4 ++-- src/safeds/data/tabular/transformation/_range_scaler.py | 4 ++-- src/safeds/data/tabular/transformation/_simple_imputer.py | 4 ++-- src/safeds/data/tabular/transformation/_standard_scaler.py | 7 ++----- 5 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_label_encoder.py b/src/safeds/data/tabular/transformation/_label_encoder.py index c32715106..a3330175c 100644 --- a/src/safeds/data/tabular/transformation/_label_encoder.py +++ b/src/safeds/data/tabular/transformation/_label_encoder.py @@ -21,7 +21,7 @@ class LabelEncoder(InvertibleTableTransformer): # ------------------------------------------------------------------------------------------------------------------ def __init__(self) -> None: - InvertibleTableTransformer.__init__(self) + super().__init__() self._wrapped_transformer: sk_OrdinalEncoder | None = None diff --git a/src/safeds/data/tabular/transformation/_one_hot_encoder.py b/src/safeds/data/tabular/transformation/_one_hot_encoder.py index bad344949..7882f663e 100644 --- a/src/safeds/data/tabular/transformation/_one_hot_encoder.py +++ b/src/safeds/data/tabular/transformation/_one_hot_encoder.py @@ -62,7 +62,7 @@ class OneHotEncoder(InvertibleTableTransformer): # ------------------------------------------------------------------------------------------------------------------ def __init__(self) -> None: - InvertibleTableTransformer.__init__(self) + super().__init__() # Maps each old column to (list of) new columns created from it: self._column_map: dict[str, list[str]] | None = None @@ -82,7 +82,7 @@ def __eq__(self, other: object) -> bool: def __hash__(self) -> int: return _structural_hash( - InvertibleTableTransformer.__hash__(self), + super().__hash__(), self._column_map, self._value_to_column, self._value_to_column_nans, diff --git a/src/safeds/data/tabular/transformation/_range_scaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py index aac4c868a..916bb7373 100644 --- a/src/safeds/data/tabular/transformation/_range_scaler.py +++ b/src/safeds/data/tabular/transformation/_range_scaler.py @@ -36,7 +36,7 @@ class RangeScaler(InvertibleTableTransformer): # ------------------------------------------------------------------------------------------------------------------ def __init__(self, min_: float = 0.0, max_: float = 1.0) -> None: - InvertibleTableTransformer.__init__(self) + super().__init__() if min_ >= max_: raise ValueError('Parameter "max_" must be greater than parameter "min_".') @@ -51,7 +51,7 @@ def __init__(self, min_: float = 0.0, max_: float = 1.0) -> None: def __hash__(self) -> int: return _structural_hash( - InvertibleTableTransformer.__hash__(self), + super().__hash__(), self._min, self._max, ) diff --git a/src/safeds/data/tabular/transformation/_simple_imputer.py b/src/safeds/data/tabular/transformation/_simple_imputer.py index a62f67a1e..200174bb6 100644 --- a/src/safeds/data/tabular/transformation/_simple_imputer.py +++ b/src/safeds/data/tabular/transformation/_simple_imputer.py @@ -96,7 +96,7 @@ def mode() -> SimpleImputer.Strategy: # ------------------------------------------------------------------------------------------------------------------ def __init__(self, strategy: SimpleImputer.Strategy, *, value_to_replace: float | str | None = None) -> None: - TableTransformer.__init__(self) + super().__init__() if value_to_replace is None: value_to_replace = pd.NA @@ -108,7 +108,7 @@ def __init__(self, strategy: SimpleImputer.Strategy, *, value_to_replace: float def __hash__(self) -> int: return _structural_hash( - TableTransformer.__hash__(self), + super().__hash__(), self._strategy, self._value_to_replace, ) diff --git a/src/safeds/data/tabular/transformation/_standard_scaler.py b/src/safeds/data/tabular/transformation/_standard_scaler.py index e2c704d73..11871fa99 100644 --- a/src/safeds/data/tabular/transformation/_standard_scaler.py +++ b/src/safeds/data/tabular/transformation/_standard_scaler.py @@ -2,7 +2,6 @@ from typing import TYPE_CHECKING -from safeds._utils import _structural_hash from safeds._validation import _check_columns_exist from safeds._validation._check_columns_are_numeric import _check_columns_are_numeric from safeds.data.tabular.containers import Table @@ -22,16 +21,14 @@ class StandardScaler(InvertibleTableTransformer): # ------------------------------------------------------------------------------------------------------------------ def __init__(self) -> None: - InvertibleTableTransformer.__init__(self) + super().__init__() # Internal state self._data_mean: pl.DataFrame | None = None self._data_standard_deviation: pl.DataFrame | None = None def __hash__(self) -> int: - return _structural_hash( - InvertibleTableTransformer.__hash__(self), - ) + return super().__hash__() # ------------------------------------------------------------------------------------------------------------------ # Learning and transformation From 5db34d4a197ec1de2fa4a2e3fbcd7eeeb2bf0ccd Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 14 May 2024 22:04:27 +0200 Subject: [PATCH 16/21] feat: specify partial order in label encoder --- .../tabular/transformation/_label_encoder.py | 147 ++++++++++-------- .../tabular/transformation/_range_scaler.py | 4 +- .../transformation/_standard_scaler.py | 10 +- .../transformation/test_label_encoder.py | 23 +-- 4 files changed, 91 insertions(+), 93 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_label_encoder.py b/src/safeds/data/tabular/transformation/_label_encoder.py index a3330175c..86bcecedf 100644 --- a/src/safeds/data/tabular/transformation/_label_encoder.py +++ b/src/safeds/data/tabular/transformation/_label_encoder.py @@ -1,32 +1,50 @@ from __future__ import annotations import warnings -from typing import TYPE_CHECKING +from typing import Any +from safeds._utils import _structural_hash from safeds._validation import _check_columns_exist +from safeds._validation._check_columns_are_numeric import _check_columns_are_numeric from safeds.data.tabular.containers import Table from safeds.exceptions import NonNumericColumnError, TransformerNotFittedError from ._invertible_table_transformer import InvertibleTableTransformer -if TYPE_CHECKING: - from sklearn.preprocessing import OrdinalEncoder as sk_OrdinalEncoder - class LabelEncoder(InvertibleTableTransformer): - """The LabelEncoder encodes one or more given columns into labels.""" + """ + The LabelEncoder encodes one or more given columns into labels. + + Parameters + ---------- + partial_order: + The partial order of the labels. The labels are encoded in the order of the given list. Additional values are + encoded as the next integer after the last value in the list in the order they appear in the data. + """ # ------------------------------------------------------------------------------------------------------------------ # Dunder methods # ------------------------------------------------------------------------------------------------------------------ - def __init__(self) -> None: + def __init__(self, *, partial_order: list[Any] | None = None) -> None: super().__init__() - self._wrapped_transformer: sk_OrdinalEncoder | None = None + if partial_order is None: + partial_order = [] + + # Parameters + self._partial_order = partial_order + + # Internal state + self._mapping: dict[str, dict[Any, int]] | None = None + self._inverse_mapping: dict[str, dict[int, Any]] | None = None def __hash__(self) -> int: - return super().__hash__() + return _structural_hash( + super().__hash__(), + self._partial_order, + ) # ------------------------------------------------------------------------------------------------------------------ # Learning and transformation @@ -57,8 +75,6 @@ def fit(self, table: Table, column_names: list[str] | None) -> LabelEncoder: ValueError If the table contains 0 rows. """ - from sklearn.preprocessing import OrdinalEncoder as sk_OrdinalEncoder - if column_names is None: column_names = table.column_names else: @@ -67,27 +83,29 @@ def fit(self, table: Table, column_names: list[str] | None) -> LabelEncoder: if table.number_of_rows == 0: raise ValueError("The LabelEncoder cannot transform the table because it contains 0 rows") - if table.remove_columns_except(column_names).remove_non_numeric_columns().number_of_columns > 0: - warnings.warn( - "The columns" - f" {table.remove_columns_except(column_names).remove_non_numeric_columns().column_names} contain" - " numerical data. The LabelEncoder is designed to encode non-numerical values into numerical values", - UserWarning, - stacklevel=2, - ) - - # TODO: use polars Enum type instead: - # my_enum = pl.Enum(['A', 'B', 'C']) <-- create this from the given order - # my_data = pl.Series(['A', 'A', 'B'], dtype=my_enum) - wrapped_transformer = sk_OrdinalEncoder() - wrapped_transformer.set_output(transform="polars") - wrapped_transformer.fit( - table.remove_columns_except(column_names)._data_frame, - ) + _warn_if_columns_are_numeric(table, column_names) + + # Learn the transformation + mapping = {} + reverse_mapping = {} + + for name in column_names: + # Remember partial order + mapping[name] = {value: index for index, value in enumerate(self._partial_order)} + reverse_mapping[name] = {index: value for value, index in mapping[name].items()} - result = LabelEncoder() - result._wrapped_transformer = wrapped_transformer + unique_values = table.get_column(name).get_distinct_values() + for value in unique_values: + if value not in mapping[name]: + label = len(mapping[name]) + mapping[name][value] = label + reverse_mapping[name][label] = value + + # Create a copy with the learned transformation + result = LabelEncoder(partial_order=self._partial_order) result._column_names = column_names + result._mapping = mapping + result._inverse_mapping = reverse_mapping return result @@ -116,21 +134,21 @@ def transform(self, table: Table) -> Table: ValueError If the table contains 0 rows. """ - # Transformer has not been fitted yet - if self._wrapped_transformer is None or self._column_names is None: + import polars as pl + + # Used in favor of is_fitted, so the type checker is happy + if self._mapping is None: raise TransformerNotFittedError - # Input table does not contain all columns used to fit the transformer _check_columns_exist(table, self._column_names) - if table.number_of_rows == 0: - raise ValueError("The LabelEncoder cannot transform the table because it contains 0 rows") + columns = [ + pl.col(name).replace(self._mapping[name], return_dtype=pl.UInt32) + for name in self._column_names + ] - new_data = self._wrapped_transformer.transform( - table.remove_columns_except(self._column_names)._data_frame, - ) return Table._from_polars_lazy_frame( - table._lazy_frame.update(new_data.lazy()), + table._lazy_frame.with_columns(columns), ) def inverse_transform(self, transformed_table: Table) -> Table: @@ -155,39 +173,38 @@ def inverse_transform(self, transformed_table: Table) -> Table: If the transformer has not been fitted yet. ColumnNotFoundError If the input table does not contain all columns used to fit the transformer. - NonNumericColumnError + ColumnTypeError If the specified columns of the input table contain non-numerical data. - ValueError - If the table contains 0 rows. """ - # Transformer has not been fitted yet - if self._wrapped_transformer is None or self._column_names is None: + import polars as pl + + # Used in favor of is_fitted, so the type checker is happy + if self._inverse_mapping is None: raise TransformerNotFittedError _check_columns_exist(transformed_table, self._column_names) - - if transformed_table.number_of_rows == 0: - raise ValueError("The LabelEncoder cannot inverse transform the table because it contains 0 rows") - - if transformed_table.remove_columns_except( + _check_columns_are_numeric( + transformed_table, self._column_names, - ).remove_non_numeric_columns().number_of_columns < len(self._column_names): - raise NonNumericColumnError( - str( - sorted( - set(self._column_names) - - set( - transformed_table.remove_columns_except(self._column_names) - .remove_non_numeric_columns() - .column_names, - ), - ), - ), - ) - - new_data = self._wrapped_transformer.inverse_transform( - transformed_table.remove_columns_except(self._column_names)._data_frame, + operation="inverse-transform with a LabelEncoder", ) + + columns = [ + pl.col(name).replace(self._inverse_mapping[name]) + for name in self._column_names + ] + return Table._from_polars_lazy_frame( - transformed_table._lazy_frame.update(new_data.lazy()), + transformed_table._lazy_frame.with_columns(columns), + ) + + +def _warn_if_columns_are_numeric(table: Table, column_names: list[str]): + numeric_columns = table.remove_columns_except(column_names).remove_non_numeric_columns().column_names + if numeric_columns: + warnings.warn( + f"The columns {numeric_columns} contain numerical data. " + "The LabelEncoder is designed to encode non-numerical values into numerical values", + UserWarning, + stacklevel=2, ) diff --git a/src/safeds/data/tabular/transformation/_range_scaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py index 916bb7373..5c304ef8a 100644 --- a/src/safeds/data/tabular/transformation/_range_scaler.py +++ b/src/safeds/data/tabular/transformation/_range_scaler.py @@ -196,10 +196,8 @@ def inverse_transform(self, transformed_table: Table) -> Table: If the transformer has not been fitted yet. ColumnNotFoundError If the input table does not contain all columns used to fit the transformer. - NonNumericColumnError + ColumnTypeError If the transformed columns of the input table contain non-numerical data. - ValueError - If the table contains 0 rows. """ import polars as pl diff --git a/src/safeds/data/tabular/transformation/_standard_scaler.py b/src/safeds/data/tabular/transformation/_standard_scaler.py index 11871fa99..582004f92 100644 --- a/src/safeds/data/tabular/transformation/_standard_scaler.py +++ b/src/safeds/data/tabular/transformation/_standard_scaler.py @@ -56,7 +56,7 @@ def fit(self, table: Table, column_names: list[str] | None) -> StandardScaler: ------ ColumnNotFoundError If column_names contain a column name that is missing in the table. - NonNumericColumnError + ColumnTypeError If at least one of the specified columns in the table contains non-numerical data. ValueError If the table contains 0 rows. @@ -108,10 +108,8 @@ def transform(self, table: Table) -> Table: If the transformer has not been fitted yet. ColumnNotFoundError If the input table does not contain all columns used to fit the transformer. - NonNumericColumnError + ColumnTypeError If at least one of the columns in the input table that is used to fit contains non-numerical data. - ValueError - If the table contains 0 rows. """ import polars as pl @@ -153,10 +151,8 @@ def inverse_transform(self, transformed_table: Table) -> Table: If the transformer has not been fitted yet. ColumnNotFoundError If the input table does not contain all columns used to fit the transformer. - NonNumericColumnError + ColumnTypeError If the transformed columns of the input table contain non-numerical data. - ValueError - If the table contains 0 rows. """ import polars as pl diff --git a/tests/safeds/data/tabular/transformation/test_label_encoder.py b/tests/safeds/data/tabular/transformation/test_label_encoder.py index 410f7a8d2..8cdda886f 100644 --- a/tests/safeds/data/tabular/transformation/test_label_encoder.py +++ b/tests/safeds/data/tabular/transformation/test_label_encoder.py @@ -1,7 +1,7 @@ import pytest from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation import LabelEncoder -from safeds.exceptions import ColumnNotFoundError, NonNumericColumnError, TransformerNotFittedError +from safeds.exceptions import ColumnNotFoundError, NonNumericColumnError, TransformerNotFittedError, ColumnTypeError class TestFit: @@ -39,8 +39,9 @@ def test_should_not_change_original_transformer(self) -> None: transformer = LabelEncoder() transformer.fit(table, None) - assert transformer._wrapped_transformer is None assert transformer._column_names is None + assert transformer._mapping is None + assert transformer._inverse_mapping is None class TestTransform: @@ -60,7 +61,7 @@ def test_should_raise_if_column_not_found(self) -> None: }, ) - with pytest.raises(ColumnNotFoundError, match=r"Could not find column\(s\) 'col1, col2'"): + with pytest.raises(ColumnNotFoundError): transformer.transform(table_to_transform) def test_should_raise_if_not_fitted(self) -> None: @@ -75,10 +76,6 @@ def test_should_raise_if_not_fitted(self) -> None: with pytest.raises(TransformerNotFittedError, match=r"The transformer has not been fitted yet."): transformer.transform(table) - def test_should_raise_if_table_contains_no_rows(self) -> None: - with pytest.raises(ValueError, match=r"The LabelEncoder cannot transform the table because it contains 0 rows"): - LabelEncoder().fit(Table({"col1": ["one", "two"]}), ["col1"]).transform(Table({"col1": []})) - class TestIsFitted: def test_should_return_false_before_fitting(self) -> None: @@ -216,18 +213,8 @@ def test_should_raise_if_column_not_found(self) -> None: ).inverse_transform(Table({"col3": [1.0, 0.0]})) def test_should_raise_if_table_contains_non_numerical_data(self) -> None: - with pytest.raises( - NonNumericColumnError, - match=r"Tried to do a numerical operation on one or multiple non-numerical columns: \n\['col1', 'col2'\]", - ): + with pytest.raises(ColumnTypeError): LabelEncoder().fit( Table({"col1": ["one", "two"], "col2": ["three", "four"]}), ["col1", "col2"], ).inverse_transform(Table({"col1": ["1", "null"], "col2": ["2", "apple"]})) - - def test_should_raise_if_table_contains_no_rows(self) -> None: - with pytest.raises( - ValueError, - match=r"The LabelEncoder cannot inverse transform the table because it contains 0 rows", - ): - LabelEncoder().fit(Table({"col1": ["one", "two"]}), ["col1"]).inverse_transform(Table({"col1": []})) From 4c9c8076171f6a3dc141445cc1b252d846038c32 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 14 May 2024 22:05:38 +0200 Subject: [PATCH 17/21] fix: mypy error --- src/safeds/data/tabular/transformation/_label_encoder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_label_encoder.py b/src/safeds/data/tabular/transformation/_label_encoder.py index 86bcecedf..c9d6f68b6 100644 --- a/src/safeds/data/tabular/transformation/_label_encoder.py +++ b/src/safeds/data/tabular/transformation/_label_encoder.py @@ -137,7 +137,7 @@ def transform(self, table: Table) -> Table: import polars as pl # Used in favor of is_fitted, so the type checker is happy - if self._mapping is None: + if self._column_names is None or self._mapping is None: raise TransformerNotFittedError _check_columns_exist(table, self._column_names) @@ -179,7 +179,7 @@ def inverse_transform(self, transformed_table: Table) -> Table: import polars as pl # Used in favor of is_fitted, so the type checker is happy - if self._inverse_mapping is None: + if self._column_names is None or self._inverse_mapping is None: raise TransformerNotFittedError _check_columns_exist(transformed_table, self._column_names) From 51f751941bce3a27839481b0fa371e40cc3dc120 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 14 May 2024 22:08:48 +0200 Subject: [PATCH 18/21] style: remove unused import --- src/safeds/data/tabular/transformation/_label_encoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/transformation/_label_encoder.py b/src/safeds/data/tabular/transformation/_label_encoder.py index c9d6f68b6..8d99687e4 100644 --- a/src/safeds/data/tabular/transformation/_label_encoder.py +++ b/src/safeds/data/tabular/transformation/_label_encoder.py @@ -7,7 +7,7 @@ from safeds._validation import _check_columns_exist from safeds._validation._check_columns_are_numeric import _check_columns_are_numeric from safeds.data.tabular.containers import Table -from safeds.exceptions import NonNumericColumnError, TransformerNotFittedError +from safeds.exceptions import TransformerNotFittedError from ._invertible_table_transformer import InvertibleTableTransformer From 3bf10d29b7d8cd00a7a0f0d140dc6225a311407b Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 14 May 2024 22:09:24 +0200 Subject: [PATCH 19/21] style: format --- .../_validation/_check_columns_are_numeric.py | 6 +- .../containers/_single_size_image_list.py | 7 +- .../tabular/transformation/_label_encoder.py | 10 +-- .../tabular/transformation/_range_scaler.py | 6 +- .../transformation/_standard_scaler.py | 6 +- .../data/image/containers/test_image_list.py | 65 +++++++++++++++---- .../tabular/containers/_table/test_hash.py | 1 - .../transformation/test_label_encoder.py | 2 +- .../transformation/test_simple_imputer.py | 4 +- 9 files changed, 68 insertions(+), 39 deletions(-) diff --git a/src/safeds/_validation/_check_columns_are_numeric.py b/src/safeds/_validation/_check_columns_are_numeric.py index 9123e1178..0dae84220 100644 --- a/src/safeds/_validation/_check_columns_are_numeric.py +++ b/src/safeds/_validation/_check_columns_are_numeric.py @@ -50,12 +50,10 @@ def _check_columns_are_numeric( known_names = table_or_schema.column_names non_numeric_names = [ - name - for name in column_names - if name in known_names and not table_or_schema.get_column_type(name).is_numeric + name for name in column_names if name in known_names and not table_or_schema.get_column_type(name).is_numeric ] if non_numeric_names: - message = _build_error_message( non_numeric_names, operation) + message = _build_error_message(non_numeric_names, operation) raise ColumnTypeError(message) diff --git a/src/safeds/data/image/containers/_single_size_image_list.py b/src/safeds/data/image/containers/_single_size_image_list.py index 1d31d7887..035e5b14a 100644 --- a/src/safeds/data/image/containers/_single_size_image_list.py +++ b/src/safeds/data/image/containers/_single_size_image_list.py @@ -82,7 +82,12 @@ def _create_image_list_from_files( image_list = _SingleSizeImageList() images_tensor = torch.empty( - number_of_images, max_channel, height, width, dtype=torch.uint8, device=_get_device(), + number_of_images, + max_channel, + height, + width, + dtype=torch.uint8, + device=_get_device(), ) thread_packages: list[ImageList._FromFileThreadPackage] = [] diff --git a/src/safeds/data/tabular/transformation/_label_encoder.py b/src/safeds/data/tabular/transformation/_label_encoder.py index 8d99687e4..69ee29f37 100644 --- a/src/safeds/data/tabular/transformation/_label_encoder.py +++ b/src/safeds/data/tabular/transformation/_label_encoder.py @@ -142,10 +142,7 @@ def transform(self, table: Table) -> Table: _check_columns_exist(table, self._column_names) - columns = [ - pl.col(name).replace(self._mapping[name], return_dtype=pl.UInt32) - for name in self._column_names - ] + columns = [pl.col(name).replace(self._mapping[name], return_dtype=pl.UInt32) for name in self._column_names] return Table._from_polars_lazy_frame( table._lazy_frame.with_columns(columns), @@ -189,10 +186,7 @@ def inverse_transform(self, transformed_table: Table) -> Table: operation="inverse-transform with a LabelEncoder", ) - columns = [ - pl.col(name).replace(self._inverse_mapping[name]) - for name in self._column_names - ] + columns = [pl.col(name).replace(self._inverse_mapping[name]) for name in self._column_names] return Table._from_polars_lazy_frame( transformed_table._lazy_frame.with_columns(columns), diff --git a/src/safeds/data/tabular/transformation/_range_scaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py index 5c304ef8a..833da4f7e 100644 --- a/src/safeds/data/tabular/transformation/_range_scaler.py +++ b/src/safeds/data/tabular/transformation/_range_scaler.py @@ -102,11 +102,7 @@ def fit(self, table: Table, column_names: list[str] | None) -> RangeScaler: If the table contains 0 rows. """ if column_names is None: - column_names = [ - name - for name in table.column_names - if table.get_column_type(name).is_numeric - ] + column_names = [name for name in table.column_names if table.get_column_type(name).is_numeric] else: _check_columns_exist(table, column_names) _check_columns_are_numeric(table, column_names, operation="fit a RangeScaler") diff --git a/src/safeds/data/tabular/transformation/_standard_scaler.py b/src/safeds/data/tabular/transformation/_standard_scaler.py index 582004f92..b008baa58 100644 --- a/src/safeds/data/tabular/transformation/_standard_scaler.py +++ b/src/safeds/data/tabular/transformation/_standard_scaler.py @@ -62,11 +62,7 @@ def fit(self, table: Table, column_names: list[str] | None) -> StandardScaler: If the table contains 0 rows. """ if column_names is None: - column_names = [ - name - for name in table.column_names - if table.get_column_type(name).is_numeric - ] + column_names = [name for name in table.column_names if table.get_column_type(name).is_numeric] else: _check_columns_exist(table, column_names) _check_columns_are_numeric(table, column_names, operation="fit a StandardScaler") diff --git a/tests/safeds/data/image/containers/test_image_list.py b/tests/safeds/data/image/containers/test_image_list.py index 4f0a46776..9b27f8f52 100644 --- a/tests/safeds/data/image/containers/test_image_list.py +++ b/tests/safeds/data/image/containers/test_image_list.py @@ -460,7 +460,10 @@ class TestFromFiles: ], ) def test_from_files_creation_return_filenames( - self, resource_path: str | Path, snapshot_png_image_list: SnapshotAssertion, device: Device, + self, + resource_path: str | Path, + snapshot_png_image_list: SnapshotAssertion, + device: Device, ) -> None: configure_test_with_device(device) image_list = ImageList.from_files(resolve_resource_path(resource_path)) @@ -492,7 +495,10 @@ def test_from_files_creation_return_filenames( ], ) def test_from_files_creation_load_percentage( - self, resource_path: str | Path, snapshot_png_image_list: SnapshotAssertion, device: Device, + self, + resource_path: str | Path, + snapshot_png_image_list: SnapshotAssertion, + device: Device, ) -> None: random.seed(420) configure_test_with_device(device) @@ -531,7 +537,10 @@ def test_should_raise_if_one_file_or_directory_not_found(self, resource_path: st [-1.0, 2.0], ) def test_should_raise_if_load_percentage_out_of_bounds( - self, resource_path: str | Path, load_percentage: float, device: Device, + self, + resource_path: str | Path, + load_percentage: float, + device: Device, ) -> None: configure_test_with_device(device) with pytest.raises(OutOfBoundsError): @@ -552,7 +561,10 @@ def test_create_from_single_sized_image_lists_one_image_list(self, device: Devic ids=["all-images"], ) def test_create_from_single_sized_image_lists( - self, resource_path: str | Path, snapshot_png_image_list: SnapshotAssertion, device: Device, + self, + resource_path: str | Path, + snapshot_png_image_list: SnapshotAssertion, + device: Device, ) -> None: configure_test_with_device(device) image_lists = ImageList.from_files(resolve_resource_path(resource_path)) @@ -687,7 +699,9 @@ def test_should_save_images_in_directory(self, resource_path: list[str], device: ids=["all-jpg-images", "jpg-planes", "jpg-grayscale"], ) def test_should_save_images_in_directories_for_different_sizes( - self, resource_path: list[str], device: Device, + self, + resource_path: list[str], + device: Device, ) -> None: configure_test_with_device(device) image_list = ImageList.from_files(resolve_resource_path(resource_path)) @@ -788,7 +802,9 @@ def test_should_save_images_in_directory(self, resource_path: list[str], device: ids=["all-images", "planes", "grayscale"], ) def test_should_save_images_in_directories_for_different_sizes( - self, resource_path: list[str], device: Device, + self, + resource_path: list[str], + device: Device, ) -> None: configure_test_with_device(device) image_list = ImageList.from_files(resolve_resource_path(resource_path)) @@ -842,7 +858,10 @@ class TestShuffleImages: ids=["all-images", "planes"], ) def test_shuffle_images( - self, resource_path: list[str], snapshot_png_image_list: SnapshotAssertion, device: Device, + self, + resource_path: list[str], + snapshot_png_image_list: SnapshotAssertion, + device: Device, ) -> None: configure_test_with_device(device) image_list_original = ImageList.from_files(resolve_resource_path(resource_path)) @@ -1102,7 +1121,11 @@ class TestRemoveImagesWithSize: ids=["invalid width", "invalid height", "invalid width and height"], ) def test_should_raise_negative_size( - self, resource_path: list[str], width: int, height: int, device: Device, + self, + resource_path: list[str], + width: int, + height: int, + device: Device, ) -> None: configure_test_with_device(device) image_list = ImageList.from_files(resolve_resource_path(resource_path)) @@ -1116,7 +1139,11 @@ class TestResize: ids=["invalid width", "invalid height", "invalid width and height"], ) def test_should_raise_new_size( - self, resource_path: list[str], new_width: int, new_height: int, device: Device, + self, + resource_path: list[str], + new_width: int, + new_height: int, + device: Device, ) -> None: configure_test_with_device(device) image_list = ImageList.from_files(resolve_resource_path(resource_path)) @@ -1130,7 +1157,11 @@ class TestCrop: ids=["invalid width", "invalid height", "invalid width and height"], ) def test_should_raise_invalid_size( - self, resource_path: list[str], new_width: int, new_height: int, device: Device, + self, + resource_path: list[str], + new_width: int, + new_height: int, + device: Device, ) -> None: configure_test_with_device(device) image_list = ImageList.from_files(resolve_resource_path(resource_path)) @@ -1143,7 +1174,11 @@ def test_should_raise_invalid_size( ids=["invalid x", "invalid y", "invalid x and y"], ) def test_should_raise_invalid_coordinates( - self, resource_path: list[str], new_x: int, new_y: int, device: Device, + self, + resource_path: list[str], + new_x: int, + new_y: int, + device: Device, ) -> None: configure_test_with_device(device) image_list = ImageList.from_files(resolve_resource_path(resource_path)) @@ -1157,7 +1192,10 @@ class TestAddNoise: ids=["sigma below zero"], ) def test_should_raise_standard_deviation( - self, resource_path: list[str], standard_deviation: float, device: Device, + self, + resource_path: list[str], + standard_deviation: float, + device: Device, ) -> None: configure_test_with_device(device) image_list_original = ImageList.from_files(resolve_resource_path(resource_path)) @@ -1466,7 +1504,8 @@ def test_create_image_list(self, image_list: ImageList, device: Device) -> None: def test_create_image_list_from_files(self, device: Device) -> None: configure_test_with_device(device) assert isinstance( - _SingleSizeImageList()._create_image_list_from_files({}, 0, 4, 1, 1, {}, 5)[0], _EmptyImageList, + _SingleSizeImageList()._create_image_list_from_files({}, 0, 4, 1, 1, {}, 5)[0], + _EmptyImageList, ) def test_create_from_single_sized_image_lists(self, device: Device) -> None: diff --git a/tests/safeds/data/tabular/containers/_table/test_hash.py b/tests/safeds/data/tabular/containers/_table/test_hash.py index 08eefc4d6..e91543403 100644 --- a/tests/safeds/data/tabular/containers/_table/test_hash.py +++ b/tests/safeds/data/tabular/containers/_table/test_hash.py @@ -1,4 +1,3 @@ - import pytest from safeds.data.tabular.containers import Table diff --git a/tests/safeds/data/tabular/transformation/test_label_encoder.py b/tests/safeds/data/tabular/transformation/test_label_encoder.py index 8cdda886f..0388bc394 100644 --- a/tests/safeds/data/tabular/transformation/test_label_encoder.py +++ b/tests/safeds/data/tabular/transformation/test_label_encoder.py @@ -1,7 +1,7 @@ import pytest from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation import LabelEncoder -from safeds.exceptions import ColumnNotFoundError, NonNumericColumnError, TransformerNotFittedError, ColumnTypeError +from safeds.exceptions import ColumnNotFoundError, ColumnTypeError, TransformerNotFittedError class TestFit: diff --git a/tests/safeds/data/tabular/transformation/test_simple_imputer.py b/tests/safeds/data/tabular/transformation/test_simple_imputer.py index 27060d5a2..821018dfe 100644 --- a/tests/safeds/data/tabular/transformation/test_simple_imputer.py +++ b/tests/safeds/data/tabular/transformation/test_simple_imputer.py @@ -135,7 +135,9 @@ class TestStr: ids=lambda x: x.__class__.__name__, ) def test_should_return_correct_string_representation( - self, strategy: SimpleImputer.Strategy, expected: str, + self, + strategy: SimpleImputer.Strategy, + expected: str, ) -> None: assert str(strategy) == expected From cefa66ab586f7732c01201e3c21b55b90d1ad526 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 14 May 2024 22:13:01 +0200 Subject: [PATCH 20/21] fix: mypy error --- src/safeds/data/tabular/transformation/_label_encoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/transformation/_label_encoder.py b/src/safeds/data/tabular/transformation/_label_encoder.py index 69ee29f37..532dfd1d5 100644 --- a/src/safeds/data/tabular/transformation/_label_encoder.py +++ b/src/safeds/data/tabular/transformation/_label_encoder.py @@ -193,7 +193,7 @@ def inverse_transform(self, transformed_table: Table) -> Table: ) -def _warn_if_columns_are_numeric(table: Table, column_names: list[str]): +def _warn_if_columns_are_numeric(table: Table, column_names: list[str]) -> None: numeric_columns = table.remove_columns_except(column_names).remove_non_numeric_columns().column_names if numeric_columns: warnings.warn( From 3fd592b34e7da782253f1df25b8c66f519683ec7 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Tue, 14 May 2024 20:14:41 +0000 Subject: [PATCH 21/21] style: apply automated linter fixes --- .../labeled/containers/test_image_dataset.py | 72 ++++++++++++------- 1 file changed, 45 insertions(+), 27 deletions(-) diff --git a/tests/safeds/data/labeled/containers/test_image_dataset.py b/tests/safeds/data/labeled/containers/test_image_dataset.py index 51052488a..ab0cbe197 100644 --- a/tests/safeds/data/labeled/containers/test_image_dataset.py +++ b/tests/safeds/data/labeled/containers/test_image_dataset.py @@ -135,15 +135,19 @@ def test_should_be_equal(self, image_dataset_output: str | Column | Table, devic configure_test_with_device(device) image_dataset1 = ImageDataset( ImageList.from_files(resolve_resource_path(plane_png_path)), - ImageList.from_files(resolve_resource_path(image_dataset_output)) - if isinstance(image_dataset_output, str) - else image_dataset_output, + ( + ImageList.from_files(resolve_resource_path(image_dataset_output)) + if isinstance(image_dataset_output, str) + else image_dataset_output + ), ) # type: ignore[type-var] image_dataset2 = ImageDataset( ImageList.from_files(resolve_resource_path(plane_png_path)), - ImageList.from_files(resolve_resource_path(image_dataset_output)) - if isinstance(image_dataset_output, str) - else image_dataset_output, + ( + ImageList.from_files(resolve_resource_path(image_dataset_output)) + if isinstance(image_dataset_output, str) + else image_dataset_output + ), ) # type: ignore[type-var] assert image_dataset1 is not image_dataset2 assert image_dataset1 == image_dataset2 @@ -183,15 +187,19 @@ def test_should_not_be_equal( configure_test_with_device(device) image_dataset1 = ImageDataset( ImageList.from_files(resolve_resource_path(plane_png_path)), - ImageList.from_files(resolve_resource_path(image_dataset1_output)) - if isinstance(image_dataset1_output, str) - else image_dataset1_output, + ( + ImageList.from_files(resolve_resource_path(image_dataset1_output)) + if isinstance(image_dataset1_output, str) + else image_dataset1_output + ), ) # type: ignore[type-var] image_dataset2 = ImageDataset( ImageList.from_files(resolve_resource_path(image_dataset2_input)), - ImageList.from_files(resolve_resource_path(image_dataset2_output)) - if isinstance(image_dataset2_output, str) - else image_dataset2_output, + ( + ImageList.from_files(resolve_resource_path(image_dataset2_output)) + if isinstance(image_dataset2_output, str) + else image_dataset2_output + ), ) # type: ignore[type-var] assert image_dataset1 != image_dataset2 assert image_dataset1._input._tensor.device == _get_device() @@ -220,15 +228,19 @@ def test_hash_should_be_equal(self, image_dataset_output: str | Column | Table, configure_test_with_device(device) image_dataset1 = ImageDataset( ImageList.from_files(resolve_resource_path(plane_png_path)), - ImageList.from_files(resolve_resource_path(image_dataset_output)) - if isinstance(image_dataset_output, str) - else image_dataset_output, + ( + ImageList.from_files(resolve_resource_path(image_dataset_output)) + if isinstance(image_dataset_output, str) + else image_dataset_output + ), ) # type: ignore[type-var] image_dataset2 = ImageDataset( ImageList.from_files(resolve_resource_path(plane_png_path)), - ImageList.from_files(resolve_resource_path(image_dataset_output)) - if isinstance(image_dataset_output, str) - else image_dataset_output, + ( + ImageList.from_files(resolve_resource_path(image_dataset_output)) + if isinstance(image_dataset_output, str) + else image_dataset_output + ), ) # type: ignore[type-var] assert image_dataset1 is not image_dataset2 assert hash(image_dataset1) == hash(image_dataset2) @@ -268,15 +280,19 @@ def test_hash_should_not_be_equal( configure_test_with_device(device) image_dataset1 = ImageDataset( ImageList.from_files(resolve_resource_path(plane_png_path)), - ImageList.from_files(resolve_resource_path(image_dataset1_output)) - if isinstance(image_dataset1_output, str) - else image_dataset1_output, + ( + ImageList.from_files(resolve_resource_path(image_dataset1_output)) + if isinstance(image_dataset1_output, str) + else image_dataset1_output + ), ) # type: ignore[type-var] image_dataset2 = ImageDataset( ImageList.from_files(resolve_resource_path(image_dataset2_input)), - ImageList.from_files(resolve_resource_path(image_dataset2_output)) - if isinstance(image_dataset2_output, str) - else image_dataset2_output, + ( + ImageList.from_files(resolve_resource_path(image_dataset2_output)) + if isinstance(image_dataset2_output, str) + else image_dataset2_output + ), ) # type: ignore[type-var] assert hash(image_dataset1) != hash(image_dataset2) assert image_dataset1._input._tensor.device == _get_device() @@ -303,9 +319,11 @@ def test_should_size_be_greater_than_normal_object( configure_test_with_device(device) image_dataset = ImageDataset( ImageList.from_files(resolve_resource_path(plane_png_path)), - ImageList.from_files(resolve_resource_path(image_dataset_output)) - if isinstance(image_dataset_output, str) - else image_dataset_output, + ( + ImageList.from_files(resolve_resource_path(image_dataset_output)) + if isinstance(image_dataset_output, str) + else image_dataset_output + ), ) # type: ignore[type-var] assert sys.getsizeof(image_dataset) > sys.getsizeof(object()) assert image_dataset._input._tensor.device == _get_device()