From 4cca28c5f2f0ecf8d9e2f40000bf368f730ccc29 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Wed, 29 May 2024 13:08:58 +0200 Subject: [PATCH] fix: make NNs pickleable --- src/safeds/ml/nn/_internal_model.py | 60 ++++++++ src/safeds/ml/nn/_model.py | 60 +------- .../ml/nn/layers/_convolutional2d_layer.py | 79 ++--------- src/safeds/ml/nn/layers/_flatten_layer.py | 23 +--- src/safeds/ml/nn/layers/_forward_layer.py | 34 +---- src/safeds/ml/nn/layers/_internal_layers.py | 130 ++++++++++++++++++ src/safeds/ml/nn/layers/_lstm_layer.py | 34 +---- src/safeds/ml/nn/layers/_pooling2d_layer.py | 27 +--- tests/safeds/ml/nn/test_model.py | 42 ++++++ 9 files changed, 263 insertions(+), 226 deletions(-) create mode 100644 src/safeds/ml/nn/_internal_model.py create mode 100644 src/safeds/ml/nn/layers/_internal_layers.py diff --git a/src/safeds/ml/nn/_internal_model.py b/src/safeds/ml/nn/_internal_model.py new file mode 100644 index 000000000..cb1105503 --- /dev/null +++ b/src/safeds/ml/nn/_internal_model.py @@ -0,0 +1,60 @@ +# The class must not be nested inside a function, since pickle cannot serialize local classes. Because of this, the +# slow import of torch must be on the global level. To still evaluate the torch import lazily, the class is moved to a +# separate file. + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from torch import Tensor, nn # slow import + +from safeds._config import _init_default_device +from safeds.ml.nn.converters._input_converter_image import _InputConversionImage +from safeds.ml.nn.layers import FlattenLayer, Layer +from safeds.ml.nn.layers._pooling2d_layer import _Pooling2DLayer + +if TYPE_CHECKING: + from safeds.ml.nn.converters import InputConversion + from safeds.ml.nn.typing import ModelImageSize + + +# Use torch.compile once the following issues are resolved: +# - https://github.com/pytorch/pytorch/issues/120233 (Python 3.12 support) +# - https://github.com/triton-lang/triton/issues/1640 (Windows support) +class _InternalModel(nn.Module): + def __init__(self, input_conversion: InputConversion, layers: list[Layer], is_for_classification: bool) -> None: + super().__init__() + + _init_default_device() + + self._layer_list = layers + internal_layers = [] + previous_output_size = input_conversion._data_size + + for layer in layers: + if previous_output_size is not None: + layer._set_input_size(previous_output_size) + elif isinstance(input_conversion, _InputConversionImage): + layer._set_input_size(input_conversion._data_size) + if isinstance(layer, FlattenLayer | _Pooling2DLayer): + internal_layers.append(layer._get_internal_layer()) + else: + internal_layers.append(layer._get_internal_layer(activation_function="relu")) + previous_output_size = layer.output_size + + if is_for_classification: + internal_layers.pop() + if isinstance(layers[-1].output_size, int) and layers[-1].output_size > 2: + internal_layers.append(layers[-1]._get_internal_layer(activation_function="none")) + else: + internal_layers.append(layers[-1]._get_internal_layer(activation_function="sigmoid")) + self._pytorch_layers = nn.Sequential(*internal_layers) + + @property + def input_size(self) -> int | ModelImageSize: + return self._layer_list[0].input_size + + def forward(self, x: Tensor) -> Tensor: + for layer in self._pytorch_layers: + x = layer(x) + return x diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 95c308aab..710c4df62 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -32,7 +32,7 @@ if TYPE_CHECKING: from 
collections.abc import Callable - from torch import Tensor, nn + from torch import nn from torch.nn import Module from transformers.image_processing_utils import BaseImageProcessor @@ -206,6 +206,8 @@ def fit( import torch from torch import nn + from ._internal_model import _InternalModel # Slow import on global level + _init_default_device() if not self._input_conversion._is_fit_data_valid(train_data): @@ -216,7 +218,7 @@ def fit( copied_model = copy.deepcopy(self) # TODO: How is this supposed to work with pre-trained models? Should the old weights be kept or discarded? - copied_model._model = _create_internal_model(self._input_conversion, self._layers, is_for_classification=False) + copied_model._model = _InternalModel(self._input_conversion, self._layers, is_for_classification=False) copied_model._input_size = copied_model._model.input_size copied_model._batch_size = batch_size @@ -501,6 +503,8 @@ def fit( import torch from torch import nn + from ._internal_model import _InternalModel # Slow import on global level + _init_default_device() if not self._input_conversion._is_fit_data_valid(train_data): @@ -511,7 +515,7 @@ def fit( copied_model = copy.deepcopy(self) # TODO: How is this supposed to work with pre-trained models? Should the old weights be kept or discarded? - copied_model._model = _create_internal_model(self._input_conversion, self._layers, is_for_classification=True) + copied_model._model = _InternalModel(self._input_conversion, self._layers, is_for_classification=True) copied_model._batch_size = batch_size copied_model._input_size = copied_model._model.input_size @@ -617,53 +621,3 @@ def input_size(self) -> int | ModelImageSize | None: """The input size of the model.""" # TODO: raise if not fitted, don't return None return self._input_size - - -def _create_internal_model( - input_conversion: InputConversion[IFT, IPT], - layers: list[Layer], - is_for_classification: bool, -) -> nn.Module: - from torch import nn - - _init_default_device() - - class _InternalModel(nn.Module): - def __init__(self, layers: list[Layer], is_for_classification: bool) -> None: - super().__init__() - self._layer_list = layers - internal_layers = [] - previous_output_size = input_conversion._data_size - - for layer in layers: - if previous_output_size is not None: - layer._set_input_size(previous_output_size) - elif isinstance(input_conversion, _InputConversionImage): - layer._set_input_size(input_conversion._data_size) - if isinstance(layer, FlattenLayer | _Pooling2DLayer): - internal_layers.append(layer._get_internal_layer()) - else: - internal_layers.append(layer._get_internal_layer(activation_function="relu")) - previous_output_size = layer.output_size - - if is_for_classification: - internal_layers.pop() - if isinstance(layers[-1].output_size, int) and layers[-1].output_size > 2: - internal_layers.append(layers[-1]._get_internal_layer(activation_function="none")) - else: - internal_layers.append(layers[-1]._get_internal_layer(activation_function="sigmoid")) - self._pytorch_layers = nn.Sequential(*internal_layers) - - @property - def input_size(self) -> int | ModelImageSize: - return self._layer_list[0].input_size - - def forward(self, x: Tensor) -> Tensor: - for layer in self._pytorch_layers: - x = layer(x) - return x - - # Use torch.compile once the following issues are resolved: - # - https://github.com/pytorch/pytorch/issues/120233 (Python 3.12 support) - # - https://github.com/triton-lang/triton/issues/1640 (Windows support) - return _InternalModel(layers, is_for_classification) diff --git 
a/src/safeds/ml/nn/layers/_convolutional2d_layer.py b/src/safeds/ml/nn/layers/_convolutional2d_layer.py index 92879f15e..dd42f2d97 100644 --- a/src/safeds/ml/nn/layers/_convolutional2d_layer.py +++ b/src/safeds/ml/nn/layers/_convolutional2d_layer.py @@ -4,82 +4,14 @@ import sys from typing import TYPE_CHECKING, Any, Literal -from safeds._config import _init_default_device from safeds._utils import _structural_hash from ._layer import Layer if TYPE_CHECKING: - from torch import Tensor, nn - - from safeds.ml.nn.typing import ModelImageSize - - -def _create_internal_model( - input_size: int, - output_size: int, - kernel_size: int, - activation_function: Literal["sigmoid", "relu", "softmax"], - padding: int, - stride: int, - transpose: bool, - output_padding: int = 0, -) -> nn.Module: from torch import nn - _init_default_device() - - class _InternalLayer(nn.Module): - def __init__( - self, - input_size: int, - output_size: int, - kernel_size: int, - activation_function: Literal["sigmoid", "relu", "softmax"], - padding: int, - stride: int, - transpose: bool, - output_padding: int, - ): - super().__init__() - if transpose: - self._layer = nn.ConvTranspose2d( - in_channels=input_size, - out_channels=output_size, - kernel_size=kernel_size, - padding=padding, - stride=stride, - output_padding=output_padding, - ) - else: - self._layer = nn.Conv2d( - in_channels=input_size, - out_channels=output_size, - kernel_size=kernel_size, - padding=padding, - stride=stride, - ) - match activation_function: - case "sigmoid": - self._fn = nn.Sigmoid() - case "relu": - self._fn = nn.ReLU() - case "softmax": - self._fn = nn.Softmax() - - def forward(self, x: Tensor) -> Tensor: - return self._fn(self._layer(x)) - - return _InternalLayer( - input_size, - output_size, - kernel_size, - activation_function, - padding, - stride, - transpose, - output_padding, - ) + from safeds.ml.nn.typing import ModelImageSize class Convolutional2DLayer(Layer): @@ -107,6 +39,8 @@ def __init__(self, output_channel: int, kernel_size: int, *, stride: int = 1, pa self._output_size: ModelImageSize | None = None def _get_internal_layer(self, **kwargs: Any) -> nn.Module: + from ._internal_layers import _InternalConvolutional2DLayer # slow import on global level + if self._input_size is None: raise ValueError( "The input_size is not yet set. The internal layer can only be created when the input_size is set.", @@ -121,7 +55,7 @@ def _get_internal_layer(self, **kwargs: Any) -> nn.Module: ) else: activation_function: Literal["sigmoid", "relu", "softmax"] = kwargs["activation_function"] - return _create_internal_model( + return _InternalConvolutional2DLayer( self._input_size.channel, self._output_channel, self._kernel_size, @@ -129,6 +63,7 @@ def _get_internal_layer(self, **kwargs: Any) -> nn.Module: self._padding, self._stride, transpose=False, + output_padding=0, ) @property @@ -254,6 +189,8 @@ def __init__( self._output_padding = output_padding def _get_internal_layer(self, **kwargs: Any) -> nn.Module: + from ._internal_layers import _InternalConvolutional2DLayer # slow import on global level + if self._input_size is None: raise ValueError( "The input_size is not yet set. 
The internal layer can only be created when the input_size is set.", @@ -268,7 +205,7 @@ def _get_internal_layer(self, **kwargs: Any) -> nn.Module: ) else: activation_function: Literal["sigmoid", "relu", "softmax"] = kwargs["activation_function"] - return _create_internal_model( + return _InternalConvolutional2DLayer( self._input_size.channel, self._output_channel, self._kernel_size, diff --git a/src/safeds/ml/nn/layers/_flatten_layer.py b/src/safeds/ml/nn/layers/_flatten_layer.py index cacd97bd6..a84551c2b 100644 --- a/src/safeds/ml/nn/layers/_flatten_layer.py +++ b/src/safeds/ml/nn/layers/_flatten_layer.py @@ -3,32 +3,15 @@ import sys from typing import TYPE_CHECKING, Any -from safeds._config import _init_default_device from safeds._utils import _structural_hash from safeds.ml.nn.typing import ConstantImageSize from ._layer import Layer if TYPE_CHECKING: - from torch import Tensor, nn - - from safeds.ml.nn.typing import ModelImageSize - - -def _create_internal_model() -> nn.Module: from torch import nn - _init_default_device() - - class _InternalLayer(nn.Module): - def __init__(self) -> None: - super().__init__() - self._layer = nn.Flatten() - - def forward(self, x: Tensor) -> Tensor: - return self._layer(x) - - return _InternalLayer() + from safeds.ml.nn.typing import ModelImageSize class FlattenLayer(Layer): @@ -39,7 +22,9 @@ def __init__(self) -> None: self._output_size: int | None = None def _get_internal_layer(self, **kwargs: Any) -> nn.Module: # noqa: ARG002 - return _create_internal_model() + from ._internal_layers import _InternalFlattenLayer # Slow import on global level + + return _InternalFlattenLayer() @property def input_size(self) -> ModelImageSize: diff --git a/src/safeds/ml/nn/layers/_forward_layer.py b/src/safeds/ml/nn/layers/_forward_layer.py index e083e56b8..e420b78ec 100644 --- a/src/safeds/ml/nn/layers/_forward_layer.py +++ b/src/safeds/ml/nn/layers/_forward_layer.py @@ -2,7 +2,6 @@ from typing import TYPE_CHECKING, Any -from safeds._config import _init_default_device from safeds._utils import _structural_hash from safeds._validation import _check_bounds, _ClosedBound from safeds.ml.nn.typing import ModelImageSize @@ -10,7 +9,7 @@ from ._layer import Layer if TYPE_CHECKING: - from torch import Tensor, nn + from torch import nn class ForwardLayer(Layer): @@ -36,6 +35,8 @@ def __init__(self, neuron_count: int): self._output_size = neuron_count def _get_internal_layer(self, **kwargs: Any) -> nn.Module: + from ._internal_layers import _InternalForwardLayer # Slow import on global level + if "activation_function" not in kwargs: raise ValueError( "The activation_function is not set. 
The internal layer can only be created when the activation_function is provided in the kwargs.", @@ -46,7 +47,7 @@ def _get_internal_layer(self, **kwargs: Any) -> nn.Module: if self._input_size is None: raise ValueError("The input_size is not yet set.") - return _create_internal_model(self._input_size, self._output_size, activation_function) + return _InternalForwardLayer(self._input_size, self._output_size, activation_function) @property def input_size(self) -> int: @@ -95,30 +96,3 @@ def __sizeof__(self) -> int: import sys return sys.getsizeof(self._input_size) + sys.getsizeof(self._output_size) - - -def _create_internal_model(input_size: int, output_size: int, activation_function: str) -> nn.Module: - from torch import nn - - _init_default_device() - - class _InternalLayer(nn.Module): - def __init__(self, input_size: int, output_size: int, activation_function: str): - super().__init__() - self._layer = nn.Linear(input_size, output_size) - match activation_function: - case "sigmoid": - self._fn = nn.Sigmoid() - case "relu": - self._fn = nn.ReLU() - case "softmax": - self._fn = nn.Softmax() - case "none": - self._fn = None - case _: - raise ValueError("Unknown Activation Function: " + activation_function) - - def forward(self, x: Tensor) -> Tensor: - return self._fn(self._layer(x)) if self._fn is not None else self._layer(x) - - return _InternalLayer(input_size, output_size, activation_function) diff --git a/src/safeds/ml/nn/layers/_internal_layers.py b/src/safeds/ml/nn/layers/_internal_layers.py new file mode 100644 index 000000000..140be6807 --- /dev/null +++ b/src/safeds/ml/nn/layers/_internal_layers.py @@ -0,0 +1,130 @@ +# These class must not be nested inside a function, since pickle cannot serialize local classes. Because of this, the +# slow import of torch must be on the global level. To still evaluate the torch import lazily, the classes are moved to +# a separate file. 
+ +from __future__ import annotations + +from typing import Literal + +from torch import Tensor, nn # slow import + +from safeds._config import _init_default_device + + +class _InternalConvolutional2DLayer(nn.Module): + def __init__( + self, + input_size: int, + output_size: int, + kernel_size: int, + activation_function: Literal["sigmoid", "relu", "softmax"], + padding: int, + stride: int, + transpose: bool, + output_padding: int, + ): + super().__init__() + + _init_default_device() + + if transpose: + self._layer = nn.ConvTranspose2d( + in_channels=input_size, + out_channels=output_size, + kernel_size=kernel_size, + padding=padding, + stride=stride, + output_padding=output_padding, + ) + else: + self._layer = nn.Conv2d( + in_channels=input_size, + out_channels=output_size, + kernel_size=kernel_size, + padding=padding, + stride=stride, + ) + match activation_function: + case "sigmoid": + self._fn = nn.Sigmoid() + case "relu": + self._fn = nn.ReLU() + case "softmax": + self._fn = nn.Softmax() + + def forward(self, x: Tensor) -> Tensor: + return self._fn(self._layer(x)) + + +class _InternalFlattenLayer(nn.Module): + def __init__(self) -> None: + super().__init__() + + _init_default_device() + + self._layer = nn.Flatten() + + def forward(self, x: Tensor) -> Tensor: + return self._layer(x) + + +class _InternalForwardLayer(nn.Module): + def __init__(self, input_size: int, output_size: int, activation_function: str): + super().__init__() + + _init_default_device() + + self._layer = nn.Linear(input_size, output_size) + match activation_function: + case "sigmoid": + self._fn = nn.Sigmoid() + case "relu": + self._fn = nn.ReLU() + case "softmax": + self._fn = nn.Softmax() + case "none": + self._fn = None + case _: + raise ValueError("Unknown Activation Function: " + activation_function) + + def forward(self, x: Tensor) -> Tensor: + return self._fn(self._layer(x)) if self._fn is not None else self._layer(x) + + +class _InternalLSTMLayer(nn.Module): + def __init__(self, input_size: int, output_size: int, activation_function: str): + super().__init__() + + _init_default_device() + + self._layer = nn.LSTM(input_size, output_size) + match activation_function: + case "sigmoid": + self._fn = nn.Sigmoid() + case "relu": + self._fn = nn.ReLU() + case "softmax": + self._fn = nn.Softmax() + case "none": + self._fn = None + case _: + raise ValueError("Unknown Activation Function: " + activation_function) + + def forward(self, x: Tensor) -> Tensor: + return self._fn(self._layer(x)[0]) if self._fn is not None else self._layer(x)[0] + + +class _InternalPooling2DLayer(nn.Module): + def __init__(self, strategy: Literal["max", "avg"], kernel_size: int, padding: int, stride: int): + super().__init__() + + _init_default_device() + + match strategy: + case "max": + self._layer = nn.MaxPool2d(kernel_size=kernel_size, padding=padding, stride=stride) + case "avg": + self._layer = nn.AvgPool2d(kernel_size=kernel_size, padding=padding, stride=stride) + + def forward(self, x: Tensor) -> Tensor: + return self._layer(x) diff --git a/src/safeds/ml/nn/layers/_lstm_layer.py b/src/safeds/ml/nn/layers/_lstm_layer.py index d4de325e3..330809474 100644 --- a/src/safeds/ml/nn/layers/_lstm_layer.py +++ b/src/safeds/ml/nn/layers/_lstm_layer.py @@ -3,7 +3,6 @@ import sys from typing import TYPE_CHECKING, Any -from safeds._config import _init_default_device from safeds._utils import _structural_hash from safeds._validation import _check_bounds, _ClosedBound from safeds.ml.nn.typing import ModelImageSize @@ -11,7 +10,7 @@ from ._layer 
import Layer if TYPE_CHECKING: - from torch import Tensor, nn + from torch import nn class LSTMLayer(Layer): @@ -37,6 +36,8 @@ def __init__(self, neuron_count: int): self._output_size = neuron_count def _get_internal_layer(self, **kwargs: Any) -> nn.Module: + from ._internal_layers import _InternalLSTMLayer # Slow import on global level + if "activation_function" not in kwargs: raise ValueError( "The activation_function is not set. The internal layer can only be created when the activation_function is provided in the kwargs.", @@ -47,7 +48,7 @@ def _get_internal_layer(self, **kwargs: Any) -> nn.Module: if self._input_size is None: raise ValueError("The input_size is not yet set.") - return _create_internal_model(self._input_size, self._output_size, activation_function) + return _InternalLSTMLayer(self._input_size, self._output_size, activation_function) @property def input_size(self) -> int: @@ -94,30 +95,3 @@ def __eq__(self, other: object) -> bool: def __sizeof__(self) -> int: return sys.getsizeof(self._input_size) + sys.getsizeof(self._output_size) - - -def _create_internal_model(input_size: int, output_size: int, activation_function: str) -> nn.Module: - from torch import nn - - _init_default_device() - - class _InternalLayer(nn.Module): - def __init__(self, input_size: int, output_size: int, activation_function: str): - super().__init__() - self._layer = nn.LSTM(input_size, output_size) - match activation_function: - case "sigmoid": - self._fn = nn.Sigmoid() - case "relu": - self._fn = nn.ReLU() - case "softmax": - self._fn = nn.Softmax() - case "none": - self._fn = None - case _: - raise ValueError("Unknown Activation Function: " + activation_function) - - def forward(self, x: Tensor) -> Tensor: - return self._fn(self._layer(x)[0]) if self._fn is not None else self._layer(x)[0] - - return _InternalLayer(input_size, output_size, activation_function) diff --git a/src/safeds/ml/nn/layers/_pooling2d_layer.py b/src/safeds/ml/nn/layers/_pooling2d_layer.py index b3c17b1f5..d658ed848 100644 --- a/src/safeds/ml/nn/layers/_pooling2d_layer.py +++ b/src/safeds/ml/nn/layers/_pooling2d_layer.py @@ -4,13 +4,12 @@ import sys from typing import TYPE_CHECKING, Any, Literal -from safeds._config import _init_default_device from safeds._utils import _structural_hash from ._layer import Layer if TYPE_CHECKING: - from torch import Tensor, nn + from torch import nn from safeds.ml.nn.typing import ModelImageSize @@ -40,7 +39,9 @@ def __init__(self, strategy: Literal["max", "avg"], kernel_size: int, *, stride: self._output_size: ModelImageSize | None = None def _get_internal_layer(self, **kwargs: Any) -> nn.Module: # noqa: ARG002 - return _create_internal_model(self._strategy, self._kernel_size, self._padding, self._stride) + from ._internal_layers import _InternalPooling2DLayer # Slow import on global level + + return _InternalPooling2DLayer(self._strategy, self._kernel_size, self._padding, self._stride) @property def input_size(self) -> ModelImageSize: @@ -168,23 +169,3 @@ class AveragePooling2DLayer(_Pooling2DLayer): def __init__(self, kernel_size: int, *, stride: int = -1, padding: int = 0) -> None: super().__init__("avg", kernel_size, stride=stride, padding=padding) - - -def _create_internal_model(strategy: Literal["max", "avg"], kernel_size: int, padding: int, stride: int) -> nn.Module: - from torch import nn - - _init_default_device() - - class _InternalLayer(nn.Module): - def __init__(self, strategy: Literal["max", "avg"], kernel_size: int, padding: int, stride: int): - super().__init__() - match 
strategy: - case "max": - self._layer = nn.MaxPool2d(kernel_size=kernel_size, padding=padding, stride=stride) - case "avg": - self._layer = nn.AvgPool2d(kernel_size=kernel_size, padding=padding, stride=stride) - - def forward(self, x: Tensor) -> Tensor: - return self._layer(x) - - return _InternalLayer(strategy, kernel_size, padding, stride) diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index df6261e30..5b8022a2c 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -1,3 +1,5 @@ +import pickle + import pytest from safeds.data.image.typing import ImageSize from safeds.data.labeled.containers import TabularDataset @@ -442,6 +444,26 @@ def test_should_raise_if_model_has_invalid_structure( with pytest.raises(InvalidModelStructureError, match=error_msg): NeuralNetworkClassifier(input_conversion, layers) + def test_should_be_pickleable(self, device: Device) -> None: + configure_test_with_device(device) + model = NeuralNetworkClassifier( + InputConversionTable(), + [ + ForwardLayer(1), + ], + ) + fitted_model = model.fit( + Table( + { + "a": [0], + "b": [0], + }, + ).to_tabular_dataset("a"), + ) + + # Should not raise + pickle.dumps(fitted_model) + @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestRegressionModel: @@ -739,3 +761,23 @@ def test_should_raise_if_model_has_invalid_structure( configure_test_with_device(device) with pytest.raises(InvalidModelStructureError, match=error_msg): NeuralNetworkRegressor(input_conversion, layers) + + def test_should_be_pickleable(self, device: Device) -> None: + configure_test_with_device(device) + model = NeuralNetworkRegressor( + InputConversionTable(), + [ + ForwardLayer(1), + ], + ) + fitted_model = model.fit( + Table( + { + "a": [0], + "b": [0], + }, + ).to_tabular_dataset("a"), + ) + + # Should not raise + pickle.dumps(fitted_model)