Skip to content

Fix bug with column names in SklearnTransform #398

Merged
merged 11 commits into from
Dec 22, 2021
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Catboost version too high ([#394](https://github.com/tinkoff-ai/etna/pull/394))
- Add sorting of classes in left bar in docs ([#397](https://github.com/tinkoff-ai/etna/pull/397))
- nn notebook in docs ([#396](https://github.com/tinkoff-ai/etna/pull/396))
- SklearnTransform column name generation ([#398](https://github.com/tinkoff-ai/etna/pull/398))

## [1.4.2] - 2021-12-09
### Fixed
Expand Down
1 change: 1 addition & 0 deletions etna/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from etna.core.mixins import BaseMixin
from etna.core.mixins import StringEnumWithRepr
9 changes: 9 additions & 0 deletions etna/core/mixins.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import inspect
import warnings
from enum import Enum


class BaseMixin:
Expand All @@ -24,3 +25,11 @@ def __repr__(self):
warnings.warn(f"You haven't set all parameters inside class __init__ method: {e}")
args_str_representation += f"{arg} = {value.__repr__()}, "
return f"{self.__class__.__name__}({args_str_representation})"


class StringEnumWithRepr(str, Enum):
    """Base class for string enums whose ``__repr__`` round-trips through the member's value."""

    def __repr__(self):
        """Return the repr of the underlying string value, so the enum can be re-created from it."""
        return repr(self.value)
35 changes: 16 additions & 19 deletions etna/transforms/power.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import warnings
from typing import List
from typing import Optional
from typing import Union
Expand Down Expand Up @@ -26,27 +25,26 @@ def __init__(
Parameters
----------
in_column:
name of processed column
columns to be transformed, if None - all columns will be transformed.
inplace:
if True, apply transformation inplace to in_column,
if False, add column to dataset.
out_column:
name of added column. Use self.__repr__() if not given
base for the names of generated columns, uses self.__repr__() if not given.
standardize:
Set to True to apply zero-mean, unit-variance normalization to the
transformed output.
Raises
------
ValueError:
if incorrect mode given
"""
if inplace and (out_column is not None):
warnings.warn("Transformation will be applied inplace, out_column param will be ignored")
self.standardize = standardize
self.inplace = inplace
self.out_column = out_column
self.mode = TransformMode(mode)
self.in_column = [in_column] if isinstance(in_column, str) else in_column
super().__init__(
in_column=in_column,
inplace=inplace,
out_column=self.out_column if self.out_column is not None else self.__repr__(),
out_column=out_column,
transformer=PowerTransformer(method="yeo-johnson", standardize=self.standardize),
mode=mode,
)
Expand All @@ -69,27 +67,26 @@ def __init__(
Parameters
----------
in_column:
name of processed column
columns to be transformed, if None - all columns will be transformed.
inplace:
if True, apply transformation inplace to in_column,
if False, add column to dataset.
out_column:
name of added column. Use self.__repr__() if not given.
base for the names of generated columns, uses self.__repr__() if not given.
standardize:
Set to True to apply zero-mean, unit-variance normalization to the
transformed output.
Raises
------
ValueError:
if incorrect mode given
"""
if inplace and (out_column is not None):
warnings.warn("Transformation will be applied inplace, out_column param will be ignored")
self.standardize = standardize
self.in_column = [in_column] if isinstance(in_column, str) else in_column
self.inplace = inplace
self.out_column = out_column
self.mode = TransformMode(mode)
super().__init__(
in_column=in_column,
inplace=inplace,
out_column=self.out_column if self.out_column is not None else self.__repr__(),
out_column=out_column,
transformer=PowerTransformer(method="box-cox", standardize=self.standardize),
mode=mode,
)
Expand Down
53 changes: 14 additions & 39 deletions etna/transforms/scalers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import warnings
from typing import List
from typing import Optional
from typing import Tuple
Expand Down Expand Up @@ -38,7 +37,7 @@ def __init__(
inplace:
features are changed by scaled.
out_column:
name of added column. Use self.__repr__() if not given.
base for the names of generated columns, uses self.__repr__() if not given.
with_mean:
if True, center the data before scaling.
with_std:
Expand All @@ -53,18 +52,12 @@ def __init__(
ValueError:
if incorrect mode given
"""
if inplace and (out_column is not None):
warnings.warn("Transformation will be applied inplace, out_column param will be ignored")
self.in_column = [in_column] if isinstance(in_column, str) else in_column
self.inplace = inplace
self.mode = TransformMode(mode)
self.with_mean = with_mean
self.with_std = with_std
self.out_column = out_column
super().__init__(
in_column=in_column,
transformer=StandardScaler(with_mean=with_mean, with_std=with_std, copy=True),
out_column=self.out_column if self.out_column is not None else self.__repr__(),
transformer=StandardScaler(with_mean=self.with_mean, with_std=self.with_std, copy=True),
out_column=out_column,
inplace=inplace,
mode=mode,
)
Expand Down Expand Up @@ -97,7 +90,7 @@ def __init__(
inplace:
features are changed by scaled.
out_column:
name of added column. Use self.__repr__() if not given.
base for the names of generated columns, uses self.__repr__() if not given.
with_centering:
if True, center the data before scaling.
with_scaling:
Expand All @@ -119,25 +112,19 @@ def __init__(
ValueError:
if incorrect mode given
"""
if inplace and (out_column is not None):
warnings.warn("Transformation will be applied inplace, out_column param will be ignored")
self.in_column = [in_column] if isinstance(in_column, str) else in_column
self.out_column = out_column
self.inplace = inplace
self.mode = TransformMode(mode)
self.with_centering = with_centering
self.with_scaling = with_scaling
self.quantile_range = quantile_range
self.unit_variance = unit_variance
super().__init__(
in_column=in_column,
inplace=inplace,
out_column=self.out_column if self.out_column is not None else self.__repr__(),
out_column=out_column,
transformer=RobustScaler(
with_centering=with_centering,
with_scaling=with_scaling,
quantile_range=quantile_range,
unit_variance=unit_variance,
with_centering=self.with_centering,
with_scaling=self.with_scaling,
quantile_range=self.quantile_range,
unit_variance=self.unit_variance,
copy=True,
),
mode=mode,
Expand Down Expand Up @@ -169,7 +156,7 @@ def __init__(
inplace:
features are changed by scaled.
out_column:
name of added column. Use self.__repr__() if not given.
base for the names of generated columns, uses self.__repr__() if not given.
feature_range:
desired range of transformed data.
clip:
Expand All @@ -184,19 +171,13 @@ def __init__(
ValueError:
if incorrect mode given
"""
if inplace and (out_column is not None):
warnings.warn("Transformation will be applied inplace, out_column param will be ignored")
self.in_column = [in_column] if isinstance(in_column, str) else in_column
self.out_column = out_column
self.inplace = inplace
self.mode = TransformMode(mode)
self.feature_range = feature_range
self.clip = clip
super().__init__(
in_column=in_column,
inplace=inplace,
out_column=self.out_column if self.out_column is not None else self.__repr__(),
transformer=MinMaxScaler(feature_range=feature_range, clip=clip, copy=True),
out_column=out_column,
transformer=MinMaxScaler(feature_range=self.feature_range, clip=self.clip, copy=True),
mode=mode,
)

Expand All @@ -223,7 +204,7 @@ def __init__(
inplace:
features are changed by scaled.
out_column:
name of added column. Use self.__repr__() if not given.
base for the names of generated columns, uses self.__repr__() if not given.
mode:
"macro" or "per-segment", way to transform features over segments.
If "macro", transforms features globally, gluing the corresponding ones for all segments.
Expand All @@ -234,16 +215,10 @@ def __init__(
ValueError:
if incorrect mode given
"""
if inplace and (out_column is not None):
warnings.warn("Transformation will be applied inplace, out_column param will be ignored")
self.in_column = [in_column] if isinstance(in_column, str) else in_column
self.inplace = inplace
self.mode = TransformMode(mode)
self.out_column = out_column
super().__init__(
in_column=in_column,
inplace=inplace,
out_column=self.out_column if self.out_column is not None else self.__repr__(),
out_column=out_column,
transformer=MaxAbsScaler(copy=True),
mode=mode,
)
Expand Down
41 changes: 32 additions & 9 deletions etna/transforms/sklearn.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from enum import Enum
import warnings
from copy import deepcopy
from typing import List
from typing import Optional
from typing import Union
Expand All @@ -7,10 +8,11 @@
import pandas as pd
from sklearn.base import TransformerMixin

from etna.core import StringEnumWithRepr
from etna.transforms.base import Transform


class TransformMode(str, Enum):
class TransformMode(StringEnumWithRepr):
    """Enum for different modes of applying a transform over segments."""

macro = "macro"
Expand All @@ -23,7 +25,7 @@ class SklearnTransform(Transform):
def __init__(
self,
in_column: Optional[Union[str, List[str]]],
out_column: str,
out_column: Optional[str],
transformer: TransformerMixin,
inplace: bool = True,
mode: Union[TransformMode, str] = "per-segment",
Expand All @@ -34,13 +36,13 @@ def __init__(
Parameters
----------
in_column:
columns to be transformed, if None - all columns will be scaled.
columns to be transformed, if None - all columns will be transformed.
transformer:
sklearn.base.TransformerMixin instance.
inplace:
features are changed by transformed.
out_column:
name of result column
base for the names of generated columns, uses self.__repr__() if not given.
mode:
"macro" or "per-segment", way to transform features over segments.
If "macro", transforms features globally, gluing the corresponding ones for all segments.
Expand All @@ -51,13 +53,28 @@ def __init__(
ValueError:
if incorrect mode given
"""
if inplace and (out_column is not None):
warnings.warn("Transformation will be applied inplace, out_column param will be ignored")

self.transformer = transformer

if isinstance(in_column, str):
in_column = [in_column]
self.in_column = in_column if in_column is None else sorted(in_column)

self.inplace = inplace
self.mode = TransformMode(mode)
self.out_column_name = out_column
self.out_column = out_column

self.out_columns: Optional[List[str]] = None

def _get_column_name(self, in_column: str) -> str:
if self.out_column is None:
new_transform = deepcopy(self)
new_transform.in_column = [in_column]
return f"{new_transform.__repr__()}"
else:
return f"{self.out_column}_{in_column}"

def fit(self, df: pd.DataFrame) -> "SklearnTransform":
"""
Expand All @@ -73,14 +90,22 @@ def fit(self, df: pd.DataFrame) -> "SklearnTransform":
self
"""
segments = sorted(set(df.columns.get_level_values("segment")))

if self.in_column is None:
self.in_column = sorted(set(df.columns.get_level_values("feature")))

if self.inplace:
self.out_columns = self.in_column
else:
self.out_columns = [self._get_column_name(column) for column in self.in_column]

if self.mode == TransformMode.per_segment:
x = df.loc[:, (segments, self.in_column)].values
elif self.mode == TransformMode.macro:
x = self._reshape(df)
else:
raise ValueError(f"'{self.mode}' is not a valid TransformMode.")

self.transformer.fit(X=x)
return self

Expand Down Expand Up @@ -114,9 +139,7 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
transformed_features = pd.DataFrame(
transformed, columns=df.loc[:, (segments, self.in_column)].columns, index=df.index
)
transformed_features.columns = pd.MultiIndex.from_tuples(
[(segment_name, self.out_column_name) for segment_name, feature_name in transformed_features.columns]
)
transformed_features.columns = pd.MultiIndex.from_product([segments, self.out_columns])
df = pd.concat((df, transformed_features), axis=1)
df = df.sort_index(axis=1)

Expand Down
Loading