From 13cceb5ca53965cb46b53b376f97bef349ba9574 Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 21 Feb 2022 10:13:08 +0300 Subject: [PATCH 1/4] Add multisegmentmodel --- etna/models/base.py | 69 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/etna/models/base.py b/etna/models/base.py index 92eabe87e..5985d981d 100644 --- a/etna/models/base.py +++ b/etna/models/base.py @@ -336,3 +336,72 @@ def forecast( ts.df = df ts.inverse_transform() return ts + + +class MultisegmentModel(FitAbstractModel, ForecastAbstractModel, BaseMixin): + """Class for holding specific models for per-segment prediction.""" + + def __init__(self, base_model: Any): + """ + Init MultisegmentModel. + + Parameters + ---------- + base_model: + Internal model which will be used to forecast segments, expected to have fit/predict interface + """ + self._base_model = base_model + + @log_decorator + def fit(self, ts: TSDataset) -> "MultisegmentModel": + """Fit model. + + Parameters + ---------- + ts: + Dataset with features + + Returns + ------- + self: + Model after fit + """ + df = ts.to_pandas(flatten=True) + df = df.dropna() # TODO + df = df.drop(columns="segment") + self._base_model.fit(df=df, regressors=ts.regressors) + return self + + @log_decorator + def forecast(self, ts: TSDataset) -> TSDataset: + """Make predictions. + + Parameters + ---------- + ts: + Dataset with features + + Returns + ------- + forecast: + Dataset with predictions + """ + horizon = len(ts.df) + x = ts.to_pandas(flatten=True).drop(["segment"], axis=1) + y = self._base_model.predict(x).reshape(-1, horizon).T + ts.loc[:, pd.IndexSlice[:, "target"]] = y + ts.inverse_transform() + return ts + + def get_model(self) -> Any: + """Get internal model that is used inside etna class. + + Internal model is a model that is used inside etna to forecast segments, e.g. `catboost.CatBoostRegressor` + or `sklearn.linear_model.Ridge`. 
+ + Returns + ------- + result: + Internal model + """ + return self._base_model From f14f97f9cd6846440daedc00846c059e9b7598b5 Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 21 Feb 2022 10:13:40 +0300 Subject: [PATCH 2/4] Update sklearn+catboost --- etna/models/catboost.py | 54 +++++++++-------------------------------- etna/models/sklearn.py | 40 +++--------------------------- 2 files changed, 16 insertions(+), 78 deletions(-) diff --git a/etna/models/catboost.py b/etna/models/catboost.py index 7711c3163..b5280b56c 100644 --- a/etna/models/catboost.py +++ b/etna/models/catboost.py @@ -6,10 +6,8 @@ from catboost import CatBoostRegressor from catboost import Pool -from etna.datasets.tsdataset import TSDataset -from etna.models.base import Model +from etna.models.base import MultisegmentModel from etna.models.base import PerSegmentModel -from etna.models.base import log_decorator class _CatBoostAdapter: @@ -190,7 +188,7 @@ def __init__( ) -class CatBoostModelMultiSegment(Model): +class CatBoostModelMultiSegment(MultisegmentModel): """Class for holding Catboost model for all segments. 
Examples @@ -290,42 +288,14 @@ def __init__( self.l2_leaf_reg = l2_leaf_reg self.thread_count = thread_count self.kwargs = kwargs - super(CatBoostModelMultiSegment, self).__init__() - self._base_model = _CatBoostAdapter( - iterations=iterations, - depth=depth, - learning_rate=learning_rate, - logging_level=logging_level, - thread_count=thread_count, - l2_leaf_reg=l2_leaf_reg, - **kwargs, + super().__init__( + base_model=_CatBoostAdapter( + iterations=iterations, + depth=depth, + learning_rate=learning_rate, + logging_level=logging_level, + thread_count=thread_count, + l2_leaf_reg=l2_leaf_reg, + **kwargs, + ) ) - - @log_decorator - def fit(self, ts: TSDataset) -> "CatBoostModelMultiSegment": - """Fit model.""" - df = ts.to_pandas(flatten=True) - df = df.dropna() - df = df.drop(columns="segment") - self._base_model.fit(df=df, regressors=ts.regressors) - return self - - @log_decorator - def forecast(self, ts: TSDataset) -> TSDataset: - """Make predictions. - - Parameters - ---------- - ts: - Dataframe with features - Returns - ------- - DataFrame - Models result - """ - horizon = len(ts.df) - x = ts.to_pandas(flatten=True).drop(["segment"], axis=1) - y = self._base_model.predict(x).reshape(-1, horizon).T - ts.loc[:, pd.IndexSlice[:, "target"]] = y - ts.inverse_transform() - return ts diff --git a/etna/models/sklearn.py b/etna/models/sklearn.py index 6ab65a298..91e905f06 100644 --- a/etna/models/sklearn.py +++ b/etna/models/sklearn.py @@ -5,10 +5,8 @@ import pandas as pd from sklearn.base import RegressorMixin -from etna.datasets.tsdataset import TSDataset -from etna.models.base import Model +from etna.models.base import MultisegmentModel from etna.models.base import PerSegmentModel -from etna.models.base import log_decorator class _SklearnAdapter: @@ -77,7 +75,7 @@ def __init__(self, regressor: RegressorMixin): super().__init__(base_model=_SklearnAdapter(regressor=regressor)) -class SklearnMultiSegmentModel(Model): +class SklearnMultiSegmentModel(MultisegmentModel): 
"""Class for holding Sklearn model for all segments.""" def __init__(self, regressor: RegressorMixin): @@ -87,36 +85,6 @@ def __init__(self, regressor: RegressorMixin): Parameters ---------- regressor: - sklearn model for regression - """ - super().__init__() - self._base_model = _SklearnAdapter(regressor=regressor) - - @log_decorator - def fit(self, ts: TSDataset) -> "SklearnMultiSegmentModel": - """Fit model.""" - df = ts.to_pandas(flatten=True) - df = df.dropna() - df = df.drop(columns="segment") - self._base_model.fit(df=df, regressors=ts.regressors) - return self - - @log_decorator - def forecast(self, ts: TSDataset) -> TSDataset: - """Make predictions. - - Parameters - ---------- - ts: - Dataframe with features - Returns - ------- - DataFrame - Models result + Sklearn model for regression """ - horizon = len(ts.df) - x = ts.to_pandas(flatten=True).drop(["segment"], axis=1) - y = self._base_model.predict(x).reshape(-1, horizon).T - ts.loc[:, pd.IndexSlice[:, "target"]] = y - ts.inverse_transform() - return ts + super().__init__(base_model=_SklearnAdapter(regressor=regressor)) From 828a39e3ad59ef403adab6f0dd525aa76c942642 Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 21 Feb 2022 10:23:37 +0300 Subject: [PATCH 3/4] Update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 856e0078c..b0fb7a567 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,7 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add plot_residuals ([#539](https://github.com/tinkoff-ai/etna/pull/539)) - - Create `PerSegmentBaseModel`, `PerSegmentPredictionIntervalModel` ([#537](https://github.com/tinkoff-ai/etna/pull/537)) -- +- Create `MultisegmentModel` ([#551](https://github.com/tinkoff-ai/etna/pull/551)) ### Changed - Change the way `ProphetModel` works with regressors ([#383](https://github.com/tinkoff-ai/etna/pull/383)) - Change the way `SARIMAXModel` works with regressors 
([#380](https://github.com/tinkoff-ai/etna/pull/380)) From 3ff67e513a8803b01c4cadd1cbceba643343ee58 Mon Sep 17 00:00:00 2001 From: alex-hse-repository Date: Mon, 21 Feb 2022 17:46:37 +0300 Subject: [PATCH 4/4] Rename multisegmentmodel + add reference --- etna/models/base.py | 10 +++++----- etna/models/catboost.py | 4 ++-- etna/models/sklearn.py | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/etna/models/base.py b/etna/models/base.py index 5985d981d..15637ff0b 100644 --- a/etna/models/base.py +++ b/etna/models/base.py @@ -204,7 +204,7 @@ def fit(self, ts: TSDataset) -> "PerSegmentBaseModel": for segment, model in self._models.items(): segment_features = ts[:, segment, :] - segment_features = segment_features.dropna() + segment_features = segment_features.dropna() # TODO: https://github.com/tinkoff-ai/etna/issues/557 segment_features = segment_features.droplevel("segment", axis=1) segment_features = segment_features.reset_index() model.fit(df=segment_features, regressors=ts.regressors) @@ -338,12 +338,12 @@ def forecast( return ts -class MultisegmentModel(FitAbstractModel, ForecastAbstractModel, BaseMixin): +class MultiSegmentModel(FitAbstractModel, ForecastAbstractModel, BaseMixin): """Class for holding specific models for per-segment prediction.""" def __init__(self, base_model: Any): """ - Init MultisegmentModel. + Init MultiSegmentModel. Parameters ---------- @@ -353,7 +353,7 @@ def __init__(self, base_model: Any): self._base_model = base_model @log_decorator - def fit(self, ts: TSDataset) -> "MultisegmentModel": + def fit(self, ts: TSDataset) -> "MultiSegmentModel": """Fit model. 
Parameters @@ -367,7 +367,7 @@ def fit(self, ts: TSDataset) -> "MultisegmentModel": Model after fit """ df = ts.to_pandas(flatten=True) - df = df.dropna() # TODO + df = df.dropna() # TODO: https://github.com/tinkoff-ai/etna/issues/557 df = df.drop(columns="segment") self._base_model.fit(df=df, regressors=ts.regressors) return self diff --git a/etna/models/catboost.py b/etna/models/catboost.py index b5280b56c..4a7a22cb4 100644 --- a/etna/models/catboost.py +++ b/etna/models/catboost.py @@ -6,7 +6,7 @@ from catboost import CatBoostRegressor from catboost import Pool -from etna.models.base import MultisegmentModel +from etna.models.base import MultiSegmentModel from etna.models.base import PerSegmentModel @@ -188,7 +188,7 @@ def __init__( ) -class CatBoostModelMultiSegment(MultisegmentModel): +class CatBoostModelMultiSegment(MultiSegmentModel): """Class for holding Catboost model for all segments. Examples diff --git a/etna/models/sklearn.py b/etna/models/sklearn.py index 91e905f06..dd2e62514 100644 --- a/etna/models/sklearn.py +++ b/etna/models/sklearn.py @@ -5,7 +5,7 @@ import pandas as pd from sklearn.base import RegressorMixin -from etna.models.base import MultisegmentModel +from etna.models.base import MultiSegmentModel from etna.models.base import PerSegmentModel @@ -75,7 +75,7 @@ def __init__(self, regressor: RegressorMixin): super().__init__(base_model=_SklearnAdapter(regressor=regressor)) -class SklearnMultiSegmentModel(MultisegmentModel): +class SklearnMultiSegmentModel(MultiSegmentModel): """Class for holding Sklearn model for all segments.""" def __init__(self, regressor: RegressorMixin):