Skip to content

Create MultisegmentModel #551

Merged
merged 5 commits into from
Feb 21, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add plot_residuals ([#539](https://github.com/tinkoff-ai/etna/pull/539))
-
- Create `PerSegmentBaseModel`, `PerSegmentPredictionIntervalModel` ([#537](https://github.com/tinkoff-ai/etna/pull/537))
-
- Create `MultiSegmentModel` ([#551](https://github.com/tinkoff-ai/etna/pull/551))
### Changed
- Change the way `ProphetModel` works with regressors ([#383](https://github.com/tinkoff-ai/etna/pull/383))
- Change the way `SARIMAXModel` works with regressors ([#380](https://github.com/tinkoff-ai/etna/pull/380))
Expand Down
71 changes: 70 additions & 1 deletion etna/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def fit(self, ts: TSDataset) -> "PerSegmentBaseModel":

for segment, model in self._models.items():
segment_features = ts[:, segment, :]
segment_features = segment_features.dropna()
segment_features = segment_features.dropna() # TODO: https://github.com/tinkoff-ai/etna/issues/557
segment_features = segment_features.droplevel("segment", axis=1)
segment_features = segment_features.reset_index()
model.fit(df=segment_features, regressors=ts.regressors)
Expand Down Expand Up @@ -336,3 +336,72 @@ def forecast(
ts.df = df
ts.inverse_transform()
return ts


class MultiSegmentModel(FitAbstractModel, ForecastAbstractModel, BaseMixin):
    """Class for holding a single model that is shared by all segments.

    The wrapped model is fitted once on the flattened data of the whole dataset
    and is then used to predict the target of every segment in one call.
    """

    def __init__(self, base_model: Any):
        """
        Init MultiSegmentModel.

        Parameters
        ----------
        base_model:
            Internal model which will be used to forecast segments, expected to have fit/predict interface
        """
        self._base_model = base_model

    @log_decorator
    def fit(self, ts: TSDataset) -> "MultiSegmentModel":
        """Fit model.

        The dataset is flattened into a single frame, rows containing NaNs are dropped,
        the ``segment`` column is removed and the remaining frame is passed to the
        internal model together with ``ts.regressors``.

        Parameters
        ----------
        ts:
            Dataset with features

        Returns
        -------
        self:
            Model after fit
        """
        df = ts.to_pandas(flatten=True)
        df = df.dropna()  # TODO: https://github.com/tinkoff-ai/etna/issues/557
        # The segment label itself is not passed to the internal model.
        df = df.drop(columns="segment")
        self._base_model.fit(df=df, regressors=ts.regressors)
        return self

    @log_decorator
    def forecast(self, ts: TSDataset) -> TSDataset:
        """Make predictions.

        The predictions are written into the ``target`` columns of the passed dataset,
        ``ts.inverse_transform()`` is called, and the very same ``ts`` object is returned.

        Parameters
        ----------
        ts:
            Dataset with features

        Returns
        -------
        forecast:
            Dataset with predictions
        """
        # Number of rows of the wide frame; used as the per-segment length when reshaping.
        horizon = len(ts.df)
        x = ts.to_pandas(flatten=True).drop(columns="segment")
        # NOTE(review): assumes ``predict`` returns an array with ``reshape`` and that the
        # flattened frame stacks segments one after another in the same order as the
        # segment columns of ``ts.df`` -- TODO confirm.
        y = self._base_model.predict(x).reshape(-1, horizon).T
        ts.loc[:, pd.IndexSlice[:, "target"]] = y
        ts.inverse_transform()
        return ts

    def get_model(self) -> Any:
        """Get internal model that is used inside etna class.

        Internal model is a model that is used inside etna to forecast segments, e.g. `catboost.CatBoostRegressor`
        or `sklearn.linear_model.Ridge`.

        Returns
        -------
        result:
            Internal model
        """
        return self._base_model
54 changes: 12 additions & 42 deletions etna/models/catboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,8 @@
from catboost import CatBoostRegressor
from catboost import Pool

from etna.datasets.tsdataset import TSDataset
from etna.models.base import Model
from etna.models.base import MultiSegmentModel
from etna.models.base import PerSegmentModel
from etna.models.base import log_decorator


class _CatBoostAdapter:
Expand Down Expand Up @@ -190,7 +188,7 @@ def __init__(
)


class CatBoostModelMultiSegment(Model):
class CatBoostModelMultiSegment(MultiSegmentModel):
"""Class for holding Catboost model for all segments.

Examples
Expand Down Expand Up @@ -290,42 +288,14 @@ def __init__(
self.l2_leaf_reg = l2_leaf_reg
self.thread_count = thread_count
self.kwargs = kwargs
super(CatBoostModelMultiSegment, self).__init__()
self._base_model = _CatBoostAdapter(
iterations=iterations,
depth=depth,
learning_rate=learning_rate,
logging_level=logging_level,
thread_count=thread_count,
l2_leaf_reg=l2_leaf_reg,
**kwargs,
super().__init__(
base_model=_CatBoostAdapter(
iterations=iterations,
depth=depth,
learning_rate=learning_rate,
logging_level=logging_level,
thread_count=thread_count,
l2_leaf_reg=l2_leaf_reg,
**kwargs,
)
)

@log_decorator
def fit(self, ts: TSDataset) -> "CatBoostModelMultiSegment":
"""Fit model."""
df = ts.to_pandas(flatten=True)
df = df.dropna()
df = df.drop(columns="segment")
self._base_model.fit(df=df, regressors=ts.regressors)
return self

@log_decorator
def forecast(self, ts: TSDataset) -> TSDataset:
"""Make predictions.

Parameters
----------
ts:
Dataframe with features
Returns
-------
DataFrame
Models result
"""
horizon = len(ts.df)
x = ts.to_pandas(flatten=True).drop(["segment"], axis=1)
y = self._base_model.predict(x).reshape(-1, horizon).T
ts.loc[:, pd.IndexSlice[:, "target"]] = y
ts.inverse_transform()
return ts
40 changes: 4 additions & 36 deletions etna/models/sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,8 @@
import pandas as pd
from sklearn.base import RegressorMixin

from etna.datasets.tsdataset import TSDataset
from etna.models.base import Model
from etna.models.base import MultiSegmentModel
from etna.models.base import PerSegmentModel
from etna.models.base import log_decorator


class _SklearnAdapter:
Expand Down Expand Up @@ -77,7 +75,7 @@ def __init__(self, regressor: RegressorMixin):
super().__init__(base_model=_SklearnAdapter(regressor=regressor))


class SklearnMultiSegmentModel(Model):
class SklearnMultiSegmentModel(MultiSegmentModel):
"""Class for holding Sklearn model for all segments."""

def __init__(self, regressor: RegressorMixin):
Expand All @@ -87,36 +85,6 @@ def __init__(self, regressor: RegressorMixin):
Parameters
----------
regressor:
sklearn model for regression
"""
super().__init__()
self._base_model = _SklearnAdapter(regressor=regressor)

@log_decorator
def fit(self, ts: TSDataset) -> "SklearnMultiSegmentModel":
"""Fit model."""
df = ts.to_pandas(flatten=True)
df = df.dropna()
df = df.drop(columns="segment")
self._base_model.fit(df=df, regressors=ts.regressors)
return self

@log_decorator
def forecast(self, ts: TSDataset) -> TSDataset:
"""Make predictions.
Parameters
----------
ts:
Dataframe with features
Returns
-------
DataFrame
Models result
Sklearn model for regression
"""
horizon = len(ts.df)
x = ts.to_pandas(flatten=True).drop(["segment"], axis=1)
y = self._base_model.predict(x).reshape(-1, horizon).T
ts.loc[:, pd.IndexSlice[:, "target"]] = y
ts.inverse_transform()
return ts
super().__init__(base_model=_SklearnAdapter(regressor=regressor))