Skip to content

Implement forecast decomposition for DeadlineMovingAverageModel #1186

Merged
merged 12 commits into from
Mar 31, 2023
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add tests on `inverse_transform` method of transforms on subset of segments, on new segments, on future with gap ([#1127](https://github.com/tinkoff-ai/etna/pull/1127))
- In-sample prediction for `BATSModel` and `TBATSModel` ([#1181](https://github.com/tinkoff-ai/etna/pull/1181))
- Method `predict_components` for forecast decomposition in `_TBATSAdapter` ([#1181](https://github.com/tinkoff-ai/etna/pull/1181))
-
- Forecast decomposition for `DeadlineMovingAverageModel` ([#1186](https://github.com/tinkoff-ai/etna/pull/1186))
-
### Changed
- Add optional `features` parameter in the signature of `TSDataset.to_pandas`, `TSDataset.to_flatten` ([#809](https://github.com/tinkoff-ai/etna/pull/809))
- Signature of the constructor of `TFTModel`, `DeepARModel` ([#1110](https://github.com/tinkoff-ai/etna/pull/1110))
Expand Down
102 changes: 87 additions & 15 deletions etna/models/deadline_ma.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import warnings
from enum import Enum
from typing import Optional
from typing import Tuple

import numpy as np
import pandas as pd
Expand All @@ -25,7 +26,14 @@ def _missing_(cls, value):
class DeadlineMovingAverageModel(
NonPredictionIntervalContextRequiredAbstractModel,
):
"""Moving average model that uses exact previous dates to predict."""
"""Moving average model that uses exact previous dates to predict.

Notes
-----
This model supports in-sample and out-of-sample prediction decomposition.
Prediction components are corresponding target seasonal lags (monthly or annual)
with weights of :math:`1/window`.
"""

def __init__(self, window: int = 3, seasonality: str = "month"):
"""Initialize deadline moving average model.
Expand Down Expand Up @@ -156,6 +164,47 @@ def _get_context_beginning(

return first_index

def _get_previous_date(self, date: pd.Timestamp, offset: int) -> pd.Timestamp:
    """Get previous date using seasonality offset.

    Parameters
    ----------
    date:
        Timestamp to step back from.
    offset:
        Number of seasonal periods to subtract: months for ``SeasonalityMode.month``,
        years for ``SeasonalityMode.year``.

    Returns
    -------
    :
        ``date`` moved back by ``offset`` months or years.

    Raises
    ------
    ValueError:
        if ``self.seasonality`` is neither ``SeasonalityMode.month`` nor ``SeasonalityMode.year``
    """
    if self.seasonality == SeasonalityMode.month:
        return date - pd.DateOffset(months=offset)
    if self.seasonality == SeasonalityMode.year:
        return date - pd.DateOffset(years=offset)

    # Fail with a clear message instead of an ``UnboundLocalError`` on an unassigned result.
    raise ValueError(f"Unsupported seasonality mode: {self.seasonality!r}")

def _make_prediction_components(
    self, result_template: pd.DataFrame, context: pd.DataFrame, prediction_size: int
) -> pd.DataFrame:
    """Build per-lag prediction components for the last ``prediction_size`` timestamps.

    For every timestamp in the tail of ``result_template`` the rows of ``context`` located at the
    ``1 .. self.window`` previous seasonal dates are collected and divided by ``self.window``.

    Parameters
    ----------
    result_template:
        Dataframe whose index supplies the timestamps to decompose.
    context:
        Dataframe that is looked up by the previous seasonal dates; its columns must have a
        ``"segment"`` level.
    prediction_size:
        Number of trailing timestamps of ``result_template`` to produce components for.

    Returns
    -------
    :
        Dataframe indexed by the last ``prediction_size`` timestamps with ``("segment", "feature")``
        columns, where features are named ``target_component_<seasonality>_lag_<k>``.
    """
    timestamps = result_template.index
    stop = len(result_template)
    start = stop - prediction_size
    lags = range(1, self.window + 1)

    # One entry per timestamp, each holding one context row per lag.
    # NOTE(review): assumes every ``context.loc[date]`` yields a single 1-D row -- confirm index is unique.
    lagged_rows = np.asarray(
        [
            [context.loc[self._get_previous_date(date=timestamps[pos], offset=lag)].values for lag in lags]
            for pos in range(start, stop)
        ],
        dtype=float,
    )

    # Make the lag axis the innermost one, then flatten everything but the timestamp axis,
    # so the flat column order matches the (segment, lag) product built below.
    flat_components = np.swapaxes(lagged_rows, -1, -2)
    flat_components = flat_components.reshape(flat_components.shape[0], -1)
    flat_components /= self.window

    lag_names = [f"target_component_{self.seasonality.name}_lag_{lag}" for lag in lags]
    segment_labels = context.columns.get_level_values("segment")

    components = pd.DataFrame(data=flat_components)
    components.index = timestamps[start:stop]
    components.columns = pd.MultiIndex.from_product([segment_labels, lag_names], names=("segment", "feature"))
    return components

def _make_predictions(
self, result_template: pd.DataFrame, context: pd.DataFrame, prediction_size: int
) -> np.ndarray:
Expand All @@ -165,10 +214,7 @@ def _make_predictions(
end_idx = len(result_template)
for i in range(start_idx, end_idx):
for w in range(1, self.window + 1):
if self.seasonality == SeasonalityMode.month:
prev_date = result_template.index[i] - pd.DateOffset(months=w)
elif self.seasonality == SeasonalityMode.year:
prev_date = result_template.index[i] - pd.DateOffset(years=w)
prev_date = self._get_previous_date(date=result_template.index[i], offset=w)

result_template.loc[index[i]] += context.loc[prev_date]

Expand All @@ -177,7 +223,9 @@ def _make_predictions(
result_values = result_template.values[-prediction_size:]
return result_values

def _forecast(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
def _forecast(
self, df: pd.DataFrame, prediction_size: int, return_components: bool = False
) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
"""Make autoregressive forecasts on a wide dataframe."""
context_beginning = self._get_context_beginning(
df=df, prediction_size=prediction_size, seasonality=self.seasonality, window=self.window
Expand All @@ -200,7 +248,14 @@ def _forecast(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
df = df.iloc[-prediction_size:]
y_pred = result_values[-prediction_size:]
df.loc[:, pd.IndexSlice[:, "target"]] = y_pred
return df

components = None
alex-hse-repository marked this conversation as resolved.
Show resolved Hide resolved
if return_components:
components = self._make_prediction_components(
result_template=result_template, context=result_template, prediction_size=prediction_size
alex-hse-repository marked this conversation as resolved.
Show resolved Hide resolved
)

return df, components

def forecast(self, ts: TSDataset, prediction_size: int, return_components: bool = False) -> TSDataset:
"""Make autoregressive forecasts.
Expand Down Expand Up @@ -231,16 +286,22 @@ def forecast(self, ts: TSDataset, prediction_size: int, return_components: bool
ValueError:
if forecast context contains NaNs
"""
if return_components:
raise NotImplementedError("This mode isn't currently implemented!")
self._validate_fitted()

df = ts.to_pandas()
new_df = self._forecast(df=df, prediction_size=prediction_size)
new_df, target_components = self._forecast(
df=df, prediction_size=prediction_size, return_components=return_components
)
ts.df = new_df

if return_components:
ts.add_target_components(target_components_df=target_components)

return ts

def _predict(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
def _predict(
self, df: pd.DataFrame, prediction_size: int, return_components: bool = False
) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
"""Make predictions on a wide dataframe using true values as autoregression context."""
context_beginning = self._get_context_beginning(
df=df, prediction_size=prediction_size, seasonality=self.seasonality, window=self.window
Expand All @@ -261,7 +322,14 @@ def _predict(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
df = df.iloc[-prediction_size:]
y_pred = result_values[-prediction_size:]
df.loc[:, pd.IndexSlice[:, "target"]] = y_pred
return df

components = None
if return_components:
components = self._make_prediction_components(
result_template=result_template, context=context, prediction_size=prediction_size
)

return df, components

def predict(self, ts: TSDataset, prediction_size: int, return_components: bool = False) -> TSDataset:
"""Make predictions using true values as autoregression context (teacher forcing).
Expand Down Expand Up @@ -292,13 +360,17 @@ def predict(self, ts: TSDataset, prediction_size: int, return_components: bool =
ValueError:
if forecast context contains NaNs
"""
if return_components:
raise NotImplementedError("This mode isn't currently implemented!")
self._validate_fitted()

df = ts.to_pandas()
new_df = self._predict(df=df, prediction_size=prediction_size)
new_df, target_components = self._predict(
df=df, prediction_size=prediction_size, return_components=return_components
)
ts.df = new_df

if return_components:
ts.add_target_components(target_components_df=target_components)

return ts


Expand Down
65 changes: 65 additions & 0 deletions tests/test_models/test_simple_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,26 @@ def df():
return tsds
alex-hse-repository marked this conversation as resolved.
Show resolved Hide resolved


@pytest.fixture()
def long_periodic_ts():
    """Daily two-segment dataset of 400 points: segment ``A`` is a sine wave, ``B`` is the same wave times 4."""
    n_points = 400
    timestamps = pd.date_range(start="2020-01-01", periods=n_points)
    base_signal = np.sin(np.arange(n_points))

    # Column order (target, segment, timestamp) is kept the same for both segments.
    segment_frames = [
        pd.DataFrame({"target": base_signal * scale, "segment": segment, "timestamp": timestamps})
        for segment, scale in (("A", 1), ("B", 4))
    ]

    long_df = pd.concat(segment_frames).reset_index(drop=True)
    wide_df = TSDataset.to_dataset(long_df)
    return TSDataset(wide_df, freq="D")


@pytest.mark.parametrize("model", [SeasonalMovingAverageModel, NaiveModel, MovingAverageModel])
def test_sma_model_forecast(simple_df, model):
_check_forecast(ts=simple_df, model=model(), horizon=7)
Expand Down Expand Up @@ -774,3 +794,48 @@ def test_sma_model_predict_components_correct(

target_components_df = forecast.get_target_components()
np.testing.assert_allclose(target_components_df.values, expected_values)


@pytest.mark.parametrize("method", ("predict", "forecast"))
@pytest.mark.parametrize(
    "window,seasonality,expected_components_names",
    (
        (1, "month", ["target_component_month_lag_1"]),
        (3, "month", ["target_component_month_lag_1", "target_component_month_lag_2", "target_component_month_lag_3"]),
        (1, "year", ["target_component_year_lag_1"]),
    ),
)
def test_deadline_ma_predict_components_correct_names(
    long_periodic_ts, method, window, seasonality, expected_components_names, horizon=10
):
    """Check that ``predict``/``forecast`` with ``return_components=True`` produce the expected component names."""
    deadline_model = DeadlineMovingAverageModel(window=window, seasonality=seasonality)
    deadline_model.fit(ts=long_periodic_ts)

    # ``method`` selects which public entry point is exercised.
    result_ts = getattr(deadline_model, method)(
        ts=long_periodic_ts, prediction_size=horizon, return_components=True
    )

    # Compare regardless of the order in which the names are reported.
    actual_names = sorted(result_ts.target_components_names)
    assert actual_names == sorted(expected_components_names)


@pytest.mark.parametrize("method", ("predict", "forecast"))
@pytest.mark.parametrize(
    "window,seasonality,expected_components_names",
    (
        (1, "month", ["target_component_month_lag_1"]),
        (3, "month", ["target_component_month_lag_1", "target_component_month_lag_2", "target_component_month_lag_3"]),
        (1, "year", ["target_component_year_lag_1"]),
    ),
)
def test_deadline_ma_predict_components_sum_up_to_target(
    long_periodic_ts, method, window, seasonality, expected_components_names, horizon=10
):
    """Check that the prediction components of every segment add up to that segment's target."""
    model = DeadlineMovingAverageModel(window=window, seasonality=seasonality)
    model.fit(ts=long_periodic_ts)

    method_to_call = getattr(model, method)
    forecast = method_to_call(ts=long_periodic_ts, prediction_size=horizon, return_components=True)

    target = forecast.to_pandas(features=["target"])
    components = forecast.get_target_components()

    # ``DataFrame.sum(..., level=...)`` was deprecated in pandas 1.3 and removed in 2.0, so the
    # per-segment totals are computed with an explicit groupby over the "segment" column level.
    # ``sort=False`` keeps the segments in the order they appear in ``components``.
    components_sum = components.T.groupby(level="segment", sort=False).sum().T

    np.testing.assert_allclose(target.values, components_sum.values)