Skip to content

Implement forecast decomposition for Prophet #1172

Merged
merged 11 commits into from
Mar 17, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `ChangePointsLevelTransform` and base classes `PerIntervalModel`, `BaseChangePointsModelAdapter` for per-interval transforms ([#998](https://github.com/tinkoff-ai/etna/pull/998))
- Method `set_params` to change parameters of ETNA objects ([#1102](https://github.com/tinkoff-ai/etna/pull/1102))
- Function `plot_forecast_decomposition` ([#1129](https://github.com/tinkoff-ai/etna/pull/1129))
- Method `forecast_components` for forecast decomposition in `_TBATSAdapter` ([#1125](https://github.com/tinkoff-ai/etna/issues/1125))
- Methods `forecast_components` and `predict_components` for forecast decomposition in `_CatBoostAdapter` ([#1135](https://github.com/tinkoff-ai/etna/issues/1135))
- Method `forecast_components` for forecast decomposition in `_TBATSAdapter` [#1125](https://github.com/tinkoff-ai/etna/issues/1125)
- Methods `forecast_components` and `predict_components` for forecast decomposition in `_CatBoostAdapter` [#1135](https://github.com/tinkoff-ai/etna/issues/1135)
alex-hse-repository marked this conversation as resolved.
Show resolved Hide resolved
- Methods `forecast_components` and `predict_components` for forecast decomposition in `_HoltWintersAdapter` ([#1146](https://github.com/tinkoff-ai/etna/issues/1146))
-
- Method `predict_components` for forecast decomposition in `_ProphetAdapter` ([#1161](https://github.com/tinkoff-ai/etna/issues/1161))
-
### Changed
- Add optional `features` parameter in the signature of `TSDataset.to_pandas`, `TSDataset.to_flatten` ([#809](https://github.com/tinkoff-ai/etna/pull/809))
- Signature of the constructor of `TFTModel`, `DeepARModel` ([#1110](https://github.com/tinkoff-ai/etna/pull/1110))
Expand Down
78 changes: 78 additions & 0 deletions etna/models/prophet.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
from typing import List
from typing import Optional
from typing import Sequence
from typing import Set
from typing import Union

import bottleneck as bn
import pandas as pd

from etna import SETTINGS
Expand Down Expand Up @@ -152,6 +154,76 @@ def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Sequen
y_pred = y_pred.rename(rename_dict, axis=1)
return y_pred

def _check_mul_components(self, ignore_components: Set[str]) -> None:
    """Raise error if model contains multiplicative components.

    Parameters
    ----------
    ignore_components:
        names of components that are excluded from the check

    Raises
    ------
    ValueError:
        if ``component_modes`` of the underlying model is ``None`` (reported as "not fitted")
    ValueError:
        if the model has a multiplicative component that is not listed in ``ignore_components``
    """
    components_modes = self.model.component_modes
    if components_modes is None:
        raise ValueError("This model is not fitted!")

    # Reuse the mapping fetched above instead of reading the attribute a second time.
    mul_components = set(components_modes["multiplicative"])

    # ``difference`` accepts any iterable, so callers are not forced to pass a real ``set``.
    if mul_components.difference(ignore_components):
        raise ValueError("Forecast decomposition is only supported for additive components!")

def _predict_seasonal_components(self, df: pd.DataFrame, ignore_components: Set[str]) -> pd.DataFrame:
alex-hse-repository marked this conversation as resolved.
Show resolved Hide resolved
"""Estimate seasonal, holidays and exogenous components."""
model = self.model

seasonal_features, _, component_cols, _ = model.make_all_seasonality_features(df)

holiday_names = set(model.train_holiday_names) if model.train_holiday_names is not None else set()

components_data = {}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't we do it all at once (not in a loop)?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could try to vectorize this, but the main reason for this solution is to stay consistent with the similar method in Prophet. I don't think we would gain much speed-up by vectorizing this loop. Also, we can't drop the loop entirely, because we need to filter individual holidays and aggregated components out of the columns in `component_cols`.

Copy link
Collaborator

@alex-hse-repository alex-hse-repository Mar 17, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

First of all, we can filter out the unnecessary components before the loop. Secondly, the Prophet team does not plan to change the model, so we don't need to maintain code consistency if the code can be vectorized.

for component_name in component_cols.columns:
if component_name in ignore_components or component_name in holiday_names:
continue

beta_c = model.params["beta"] * component_cols[component_name].values
comp = seasonal_features.values @ beta_c.T

# apply rescaling for additive components
comp *= model.y_scale

components_data[component_name] = bn.nanmean(comp, axis=1)

return pd.DataFrame(data=components_data)

def predict_components(self, df: pd.DataFrame) -> pd.DataFrame:
    """Estimate prediction components.

    Parameters
    ----------
    df:
        features dataframe

    Returns
    -------
    :
        dataframe with prediction components; every column name is prefixed with ``target_component_``

    Raises
    ------
    ValueError:
        propagated from ``_check_mul_components``
    """
    # Columns that aggregate other model terms (e.g. their sum); they are skipped in the decomposition.
    aggregated_components = {
        "additive_terms",
        "multiplicative_terms",
        "extra_regressors_additive",
        "extra_regressors_multiplicative",
    }
    self._check_mul_components(aggregated_components)

    # Build the frame in the layout passed to the underlying model: ``y``, ``ds`` and the regressors.
    flat_df = df.reset_index()
    model_input = pd.DataFrame()
    model_input["y"] = flat_df["target"]
    model_input["ds"] = flat_df["timestamp"]
    model_input[self.regressor_columns] = flat_df[self.regressor_columns]
    model_input = self.model.setup_dataframe(model_input)

    trend_values = self.model.predict_trend(df=model_input)
    result = self._predict_seasonal_components(df=model_input, ignore_components=aggregated_components)
    result["trend"] = trend_values

    return result.add_prefix("target_component_")

def get_model(self) -> Prophet:
"""Get internal prophet.Prophet model that is used inside etna class.

Expand Down Expand Up @@ -200,6 +272,12 @@ class ProphetModel(
Original Prophet can use features 'cap' and 'floor',
they should be added to the known_future list on dataset initialization.

This model supports in-sample and out-of-sample forecast decomposition. The number
of components in the decomposition depends on model parameters. Main components are:
trend, seasonality, holiday and exogenous effects. Seasonal components will be decomposed
down to individual periods if fitted. Holiday and exogenous will be present in decomposition
if fitted. Corresponding components are obtained directly from the model.

Examples
--------
>>> from etna.datasets import generate_periodic_df
Expand Down
13 changes: 13 additions & 0 deletions tests/test_models/conftest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
import pytest

from etna.datasets import generate_ar_df
Expand All @@ -16,3 +17,15 @@ def new_format_exog():
exog = generate_ar_df(periods=60, start_time="2021-06-01", n_segments=2)
df = TSDataset.to_dataset(exog)
return df


@pytest.fixture()
def dfs_w_exog():
    """Return a train/test pair of single-series frames with two exogenous features.

    The series has 105 rows; ``f1`` and ``f2`` are the sine and cosine of ``target``.
    The ``segment`` column is removed, and the last 5 rows form the test part.
    """
    horizon = 5
    frame = generate_ar_df(start_time="2021-01-01", periods=105, n_segments=1)
    frame = frame.assign(f1=np.sin(frame["target"]), f2=np.cos(frame["target"]))
    frame = frame.drop(columns=["segment"])
    return frame.iloc[:-horizon], frame.iloc[-horizon:]
12 changes: 0 additions & 12 deletions tests/test_models/test_catboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,18 +147,6 @@ def test_save_load(model, example_tsds):
assert_model_equals_loaded_original(model=model, ts=example_tsds, transforms=transforms, horizon=horizon)


@pytest.fixture()
def dfs_w_exog():
    """Return a train/test pair of single-series frames with two exogenous features.

    The series has 105 rows; ``f1`` and ``f2`` are the sine and cosine of ``target``.
    The ``segment`` column is removed, and the last 5 rows form the test part.
    """
    horizon = 5
    frame = generate_ar_df(start_time="2021-01-01", periods=105, n_segments=1)
    frame = frame.assign(f1=np.sin(frame["target"]), f2=np.cos(frame["target"]))
    frame = frame.drop(columns=["segment"])
    return frame.iloc[:-horizon], frame.iloc[-horizon:]


def test_forecast_components_equal_predict_components(dfs_w_exog):
train, test = dfs_w_exog

Expand Down
136 changes: 136 additions & 0 deletions tests/test_models/test_prophet.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,139 @@ def test_custom_seasonality(custom_seasonality):
model = ProphetModel(additional_seasonality_params=custom_seasonality)
for seasonality in custom_seasonality:
assert seasonality["name"] in model._base_model.model.seasonalities


@pytest.fixture
def prophet_dfs(dfs_w_exog):
    """Return ``(train, test, holidays)`` for the Prophet decomposition tests.

    The two frames from ``dfs_w_exog`` are concatenated and get a constant ``cap`` column.
    Train is rows ``-60:-20`` and test is the last 20 rows. ``holidays`` marks every 3rd
    timestamp as ``h1`` and every 5th as ``h2``, each with a one-step upper window.
    """
    full_df = pd.concat(dfs_w_exog, axis=0)
    full_df["cap"] = 4.0

    positions = np.arange(len(full_df))
    holiday_frames = [
        pd.DataFrame(
            {
                "holiday": holiday_name,
                "ds": full_df["timestamp"][positions % period == 0],
                "lower_window": 0,
                "upper_window": 1,
            }
        )
        for holiday_name, period in (("h1", 3), ("h2", 5))
    ]
    holidays = pd.concat(holiday_frames).reset_index(drop=True)

    train, test = full_df.iloc[-60:-20], full_df.iloc[-20:]
    return train, test, holidays


def test_check_mul_components_not_fitted_error():
    """Check that the multiplicative-components check fails on an adapter that was never fitted."""
    adapter = _ProphetAdapter()
    no_ignored_components = set()
    with pytest.raises(ValueError, match="This model is not fitted!"):
        adapter._check_mul_components(no_ignored_components)


def test_check_mul_components(prophet_dfs):
    """Check that decomposition is rejected for a model fitted with multiplicative seasonality."""
    data = prophet_dfs[1]

    adapter = _ProphetAdapter(seasonality_mode="multiplicative")
    adapter.fit(df=data, regressors=["f1", "f2"])

    expected_message = "Forecast decomposition is only supported for additive components!"
    with pytest.raises(ValueError, match=expected_message):
        adapter.predict_components(df=data)


@pytest.mark.parametrize(
    "regressors,regressors_comps",
    (
        (["f1", "f2", "cap"], ["target_component_f1", "target_component_f2"]),
        ([], []),
    ),
)
@pytest.mark.parametrize(
    "custom_seas,custom_seas_comp",
    (
        ([{"name": "s1", "period": 14, "fourier_order": 1}], ["target_component_s1"]),
        ([], []),
    ),
)
@pytest.mark.parametrize("use_holidays,holidays_comp", ((True, ["target_component_holidays"]), (False, [])))
@pytest.mark.parametrize("daily,daily_comp", ((True, ["target_component_daily"]), (False, [])))
@pytest.mark.parametrize("weekly,weekly_comp", ((True, ["target_component_weekly"]), (False, [])))
@pytest.mark.parametrize("yearly,yearly_comp", ((True, ["target_component_yearly"]), (False, [])))
def test_predict_components_names(
    prophet_dfs,
    regressors,
    regressors_comps,
    use_holidays,
    holidays_comp,
    daily,
    daily_comp,
    weekly,
    weekly_comp,
    yearly,
    yearly_comp,
    custom_seas,
    custom_seas_comp,
):
    """Check that the decomposition has exactly the columns implied by the enabled model parts.

    The trend column is always expected; every other expected name comes from the
    ``*_comp`` / ``*_comps`` parameters paired with the corresponding model option.
    """
    _, test, holidays = prophet_dfs

    adapter = _ProphetAdapter(
        holidays=holidays if use_holidays else None,
        daily_seasonality=daily,
        weekly_seasonality=weekly,
        yearly_seasonality=yearly,
        additional_seasonality_params=custom_seas,
    )
    adapter.fit(df=test, regressors=regressors)
    components = adapter.predict_components(df=test)

    expected_columns = {"target_component_trend"}.union(
        regressors_comps,
        holidays_comp,
        daily_comp,
        weekly_comp,
        yearly_comp,
        custom_seas_comp,
    )
    assert set(components.columns) == expected_columns


@pytest.mark.long_1
@pytest.mark.parametrize("growth,cap", (("linear", []), ("logistic", ["cap"])))
@pytest.mark.parametrize("regressors", (["f1", "f2"], []))
@pytest.mark.parametrize("custom_seas", ([{"name": "s1", "period": 14, "fourier_order": 1}], []))
@pytest.mark.parametrize("use_holidays", (True, False))
@pytest.mark.parametrize("daily", (True, False))
@pytest.mark.parametrize("weekly", (True, False))
@pytest.mark.parametrize("yearly", (True, False))
def test_predict_components_sum_up_to_target(
    prophet_dfs, regressors, use_holidays, daily, weekly, yearly, custom_seas, growth, cap
):
    """Check that the row-wise sum of the predicted components matches the predicted target."""
    train, test, holidays = prophet_dfs

    adapter = _ProphetAdapter(
        growth=growth,
        holidays=holidays if use_holidays else None,
        daily_seasonality=daily,
        weekly_seasonality=weekly,
        yearly_seasonality=yearly,
        additional_seasonality_params=custom_seas,
    )
    adapter.fit(df=train, regressors=regressors + cap)

    components = adapter.predict_components(df=test)
    forecast = adapter.predict(df=test, prediction_interval=False, quantiles=[])

    components_total = components.sum(axis=1)
    np.testing.assert_allclose(components_total, forecast["target"].values)