Skip to content

Implement forecast decomposition for Prophet #1172

Merged
merged 11 commits into from
Mar 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Method `forecast_components` for forecast decomposition in `_TBATSAdapter` ([#1125](https://github.com/tinkoff-ai/etna/issues/1125))
- Methods `forecast_components` and `predict_components` for forecast decomposition in `_CatBoostAdapter` ([#1135](https://github.com/tinkoff-ai/etna/issues/1135))
- Methods `forecast_components` and `predict_components` for forecast decomposition in `_HoltWintersAdapter` ([#1146](https://github.com/tinkoff-ai/etna/issues/1146))
-
- Method `predict_components` for forecast decomposition in `_ProphetAdapter` ([#1161](https://github.com/tinkoff-ai/etna/issues/1161))
-
### Changed
- Add optional `features` parameter in the signature of `TSDataset.to_pandas`, `TSDataset.to_flatten` ([#809](https://github.com/tinkoff-ai/etna/pull/809))
- Signature of the constructor of `TFTModel`, `DeepARModel` ([#1110](https://github.com/tinkoff-ai/etna/pull/1110))
Expand Down
98 changes: 89 additions & 9 deletions etna/models/prophet.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import List
from typing import Optional
from typing import Sequence
from typing import Set
from typing import Union

import pandas as pd
Expand Down Expand Up @@ -106,10 +107,7 @@ def fit(self, df: pd.DataFrame, regressors: List[str]) -> "_ProphetAdapter":
List of the columns with regressors
"""
self.regressor_columns = regressors
prophet_df = pd.DataFrame()
prophet_df["y"] = df["target"]
prophet_df["ds"] = df["timestamp"]
prophet_df[self.regressor_columns] = df[self.regressor_columns]
prophet_df = self._prepare_prophet_df(df=df)
for regressor in self.regressor_columns:
if regressor not in self.predefined_regressors_names:
self.model.add_regressor(regressor)
Expand All @@ -134,11 +132,7 @@ def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Sequen
:
DataFrame with predictions
"""
df = df.reset_index()
prophet_df = pd.DataFrame()
prophet_df["y"] = df["target"]
prophet_df["ds"] = df["timestamp"]
prophet_df[self.regressor_columns] = df[self.regressor_columns]
prophet_df = self._prepare_prophet_df(df=df)
forecast = self.model.predict(prophet_df)
y_pred = pd.DataFrame(forecast["yhat"])
if prediction_interval:
Expand All @@ -152,6 +146,86 @@ def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Sequen
y_pred = y_pred.rename(rename_dict, axis=1)
return y_pred

def _prepare_prophet_df(self, df: pd.DataFrame) -> pd.DataFrame:
    """Build the dataframe that is passed to Prophet for fit and predict.

    The index of ``df`` is reset, ``target`` is exposed as ``y``, ``timestamp`` as ``ds``
    and every column listed in ``self.regressor_columns`` is copied over under its own name.

    Parameters
    ----------
    df:
        dataframe with ``target``, ``timestamp`` and regressor columns

    Returns
    -------
    :
        dataframe with columns ``y``, ``ds`` followed by the regressors

    Raises
    ------
    ValueError:
        if ``self.regressor_columns`` is ``None``
    """
    regressors = self.regressor_columns
    if regressors is None:
        raise ValueError("List of regressor is not set!")

    flat_df = df.reset_index()

    # dict order fixes the leading columns: ``y`` first, then ``ds``
    result = pd.DataFrame({"y": flat_df["target"], "ds": flat_df["timestamp"]})
    result[regressors] = flat_df[regressors]
    return result

@staticmethod
def _filter_aggregated_components(components: Iterable[str]) -> Set[str]:
    """Return the given component names as a set, without the aggregated ones.

    Parameters
    ----------
    components:
        names of model components

    Returns
    -------
    :
        set of names that are not aggregations of other model terms
    """
    # each of these names holds an aggregation (e.g. sum) of the corresponding model terms
    aggregate_names = (
        "additive_terms",
        "multiplicative_terms",
        "extra_regressors_additive",
        "extra_regressors_multiplicative",
    )
    return {name for name in components if name not in aggregate_names}

def _check_mul_components(self):
    """Ensure that the fitted model has no multiplicative components.

    Raises
    ------
    ValueError:
        if ``self.model.component_modes`` is ``None``
    ValueError:
        if any non-aggregated component is registered under the ``"multiplicative"`` mode
    """
    modes = self.model.component_modes
    if modes is None:
        raise ValueError("This model is not fitted!")

    # aggregated entries are ignored, only individual components count
    if self._filter_aggregated_components(modes["multiplicative"]):
        raise ValueError("Forecast decomposition is only supported for additive components!")

def _predict_seasonal_components(self, df: pd.DataFrame) -> pd.DataFrame:
    """Estimate seasonal, holidays and exogenous components.

    Parameters
    ----------
    df:
        dataframe that is passed to ``model.make_all_seasonality_features``

    Returns
    -------
    :
        dataframe with one column per component; columns follow the order
        of the component columns reported by the model
    """
    model = self.model

    seasonal_features, _, component_cols, _ = model.make_all_seasonality_features(df)

    holiday_names = set(model.train_holiday_names) if model.train_holiday_names is not None else set()

    # names listed in ``train_holiday_names`` are dropped together with the aggregated terms
    selected = self._filter_aggregated_components(component_cols.columns) - holiday_names

    # walk the column index instead of the set: set iteration order for strings
    # is not stable between interpreter runs, which made the output column order vary
    components_names = [name for name in component_cols.columns if name in selected]

    # mask the coefficients per component, then project the features onto them
    beta_c = model.params["beta"].T * component_cols[components_names].values
    comp = seasonal_features.values @ beta_c

    # apply rescaling for additive components
    comp *= model.y_scale

    return pd.DataFrame(data=comp, columns=components_names)

def predict_components(self, df: pd.DataFrame) -> pd.DataFrame:
    """Decompose the prediction into components.

    Parameters
    ----------
    df:
        features dataframe

    Returns
    -------
    :
        dataframe with prediction components, every column name
        carries the ``target_component_`` prefix

    Raises
    ------
    ValueError:
        raised by ``_check_mul_components`` when the decomposition is not possible
    """
    self._check_mul_components()

    raw_df = self._prepare_prophet_df(df=df)
    model_df = self.model.setup_dataframe(raw_df)

    seasonal_part = self._predict_seasonal_components(df=model_df)
    trend_part = self.model.predict_trend(df=model_df)

    # the trend is stored alongside the other components under the name ``trend``
    return seasonal_part.assign(trend=trend_part).add_prefix("target_component_")

def get_model(self) -> Prophet:
"""Get internal prophet.Prophet model that is used inside etna class.

Expand Down Expand Up @@ -200,6 +274,12 @@ class ProphetModel(
Original Prophet can use features 'cap' and 'floor',
they should be added to the known_future list on dataset initialization.

This model supports in-sample and out-of-sample forecast decomposition. The number
of components in the decomposition depends on model parameters. Main components are:
trend, seasonality, holiday and exogenous effects. Seasonal components will be decomposed
down to individual periods if fitted. Holiday and exogenous will be present in decomposition
if fitted. Corresponding components are obtained directly from the model.

Examples
--------
>>> from etna.datasets import generate_periodic_df
Expand Down
13 changes: 13 additions & 0 deletions tests/test_models/conftest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
import pytest

from etna.datasets import generate_ar_df
Expand All @@ -16,3 +17,15 @@ def new_format_exog():
exog = generate_ar_df(periods=60, start_time="2021-06-01", n_segments=2)
df = TSDataset.to_dataset(exog)
return df


@pytest.fixture()
def dfs_w_exog():
    """Return train and test parts of a generated one-segment series with two extra features.

    Features ``f1`` and ``f2`` are the sine and cosine of ``target``; the ``segment``
    column is removed. The last 5 rows form the test part, the rest is the train part.
    """
    horizon = 5

    data = generate_ar_df(start_time="2021-01-01", periods=105, n_segments=1)
    data["f1"] = np.sin(data["target"])
    data["f2"] = np.cos(data["target"])
    data = data.drop(columns=["segment"])

    return data.iloc[:-horizon], data.iloc[-horizon:]
12 changes: 0 additions & 12 deletions tests/test_models/test_catboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,18 +147,6 @@ def test_save_load(model, example_tsds):
assert_model_equals_loaded_original(model=model, ts=example_tsds, transforms=transforms, horizon=horizon)


@pytest.fixture()
def dfs_w_exog():
    """Return train and test parts of a generated one-segment series with two extra features.

    Features ``f1`` and ``f2`` are the sine and cosine of ``target``; the ``segment``
    column is removed. The last 5 rows form the test part, the rest is the train part.
    """
    test_size = 5

    frame = generate_ar_df(start_time="2021-01-01", periods=105, n_segments=1)
    frame["f1"] = np.sin(frame["target"])
    frame["f2"] = np.cos(frame["target"])
    frame = frame.drop(columns=["segment"])

    train_part = frame.iloc[:-test_size]
    test_part = frame.iloc[-test_size:]
    return train_part, test_part


def test_forecast_components_equal_predict_components(dfs_w_exog):
train, test = dfs_w_exog

Expand Down
151 changes: 151 additions & 0 deletions tests/test_models/test_prophet.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,154 @@ def test_custom_seasonality(custom_seasonality):
model = ProphetModel(additional_seasonality_params=custom_seasonality)
for seasonality in custom_seasonality:
assert seasonality["name"] in model._base_model.model.seasonalities


@pytest.fixture
def prophet_dfs(dfs_w_exog):
    """Return train data, test data and a holidays table for Prophet tests.

    Both parts of ``dfs_w_exog`` are concatenated and a constant ``cap`` column is added.
    Holiday ``h1`` is placed on every third row, ``h2`` on every fifth row. Train is
    rows ``[-60:-20]`` of the concatenated frame, test is the last 20 rows.
    """
    df = pd.concat(dfs_w_exog, axis=0)
    df["cap"] = 4.0

    positions = np.arange(len(df))

    def _holiday_frame(name, step):
        # one holiday on every ``step``-th row, counted from the first row
        return pd.DataFrame(
            {
                "holiday": name,
                "ds": df["timestamp"][positions % step == 0],
                "lower_window": 0,
                "upper_window": 1,
            }
        )

    holidays = pd.concat([_holiday_frame("h1", 3), _holiday_frame("h2", 5)]).reset_index(drop=True)

    train, test = df.iloc[-60:-20], df.iloc[-20:]
    return train, test, holidays


def test_check_mul_components_not_fitted_error():
    """An adapter that was never fitted must refuse the multiplicative-components check."""
    adapter = _ProphetAdapter()
    with pytest.raises(ValueError, match="This model is not fitted!"):
        adapter._check_mul_components()


def test_prepare_prophet_df_regressors_not_set_error(prophet_dfs):
    """Preparing a dataframe before the regressors list is set must raise an error."""
    data = prophet_dfs[1]
    adapter = _ProphetAdapter()
    with pytest.raises(ValueError, match="List of regressor is not set!"):
        adapter._prepare_prophet_df(df=data)


@pytest.mark.parametrize(
    "seasonality_mode,custom_seasonality",
    (
        ("multiplicative", [{"name": "s1", "period": 14, "fourier_order": 1, "mode": "additive"}]),
        ("multiplicative", []),
        ("additive", [{"name": "s1", "period": 14, "fourier_order": 1, "mode": "multiplicative"}]),
    ),
)
def test_check_mul_components(prophet_dfs, seasonality_mode, custom_seasonality):
    """Decomposition must be rejected for each parametrized multiplicative setup."""
    data = prophet_dfs[1]

    adapter = _ProphetAdapter(
        seasonality_mode=seasonality_mode,
        additional_seasonality_params=custom_seasonality,
    )
    adapter.fit(df=data, regressors=["f1", "f2"])

    with pytest.raises(ValueError, match="Forecast decomposition is only supported for additive components!"):
        adapter.predict_components(df=data)


@pytest.mark.parametrize(
    "regressors,regressors_comps", ((["f1", "f2", "cap"], ["target_component_f1", "target_component_f2"]), ([], []))
)
@pytest.mark.parametrize(
    "custom_seas,custom_seas_comp",
    (
        ([{"name": "s1", "period": 14, "fourier_order": 1}], ["target_component_s1"]),
        ([], []),
    ),
)
@pytest.mark.parametrize("use_holidays,holidays_comp", ((True, ["target_component_holidays"]), (False, [])))
@pytest.mark.parametrize("daily,daily_comp", ((True, ["target_component_daily"]), (False, [])))
@pytest.mark.parametrize("weekly,weekly_comp", ((True, ["target_component_weekly"]), (False, [])))
@pytest.mark.parametrize("yearly,yearly_comp", ((True, ["target_component_yearly"]), (False, [])))
def test_predict_components_names(
    prophet_dfs,
    regressors,
    regressors_comps,
    use_holidays,
    holidays_comp,
    daily,
    daily_comp,
    weekly,
    weekly_comp,
    yearly,
    yearly_comp,
    custom_seas,
    custom_seas_comp,
):
    """Check that the decomposition has exactly the expected component columns.

    Every parametrized switch contributes the component names that it is expected
    to add; the trend component is expected in every case.
    """
    _, test, holidays = prophet_dfs

    if not use_holidays:
        holidays = None

    expected_columns = set(
        regressors_comps
        + holidays_comp
        + daily_comp
        + weekly_comp
        + yearly_comp
        + custom_seas_comp
        + ["target_component_trend"]
    )

    model = _ProphetAdapter(
        holidays=holidays,
        daily_seasonality=daily,
        weekly_seasonality=weekly,
        yearly_seasonality=yearly,
        additional_seasonality_params=custom_seas,
    )
    model.fit(df=test, regressors=regressors)

    components = model.predict_components(df=test)

    assert set(components.columns) == expected_columns


@pytest.mark.long_1
@pytest.mark.parametrize("growth,cap", (("linear", []), ("logistic", ["cap"])))
@pytest.mark.parametrize("regressors", (["f1", "f2"], []))
@pytest.mark.parametrize("custom_seas", ([{"name": "s1", "period": 14, "fourier_order": 1}], []))
@pytest.mark.parametrize("use_holidays", (True, False))
@pytest.mark.parametrize("daily", (True, False))
@pytest.mark.parametrize("weekly", (True, False))
@pytest.mark.parametrize("yearly", (True, False))
def test_predict_components_sum_up_to_target(
    prophet_dfs, regressors, use_holidays, daily, weekly, yearly, custom_seas, growth, cap
):
    """Check that the row-wise sum of the components matches the model prediction."""
    train, test, holidays = prophet_dfs

    adapter = _ProphetAdapter(
        growth=growth,
        holidays=holidays if use_holidays else None,
        daily_seasonality=daily,
        weekly_seasonality=weekly,
        yearly_seasonality=yearly,
        additional_seasonality_params=custom_seas,
    )
    # ``cap`` is appended to the regressors only for the logistic growth case
    adapter.fit(df=train, regressors=regressors + cap)

    components = adapter.predict_components(df=test)
    prediction = adapter.predict(df=test, prediction_interval=False, quantiles=[])

    np.testing.assert_allclose(components.sum(axis=1), prediction["target"].values)