Skip to content

Implement forecast decomposition for Prophet #1172

Merged
merged 11 commits into from
Mar 17, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Method `forecast_components` for forecast decomposition in `_TBATSAdapter` ([#1125](https://github.com/tinkoff-ai/etna/issues/1125))
- Methods `forecast_components` and `predict_components` for forecast decomposition in `_CatBoostAdapter` ([#1135](https://github.com/tinkoff-ai/etna/issues/1135))
- Methods `forecast_components` and `predict_components` for forecast decomposition in `_HoltWintersAdapter ` ([#1146](https://github.com/tinkoff-ai/etna/issues/1146))
-
- Method `predict_components` for forecast decomposition in `_ProphetAdapter` ([#1161](https://github.com/tinkoff-ai/etna/issues/1161))
-
### Changed
- Add optional `features` parameter in the signature of `TSDataset.to_pandas`, `TSDataset.to_flatten` ([#809](https://github.com/tinkoff-ai/etna/pull/809))
- Signature of the constructor of `TFTModel`, `DeepARModel` ([#1110](https://github.com/tinkoff-ai/etna/pull/1110))
Expand Down
101 changes: 93 additions & 8 deletions etna/models/prophet.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
from typing import List
from typing import Optional
from typing import Sequence
from typing import Set
from typing import Union

import bottleneck as bn
import pandas as pd

from etna import SETTINGS
Expand Down Expand Up @@ -106,10 +108,7 @@ def fit(self, df: pd.DataFrame, regressors: List[str]) -> "_ProphetAdapter":
List of the columns with regressors
"""
self.regressor_columns = regressors
prophet_df = pd.DataFrame()
prophet_df["y"] = df["target"]
prophet_df["ds"] = df["timestamp"]
prophet_df[self.regressor_columns] = df[self.regressor_columns]
prophet_df = self._prepare_prophet_df(df=df)
for regressor in self.regressor_columns:
if regressor not in self.predefined_regressors_names:
self.model.add_regressor(regressor)
Expand All @@ -135,10 +134,7 @@ def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Sequen
DataFrame with predictions
"""
df = df.reset_index()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why didn't you put this line inside `_prepare_prophet_df`? You could add it to `fit` as well.

prophet_df = pd.DataFrame()
prophet_df["y"] = df["target"]
prophet_df["ds"] = df["timestamp"]
prophet_df[self.regressor_columns] = df[self.regressor_columns]
prophet_df = self._prepare_prophet_df(df=df)
forecast = self.model.predict(prophet_df)
y_pred = pd.DataFrame(forecast["yhat"])
if prediction_interval:
Expand All @@ -152,6 +148,89 @@ def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Sequen
y_pred = y_pred.rename(rename_dict, axis=1)
return y_pred

def _prepare_prophet_df(self, df: pd.DataFrame) -> pd.DataFrame:
    """Build the dataframe that is handed to Prophet for fit and predict.

    Parameters
    ----------
    df:
        dataframe with ``target`` and ``timestamp`` columns and every column
        listed in ``self.regressor_columns``

    Returns
    -------
    :
        dataframe with ``y`` (copied from ``target``), ``ds`` (copied from
        ``timestamp``) and the regressor columns

    Raises
    ------
    ValueError:
        if ``self.regressor_columns`` is ``None``
    """
    regressors = self.regressor_columns
    if regressors is None:
        raise ValueError("List of regressor is not set!")

    # Prophet column name -> source column name; assignment order is kept.
    renaming = (("y", "target"), ("ds", "timestamp"))

    prophet_df = pd.DataFrame()
    for prophet_name, source_name in renaming:
        prophet_df[prophet_name] = df[source_name]
    prophet_df[regressors] = df[regressors]
    return prophet_df

@staticmethod
def _filter_aggregated_components(components: Iterable[str]) -> Set[str]:
    """Drop the aggregated component names and return the rest as a set.

    Parameters
    ----------
    components:
        names of components to filter

    Returns
    -------
    :
        set of names that are not one of the aggregated terms
    """
    # These names denote aggregations of the corresponding model terms (e.g. a sum),
    # not individual components.
    aggregated = frozenset(
        (
            "additive_terms",
            "multiplicative_terms",
            "extra_regressors_additive",
            "extra_regressors_multiplicative",
        )
    )
    return {name for name in components if name not in aggregated}

def _check_mul_components(self):
    """Ensure the fitted model has no multiplicative components.

    Raises
    ------
    ValueError:
        if ``self.model.component_modes`` is ``None`` (model is not fitted)
    ValueError:
        if any non-aggregated component is registered as multiplicative
    """
    modes = self.model.component_modes
    if modes is None:
        raise ValueError("This model is not fitted!")

    # Aggregated names are ignored; only individual components matter here.
    multiplicative = self._filter_aggregated_components(modes["multiplicative"])
    if multiplicative:
        raise ValueError("Forecast decomposition is only supported for additive components!")

def _predict_seasonal_components(self, df: pd.DataFrame) -> pd.DataFrame:
"""Estimate seasonal, holidays and exogenous components."""
model = self.model

seasonal_features, _, component_cols, _ = model.make_all_seasonality_features(df)

holiday_names = set(model.train_holiday_names) if model.train_holiday_names is not None else set()

components_data = {}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't we do it all at once (not in a loop)?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can try vectorization here, but the main reason for this solution is to stay consistent with the similar method from Prophet. I don't think we would gain much speed-up by vectorizing this loop. Also, we can't drop the loop entirely, as we need to filter individual holidays and aggregated components out of the columns in `component_cols`.

Copy link
Collaborator

@alex-hse-repository alex-hse-repository Mar 17, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

First of all, we can filter out the unnecessary components before the loop. Secondly, the Prophet team does not plan to change the model, so we don't need to maintain code consistency if the computation can be vectorized.

components_names = self._filter_aggregated_components(component_cols.columns)
for component_name in components_names:
if component_name in holiday_names:
continue

beta_c = model.params["beta"] * component_cols[component_name].values
comp = seasonal_features.values @ beta_c.T

# apply rescaling for additive components
comp *= model.y_scale

components_data[component_name] = bn.nanmean(comp, axis=1)

return pd.DataFrame(data=components_data)

def predict_components(self, df: pd.DataFrame) -> pd.DataFrame:
"""Estimate prediction components.

Parameters
----------
df:
features dataframe

Returns
-------
:
dataframe with prediction components
"""
self._check_mul_components()

df = df.reset_index()
alex-hse-repository marked this conversation as resolved.
Show resolved Hide resolved
prophet_df = self._prepare_prophet_df(df=df)

prophet_df = self.model.setup_dataframe(prophet_df)

components = self._predict_seasonal_components(df=prophet_df)
components["trend"] = self.model.predict_trend(df=prophet_df)

return components.add_prefix("target_component_")

def get_model(self) -> Prophet:
"""Get internal prophet.Prophet model that is used inside etna class.

Expand Down Expand Up @@ -200,6 +279,12 @@ class ProphetModel(
Original Prophet can use features 'cap' and 'floor',
they should be added to the known_future list on dataset initialization.

This model supports in-sample and out-of-sample forecast decomposition. The number
of components in the decomposition depends on model parameters. Main components are:
trend, seasonality, holiday and exogenous effects. Seasonal components will be decomposed
down to individual periods if fitted. Holiday and exogenous will be present in decomposition
if fitted. Corresponding components are obtained directly from the model.

Examples
--------
>>> from etna.datasets import generate_periodic_df
Expand Down
13 changes: 13 additions & 0 deletions tests/test_models/conftest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
import pytest

from etna.datasets import generate_ar_df
Expand All @@ -16,3 +17,15 @@ def new_format_exog():
exog = generate_ar_df(periods=60, start_time="2021-06-01", n_segments=2)
df = TSDataset.to_dataset(exog)
return df


@pytest.fixture()
def dfs_w_exog():
    """Return a train/test pair of one generated AR series with two exogenous features.

    ``f1`` is the sine and ``f2`` the cosine of ``target``; the ``segment`` column
    is dropped. The last 5 rows form the test part, the rest is the train part.
    """
    horizon = 5

    frame = generate_ar_df(start_time="2021-01-01", periods=105, n_segments=1)
    target = frame["target"]
    frame["f1"] = np.sin(target)
    frame["f2"] = np.cos(target)
    frame.drop(columns=["segment"], inplace=True)

    return frame.iloc[:-horizon], frame.iloc[-horizon:]
12 changes: 0 additions & 12 deletions tests/test_models/test_catboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,18 +147,6 @@ def test_save_load(model, example_tsds):
assert_model_equals_loaded_original(model=model, ts=example_tsds, transforms=transforms, horizon=horizon)


@pytest.fixture()
def dfs_w_exog():
    """Return a train/test pair of one generated AR series with two exogenous features.

    ``f1`` is the sine and ``f2`` the cosine of ``target``; the ``segment`` column
    is dropped. The last 5 rows form the test part, the rest is the train part.
    """
    horizon = 5

    frame = generate_ar_df(start_time="2021-01-01", periods=105, n_segments=1)
    target = frame["target"]
    frame["f1"] = np.sin(target)
    frame["f2"] = np.cos(target)
    frame.drop(columns=["segment"], inplace=True)

    return frame.iloc[:-horizon], frame.iloc[-horizon:]


def test_forecast_components_equal_predict_components(dfs_w_exog):
train, test = dfs_w_exog

Expand Down
151 changes: 151 additions & 0 deletions tests/test_models/test_prophet.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,154 @@ def test_custom_seasonality(custom_seasonality):
model = ProphetModel(additional_seasonality_params=custom_seasonality)
for seasonality in custom_seasonality:
assert seasonality["name"] in model._base_model.model.seasonalities


@pytest.fixture
def prophet_dfs(dfs_w_exog):
    """Return train and test frames plus a holidays table for Prophet tests.

    The parts of ``dfs_w_exog`` are concatenated and get a constant ``cap`` column.
    Holiday ``h1`` is placed on every 3rd timestamp and ``h2`` on every 5th, both
    with ``lower_window=0`` and ``upper_window=1``. The returned tuple is
    ``(rows [-60:-20], rows [-20:], holidays)``.
    """
    df = pd.concat(dfs_w_exog, axis=0)
    df["cap"] = 4.0

    positions = np.arange(len(df))

    def _holiday_frame(name, step):
        # Select the timestamps whose row position is a multiple of ``step``.
        return pd.DataFrame(
            {
                "holiday": name,
                "ds": df["timestamp"][positions % step == 0],
                "lower_window": 0,
                "upper_window": 1,
            }
        )

    holidays = pd.concat([_holiday_frame("h1", 3), _holiday_frame("h2", 5)]).reset_index(drop=True)

    train, test = df.iloc[-60:-20], df.iloc[-20:]
    return train, test, holidays


def test_check_mul_components_not_fitted_error():
    """Calling ``_check_mul_components`` on a fresh adapter must report that it is not fitted."""
    adapter = _ProphetAdapter()
    with pytest.raises(ValueError, match="This model is not fitted!"):
        adapter._check_mul_components()


def test_prepare_prophet_df_regressors_not_set_error(prophet_dfs):
    """``_prepare_prophet_df`` must fail on an adapter whose regressors were never set."""
    features = prophet_dfs[1]
    adapter = _ProphetAdapter()
    with pytest.raises(ValueError, match="List of regressor is not set!"):
        adapter._prepare_prophet_df(df=features)


@pytest.mark.parametrize(
    "seasonality_mode,custom_seasonality",
    (
        ("multiplicative", [{"name": "s1", "period": 14, "fourier_order": 1, "mode": "additive"}]),
        ("multiplicative", []),
        ("additive", [{"name": "s1", "period": 14, "fourier_order": 1, "mode": "multiplicative"}]),
    ),
)
def test_check_mul_components(prophet_dfs, seasonality_mode, custom_seasonality):
    """``predict_components`` must raise when a multiplicative seasonality is configured.

    Every parametrized case sets either the global seasonality mode or the custom
    seasonality to ``multiplicative``.
    """
    features = prophet_dfs[1]

    adapter = _ProphetAdapter(
        seasonality_mode=seasonality_mode,
        additional_seasonality_params=custom_seasonality,
    )
    adapter.fit(df=features, regressors=["f1", "f2"])

    expected_message = "Forecast decomposition is only supported for additive components!"
    with pytest.raises(ValueError, match=expected_message):
        adapter.predict_components(df=features)


@pytest.mark.parametrize(
"regressors,regressors_comps", ((["f1", "f2", "cap"], ["target_component_f1", "target_component_f2"]), ([], []))
)
@pytest.mark.parametrize(
"custom_seas,custom_seas_comp",
(
([{"name": "s1", "period": 14, "fourier_order": 1}], ["target_component_s1"]),
([], []),
),
)
@pytest.mark.parametrize("use_holidays,holidays_comp", ((True, ["target_component_holidays"]), (False, [])))
@pytest.mark.parametrize("daily,daily_comp", ((True, ["target_component_daily"]), (False, [])))
@pytest.mark.parametrize("weekly,weekly_comp", ((True, ["target_component_weekly"]), (False, [])))
@pytest.mark.parametrize("yearly,yearly_comp", ((True, ["target_component_yearly"]), (False, [])))
def test_predict_components_names(
alex-hse-repository marked this conversation as resolved.
Show resolved Hide resolved
prophet_dfs,
regressors,
regressors_comps,
use_holidays,
holidays_comp,
daily,
daily_comp,
weekly,
weekly_comp,
yearly,
yearly_comp,
custom_seas,
custom_seas_comp,
):
_, test, holidays = prophet_dfs

if not use_holidays:
holidays = None

expected_columns = set(
regressors_comps
+ holidays_comp
+ daily_comp
+ weekly_comp
+ yearly_comp
+ custom_seas_comp
+ ["target_component_trend"]
)

model = _ProphetAdapter(
holidays=holidays,
daily_seasonality=daily,
weekly_seasonality=weekly,
yearly_seasonality=yearly,
additional_seasonality_params=custom_seas,
)
model.fit(df=test, regressors=regressors)

components = model.predict_components(df=test)

assert set(components.columns) == expected_columns


@pytest.mark.long_1
@pytest.mark.parametrize("growth,cap", (("linear", []), ("logistic", ["cap"])))
@pytest.mark.parametrize("regressors", (["f1", "f2"], []))
@pytest.mark.parametrize("custom_seas", ([{"name": "s1", "period": 14, "fourier_order": 1}], []))
@pytest.mark.parametrize("use_holidays", (True, False))
@pytest.mark.parametrize("daily", (True, False))
@pytest.mark.parametrize("weekly", (True, False))
@pytest.mark.parametrize("yearly", (True, False))
def test_predict_components_sum_up_to_target(
    prophet_dfs, regressors, use_holidays, daily, weekly, yearly, custom_seas, growth, cap
):
    """Row-wise sum of the predicted components must match the point forecast.

    The adapter is fitted on the train part and both ``predict_components`` and
    ``predict`` are evaluated on the test part.
    """
    history, future, holiday_table = prophet_dfs

    adapter = _ProphetAdapter(
        growth=growth,
        holidays=holiday_table if use_holidays else None,
        daily_seasonality=daily,
        weekly_seasonality=weekly,
        yearly_seasonality=yearly,
        additional_seasonality_params=custom_seas,
    )
    # ``cap`` is passed as an extra regressor column only for logistic growth.
    adapter.fit(df=history, regressors=regressors + cap)

    decomposition = adapter.predict_components(df=future)
    forecast = adapter.predict(df=future, prediction_interval=False, quantiles=[])

    np.testing.assert_allclose(np.sum(decomposition, axis=1), forecast["target"].values)