-
Notifications
You must be signed in to change notification settings - Fork 80
Implement forecast decomposition for Prophet #1172
Changes from 9 commits
5c719d3
4d7e2d4
968710e
516bb99
bc829af
20cdee2
7d85f0c
f02ad46
9322421
8c16a09
eae3411
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,8 +5,10 @@ | |
from typing import List | ||
from typing import Optional | ||
from typing import Sequence | ||
from typing import Set | ||
from typing import Union | ||
|
||
import bottleneck as bn | ||
import pandas as pd | ||
|
||
from etna import SETTINGS | ||
|
@@ -106,10 +108,7 @@ def fit(self, df: pd.DataFrame, regressors: List[str]) -> "_ProphetAdapter": | |
List of the columns with regressors | ||
""" | ||
self.regressor_columns = regressors | ||
prophet_df = pd.DataFrame() | ||
prophet_df["y"] = df["target"] | ||
prophet_df["ds"] = df["timestamp"] | ||
prophet_df[self.regressor_columns] = df[self.regressor_columns] | ||
prophet_df = self._prepare_prophet_df(df=df) | ||
for regressor in self.regressor_columns: | ||
if regressor not in self.predefined_regressors_names: | ||
self.model.add_regressor(regressor) | ||
|
@@ -135,10 +134,7 @@ def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Sequen | |
DataFrame with predictions | ||
""" | ||
df = df.reset_index() | ||
prophet_df = pd.DataFrame() | ||
prophet_df["y"] = df["target"] | ||
prophet_df["ds"] = df["timestamp"] | ||
prophet_df[self.regressor_columns] = df[self.regressor_columns] | ||
prophet_df = self._prepare_prophet_df(df=df) | ||
forecast = self.model.predict(prophet_df) | ||
y_pred = pd.DataFrame(forecast["yhat"]) | ||
if prediction_interval: | ||
|
@@ -152,6 +148,89 @@ def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Sequen | |
y_pred = y_pred.rename(rename_dict, axis=1) | ||
return y_pred | ||
|
||
def _prepare_prophet_df(self, df: pd.DataFrame) -> pd.DataFrame: | ||
"""Prepare dataframe for fit and predict.""" | ||
if self.regressor_columns is None: | ||
raise ValueError("List of regressor is not set!") | ||
|
||
prophet_df = pd.DataFrame() | ||
prophet_df["y"] = df["target"] | ||
prophet_df["ds"] = df["timestamp"] | ||
prophet_df[self.regressor_columns] = df[self.regressor_columns] | ||
return prophet_df | ||
|
||
@staticmethod | ||
def _filter_aggregated_components(components: Iterable[str]) -> Set[str]: | ||
"""Filter out aggregated components.""" | ||
# aggregation of corresponding model terms, e.g. sum | ||
aggregated_components = { | ||
"additive_terms", | ||
"multiplicative_terms", | ||
"extra_regressors_additive", | ||
"extra_regressors_multiplicative", | ||
} | ||
|
||
return set(components) - aggregated_components | ||
|
||
def _check_mul_components(self): | ||
"""Raise error if model contains multiplicative components.""" | ||
components_modes = self.model.component_modes | ||
if components_modes is None: | ||
raise ValueError("This model is not fitted!") | ||
|
||
mul_components = self._filter_aggregated_components(self.model.component_modes["multiplicative"]) | ||
if len(mul_components) > 0: | ||
raise ValueError("Forecast decomposition is only supported for additive components!") | ||
|
||
def _predict_seasonal_components(self, df: pd.DataFrame) -> pd.DataFrame: | ||
"""Estimate seasonal, holidays and exogenous components.""" | ||
model = self.model | ||
|
||
seasonal_features, _, component_cols, _ = model.make_all_seasonality_features(df) | ||
|
||
holiday_names = set(model.train_holiday_names) if model.train_holiday_names is not None else set() | ||
|
||
components_data = {} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can't we do it all at once (not in a loop)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can try to vectorize this, but the main reason for this solution is to stay consistent with the similar method in Prophet. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. First of all, we can filter out the unnecessary components before the loop; second, the Prophet team does not plan to change the model, so we don't need to maintain code consistency if it can be vectorized. |
||
components_names = self._filter_aggregated_components(component_cols.columns) | ||
for component_name in components_names: | ||
if component_name in holiday_names: | ||
continue | ||
|
||
beta_c = model.params["beta"] * component_cols[component_name].values | ||
comp = seasonal_features.values @ beta_c.T | ||
|
||
# apply rescaling for additive components | ||
comp *= model.y_scale | ||
|
||
components_data[component_name] = bn.nanmean(comp, axis=1) | ||
|
||
return pd.DataFrame(data=components_data) | ||
|
||
def predict_components(self, df: pd.DataFrame) -> pd.DataFrame: | ||
"""Estimate prediction components. | ||
|
||
Parameters | ||
---------- | ||
df: | ||
features dataframe | ||
|
||
Returns | ||
------- | ||
: | ||
dataframe with prediction components | ||
""" | ||
self._check_mul_components() | ||
|
||
df = df.reset_index() | ||
alex-hse-repository marked this conversation as resolved.
Show resolved
Hide resolved
|
||
prophet_df = self._prepare_prophet_df(df=df) | ||
|
||
prophet_df = self.model.setup_dataframe(prophet_df) | ||
|
||
components = self._predict_seasonal_components(df=prophet_df) | ||
components["trend"] = self.model.predict_trend(df=prophet_df) | ||
|
||
return components.add_prefix("target_component_") | ||
|
||
def get_model(self) -> Prophet: | ||
"""Get internal prophet.Prophet model that is used inside etna class. | ||
|
||
|
@@ -200,6 +279,12 @@ class ProphetModel( | |
Original Prophet can use features 'cap' and 'floor', | ||
they should be added to the known_future list on dataset initialization. | ||
|
||
This model supports in-sample and out-of-sample forecast decomposition. The number | ||
of components in the decomposition depends on model parameters. Main components are: | ||
trend, seasonality, holiday and exogenous effects. Seasonal components will be decomposed | ||
down to individual periods if fitted. Holiday and exogenous components will be present in the decomposition | ||
if fitted. Corresponding components are obtained directly from the model. | ||
|
||
Examples | ||
-------- | ||
>>> from etna.datasets import generate_periodic_df | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why didn't you put this line inside `_prepare_prophet_df`? You can add this line in `fit` too.