Skip to content

Commit

Permalink
Closes #290 (#313)
Browse files Browse the repository at this point in the history
* Closes #290
Renames the parameters for the number of folds
in different functions to keep them consistent
  • Loading branch information
Carlosbogo authored Nov 27, 2021
1 parent c074d3b commit a764443
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 11 deletions.
8 changes: 4 additions & 4 deletions etna/ensembles/stacking_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def __init__(
self,
pipelines: List[Pipeline],
final_model: RegressorMixin = LinearRegression(),
cv: int = 3,
n_folds: int = 3,
features_to_use: Union[None, Literal["all"], List[str]] = None,
n_jobs: int = 1,
joblib_params: Dict[str, Any] = dict(verbose=11, backend="multiprocessing", mmap_mode="c"),
Expand All @@ -75,7 +75,7 @@ def __init__(
List of pipelines that should be used in ensemble.
final_model:
Regression model with fit/predict interface which will be used to combine the base estimators.
cv:
n_folds:
Number of folds to use in the backtest. Backtest is not used for model evaluation but for prediction.
features_to_use:
Features except the forecasts of the base models to use in the `final_model`.
Expand All @@ -93,7 +93,7 @@ def __init__(
self.pipelines = pipelines
self.horizon = self._get_horizon(pipelines=pipelines)
self.final_model = final_model
self.cv = self._validate_cv(cv)
self.n_folds = self._validate_cv(n_folds)
self.features_to_use = features_to_use
self.filtered_features_for_final_model: Union[None, Set[str]] = None
self.n_jobs = n_jobs
Expand Down Expand Up @@ -149,7 +149,7 @@ def _fit_pipeline(pipeline: Pipeline, ts: TSDataset) -> Pipeline:

def _backtest_pipeline(self, pipeline: Pipeline, ts: TSDataset) -> TSDataset:
"""Get forecasts from backtest for given pipeline."""
_, forecasts, _ = pipeline.backtest(ts, metrics=[MAE()], n_folds=self.cv)
_, forecasts, _ = pipeline.backtest(ts, metrics=[MAE()], n_folds=self.n_folds)
forecasts = TSDataset(df=forecasts, freq=ts.freq)
return forecasts

Expand Down
10 changes: 5 additions & 5 deletions etna/pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def __init__(
transforms: Sequence[Transform] = (),
horizon: int = 1,
quantiles: Sequence[float] = (0.025, 0.975),
prediction_interval_cv: int = 3,
n_folds: int = 3,
):
"""
Create instance of Pipeline with given parameters.
Expand All @@ -58,19 +58,19 @@ def __init__(
Number of timestamps in the future for forecasting
quantiles:
Levels of prediction distribution. By default 2.5% and 97.5% taken to form a 95% prediction interval
prediction_interval_cv:
n_folds:
Number of folds to use in the backtest for prediction interval estimation
Raises
------
ValueError:
If the horizon is less than 1, quantile is out of (0,1) or prediction_interval_cv is less than 2.
If the horizon is less than 1, quantile is out of (0,1) or n_folds is less than 2.
"""
super().__init__(quantiles=quantiles)
self.model = model
self.transforms = transforms
self.horizon = self._validate_horizon(horizon)
self.prediction_interval_cv = self._validate_cv(prediction_interval_cv)
self.n_folds = self._validate_cv(n_folds)
self.ts: Optional[TSDataset] = None

@staticmethod
Expand Down Expand Up @@ -111,7 +111,7 @@ def fit(self, ts: TSDataset) -> "Pipeline":

def _forecast_prediction_interval(self, future: TSDataset) -> TSDataset:
"""Forecast prediction interval for the future."""
_, forecasts, _ = self.backtest(self.ts, metrics=[MAE()], n_folds=self.prediction_interval_cv)
_, forecasts, _ = self.backtest(self.ts, metrics=[MAE()], n_folds=self.n_folds)
forecasts = TSDataset(df=forecasts, freq=self.ts.freq)
residuals = (
forecasts.loc[:, pd.IndexSlice[:, "target"]]
Expand Down
4 changes: 2 additions & 2 deletions tests/test_ensembles/test_stacking_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,15 @@ def test_get_horizon_fail(catboost_pipeline: Pipeline, naive_pipeline: Pipeline)
@pytest.mark.parametrize("input_cv,true_cv", ([(2, 2)]))
def test_cv_pass(naive_pipeline_1: Pipeline, naive_pipeline_2: Pipeline, input_cv, true_cv):
"""Check that StackingEnsemble._validate_cv works correctly in case of valid cv parameter."""
ensemble = StackingEnsemble(pipelines=[naive_pipeline_1, naive_pipeline_2], cv=input_cv)
ensemble = StackingEnsemble(pipelines=[naive_pipeline_1, naive_pipeline_2], n_folds=input_cv)
assert ensemble.n_folds == true_cv


@pytest.mark.parametrize("input_cv", ([1]))
def test_cv_fail_wrong_number(naive_pipeline_1: Pipeline, naive_pipeline_2: Pipeline, input_cv):
"""Check that StackingEnsemble._validate_cv works correctly in case of wrong number for cv parameter."""
with pytest.raises(ValueError, match="At least two folds for backtest are expected."):
_ = StackingEnsemble(pipelines=[naive_pipeline_1, naive_pipeline_2], cv=input_cv)
_ = StackingEnsemble(pipelines=[naive_pipeline_1, naive_pipeline_2], n_folds=input_cv)


@pytest.mark.parametrize(
Expand Down

0 comments on commit a764443

Please sign in to comment.