From a7644438c2ee90dc7b8bc77fe831941a0438e2c9 Mon Sep 17 00:00:00 2001 From: Carlosbogo <84228424+Carlosbogo@users.noreply.github.com> Date: Sat, 27 Nov 2021 21:03:07 +0100 Subject: [PATCH] Closes #290 (#313) * Closes #290 Renames parameters for number of folds in different functions to keep consistency --- etna/ensembles/stacking_ensemble.py | 8 ++++---- etna/pipeline/pipeline.py | 10 +++++----- tests/test_ensembles/test_stacking_ensemble.py | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/etna/ensembles/stacking_ensemble.py b/etna/ensembles/stacking_ensemble.py index fa82286dd..e639e666c 100644 --- a/etna/ensembles/stacking_ensemble.py +++ b/etna/ensembles/stacking_ensemble.py @@ -62,7 +62,7 @@ def __init__( self, pipelines: List[Pipeline], final_model: RegressorMixin = LinearRegression(), - cv: int = 3, + n_folds: int = 3, features_to_use: Union[None, Literal["all"], List[str]] = None, n_jobs: int = 1, joblib_params: Dict[str, Any] = dict(verbose=11, backend="multiprocessing", mmap_mode="c"), @@ -75,7 +75,7 @@ def __init__( List of pipelines that should be used in ensemble. final_model: Regression model with fit/predict interface which will be used to combine the base estimators. - cv: + n_folds: Number of folds to use in the backtest. Backtest is not used for model evaluation but for prediction. features_to_use: Features except the forecasts of the base models to use in the `final_model`. 
@@ -93,7 +93,7 @@ def __init__( self.pipelines = pipelines self.horizon = self._get_horizon(pipelines=pipelines) self.final_model = final_model - self.cv = self._validate_cv(cv) + self.n_folds = self._validate_cv(n_folds) self.features_to_use = features_to_use self.filtered_features_for_final_model: Union[None, Set[str]] = None self.n_jobs = n_jobs @@ -149,7 +149,7 @@ def _fit_pipeline(pipeline: Pipeline, ts: TSDataset) -> Pipeline: def _backtest_pipeline(self, pipeline: Pipeline, ts: TSDataset) -> TSDataset: """Get forecasts from backtest for given pipeline.""" - _, forecasts, _ = pipeline.backtest(ts, metrics=[MAE()], n_folds=self.cv) + _, forecasts, _ = pipeline.backtest(ts, metrics=[MAE()], n_folds=self.n_folds) forecasts = TSDataset(df=forecasts, freq=ts.freq) return forecasts diff --git a/etna/pipeline/pipeline.py b/etna/pipeline/pipeline.py index f5634bd26..689d7526f 100644 --- a/etna/pipeline/pipeline.py +++ b/etna/pipeline/pipeline.py @@ -43,7 +43,7 @@ def __init__( transforms: Sequence[Transform] = (), horizon: int = 1, quantiles: Sequence[float] = (0.025, 0.975), - prediction_interval_cv: int = 3, + n_folds: int = 3, ): """ Create instance of Pipeline with given parameters. @@ -58,19 +58,19 @@ def __init__( Number of timestamps in the future for forecasting quantiles: Levels of prediction distribution. By default 2.5% and 97.5% taken to form a 95% prediction interval - prediction_interval_cv: + n_folds: Number of folds to use in the backtest for prediction interval estimation Raises ------ ValueError: - If the horizon is less than 1, quantile is out of (0,1) or prediction_interval_cv is less than 2. + If the horizon is less than 1, quantile is out of (0,1) or n_folds is less than 2. 
""" super().__init__(quantiles=quantiles) self.model = model self.transforms = transforms self.horizon = self._validate_horizon(horizon) - self.prediction_interval_cv = self._validate_cv(prediction_interval_cv) + self.n_folds = self._validate_cv(n_folds) self.ts: Optional[TSDataset] = None @staticmethod @@ -111,7 +111,7 @@ def fit(self, ts: TSDataset) -> "Pipeline": def _forecast_prediction_interval(self, future: TSDataset) -> TSDataset: """Forecast prediction interval for the future.""" - _, forecasts, _ = self.backtest(self.ts, metrics=[MAE()], n_folds=self.prediction_interval_cv) + _, forecasts, _ = self.backtest(self.ts, metrics=[MAE()], n_folds=self.n_folds) forecasts = TSDataset(df=forecasts, freq=self.ts.freq) residuals = ( forecasts.loc[:, pd.IndexSlice[:, "target"]] diff --git a/tests/test_ensembles/test_stacking_ensemble.py b/tests/test_ensembles/test_stacking_ensemble.py index e0e5ee01f..2c37d7ef1 100644 --- a/tests/test_ensembles/test_stacking_ensemble.py +++ b/tests/test_ensembles/test_stacking_ensemble.py @@ -38,7 +38,7 @@ def test_get_horizon_fail(catboost_pipeline: Pipeline, naive_pipeline: Pipeline) @pytest.mark.parametrize("input_cv,true_cv", ([(2, 2)])) def test_cv_pass(naive_pipeline_1: Pipeline, naive_pipeline_2: Pipeline, input_cv, true_cv): """Check that StackingEnsemble._validate_cv works correctly in case of valid cv parameter.""" - ensemble = StackingEnsemble(pipelines=[naive_pipeline_1, naive_pipeline_2], cv=input_cv) + ensemble = StackingEnsemble(pipelines=[naive_pipeline_1, naive_pipeline_2], n_folds=input_cv) - assert ensemble.cv == true_cv + assert ensemble.n_folds == true_cv @@ -46,7 +46,7 @@ def test_cv_pass(naive_pipeline_1: Pipeline, naive_pipeline_2: Pipeline, input_c def test_cv_fail_wrong_number(naive_pipeline_1: Pipeline, naive_pipeline_2: Pipeline, input_cv): """Check that StackingEnsemble._validate_cv works correctly in case of wrong number for cv parameter.""" with pytest.raises(ValueError, match="At least two folds for backtest are expected."): - _ = 
StackingEnsemble(pipelines=[naive_pipeline_1, naive_pipeline_2], cv=input_cv) + _ = StackingEnsemble(pipelines=[naive_pipeline_1, naive_pipeline_2], n_folds=input_cv) @pytest.mark.parametrize(