From 75011207c3924fd5c097d8a34432d568b6d95e74 Mon Sep 17 00:00:00 2001 From: martins0n Date: Fri, 8 Apr 2022 21:22:46 +0300 Subject: [PATCH 1/6] NEW: forecast_kwargs --- etna/pipeline/base.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/etna/pipeline/base.py b/etna/pipeline/base.py index bbc3889d2..24c7fd5db 100644 --- a/etna/pipeline/base.py +++ b/etna/pipeline/base.py @@ -161,6 +161,7 @@ def backtest( aggregate_metrics: bool = False, n_jobs: int = 1, joblib_params: Optional[Dict[str, Any]] = None, + forecast_params: Optional[Dict[str, Any]] = None, ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: """Run backtest with the pipeline. @@ -373,14 +374,14 @@ def _compute_metrics(metrics: List[Metric], y_true: TSDataset, y_pred: TSDataset return metrics_values def _run_fold( - self, train: TSDataset, test: TSDataset, fold_number: int, mask: FoldMask, metrics: List[Metric] + self, train: TSDataset, test: TSDataset, fold_number: int, mask: FoldMask, metrics: List[Metric], forecast_params: Dict[str, Any] ) -> Dict[str, Any]: """Run fit-forecast pipeline of model for one fold.""" tslogger.start_experiment(job_type="crossval", group=str(fold_number)) pipeline = deepcopy(self) pipeline.fit(ts=train) - forecast = pipeline.forecast() + forecast = pipeline.forecast(**forecast_params) fold: Dict[str, Any] = {} for stage_name, stage_df in zip(("train", "test"), (train, test)): fold[f"{stage_name}_timerange"] = {} @@ -471,6 +472,7 @@ def backtest( aggregate_metrics: bool = False, n_jobs: int = 1, joblib_params: Optional[Dict[str, Any]] = None, + forecast_params: Optional[Dict[str, Any]] = None, ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: """Run backtest with the pipeline. @@ -498,6 +500,9 @@ def backtest( """ if joblib_params is None: joblib_params = dict(verbose=11, backend="multiprocessing", mmap_mode="c") + + if forecast_params is None: + forecast_params = dict() self._init_backtest() self._validate_backtest_metrics(metrics=metrics) @@ -505,7 +510,7 @@ def backtest( folds = Parallel(n_jobs=n_jobs, **joblib_params)( delayed(self._run_fold)( - train=train, test=test, fold_number=fold_number, mask=masks[fold_number], metrics=metrics + train=train, test=test, fold_number=fold_number, mask=masks[fold_number], metrics=metrics, forecast_params=forecast_params ) for fold_number, (train, test) in enumerate( self._generate_folds_datasets(ts=ts, masks=masks, horizon=self.horizon) From 7aa693aa90e81985fe47dd8d1e0c1af28f2a6ee0 Mon Sep 17 00:00:00 2001 From: martins0n <33594071+martins0n@users.noreply.github.com> Date: Mon, 18 Apr 2022 11:26:32 +0300 Subject: [PATCH 2/6] FIX: linter --- etna/pipeline/base.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/etna/pipeline/base.py b/etna/pipeline/base.py index 24c7fd5db..bdc48de72 100644 --- a/etna/pipeline/base.py +++ b/etna/pipeline/base.py @@ -374,7 +374,13 @@ def _compute_metrics(metrics: List[Metric], y_true: TSDataset, y_pred: TSDataset return metrics_values def _run_fold( - self, train: TSDataset, test: TSDataset, fold_number: int, mask: FoldMask, metrics: List[Metric], forecast_params: Dict[str, Any] + self, + train: TSDataset, + test: TSDataset, + fold_number: int, + mask: FoldMask, + metrics: List[Metric], + forecast_params: Dict[str, Any], ) -> Dict[str, Any]: """Run fit-forecast pipeline of model for one fold.""" tslogger.start_experiment(job_type="crossval", group=str(fold_number)) @@ -500,7 +506,7 @@ def backtest( """ if joblib_params is None: joblib_params = dict(verbose=11, backend="multiprocessing", mmap_mode="c") - + if forecast_params is None: forecast_params = dict() @@ -510,7 +516,12 @@ def backtest( folds = Parallel(n_jobs=n_jobs, **joblib_params)( delayed(self._run_fold)( - train=train, test=test, fold_number=fold_number, mask=masks[fold_number], metrics=metrics, forecast_params=forecast_params + train=train, + test=test, + fold_number=fold_number, + mask=masks[fold_number], + metrics=metrics, + forecast_params=forecast_params, ) for fold_number, (train, test) in enumerate( self._generate_folds_datasets(ts=ts, masks=masks, horizon=self.horizon) From 27c283b72c8d3173a63eb7db6d7725849e27bd61 Mon Sep 17 00:00:00 2001 From: martins0n <33594071+martins0n@users.noreply.github.com> Date: Mon, 18 Apr 2022 11:49:35 +0300 Subject: [PATCH 3/6] FIX: test and changelog --- CHANGELOG.md | 2 +- etna/pipeline/base.py | 2 ++ tests/test_pipeline/test_pipeline.py | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e2e9bae57..b47763d52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add seasonal_plot ([#628](https://github.com/tinkoff-ai/etna/pull/628)) - - Add plot_periodogram ([#606](https://github.com/tinkoff-ai/etna/pull/606)) -- +- Add support of quantiles in backtest ([#652](https://github.com/tinkoff-ai/etna/pull/652)) - Fixed bug in SARIMAX model with `horizon`=1 ([#637](https://github.com/tinkoff-ai/etna/pull/637)) - Add prediction_actual_scatter_plot ([#610](https://github.com/tinkoff-ai/etna/pull/610)) - Add plot_holidays ([#624](https://github.com/tinkoff-ai/etna/pull/624)) diff --git a/etna/pipeline/base.py b/etna/pipeline/base.py index bdc48de72..7613c6cee 100644 --- a/etna/pipeline/base.py +++ b/etna/pipeline/base.py @@ -498,6 +498,8 @@ def backtest( Number of jobs to run in parallel joblib_params: Additional parameters for :py:class:`joblib.Parallel` + forecast_params: + Additional parameters for :py:method:`BasePipeline.forecast` Returns ------- diff --git a/tests/test_pipeline/test_pipeline.py b/tests/test_pipeline/test_pipeline.py index 98da11519..0fff0fd40 100644 --- a/tests/test_pipeline/test_pipeline.py +++ b/tests/test_pipeline/test_pipeline.py @@ -13,6 +13,7 @@ from etna.metrics import SMAPE from etna.metrics import Metric from etna.metrics import MetricAggregationMode +from etna.metrics import Width from etna.models import LinearPerSegmentModel from etna.models import MovingAverageModel from etna.models import NaiveModel @@ -487,3 +488,17 @@ def test_backtest_two_points(masked_ts: TSDataset, lag: int, expected: Dict[str, for segment in expected.keys(): assert segment in metrics.keys() np.testing.assert_array_almost_equal(expected[segment], metrics[segment]) + + +def test_sanity_backtest_naive_with_intervals(weekly_period_ts): + train_ts, _ = weekly_period_ts + quantiles = (0.01, 0.99) + pipeline = Pipeline(model=NaiveModel(), horizon=5) + _, forecast_df, _ = pipeline.backtest( + ts=train_ts, + metrics=[MAE(), Width(quantiles=quantiles)], + forecast_params={"quantiles": quantiles, "prediction_interval": True}, + ) + features = forecast_df.columns.get_level_values(1) + assert f"target_{quantiles[0]}" in features + assert f"target_{quantiles[1]}" in features From 0c197ee535e364da0244c8d09515185484cff39e Mon Sep 17 00:00:00 2001 From: martins0n <33594071+martins0n@users.noreply.github.com> Date: Mon, 18 Apr 2022 11:59:49 +0300 Subject: [PATCH 4/6] FIX: doc --- etna/pipeline/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etna/pipeline/base.py b/etna/pipeline/base.py index 7613c6cee..c338a23b5 100644 --- a/etna/pipeline/base.py +++ b/etna/pipeline/base.py @@ -499,7 +499,7 @@ def backtest( joblib_params: Additional parameters for :py:class:`joblib.Parallel` forecast_params: - Additional parameters for :py:method:`BasePipeline.forecast` + Additional parameters for :py:func:`~etna.pipeline.base.BasePipeline.forecast` Returns ------- From 596f4b5a149a38788127d18204976e4fa2b27caf Mon Sep 17 00:00:00 2001 From: martins0n <33594071+martins0n@users.noreply.github.com> Date: Mon, 18 Apr 2022 12:09:29 +0300 Subject: [PATCH 5/6] FIX: private method test case --- tests/test_pipeline/test_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_pipeline/test_pipeline.py b/tests/test_pipeline/test_pipeline.py index 0fff0fd40..79eb9f310 100644 --- a/tests/test_pipeline/test_pipeline.py +++ b/tests/test_pipeline/test_pipeline.py @@ -451,7 +451,7 @@ def test_run_fold(ts_run_fold: TSDataset, mask: FoldMask, expected: Dict[str, Li ) pipeline = Pipeline(model=NaiveModel(lag=5), transforms=[], horizon=4) - fold = pipeline._run_fold(train, test, 1, mask, [MAE()]) + fold = pipeline._run_fold(train, test, 1, mask, [MAE()], forecast_params=dict()) for seg in fold["metrics"]["MAE"].keys(): assert fold["metrics"]["MAE"][seg] == expected[seg] From f8bb8f3455e00ecbc67bbc922b3938ce6b4d77f3 Mon Sep 17 00:00:00 2001 From: Martin Gabdushev <33594071+martins0n@users.noreply.github.com> Date: Tue, 19 Apr 2022 17:14:49 +0300 Subject: [PATCH 6/6] Update base.py --- etna/pipeline/base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/etna/pipeline/base.py b/etna/pipeline/base.py index c338a23b5..207c0782a 100644 --- a/etna/pipeline/base.py +++ b/etna/pipeline/base.py @@ -181,6 +181,8 @@ def backtest( Number of jobs to run in parallel joblib_params: Additional parameters for :py:class:`joblib.Parallel` + forecast_params: + Additional parameters for :py:func:`~etna.pipeline.base.BasePipeline.forecast` Returns -------