diff --git a/examples/backtest.ipynb b/examples/backtest.ipynb index 9ac6891dd..31a26d7d9 100644 --- a/examples/backtest.ipynb +++ b/examples/backtest.ipynb @@ -23,8 +23,6 @@ "\n", "* [What is backtest and how it works](#chapter1) \n", "* [How to run a validation](#chapter2)\n", - " * [`Pipeline`](#section_2_1)\n", - " * [`TimeSeriesCrossValidation`](#section_2_2)\n", "* [Validation visualisation](#chapter3)" ] }, @@ -295,7 +293,7 @@ "id": "0c548b69", "metadata": {}, "source": [ - "### 2.1 `Pipeline` " + "### `Pipeline` " ] }, { @@ -667,378 +665,6 @@ "fold_info_df.head()" ] }, - { - "cell_type": "markdown", - "id": "80f0a163", - "metadata": {}, - "source": [ - "### 2.2 `TimeSeriesCrossValidation` " - ] - }, - { - "cell_type": "markdown", - "id": "1fd2917a", - "metadata": {}, - "source": [ - "**Deprecation warning: this way is deprecated in `etna==1.2.0` and will be removed in `etna==1.4.0`**" - ] - }, - { - "cell_type": "markdown", - "id": "5aa767a1", - "metadata": {}, - "source": [ - "There is also an alternative way to run cross validation by `TimeSeriesCrossValidaton`." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "ffd3e2be", - "metadata": {}, - "outputs": [], - "source": [ - "pipeline = Pipeline(model=model, horizon=horizon, transforms=transforms)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "c2d308b5", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n", - "INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.\n", - "INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.\n", - "[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 1.3s remaining: 0.0s\n", - "INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.\n", - "INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.\n", - "[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 2.4s remaining: 0.0s\n", - "INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.\n", - "INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.\n", - "[Parallel(n_jobs=1)]: Done 3 out of 3 | elapsed: 3.6s remaining: 0.0s\n", - "INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.\n", - "INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.\n", - "[Parallel(n_jobs=1)]: Done 4 out of 4 | elapsed: 4.7s remaining: 0.0s\n", - "INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.\n", - "INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.\n", - "[Parallel(n_jobs=1)]: Done 5 out of 5 | elapsed: 5.8s remaining: 0.0s\n", - "[Parallel(n_jobs=1)]: Done 5 out of 5 | elapsed: 5.8s finished\n" - ] - } - ], - "source": [ - "metrics_df, forecast_df, fold_info_df = pipeline.backtest(ts=ts, metrics=[MAE(), MSE(), SMAPE()])" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "be1c757f", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-25T14:24:22.756309Z", - "start_time": "2021-06-25T14:24:22.745090Z" - }, - "scrolled": false - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
segmentMAEMSESMAPEfold_number
0segment_a21.027199673.2677704.8429420
0segment_a29.8901911297.3676356.2546101
0segment_a35.7222081972.4227377.3680132
0segment_a32.0220841873.4374236.4743913
0segment_a55.2814944903.59131510.0604564
\n", - "
" - ], - "text/plain": [ - " segment MAE MSE SMAPE fold_number\n", - "0 segment_a 21.027199 673.267770 4.842942 0\n", - "0 segment_a 29.890191 1297.367635 6.254610 1\n", - "0 segment_a 35.722208 1972.422737 7.368013 2\n", - "0 segment_a 32.022084 1873.437423 6.474391 3\n", - "0 segment_a 55.281494 4903.591315 10.060456 4" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "metrics_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "087746fc", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-25T14:24:22.813834Z", - "start_time": "2021-06-25T14:24:22.805147Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
segmentsegment_a
featuretargetfold_number
timestamp
2019-07-30506.6058940
2019-07-31495.4478190
2019-08-01486.1889550
2019-08-02473.6311880
2019-08-03390.1740940
\n", - "
" - ], - "text/plain": [ - "segment segment_a \n", - "feature target fold_number\n", - "timestamp \n", - "2019-07-30 506.605894 0\n", - "2019-07-31 495.447819 0\n", - "2019-08-01 486.188955 0\n", - "2019-08-02 473.631188 0\n", - "2019-08-03 390.174094 0" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "forecast_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "25006752", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-25T14:24:22.878475Z", - "start_time": "2021-06-25T14:24:22.868922Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
train_start_timetrain_end_timetest_start_timetest_end_timefold_number
02019-01-012019-07-292019-07-302019-08-290
02019-01-012019-08-292019-08-302019-09-291
02019-01-012019-09-292019-09-302019-10-302
02019-01-012019-10-302019-10-312019-11-303
02019-01-012019-11-302019-12-012019-12-314
\n", - "
" - ], - "text/plain": [ - " train_start_time train_end_time test_start_time test_end_time fold_number\n", - "0 2019-01-01 2019-07-29 2019-07-30 2019-08-29 0\n", - "0 2019-01-01 2019-08-29 2019-08-30 2019-09-29 1\n", - "0 2019-01-01 2019-09-29 2019-09-30 2019-10-30 2\n", - "0 2019-01-01 2019-10-30 2019-10-31 2019-11-30 3\n", - "0 2019-01-01 2019-11-30 2019-12-01 2019-12-31 4" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "fold_info_df.head()" - ] - }, { "cell_type": "markdown", "id": "44be66e7", @@ -1078,7 +704,11 @@ } ], "source": [ - "metrics_df, forecast_df, fold_info_df = pipeline.backtest(ts=ts, metrics=[MAE(), MSE(), SMAPE()], aggregate_metrics=True)" + "metrics_df, forecast_df, fold_info_df = pipeline.backtest(\n", + " ts=ts, \n", + " metrics=[MAE(), MSE(), SMAPE()], \n", + " aggregate_metrics=True\n", + ")" ] }, { diff --git a/tests/test_model_selection/__init__.py b/tests/test_model_selection/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/test_model_selection/test_backtest.py b/tests/test_model_selection/test_backtest.py deleted file mode 100644 index fd7dbaab9..000000000 --- a/tests/test_model_selection/test_backtest.py +++ /dev/null @@ -1,241 +0,0 @@ -from copy import deepcopy -from datetime import datetime -from typing import List - -import pandas as pd -import pytest - -from etna.datasets.tsdataset import TSDataset -from etna.metrics import MAE -from etna.metrics import MSE -from etna.metrics import SMAPE -from etna.metrics.base import MetricAggregationMode -from etna.models.catboost import CatBoostModelMultiSegment -from etna.models.prophet import ProphetModel -from etna.pipeline.pipeline import CrossValidationMode -from etna.pipeline.pipeline import Pipeline -from etna.transforms import DateFlagsTransform - -DEFAULT_METRICS = [MAE(mode=MetricAggregationMode.per_segment)] - - -@pytest.mark.parametrize("n_folds", (0, -1)) -def test_invalid_n_split(n_folds: int, example_reg_tsds): - """Check TimeSeriesCrossValidation behavior in case of invalid n_folds""" - with pytest.raises(ValueError): - pipe = Pipeline(model=ProphetModel(), horizon=12) - pipe.backtest(ts=example_reg_tsds, metrics=DEFAULT_METRICS, n_folds=n_folds) - - -def test_invalid_metrics(example_reg_tsds): - """Check TimeSeriesCrossValidation behavior in case of invalid metrics""" - with pytest.raises(ValueError): - pipe = Pipeline(model=CatBoostModelMultiSegment(), horizon=14) - pipe.backtest(ts=example_reg_tsds, metrics=[MAE(mode=MetricAggregationMode.macro)]) - - -def test_validate_features(imbalanced_tsdf: TSDataset): - """ - Check TimeSeriesCrossValidation behavior in case of small dataframe that - can't be divided to required number of splits - """ - date_flags = DateFlagsTransform(day_number_in_week=True, day_number_in_month=True) - pipe = Pipeline(model=CatBoostModelMultiSegment(), horizon=24, transforms=[date_flags]) - with pytest.raises(ValueError): - pipe.backtest(ts=imbalanced_tsdf, n_folds=3, metrics=DEFAULT_METRICS) - - -def test_generate_expandable_timeranges_days(): - """Test train-test timeranges generation in expand mode with daily freq""" - df = pd.DataFrame({"timestamp": pd.date_range("2021-01-01", "2021-04-01")}) - df["segment"] = "seg" - df["target"] = 1 - df = df.pivot(index="timestamp", columns="segment").reorder_levels([1, 0], axis=1).sort_index(axis=1) - df.columns.names = ["segment", "feature"] - df = TSDataset(df, freq="1D") - - true_borders = ( - (("2021-01-01", "2021-02-24"), ("2021-02-25", "2021-03-08")), - (("2021-01-01", "2021-03-08"), ("2021-03-09", "2021-03-20")), - (("2021-01-01", "2021-03-20"), ("2021-03-21", "2021-04-01")), - ) - for i, stage_dfs in enumerate( - Pipeline._generate_folds_datasets(df, horizon=12, n_folds=3, mode=CrossValidationMode.expand.value) - ): - for stage_df, borders in zip(stage_dfs, true_borders[i]): - assert stage_df.index.min() == datetime.strptime(borders[0], "%Y-%m-%d").date() - assert stage_df.index.max() == datetime.strptime(borders[1], "%Y-%m-%d").date() - - -def test_generate_expandable_timerange_hours(): - """Test train-test timeranges generation in expand mode with hour freq""" - df = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", "2020-02-01", freq="H")}) - df["segment"] = "seg" - df["target"] = 1 - df = df.pivot(index="timestamp", columns="segment").reorder_levels([1, 0], axis=1).sort_index(axis=1) - df.columns.names = ["segment", "feature"] - df = TSDataset(df, freq="1H") - - true_borders = ( - (("2020-01-01 00:00:00", "2020-01-30 12:00:00"), ("2020-01-30 13:00:00", "2020-01-31 00:00:00")), - (("2020-01-01 00:00:00", "2020-01-31 00:00:00"), ("2020-01-31 01:00:00", "2020-01-31 12:00:00")), - (("2020-01-01 00:00:00", "2020-01-31 12:00:00"), ("2020-01-31 13:00:00", "2020-02-01 00:00:00")), - ) - for i, stage_dfs in enumerate( - Pipeline._generate_folds_datasets(df, horizon=12, n_folds=3, mode=CrossValidationMode.expand.value) - ): - for stage_df, borders in zip(stage_dfs, true_borders[i]): - assert stage_df.index.min() == datetime.strptime(borders[0], "%Y-%m-%d %H:%M:%S").date() - assert stage_df.index.max() == datetime.strptime(borders[1], "%Y-%m-%d %H:%M:%S").date() - - -def test_generate_constant_timeranges_days(): - """Test train-test timeranges generation with constant mode with daily freq""" - df = pd.DataFrame({"timestamp": pd.date_range("2021-01-01", "2021-04-01")}) - df["segment"] = "seg" - df["target"] = 1 - df = df.pivot(index="timestamp", columns="segment").reorder_levels([1, 0], axis=1).sort_index(axis=1) - df.columns.names = ["segment", "feature"] - df = TSDataset(df, freq="1D") - - true_borders = ( - (("2021-01-01", "2021-02-24"), ("2021-02-25", "2021-03-08")), - (("2021-01-13", "2021-03-08"), ("2021-03-09", "2021-03-20")), - (("2021-01-25", "2021-03-20"), ("2021-03-21", "2021-04-01")), - ) - for i, stage_dfs in enumerate( - Pipeline._generate_folds_datasets(df, horizon=12, n_folds=3, mode=CrossValidationMode.constant.value) - ): - for stage_df, borders in zip(stage_dfs, true_borders[i]): - assert stage_df.index.min() == datetime.strptime(borders[0], "%Y-%m-%d").date() - assert stage_df.index.max() == datetime.strptime(borders[1], "%Y-%m-%d").date() - - -def test_generate_constant_timeranges_hours(): - """Test train-test timeranges generation with constant mode with hours freq""" - df = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", "2020-02-01", freq="H")}) - df["segment"] = "seg" - df["target"] = 1 - df = df.pivot(index="timestamp", columns="segment").reorder_levels([1, 0], axis=1).sort_index(axis=1) - df.columns.names = ["segment", "feature"] - df = TSDataset(df, freq="1H") - true_borders = ( - (("2020-01-01 00:00:00", "2020-01-30 12:00:00"), ("2020-01-30 13:00:00", "2020-01-31 00:00:00")), - (("2020-01-01 12:00:00", "2020-01-31 00:00:00"), ("2020-01-31 01:00:00", "2020-01-31 12:00:00")), - (("2020-01-02 00:00:00", "2020-01-31 12:00:00"), ("2020-01-31 13:00:00", "2020-02-01 00:00:00")), - ) - for i, stage_dfs in enumerate( - Pipeline._generate_folds_datasets(df, horizon=12, n_folds=3, mode=CrossValidationMode.constant.value) - ): - for stage_df, borders in zip(stage_dfs, true_borders[i]): - assert stage_df.index.min() == datetime.strptime(borders[0], "%Y-%m-%d %H:%M:%S").date() - assert stage_df.index.max() == datetime.strptime(borders[1], "%Y-%m-%d %H:%M:%S").date() - - -@pytest.mark.parametrize( - "aggregate_metrics,expected_columns", - ( - ( - False, - ["fold_number", "MAE", "MSE", "segment", "SMAPE"], - ), - ( - True, - ["MAE", "MSE", "segment", "SMAPE"], - ), - ), -) -def test_get_metrics_interface(aggregate_metrics: bool, expected_columns: List[str], big_daily_example_tsdf: TSDataset): - """Test interface of TimeSeriesCrossValidation.get_metrics with aggregate_metrics=False mode""" - date_flags = DateFlagsTransform(day_number_in_week=True, day_number_in_month=True) - pipe = Pipeline(model=CatBoostModelMultiSegment(), horizon=24, transforms=[date_flags]) - metrics_df, _, _ = pipe.backtest( - ts=big_daily_example_tsdf, - metrics=[MAE("per-segment"), MSE("per-segment"), SMAPE("per-segment")], - aggregate_metrics=aggregate_metrics, - ) - assert sorted(expected_columns) == sorted(metrics_df.columns) - - -def test_get_forecasts_interface_daily(big_daily_example_tsdf: TSDataset): - """Test interface of TimeSeriesCrossValidation.get_forecasts""" - date_flags = DateFlagsTransform( - day_number_in_week=True, day_number_in_month=True, is_weekend=False, out_column="regressor_dateflag" - ) - pipe = Pipeline(model=CatBoostModelMultiSegment(), horizon=24, transforms=[date_flags]) - _, forecast_df, _ = pipe.backtest( - ts=big_daily_example_tsdf, metrics=[MAE("per-segment"), MSE("per-segment"), SMAPE("per-segment")] - ) - expected_columns = sorted( - ["regressor_dateflag_day_number_in_month", "regressor_dateflag_day_number_in_week", "fold_number", "target"] - ) - print(forecast_df.head()) - assert expected_columns == sorted(set(forecast_df.columns.get_level_values("feature"))) - - -def test_get_forecasts_interface_hours(example_tsdf: TSDataset): - """Test interface of TimeSeriesCrossValidation.get_forecasts""" - date_flags = DateFlagsTransform( - day_number_in_week=True, day_number_in_month=True, is_weekend=False, out_column="regressor_dateflag" - ) - pipe = Pipeline(model=CatBoostModelMultiSegment(), horizon=24, transforms=[date_flags]) - _, forecast_df, _ = pipe.backtest( - ts=example_tsdf, metrics=[MAE("per-segment"), MSE("per-segment"), SMAPE("per-segment")] - ) - expected_columns = sorted( - ["regressor_dateflag_day_number_in_month", "regressor_dateflag_day_number_in_week", "fold_number", "target"] - ) - assert expected_columns == sorted(set(forecast_df.columns.get_level_values("feature"))) - - -def test_get_fold_info_interface_daily(big_daily_example_tsdf: TSDataset): - """Test interface of TimeSeriesCrossValidation.get_fold_info""" - date_flags = DateFlagsTransform(day_number_in_week=True, day_number_in_month=True) - pipe = Pipeline(model=CatBoostModelMultiSegment(), horizon=24, transforms=[date_flags]) - _, _, info_df = pipe.backtest( - ts=big_daily_example_tsdf, metrics=[MAE("per-segment"), MSE("per-segment"), SMAPE("per-segment")] - ) - expected_columns = ["fold_number", "test_end_time", "test_start_time", "train_end_time", "train_start_time"] - assert expected_columns == list(sorted(info_df.columns)) - - -def test_get_fold_info_interface_hours(example_tsdf: TSDataset): - """Test interface of TimeSeriesCrossValidation.get_fold_info""" - date_flags = DateFlagsTransform(day_number_in_week=True, day_number_in_month=True) - - pipe = Pipeline(model=CatBoostModelMultiSegment(), horizon=24, transforms=[date_flags]) - _, _, info_df = pipe.backtest( - ts=example_tsdf, metrics=[MAE("per-segment"), MSE("per-segment"), SMAPE("per-segment")] - ) - expected_columns = ["fold_number", "test_end_time", "test_start_time", "train_end_time", "train_start_time"] - assert expected_columns == list(sorted(info_df.columns)) - - -@pytest.mark.long -def test_autoregressiveforecaster_backtest_pipeline(big_daily_example_tsdf: TSDataset): - """This test checks that TimeSeriesCrossValidation works with AutoRegressiveForecaster""" - - pipe = Pipeline(model=ProphetModel(), horizon=12) - _, forecast_df, _ = pipe.backtest( - ts=big_daily_example_tsdf, metrics=[MAE("per-segment"), MSE("per-segment"), SMAPE("per-segment")] - ) - assert isinstance(forecast_df, pd.DataFrame) - - -@pytest.mark.long -def test_backtest_with_n_jobs(big_example_tsdf: TSDataset): - """Check that backtest pipeline gives equal results in case of one and multiple jobs.""" - df1 = TSDataset(deepcopy(big_example_tsdf.df), freq=big_example_tsdf.freq) - df2 = TSDataset(deepcopy(big_example_tsdf.df), freq=big_example_tsdf.freq) - date_flags_1 = DateFlagsTransform(day_number_in_week=True, day_number_in_month=True) - date_flags_2 = DateFlagsTransform(day_number_in_week=True, day_number_in_month=True) - - pipe_1 = Pipeline(model=CatBoostModelMultiSegment(), horizon=24, transforms=[date_flags_1]) - pipe_2 = Pipeline(model=CatBoostModelMultiSegment(), horizon=24, transforms=[date_flags_2]) - _, forecast_df_1, _ = pipe_1.backtest( - ts=df1, metrics=[MAE("per-segment"), MSE("per-segment"), SMAPE("per-segment")], n_jobs=1 - ) - _, forecast_df_2, _ = pipe_2.backtest( - ts=df2, metrics=[MAE("per-segment"), MSE("per-segment"), SMAPE("per-segment")], n_jobs=3 - ) - assert (forecast_df_1 == forecast_df_2).all().all()