tinkoff-ai · Mr-Geekman · Mar 23, 2023 · Jan 19, 2023 · Jan 31, 2023 · Feb 8, 2023
diff --git a/.github/workflows/docs-on-pr.yml b/.github/workflows/docs-on-pr.yml
@@ -16,6 +16,7 @@ jobs:
        - name: Install Poetry
          uses: snok/install-poetry@v1
          with:
+           version: 1.4.0  # TODO: remove this version pin once Poetry is fixed upstream
            virtualenvs-create: true
            virtualenvs-in-project: true
        - name: Load cached venv

diff --git a/.github/workflows/docs-unstable.yml b/.github/workflows/docs-unstable.yml
@@ -17,6 +17,7 @@ jobs:
       - name: Install Poetry
         uses: snok/install-poetry@v1
         with:
+          version: 1.4.0  # TODO: remove this version pin once Poetry is fixed upstream
           virtualenvs-create: true
           virtualenvs-in-project: true
       - name: Load cached venv

diff --git a/.github/workflows/notebooks.yml b/.github/workflows/notebooks.yml
@@ -26,6 +26,7 @@ jobs:
       - name: Install Poetry
         uses: snok/install-poetry@v1
         with:
+          version: 1.4.0  # TODO: remove this version pin once Poetry is fixed upstream
           virtualenvs-create: true
           virtualenvs-in-project: true
       - name: Install dependencies

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -21,7 +21,7 @@ jobs:
 
       - name: Install Dependencies
         run: |
-          pip install poetry
+          pip install poetry==1.4.0 # TODO: remove this version pin once Poetry is fixed upstream
           poetry --version
           poetry config virtualenvs.in-project true
           poetry install -E style --no-root
@@ -48,6 +48,7 @@ jobs:
       - name: Install Poetry
         uses: snok/install-poetry@v1
         with:
+          version: 1.4.0  # TODO: remove this version pin once Poetry is fixed upstream
           virtualenvs-create: true
           virtualenvs-in-project: true
 
@@ -86,6 +87,7 @@ jobs:
       - name: Install Poetry
         uses: snok/install-poetry@v1
         with:
+          version: 1.4.0  # TODO: remove this version pin once Poetry is fixed upstream
           virtualenvs-create: true
           virtualenvs-in-project: true
 
@@ -123,6 +125,7 @@ jobs:
       - name: Install Poetry
         uses: snok/install-poetry@v1
         with:
+          version: 1.4.0  # TODO: remove this version pin once Poetry is fixed upstream
           virtualenvs-create: true
           virtualenvs-in-project: true
 
@@ -160,6 +163,7 @@ jobs:
       - name: Install Poetry
         uses: snok/install-poetry@v1
         with:
+          version: 1.4.0  # TODO: remove this version pin once Poetry is fixed upstream
           virtualenvs-create: true
           virtualenvs-in-project: true
 
@@ -199,6 +203,7 @@ jobs:
       - name: Install Poetry
         uses: snok/install-poetry@v1
         with:
+          version: 1.4.0  # TODO: remove this version pin once Poetry is fixed upstream
           virtualenvs-create: true
           virtualenvs-in-project: true
 

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,18 +7,32 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 ### Added
--
+- Add `refit` parameter into `backtest` ([#1159](https://github.com/tinkoff-ai/etna/pull/1159))
+- Add `stride` parameter into `backtest` ([#1165](https://github.com/tinkoff-ai/etna/pull/1165))
+- Add optional parameter `ts` into `forecast` method of pipelines ([#1071](https://github.com/tinkoff-ai/etna/pull/1071))
+- Add tests on `transform` method of transforms on subset of segments, on new segments, on future with gap ([#1094](https://github.com/tinkoff-ai/etna/pull/1094))
+- Add tests on `inverse_transform` method of transforms on subset of segments, on new segments, on future with gap ([#1127](https://github.com/tinkoff-ai/etna/pull/1127))
 - 
 - 
 - 
 - 
 ### Changed
--
+- Add more scenarios into tests for models ([#1082](https://github.com/tinkoff-ai/etna/pull/1082))
+- Decouple `SeasonalMovingAverageModel` from `PerSegmentModelMixin` ([#1132](https://github.com/tinkoff-ai/etna/pull/1132))
+- Decouple `DeadlineMovingAverageModel` from `PerSegmentModelMixin` ([#1140](https://github.com/tinkoff-ai/etna/pull/1140))
 - 
 - 
 - 
 ### Fixed
--
+- Fix inference tests on new segments for `DeepARModel` and `TFTModel` ([#1109](https://github.com/tinkoff-ai/etna/pull/1109))
+- Fix alignment during forecasting in new NNs, add validation of context size during forecasting in new NNs, add validation of batch in `MLPNet` ([#1108](https://github.com/tinkoff-ai/etna/pull/1108))
+- Fix `MeanSegmentEncoderTransform` to work with subset of segments and raise error on new segments ([#1104](https://github.com/tinkoff-ai/etna/pull/1104))
+- Fix outliers transforms on future with gap ([#1147](https://github.com/tinkoff-ai/etna/pull/1147))
+- Fix `SegmentEncoderTransform` to work with subset of segments and raise error on new segments ([#1103](https://github.com/tinkoff-ai/etna/pull/1103))
+- Fix `SklearnTransform` in per-segment mode to work on subset of segments and raise error on new segments ([#1107](https://github.com/tinkoff-ai/etna/pull/1107))
+- Fix `OutliersTransform` and its children to raise error on new segments ([#1139](https://github.com/tinkoff-ai/etna/pull/1139))
+- Fix `DifferencingTransform` to raise error on new segments during `transform` and `inverse_transform` in inplace mode ([#1141](https://github.com/tinkoff-ai/etna/pull/1141))
+- Teach `DifferencingTransform` to `inverse_transform` with NaNs ([#1155](https://github.com/tinkoff-ai/etna/pull/1155))
 - 
 - 
 - 

diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst
@@ -17,3 +17,4 @@ Tutorials
    tutorials/NN_examples
    tutorials/classification
    tutorials/hierarchical_pipeline
+   tutorials/inference
diff --git a/etna/ensembles/direct_ensemble.py b/etna/ensembles/direct_ensemble.py
@@ -122,17 +122,14 @@ def _merge(self, forecasts: List[TSDataset]) -> TSDataset:
         forecast_dataset = TSDataset(df=forecast_df, freq=forecasts[0].freq)
         return forecast_dataset
 
-    def _forecast(self) -> TSDataset:
+    def _forecast(self, ts: TSDataset) -> TSDataset:
         """Make predictions.
 
         In each point in the future, forecast of the ensemble is forecast of base pipeline with the shortest horizon,
         which covers this point.
         """
-        if self.ts is None:
-            raise ValueError("Something went wrong, ts is None!")
-
         forecasts = Parallel(n_jobs=self.n_jobs, backend="multiprocessing", verbose=11)(
-            delayed(self._forecast_pipeline)(pipeline=pipeline) for pipeline in self.pipelines
+            delayed(self._forecast_pipeline)(pipeline=pipeline, ts=ts) for pipeline in self.pipelines
         )
         forecast = self._merge(forecasts=forecasts)
         return forecast

diff --git a/etna/ensembles/mixins.py b/etna/ensembles/mixins.py
@@ -41,10 +41,10 @@ def _fit_pipeline(pipeline: BasePipeline, ts: TSDataset) -> BasePipeline:
         return pipeline
 
     @staticmethod
-    def _forecast_pipeline(pipeline: BasePipeline) -> TSDataset:
+    def _forecast_pipeline(pipeline: BasePipeline, ts: TSDataset) -> TSDataset:
         """Make forecast with given pipeline."""
         tslogger.log(msg=f"Start forecasting with {pipeline}.")
-        forecast = pipeline.forecast()
+        forecast = pipeline.forecast(ts=ts)
         tslogger.log(msg=f"Forecast is done with {pipeline}.")
         return forecast
 

diff --git a/etna/ensembles/stacking_ensemble.py b/etna/ensembles/stacking_ensemble.py
@@ -8,7 +8,6 @@
 from typing import Set
 from typing import Tuple
 from typing import Union
-from typing import cast
 
 import numpy as np
 import pandas as pd
@@ -160,7 +159,7 @@ def fit(self, ts: TSDataset) -> "StackingEnsemble":
 
         # Fit the final model
         self.filtered_features_for_final_model = self._filter_features_to_use(forecasts)
-        x, y = self._make_features(forecasts=forecasts, train=True)
+        x, y = self._make_features(ts=self.ts, forecasts=forecasts, train=True)
         self.final_model.fit(x, y)
 
         # Fit the base models
@@ -170,12 +169,9 @@ def fit(self, ts: TSDataset) -> "StackingEnsemble":
         return self
 
     def _make_features(
-        self, forecasts: List[TSDataset], train: bool = False
+        self, ts: TSDataset, forecasts: List[TSDataset], train: bool = False
     ) -> Tuple[pd.DataFrame, Optional[pd.Series]]:
         """Prepare features for the ``final_model``."""
-        if self.ts is None:
-            raise ValueError("StackingEnsemble is not fitted! Fit the StackingEnsemble before calling forecast method.")
-
         # Stack targets from the forecasts
         targets = [
             forecast[:, :, "target"].rename({"target": f"regressor_target_{i}"}, axis=1)
@@ -201,51 +197,45 @@ def _make_features(
         features_df = pd.concat([features, targets], axis=1)
 
         # Flatten the features to fit the sklearn interface
-        x = pd.concat([features_df.loc[:, segment] for segment in self.ts.segments], axis=0)
+        x = pd.concat([features_df.loc[:, segment] for segment in ts.segments], axis=0)
         if train:
             y = pd.concat(
-                [
-                    self.ts[forecasts[0].index.min() : forecasts[0].index.max(), segment, "target"]
-                    for segment in self.ts.segments
-                ],
+                [ts[forecasts[0].index.min() : forecasts[0].index.max(), segment, "target"] for segment in ts.segments],
                 axis=0,
             )
             return x, y
         else:
             return x, None
 
-    def _process_forecasts(self, forecasts: List[TSDataset]) -> TSDataset:
-        x, _ = self._make_features(forecasts=forecasts, train=False)
-        self.ts = cast(TSDataset, self.ts)
+    def _process_forecasts(self, ts: TSDataset, forecasts: List[TSDataset]) -> TSDataset:
+        x, _ = self._make_features(ts=ts, forecasts=forecasts, train=False)
         y = self.final_model.predict(x)
         num_segments = len(forecasts[0].segments)
         y = y.reshape(num_segments, -1).T
         num_timestamps = y.shape[0]
 
         # Format the forecast into TSDataset
-        segment_col = [segment for segment in self.ts.segments for _ in range(num_timestamps)]
+        segment_col = [segment for segment in ts.segments for _ in range(num_timestamps)]
         x.loc[:, "segment"] = segment_col
         x.loc[:, "timestamp"] = x.index.values
         df_exog = TSDataset.to_dataset(x)
 
         df = forecasts[0][:, :, "target"].copy()
         df.loc[pd.IndexSlice[:], pd.IndexSlice[:, "target"]] = np.NAN
 
-        result = TSDataset(df=df, freq=self.ts.freq, df_exog=df_exog)
+        result = TSDataset(df=df, freq=ts.freq, df_exog=df_exog)
         result.loc[pd.IndexSlice[:], pd.IndexSlice[:, "target"]] = y
         return result
 
-    def _forecast(self) -> TSDataset:
+    def _forecast(self, ts: TSDataset) -> TSDataset:
         """Make predictions.
 
         Compute the combination of pipelines' forecasts using ``final_model``
         """
-        if self.ts is None:
-            raise ValueError("Something went wrong, ts is None!")
         forecasts = Parallel(n_jobs=self.n_jobs, **self.joblib_params)(
-            delayed(self._forecast_pipeline)(pipeline=pipeline) for pipeline in self.pipelines
+            delayed(self._forecast_pipeline)(pipeline=pipeline, ts=ts) for pipeline in self.pipelines
         )
-        forecast = self._process_forecasts(forecasts=forecasts)
+        forecast = self._process_forecasts(ts=ts, forecasts=forecasts)
         return forecast
 
     def _predict(
@@ -265,5 +255,5 @@ def _predict(
             )
             for pipeline in self.pipelines
         )
-        prediction = self._process_forecasts(forecasts=predictions)
+        prediction = self._process_forecasts(ts=ts, forecasts=predictions)
         return prediction
diff --git a/etna/ensembles/voting_ensemble.py b/etna/ensembles/voting_ensemble.py
@@ -5,7 +5,6 @@
 from typing import Optional
 from typing import Sequence
 from typing import Union
-from typing import cast
 
 import pandas as pd
 from joblib import Parallel
@@ -199,16 +198,13 @@ def _vote(self, forecasts: List[TSDataset]) -> TSDataset:
         forecast_dataset = TSDataset(df=forecast_df, freq=forecasts[0].freq)
         return forecast_dataset
 
-    def _forecast(self) -> TSDataset:
+    def _forecast(self, ts: TSDataset) -> TSDataset:
         """Make predictions.
 
         Compute weighted average of pipelines' forecasts
         """
-        if self.ts is None:
-            raise ValueError("Something went wrong, ts is None!")
-
         forecasts = Parallel(n_jobs=self.n_jobs, backend="multiprocessing", verbose=11)(
-            delayed(self._forecast_pipeline)(pipeline=pipeline) for pipeline in self.pipelines
+            delayed(self._forecast_pipeline)(pipeline=pipeline, ts=ts) for pipeline in self.pipelines
         )
         forecast = self._vote(forecasts=forecasts)
         return forecast
@@ -224,7 +220,6 @@ def _predict(
         if prediction_interval:
             raise NotImplementedError(f"Ensemble {self.__class__.__name__} doesn't support prediction intervals!")
 
-        self.ts = cast(TSDataset, self.ts)
         predictions = Parallel(n_jobs=self.n_jobs, backend="multiprocessing", verbose=11)(
             delayed(self._predict_pipeline)(
                 ts=ts, pipeline=pipeline, start_timestamp=start_timestamp, end_timestamp=end_timestamp

diff --git a/etna/models/base.py b/etna/models/base.py
@@ -622,16 +622,24 @@ def forecast(self, ts: "TSDataset", prediction_size: int) -> "TSDataset":
         :
             Dataset with predictions
         """
+        expected_length = prediction_size + self.encoder_length
+        if len(ts.index) < expected_length:
+            raise ValueError(
+                "Given context isn't big enough, try to decrease context_size, prediction_size or increase length of given dataset!"
+            )
+
         test_dataset = ts.to_torch_dataset(
             make_samples=functools.partial(
                 self.net.make_samples, encoder_length=self.encoder_length, decoder_length=prediction_size
             ),
             dropna=False,
         )
         predictions = self.raw_predict(test_dataset)
-        future_ts = ts.tsdataset_idx_slice(start_idx=self.encoder_length, end_idx=self.encoder_length + prediction_size)
+        end_idx = len(ts.index)
+        future_ts = ts.tsdataset_idx_slice(start_idx=end_idx - prediction_size, end_idx=end_idx)
         for (segment, feature_nm), value in predictions.items():
-            future_ts.df.loc[:, pd.IndexSlice[segment, feature_nm]] = value[:prediction_size, :]
+            # cast explicitly to float64: otherwise the assignment could change the column dtype to float32
+            future_ts.df.loc[:, pd.IndexSlice[segment, feature_nm]] = value[:prediction_size, :].astype(np.float64)
 
         future_ts.inverse_transform()