From 5687efc7bdd0b3e51df45c272d5055f3eb8633c1 Mon Sep 17 00:00:00 2001
From: "d.a.bunin" <d.a.bunin@tinkoff.ru>
Date: Fri, 23 Sep 2022 13:28:08 +0300
Subject: [PATCH 1/4] Add predict to pipeline and tests for it

---
 etna/pipeline/base.py            | 129 ++++++++++++++++++++++++++
 tests/test_pipeline/test_base.py | 152 +++++++++++++++++++++++++++++++
 2 files changed, 281 insertions(+)
 create mode 100644 tests/test_pipeline/test_base.py

diff --git a/etna/pipeline/base.py b/etna/pipeline/base.py
index b37a23c68..a0db147f8 100644
--- a/etna/pipeline/base.py
+++ b/etna/pipeline/base.py
@@ -150,6 +150,48 @@ def forecast(
         """
         pass
 
+    @abstractmethod
+    def predict(
+        self,
+        start_timestamp: Optional[pd.Timestamp] = None,
+        end_timestamp: Optional[pd.Timestamp] = None,
+        prediction_interval: bool = False,
+        quantiles: Sequence[float] = (0.025, 0.975),
+    ) -> TSDataset:
+        """Make in-sample predictions in a given range.
+
+        Currently, in situation when segments start with different timestamps
+        we only guarantee to work with ``start_timestamp`` >= beginning of all segments.
+
+        Parameters
+        ----------
+        start_timestamp:
+            First timestamp of prediction range to return, should be >= the first timestamp in ``self.ts``;
+            expected that beginning of each segment <= ``start_timestamp``;
+            if isn't set, the first timestamp at which all segments have already begun is taken.
+        end_timestamp:
+            Last timestamp of prediction range to return; if isn't set the last timestamp of ``self.ts`` is taken.
+            Expected that value is <= the last timestamp of ``self.ts``.
+        prediction_interval:
+            If True returns prediction interval for forecast.
+        quantiles:
+            Levels of prediction distribution. By default 2.5% and 97.5% taken to form a 95% prediction interval.
+
+        Returns
+        -------
+        :
+            Dataset with predictions in ``[start_timestamp, end_timestamp]`` range.
+
+        Raises
+        ------
+        ValueError:
+            Value of ``end_timestamp`` is less than ``start_timestamp``.
+        ValueError:
+            Value of ``start_timestamp`` goes before point where each segment started.
+        ValueError:
+            Value of ``end_timestamp`` goes after the last timestamp.
+        """
+
     @abstractmethod
     def backtest(
         self,
@@ -277,6 +319,93 @@ def forecast(
             )
         return predictions
 
+    def _predict(
+        self,
+        start_timestamp: pd.Timestamp,
+        end_timestamp: pd.Timestamp,
+        prediction_interval: bool,
+        quantiles: Sequence[float],
+    ) -> TSDataset:
+        raise NotImplementedError()
+
+    def predict(
+        self,
+        start_timestamp: Optional[pd.Timestamp] = None,
+        end_timestamp: Optional[pd.Timestamp] = None,
+        prediction_interval: bool = False,
+        quantiles: Sequence[float] = (0.025, 0.975),
+    ) -> TSDataset:
+        """Make in-sample predictions in a given range.
+
+        Currently, in situation when segments start with different timestamps
+        we only guarantee to work with ``start_timestamp`` >= beginning of all segments.
+
+        Parameters
+        ----------
+        start_timestamp:
+            First timestamp of prediction range to return, should be >= the first timestamp in ``self.ts``;
+            expected that beginning of each segment <= ``start_timestamp``;
+            if isn't set, the first timestamp at which all segments have already begun is taken.
+        end_timestamp:
+            Last timestamp of prediction range to return; if isn't set the last timestamp of ``self.ts`` is taken.
+            Expected that value is <= the last timestamp of ``self.ts``.
+        prediction_interval:
+            If True returns prediction interval for forecast.
+        quantiles:
+            Levels of prediction distribution. By default 2.5% and 97.5% taken to form a 95% prediction interval.
+
+        Returns
+        -------
+        :
+            Dataset with predictions in ``[start_timestamp, end_timestamp]`` range.
+
+        Raises
+        ------
+        ValueError:
+            Pipeline wasn't fitted.
+        ValueError:
+            Value of ``end_timestamp`` is less than ``start_timestamp``.
+        ValueError:
+            Value of ``start_timestamp`` goes before point where each segment started.
+        ValueError:
+            Value of ``end_timestamp`` goes after the last timestamp.
+        """
+        # check presence of dataset
+        if self.ts is None:
+            raise ValueError(
+                f"{self.__class__.__name__} is not fitted! Fit the {self.__class__.__name__} "
+                f"before calling predict method."
+            )
+
+        # check timestamps
+        min_timestamp = self.ts.describe()["start_timestamp"].max()
+        max_timestamp = self.ts.index[-1]
+
+        if start_timestamp is None:
+            start_timestamp = min_timestamp
+        if end_timestamp is None:
+            end_timestamp = max_timestamp
+
+        if start_timestamp < min_timestamp:
+            raise ValueError("Value of start_timestamp is less than beginning of some segments!")
+        if end_timestamp > max_timestamp:
+            raise ValueError("Value of end_timestamp is more than ending of dataset!")
+
+        if start_timestamp > end_timestamp:
+            raise ValueError("Value of end_timestamp is less than start_timestamp!")
+
+        # check quantiles
+        self._validate_quantiles(quantiles=quantiles)
+
+        # make prediction
+        prediction = self._predict(
+            start_timestamp=start_timestamp,
+            end_timestamp=end_timestamp,
+            prediction_interval=prediction_interval,
+            quantiles=quantiles,
+        )
+        return prediction
+
     def _init_backtest(self):
         self._folds: Optional[Dict[int, Any]] = None
         self._fold_column = "fold_number"
diff --git a/tests/test_pipeline/test_base.py b/tests/test_pipeline/test_base.py
new file mode 100644
index 000000000..0a6ceacf8
--- /dev/null
+++ b/tests/test_pipeline/test_base.py
@@ -0,0 +1,152 @@
+from typing import Sequence
+from unittest.mock import MagicMock
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from etna.datasets import TSDataset
+from etna.datasets import generate_ar_df
+from etna.pipeline.base import BasePipeline
+
+
+@pytest.fixture
+def ts_with_different_beginnings(example_tsds):
+    df = example_tsds.to_pandas()
+    df.iloc[:5, 0] = np.NaN
+    return TSDataset(df=df, freq="D")
+
+
+class DummyPipeline(BasePipeline):
+    def __init__(self, horizon: int):
+        super().__init__(horizon=horizon)
+
+    def fit(self, ts: TSDataset):
+        self.ts = ts
+        return self
+
+    def _forecast(self):
+        return None
+
+    def _predict(
+        self,
+        start_timestamp: pd.Timestamp,
+        end_timestamp: pd.Timestamp,
+        prediction_interval: bool,
+        quantiles: Sequence[float],
+    ) -> TSDataset:
+        return self.ts
+
+
+@pytest.mark.parametrize("quantiles", [(0.025,), (0.975,), (0.025, 0.975)])
+@pytest.mark.parametrize("prediction_interval", [False, True])
+@pytest.mark.parametrize(
+    "start_timestamp, end_timestamp",
+    [
+        (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-05")),
+        (pd.Timestamp("2020-01-10"), pd.Timestamp("2020-01-15")),
+    ],
+)
+@pytest.mark.parametrize(
+    "ts", [TSDataset(df=TSDataset.to_dataset(generate_ar_df(start_time="2020-01-01", periods=5)), freq="D")]
+)
+def test_predict_pass_params(ts, start_timestamp, end_timestamp, prediction_interval, quantiles):
+    pipeline = DummyPipeline(horizon=5)
+    mock = MagicMock()
+    pipeline._predict = mock
+
+    pipeline.fit(ts)
+    _ = pipeline.predict(
+        start_timestamp=start_timestamp,
+        end_timestamp=end_timestamp,
+        prediction_interval=prediction_interval,
+        quantiles=quantiles,
+    )
+
+    mock.assert_called_once_with(
+        start_timestamp=start_timestamp,
+        end_timestamp=end_timestamp,
+        prediction_interval=prediction_interval,
+        quantiles=quantiles,
+    )
+
+
+def test_predict_fail_not_fitted():
+    pipeline = DummyPipeline(horizon=5)
+    with pytest.raises(ValueError, match="DummyPipeline is not fitted"):
+        _ = pipeline.predict()
+
+
+@pytest.mark.parametrize("ts_name", ["example_tsds", "ts_with_different_beginnings"])
+def test_predict_use_ts_timestamps(ts_name, request):
+    ts = request.getfixturevalue(ts_name)
+    pipeline = DummyPipeline(horizon=5)
+    mock = MagicMock()
+    pipeline._predict = mock
+
+    pipeline.fit(ts)
+    _ = pipeline.predict()
+
+    expected_start_timestamp = ts.describe()["start_timestamp"].max()
+    expected_end_timestamp = ts.index.max()
+
+    mock.assert_called_once_with(
+        start_timestamp=expected_start_timestamp,
+        end_timestamp=expected_end_timestamp,
+        prediction_interval=False,
+        quantiles=(0.025, 0.975),
+    )
+
+
+def test_predict_fail_early_start(example_tsds):
+    pipeline = DummyPipeline(horizon=5)
+    pipeline.fit(example_tsds)
+    start_timestamp = example_tsds.index[0] - pd.DateOffset(days=5)
+
+    with pytest.raises(ValueError, match="Value of start_timestamp is less than beginning of some segments"):
+        _ = pipeline.predict(start_timestamp=start_timestamp)
+
+
+def test_predict_fail_late_end(example_tsds):
+    pipeline = DummyPipeline(horizon=5)
+
+    pipeline.fit(example_tsds)
+    end_timestamp = example_tsds.index[-1] + pd.DateOffset(days=5)
+
+    with pytest.raises(ValueError, match="Value of end_timestamp is more than ending of dataset"):
+        _ = pipeline.predict(end_timestamp=end_timestamp)
+
+
+def test_predict_fail_start_later_than_end(example_tsds):
+    pipeline = DummyPipeline(horizon=5)
+
+    pipeline.fit(example_tsds)
+    start_timestamp = example_tsds.index[2]
+    end_timestamp = example_tsds.index[0]
+
+    with pytest.raises(ValueError, match="Value of end_timestamp is less than start_timestamp"):
+        _ = pipeline.predict(start_timestamp=start_timestamp, end_timestamp=end_timestamp)
+
+
+@pytest.mark.parametrize("quantiles", [(0.025,), (0.975,), (0.025, 0.975)])
+def test_predict_validate_quantiles(quantiles, example_tsds):
+    pipeline = DummyPipeline(horizon=5)
+    mock = MagicMock()
+    pipeline._validate_quantiles = mock
+
+    pipeline.fit(example_tsds)
+    _ = pipeline.predict(prediction_interval=True, quantiles=quantiles)
+
+    mock.assert_called_once_with(quantiles=quantiles)
+
+
+def test_predict_return_private_predict(example_tsds):
+    pipeline = DummyPipeline(horizon=5)
+    mock = MagicMock()
+    pipeline._predict = mock
+    expected_result = mock.return_value
+
+    pipeline.fit(example_tsds)
+    returned_result = pipeline.predict()
+
+    assert returned_result == expected_result

From 6ccf82a2d79d73dc84d44e493a2d143c1b547598 Mon Sep 17 00:00:00 2001
From: "d.a.bunin" <d.a.bunin@tinkoff.ru>
Date: Fri, 23 Sep 2022 13:29:47 +0300
Subject: [PATCH 2/4] Update changelog

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 054a7ce4d..37ebad246 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,7 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - `DeadlineMovingAverageModel` ([#827](https://github.com/tinkoff-ai/etna/pull/827))
 - `DirectEnsemble` ([#824](https://github.com/tinkoff-ai/etna/pull/824))
 - 
-- 
+- Add `predict` method to pipelines ([#954](https://github.com/tinkoff-ai/etna/pull/954))
 - Implement predict method in `SARIMAXModel`, `AutoARIMAModel`, `SeasonalMovingAverageModel`, `DeadlineMovingAverageModel` ([#948](https://github.com/tinkoff-ai/etna/pull/948))
 - Make `SeasonalMovingAverageModel` and `DeadlineMovingAverageModel` to work with context ([#917](https://github.com/tinkoff-ai/etna/pull/917))
 - 

From e249cb8739db740dc92366a7ae055ea5762dc2fa Mon Sep 17 00:00:00 2001
From: "d.a.bunin" <d.a.bunin@tinkoff.ru>
Date: Fri, 23 Sep 2022 14:33:09 +0300
Subject: [PATCH 3/4] Fix tests

---
 tests/test_pipeline/test_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_pipeline/test_base.py b/tests/test_pipeline/test_base.py
index 0a6ceacf8..ad906e7dd 100644
--- a/tests/test_pipeline/test_base.py
+++ b/tests/test_pipeline/test_base.py
@@ -48,7 +48,7 @@ def _predict(
     ],
 )
 @pytest.mark.parametrize(
-    "ts", [TSDataset(df=TSDataset.to_dataset(generate_ar_df(start_time="2020-01-01", periods=5)), freq="D")]
+    "ts", [TSDataset(df=TSDataset.to_dataset(generate_ar_df(start_time="2020-01-01", periods=50)), freq="D")]
 )
 def test_predict_pass_params(ts, start_timestamp, end_timestamp, prediction_interval, quantiles):
     pipeline = DummyPipeline(horizon=5)

From c8c772fc17dcce07258d40dd792a4e049085d6a3 Mon Sep 17 00:00:00 2001
From: "d.a.bunin" <d.a.bunin@tinkoff.ru>
Date: Mon, 26 Sep 2022 15:47:17 +0300
Subject: [PATCH 4/4] Remove redundant fixture

---
 tests/test_pipeline/test_base.py | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/tests/test_pipeline/test_base.py b/tests/test_pipeline/test_base.py
index ad906e7dd..078cfb68a 100644
--- a/tests/test_pipeline/test_base.py
+++ b/tests/test_pipeline/test_base.py
@@ -1,7 +1,6 @@
 from typing import Sequence
 from unittest.mock import MagicMock
 
-import numpy as np
 import pandas as pd
 import pytest
 
@@ -10,13 +9,6 @@
 from etna.pipeline.base import BasePipeline
 
 
-@pytest.fixture
-def ts_with_different_beginnings(example_tsds):
-    df = example_tsds.to_pandas()
-    df.iloc[:5, 0] = np.NaN
-    return TSDataset(df=df, freq="D")
-
-
 class DummyPipeline(BasePipeline):
     def __init__(self, horizon: int):
         super().__init__(horizon=horizon)
@@ -77,7 +69,7 @@ def test_predict_fail_not_fitted():
         _ = pipeline.predict()
 
 
-@pytest.mark.parametrize("ts_name", ["example_tsds", "ts_with_different_beginnings"])
+@pytest.mark.parametrize("ts_name", ["example_tsds", "ts_with_different_series_length"])
 def test_predict_use_ts_timestamps(ts_name, request):
     ts = request.getfixturevalue(ts_name)
     pipeline = DummyPipeline(horizon=5)