From cfa4cd3474beb07d7a399014373667c16dc04551 Mon Sep 17 00:00:00 2001
From: lschilders <lars.schilders@alliander.com>
Date: Fri, 27 Sep 2024 14:28:36 +0200
Subject: [PATCH 01/21] postprocess imputation df by putting back trailing
 NaNs

Signed-off-by: lschilders <lars.schilders@alliander.com>
---
 .../feature_engineering/missing_values_transformer.py     | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/openstef/feature_engineering/missing_values_transformer.py b/openstef/feature_engineering/missing_values_transformer.py
index b2ced0895..5b7303d71 100644
--- a/openstef/feature_engineering/missing_values_transformer.py
+++ b/openstef/feature_engineering/missing_values_transformer.py
@@ -83,7 +83,13 @@ def transform(self, x) -> pd.DataFrame:
 
         x = x[self.non_null_feature_names]
 
-        return self.imputer_.transform(x)
+        transformed = self.imputer_.transform(x)
+
+        # Do not impute for trailing missing values
+        trailing_nans = x.bfill().isna().to_numpy()
+        transformed = transformed.where(~trailing_nans, np.nan)
+
+        return transformed
 
     def fit_transform(self, x, y=None):
         """Fit the imputer on the input data and transform it.

From ca5f858acd986ec1eceb20e106f4f7b004a79d6f Mon Sep 17 00:00:00 2001
From: lschilders <lars.schilders@alliander.com>
Date: Fri, 27 Sep 2024 14:29:13 +0200
Subject: [PATCH 02/21] add unit test test_no_imputation_for_future_data

Signed-off-by: lschilders <lars.schilders@alliander.com>
---
 .../test_missing_values_transformer.py        | 27 +++++++++++--------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/test/unit/feature_engineering/test_missing_values_transformer.py b/test/unit/feature_engineering/test_missing_values_transformer.py
index 6a50157e7..b4e7e6376 100644
--- a/test/unit/feature_engineering/test_missing_values_transformer.py
+++ b/test/unit/feature_engineering/test_missing_values_transformer.py
@@ -15,41 +15,46 @@
 class MissingValuesTransformerTests(BaseTestCase):
     def setUp(self):
         self.data = pd.DataFrame(
-            {"A": [1, np.nan, 3], "B": [4, 5, np.nan], "C": [np.nan, np.nan, np.nan]}
+            {"A": [np.nan, 2, 3], "B": [1, np.nan, 3], "C": [4, 5, np.nan], "D": [np.nan, np.nan, np.nan]}
         )
 
     def test_imputation_with_mean_strategy_fills_missing_values(self):
         transformer = MissingValuesTransformer(imputation_strategy="mean")
         transformed = transformer.fit_transform(self.data)
-        self.assertEqual(transformed.isnull().sum().sum(), 0)
-        self.assertAlmostEqual(transformed.loc[1, "A"], 2)
-        self.assertAlmostEqual(transformed.loc[2, "B"], 4.5)
+        self.assertEqual(transformed[["A", "B"]].isnull().sum().sum(), 0)
+        self.assertAlmostEqual(transformed.loc[0, "A"], 2.5)
+        self.assertAlmostEqual(transformed.loc[1, "B"], 2)
 
     def test_imputation_with_constant_strategy_fills_missing_values(self):
         transformer = MissingValuesTransformer(
             imputation_strategy="constant", fill_value=0
         )
         transformed = transformer.fit_transform(self.data)
-        self.assertEqual(transformed.isnull().sum().sum(), 0)
-        self.assertEqual(transformed.loc[1, "A"], 0)
-        self.assertEqual(transformed.loc[2, "B"], 0)
+        self.assertEqual(transformed[["A", "B"]].isnull().sum().sum(), 0)
+        self.assertEqual(transformed.loc[0, "A"], 0)
+        self.assertEqual(transformed.loc[1, "B"], 0)
 
     def test_columns_always_null_are_removed(self):
         transformer = MissingValuesTransformer()
         transformer.fit(self.data)
-        self.assertNotIn("C", transformer.non_null_feature_names)
+        self.assertNotIn("D", transformer.non_null_feature_names)
 
     def test_non_dataframe_input_is_converted_and_processed(self):
         transformer = MissingValuesTransformer(imputation_strategy="mean")
-        array = np.array([[1, np.nan], [np.nan, 2]])
+        array = np.array([[1, np.nan, np.nan], [np.nan, 2, np.nan]])
         transformed = transformer.fit_transform(array)
         self.assertIsInstance(transformed, pd.DataFrame)
-        self.assertEqual(transformed.isnull().sum().sum(), 0)
+        self.assertEqual(transformed.isnull().sum().sum(), 1)
 
     def test_fitting_transformer_without_strategy_keeps_data_unchanged(self):
         transformer = MissingValuesTransformer()
         transformed = transformer.fit_transform(self.data)
-        pd.testing.assert_frame_equal(transformed, self.data.drop(columns=["C"]))
+        pd.testing.assert_frame_equal(transformed, self.data.drop(columns=["D"]))
+
+    def test_no_imputation_for_future_data(self):
+        transformer = MissingValuesTransformer(imputation_strategy="mean")
+        transformed = transformer.fit_transform(self.data)
+        self.assertIsNAN(transformed.loc[2, "C"])
 
     def test_calling_transform_before_fit_raises_error(self):
         transformer = MissingValuesTransformer()

From 19f5104575a77ff0832e6101b4a09f15f43b42b9 Mon Sep 17 00:00:00 2001
From: lschilders <lars.schilders@alliander.com>
Date: Fri, 27 Sep 2024 14:53:08 +0200
Subject: [PATCH 03/21] fix unit test test_linear_quantile

Signed-off-by: lschilders <lars.schilders@alliander.com>
---
 test/unit/model/regressors/test_linear_quantile.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/unit/model/regressors/test_linear_quantile.py b/test/unit/model/regressors/test_linear_quantile.py
index b6ed316a1..6579770fa 100644
--- a/test/unit/model/regressors/test_linear_quantile.py
+++ b/test/unit/model/regressors/test_linear_quantile.py
@@ -56,7 +56,7 @@ def test_imputer(self):
         n_sample = train_input.shape[0]
         X = train_input.iloc[:, 1:].copy(deep=True)
         sp = np.ones(n_sample)
-        sp[-1] = np.nan
+        sp[-2] = np.nan
         X["Sparse"] = sp
         model1 = LinearQuantileOpenstfRegressor(imputation_strategy=None)
         model2 = LinearQuantileOpenstfRegressor(imputation_strategy="mean")

From 6d07d842fc02b7697523c37c0c94e4ea763c33e7 Mon Sep 17 00:00:00 2001
From: lschilders <lars.schilders@alliander.com>
Date: Fri, 27 Sep 2024 15:36:55 +0200
Subject: [PATCH 04/21] fix imports in flatliner.py

Signed-off-by: lschilders <lars.schilders@alliander.com>
---
 openstef/model/regressors/flatliner.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/openstef/model/regressors/flatliner.py b/openstef/model/regressors/flatliner.py
index 764773d52..995052bbf 100644
--- a/openstef/model/regressors/flatliner.py
+++ b/openstef/model/regressors/flatliner.py
@@ -2,18 +2,13 @@
 #
 # SPDX-License-Identifier: MPL-2.0
 import re
-from typing import Dict, Union, Set, Optional, List
+from typing import List
 
 import numpy as np
 import pandas as pd
 from sklearn.base import RegressorMixin
-from sklearn.linear_model import QuantileRegressor
-from sklearn.preprocessing import MinMaxScaler
 from sklearn.utils.validation import check_is_fitted
 
-from openstef.feature_engineering.missing_values_transformer import (
-    MissingValuesTransformer,
-)
 from openstef.model.regressors.regressor import OpenstfRegressor
 
 

From 8a96d251b184c00b866110c356d3b74349f62f98 Mon Sep 17 00:00:00 2001
From: lschilders <lars.schilders@alliander.com>
Date: Fri, 27 Sep 2024 16:52:41 +0200
Subject: [PATCH 05/21] remove columns with future nan values

Signed-off-by: lschilders <lars.schilders@alliander.com>
---
 .../missing_values_transformer.py              |  7 ++-----
 openstef/model/regressors/linear_quantile.py   |  2 +-
 .../test_missing_values_transformer.py         | 18 +++++++++---------
 3 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/openstef/feature_engineering/missing_values_transformer.py b/openstef/feature_engineering/missing_values_transformer.py
index 5b7303d71..bec517b7b 100644
--- a/openstef/feature_engineering/missing_values_transformer.py
+++ b/openstef/feature_engineering/missing_values_transformer.py
@@ -68,7 +68,8 @@ def fit(self, x, y=None):
 
         # Remove always null columns
         is_column_null = x.isnull().all(axis="index")
-        self.non_null_feature_names = list(x.columns[~is_column_null])
+        trailing_null_columns = x.bfill().isnull().any(axis="index")
+        self.non_null_feature_names = list(x.columns[~(is_column_null | trailing_null_columns)])
 
         # Imputers do not support labels
         self.imputer_.fit(X=x[self.non_null_feature_names], y=None)
@@ -85,10 +86,6 @@ def transform(self, x) -> pd.DataFrame:
 
         transformed = self.imputer_.transform(x)
 
-        # Do not impute for trailing missing values
-        trailing_nans = x.bfill().isna().to_numpy()
-        transformed = transformed.where(~trailing_nans, np.nan)
-
         return transformed
 
     def fit_transform(self, x, y=None):
diff --git a/openstef/model/regressors/linear_quantile.py b/openstef/model/regressors/linear_quantile.py
index 5e64fa4b3..b6d9c1e43 100644
--- a/openstef/model/regressors/linear_quantile.py
+++ b/openstef/model/regressors/linear_quantile.py
@@ -166,7 +166,7 @@ def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> RegressorMixin:
 
         # Fix nan columns
         x = self.imputer_.fit_transform(x)
-        if x.isna().any().any():
+        if (x.isna() & ~x.bfill().isna()).any().any():
             raise ValueError(
                 "There are nan values in the input data. Set "
                 "imputation_strategy to solve them."
diff --git a/test/unit/feature_engineering/test_missing_values_transformer.py b/test/unit/feature_engineering/test_missing_values_transformer.py
index b4e7e6376..4dc45e2e5 100644
--- a/test/unit/feature_engineering/test_missing_values_transformer.py
+++ b/test/unit/feature_engineering/test_missing_values_transformer.py
@@ -21,7 +21,7 @@ def setUp(self):
     def test_imputation_with_mean_strategy_fills_missing_values(self):
         transformer = MissingValuesTransformer(imputation_strategy="mean")
         transformed = transformer.fit_transform(self.data)
-        self.assertEqual(transformed[["A", "B"]].isnull().sum().sum(), 0)
+        self.assertEqual(transformed.isnull().sum().sum(), 0)
         self.assertAlmostEqual(transformed.loc[0, "A"], 2.5)
         self.assertAlmostEqual(transformed.loc[1, "B"], 2)
 
@@ -30,7 +30,7 @@ def test_imputation_with_constant_strategy_fills_missing_values(self):
             imputation_strategy="constant", fill_value=0
         )
         transformed = transformer.fit_transform(self.data)
-        self.assertEqual(transformed[["A", "B"]].isnull().sum().sum(), 0)
+        self.assertEqual(transformed.isnull().sum().sum(), 0)
         self.assertEqual(transformed.loc[0, "A"], 0)
         self.assertEqual(transformed.loc[1, "B"], 0)
 
@@ -39,22 +39,22 @@ def test_columns_always_null_are_removed(self):
         transformer.fit(self.data)
         self.assertNotIn("D", transformer.non_null_feature_names)
 
+    def test_columns_with_missing_values_at_end_are_removed(self):
+        transformer = MissingValuesTransformer()
+        transformer.fit(self.data)
+        self.assertNotIn("C", transformer.non_null_feature_names)
+
     def test_non_dataframe_input_is_converted_and_processed(self):
         transformer = MissingValuesTransformer(imputation_strategy="mean")
         array = np.array([[1, np.nan, np.nan], [np.nan, 2, np.nan]])
         transformed = transformer.fit_transform(array)
         self.assertIsInstance(transformed, pd.DataFrame)
-        self.assertEqual(transformed.isnull().sum().sum(), 1)
+        self.assertEqual(transformed.isnull().sum().sum(), 0)
 
     def test_fitting_transformer_without_strategy_keeps_data_unchanged(self):
         transformer = MissingValuesTransformer()
         transformed = transformer.fit_transform(self.data)
-        pd.testing.assert_frame_equal(transformed, self.data.drop(columns=["D"]))
-
-    def test_no_imputation_for_future_data(self):
-        transformer = MissingValuesTransformer(imputation_strategy="mean")
-        transformed = transformer.fit_transform(self.data)
-        self.assertIsNAN(transformed.loc[2, "C"])
+        pd.testing.assert_frame_equal(transformed, self.data.drop(columns=["C", "D"]))
 
     def test_calling_transform_before_fit_raises_error(self):
         transformer = MissingValuesTransformer()

From 0cba05961f8730058ea425c6be6b9ddb20c28f20 Mon Sep 17 00:00:00 2001
From: lschilders <lars.schilders@alliander.com>
Date: Wed, 2 Oct 2024 10:14:09 +0200
Subject: [PATCH 06/21] implement non_trailing_null_rows in
 missing_values_transformer and add unit tests

Signed-off-by: lschilders <lars.schilders@alliander.com>
---
 .../missing_values_transformer.py                   | 13 ++++++++++---
 .../test_missing_values_transformer.py              | 11 ++++++-----
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/openstef/feature_engineering/missing_values_transformer.py b/openstef/feature_engineering/missing_values_transformer.py
index bec517b7b..f323566d0 100644
--- a/openstef/feature_engineering/missing_values_transformer.py
+++ b/openstef/feature_engineering/missing_values_transformer.py
@@ -21,6 +21,7 @@ class MissingValuesTransformer:
     _n_in_features: Optional[int] = None
 
     non_null_feature_names: List[str] = None
+    non_trailing_null_rows: List[int] = None
 
     def __init__(
         self,
@@ -68,11 +69,16 @@ def fit(self, x, y=None):
 
         # Remove always null columns
         is_column_null = x.isnull().all(axis="index")
-        trailing_null_columns = x.bfill().isnull().any(axis="index")
-        self.non_null_feature_names = list(x.columns[~(is_column_null | trailing_null_columns)])
+        self.non_null_feature_names = list(x.columns[~is_column_null])
+        x = x[self.non_null_feature_names]
+
+        # Remove rows with trailing null values
+        row_has_trailing_null = x.bfill().isnull().any(axis="columns")
+        self.non_trailing_null_rows = list(x.index[~row_has_trailing_null])
+        x = x.loc[self.non_trailing_null_rows]
 
         # Imputers do not support labels
-        self.imputer_.fit(X=x[self.non_null_feature_names], y=None)
+        self.imputer_.fit(X=x, y=None)
         self.is_fitted_ = True
 
     def transform(self, x) -> pd.DataFrame:
@@ -83,6 +89,7 @@ def transform(self, x) -> pd.DataFrame:
             x = pd.DataFrame(np.asarray(x))
 
         x = x[self.non_null_feature_names]
+        x = x.loc[self.non_trailing_null_rows]
 
         transformed = self.imputer_.transform(x)
 
diff --git a/test/unit/feature_engineering/test_missing_values_transformer.py b/test/unit/feature_engineering/test_missing_values_transformer.py
index 4dc45e2e5..60504f50e 100644
--- a/test/unit/feature_engineering/test_missing_values_transformer.py
+++ b/test/unit/feature_engineering/test_missing_values_transformer.py
@@ -15,7 +15,7 @@
 class MissingValuesTransformerTests(BaseTestCase):
     def setUp(self):
         self.data = pd.DataFrame(
-            {"A": [np.nan, 2, 3], "B": [1, np.nan, 3], "C": [4, 5, np.nan], "D": [np.nan, np.nan, np.nan]}
+            {"A": [np.nan, 2, 3, 4], "B": [1, np.nan, 3, 4], "C": [3, 4, 5, np.nan], "D": [np.nan, np.nan, np.nan, np.nan]}
         )
 
     def test_imputation_with_mean_strategy_fills_missing_values(self):
@@ -39,10 +39,10 @@ def test_columns_always_null_are_removed(self):
         transformer.fit(self.data)
         self.assertNotIn("D", transformer.non_null_feature_names)
 
-    def test_columns_with_missing_values_at_end_are_removed(self):
+    def test_rows_with_missing_values_at_end_are_removed(self):
         transformer = MissingValuesTransformer()
         transformer.fit(self.data)
-        self.assertNotIn("C", transformer.non_null_feature_names)
+        self.assertEqual(transformer.non_trailing_null_rows, [0, 1, 2])
 
     def test_non_dataframe_input_is_converted_and_processed(self):
         transformer = MissingValuesTransformer(imputation_strategy="mean")
@@ -50,11 +50,12 @@ def test_non_dataframe_input_is_converted_and_processed(self):
         transformed = transformer.fit_transform(array)
         self.assertIsInstance(transformed, pd.DataFrame)
         self.assertEqual(transformed.isnull().sum().sum(), 0)
+        self.assertEqual(transformed.shape, (1, 1))
 
-    def test_fitting_transformer_without_strategy_keeps_data_unchanged(self):
+    def test_fitting_transformer_without_strategy_keeps_valid_data_unchanged(self):
         transformer = MissingValuesTransformer()
         transformed = transformer.fit_transform(self.data)
-        pd.testing.assert_frame_equal(transformed, self.data.drop(columns=["C", "D"]))
+        pd.testing.assert_frame_equal(transformed, self.data.drop(index=3, columns=["D"]))
 
     def test_calling_transform_before_fit_raises_error(self):
         transformer = MissingValuesTransformer()

From 521d77f02686c1462d9d165365e5b8369a4a7123 Mon Sep 17 00:00:00 2001
From: lschilders <lars.schilders@alliander.com>
Date: Wed, 2 Oct 2024 10:33:02 +0200
Subject: [PATCH 07/21] also transform labels y with trailing null rows

Signed-off-by: lschilders <lars.schilders@alliander.com>
---
 openstef/feature_engineering/missing_values_transformer.py | 6 +++++-
 openstef/model/regressors/linear_quantile.py               | 4 ++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/openstef/feature_engineering/missing_values_transformer.py b/openstef/feature_engineering/missing_values_transformer.py
index f323566d0..4320d305f 100644
--- a/openstef/feature_engineering/missing_values_transformer.py
+++ b/openstef/feature_engineering/missing_values_transformer.py
@@ -103,7 +103,11 @@ def fit_transform(self, x, y=None):
 
         """
         self.fit(x, y)
-        return self.transform(x)
+
+        if y is not None:
+            y = y.loc[self.non_trailing_null_rows]
+
+        return self.transform(x), y
 
     @classmethod
     def _identity(cls, x):
diff --git a/openstef/model/regressors/linear_quantile.py b/openstef/model/regressors/linear_quantile.py
index b6d9c1e43..b0c9af91a 100644
--- a/openstef/model/regressors/linear_quantile.py
+++ b/openstef/model/regressors/linear_quantile.py
@@ -165,8 +165,8 @@ def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> RegressorMixin:
         x = self._remove_ignored_features(x)
 
         # Fix nan columns
-        x = self.imputer_.fit_transform(x)
-        if (x.isna() & ~x.bfill().isna()).any().any():
+        x, y = self.imputer_.fit_transform(x, y)
+        if x.isna().any().any():
             raise ValueError(
                 "There are nan values in the input data. Set "
                 "imputation_strategy to solve them."

From 0956e653a0a27da79516e3fede71f0de54daf9a6 Mon Sep 17 00:00:00 2001
From: lschilders <lars.schilders@alliander.com>
Date: Wed, 2 Oct 2024 10:48:09 +0200
Subject: [PATCH 08/21] test in test_linear_quantile for trailing null

Signed-off-by: lschilders <lars.schilders@alliander.com>
---
 test/unit/model/regressors/test_linear_quantile.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/test/unit/model/regressors/test_linear_quantile.py b/test/unit/model/regressors/test_linear_quantile.py
index 6579770fa..93d37618a 100644
--- a/test/unit/model/regressors/test_linear_quantile.py
+++ b/test/unit/model/regressors/test_linear_quantile.py
@@ -55,9 +55,10 @@ def test_imputer(self):
         # Arrange
         n_sample = train_input.shape[0]
         X = train_input.iloc[:, 1:].copy(deep=True)
-        sp = np.ones(n_sample)
-        sp[-2] = np.nan
-        X["Sparse"] = sp
+        X["sparse"] = np.ones(n_sample)
+        X.loc[X.index[-2], "sparse"] = np.nan
+        X["sparse_2"] = np.ones(n_sample)
+        X.loc[X.index[-1], "sparse_2"] = np.nan
         model1 = LinearQuantileOpenstfRegressor(imputation_strategy=None)
         model2 = LinearQuantileOpenstfRegressor(imputation_strategy="mean")
 
@@ -75,6 +76,9 @@ def test_imputer(self):
         X_ = pd.DataFrame(model2.imputer_.transform(X), columns=X.columns)
         self.assertTrue((model2.predict(X_) == model2.predict(X)).all())
 
+        # check if last row is removed because of trailing null values
+        self.assertEqual(X_.shape[0], n_sample - 1)
+
     def test_value_error_raised(self):
         # Check if Value Error is raised when 0.5 is not in the requested quantiles list
         with self.assertRaises(ValueError):

From 7e5a77151c87db45537ace44e3f8fe12e133ab5a Mon Sep 17 00:00:00 2001
From: black <action@github.com>
Date: Wed, 2 Oct 2024 09:22:56 +0000
Subject: [PATCH 09/21] Format Python code with Black

Signed-off-by: black <action@github.com>
---
 .../test_missing_values_transformer.py                | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/test/unit/feature_engineering/test_missing_values_transformer.py b/test/unit/feature_engineering/test_missing_values_transformer.py
index 60504f50e..a72f6972d 100644
--- a/test/unit/feature_engineering/test_missing_values_transformer.py
+++ b/test/unit/feature_engineering/test_missing_values_transformer.py
@@ -15,7 +15,12 @@
 class MissingValuesTransformerTests(BaseTestCase):
     def setUp(self):
         self.data = pd.DataFrame(
-            {"A": [np.nan, 2, 3, 4], "B": [1, np.nan, 3, 4], "C": [3, 4, 5, np.nan], "D": [np.nan, np.nan, np.nan, np.nan]}
+            {
+                "A": [np.nan, 2, 3, 4],
+                "B": [1, np.nan, 3, 4],
+                "C": [3, 4, 5, np.nan],
+                "D": [np.nan, np.nan, np.nan, np.nan],
+            }
         )
 
     def test_imputation_with_mean_strategy_fills_missing_values(self):
@@ -55,7 +60,9 @@ def test_non_dataframe_input_is_converted_and_processed(self):
     def test_fitting_transformer_without_strategy_keeps_valid_data_unchanged(self):
         transformer = MissingValuesTransformer()
         transformed = transformer.fit_transform(self.data)
-        pd.testing.assert_frame_equal(transformed, self.data.drop(index=3, columns=["D"]))
+        pd.testing.assert_frame_equal(
+            transformed, self.data.drop(index=3, columns=["D"])
+        )
 
     def test_calling_transform_before_fit_raises_error(self):
         transformer = MissingValuesTransformer()

From 5a7537df3114741c0ebfafd4d8f13a8b8b35104f Mon Sep 17 00:00:00 2001
From: lschilders <lars.schilders@alliander.com>
Date: Wed, 2 Oct 2024 13:42:05 +0200
Subject: [PATCH 10/21] merge Black formatting in branch

Signed-off-by: lschilders <lars.schilders@alliander.com>
---
 .../missing_values_transformer.py                |  6 ++++--
 .../test_missing_values_transformer.py           | 16 +++++++++++-----
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/openstef/feature_engineering/missing_values_transformer.py b/openstef/feature_engineering/missing_values_transformer.py
index 4320d305f..f0e55498d 100644
--- a/openstef/feature_engineering/missing_values_transformer.py
+++ b/openstef/feature_engineering/missing_values_transformer.py
@@ -95,7 +95,7 @@ def transform(self, x) -> pd.DataFrame:
 
         return transformed
 
-    def fit_transform(self, x, y=None):
+    def fit_transform(self, x, y=None) -> tuple[pd.DataFrame, Optional[pd.Series]]:
         """Fit the imputer on the input data and transform it.
 
         Returns:
@@ -107,7 +107,9 @@ def fit_transform(self, x, y=None):
         if y is not None:
             y = y.loc[self.non_trailing_null_rows]
 
-        return self.transform(x), y
+        x = self.transform(x)
+        assert x.shape[0] == y.shape[0], "Number of rows in x and y should be equal."
+        return x, y
 
     @classmethod
     def _identity(cls, x):
diff --git a/test/unit/feature_engineering/test_missing_values_transformer.py b/test/unit/feature_engineering/test_missing_values_transformer.py
index a72f6972d..86f3c554b 100644
--- a/test/unit/feature_engineering/test_missing_values_transformer.py
+++ b/test/unit/feature_engineering/test_missing_values_transformer.py
@@ -3,7 +3,6 @@
 # SPDX-License-Identifier: MPL-2.0
 from test.unit.utils.base import BaseTestCase
 
-import unittest
 import pandas as pd
 import numpy as np
 from sklearn.exceptions import NotFittedError
@@ -25,7 +24,7 @@ def setUp(self):
 
     def test_imputation_with_mean_strategy_fills_missing_values(self):
         transformer = MissingValuesTransformer(imputation_strategy="mean")
-        transformed = transformer.fit_transform(self.data)
+        transformed, _ = transformer.fit_transform(self.data)
         self.assertEqual(transformed.isnull().sum().sum(), 0)
         self.assertAlmostEqual(transformed.loc[0, "A"], 2.5)
         self.assertAlmostEqual(transformed.loc[1, "B"], 2)
@@ -34,7 +33,7 @@ def test_imputation_with_constant_strategy_fills_missing_values(self):
         transformer = MissingValuesTransformer(
             imputation_strategy="constant", fill_value=0
         )
-        transformed = transformer.fit_transform(self.data)
+        transformed, _ = transformer.fit_transform(self.data)
         self.assertEqual(transformed.isnull().sum().sum(), 0)
         self.assertEqual(transformed.loc[0, "A"], 0)
         self.assertEqual(transformed.loc[1, "B"], 0)
@@ -49,17 +48,24 @@ def test_rows_with_missing_values_at_end_are_removed(self):
         transformer.fit(self.data)
         self.assertEqual(transformer.non_trailing_null_rows, [0, 1, 2])
 
+    def test_fitting_with_labels_removes_rows_with_trailing_nulls(self):
+        transformer = MissingValuesTransformer()
+        _, y_transformed = transformer.fit_transform(
+            self.data, y=pd.Series([1, 2, 3, 4])
+        )
+        self.assertEqual(y_transformed.tolist(), [1, 2, 3])
+
     def test_non_dataframe_input_is_converted_and_processed(self):
         transformer = MissingValuesTransformer(imputation_strategy="mean")
         array = np.array([[1, np.nan, np.nan], [np.nan, 2, np.nan]])
-        transformed = transformer.fit_transform(array)
+        transformed, _ = transformer.fit_transform(array)
         self.assertIsInstance(transformed, pd.DataFrame)
         self.assertEqual(transformed.isnull().sum().sum(), 0)
         self.assertEqual(transformed.shape, (1, 1))
 
     def test_fitting_transformer_without_strategy_keeps_valid_data_unchanged(self):
         transformer = MissingValuesTransformer()
-        transformed = transformer.fit_transform(self.data)
+        transformed, _ = transformer.fit_transform(self.data)
         pd.testing.assert_frame_equal(
             transformed, self.data.drop(index=3, columns=["D"])
         )

From 2db9afade4728c674e3dc31647fc87cc9b2b4cb2 Mon Sep 17 00:00:00 2001
From: lschilders <lars.schilders@alliander.com>
Date: Wed, 2 Oct 2024 14:22:54 +0200
Subject: [PATCH 11/21] remove assert in fit_transform
 missing_values_transformer

Signed-off-by: lschilders <lars.schilders@alliander.com>
---
 openstef/feature_engineering/missing_values_transformer.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/openstef/feature_engineering/missing_values_transformer.py b/openstef/feature_engineering/missing_values_transformer.py
index f0e55498d..15d23d681 100644
--- a/openstef/feature_engineering/missing_values_transformer.py
+++ b/openstef/feature_engineering/missing_values_transformer.py
@@ -108,7 +108,6 @@ def fit_transform(self, x, y=None) -> tuple[pd.DataFrame, Optional[pd.Series]]:
             y = y.loc[self.non_trailing_null_rows]
 
         x = self.transform(x)
-        assert x.shape[0] == y.shape[0], "Number of rows in x and y should be equal."
         return x, y
 
     @classmethod

From a04700e54fe2c2d81f909212ab0843dbf411c498 Mon Sep 17 00:00:00 2001
From: lschilders <lars.schilders@alliander.com>
Date: Wed, 2 Oct 2024 16:11:59 +0200
Subject: [PATCH 12/21] only train on subset of data in
 test_pipeline_train_model

Signed-off-by: lschilders <lars.schilders@alliander.com>
---
 test/unit/pipeline/test_pipeline_train_model.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/test/unit/pipeline/test_pipeline_train_model.py b/test/unit/pipeline/test_pipeline_train_model.py
index f4b5b7491..e68923c0f 100644
--- a/test/unit/pipeline/test_pipeline_train_model.py
+++ b/test/unit/pipeline/test_pipeline_train_model.py
@@ -125,8 +125,6 @@ def test_train_model_pipeline_core_happy_flow(self):
         but it can/should include predictors (e.g. weather data)
 
         """
-        # Select 50 data points to speedup test
-        train_input = self.train_input.iloc[:50, :]
         # Remove modeltypes which are optional, and add a dummy regressor
         for model_type in list(MLModelType) + [__name__ + ".DummyRegressor"]:
             with self.subTest(model_type=model_type):
@@ -136,7 +134,9 @@ def test_train_model_pipeline_core_happy_flow(self):
                     model_type.value if hasattr(model_type, "value") else model_type
                 )
                 model_specs = self.model_specs
-                train_input = self.train_input
+
+                # Select 150 data points to speedup test
+                train_input = self.train_input.iloc[:150, :]
 
                 # Use default parameters
                 model_specs.hyper_params = {}
@@ -155,7 +155,6 @@ def test_train_model_pipeline_core_happy_flow(self):
                         function=split_dummy_arima,
                         arguments={},
                     )
-                    train_input = self.train_input[:150]
 
                 model, report, modelspecs, _ = train_model_pipeline_core(
                     pj=pj, model_specs=model_specs, input_data=train_input

From f73541e908d1075a0d1db4373f5089af03e70d0d Mon Sep 17 00:00:00 2001
From: lschilders <lars.schilders@alliander.com>
Date: Wed, 2 Oct 2024 16:13:41 +0200
Subject: [PATCH 13/21] adapt transform(x) to not drop trailing-null rows and
 index with a boolean Series rather than a list of index labels

Signed-off-by: lschilders <lars.schilders@alliander.com>
---
 openstef/feature_engineering/missing_values_transformer.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/openstef/feature_engineering/missing_values_transformer.py b/openstef/feature_engineering/missing_values_transformer.py
index 15d23d681..6dccdd3cb 100644
--- a/openstef/feature_engineering/missing_values_transformer.py
+++ b/openstef/feature_engineering/missing_values_transformer.py
@@ -21,7 +21,7 @@ class MissingValuesTransformer:
     _n_in_features: Optional[int] = None
 
     non_null_feature_names: List[str] = None
-    non_trailing_null_rows: List[int] = None
+    non_trailing_null_rows: pd.Series = None
 
     def __init__(
         self,
@@ -73,8 +73,7 @@ def fit(self, x, y=None):
         x = x[self.non_null_feature_names]
 
         # Remove rows with trailing null values
-        row_has_trailing_null = x.bfill().isnull().any(axis="columns")
-        self.non_trailing_null_rows = list(x.index[~row_has_trailing_null])
+        self.non_trailing_null_rows = ~x.bfill().isnull().any(axis="columns")
         x = x.loc[self.non_trailing_null_rows]
 
         # Imputers do not support labels
@@ -89,7 +88,6 @@ def transform(self, x) -> pd.DataFrame:
             x = pd.DataFrame(np.asarray(x))
 
         x = x[self.non_null_feature_names]
-        x = x.loc[self.non_trailing_null_rows]
 
         transformed = self.imputer_.transform(x)
 

From ee53800b08fbc0c0e5606ed0a6bee4f6697a0c99 Mon Sep 17 00:00:00 2001
From: lschilders <lars.schilders@alliander.com>
Date: Wed, 2 Oct 2024 16:14:10 +0200
Subject: [PATCH 14/21] test for duplicate indices

Signed-off-by: lschilders <lars.schilders@alliander.com>
---
 .../test_missing_values_transformer.py        | 22 +++++++++++--------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/test/unit/feature_engineering/test_missing_values_transformer.py b/test/unit/feature_engineering/test_missing_values_transformer.py
index 86f3c554b..7947116f0 100644
--- a/test/unit/feature_engineering/test_missing_values_transformer.py
+++ b/test/unit/feature_engineering/test_missing_values_transformer.py
@@ -19,15 +19,16 @@ def setUp(self):
                 "B": [1, np.nan, 3, 4],
                 "C": [3, 4, 5, np.nan],
                 "D": [np.nan, np.nan, np.nan, np.nan],
-            }
+            },
+            index=[0, 1, 1, 2],
         )
 
     def test_imputation_with_mean_strategy_fills_missing_values(self):
         transformer = MissingValuesTransformer(imputation_strategy="mean")
         transformed, _ = transformer.fit_transform(self.data)
         self.assertEqual(transformed.isnull().sum().sum(), 0)
-        self.assertAlmostEqual(transformed.loc[0, "A"], 2.5)
-        self.assertAlmostEqual(transformed.loc[1, "B"], 2)
+        self.assertAlmostEqual(transformed.iloc[0]["A"], 2.5)
+        self.assertAlmostEqual(transformed.iloc[1]["B"], 2)
 
     def test_imputation_with_constant_strategy_fills_missing_values(self):
         transformer = MissingValuesTransformer(
@@ -35,23 +36,26 @@ def test_imputation_with_constant_strategy_fills_missing_values(self):
         )
         transformed, _ = transformer.fit_transform(self.data)
         self.assertEqual(transformed.isnull().sum().sum(), 0)
-        self.assertEqual(transformed.loc[0, "A"], 0)
-        self.assertEqual(transformed.loc[1, "B"], 0)
+        self.assertEqual(transformed.iloc[0]["A"], 0)
+        self.assertEqual(transformed.iloc[1]["B"], 0)
 
     def test_columns_always_null_are_removed(self):
         transformer = MissingValuesTransformer()
         transformer.fit(self.data)
         self.assertNotIn("D", transformer.non_null_feature_names)
 
-    def test_rows_with_missing_values_at_end_are_removed(self):
+    def test_determining_non_trailing_null_rows(self):
         transformer = MissingValuesTransformer()
         transformer.fit(self.data)
-        self.assertEqual(transformer.non_trailing_null_rows, [0, 1, 2])
+        pd.testing.assert_series_equal(
+            transformer.non_trailing_null_rows,
+            pd.Series([True, True, True, False], index=[0, 1, 1, 2]),
+        )
 
     def test_fitting_with_labels_removes_rows_with_trailing_nulls(self):
         transformer = MissingValuesTransformer()
         _, y_transformed = transformer.fit_transform(
-            self.data, y=pd.Series([1, 2, 3, 4])
+            self.data, y=pd.Series([1, 2, 3, 4], index=self.data.index)
         )
         self.assertEqual(y_transformed.tolist(), [1, 2, 3])
 
@@ -67,7 +71,7 @@ def test_fitting_transformer_without_strategy_keeps_valid_data_unchanged(self):
         transformer = MissingValuesTransformer()
         transformed, _ = transformer.fit_transform(self.data)
         pd.testing.assert_frame_equal(
-            transformed, self.data.drop(index=3, columns=["D"])
+            transformed, self.data.drop(index=2, columns=["D"])
         )
 
     def test_calling_transform_before_fit_raises_error(self):

From 52017ee06f4838a2984e183fbbed919494ef36f4 Mon Sep 17 00:00:00 2001
From: lschilders <lars.schilders@alliander.com>
Date: Wed, 2 Oct 2024 16:52:05 +0200
Subject: [PATCH 15/21] remove trailing null rows in fit_transform and
 check in test_imputer of test_linear_quantile

Signed-off-by: lschilders <lars.schilders@alliander.com>
---
 openstef/feature_engineering/missing_values_transformer.py | 6 ++++++
 test/unit/model/regressors/test_linear_quantile.py         | 3 ++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/openstef/feature_engineering/missing_values_transformer.py b/openstef/feature_engineering/missing_values_transformer.py
index 6dccdd3cb..509dfe9bc 100644
--- a/openstef/feature_engineering/missing_values_transformer.py
+++ b/openstef/feature_engineering/missing_values_transformer.py
@@ -58,6 +58,11 @@ def __init__(
             ).set_output(transform="pandas")
             self.imputer_._validate_params()
 
+    def remove_trailing_null_rows(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Remove rows with trailing null values in a DataFrame."""
+        self.non_trailing_null_rows = ~df.bfill().isnull().any(axis="columns")
+        return df.loc[self.non_trailing_null_rows]
+
     def fit(self, x, y=None):
         """Fit the imputer on the input data."""
         _ = check_array(x, force_all_finite="allow-nan")
@@ -106,6 +111,7 @@ def fit_transform(self, x, y=None) -> tuple[pd.DataFrame, Optional[pd.Series]]:
             y = y.loc[self.non_trailing_null_rows]
 
         x = self.transform(x)
+        x = x.loc[self.non_trailing_null_rows]
         return x, y
 
     @classmethod
diff --git a/test/unit/model/regressors/test_linear_quantile.py b/test/unit/model/regressors/test_linear_quantile.py
index 93d37618a..36d21b683 100644
--- a/test/unit/model/regressors/test_linear_quantile.py
+++ b/test/unit/model/regressors/test_linear_quantile.py
@@ -77,7 +77,8 @@ def test_imputer(self):
         self.assertTrue((model2.predict(X_) == model2.predict(X)).all())
 
         # check if last row is removed because of trailing null values
-        self.assertEqual(X_.shape[0], n_sample - 1)
+        X_transformed, _ = model2.imputer_.fit_transform(X)
+        self.assertEqual(X_transformed.shape[0], n_sample - 1)
 
     def test_value_error_raised(self):
         # Check if Value Error is raised when 0.5 is not in the requested quantiles list

From 24590d75de1888220095d49ee41131be41c681f3 Mon Sep 17 00:00:00 2001
From: lschilders <lars.schilders@alliander.com>
Date: Mon, 7 Oct 2024 09:16:39 +0200
Subject: [PATCH 16/21] add test for calling transform separately

Signed-off-by: lschilders <lars.schilders@alliander.com>
---
 .../test_missing_values_transformer.py          | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/test/unit/feature_engineering/test_missing_values_transformer.py b/test/unit/feature_engineering/test_missing_values_transformer.py
index 7947116f0..214f3d71e 100644
--- a/test/unit/feature_engineering/test_missing_values_transformer.py
+++ b/test/unit/feature_engineering/test_missing_values_transformer.py
@@ -74,6 +74,23 @@ def test_fitting_transformer_without_strategy_keeps_valid_data_unchanged(self):
             transformed, self.data.drop(index=2, columns=["D"])
         )
 
+    def test_call_transform_on_fitted_transformer_does_not_remove_trailing_null_rows(
+        self,
+    ):
+        transformer = MissingValuesTransformer()
+        transformer.fit(self.data)
+        new_data = pd.DataFrame(
+            {
+                "A": [1, 2, 3, 4],
+                "B": [1, 2, 3, 4],
+                "C": [1, 2, 3, 4],
+                "D": [1, 2, 3, 4],
+            },
+            index=[0, 1, 1, 2],
+        )
+        transformed = transformer.transform(new_data)
+        pd.testing.assert_frame_equal(transformed, new_data.drop(columns=["D"]))
+
     def test_calling_transform_before_fit_raises_error(self):
         transformer = MissingValuesTransformer()
         with self.assertRaises(NotFittedError):

From e66afe8e108391b146ff8c7caaeeb4ac32487e87 Mon Sep 17 00:00:00 2001
From: lschilders <lars.schilders@alliander.com>
Date: Mon, 7 Oct 2024 12:10:43 +0200
Subject: [PATCH 17/21] refactored MissingValuesTransformer with private static
 method _determine_trailing_null_rows

Signed-off-by: lschilders <lars.schilders@alliander.com>
---
 .../missing_values_transformer.py             | 30 ++++++++++++-------
 .../test_missing_values_transformer.py        |  5 +++-
 2 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/openstef/feature_engineering/missing_values_transformer.py b/openstef/feature_engineering/missing_values_transformer.py
index 509dfe9bc..9aeed43e9 100644
--- a/openstef/feature_engineering/missing_values_transformer.py
+++ b/openstef/feature_engineering/missing_values_transformer.py
@@ -21,7 +21,6 @@ class MissingValuesTransformer:
     _n_in_features: Optional[int] = None
 
     non_null_feature_names: List[str] = None
-    non_trailing_null_rows: pd.Series = None
 
     def __init__(
         self,
@@ -58,10 +57,10 @@ def __init__(
             ).set_output(transform="pandas")
             self.imputer_._validate_params()
 
-    def remove_trailing_null_rows(self, df: pd.DataFrame) -> pd.DataFrame:
-        """Remove rows with trailing null values in a DataFrame."""
-        self.non_trailing_null_rows = ~df.bfill().isnull().any(axis="columns")
-        return df.loc[self.non_trailing_null_rows]
+    @staticmethod
+    def _determine_trailing_null_rows(x: pd.DataFrame) -> pd.Series:
+        """Determine rows without trailing null values in a DataFrame."""
+        return ~x.bfill().isnull().any(axis="columns")
 
     def fit(self, x, y=None):
         """Fit the imputer on the input data."""
@@ -77,9 +76,9 @@ def fit(self, x, y=None):
         self.non_null_feature_names = list(x.columns[~is_column_null])
         x = x[self.non_null_feature_names]
 
-        # Remove rows with trailing null values
-        self.non_trailing_null_rows = ~x.bfill().isnull().any(axis="columns")
-        x = x.loc[self.non_trailing_null_rows]
+        # Remove trailing null rows
+        trailing_null_rows = self._determine_trailing_null_rows(x)
+        x = x.loc[trailing_null_rows]
 
         # Imputers do not support labels
         self.imputer_.fit(X=x, y=None)
@@ -107,11 +106,20 @@ def fit_transform(self, x, y=None) -> tuple[pd.DataFrame, Optional[pd.Series]]:
         """
         self.fit(x, y)
 
-        if y is not None:
-            y = y.loc[self.non_trailing_null_rows]
+        if not isinstance(x, pd.DataFrame):
+            x = pd.DataFrame(np.asarray(x))
+
+        x = x[self.non_null_feature_names]
+
+        non_trailing_null_rows = self._determine_trailing_null_rows(x)
+
+        x = x.loc[non_trailing_null_rows]
 
         x = self.transform(x)
-        x = x.loc[self.non_trailing_null_rows]
+
+        if y is not None:
+            y = y.loc[non_trailing_null_rows]
+
         return x, y
 
     @classmethod
diff --git a/test/unit/feature_engineering/test_missing_values_transformer.py b/test/unit/feature_engineering/test_missing_values_transformer.py
index 214f3d71e..af2125e58 100644
--- a/test/unit/feature_engineering/test_missing_values_transformer.py
+++ b/test/unit/feature_engineering/test_missing_values_transformer.py
@@ -47,8 +47,11 @@ def test_columns_always_null_are_removed(self):
     def test_determining_non_trailing_null_rows(self):
         transformer = MissingValuesTransformer()
         transformer.fit(self.data)
+        non_trailing_null_rows = transformer._determine_trailing_null_rows(
+            self.data[transformer.non_null_feature_names]
+        )
         pd.testing.assert_series_equal(
-            transformer.non_trailing_null_rows,
+            non_trailing_null_rows,
             pd.Series([True, True, True, False], index=[0, 1, 1, 2]),
         )
 

From c9b73dc9384bc6e1a71f6e8bbda14031b01ce1e7 Mon Sep 17 00:00:00 2001
From: lschilders <lars.schilders@alliander.com>
Date: Mon, 7 Oct 2024 13:42:12 +0200
Subject: [PATCH 18/21] add whitelist for no_fill_future_values_features

Signed-off-by: lschilders <lars.schilders@alliander.com>
---
 .../missing_values_transformer.py             | 20 ++++++++++++++-----
 openstef/model/regressors/linear_quantile.py  |  7 ++++++-
 .../test_missing_values_transformer.py        | 18 ++++++++---------
 .../model/regressors/test_linear_quantile.py  |  4 +++-
 4 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/openstef/feature_engineering/missing_values_transformer.py b/openstef/feature_engineering/missing_values_transformer.py
index 9aeed43e9..5d2e3dc74 100644
--- a/openstef/feature_engineering/missing_values_transformer.py
+++ b/openstef/feature_engineering/missing_values_transformer.py
@@ -27,6 +27,7 @@ def __init__(
         missing_values: Union[int, float, str, None] = np.nan,
         imputation_strategy: str = None,
         fill_value: Union[str, int, float] = None,
+        no_fill_future_values_features: List[str] = [],
     ):
         """Initialize missing values handler.
 
@@ -37,11 +38,14 @@ def __init__(
                 Can be one of "mean", "median", "most_frequent", "constant" or None.
             fill_value: When strategy == "constant", fill_value is used to replace all
                 occurrences of missing_values.
-
+            no_fill_future_values_features: The features for which it does not make sense
+                to fill future values. Rows that contain trailing null values for these
+                features will be removed from the data.
         """
         self.missing_values = missing_values
         self.imputation_strategy = imputation_strategy
         self.fill_value = fill_value
+        self.no_fill_future_values_features = no_fill_future_values_features
         self.is_fitted_ = False
 
         # Build the proper imputation transformer
@@ -76,8 +80,11 @@ def fit(self, x, y=None):
         self.non_null_feature_names = list(x.columns[~is_column_null])
         x = x[self.non_null_feature_names]
 
-        # Remove trailing null rows
-        trailing_null_rows = self._determine_trailing_null_rows(x)
+        # Remove trailing null rows for features that should
+        # not be imputed in the future
+        trailing_null_rows = self._determine_trailing_null_rows(
+            x[self.no_fill_future_values_features]
+        )
         x = x.loc[trailing_null_rows]
 
         # Imputers do not support labels
@@ -111,8 +118,11 @@ def fit_transform(self, x, y=None) -> tuple[pd.DataFrame, Optional[pd.Series]]:
 
         x = x[self.non_null_feature_names]
 
-        non_trailing_null_rows = self._determine_trailing_null_rows(x)
-
+        # Remove trailing null rows for features that should
+        # not be imputed in the future
+        non_trailing_null_rows = self._determine_trailing_null_rows(
+            x[self.no_fill_future_values_features]
+        )
         x = x.loc[non_trailing_null_rows]
 
         x = self.transform(x)
diff --git a/openstef/model/regressors/linear_quantile.py b/openstef/model/regressors/linear_quantile.py
index b0c9af91a..37d77e371 100644
--- a/openstef/model/regressors/linear_quantile.py
+++ b/openstef/model/regressors/linear_quantile.py
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: MPL-2.0
 import re
-from typing import Dict, Union, Set, Optional
+from typing import Dict, Union, Set, Optional, List
 
 import numpy as np
 import pandas as pd
@@ -47,6 +47,7 @@ def __init__(
         missing_values: Union[int, float, str, None] = np.nan,
         imputation_strategy: Optional[str] = "mean",
         fill_value: Union[str, int, float] = None,
+        no_fill_future_values_features: List[str] = [],
     ):
         """Initialize LinearQuantileOpenstfRegressor.
 
@@ -69,6 +70,9 @@ def __init__(
             missing_values: Value to be considered as missing value
             imputation_strategy: Imputation strategy
             fill_value: Fill value
+            no_fill_future_values_features: The features for which it does not make sense
+                to fill future values. Rows that contain trailing null values for these
+                features will be removed from the data.
 
         """
         super().__init__()
@@ -86,6 +90,7 @@ def __init__(
             missing_values=missing_values,
             imputation_strategy=imputation_strategy,
             fill_value=fill_value,
+            no_fill_future_values_features=no_fill_future_values_features,
         )
         self.x_scaler_ = MinMaxScaler(feature_range=(-1, 1))
         self.y_scaler_ = MinMaxScaler(feature_range=(-1, 1))
diff --git a/test/unit/feature_engineering/test_missing_values_transformer.py b/test/unit/feature_engineering/test_missing_values_transformer.py
index af2125e58..ac93a30f3 100644
--- a/test/unit/feature_engineering/test_missing_values_transformer.py
+++ b/test/unit/feature_engineering/test_missing_values_transformer.py
@@ -16,7 +16,7 @@ def setUp(self):
         self.data = pd.DataFrame(
             {
                 "A": [np.nan, 2, 3, 4],
-                "B": [1, np.nan, 3, 4],
+                "B": [3, np.nan, 4, 5],
                 "C": [3, 4, 5, np.nan],
                 "D": [np.nan, np.nan, np.nan, np.nan],
             },
@@ -27,8 +27,8 @@ def test_imputation_with_mean_strategy_fills_missing_values(self):
         transformer = MissingValuesTransformer(imputation_strategy="mean")
         transformed, _ = transformer.fit_transform(self.data)
         self.assertEqual(transformed.isnull().sum().sum(), 0)
-        self.assertAlmostEqual(transformed.iloc[0]["A"], 2.5)
-        self.assertAlmostEqual(transformed.iloc[1]["B"], 2)
+        self.assertAlmostEqual(transformed.iloc[0]["A"], 3)
+        self.assertAlmostEqual(transformed.iloc[1]["B"], 4)
 
     def test_imputation_with_constant_strategy_fills_missing_values(self):
         transformer = MissingValuesTransformer(
@@ -45,7 +45,7 @@ def test_columns_always_null_are_removed(self):
         self.assertNotIn("D", transformer.non_null_feature_names)
 
     def test_determining_non_trailing_null_rows(self):
-        transformer = MissingValuesTransformer()
+        transformer = MissingValuesTransformer(no_fill_future_values_features=["C"])
         transformer.fit(self.data)
         non_trailing_null_rows = transformer._determine_trailing_null_rows(
             self.data[transformer.non_null_feature_names]
@@ -56,7 +56,7 @@ def test_determining_non_trailing_null_rows(self):
         )
 
     def test_fitting_with_labels_removes_rows_with_trailing_nulls(self):
-        transformer = MissingValuesTransformer()
+        transformer = MissingValuesTransformer(no_fill_future_values_features=["C"])
         _, y_transformed = transformer.fit_transform(
             self.data, y=pd.Series([1, 2, 3, 4], index=self.data.index)
         )
@@ -68,19 +68,17 @@ def test_non_dataframe_input_is_converted_and_processed(self):
         transformed, _ = transformer.fit_transform(array)
         self.assertIsInstance(transformed, pd.DataFrame)
         self.assertEqual(transformed.isnull().sum().sum(), 0)
-        self.assertEqual(transformed.shape, (1, 1))
+        self.assertEqual(transformed.shape, (2, 2))
 
     def test_fitting_transformer_without_strategy_keeps_valid_data_unchanged(self):
         transformer = MissingValuesTransformer()
         transformed, _ = transformer.fit_transform(self.data)
-        pd.testing.assert_frame_equal(
-            transformed, self.data.drop(index=2, columns=["D"])
-        )
+        pd.testing.assert_frame_equal(transformed, self.data.drop(columns=["D"]))
 
     def test_call_transform_on_fitted_transformer_does_not_remove_trailing_null_rows(
         self,
     ):
-        transformer = MissingValuesTransformer()
+        transformer = MissingValuesTransformer(no_fill_future_values_features=["C"])
         transformer.fit(self.data)
         new_data = pd.DataFrame(
             {
diff --git a/test/unit/model/regressors/test_linear_quantile.py b/test/unit/model/regressors/test_linear_quantile.py
index 36d21b683..50ea32bbc 100644
--- a/test/unit/model/regressors/test_linear_quantile.py
+++ b/test/unit/model/regressors/test_linear_quantile.py
@@ -60,7 +60,9 @@ def test_imputer(self):
         X["sparse_2"] = np.ones(n_sample)
         X.loc[X.index[-1], "sparse_2"] = np.nan
         model1 = LinearQuantileOpenstfRegressor(imputation_strategy=None)
-        model2 = LinearQuantileOpenstfRegressor(imputation_strategy="mean")
+        model2 = LinearQuantileOpenstfRegressor(
+            imputation_strategy="mean", no_fill_future_values_features=["sparse_2"]
+        )
 
         # Act
         # Model should give error if nan values are present.

From 7f710a7612b608cdecb83d34fb4996891f43b821 Mon Sep 17 00:00:00 2001
From: lschilders <lars.schilders@alliander.com>
Date: Mon, 7 Oct 2024 14:01:01 +0200
Subject: [PATCH 19/21] immutable default value for
 no_fill_future_values_features

---
 openstef/feature_engineering/missing_values_transformer.py | 4 +++-
 openstef/model/regressors/linear_quantile.py               | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/openstef/feature_engineering/missing_values_transformer.py b/openstef/feature_engineering/missing_values_transformer.py
index 5d2e3dc74..33ae3f994 100644
--- a/openstef/feature_engineering/missing_values_transformer.py
+++ b/openstef/feature_engineering/missing_values_transformer.py
@@ -27,7 +27,7 @@ def __init__(
         missing_values: Union[int, float, str, None] = np.nan,
         imputation_strategy: str = None,
         fill_value: Union[str, int, float] = None,
-        no_fill_future_values_features: List[str] = [],
+        no_fill_future_values_features: List[str] = None,
     ):
         """Initialize missing values handler.
 
@@ -45,6 +45,8 @@ def __init__(
         self.missing_values = missing_values
         self.imputation_strategy = imputation_strategy
         self.fill_value = fill_value
+        if no_fill_future_values_features is None:
+            no_fill_future_values_features = []
         self.no_fill_future_values_features = no_fill_future_values_features
         self.is_fitted_ = False
 
diff --git a/openstef/model/regressors/linear_quantile.py b/openstef/model/regressors/linear_quantile.py
index 37d77e371..2c8ead73a 100644
--- a/openstef/model/regressors/linear_quantile.py
+++ b/openstef/model/regressors/linear_quantile.py
@@ -47,7 +47,7 @@ def __init__(
         missing_values: Union[int, float, str, None] = np.nan,
         imputation_strategy: Optional[str] = "mean",
         fill_value: Union[str, int, float] = None,
-        no_fill_future_values_features: List[str] = [],
+        no_fill_future_values_features: List[str] = None,
     ):
         """Initialize LinearQuantileOpenstfRegressor.
 

From 4203538cab56a92cf32e1f8483315733531e8d97 Mon Sep 17 00:00:00 2001
From: Lars Schilders <123180911+lschilders@users.noreply.github.com>
Date: Tue, 8 Oct 2024 09:23:19 +0200
Subject: [PATCH 20/21] Update
 openstef/feature_engineering/missing_values_transformer.py

Co-authored-by: Egor Dmitriev <egordmitriev2@gmail.com>
Signed-off-by: Lars Schilders <123180911+lschilders@users.noreply.github.com>
---
 openstef/feature_engineering/missing_values_transformer.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/openstef/feature_engineering/missing_values_transformer.py b/openstef/feature_engineering/missing_values_transformer.py
index 33ae3f994..7c46e5192 100644
--- a/openstef/feature_engineering/missing_values_transformer.py
+++ b/openstef/feature_engineering/missing_values_transformer.py
@@ -45,9 +45,7 @@ def __init__(
         self.missing_values = missing_values
         self.imputation_strategy = imputation_strategy
         self.fill_value = fill_value
-        if no_fill_future_values_features is None:
-            no_fill_future_values_features = []
-        self.no_fill_future_values_features = no_fill_future_values_features
+        self.no_fill_future_values_features = no_fill_future_values_features or []
         self.is_fitted_ = False
 
         # Build the proper imputation transformer

From 5f83b6210819ea9f9460d1225afbcd35f47884ef Mon Sep 17 00:00:00 2001
From: lschilders <lars.schilders@alliander.com>
Date: Tue, 8 Oct 2024 09:24:40 +0200
Subject: [PATCH 21/21] add no_fill_future_values_features to model_creator

Signed-off-by: lschilders <lars.schilders@alliander.com>
---
 openstef/model/model_creator.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/openstef/model/model_creator.py b/openstef/model/model_creator.py
index 837c6052a..bd2448889 100644
--- a/openstef/model/model_creator.py
+++ b/openstef/model/model_creator.py
@@ -116,6 +116,7 @@
         "missing_values",
         "imputation_strategy",
         "fill_value",
+        "no_fill_future_values_features",
     ],
     MLModelType.ARIMA: [
         "backtest_max_horizon",