Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature(KTP-1279): Changed linear model scaling and improved sample weighting feature. #565

Merged
merged 25 commits into from
Oct 11, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
e0ebfb5
feature(KTP-1279): Changed feature scaling in linear model. Added exp…
egordm Oct 4, 2024
870b8a8
feature(KTP-1279): Added test to change linear model parameters.
egordm Oct 4, 2024
c7f550a
style: Code style fixes.
egordm Oct 4, 2024
ad2eba6
Format Python code with Black
actions-user Oct 4, 2024
20fd99c
feature(KTP-1279): Added additional test condition for linear model p…
egordm Oct 4, 2024
094391a
style: Code style fixes.
egordm Oct 4, 2024
0667ec2
Format Python code with Black
actions-user Oct 4, 2024
f85f2e6
feature(KTP-1279): Added additional test condition for linear model p…
egordm Oct 4, 2024
6f4afa7
feature(KTP-1279): Added weight floor. Added documentation for sample…
egordm Oct 7, 2024
6c5e67d
Format Python code with Black
actions-user Oct 7, 2024
385c356
Merge branch 'main' into feature/KTP-1279-linear-sample-weight
clara-de-smet Oct 10, 2024
cada386
Merge branch 'main' into feature/KTP-1279-linear-sample-weight
clara-de-smet Oct 10, 2024
75764b5
Format Python code with Black
actions-user Oct 10, 2024
873e984
Fixed linter suggestion
clara-de-smet Oct 10, 2024
5dbed25
Added documentation
clara-de-smet Oct 11, 2024
b5d2430
Format Python code with Black
actions-user Oct 11, 2024
da443b7
Bumped version of black formatting
clara-de-smet Oct 11, 2024
b51c690
Updated documentation
clara-de-smet Oct 11, 2024
fafc48b
Format Python code with Black
actions-user Oct 11, 2024
ac5b8e3
Removed blank line
clara-de-smet Oct 11, 2024
be8d70c
Merge branch 'feature/KTP-1279-linear-sample-weight' of https://githu…
clara-de-smet Oct 11, 2024
0c80259
Format Python code with Black
actions-user Oct 11, 2024
f6a09a2
Reformatting docs
clara-de-smet Oct 11, 2024
482953b
Merge branch 'feature/KTP-1279-linear-sample-weight' of https://githu…
clara-de-smet Oct 11, 2024
1143f1d
Reformatting docs
clara-de-smet Oct 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions openstef/model/model_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@
"missing_values",
"imputation_strategy",
"fill_value",
"weight_scale_percentile",
"weight_exponent",
],
ModelType.ARIMA: [
"backtest_max_horizon",
Expand Down
34 changes: 28 additions & 6 deletions openstef/model/regressors/linear_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pandas as pd
from sklearn.base import RegressorMixin
from sklearn.linear_model import QuantileRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.utils.validation import check_is_fitted

from openstef.feature_engineering.missing_values_transformer import (
Expand All @@ -25,8 +25,8 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
solver: str

imputer_: MissingValuesTransformer
x_scaler_: MinMaxScaler
y_scaler_: MinMaxScaler
x_scaler_: StandardScaler
y_scaler_: StandardScaler
models_: Dict[float, QuantileRegressor]

is_fitted_: bool = False
Expand All @@ -47,6 +47,8 @@ def __init__(
missing_values: Union[int, float, str, None] = np.nan,
imputation_strategy: Optional[str] = "mean",
fill_value: Union[str, int, float] = None,
weight_scale_percentile: int = 95,
weight_exponent: float = 1,
):
"""Initialize LinearQuantileOpenstfRegressor.

Expand Down Expand Up @@ -82,13 +84,15 @@ def __init__(
self.quantiles = quantiles
self.alpha = alpha
self.solver = solver
self.weight_scale_percentile = weight_scale_percentile
self.weight_exponent = weight_exponent
self.imputer_ = MissingValuesTransformer(
missing_values=missing_values,
imputation_strategy=imputation_strategy,
fill_value=fill_value,
)
self.x_scaler_ = MinMaxScaler(feature_range=(-1, 1))
self.y_scaler_ = MinMaxScaler(feature_range=(-1, 1))
self.x_scaler_ = StandardScaler()
self.y_scaler_ = StandardScaler()
self.models_ = {
quantile: QuantileRegressor(alpha=alpha, quantile=quantile, solver=solver)
for quantile in quantiles
Expand Down Expand Up @@ -177,7 +181,7 @@ def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> RegressorMixin:
y_scaled = self.y_scaler_.fit_transform(y.to_frame())[:, 0]

# Add more focus on extreme / peak values
sample_weight = np.abs(y_scaled)
sample_weight = self._calculate_sample_weights(y.values.squeeze())

# Fit quantile regressors
for quantile in self.quantiles:
Expand All @@ -191,6 +195,16 @@ def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> RegressorMixin:

return self

def _calculate_sample_weights(self, y: np.array):
    """Calculate per-sample weights that emphasize extreme / peak target values.

    The raw target is scaled so that its ``weight_scale_percentile``
    percentile of absolute values maps to 1, raised to
    ``weight_exponent``, and finally clipped to the range [0, 1].

    Args:
        y: Raw (unscaled) target values.

    Returns:
        np.ndarray of sample weights in [0, 1], one per element of ``y``.
    """
    scaled = _scale_percentile(y, percentile=self.weight_scale_percentile)
    weighted = _weight_exp(scaled, exponent=self.weight_exponent)
    # Clip so values beyond the scaling percentile do not dominate the fit.
    return np.clip(weighted, a_min=0, a_max=1)

def predict(self, x: pd.DataFrame, quantile: float = 0.5, **kwargs) -> np.array:
"""Makes a prediction for a desired quantile.

Expand Down Expand Up @@ -245,3 +259,11 @@ def _get_param_names(cls):

def __sklearn_is_fitted__(self) -> bool:
return self.is_fitted_


def _scale_percentile(x: np.ndarray, percentile: int = 95):
return np.abs(x / np.percentile(np.abs(x), percentile))


def _weight_exp(x: np.ndarray, exponent: float = 1):
return np.abs(x) ** exponent
21 changes: 21 additions & 0 deletions test/unit/model/regressors/test_linear_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from sklearn.utils.estimator_checks import check_estimator

from openstef.feature_engineering.apply_features import apply_features
from openstef.model.model_creator import ModelCreator
from openstef.model.regressors.linear_quantile import LinearQuantileOpenstfRegressor
from test.unit.utils.base import BaseTestCase
from test.unit.utils.data import TestData
Expand Down Expand Up @@ -144,3 +145,23 @@ def test_ignore_features(self):
self.assertNotIn("E1B_AMI_I", input_data_filtered.columns)
self.assertNotIn("E4A_I", input_data_filtered.columns)
self.assertIn("load", input_data_filtered.columns)

def test_create_model(self):
    """Factory creates a linear quantile model with custom weight parameters."""
    # Arrange: parameters for the sample-weighting feature.
    weight_params = {
        "weight_scale_percentile": 50,
        "weight_exponent": 2,
    }

    # Act: build the model through the ModelCreator factory.
    model = ModelCreator.create_model(
        model_type="linear_quantile",
        quantiles=[0.5],
        **weight_params,
    )

    # Assert: the right class is returned and the parameters are applied.
    self.assertIsInstance(model, LinearQuantileOpenstfRegressor)
    self.assertEqual(model.weight_scale_percentile, 50)
    self.assertEqual(model.weight_exponent, 2)
    self.assertEqual(model.quantiles, [0.5])