Skip to content

Commit

Permalink
feature(KTP-1279): Changed linear model scaling and improved sample w…
Browse files Browse the repository at this point in the history
…eighting feature. (#565)

* feature(KTP-1279): Changed feature scaling in linear model. Added exponential sample weighting in linear model.

Signed-off-by: Egor Dmitriev <[email protected]>

* feature(KTP-1279): Added test to change linear model parameters.

Signed-off-by: Egor Dmitriev <[email protected]>

* style: Code style fixes.

Signed-off-by: Egor Dmitriev <[email protected]>

* Format Python code with Black

Signed-off-by: black <[email protected]>

* feature(KTP-1279): Added additional test condition for linear model params.

Signed-off-by: Egor Dmitriev <[email protected]>

* style: Code style fixes.

Signed-off-by: Egor Dmitriev <[email protected]>

* Format Python code with Black

Signed-off-by: black <[email protected]>

* feature(KTP-1279): Added additional test condition for linear model params.

Signed-off-by: Egor Dmitriev <[email protected]>

* feature(KTP-1279): Added weight floor. Added documentation for sample weight calculation.

Signed-off-by: Egor Dmitriev <[email protected]>

* Format Python code with Black

Signed-off-by: black <[email protected]>

* Merge branch 'main' into feature/KTP-1279-linear-sample-weight

Signed-off-by: Clara De Smet <[email protected]>

* Format Python code with Black

Signed-off-by: black <[email protected]>

* Fixed linter suggestion

Signed-off-by: Clara De Smet <[email protected]>

* Added documentation

* Format Python code with Black

Signed-off-by: black <[email protected]>

* Bumped version of black formatting

* Updated documentation

* Format Python code with Black

Signed-off-by: black <[email protected]>

* Removed blank line

* Format Python code with Black

Signed-off-by: black <[email protected]>

* Reformatting docs

* Reformatting docs

---------

Signed-off-by: Egor Dmitriev <[email protected]>
Signed-off-by: black <[email protected]>
Signed-off-by: Clara De Smet <[email protected]>
Signed-off-by: Clara De Smet <[email protected]>
Co-authored-by: black <[email protected]>
Co-authored-by: Clara De Smet <[email protected]>
Co-authored-by: Clara De Smet <[email protected]>
  • Loading branch information
4 people authored Oct 11, 2024
1 parent 21692ad commit 08c9965
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 8 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

repos:
- repo: https://github.com/ambv/black
rev: 22.1.0
rev: 24.3.0
hooks:
- id: black
language_version: python3.11
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ OpenSTEF is a Python package designed for generating short-term forecasts in the
pip install openstef
```

### Remark regarding installation within a **conda environment on Windows**:
### Remark regarding installation within a **conda environment on Windows**

A version of the pywin32 package will be installed as a secondary dependency along with the installation of the openstef package. Since conda relies on an old version of pywin32, the new installation can break conda's functionality. The following command can solve this issue:
```shell
Expand Down
1 change: 1 addition & 0 deletions openstef/feature_engineering/missing_values_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def __init__(
no_fill_future_values_features: The features for which it does not make sense
to fill future values. Rows that contain trailing null values for these
features will be removed from the data.
"""
self.missing_values = missing_values
self.imputation_strategy = imputation_strategy
Expand Down
3 changes: 3 additions & 0 deletions openstef/model/model_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,9 @@
"missing_values",
"imputation_strategy",
"fill_value",
"weight_scale_percentile",
"weight_exponent",
"weight_floor",
"no_fill_future_values_features",
],
ModelType.ARIMA: [
Expand Down
56 changes: 50 additions & 6 deletions openstef/model/regressors/linear_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pandas as pd
from sklearn.base import RegressorMixin
from sklearn.linear_model import QuantileRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.utils.validation import check_is_fitted

from openstef.feature_engineering.missing_values_transformer import (
Expand All @@ -25,8 +25,8 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
solver: str

imputer_: MissingValuesTransformer
x_scaler_: MinMaxScaler
y_scaler_: MinMaxScaler
x_scaler_: StandardScaler
y_scaler_: StandardScaler
models_: Dict[float, QuantileRegressor]

is_fitted_: bool = False
Expand All @@ -47,6 +47,9 @@ def __init__(
missing_values: Union[int, float, str, None] = np.nan,
imputation_strategy: Optional[str] = "mean",
fill_value: Union[str, int, float] = None,
weight_scale_percentile: int = 95,
weight_exponent: float = 1,
weight_floor: float = 0.1,
no_fill_future_values_features: List[str] = None,
):
"""Initialize LinearQuantileOpenstfRegressor.
Expand All @@ -70,6 +73,9 @@ def __init__(
missing_values: Value to be considered as missing value
imputation_strategy: Imputation strategy
fill_value: Fill value
weight_scale_percentile: Percentile used in scaling of the samples
weight_exponent: Exponent used in sample weighing
weight_floor: Minimum weight for samples
no_fill_future_values_features: The features for which it does not make sense
to fill future values. Rows that contain trailing null values for these
features will be removed from the data.
Expand All @@ -86,14 +92,17 @@ def __init__(
self.quantiles = quantiles
self.alpha = alpha
self.solver = solver
self.weight_scale_percentile = weight_scale_percentile
self.weight_exponent = weight_exponent
self.weight_floor = weight_floor
self.imputer_ = MissingValuesTransformer(
missing_values=missing_values,
imputation_strategy=imputation_strategy,
fill_value=fill_value,
no_fill_future_values_features=no_fill_future_values_features,
)
self.x_scaler_ = MinMaxScaler(feature_range=(-1, 1))
self.y_scaler_ = MinMaxScaler(feature_range=(-1, 1))
self.x_scaler_ = StandardScaler()
self.y_scaler_ = StandardScaler()
self.models_ = {
quantile: QuantileRegressor(alpha=alpha, quantile=quantile, solver=solver)
for quantile in quantiles
Expand Down Expand Up @@ -182,7 +191,7 @@ def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> RegressorMixin:
y_scaled = self.y_scaler_.fit_transform(y.to_frame())[:, 0]

# Add more focus on extreme / peak values
sample_weight = np.abs(y_scaled)
sample_weight = self._calculate_sample_weights(y.values.squeeze())

# Fit quantile regressors
for quantile in self.quantiles:
Expand All @@ -196,6 +205,33 @@ def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> RegressorMixin:

return self

def _calculate_sample_weights(self, y: np.array):
    """Compute per-sample weights from target values of arbitrary scale.

    Samples whose target has a larger absolute value receive a larger weight,
    which puts more emphasis on peaks / extremes during fitting. The steps are:

    * Scale ``y`` by the ``weight_scale_percentile``-th percentile of ``|y|``
      and take the absolute value. A sample exactly at that percentile maps
      to 1; more extreme samples map above 1.
    * Raise the scaled magnitude to the power ``weight_exponent``:
        * exponent=0: uniform weights for all samples.
        * exponent=1: weights grow linearly with the scaled magnitude.
        * exponent>1: weights grow faster than linearly, concentrating the
          emphasis on the most extreme samples.
    * Clip the result to ``weight_floor`` at the low end and 1 at the high
      end. The floor makes sure that samples close to zero are still
      considered instead of being weighted away.

    Args:
        y: Target values.

    Returns:
        Sample weights clipped between ``weight_floor`` and 1.
    """
    scaled_magnitude = _scale_percentile(
        y, percentile=self.weight_scale_percentile
    )
    emphasised = _weight_exp(scaled_magnitude, exponent=self.weight_exponent)
    return np.clip(emphasised, a_min=self.weight_floor, a_max=1)

def predict(self, x: pd.DataFrame, quantile: float = 0.5, **kwargs) -> np.array:
"""Makes a prediction for a desired quantile.
Expand Down Expand Up @@ -250,3 +286,11 @@ def _get_param_names(cls):

def __sklearn_is_fitted__(self) -> bool:
return self.is_fitted_


def _scale_percentile(x: np.ndarray, percentile: int = 95):
    """Scale values by a percentile of their absolute values.

    Args:
        x: Values to scale.
        percentile: Percentile (0-100) of ``|x|`` that is mapped to 1.

    Returns:
        Non-negative magnitudes ``|x| / p``, where ``p`` is the requested
        percentile of ``|x|``. When ``p`` is 0, zeros map to 0 and any
        non-zero value maps to ``inf``.
    """
    magnitude = np.abs(x)
    scale = np.percentile(magnitude, percentile)
    if scale == 0:
        # The percentile itself is zero (e.g. an all-zero target). Dividing
        # would turn every zero into 0/0 = NaN, and NaN survives clipping, so
        # the sample weights would be poisoned. Map zeros to 0 and keep the
        # x/0 limit (inf) for the non-zero values.
        return np.where(magnitude > 0, np.inf, 0.0)
    # scale is non-negative, so |x| / scale equals |x / scale|.
    return magnitude / scale


def _weight_exp(x: np.ndarray, exponent: float = 1):
    """Raise the absolute value of ``x`` to the power ``exponent``.

    Args:
        x: Input values; their sign is discarded.
        exponent: Power applied to the magnitudes.

    Returns:
        ``|x| ** exponent``, computed element-wise.
    """
    magnitude = np.abs(x)
    return magnitude**exponent
20 changes: 20 additions & 0 deletions test/unit/model/regressors/test_linear_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from sklearn.utils.estimator_checks import check_estimator

from openstef.feature_engineering.apply_features import apply_features
from openstef.model.model_creator import ModelCreator
from openstef.model.regressors.linear_quantile import LinearQuantileOpenstfRegressor
from test.unit.utils.base import BaseTestCase
from test.unit.utils.data import TestData
Expand Down Expand Up @@ -151,3 +152,22 @@ def test_ignore_features(self):
self.assertNotIn("E1B_AMI_I", input_data_filtered.columns)
self.assertNotIn("E4A_I", input_data_filtered.columns)
self.assertIn("load", input_data_filtered.columns)

def test_create_model(self):
    """Weighting parameters given to ModelCreator end up on the regressor."""
    # Arrange: use non-default values for all three weighting parameters so
    # that a silently dropped keyword argument is detected.
    kwargs = {
        "weight_scale_percentile": 50,
        "weight_exponent": 2,
        "weight_floor": 0.2,
    }

    # Act
    model = ModelCreator.create_model(
        model_type="linear_quantile",
        quantiles=[0.5],
        **kwargs,
    )

    # Assert
    self.assertIsInstance(model, LinearQuantileOpenstfRegressor)
    self.assertEqual(model.weight_scale_percentile, 50)
    self.assertEqual(model.weight_exponent, 2)
    self.assertEqual(model.weight_floor, 0.2)

0 comments on commit 08c9965

Please sign in to comment.