Skip to content

Commit

Permalink
Teach DifferencingTransform to inverse_transform with NaNs (#1155)
Browse files Browse the repository at this point in the history
  • Loading branch information
Mr-Geekman authored Mar 7, 2023
1 parent 156eb49 commit d9c24de
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 54 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Fix `SklearnTransform` in per-segment mode to work on subset of segments and raise error on new segments ([#1107](https://github.com/tinkoff-ai/etna/pull/1107))
- Fix `OutliersTransform` and its children to raise error on new segments ([#1139](https://github.com/tinkoff-ai/etna/pull/1139))
- Fix `DifferencingTransform` to raise error on new segments during `transform` and `inverse_transform` in inplace mode ([#1141](https://github.com/tinkoff-ai/etna/pull/1141))
- Teach `DifferencingTransform` to `inverse_transform` with NaNs ([#1155](https://github.com/tinkoff-ai/etna/pull/1155))
## [1.14.0] - 2022-12-16
### Added
- Add python 3.10 support ([#1005](https://github.com/tinkoff-ai/etna/pull/1005))
Expand Down
36 changes: 26 additions & 10 deletions etna/transforms/math/differencing.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
class _SingleDifferencingTransform(Transform):
"""Calculate a time series differences of order 1.
This transform can work with NaNs at the beginning of the segment, but fails when meets NaN inside the segment.
During ``fit`` this transform can work with NaNs at the beginning of the segment, but fails when it meets a NaN inside the segment.
During ``transform`` and ``inverse_transform`` there is no special treatment of NaNs.
Notes
-----
Expand Down Expand Up @@ -86,6 +87,11 @@ def fit(self, df: pd.DataFrame) -> "_SingleDifferencingTransform":
Returns
-------
result: _SingleDifferencingTransform
Raises
------
ValueError:
if NaNs are present inside the segment
"""
segments = sorted(set(df.columns.get_level_values("segment")))
fit_df = df.loc[:, pd.IndexSlice[segments, self.in_column]].copy()
Expand Down Expand Up @@ -124,11 +130,8 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
transformed = df.loc[:, pd.IndexSlice[segments, self.in_column]].copy()
for current_segment in segments:
to_transform = transformed.loc[:, pd.IndexSlice[current_segment, self.in_column]]
start_idx = to_transform.first_valid_index()
# make a differentiation
transformed.loc[start_idx:, pd.IndexSlice[current_segment, self.in_column]] = to_transform.loc[
start_idx:
].diff(periods=self.period)
transformed.loc[:, pd.IndexSlice[current_segment, self.in_column]] = to_transform.diff(periods=self.period)

if self.inplace:
result_df = df.copy()
Expand Down Expand Up @@ -188,10 +191,6 @@ def _reconstruct_test(self, df: pd.DataFrame, columns_to_inverse: Set[str]) -> p
init_df = init_df[segments]
to_transform = pd.concat([init_df, to_transform])

# validate values inside the series to transform
if to_transform.isna().sum().sum() > 0:
raise ValueError(f"There should be no NaNs inside the segments")

# run reconstruction and save the result
to_transform = self._make_inv_diff(to_transform)
result_df.loc[:, pd.IndexSlice[segments, column]] = to_transform
Expand All @@ -210,6 +209,13 @@ def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame:
-------
result:
transformed DataFrame.
Raises
------
ValueError:
if inverse transform is applied neither to the full train nor to a test that goes after the train
ValueError:
if inverse transform is applied to a test that goes after the train with a gap
"""
# we assume this to be fitted
self._train_timestamp = cast(pd.DatetimeIndex, self._train_timestamp)
Expand Down Expand Up @@ -241,7 +247,8 @@ def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame:
class DifferencingTransform(Transform):
"""Calculate a time series differences.
This transform can work with NaNs at the beginning of the segment, but fails when meets NaN inside the segment.
During ``fit`` this transform can work with NaNs at the beginning of the segment, but fails when it meets a NaN inside the segment.
During ``transform`` and ``inverse_transform`` there is no special treatment of NaNs.
Notes
-----
Expand Down Expand Up @@ -334,6 +341,11 @@ def fit(self, df: pd.DataFrame) -> "DifferencingTransform":
Returns
-------
result: DifferencingTransform
Raises
------
ValueError:
if NaNs are present inside the segment
"""
# this is made because transforms of high order may need some columns created by transforms of lower order
result_df = df.copy()
Expand Down Expand Up @@ -395,6 +407,10 @@ def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame:
if transform isn't fitted
NotImplementedError:
if there are segments that weren't present during training
ValueError:
if inverse transform is applied neither to the full train nor to a test that goes after the train
ValueError:
if inverse transform is applied to a test that goes after the train with a gap
"""
self._check_is_fitted()
if not self.inplace:
Expand Down
12 changes: 1 addition & 11 deletions tests/test_transforms/test_inference/test_inverse_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,7 @@ def _test_inverse_transform_future_subset_segments(self, ts, transform, segments
(LogTransform(in_column="target", inplace=True), "positive_ts"),
(LogTransform(in_column="positive", inplace=True), "ts_with_exog"),
(DifferencingTransform(in_column="target", inplace=False), "regular_ts"),
(DifferencingTransform(in_column="target", inplace=True), "regular_ts"),
(DifferencingTransform(in_column="positive", inplace=True), "ts_with_exog"),
(MADTransform(in_column="target", window=14), "regular_ts"),
(MaxTransform(in_column="target", window=14), "regular_ts"),
Expand Down Expand Up @@ -389,17 +390,6 @@ def test_inverse_transform_future_subset_segments(self, transform, dataset_name,
ts = request.getfixturevalue(dataset_name)
self._test_inverse_transform_future_subset_segments(ts, transform, segments=["segment_2"])

@to_be_fixed(ValueError, match="There should be no NaNs inside the segments")
@pytest.mark.parametrize(
"transform, dataset_name",
[
(DifferencingTransform(in_column="target", inplace=True), "regular_ts"),
],
)
def test_inverse_transform_future_subset_difference_fail(self, transform, dataset_name, request):
ts = request.getfixturevalue(dataset_name)
self._test_inverse_transform_future_subset_segments(ts, transform, segments=["segment_2"])


class TestInverseTransformTrainNewSegments:
"""Test inverse transform on train part of new segments.
Expand Down
96 changes: 63 additions & 33 deletions tests/test_transforms/test_math/test_differencing_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,18 @@ def df_nans() -> pd.DataFrame:
return df


@pytest.fixture
def df_nans_middle() -> pd.DataFrame:
    """Create DataFrame with NaNs in the middle of a segment.

    Two segments ("1" and "2") are built on a daily grid from 2021-01-01 to 2021-04-01,
    converted to the wide format with ``TSDataset.to_dataset``, and then rows 5..9
    of the first column are replaced with NaN.

    Returns
    -------
    :
        DataFrame in wide format with a NaN gap inside its first column.
    """
    timestamp = pd.date_range("2021-01-01", "2021-04-01")
    df_1 = pd.DataFrame({"timestamp": timestamp, "target": np.arange(timestamp.shape[0]), "segment": "1"})
    df_2 = pd.DataFrame({"timestamp": timestamp, "target": np.arange(timestamp.shape[0]) * 2, "segment": "2"})
    df = pd.concat([df_1, df_2], ignore_index=True)
    df = TSDataset.to_dataset(df)
    # ``np.NaN`` alias was removed in NumPy 2.0; ``np.nan`` works on every NumPy version
    # NOTE(review): column 0 is presumably segment "1" after ``to_dataset`` -- confirm
    df.iloc[5:10, 0] = np.nan
    return df


@pytest.fixture
def df_segments_split(df_nans) -> Tuple[pd.DataFrame, pd.DataFrame]:
"""Create a pair of DataFrames with different segments."""
Expand Down Expand Up @@ -106,15 +118,10 @@ def check_transform(
series_init = df.loc[:, pd.IndexSlice[segment, "target"]]
series_transformed = transformed_df.loc[:, pd.IndexSlice[segment, out_column]]

series_init = series_init.loc[series_init.first_valid_index() :]
series_transformed = series_transformed.loc[series_transformed.first_valid_index() :]

assert series_init.shape[0] == series_transformed.shape[0] + order * period

for _ in range(order):
series_init = series_init.diff(periods=period).iloc[period:]
series_init = series_init.diff(periods=period)

assert np.all(series_init == series_transformed)
assert series_init.equals(series_transformed)


def check_inverse_transform_not_inplace(
Expand All @@ -134,10 +141,10 @@ def check_inverse_transform_inplace_train(transform: GeneralDifferencingTransfor
assert inverse_transformed_df.equals(df)


def check_inverse_transform_inplace_test(
def check_inverse_transform_inplace_filled_test(
transform: GeneralDifferencingTransform, period: int, order: int, df: pd.DataFrame
):
"""Check that differencing transform correctly makes inverse_transform on test data in inplace mode."""
"""Check that differencing transform correctly makes inverse_transform on filled test data in inplace mode."""
ts = TSDataset(df, freq="D")
ts_train, ts_test = ts.train_test_split(test_size=20)
ts_train.fit_transform(transforms=[transform])
Expand All @@ -158,6 +165,19 @@ def check_inverse_transform_inplace_test(
assert np.all(future_ts.to_pandas() == ts_test.to_pandas())


def check_inverse_transform_inplace_unfilled_test(transform: GeneralDifferencingTransform, df: pd.DataFrame):
    """Check inverse_transform of a differencing transform on unfilled future data in inplace mode.

    The transform is fitted on the train part, a 20-step future is generated and left
    without predictions; after ``inverse_transform`` every value is expected to be NaN.
    """
    dataset = TSDataset(df, freq="D")
    train_part, _ = dataset.train_test_split(test_size=20)
    train_part.fit_transform(transforms=[transform])

    # the future frame is not filled with any predictions
    unfilled_future = train_part.make_future(20)
    unfilled_future.inverse_transform()

    restored = unfilled_future.to_pandas()
    assert restored.isna().all().all()


def check_inverse_transform_inplace_test_quantiles(transform: GeneralDifferencingTransform, df: pd.DataFrame):
"""Check that differencing transform correctly makes inverse_transform on test data with quantiles."""
ts = TSDataset(df, freq="D")
Expand Down Expand Up @@ -345,6 +365,25 @@ def test_full_transform(period, order, inplace, out_column, df_nans):
check_transform(transform, period, order, out_column, df_nans, df_nans)


@pytest.mark.parametrize("period", [1, 7])
@pytest.mark.parametrize("inplace, out_column", [(False, "diff"), (True, "target")])
def test_single_transform_nans_middle(period, inplace, out_column, df_nans, df_nans_middle):
    """Check that _SingleDifferencingTransform produces correct ``transform`` values when NaNs are in the middle."""
    single_diff = _SingleDifferencingTransform(
        in_column="target",
        period=period,
        inplace=inplace,
        out_column=out_column,
    )
    # a single differencing transform always has order 1
    check_transform(single_diff, period, 1, out_column, df_nans, df_nans_middle)


@pytest.mark.parametrize("period", [1, 7])
@pytest.mark.parametrize("order", [1, 2])
@pytest.mark.parametrize("inplace, out_column", [(False, "diff"), (True, "target")])
def test_full_transform_nans_middle(period, order, inplace, out_column, df_nans, df_nans_middle):
    """Check that DifferencingTransform produces correct ``transform`` values when NaNs are in the middle."""
    init_params = {
        "in_column": "target",
        "period": period,
        "order": order,
        "inplace": inplace,
        "out_column": out_column,
    }
    full_diff = DifferencingTransform(**init_params)
    check_transform(full_diff, period, order, out_column, df_nans, df_nans_middle)


@pytest.mark.parametrize("period", [1, 7])
def test_single_transform_not_inplace_new_segments(period, df_segments_split):
"""Test that _SingleDifferencingTransform generates correct values in transform on new segments in non-inplace mode."""
Expand Down Expand Up @@ -466,43 +505,34 @@ def test_full_inverse_transform_inplace_train(period, order, df_nans):
check_inverse_transform_inplace_train(transform, df_nans)


@pytest.mark.parametrize(
"transform",
[
_SingleDifferencingTransform(in_column="target", period=1, inplace=True),
DifferencingTransform(in_column="target", period=1, order=1, inplace=True),
],
)
def test_general_inverse_transform_inplace_test_fail_nans(transform, df_nans):
"""Test that differencing transform fails to make inverse_transform on test data if there are NaNs."""
ts = TSDataset(df_nans, freq="D")
ts_train, ts_test = ts.train_test_split(test_size=20)

ts_train.fit_transform(transforms=[transform])
@pytest.mark.parametrize("period", [1, 7])
def test_single_inverse_transform_inplace_filled_test(period, df_nans):
    """Check that _SingleDifferencingTransform inverts filled test data correctly in inplace mode."""
    single_diff = _SingleDifferencingTransform(
        in_column="target",
        period=period,
        inplace=True,
    )
    # a single differencing transform always has order 1
    check_inverse_transform_inplace_filled_test(single_diff, period, 1, df_nans)

# make predictions by hand only on one segment
future_ts = ts_train.make_future(20)
future_ts.df.loc[:, pd.IndexSlice["1", "target"]] = np.NaN
future_ts.df.loc[:, pd.IndexSlice["2", "target"]] = 2

# check fail on inverse_transform
with pytest.raises(ValueError, match="There should be no NaNs inside the segments"):
future_ts.inverse_transform()
@pytest.mark.parametrize("period", [1, 7])
@pytest.mark.parametrize("order", [1, 2])
def test_full_inverse_transform_inplace_filled_test(period, order, df_nans):
    """Test that DifferencingTransform correctly makes inverse_transform on filled test data in inplace mode."""
    # The ``filled`` suffix keeps this test from being shadowed by the later
    # ``test_full_inverse_transform_inplace_test`` defined in the same module:
    # with identical names only the last definition would be collected by pytest.
    transform = DifferencingTransform(in_column="target", period=period, order=order, inplace=True)
    check_inverse_transform_inplace_filled_test(transform, period, order, df_nans)


@pytest.mark.parametrize("period", [1, 7])
def test_single_inverse_transform_inplace_test(period, df_nans):
"""Test that _SingleDifferencingTransform correctly makes inverse_transform on test data in inplace mode."""
"""Test that _SingleDifferencingTransform correctly makes inverse_transform on unfilled test data in inplace mode."""
transform = _SingleDifferencingTransform(in_column="target", period=period, inplace=True)
check_inverse_transform_inplace_test(transform, period, 1, df_nans)
check_inverse_transform_inplace_unfilled_test(transform, df_nans)


@pytest.mark.parametrize("period", [1, 7])
@pytest.mark.parametrize("order", [1, 2])
def test_full_inverse_transform_inplace_test(period, order, df_nans):
"""Test that DifferencingTransform correctly makes inverse_transform on test data in inplace mode."""
"""Test that DifferencingTransform correctly makes inverse_transform on unfilled test data in inplace mode."""
transform = DifferencingTransform(in_column="target", period=period, order=order, inplace=True)
check_inverse_transform_inplace_test(transform, period, order, df_nans)
check_inverse_transform_inplace_unfilled_test(transform, df_nans)


@pytest.mark.parametrize("period", [1, 7])
Expand Down

0 comments on commit d9c24de

Please sign in to comment.