Skip to content

Replace setting columns with join in SegmentEncoder #331

Merged
5 commits merged on Dec 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Make test_ts optional in plot_forecast ([#321](https://github.com/tinkoff-ai/etna/pull/321))
- Speed up inference for multisegment regression models ([#333](https://github.com/tinkoff-ai/etna/pull/333))
- Speed up Pipeline._get_backtest_forecasts ([#336](https://github.com/tinkoff-ai/etna/pull/336))
- Speed up SegmentEncoderTransform ([#331](https://github.com/tinkoff-ai/etna/pull/331))

## [1.3.3] - 2021-11-24
### Added
Expand Down
6 changes: 1 addition & 5 deletions etna/transforms/segment_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,7 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
index=df.index,
)
encoded_df = encoded_df.astype("category")

for segment in set(df.columns.get_level_values("segment")):
df.loc[self.idx[:], self.idx[segment, "regressor_segment_code"]] = encoded_df.loc[
self.idx[:], self.idx[segment, "regressor_segment_code"]
]
df = df.join(encoded_df)
df = df.sort_index(axis=1)
return df

Expand Down
9 changes: 8 additions & 1 deletion tests/test_transforms/test_segment_encoder_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,20 @@ def dummy_df() -> pd.DataFrame:
return df


def test_dummy(dummy_df):
def test_segment_encoder_transform(dummy_df):
transform = SegmentEncoderTransform()
transformed_df = transform.fit_transform(dummy_df)
assert (
len(transformed_df.loc[:, pd.IndexSlice[:, "regressor_segment_code"]].columns) == 2
), "Number of columns not the same as segments"
assert len(dummy_df) == len(transformed_df), "Row missing"
codes = set()
for segment in dummy_df.columns.get_level_values("segment").unique():
column = transformed_df.loc[:, pd.IndexSlice[segment, "regressor_segment_code"]]
assert column.dtype == "category", "Column type is not category"
assert np.all(column == column.iloc[0]), "Values are not the same for the whole column"
codes.add(column.iloc[0])
assert codes == {0, 1}, "Codes are not 0 and 1"


@pytest.fixture
Expand Down