From 1659cea06fe903d7a4a5c8ec62271a1418795f36 Mon Sep 17 00:00:00 2001 From: alex-hse-repository Date: Tue, 19 Oct 2021 09:29:42 +0300 Subject: [PATCH 1/3] Fix typo in 'length' --- etna/analysis/outliers/sequence_outliers.py | 26 +++++++++---------- etna/models/nn/deepar.py | 2 +- etna/models/nn/tft.py | 2 +- etna/transforms/outliers.py | 18 ++++++------- examples/EDA.ipynb | 2 +- examples/outliers.ipynb | 2 +- .../test_outliers/test_sequence_anomalies.py | 14 +++++----- 7 files changed, 33 insertions(+), 33 deletions(-) diff --git a/etna/analysis/outliers/sequence_outliers.py b/etna/analysis/outliers/sequence_outliers.py index 6973b5a49..528fb01ff 100644 --- a/etna/analysis/outliers/sequence_outliers.py +++ b/etna/analysis/outliers/sequence_outliers.py @@ -13,7 +13,7 @@ def get_segment_sequence_anomalies( - series: np.ndarray, num_anomalies: int = 1, anomaly_lenght: int = 15, alphabet_size: int = 3, word_lenght: int = 3 + series: np.ndarray, num_anomalies: int = 1, anomaly_length: int = 15, alphabet_size: int = 3, word_length: int = 3 ) -> List[Tuple[int, int]]: """ Get indices of start and end of sequence outliers for one segment using SAX HOT algorithm. @@ -24,11 +24,11 @@ def get_segment_sequence_anomalies( array to find outliers in num_anomalies: number of outliers to be found - anomaly_lenght: - target lenght of outliers + anomaly_length: + target length of outliers alphabet_size: the number of letters with which the subsequence will be encrypted - word_lenght: + word_length: the number of segments into which the subsequence will be divided by the paa algorithm Returns @@ -36,10 +36,10 @@ def get_segment_sequence_anomalies( list of tuples with start and end of outliers. 
""" start_points = find_discords_hotsax( - series=series, win_size=anomaly_lenght, num_discords=num_anomalies, a_size=alphabet_size, paa_size=word_lenght + series=series, win_size=anomaly_length, num_discords=num_anomalies, a_size=alphabet_size, paa_size=word_length ) - result = [(pt[0], pt[0] + anomaly_lenght) for pt in start_points] + result = [(pt[0], pt[0] + anomaly_length) for pt in start_points] return result @@ -48,9 +48,9 @@ def get_sequence_anomalies( ts: "TSDataset", in_column: str = "target", num_anomalies: int = 1, - anomaly_lenght: int = 15, + anomaly_length: int = 15, alphabet_size: int = 3, - word_lenght: int = 3, + word_length: int = 3, ) -> Dict[str, List[pd.Timestamp]]: """ Find the start and end of the sequence outliers for each segment using the SAX HOT algorithm. @@ -65,11 +65,11 @@ def get_sequence_anomalies( name of the column in which the anomaly is searching num_anomalies: number of outliers to be found - anomaly_lenght: - target lenght of outliers + anomaly_length: + target length of outliers alphabet_size: the number of letters with which the subsequence will be encrypted - word_lenght: + word_length: the number of segments into which the subsequence will be divided by the paa algorithm Returns @@ -92,9 +92,9 @@ def get_sequence_anomalies( outliers_idxs = get_segment_sequence_anomalies( series=segment_df[in_column].values, num_anomalies=num_anomalies, - anomaly_lenght=anomaly_lenght, + anomaly_length=anomaly_length, alphabet_size=alphabet_size, - word_lenght=word_lenght, + word_length=word_length, ) timestamps = segment_df["timestamp"].values diff --git a/etna/models/nn/deepar.py b/etna/models/nn/deepar.py index c28784825..297448eaa 100644 --- a/etna/models/nn/deepar.py +++ b/etna/models/nn/deepar.py @@ -156,7 +156,7 @@ def forecast(self, ts: TSDataset) -> TSDataset: ) predicts = self.model.predict(prediction_dataloader).numpy() # type: ignore - # shape (segments, encoder_lenght) + # shape (segments, encoder_length) ts.loc[:, 
pd.IndexSlice[:, "target"]] = predicts.T[-len(ts.df) :] return ts diff --git a/etna/models/nn/tft.py b/etna/models/nn/tft.py index 007a1c63b..9d9da306d 100644 --- a/etna/models/nn/tft.py +++ b/etna/models/nn/tft.py @@ -163,7 +163,7 @@ def forecast(self, ts: TSDataset) -> TSDataset: ) predicts = self.model.predict(prediction_dataloader).numpy() # type: ignore - # shape (segments, encoder_lenght) + # shape (segments, encoder_length) ts.loc[:, pd.IndexSlice[:, "target"]] = predicts.T[-len(ts.df) :] return ts diff --git a/etna/transforms/outliers.py b/etna/transforms/outliers.py index 0e762a265..38aec65ba 100644 --- a/etna/transforms/outliers.py +++ b/etna/transforms/outliers.py @@ -226,9 +226,9 @@ def __init__( self, in_column: str, num_anomalies: int = 1, - anomaly_lenght: int = 15, + anomaly_length: int = 15, alphabet_size: int = 3, - word_lenght: int = 3, + word_length: int = 3, ): """Create instance of SAXOutliersTransform. @@ -238,17 +238,17 @@ def __init__( name of processed column num_anomalies: number of outliers to be found - anomaly_lenght: - target lenght of outliers + anomaly_length: + target length of outliers alphabet_size: the number of letters with which the subsequence will be encrypted - word_lenght: + word_length: the number of segments into which the subsequence will be divided by the paa algorithm """ self.num_anomalies = num_anomalies - self.anomaly_lenght = anomaly_lenght + self.anomaly_length = anomaly_length self.alphabet_size = alphabet_size - self.word_lenght = word_lenght + self.word_length = word_length super().__init__(in_column=in_column) def detect_outliers(self, ts: TSDataset) -> Dict[str, List[pd.Timestamp]]: @@ -268,9 +268,9 @@ def detect_outliers(self, ts: TSDataset) -> Dict[str, List[pd.Timestamp]]: ts=ts, in_column=self.in_column, num_anomalies=self.num_anomalies, - anomaly_lenght=self.anomaly_lenght, + anomaly_length=self.anomaly_length, alphabet_size=self.alphabet_size, - word_lenght=self.word_lenght, + 
word_length=self.word_length, ) diff --git a/examples/EDA.ipynb b/examples/EDA.ipynb index c328797c8..25f536212 100644 --- a/examples/EDA.ipynb +++ b/examples/EDA.ipynb @@ -952,7 +952,7 @@ } ], "source": [ - "anomaly_seq_dict = get_sequence_anomalies(ts, num_anomalies = 3, anomaly_lenght = 20)\n", + "anomaly_seq_dict = get_sequence_anomalies(ts, num_anomalies = 3, anomaly_length = 20)\n", "plot_anomalies(ts, anomaly_seq_dict)" ] }, diff --git a/examples/outliers.ipynb b/examples/outliers.ipynb index 4ce5e53a6..0775a3381 100644 --- a/examples/outliers.ipynb +++ b/examples/outliers.ipynb @@ -402,7 +402,7 @@ } ], "source": [ - "anomaly_seq_dict = get_sequence_anomalies(ts, num_anomalies = 3, anomaly_lenght = 20)\n", + "anomaly_seq_dict = get_sequence_anomalies(ts, num_anomalies = 3, anomaly_length = 20)\n", "plot_anomalies(ts, anomaly_seq_dict)" ] }, diff --git a/tests/test_analysis/test_outliers/test_sequence_anomalies.py b/tests/test_analysis/test_outliers/test_sequence_anomalies.py index 808d387cc..25e2271ad 100644 --- a/tests/test_analysis/test_outliers/test_sequence_anomalies.py +++ b/tests/test_analysis/test_outliers/test_sequence_anomalies.py @@ -12,13 +12,13 @@ @pytest.fixture def test_sequence_anomalies_interface(outliers_tsds: TSDataset): - lenght = 5 - anomaly_seq_dict = get_sequence_anomalies(ts=outliers_tsds, num_anomalies=1, anomaly_lenght=lenght) + length = 5 + anomaly_seq_dict = get_sequence_anomalies(ts=outliers_tsds, num_anomalies=1, anomaly_length=length) for segment in ["1", "2"]: assert segment in anomaly_seq_dict assert isinstance(anomaly_seq_dict[segment], list) - assert len(anomaly_seq_dict[segment]) == lenght + assert len(anomaly_seq_dict[segment]) == length for timestamp in anomaly_seq_dict[segment]: assert isinstance(timestamp, np.datetime64) @@ -33,11 +33,11 @@ def test_sequence_anomalies_interface(outliers_tsds: TSDataset): ) def test_segment_sequence_anomalies(arr: List[int], expected: List[Tuple[int, int]]): arr = np.array(arr) - 
anomaly_lenght = 3 + anomaly_length = 3 num_anomalies = len(expected) expected = sorted(expected) - result = get_segment_sequence_anomalies(series=arr, num_anomalies=num_anomalies, anomaly_lenght=3) + result = get_segment_sequence_anomalies(series=arr, num_anomalies=num_anomalies, anomaly_length=3) result = sorted(result) for idx in range(num_anomalies): assert (result[idx][0] == expected[idx][0]) and (result[idx][1] == expected[idx][1]) @@ -50,7 +50,7 @@ def test_sequence_anomalies(outliers_tsds: TSDataset): } delta = pd.to_timedelta(outliers_tsds.index.freq) expected = dict([(seg, np.arange(bounds[0], bounds[1], delta)) for seg, bounds in bounds_dict.items()]) - anomaly_seq_dict = get_sequence_anomalies(outliers_tsds, num_anomalies=1, anomaly_lenght=15) + anomaly_seq_dict = get_sequence_anomalies(outliers_tsds, num_anomalies=1, anomaly_length=15) for segment in expected: assert (anomaly_seq_dict[segment] == expected[segment]).all() @@ -58,7 +58,7 @@ def test_sequence_anomalies(outliers_tsds: TSDataset): def test_in_column(outliers_df_with_two_columns): outliers = get_sequence_anomalies( - ts=outliers_df_with_two_columns, num_anomalies=1, anomaly_lenght=4, in_column="feature" + ts=outliers_df_with_two_columns, num_anomalies=1, anomaly_length=4, in_column="feature" ) delta = pd.to_timedelta(outliers_df_with_two_columns.index.freq) From 2e74098b6591c2d1acdefe17df0466486a6ff5c6 Mon Sep 17 00:00:00 2001 From: alex-hse-repository Date: Tue, 19 Oct 2021 10:23:47 +0300 Subject: [PATCH 2/3] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f99f36a91..d9a370de0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -53,6 +53,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Categorical and fillna issues with pandas >=1.2 ([#190](https://github.com/tinkoff-ai/etna-ts/pull/190)) - Fix `TSDataset.to_dataset` method sorting bug 
([#183](https://github.com/tinkoff-ai/etna-ts/pull/183)) - Undefinded behaviour of DataFrame.loc[:, pd.IndexSlice[:, ["a", "b"]]] between 1.1.* and >= 1.2 ([#188](https://github.com/tinkoff-ai/etna-ts/pull/188)) +- Fix typo in word "length" ([#212](https://github.com/tinkoff-ai/etna-ts/pull/212)) ## [1.1.3] - 2021-10-08 ### Fixed From 6077f1c30501a736cdbf04452420e29342acfb44 Mon Sep 17 00:00:00 2001 From: alex-hse-repository Date: Tue, 19 Oct 2021 12:23:44 +0300 Subject: [PATCH 3/3] Update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d9a370de0..c7b9a3a98 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -53,7 +53,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Categorical and fillna issues with pandas >=1.2 ([#190](https://github.com/tinkoff-ai/etna-ts/pull/190)) - Fix `TSDataset.to_dataset` method sorting bug ([#183](https://github.com/tinkoff-ai/etna-ts/pull/183)) - Undefinded behaviour of DataFrame.loc[:, pd.IndexSlice[:, ["a", "b"]]] between 1.1.* and >= 1.2 ([#188](https://github.com/tinkoff-ai/etna-ts/pull/188)) -- Fix typo in word "length" ([#212](https://github.com/tinkoff-ai/etna-ts/pull/212)) +- Fix typo in word "length" in `get_segment_sequence_anomalies`, `get_sequence_anomalies`, `SAXOutliersTransform` arguments ([#212](https://github.com/tinkoff-ai/etna-ts/pull/212)) ## [1.1.3] - 2021-10-08 ### Fixed