tinkoff-ai · julia-shenshina · Oct 19, 2021 · Oct 19, 2021 · Oct 19, 2021 · Oct 19, 2021
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -53,6 +53,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Categorical and fillna issues with pandas >=1.2 ([#190](https://github.com/tinkoff-ai/etna-ts/pull/190))
 - Fix `TSDataset.to_dataset` method sorting bug ([#183](https://github.com/tinkoff-ai/etna-ts/pull/183))
 - Undefinded behaviour of DataFrame.loc[:, pd.IndexSlice[:, ["a", "b"]]] between 1.1.* and >= 1.2 ([#188](https://github.com/tinkoff-ai/etna-ts/pull/188))
+- Fix typo in word "length" in `get_segment_sequence_anomalies`, `get_sequence_anomalies`, `SAXOutliersTransform` arguments ([#212](https://github.com/tinkoff-ai/etna-ts/pull/212))
 
 ## [1.1.3] - 2021-10-08
 ### Fixed

diff --git a/etna/analysis/outliers/sequence_outliers.py b/etna/analysis/outliers/sequence_outliers.py
@@ -13,7 +13,7 @@
 
 
 def get_segment_sequence_anomalies(
-    series: np.ndarray, num_anomalies: int = 1, anomaly_lenght: int = 15, alphabet_size: int = 3, word_lenght: int = 3
+    series: np.ndarray, num_anomalies: int = 1, anomaly_length: int = 15, alphabet_size: int = 3, word_length: int = 3
 ) -> List[Tuple[int, int]]:
     """
     Get indices of start and end of sequence outliers for one segment using SAX HOT algorithm.
@@ -24,22 +24,22 @@ def get_segment_sequence_anomalies(
         array to find outliers in
     num_anomalies:
         number of outliers to be found
-    anomaly_lenght:
-        target lenght of outliers
+    anomaly_length:
+        target length of outliers
     alphabet_size:
         the number of letters with which the subsequence will be encrypted
-    word_lenght:
+    word_length:
         the number of segments into which the subsequence will be divided by the paa algorithm
 
     Returns
     -------
     list of tuples with start and end of outliers.
     """
     start_points = find_discords_hotsax(
-        series=series, win_size=anomaly_lenght, num_discords=num_anomalies, a_size=alphabet_size, paa_size=word_lenght
+        series=series, win_size=anomaly_length, num_discords=num_anomalies, a_size=alphabet_size, paa_size=word_length
     )
 
-    result = [(pt[0], pt[0] + anomaly_lenght) for pt in start_points]
+    result = [(pt[0], pt[0] + anomaly_length) for pt in start_points]
 
     return result
 
@@ -48,9 +48,9 @@ def get_sequence_anomalies(
     ts: "TSDataset",
     in_column: str = "target",
     num_anomalies: int = 1,
-    anomaly_lenght: int = 15,
+    anomaly_length: int = 15,
     alphabet_size: int = 3,
-    word_lenght: int = 3,
+    word_length: int = 3,
 ) -> Dict[str, List[pd.Timestamp]]:
     """
     Find the start and end of the sequence outliers for each segment using the SAX HOT algorithm.
@@ -65,11 +65,11 @@ def get_sequence_anomalies(
         name of the column in which the anomaly is searching
     num_anomalies:
         number of outliers to be found
-    anomaly_lenght:
-        target lenght of outliers
+    anomaly_length:
+        target length of outliers
     alphabet_size:
         the number of letters with which the subsequence will be encrypted
-    word_lenght:
+    word_length:
         the number of segments into which the subsequence will be divided by the paa algorithm
 
     Returns
@@ -92,9 +92,9 @@ def get_sequence_anomalies(
         outliers_idxs = get_segment_sequence_anomalies(
             series=segment_df[in_column].values,
             num_anomalies=num_anomalies,
-            anomaly_lenght=anomaly_lenght,
+            anomaly_length=anomaly_length,
             alphabet_size=alphabet_size,
-            word_lenght=word_lenght,
+            word_length=word_length,
         )
 
         timestamps = segment_df["timestamp"].values

diff --git a/etna/models/nn/deepar.py b/etna/models/nn/deepar.py
@@ -156,7 +156,7 @@ def forecast(self, ts: TSDataset) -> TSDataset:
         )
 
         predicts = self.model.predict(prediction_dataloader).numpy()  # type: ignore
-        # shape (segments, encoder_lenght)
+        # shape (segments, encoder_length)
 
         ts.loc[:, pd.IndexSlice[:, "target"]] = predicts.T[-len(ts.df) :]
         return ts
diff --git a/etna/models/nn/tft.py b/etna/models/nn/tft.py
@@ -163,7 +163,7 @@ def forecast(self, ts: TSDataset) -> TSDataset:
         )
 
         predicts = self.model.predict(prediction_dataloader).numpy()  # type: ignore
-        # shape (segments, encoder_lenght)
+        # shape (segments, encoder_length)
 
         ts.loc[:, pd.IndexSlice[:, "target"]] = predicts.T[-len(ts.df) :]
         return ts
diff --git a/etna/transforms/outliers.py b/etna/transforms/outliers.py
@@ -226,9 +226,9 @@ def __init__(
         self,
         in_column: str,
         num_anomalies: int = 1,
-        anomaly_lenght: int = 15,
+        anomaly_length: int = 15,
         alphabet_size: int = 3,
-        word_lenght: int = 3,
+        word_length: int = 3,
     ):
         """Create instance of SAXOutliersTransform.
 
@@ -238,17 +238,17 @@ def __init__(
             name of processed column
         num_anomalies:
             number of outliers to be found
-        anomaly_lenght:
-            target lenght of outliers
+        anomaly_length:
+            target length of outliers
         alphabet_size:
             the number of letters with which the subsequence will be encrypted
-        word_lenght:
+        word_length:
             the number of segments into which the subsequence will be divided by the paa algorithm
         """
         self.num_anomalies = num_anomalies
-        self.anomaly_lenght = anomaly_lenght
+        self.anomaly_length = anomaly_length
         self.alphabet_size = alphabet_size
-        self.word_lenght = word_lenght
+        self.word_length = word_length
         super().__init__(in_column=in_column)
 
     def detect_outliers(self, ts: TSDataset) -> Dict[str, List[pd.Timestamp]]:
@@ -268,9 +268,9 @@ def detect_outliers(self, ts: TSDataset) -> Dict[str, List[pd.Timestamp]]:
             ts=ts,
             in_column=self.in_column,
             num_anomalies=self.num_anomalies,
-            anomaly_lenght=self.anomaly_lenght,
+            anomaly_length=self.anomaly_length,
             alphabet_size=self.alphabet_size,
-            word_lenght=self.word_lenght,
+            word_length=self.word_length,
         )
 
 

diff --git a/examples/EDA.ipynb b/examples/EDA.ipynb
@@ -952,7 +952,7 @@
     }
    ],
    "source": [
-    "anomaly_seq_dict = get_sequence_anomalies(ts, num_anomalies = 3, anomaly_lenght = 20)\n",
+    "anomaly_seq_dict = get_sequence_anomalies(ts, num_anomalies = 3, anomaly_length = 20)\n",
     "plot_anomalies(ts, anomaly_seq_dict)"
    ]
   },

diff --git a/examples/outliers.ipynb b/examples/outliers.ipynb
@@ -402,7 +402,7 @@
     }
    ],
    "source": [
-    "anomaly_seq_dict = get_sequence_anomalies(ts, num_anomalies = 3, anomaly_lenght = 20)\n",
+    "anomaly_seq_dict = get_sequence_anomalies(ts, num_anomalies = 3, anomaly_length = 20)\n",
     "plot_anomalies(ts, anomaly_seq_dict)"
    ]
   },

diff --git a/tests/test_analysis/test_outliers/test_sequence_anomalies.py b/tests/test_analysis/test_outliers/test_sequence_anomalies.py
@@ -12,13 +12,13 @@
 
 @pytest.fixture
 def test_sequence_anomalies_interface(outliers_tsds: TSDataset):
-    lenght = 5
-    anomaly_seq_dict = get_sequence_anomalies(ts=outliers_tsds, num_anomalies=1, anomaly_lenght=lenght)
+    length = 5
+    anomaly_seq_dict = get_sequence_anomalies(ts=outliers_tsds, num_anomalies=1, anomaly_length=length)
 
     for segment in ["1", "2"]:
         assert segment in anomaly_seq_dict
         assert isinstance(anomaly_seq_dict[segment], list)
-        assert len(anomaly_seq_dict[segment]) == lenght
+        assert len(anomaly_seq_dict[segment]) == length
         for timestamp in anomaly_seq_dict[segment]:
             assert isinstance(timestamp, np.datetime64)
 
@@ -33,11 +33,11 @@ def test_sequence_anomalies_interface(outliers_tsds: TSDataset):
 )
 def test_segment_sequence_anomalies(arr: List[int], expected: List[Tuple[int, int]]):
     arr = np.array(arr)
-    anomaly_lenght = 3
+    anomaly_length = 3
     num_anomalies = len(expected)
     expected = sorted(expected)
 
-    result = get_segment_sequence_anomalies(series=arr, num_anomalies=num_anomalies, anomaly_lenght=3)
+    result = get_segment_sequence_anomalies(series=arr, num_anomalies=num_anomalies, anomaly_length=3)
     result = sorted(result)
     for idx in range(num_anomalies):
         assert (result[idx][0] == expected[idx][0]) and (result[idx][1] == expected[idx][1])
@@ -50,15 +50,15 @@ def test_sequence_anomalies(outliers_tsds: TSDataset):
     }
     delta = pd.to_timedelta(outliers_tsds.index.freq)
     expected = dict([(seg, np.arange(bounds[0], bounds[1], delta)) for seg, bounds in bounds_dict.items()])
-    anomaly_seq_dict = get_sequence_anomalies(outliers_tsds, num_anomalies=1, anomaly_lenght=15)
+    anomaly_seq_dict = get_sequence_anomalies(outliers_tsds, num_anomalies=1, anomaly_length=15)
 
     for segment in expected:
         assert (anomaly_seq_dict[segment] == expected[segment]).all()
 
 
 def test_in_column(outliers_df_with_two_columns):
     outliers = get_sequence_anomalies(
-        ts=outliers_df_with_two_columns, num_anomalies=1, anomaly_lenght=4, in_column="feature"
+        ts=outliers_df_with_two_columns, num_anomalies=1, anomaly_length=4, in_column="feature"
     )
     delta = pd.to_timedelta(outliers_df_with_two_columns.index.freq)