Skip to content

Fix typo in 'length' #212

Merged
merged 3 commits into from
Oct 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Categorical and fillna issues with pandas >=1.2 ([#190](https://github.com/tinkoff-ai/etna-ts/pull/190))
- Fix `TSDataset.to_dataset` method sorting bug ([#183](https://github.com/tinkoff-ai/etna-ts/pull/183))
- Undefined behaviour of DataFrame.loc[:, pd.IndexSlice[:, ["a", "b"]]] between 1.1.* and >= 1.2 ([#188](https://github.com/tinkoff-ai/etna-ts/pull/188))
- Fix typo in word "length" in `get_segment_sequence_anomalies`, `get_sequence_anomalies`, `SAXOutliersTransform` arguments ([#212](https://github.com/tinkoff-ai/etna-ts/pull/212))

## [1.1.3] - 2021-10-08
### Fixed
Expand Down
26 changes: 13 additions & 13 deletions etna/analysis/outliers/sequence_outliers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@


def get_segment_sequence_anomalies(
series: np.ndarray, num_anomalies: int = 1, anomaly_lenght: int = 15, alphabet_size: int = 3, word_lenght: int = 3
series: np.ndarray, num_anomalies: int = 1, anomaly_length: int = 15, alphabet_size: int = 3, word_length: int = 3
) -> List[Tuple[int, int]]:
"""
Get indices of start and end of sequence outliers for one segment using the HOT SAX algorithm.
Expand All @@ -24,22 +24,22 @@ def get_segment_sequence_anomalies(
array to find outliers in
num_anomalies:
number of outliers to be found
anomaly_lenght:
target lenght of outliers
anomaly_length:
target length of outliers
alphabet_size:
the number of letters with which the subsequence will be encrypted
word_lenght:
word_length:
the number of segments into which the subsequence will be divided by the paa algorithm

Returns
-------
list of tuples with start and end of outliers.
"""
start_points = find_discords_hotsax(
series=series, win_size=anomaly_lenght, num_discords=num_anomalies, a_size=alphabet_size, paa_size=word_lenght
series=series, win_size=anomaly_length, num_discords=num_anomalies, a_size=alphabet_size, paa_size=word_length
)

result = [(pt[0], pt[0] + anomaly_lenght) for pt in start_points]
result = [(pt[0], pt[0] + anomaly_length) for pt in start_points]

return result

Expand All @@ -48,9 +48,9 @@ def get_sequence_anomalies(
ts: "TSDataset",
in_column: str = "target",
num_anomalies: int = 1,
anomaly_lenght: int = 15,
anomaly_length: int = 15,
alphabet_size: int = 3,
word_lenght: int = 3,
word_length: int = 3,
) -> Dict[str, List[pd.Timestamp]]:
"""
Find the start and end of the sequence outliers for each segment using the HOT SAX algorithm.
Expand All @@ -65,11 +65,11 @@ def get_sequence_anomalies(
name of the column in which anomalies are searched for
num_anomalies:
number of outliers to be found
anomaly_lenght:
target lenght of outliers
anomaly_length:
target length of outliers
alphabet_size:
the number of letters with which the subsequence will be encrypted
word_lenght:
word_length:
the number of segments into which the subsequence will be divided by the paa algorithm

Returns
Expand All @@ -92,9 +92,9 @@ def get_sequence_anomalies(
outliers_idxs = get_segment_sequence_anomalies(
series=segment_df[in_column].values,
num_anomalies=num_anomalies,
anomaly_lenght=anomaly_lenght,
anomaly_length=anomaly_length,
alphabet_size=alphabet_size,
word_lenght=word_lenght,
word_length=word_length,
)

timestamps = segment_df["timestamp"].values
Expand Down
2 changes: 1 addition & 1 deletion etna/models/nn/deepar.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def forecast(self, ts: TSDataset) -> TSDataset:
)

predicts = self.model.predict(prediction_dataloader).numpy() # type: ignore
# shape (segments, encoder_lenght)
# shape (segments, encoder_length)

ts.loc[:, pd.IndexSlice[:, "target"]] = predicts.T[-len(ts.df) :]
return ts
2 changes: 1 addition & 1 deletion etna/models/nn/tft.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def forecast(self, ts: TSDataset) -> TSDataset:
)

predicts = self.model.predict(prediction_dataloader).numpy() # type: ignore
# shape (segments, encoder_lenght)
# shape (segments, encoder_length)

ts.loc[:, pd.IndexSlice[:, "target"]] = predicts.T[-len(ts.df) :]
return ts
18 changes: 9 additions & 9 deletions etna/transforms/outliers.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,9 +226,9 @@ def __init__(
self,
in_column: str,
num_anomalies: int = 1,
anomaly_lenght: int = 15,
anomaly_length: int = 15,
alphabet_size: int = 3,
word_lenght: int = 3,
word_length: int = 3,
):
"""Create instance of SAXOutliersTransform.

Expand All @@ -238,17 +238,17 @@ def __init__(
name of processed column
num_anomalies:
number of outliers to be found
anomaly_lenght:
target lenght of outliers
anomaly_length:
target length of outliers
alphabet_size:
the number of letters with which the subsequence will be encrypted
word_lenght:
word_length:
the number of segments into which the subsequence will be divided by the paa algorithm
"""
self.num_anomalies = num_anomalies
self.anomaly_lenght = anomaly_lenght
self.anomaly_length = anomaly_length
self.alphabet_size = alphabet_size
self.word_lenght = word_lenght
self.word_length = word_length
super().__init__(in_column=in_column)

def detect_outliers(self, ts: TSDataset) -> Dict[str, List[pd.Timestamp]]:
Expand All @@ -268,9 +268,9 @@ def detect_outliers(self, ts: TSDataset) -> Dict[str, List[pd.Timestamp]]:
ts=ts,
in_column=self.in_column,
num_anomalies=self.num_anomalies,
anomaly_lenght=self.anomaly_lenght,
anomaly_length=self.anomaly_length,
alphabet_size=self.alphabet_size,
word_lenght=self.word_lenght,
word_length=self.word_length,
)


Expand Down
2 changes: 1 addition & 1 deletion examples/EDA.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -952,7 +952,7 @@
}
],
"source": [
"anomaly_seq_dict = get_sequence_anomalies(ts, num_anomalies = 3, anomaly_lenght = 20)\n",
"anomaly_seq_dict = get_sequence_anomalies(ts, num_anomalies = 3, anomaly_length = 20)\n",
"plot_anomalies(ts, anomaly_seq_dict)"
]
},
Expand Down
2 changes: 1 addition & 1 deletion examples/outliers.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@
}
],
"source": [
"anomaly_seq_dict = get_sequence_anomalies(ts, num_anomalies = 3, anomaly_lenght = 20)\n",
"anomaly_seq_dict = get_sequence_anomalies(ts, num_anomalies = 3, anomaly_length = 20)\n",
"plot_anomalies(ts, anomaly_seq_dict)"
]
},
Expand Down
14 changes: 7 additions & 7 deletions tests/test_analysis/test_outliers/test_sequence_anomalies.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@

@pytest.fixture
def test_sequence_anomalies_interface(outliers_tsds: TSDataset):
lenght = 5
anomaly_seq_dict = get_sequence_anomalies(ts=outliers_tsds, num_anomalies=1, anomaly_lenght=lenght)
length = 5
anomaly_seq_dict = get_sequence_anomalies(ts=outliers_tsds, num_anomalies=1, anomaly_length=length)

for segment in ["1", "2"]:
assert segment in anomaly_seq_dict
assert isinstance(anomaly_seq_dict[segment], list)
assert len(anomaly_seq_dict[segment]) == lenght
assert len(anomaly_seq_dict[segment]) == length
for timestamp in anomaly_seq_dict[segment]:
assert isinstance(timestamp, np.datetime64)

Expand All @@ -33,11 +33,11 @@ def test_sequence_anomalies_interface(outliers_tsds: TSDataset):
)
def test_segment_sequence_anomalies(arr: List[int], expected: List[Tuple[int, int]]):
arr = np.array(arr)
anomaly_lenght = 3
anomaly_length = 3
num_anomalies = len(expected)
expected = sorted(expected)

result = get_segment_sequence_anomalies(series=arr, num_anomalies=num_anomalies, anomaly_lenght=3)
result = get_segment_sequence_anomalies(series=arr, num_anomalies=num_anomalies, anomaly_length=3)
result = sorted(result)
for idx in range(num_anomalies):
assert (result[idx][0] == expected[idx][0]) and (result[idx][1] == expected[idx][1])
Expand All @@ -50,15 +50,15 @@ def test_sequence_anomalies(outliers_tsds: TSDataset):
}
delta = pd.to_timedelta(outliers_tsds.index.freq)
expected = dict([(seg, np.arange(bounds[0], bounds[1], delta)) for seg, bounds in bounds_dict.items()])
anomaly_seq_dict = get_sequence_anomalies(outliers_tsds, num_anomalies=1, anomaly_lenght=15)
anomaly_seq_dict = get_sequence_anomalies(outliers_tsds, num_anomalies=1, anomaly_length=15)

for segment in expected:
assert (anomaly_seq_dict[segment] == expected[segment]).all()


def test_in_column(outliers_df_with_two_columns):
outliers = get_sequence_anomalies(
ts=outliers_df_with_two_columns, num_anomalies=1, anomaly_lenght=4, in_column="feature"
ts=outliers_df_with_two_columns, num_anomalies=1, anomaly_length=4, in_column="feature"
)
delta = pd.to_timedelta(outliers_df_with_two_columns.index.freq)

Expand Down