ref(alerts): Update rule status on dynamic alert rule update #78143
Changes from all commits
```diff
@@ -908,7 +908,7 @@ def update_alert_rule(
     elif detection_type == AlertRuleDetectionType.DYNAMIC:
         # NOTE: we set seasonality for EA
         updated_query_fields["resolution"] = timedelta(
-            minutes=time_window if time_window is not None else snuba_query.time_window
+            seconds=time_window if time_window is not None else snuba_query.time_window
         )
         updated_fields["seasonality"] = AlertRuleSeasonality.AUTO
         updated_fields["comparison_delta"] = None
```
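The fallback value here, `snuba_query.time_window`, is stored in seconds (the tests below assert `time_window == 60 * 60` for a one-hour window), so building the resolution with `timedelta(minutes=...)` inflated it by a factor of 60. A minimal standalone sketch of the difference, using only the standard library:

```python
from datetime import timedelta

stored_time_window = 3600  # a one-hour window as SnubaQuery stores it: in seconds

# Old behavior: the seconds value was fed to `minutes=`, a 60x inflation.
wrong = timedelta(minutes=stored_time_window)
right = timedelta(seconds=stored_time_window)

assert wrong == timedelta(days=2, hours=12)  # 3600 minutes = 60 hours
assert right == timedelta(hours=1)           # 3600 seconds = 1 hour
```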
```diff
@@ -936,22 +936,37 @@ def update_alert_rule(
             raise ResourceDoesNotExist(
                 "Your organization does not have access to this feature."
             )

     if updated_fields.get("detection_type") == AlertRuleDetectionType.DYNAMIC and (
         alert_rule.detection_type != AlertRuleDetectionType.DYNAMIC or query or aggregate
     ):
         for k, v in updated_fields.items():
             setattr(alert_rule, k, v)

+        for k, v in updated_query_fields.items():
+            if k == "dataset":
+                v = v.value
+            elif k == "time_window":
+                v = time_window if time_window else snuba_query.time_window
+            elif k == "event_types":
+                continue
+            setattr(alert_rule.snuba_query, k, v)
+
         try:
             # NOTE: if adding a new metric alert type, take care to check that it's handled here
             rule_status = send_historical_data_to_seer(
                 alert_rule=alert_rule,
                 project=projects[0] if projects else alert_rule.projects.get(),
+                snuba_query=alert_rule.snuba_query,
+                event_types=updated_query_fields.get("event_types"),
             )
             if rule_status == AlertRuleStatus.NOT_ENOUGH_DATA:
                 # if we don't have at least seven days worth of data, then the dynamic alert won't fire
                 alert_rule.update(status=AlertRuleStatus.NOT_ENOUGH_DATA.value)
+            elif (
+                rule_status == AlertRuleStatus.PENDING
+                and alert_rule.status != AlertRuleStatus.PENDING
+            ):
+                alert_rule.update(status=AlertRuleStatus.PENDING.value)
         except (TimeoutError, MaxRetryError):
             raise TimeoutError("Failed to send data to Seer - cannot update alert rule.")
         except ParseError:
```

Review comment on the `snuba_query=alert_rule.snuba_query` argument: "since we fetch the snuba query from the db in […]"
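The status handling above only persists a change when one is needed: `NOT_ENOUGH_DATA` is always written, while `PENDING` is written only if the rule was not already pending. The transition rules, distilled into a pure function (a sketch with a stand-in enum whose values are illustrative, not the real Sentry models):

```python
from enum import Enum


class AlertRuleStatus(Enum):
    # stand-in for Sentry's AlertRuleStatus; values are illustrative
    PENDING = 0
    NOT_ENOUGH_DATA = 2


def next_status(
    current: AlertRuleStatus, seer_result: AlertRuleStatus
) -> AlertRuleStatus | None:
    """Return the status to persist after a Seer call, or None for no write."""
    if seer_result == AlertRuleStatus.NOT_ENOUGH_DATA:
        # fewer than seven days of data: the dynamic alert can't fire yet
        return AlertRuleStatus.NOT_ENOUGH_DATA
    if seer_result == AlertRuleStatus.PENDING and current != AlertRuleStatus.PENDING:
        return AlertRuleStatus.PENDING
    return None
```

Keeping the `PENDING` write conditional avoids a redundant update on the common path where the rule already had enough data.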
```diff
@@ -21,11 +21,11 @@
 from sentry.seer.anomaly_detection.utils import (
     fetch_historical_data,
     format_historical_data,
+    get_dataset_from_label,
     translate_direction,
 )
 from sentry.seer.signed_seer_api import make_signed_seer_api_request
-from sentry.snuba.models import SnubaQuery
-from sentry.snuba.utils import get_dataset
+from sentry.snuba.models import SnubaQuery, SnubaQueryEventType
 from sentry.utils import json
 from sentry.utils.json import JSONDecodeError
```
@@ -35,7 +35,7 @@ | |
settings.SEER_ANOMALY_DETECTION_URL, | ||
timeout=settings.SEER_ANOMALY_DETECTION_TIMEOUT, | ||
) | ||
NUM_DAYS = 28 | ||
MIN_DAYS = 7 | ||
|
||
|
||
def _get_start_and_end_indices(data: list[TimeSeriesPoint]) -> tuple[int, int]: | ||
|
```diff
@@ -57,16 +57,28 @@ def _get_start_and_end_indices(data: list[TimeSeriesPoint]) -> tuple[int, int]:
     return start, end


-def send_historical_data_to_seer(alert_rule: AlertRule, project: Project) -> AlertRuleStatus:
+def send_historical_data_to_seer(
+    alert_rule: AlertRule,
+    project: Project,
+    snuba_query=None,
+    event_types: list[SnubaQueryEventType.EventType] | None = None,
+) -> AlertRuleStatus:
     """
     Get 28 days of historical data and pass it to Seer to be used for prediction anomalies on the alert.
     """
-    snuba_query = SnubaQuery.objects.get(id=alert_rule.snuba_query_id)
+    if not snuba_query:
+        snuba_query = SnubaQuery.objects.get(id=alert_rule.snuba_query_id)
     window_min = int(snuba_query.time_window / 60)
-    dataset = get_dataset(snuba_query.dataset)
+    dataset = get_dataset_from_label(snuba_query.dataset)
     query_columns = get_query_columns([snuba_query.aggregate], snuba_query.time_window)
+    if not event_types:
+        event_types = snuba_query.event_types
     historical_data = fetch_historical_data(
-        alert_rule=alert_rule, snuba_query=snuba_query, query_columns=query_columns, project=project
+        alert_rule=alert_rule,
+        snuba_query=snuba_query,
+        query_columns=query_columns,
+        project=project,
+        event_types=event_types,
     )

     if not historical_data:
```

Review comment on the `event_types` fallback: "Yes a snuba query will always have event types on it, but I changed this function to accept both."
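The two new keyword arguments exist because the update path mutates `alert_rule.snuba_query` in memory before anything is committed; if this function refetched the row from the database (the fallback branch), it would see the stale, pre-update query. A sketch of the two call shapes, reusing the surrounding names from the diffs above:

```python
# Creation path: no in-flight edits, so the function may fetch from the DB.
send_historical_data_to_seer(alert_rule=alert_rule, project=project)

# Update path: pass the in-memory query (carrying unsaved edits) plus the
# incoming event types, so Seer sees the *new* configuration rather than
# the stale database row.
send_historical_data_to_seer(
    alert_rule=alert_rule,
    project=projects[0] if projects else alert_rule.projects.get(),
    snuba_query=alert_rule.snuba_query,
    event_types=updated_query_fields.get("event_types"),
)
```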
```diff
@@ -182,7 +194,6 @@ def send_historical_data_to_seer(alert_rule: AlertRule, project: Project) -> AlertRuleStatus:
         )
         raise Exception(message)

-    MIN_DAYS = 7
     data_start_index, data_end_index = _get_start_and_end_indices(formatted_data)
     if data_start_index == -1:
         return AlertRuleStatus.NOT_ENOUGH_DATA
```
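The hunk above moves `MIN_DAYS = 7` to module scope but doesn't show the comparison itself. Given the start/end indices and the comment elsewhere in the diff ("if we don't have at least seven days worth of data"), the follow-up check is presumably shaped like this sketch; `has_enough_data` is an invented name, and each point is assumed to carry a unix `timestamp` field:

```python
SECONDS_IN_A_DAY = 60 * 60 * 24


def has_enough_data(
    data: list[dict], start_index: int, end_index: int, min_days: int = 7
) -> bool:
    """True if the non-empty portion of the series spans at least min_days."""
    span_seconds = data[end_index]["timestamp"] - data[start_index]["timestamp"]
    return span_seconds >= min_days * SECONDS_IN_A_DAY
```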
```diff
@@ -101,7 +101,7 @@
 from sentry.snuba.dataset import Dataset
 from sentry.snuba.models import QuerySubscription, SnubaQuery, SnubaQueryEventType
 from sentry.testutils.cases import BaseIncidentsTest, BaseMetricsTestCase, TestCase
-from sentry.testutils.helpers.datetime import before_now, freeze_time
+from sentry.testutils.helpers.datetime import before_now, freeze_time, iso_format
 from sentry.testutils.helpers.features import with_feature
 from sentry.testutils.helpers.options import override_options
 from sentry.testutils.silo import assume_test_silo_mode, assume_test_silo_mode_of
```
```diff
@@ -1751,6 +1751,119 @@ def test_update_alert_rule_static_to_dynamic_enough_data(self, mock_seer_request
         assert mock_seer_request.call_count == 1
         assert alert_rule.status == AlertRuleStatus.PENDING.value

+    @with_feature("organizations:anomaly-detection-alerts")
+    @patch(
+        "sentry.seer.anomaly_detection.store_data.seer_anomaly_detection_connection_pool.urlopen"
+    )
+    def test_update_dynamic_alert_not_enough_to_pending(self, mock_seer_request):
+        """
+        Update a dynamic rule's aggregate so the rule's status changes from not enough data to enough/pending
+        """
+        seer_return_value: StoreDataResponse = {"success": True}
+        mock_seer_request.return_value = HTTPResponse(orjson.dumps(seer_return_value), status=200)
+
+        dynamic_rule = self.create_alert_rule(
+            sensitivity=AlertRuleSensitivity.HIGH,
+            seasonality=AlertRuleSeasonality.AUTO,
+            time_window=60,
+            detection_type=AlertRuleDetectionType.DYNAMIC,
+        )
+        assert mock_seer_request.call_count == 1
+        assert dynamic_rule.status == AlertRuleStatus.NOT_ENOUGH_DATA.value
+        mock_seer_request.reset_mock()
+
+        two_weeks_ago = before_now(days=14).replace(hour=10, minute=0, second=0, microsecond=0)
+        with self.options({"issues.group_attributes.send_kafka": True}):
+            self.store_event(
+                data={
+                    "event_id": "a" * 32,
+                    "message": "super bad",
+                    "timestamp": iso_format(two_weeks_ago + timedelta(minutes=1)),
+                    "tags": {"sentry:user": self.user.email},
+                    "exception": [{"value": "BadError"}],
+                },
+                project_id=self.project.id,
+            )
+            self.store_event(
+                data={
+                    "event_id": "a" * 32,
+                    "message": "super bad",
+                    "timestamp": iso_format(two_weeks_ago + timedelta(days=10)),  # 4 days ago
+                    "tags": {"sentry:user": self.user.email},
+                    "exception": [{"value": "BadError"}],
+                },
+                project_id=self.project.id,
+            )
+        # update aggregate
+        update_alert_rule(
+            dynamic_rule,
+            aggregate="count_unique(user)",
+            time_window=60,
+            detection_type=AlertRuleDetectionType.DYNAMIC,
+        )
+        assert mock_seer_request.call_count == 1
+        assert dynamic_rule.status == AlertRuleStatus.PENDING.value
+
+    @with_feature("organizations:anomaly-detection-alerts")
+    @patch(
+        "sentry.seer.anomaly_detection.store_data.seer_anomaly_detection_connection_pool.urlopen"
+    )
+    def test_update_dynamic_alert_pending_to_not_enough(self, mock_seer_request):
+        """
+        Update a dynamic rule's aggregate so the rule's status changes from enough/pending to not enough data
+        """
+        seer_return_value: StoreDataResponse = {"success": True}
+        mock_seer_request.return_value = HTTPResponse(orjson.dumps(seer_return_value), status=200)
+
+        two_weeks_ago = before_now(days=14).replace(hour=10, minute=0, second=0, microsecond=0)
+        with self.options({"issues.group_attributes.send_kafka": True}):
+            self.store_event(
+                data={
+                    "event_id": "b" * 32,
+                    "message": "super bad",
+                    "timestamp": iso_format(two_weeks_ago + timedelta(minutes=1)),
+                    "fingerprint": ["group2"],
+                    "tags": {"sentry:user": self.user.email},
+                    "exception": [{"value": "BadError"}],
+                },
+                project_id=self.project.id,
+            )
+            self.store_event(
+                data={
+                    "event_id": "b" * 32,
+                    "message": "super bad",
+                    "timestamp": iso_format(two_weeks_ago + timedelta(days=10)),  # 4 days ago
+                    "fingerprint": ["group2"],
+                    "tags": {"sentry:user": self.user.email},
+                    "exception": [{"value": "BadError"}],
+                },
+                project_id=self.project.id,
+            )
+
+        dynamic_rule = self.create_alert_rule(
+            sensitivity=AlertRuleSensitivity.HIGH,
+            seasonality=AlertRuleSeasonality.AUTO,
+            time_window=60,
+            detection_type=AlertRuleDetectionType.DYNAMIC,
+        )
+        assert mock_seer_request.call_count == 1
+        assert dynamic_rule.status == AlertRuleStatus.PENDING.value
+
+        mock_seer_request.reset_mock()
+
+        # update aggregate
+        update_alert_rule(
+            dynamic_rule,
+            aggregate="p95(measurements.fid)",  # first input delay data we don't have stored
+            dataset=Dataset.Transactions,
+            event_types=[SnubaQueryEventType.EventType.TRANSACTION],
+            query="",
+            # time_window=60,
+            detection_type=AlertRuleDetectionType.DYNAMIC,
+        )
+        assert mock_seer_request.call_count == 1
+        assert dynamic_rule.status == AlertRuleStatus.NOT_ENOUGH_DATA.value
+
     @with_feature("organizations:anomaly-detection-alerts")
     @patch(
         "sentry.seer.anomaly_detection.store_data.seer_anomaly_detection_connection_pool.urlopen"
```

Review comment on `test_update_dynamic_alert_not_enough_to_pending`: "note to self - this is freezing time - it's just pretty hidden in this diff."
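The "hidden" time freezing the reviewer mentions is what makes these tests deterministic: the resulting status depends entirely on how old the stored events are relative to "now". A sketch of the idea using freezegun directly (a stand-in here; whether Sentry's `freeze_time` helper wraps freezegun or another library is an assumption):

```python
from datetime import datetime, timedelta, timezone

from freezegun import freeze_time  # stand-in for the test helper


@freeze_time("2024-01-15 10:00:00")
def test_relative_timestamps_are_deterministic() -> None:
    now = datetime.now(timezone.utc)
    two_weeks_ago = now - timedelta(days=14)
    event_time = two_weeks_ago + timedelta(days=10)
    # With the clock pinned, this event is exactly 4 days old on every run,
    # so the pair of events always spans the >= 7 days of history Seer needs.
    assert (now - event_time).days == 4
```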
```diff
@@ -1830,6 +1943,7 @@ def test_update_alert_rule_anomaly_detection_seer_timeout_max_retry(
             seasonality=AlertRuleSeasonality.AUTO,
             time_window=60,
             detection_type=AlertRuleDetectionType.DYNAMIC,
+            name="my rule",
         )
         assert mock_seer_request.call_count == 1
         mock_seer_request.reset_mock()
```
```diff
@@ -1845,6 +1959,7 @@ def test_update_alert_rule_anomaly_detection_seer_timeout_max_retry(
             detection_type=AlertRuleDetectionType.DYNAMIC,
             sensitivity=AlertRuleSensitivity.HIGH,
             seasonality=AlertRuleSeasonality.AUTO,
+            name="your rule",
         )

         assert mock_logger.warning.call_count == 1
```
```diff
@@ -1865,10 +1980,17 @@ def test_update_alert_rule_anomaly_detection_seer_timeout_max_retry(
             detection_type=AlertRuleDetectionType.DYNAMIC,
             sensitivity=AlertRuleSensitivity.HIGH,
             seasonality=AlertRuleSeasonality.AUTO,
+            name="hellboy's rule",
         )

         assert mock_logger.warning.call_count == 1
         assert mock_seer_request.call_count == 1
+        # make sure the rule wasn't updated - need to refetch
+        fresh_dynamic_rule = AlertRule.objects.get(id=dynamic_rule.id)
+        assert fresh_dynamic_rule.name == "my rule"
+        assert fresh_dynamic_rule.snuba_query
+        assert fresh_dynamic_rule.snuba_query.time_window == 60 * 60
+        assert fresh_dynamic_rule.snuba_query.query == "level:error"

     @with_feature("organizations:anomaly-detection-alerts")
     @patch(
```
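The refetch in the last hunk matters because `update_alert_rule()` mutates the in-memory instance before the Seer call fails, so asserting on `dynamic_rule` directly would see the dirty, unsaved values. Only a fresh read proves nothing was persisted. A sketch of the two equivalent Django idioms:

```python
# Fetch a brand-new instance; the dirty one is left untouched.
fresh = AlertRule.objects.get(id=dynamic_rule.id)
assert fresh.name == "my rule"

# Or reload the existing instance's fields in place.
dynamic_rule.refresh_from_db()
assert dynamic_rule.name == "my rule"
```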
Author note on the `setattr(alert_rule.snuba_query, k, v)` loop: any snuba query updates were previously not getting passed to the Seer code until this was added.

Reviewer: What are the cases where we'll execute this code? It feels a little confusing to me: if it's a dynamic alert and (it's not a dynamic alert, or there's a query, or an aggregate)? Could we simplify this check at all, or maybe nest the conditions for clarity? If we can nest it, should we merge this code into that check: https://github.com/getsentry/sentry/pull/78143/files#diff-54ef12aa467e9e032fcdeb5d98079424d01fb28db982938d9e8db849ae78dd5eR1012? I'm also thinking about how large this method is and about ways to decompose it into smaller methods; it seems like we could use the alert type as a switch statement and handle the different chunks accordingly. (FWIW, I think decomposing this whole method is out of scope, but we should try to limit additions to it and slowly improve it over time.)

Author: That check is saying "if the rule type is changing to become a dynamic type, OR it's already dynamic and the update changes either the query or the aggregate". In either case we need to refetch the historical data and send it to Seer, because either the rule wasn't a dynamic type before (so Seer wouldn't have that data) or the snuba query has changed, so the historical data has changed.
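One way to make that reading explicit, nesting the conditions and naming the check along the lines the reviewer suggests, might look like the following. This is a hypothetical refactor, not code from the PR, and `needs_seer_refetch` is an invented name:

```python
def needs_seer_refetch(alert_rule, updated_fields, query, aggregate) -> bool:
    """True if Seer must be (re)sent historical data for this update."""
    if updated_fields.get("detection_type") != AlertRuleDetectionType.DYNAMIC:
        return False
    if alert_rule.detection_type != AlertRuleDetectionType.DYNAMIC:
        # Becoming dynamic: Seer has never seen this rule's history.
        return True
    # Already dynamic: re-send only if the underlying data changed.
    return bool(query or aggregate)
```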