From 616367ab32893af9dd88c6df21858e4a34939e63 Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Wed, 8 Apr 2020 11:43:08 -0700 Subject: [PATCH 01/11] [monitoring] testing: mitigate 409 conflicts fixes #2971 --- .../api/v3/alerts-client/snippets_test.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/monitoring/api/v3/alerts-client/snippets_test.py b/monitoring/api/v3/alerts-client/snippets_test.py index 1980601cdfa3..30f189252168 100644 --- a/monitoring/api/v3/alerts-client/snippets_test.py +++ b/monitoring/api/v3/alerts-client/snippets_test.py @@ -16,6 +16,7 @@ import random import string +import time from google.api_core.exceptions import Aborted from google.cloud import monitoring_v3 @@ -102,15 +103,23 @@ def test_enable_alert_policies(capsys, pochan): def invoke_sample(val): snippets.enable_alert_policies(pochan.project_name, val) + # These sleep calls are for mitigating the following error: + # "409 Too many concurrent edits to the project configuration. + # Please try again." + # TODO(#3310): Having multiple projects will void these `sleep()` calls + time.sleep(2) invoke_sample(False) + time.sleep(2) invoke_sample(False) out, _ = capsys.readouterr() assert "already disabled" in out + time.sleep(2) invoke_sample(True) out, _ = capsys.readouterr() assert "Enabled {0}".format(pochan.project_name) in out + time.sleep(2) invoke_sample(True) out, _ = capsys.readouterr() assert "already enabled" in out @@ -126,6 +135,11 @@ def invoke_sample(): snippets.replace_notification_channels( pochan.project_name, alert_policy_id, [notification_channel_id]) + # This sleep call is for mitigating the following error: + # "409 Too many concurrent edits to the project configuration. + # Please try again." + # TODO(#3310): Having multiple projects will void this `sleep()` call. + time.sleep(2) invoke_sample() out, _ = capsys.readouterr() assert "Updated {0}".format(pochan.alert_policy.name) in out @@ -137,6 +151,11 @@ def test_backup_and_restore(capsys, pochan): def invoke_backup(): snippets.backup(pochan.project_name, 'backup.json') + # These sleep calls are for mitigating the following error: + # "409 Too many concurrent edits to the project configuration. + # Please try again." + # TODO(#3310): Having multiple projects will void these `sleep()` calls + time.sleep(2) invoke_backup() out, _ = capsys.readouterr() @@ -145,6 +164,7 @@ def invoke_backup(): def invoke_restore(): snippets.restore(pochan.project_name, 'backup.json') + time.sleep(2) invoke_restore() out, _ = capsys.readouterr() assert "Updated {0}".format(pochan.alert_policy.name) in out @@ -161,6 +181,11 @@ def invoke_delete(): snippets.delete_notification_channels( pochan.project_name, [notification_channel_id], force=True) + # This sleep call is for mitigating the following error: + # "409 Too many concurrent edits to the project configuration. + # Please try again." + # TODO(#3310): Having multiple projects will void this `sleep()` call. + time.sleep(2) invoke_delete() out, _ = capsys.readouterr() assert "{0} deleted".format(notification_channel_id) in out From c02328e98510b446e6f93e2f718598aaab1ec184 Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Wed, 8 Apr 2020 12:05:00 -0700 Subject: [PATCH 02/11] retry on ServiceUnavailable too --- monitoring/api/v3/alerts-client/snippets_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/monitoring/api/v3/alerts-client/snippets_test.py b/monitoring/api/v3/alerts-client/snippets_test.py index 30f189252168..00666f28eb8e 100644 --- a/monitoring/api/v3/alerts-client/snippets_test.py +++ b/monitoring/api/v3/alerts-client/snippets_test.py @@ -19,6 +19,7 @@ import time from google.api_core.exceptions import Aborted +from google.api_core.exceptions import ServiceUnavailable from google.cloud import monitoring_v3 import google.protobuf.json_format import pytest @@ -33,7 +34,7 @@ def random_name(length): def retry_if_aborted(exception): - return isinstance(exception, Aborted) + return isinstance(exception, (Aborted, ServiceUnavailable)) class PochanFixture: From abf1aa611e74380486dad975695e94cb6b9ee6a3 Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Wed, 8 Apr 2020 12:13:29 -0700 Subject: [PATCH 03/11] remove TODOs --- monitoring/api/v3/alerts-client/snippets_test.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/monitoring/api/v3/alerts-client/snippets_test.py b/monitoring/api/v3/alerts-client/snippets_test.py index 00666f28eb8e..ef23cf364591 100644 --- a/monitoring/api/v3/alerts-client/snippets_test.py +++ b/monitoring/api/v3/alerts-client/snippets_test.py @@ -107,7 +107,8 @@ def invoke_sample(val): # These sleep calls are for mitigating the following error: # "409 Too many concurrent edits to the project configuration. # Please try again." - # TODO(#3310): Having multiple projects will void these `sleep()` calls + # Having multiple projects will void these `sleep()` calls. + # See also #3310 time.sleep(2) invoke_sample(False) time.sleep(2) @@ -139,7 +140,8 @@ def invoke_sample(): # This sleep call is for mitigating the following error: # "409 Too many concurrent edits to the project configuration. # Please try again." - # TODO(#3310): Having multiple projects will void this `sleep()` call. + # Having multiple projects will void this `sleep()` call. + # See also #3310 time.sleep(2) invoke_sample() out, _ = capsys.readouterr() @@ -155,7 +157,8 @@ def invoke_backup(): # These sleep calls are for mitigating the following error: # "409 Too many concurrent edits to the project configuration. # Please try again." - # TODO(#3310): Having multiple projects will void these `sleep()` calls + # Having multiple projects will void this `sleep()` call. + # See also #3310 time.sleep(2) invoke_backup() out, _ = capsys.readouterr() @@ -185,7 +188,8 @@ def invoke_delete(): # This sleep call is for mitigating the following error: # "409 Too many concurrent edits to the project configuration. # Please try again." - # TODO(#3310): Having multiple projects will void this `sleep()` call. + # Having multiple projects will void these `sleep()` calls. + # See also #3310 time.sleep(2) invoke_delete() out, _ = capsys.readouterr() From 7cb80cdd16ce013b44b1e016843d048a8825af6a Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Wed, 8 Apr 2020 13:17:21 -0700 Subject: [PATCH 04/11] reduce the number of api calls --- monitoring/api/v3/alerts-client/snippets_test.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/monitoring/api/v3/alerts-client/snippets_test.py b/monitoring/api/v3/alerts-client/snippets_test.py index ef23cf364591..1ae793ca107c 100644 --- a/monitoring/api/v3/alerts-client/snippets_test.py +++ b/monitoring/api/v3/alerts-client/snippets_test.py @@ -109,22 +109,16 @@ def invoke_sample(val): # Please try again." # Having multiple projects will void these `sleep()` calls. # See also #3310 - time.sleep(2) - invoke_sample(False) - time.sleep(2) - invoke_sample(False) - out, _ = capsys.readouterr() - assert "already disabled" in out - time.sleep(2) invoke_sample(True) + out, _ = capsys.readouterr() assert "Enabled {0}".format(pochan.project_name) in out time.sleep(2) - invoke_sample(True) + invoke_sample(False) out, _ = capsys.readouterr() - assert "already enabled" in out + assert "Disabled {}".format(pochan.project_name) in out def test_replace_channels(capsys, pochan): From 925ebdf064b1867fb2579a0ed89c4f5749c228f6 Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Wed, 8 Apr 2020 13:44:26 -0700 Subject: [PATCH 05/11] mark tests as flaky instead of having retries --- .../v3/alerts-client/requirements-test.txt | 1 + .../api/v3/alerts-client/snippets_test.py | 57 +++++++------------ 2 files changed, 22 insertions(+), 36 deletions(-) diff --git a/monitoring/api/v3/alerts-client/requirements-test.txt b/monitoring/api/v3/alerts-client/requirements-test.txt index 826c2b7161c4..759a86bd8e6b 100644 --- a/monitoring/api/v3/alerts-client/requirements-test.txt +++ b/monitoring/api/v3/alerts-client/requirements-test.txt @@ -1,2 +1,3 @@ pytest==5.3.2 retrying==1.3.3 +flaky==3.6.1 diff --git a/monitoring/api/v3/alerts-client/snippets_test.py b/monitoring/api/v3/alerts-client/snippets_test.py index 1ae793ca107c..5c30b7977d37 100644 --- a/monitoring/api/v3/alerts-client/snippets_test.py +++ b/monitoring/api/v3/alerts-client/snippets_test.py @@ -37,6 +37,13 @@ def retry_if_aborted(exception): return isinstance(exception, (Aborted, ServiceUnavailable)) +def delay_on_aborted(err, *args): + if retry_if_aborted(err[0]): + time.sleep(2) + return True + return False + + class PochanFixture: """A test fixture that creates an alert POlicy and a notification CHANnel, hence the name, pochan. @@ -98,38 +105,28 @@ def test_list_alert_policies(capsys, pochan): assert pochan.alert_policy.display_name in out +@pytest.mark.flaky(rerun_filter=delay_on_aborted, max_runs=5) def test_enable_alert_policies(capsys, pochan): - @retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, - stop_max_attempt_number=5, retry_on_exception=retry_if_aborted) - def invoke_sample(val): - snippets.enable_alert_policies(pochan.project_name, val) - # These sleep calls are for mitigating the following error: # "409 Too many concurrent edits to the project configuration. # Please try again." # Having multiple projects will void these `sleep()` calls. # See also #3310 time.sleep(2) - invoke_sample(True) - + snippets.enable_alert_policies(pochan.project_name, True) out, _ = capsys.readouterr() assert "Enabled {0}".format(pochan.project_name) in out time.sleep(2) - invoke_sample(False) + snippets.enable_alert_policies(pochan.project_name, False) out, _ = capsys.readouterr() assert "Disabled {}".format(pochan.project_name) in out +@pytest.mark.flaky(rerun_filter=delay_on_aborted, max_runs=5) def test_replace_channels(capsys, pochan): - @retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, - stop_max_attempt_number=5, retry_on_exception=retry_if_aborted) - def invoke_sample(): - alert_policy_id = pochan.alert_policy.name.split('/')[-1] - notification_channel_id = pochan.notification_channel.name.split( - '/')[-1] - snippets.replace_notification_channels( - pochan.project_name, alert_policy_id, [notification_channel_id]) + alert_policy_id = pochan.alert_policy.name.split('/')[-1] + notification_channel_id = pochan.notification_channel.name.split('/')[-1] # This sleep call is for mitigating the following error: # "409 Too many concurrent edits to the project configuration. @@ -137,55 +134,43 @@ def invoke_sample(): # Having multiple projects will void this `sleep()` call. # See also #3310 time.sleep(2) - invoke_sample() + snippets.replace_notification_channels( + pochan.project_name, alert_policy_id, [notification_channel_id]) out, _ = capsys.readouterr() assert "Updated {0}".format(pochan.alert_policy.name) in out +@pytest.mark.flaky(rerun_filter=delay_on_aborted, max_runs=5) def test_backup_and_restore(capsys, pochan): - @retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, - stop_max_attempt_number=5, retry_on_exception=retry_if_aborted) - def invoke_backup(): - snippets.backup(pochan.project_name, 'backup.json') - # These sleep calls are for mitigating the following error: # "409 Too many concurrent edits to the project configuration. # Please try again." # Having multiple projects will void this `sleep()` call. # See also #3310 time.sleep(2) - invoke_backup() + snippets.backup(pochan.project_name, 'backup.json') out, _ = capsys.readouterr() - @retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, - stop_max_attempt_number=5, retry_on_exception=retry_if_aborted) - def invoke_restore(): - snippets.restore(pochan.project_name, 'backup.json') - time.sleep(2) - invoke_restore() + snippets.restore(pochan.project_name, 'backup.json') out, _ = capsys.readouterr() assert "Updated {0}".format(pochan.alert_policy.name) in out assert "Updating channel {0}".format( pochan.notification_channel.display_name) in out +@pytest.mark.flaky(rerun_filter=delay_on_aborted, max_runs=5) def test_delete_channels(capsys, pochan): notification_channel_id = pochan.notification_channel.name.split('/')[-1] - @retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, - stop_max_attempt_number=5, retry_on_exception=retry_if_aborted) - def invoke_delete(): - snippets.delete_notification_channels( - pochan.project_name, [notification_channel_id], force=True) - # This sleep call is for mitigating the following error: # "409 Too many concurrent edits to the project configuration. # Please try again." # Having multiple projects will void these `sleep()` calls. # See also #3310 time.sleep(2) - invoke_delete() + snippets.delete_notification_channels( + pochan.project_name, [notification_channel_id], force=True) out, _ = capsys.readouterr() assert "{0} deleted".format(notification_channel_id) in out pochan.notification_channel.name = '' # So teardown is not tried From 0a94813aa1feb095b5250a366891591fa1b6f47b Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Wed, 8 Apr 2020 14:06:41 -0700 Subject: [PATCH 06/11] fix the rerun_filter implementation --- monitoring/api/v3/alerts-client/snippets_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monitoring/api/v3/alerts-client/snippets_test.py b/monitoring/api/v3/alerts-client/snippets_test.py index 5c30b7977d37..74e887572c20 100644 --- a/monitoring/api/v3/alerts-client/snippets_test.py +++ b/monitoring/api/v3/alerts-client/snippets_test.py @@ -38,7 +38,7 @@ def retry_if_aborted(exception): def delay_on_aborted(err, *args): - if retry_if_aborted(err[0]): + if retry_if_aborted(err[1]): time.sleep(2) return True return False From 9c2fec86ef5aad4045a92ef1e7d6cb187ed52a84 Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Wed, 8 Apr 2020 14:59:11 -0700 Subject: [PATCH 07/11] add randomness to the sleep calls --- monitoring/api/v3/alerts-client/snippets_test.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/monitoring/api/v3/alerts-client/snippets_test.py b/monitoring/api/v3/alerts-client/snippets_test.py index 74e887572c20..766c54654a32 100644 --- a/monitoring/api/v3/alerts-client/snippets_test.py +++ b/monitoring/api/v3/alerts-client/snippets_test.py @@ -28,6 +28,10 @@ import snippets +# We assume we have access to good randomness source. +random.seed() + + def random_name(length): return ''.join( [random.choice(string.ascii_lowercase) for i in range(length)]) @@ -39,7 +43,8 @@ def retry_if_aborted(exception): def delay_on_aborted(err, *args): if retry_if_aborted(err[1]): - time.sleep(2) + # add randomness for avoiding continuous conflict + time.sleep(2 + (random.randint(0, 9) * 0.1)) return True return False From ef044b713296cce01ba2ae7f59e813d2634b806f Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Wed, 8 Apr 2020 15:18:55 -0700 Subject: [PATCH 08/11] lonter wait, better teardown --- .../api/v3/alerts-client/snippets_test.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/monitoring/api/v3/alerts-client/snippets_test.py b/monitoring/api/v3/alerts-client/snippets_test.py index 766c54654a32..31a875b05fe3 100644 --- a/monitoring/api/v3/alerts-client/snippets_test.py +++ b/monitoring/api/v3/alerts-client/snippets_test.py @@ -19,6 +19,7 @@ import time from google.api_core.exceptions import Aborted +from google.api_core.exceptions import NotFound from google.api_core.exceptions import ServiceUnavailable from google.cloud import monitoring_v3 import google.protobuf.json_format @@ -44,7 +45,7 @@ def retry_if_aborted(exception): def delay_on_aborted(err, *args): if retry_if_aborted(err[1]): # add randomness for avoiding continuous conflict - time.sleep(2 + (random.randint(0, 9) * 0.1)) + time.sleep(5 + (random.randint(0, 9) * 0.1)) return True return False @@ -90,11 +91,18 @@ def __exit__(self, type, value, traceback): @retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=5, retry_on_exception=retry_if_aborted) def teardown(): - self.alert_policy_client.delete_alert_policy( - self.alert_policy.name) - if self.notification_channel.name: - self.notification_channel_client.delete_notification_channel( - self.notification_channel.name) + try: + self.alert_policy_client.delete_alert_policy( + self.alert_policy.name) + except NotFound: + print("Ignored NotFound when deleting a policy.") + try: + if self.notification_channel.name: + self.notification_channel_client\ + .delete_notification_channel( + self.notification_channel.name) + except NotFound: + print("Ignored NotFound when deleting a channel.") teardown() From 98fe9a00e800d43ad7af1ccbe1534fb75f2f59e9 Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Wed, 8 Apr 2020 15:41:10 -0700 Subject: [PATCH 09/11] use retry for enable_alert test --- monitoring/api/v3/alerts-client/snippets_test.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/monitoring/api/v3/alerts-client/snippets_test.py b/monitoring/api/v3/alerts-client/snippets_test.py index 31a875b05fe3..8b02e76b9c1b 100644 --- a/monitoring/api/v3/alerts-client/snippets_test.py +++ b/monitoring/api/v3/alerts-client/snippets_test.py @@ -118,20 +118,23 @@ def test_list_alert_policies(capsys, pochan): assert pochan.alert_policy.display_name in out -@pytest.mark.flaky(rerun_filter=delay_on_aborted, max_runs=5) def test_enable_alert_policies(capsys, pochan): + @retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, + stop_max_attempt_number=5, retry_on_exception=retry_if_aborted) + def invoke_sample(val): + snippets.enable_alert_policies(pochan.project_name, val) # These sleep calls are for mitigating the following error: # "409 Too many concurrent edits to the project configuration. # Please try again." # Having multiple projects will void these `sleep()` calls. # See also #3310 time.sleep(2) - snippets.enable_alert_policies(pochan.project_name, True) + invoke_sample(True) out, _ = capsys.readouterr() assert "Enabled {0}".format(pochan.project_name) in out time.sleep(2) - snippets.enable_alert_policies(pochan.project_name, False) + invoke_sample(False) out, _ = capsys.readouterr() assert "Disabled {}".format(pochan.project_name) in out From ffbc1167833a2193956ad06d10274d3cfbc81cfc Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Wed, 8 Apr 2020 15:48:47 -0700 Subject: [PATCH 10/11] add more retries for setup and teardown --- monitoring/api/v3/alerts-client/snippets_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/monitoring/api/v3/alerts-client/snippets_test.py b/monitoring/api/v3/alerts-client/snippets_test.py index 8b02e76b9c1b..ea18b2681244 100644 --- a/monitoring/api/v3/alerts-client/snippets_test.py +++ b/monitoring/api/v3/alerts-client/snippets_test.py @@ -64,7 +64,7 @@ def __init__(self): def __enter__(self): @retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, - stop_max_attempt_number=5, retry_on_exception=retry_if_aborted) + stop_max_attempt_number=10, retry_on_exception=retry_if_aborted) def setup(): # Create a policy. policy = monitoring_v3.types.alert_pb2.AlertPolicy() @@ -89,7 +89,7 @@ def setup(): def __exit__(self, type, value, traceback): # Delete the policy and channel we created. @retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, - stop_max_attempt_number=5, retry_on_exception=retry_if_aborted) + stop_max_attempt_number=10, retry_on_exception=retry_if_aborted) def teardown(): try: self.alert_policy_client.delete_alert_policy( From 4c79387a7a9cfdf69db76d6f385fd56b76a99d75 Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Wed, 8 Apr 2020 16:19:02 -0700 Subject: [PATCH 11/11] allow both messages --- monitoring/api/v3/alerts-client/snippets_test.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/monitoring/api/v3/alerts-client/snippets_test.py b/monitoring/api/v3/alerts-client/snippets_test.py index ea18b2681244..b69a1b5fd387 100644 --- a/monitoring/api/v3/alerts-client/snippets_test.py +++ b/monitoring/api/v3/alerts-client/snippets_test.py @@ -118,25 +118,24 @@ def test_list_alert_policies(capsys, pochan): assert pochan.alert_policy.display_name in out +@pytest.mark.flaky(rerun_filter=delay_on_aborted, max_runs=5) def test_enable_alert_policies(capsys, pochan): - @retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, - stop_max_attempt_number=5, retry_on_exception=retry_if_aborted) - def invoke_sample(val): - snippets.enable_alert_policies(pochan.project_name, val) # These sleep calls are for mitigating the following error: # "409 Too many concurrent edits to the project configuration. # Please try again." # Having multiple projects will void these `sleep()` calls. # See also #3310 time.sleep(2) - invoke_sample(True) + snippets.enable_alert_policies(pochan.project_name, True) out, _ = capsys.readouterr() - assert "Enabled {0}".format(pochan.project_name) in out + assert "Enabled {0}".format(pochan.project_name) in out \ + or "{} is already enabled".format(pochan.alert_policy.name) in out time.sleep(2) - invoke_sample(False) + snippets.enable_alert_policies(pochan.project_name, False) out, _ = capsys.readouterr() - assert "Disabled {}".format(pochan.project_name) in out + assert "Disabled {}".format(pochan.project_name) in out \ + or "{} is already disabled".format(pochan.alert_policy.name) in out @pytest.mark.flaky(rerun_filter=delay_on_aborted, max_runs=5)