Skip to content

Commit

Permalink
tests: add a retry when asserting the up metric (#243)
Browse files Browse the repository at this point in the history
* tests: add a retry when asserting the up metric

Adding a retry when checking the state of an alert gives prometheus-k8s time to scrape
the necessary metrics for a unit. Without it, we may run into a race condition where the
metric assertion runs before Prometheus has even been able to scrape.
This commit adds retry logic to avoid this.

Fixes #244
  • Loading branch information
DnPlas committed Feb 23, 2024
1 parent 1d1a6f5 commit d58d132
Showing 1 changed file with 17 additions and 6 deletions.
23 changes: 17 additions & 6 deletions tests/integration/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,20 @@ async def check_alert_propagation(url, alert_name):
assert alert_rule is not None and alert_rule["state"] == "firing"


@tenacity.retry(wait=tenacity.wait_fixed(30), stop=tenacity.stop_after_attempt(6), reraise=True)
async def assert_seldon_unit_is_available(prometheus_url):
    """Assert that Prometheus reports the application's unit as up.

    Queries the Prometheus HTTP API for the ``up`` metric filtered by the
    ``juju_application`` label set to ``APP_NAME`` and asserts that the value
    of the first returned sample is ``"1"``.

    The check is attempted up to 6 times with a fixed 30 wait between attempts
    (see the ``tenacity.retry`` decorator), so that prometheus-k8s has time to
    scrape the unit before the assertion is considered failed. The failure from
    the last attempt is re-raised.

    Args:
        prometheus_url: host part of the Prometheus URL; the request is sent
            to port 9090 on it.

    Raises:
        AssertionError: if no ``up`` sample is returned or its value is not
            ``"1"`` on the final attempt.
    """
    # query for the up metric and assert the unit is available
    up_query_response = await fetch_url(
        f'http://{prometheus_url}:9090/api/v1/query?query=up{{juju_application="{APP_NAME}"}}'
    )
    results = up_query_response["data"]["result"]
    # An empty result list would otherwise surface as an opaque IndexError;
    # fail with an explicit message instead (the retry still applies).
    assert results, f"Prometheus returned no 'up' samples for {APP_NAME}"
    up_value = results[0]["value"][1]
    assert up_value == "1", f"expected up == '1' for {APP_NAME}, got {up_value!r}"


@pytest.mark.abort_on_fail
@pytest.mark.asyncio
async def test_seldon_alert_rules(ops_test: OpsTest):
Expand Down Expand Up @@ -197,12 +211,6 @@ async def test_seldon_alert_rules(ops_test: OpsTest):
discovered_labels = targets_result["data"]["activeTargets"][0]["discoveredLabels"]
assert discovered_labels["juju_application"] == "seldon-controller-manager"

# query for the up metric and assert the unit is available
up_query_response = await fetch_url(
f'http://{prometheus_url}:9090/api/v1/query?query=up{{juju_application="{APP_NAME}"}}'
)
assert up_query_response["data"]["result"][0]["value"][1] == "1"

# obtain alert rules from Prometheus
rules_url = f"http://{prometheus_url}:9090/api/v1/rules"
alert_rules_result = await fetch_url(rules_url)
Expand All @@ -228,6 +236,9 @@ async def test_seldon_alert_rules(ops_test: OpsTest):
for rule in rules:
assert rule["name"] in rules_file_alert_names

# verify SeldonUnitIsUnavailable alert is not firing
await assert_seldon_unit_is_available(prometheus_url)

# The following integration test is optional (experimental) and might not be functioning
# correctly under some conditions due to its reliance on timing of K8S deployments, timing of
# Prometheus scraping, and rate calculations for alerts.
Expand Down

0 comments on commit d58d132

Please sign in to comment.