diff --git a/config.yaml b/config.yaml
index 0661a61..2cee600 100644
--- a/config.yaml
+++ b/config.yaml
@@ -1,8 +1,4 @@
 options:
-  metrics-port:
-    type: string
-    default: '8080'
-    description: Metrics port
   webhook-port:
     type: string
     default: '4443'
diff --git a/src/charm.py b/src/charm.py
index 30be0ca..52c529c 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -64,6 +64,8 @@
     "configmap__predictor__tempo_server__v2",
 ]
 DEFAULT_IMAGES_FILE = "src/default-custom-images.json"
+METRICS_PATH = "/metrics"
+METRICS_PORT = "8080"
 
 with open(DEFAULT_IMAGES_FILE, "r") as json_file:
     DEFAULT_IMAGES = json.load(json_file)
@@ -84,7 +86,6 @@ def __init__(self, *args):
         self._namespace = self.model.name
         self._lightkube_field_manager = "lightkube"
         self._name = self.model.app.name
-        self._metrics_port = self.model.config["metrics-port"]
         self._webhook_port = self.model.config["webhook-port"]
         self._manager_create_resources = self.model.config["manager-create-resources"]
         self._manager_log_level = self.model.config["manager-log-level"]
@@ -104,7 +105,7 @@ def __init__(self, *args):
         self._exec_command = (
             "/manager "
             "--enable-leader-election "
-            f"--metrics-addr=:{self._metrics_port} "
+            f"--metrics-addr=:{METRICS_PORT} "
             f"--webhook-port {self._webhook_port} "
             f"--log-level={self._manager_log_level} "
             f"--leader-election-id={self._manager_leader_election_id} "
@@ -131,7 +132,7 @@ def __init__(self, *args):
         self._crd_resource_handler = None
         self._configmap_resource_handler = None
 
-        metrics_port = ServicePort(int(self._metrics_port), name="metrics-port")
+        metrics_port = ServicePort(int(METRICS_PORT), name="metrics-port")
         webhook_port = ServicePort(int(self._webhook_port), name="webhook-port")
         self.service_patcher = KubernetesServicePatch(
             self,
@@ -139,29 +140,16 @@ def __init__(self, *args):
             service_name=f"{self.model.app.name}",
         )
 
-        # setup events to be handled by main event handler
-        self.framework.observe(self.on.config_changed, self._on_event)
-        for rel in self.model.relations.keys():
-            self.framework.observe(self.on[rel].relation_changed, self._on_event)
-
-        # setup events to be handled by specific event handlers
-        self.framework.observe(self.on.install, self._on_install)
-        self.framework.observe(self.on.upgrade_charm, self._on_upgrade)
-        self.framework.observe(self.on.seldon_core_pebble_ready, self._on_pebble_ready)
-        self.framework.observe(self.on.remove, self._on_remove)
-        self.framework.observe(self.on.stop, self._on_stop)
-
         # Prometheus related config
         self.prometheus_provider = MetricsEndpointProvider(
             charm=self,
             relation_name="metrics-endpoint",
             jobs=[
                 {
-                    "metrics_path": self.config["executor-server-metrics-port-name"],
-                    "static_configs": [{"targets": ["*:{}".format(self.config["metrics-port"])]}],
+                    "metrics_path": METRICS_PATH,
+                    "static_configs": [{"targets": ["*:{}".format(METRICS_PORT)]}],
                 }
             ],
-            lookaside_jobs_callable=self.return_list_of_running_models,
         )
 
         # Dashboard related config (Grafana)
@@ -170,6 +158,18 @@ def __init__(self, *args):
             relation_name="grafana-dashboard",
         )
 
+        # setup events to be handled by main event handler
+        self.framework.observe(self.on.config_changed, self._on_event)
+        for rel in self.model.relations.keys():
+            self.framework.observe(self.on[rel].relation_changed, self._on_event)
+
+        # setup events to be handled by specific event handlers
+        self.framework.observe(self.on.install, self._on_install)
+        self.framework.observe(self.on.upgrade_charm, self._on_upgrade)
+        self.framework.observe(self.on.seldon_core_pebble_ready, self._on_pebble_ready)
+        self.framework.observe(self.on.remove, self._on_remove)
+        self.framework.observe(self.on.stop, self._on_stop)
+
     @property
     def container(self):
         """Return container."""
diff --git a/src/prometheus_alert_rules/unit_unavailable.rule b/src/prometheus_alert_rules/unit_unavailable.rule
index 631396a..aa09943 100644
--- a/src/prometheus_alert_rules/unit_unavailable.rule
+++ b/src/prometheus_alert_rules/unit_unavailable.rule
@@ -1,6 +1,6 @@
 alert: SeldonUnitIsUnavailable
 expr: up < 1
-for: 0m
+for: 5m
 labels:
   severity: critical
 annotations:
diff --git a/tests/integration/test_charm.py b/tests/integration/test_charm.py
index 719c44c..d30ffdc 100644
--- a/tests/integration/test_charm.py
+++ b/tests/integration/test_charm.py
@@ -197,6 +197,12 @@ async def test_seldon_alert_rules(ops_test: OpsTest):
     discovered_labels = targets_result["data"]["activeTargets"][0]["discoveredLabels"]
     assert discovered_labels["juju_application"] == "seldon-controller-manager"
 
+    # query for the up metric and assert the unit is available
+    up_query_response = await fetch_url(
+        f'http://{prometheus_url}:9090/api/v1/query?query=up{{juju_application="{APP_NAME}"}}'
+    )
+    assert up_query_response["data"]["result"][0]["value"][1] == "1"
+
     # obtain alert rules from Prometheus
     rules_url = f"http://{prometheus_url}:9090/api/v1/rules"
     alert_rules_result = await fetch_url(rules_url)
diff --git a/tests/unit/test_operator.py b/tests/unit/test_operator.py
index 146cc37..22f05f6 100644
--- a/tests/unit/test_operator.py
+++ b/tests/unit/test_operator.py
@@ -13,7 +13,7 @@
 from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus, WaitingStatus
 from ops.testing import Harness
 
-from charm import SeldonCoreOperator
+from charm import METRICS_PORT, SeldonCoreOperator
 
 SELDON_CM_NAME = "seldon-config"
 
@@ -181,7 +181,7 @@ def test_pebble_layer(
     assert (
         pebble_plan_info["services"]["seldon-core"]["command"] == "/manager "
         "--enable-leader-election "
-        f"--metrics-addr=:{harness.charm._metrics_port} "
+        f"--metrics-addr=:{METRICS_PORT} "
         f"--webhook-port {harness.charm._webhook_port} "
         f"--log-level={harness.charm._manager_log_level} "
         f"--leader-election-id={harness.charm._manager_leader_election_id} "