Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Serve] Add route tags with custom metrics #35246

Merged
merged 2 commits into from
May 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion python/ray/serve/metrics.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from ray.util import metrics
from typing import Tuple, Optional, Dict, List
from typing import Tuple, Optional, Dict, List, Union
from ray.serve import context
import ray

DEPLOYMENT_TAG = "deployment"
REPLICA_TAG = "replica"
APPLICATION_TAG = "application"
ROUTE_TAG = "route"


def _add_serve_metric_tags(tag_keys: Optional[Tuple[str]] = None) -> Tuple[str]:
Expand Down Expand Up @@ -52,6 +54,14 @@ def _add_serve_metric_default_tags(default_tags: Dict[str, str]):
return default_tags


def _add_serve_context_tag_values(tag_keys: Tuple, tags: Dict[str, str]):
"""Add serve context tag values to the metric tags"""

_request_context = ray.serve.context._serve_request_context.get()
if ROUTE_TAG in tag_keys and ROUTE_TAG not in tags:
tags[ROUTE_TAG] = _request_context.route


class Counter(metrics.Counter):
def __init__(
self, name: str, description: str = "", tag_keys: Optional[Tuple[str]] = None
Expand All @@ -67,6 +77,13 @@ def __init__(
def set_default_tags(self, default_tags: Dict[str, str]):
super().set_default_tags(_add_serve_metric_default_tags(default_tags))

def inc(self, value: Union[int, float] = 1.0, tags: Dict[str, str] = None):
"""Increment the counter by the given value, add serve context
tag values to the tags
"""
_add_serve_context_tag_values(self._tag_keys, tags)
super().inc(value, tags)


class Gauge(metrics.Gauge):
def __init__(
Expand All @@ -83,6 +100,13 @@ def __init__(
def set_default_tags(self, default_tags: Dict[str, str]):
super().set_default_tags(_add_serve_metric_default_tags(default_tags))

def set(self, value: Union[int, float], tags: Dict[str, str] = None):
"""Set the gauge to the given value, add serve context
tag values to the tags
"""
_add_serve_context_tag_values(self._tag_keys, tags)
super().set(value, tags)


class Histogram(metrics.Histogram):
def __init__(
Expand All @@ -102,3 +126,10 @@ def __init__(

def set_default_tags(self, default_tags: Dict[str, str]):
super().set_default_tags(_add_serve_metric_default_tags(default_tags))

def observe(self, value: Union[int, float], tags: Dict[str, str] = None):
"""Observe the given value, add serve context
tag values to the tags
"""
_add_serve_context_tag_values(self._tag_keys, tags)
super().observe(value, tags)
78 changes: 54 additions & 24 deletions python/ray/serve/tests/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,10 @@ def _generate_metrics_summary(self, metrics):
]
return metrics_summary_route, metrics_summary_app

def verify_metrics(self, metric, expected_output):
for key in expected_output:
assert metric[key] == expected_output[key]

def test_request_context_pass_for_http_proxy(self, serve_start_shutdown):
"""Test HTTP proxy passing request context"""

Expand Down Expand Up @@ -411,7 +415,7 @@ def h():
"serve_deployment_processing_latency_ms_sum",
]:
metrics_route, metrics_app_name = self._generate_metrics_summary(
get_metric_dictionaries("serve_handle_request_counter")
get_metric_dictionaries(metric_name)
)
assert metrics_route["app1_f"] == {"/app1"}
assert metrics_route["app2_g"] == {"/app2"}
Expand Down Expand Up @@ -488,6 +492,7 @@ def __init__(self):
tag_keys=(
"my_static_tag",
"my_runtime_tag",
"route",
),
)
self.counter.set_default_tags({"my_static_tag": "static_value"})
Expand All @@ -498,6 +503,7 @@ def __init__(self):
tag_keys=(
"my_static_tag",
"my_runtime_tag",
"route",
),
)
self.histogram.set_default_tags({"my_static_tag": "static_value"})
Expand All @@ -507,6 +513,7 @@ def __init__(self):
tag_keys=(
"my_static_tag",
"my_runtime_tag",
"route",
),
)
self.gauge.set_default_tags({"my_static_tag": "static_value"})
Expand All @@ -527,29 +534,42 @@ def __call__(self):
lambda: len(get_metric_dictionaries("my_gauge")) == 1,
timeout=20,
)

counter_metrics = get_metric_dictionaries("my_counter")
assert len(counter_metrics) == 1
counter_metrics[0]["my_static_tag"] == "static_value"
counter_metrics[0]["my_runtime_tag"] == "100"
counter_metrics[0]["replica"] == replica_tag
counter_metrics[0]["deployment"] == deployment_name
counter_metrics[0]["application"] == "app"

expected_metrics = {
"my_static_tag": "static_value",
"my_runtime_tag": "100",
"replica": replica_tag,
"deployment": deployment_name,
"application": "app",
"route": "/app",
}
self.verify_metrics(counter_metrics[0], expected_metrics)

expected_metrics = {
"my_static_tag": "static_value",
"my_runtime_tag": "300",
"replica": replica_tag,
"deployment": deployment_name,
"application": "app",
"route": "/app",
}
gauge_metrics = get_metric_dictionaries("my_gauge")
assert len(counter_metrics) == 1
gauge_metrics[0]["my_static_tag"] == "static_value"
gauge_metrics[0]["my_runtime_tag"] == "300"
gauge_metrics[0]["replica"] == replica_tag
gauge_metrics[0]["deployment"] == deployment_name
gauge_metrics[0]["application"] == "app"

self.verify_metrics(gauge_metrics[0], expected_metrics)

expected_metrics = {
"my_static_tag": "static_value",
"my_runtime_tag": "200",
"replica": replica_tag,
"deployment": deployment_name,
"application": "app",
"route": "/app",
}
histogram_metrics = get_metric_dictionaries("my_histogram_sum")
assert len(histogram_metrics) == 1
histogram_metrics[0]["my_static_tag"] == "static_value"
histogram_metrics[0]["my_runtime_tag"] == "200"
histogram_metrics[0]["replica"] == replica_tag
histogram_metrics[0]["deployment"] == deployment_name
gauge_metrics[0]["application"] == "app"
self.verify_metrics(histogram_metrics[0], expected_metrics)

@pytest.mark.parametrize("use_actor", [False, True])
def test_serve_metrics_outside_serve(self, use_actor, serve_start_shutdown):
Expand Down Expand Up @@ -650,20 +670,30 @@ async def __call__(self):
lambda: len(get_metric_dictionaries("my_gauge")) == 1,
timeout=20,
)

counter_metrics = get_metric_dictionaries("my_counter")
assert len(counter_metrics) == 1
counter_metrics[0]["my_static_tag"] == "static_value"
counter_metrics[0]["my_runtime_tag"] == "100"
expected_metrics = {
"my_static_tag": "static_value",
"my_runtime_tag": "100",
}
self.verify_metrics(counter_metrics[0], expected_metrics)

gauge_metrics = get_metric_dictionaries("my_gauge")
assert len(counter_metrics) == 1
gauge_metrics[0]["my_static_tag"] == "static_value"
gauge_metrics[0]["my_runtime_tag"] == "300"
expected_metrics = {
"my_static_tag": "static_value",
"my_runtime_tag": "300",
}
self.verify_metrics(gauge_metrics[0], expected_metrics)

histogram_metrics = get_metric_dictionaries("my_histogram_sum")
assert len(histogram_metrics) == 1
histogram_metrics[0]["my_static_tag"] == "static_value"
histogram_metrics[0]["my_runtime_tag"] == "200"
expected_metrics = {
"my_static_tag": "static_value",
"my_runtime_tag": "200",
}
self.verify_metrics(histogram_metrics[0], expected_metrics)


def test_actor_summary(serve_instance):
Expand Down