[Serve] Add http request latency (ray-project#32839)
Add http request latency metrics.

Signed-off-by: Jack He <[email protected]>
sihanwang41 authored and ProjectsByJackHe committed May 4, 2023
1 parent 5301f55 commit 6ad00b3
Showing 3 changed files with 24 additions and 4 deletions.
3 changes: 3 additions & 0 deletions doc/source/serve/production-guide/monitoring.md
@@ -279,6 +279,9 @@ The following metrics are exposed by Ray Serve:
        * error_code
        * method
      - The number of non-200 HTTP responses returned by each deployment.
+   * - ``serve_http_request_latency_ms`` [*]
+     - * endpoint
+     - The end-to-end latency of HTTP requests (measured from the Serve HTTP proxy).
 ```
 [*] - only available when using HTTP calls
 [**] - only available when using Python `ServeHandle` calls
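
For context beyond the diff: a minimal sketch of inspecting the new metric once it is exported, assuming a Serve app is running behind the default proxy port 8000 and the cluster was started with `ray start --metrics-export-port=8080` (host, ports, and route below are illustrative and depend on your setup):

```python
# Minimal sketch: send one request through the Serve HTTP proxy, then
# scrape the Prometheus-format metrics endpoint and look for the new
# histogram. Port 8080 assumes `ray start --metrics-export-port=8080`.
import requests

requests.get("http://localhost:8000/")  # one request through the proxy

metrics_text = requests.get("http://localhost:8080/metrics").text
for line in metrics_text.splitlines():
    if "serve_http_request_latency_ms" in line:
        # Histograms are exported as *_bucket, *_count, and *_sum series.
        print(line)
```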
18 changes: 17 additions & 1 deletion python/ray/serve/_private/http_proxy.py
@@ -26,7 +26,11 @@
     set_socket_reuse_port,
 )
 from ray.serve._private.common import EndpointInfo, EndpointTag
-from ray.serve._private.constants import SERVE_LOGGER_NAME, SERVE_NAMESPACE
+from ray.serve._private.constants import (
+    SERVE_LOGGER_NAME,
+    SERVE_NAMESPACE,
+    DEFAULT_LATENCY_BUCKET_MS,
+)
 from ray.serve._private.long_poll import LongPollClient, LongPollNamespace
 from ray.serve._private.logging_utils import access_log_msg, configure_component_logger
 
@@ -291,6 +295,15 @@ def get_handle(name):
"method",
),
)
self.processing_latency_tracker = metrics.Histogram(
"serve_http_request_latency_ms",
description=(
"The end-to-end latency of HTTP requests "
"(measured from the Serve HTTP proxy)."
),
boundaries=DEFAULT_LATENCY_BUCKET_MS,
tag_keys=("route_prefix",),
)

def _update_routes(self, endpoints: Dict[EndpointTag, EndpointInfo]) -> None:
self.route_info: Dict[str, Tuple[EndpointTag, List[str]]] = dict()
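
The tracker above uses Ray's application-level metrics API (`ray.util.metrics`). As a standalone illustration, a sketch of the same pattern with made-up bucket boundaries (Serve's actual defaults live in `DEFAULT_LATENCY_BUCKET_MS`):

```python
# Standalone sketch of the ray.util.metrics.Histogram API used above.
# The bucket boundaries here are illustrative, not Serve's defaults.
import ray
from ray.util import metrics

ray.init()

request_latency_ms = metrics.Histogram(
    "example_request_latency_ms",
    description="End-to-end request latency in milliseconds.",
    boundaries=[1, 5, 10, 50, 100, 500, 1000],
    tag_keys=("route_prefix",),
)

# Record one observation, tagged with the matched route prefix.
request_latency_ms.observe(12.5, tags={"route_prefix": "/"})
```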
@@ -370,6 +383,9 @@ async def __call__(self, scope, receive, send):
         start_time = time.time()
         status_code = await _send_request_to_handle(handle, scope, receive, send)
         latency_ms = (time.time() - start_time) * 1000.0
+        self.processing_latency_tracker.observe(
+            latency_ms, tags={"route_prefix": route_prefix}
+        )
         logger.info(
             access_log_msg(
                 method=scope["method"],
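
The measure-then-observe pattern above (a wall-clock delta converted to milliseconds) can be factored into a reusable helper; a hypothetical sketch, not part of Serve:

```python
# Hypothetical helper (not part of Serve): time a block of code and record
# the elapsed wall-clock milliseconds into a ray.util.metrics.Histogram.
import time
from contextlib import contextmanager

@contextmanager
def record_latency_ms(histogram, **tags):
    start = time.time()
    try:
        yield
    finally:
        histogram.observe((time.time() - start) * 1000.0, tags=tags)

# Usage: with record_latency_ms(tracker, route_prefix="/"): handle_request()
```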
7 changes: 4 additions & 3 deletions python/ray/serve/tests/test_metrics.py
@@ -56,9 +56,9 @@ def verify_metrics(do_assert=False):
"serve_deployment_request_counter",
"serve_deployment_replica_starts",
# histogram
"deployment_processing_latency_ms_bucket",
"deployment_processing_latency_ms_count",
"deployment_processing_latency_ms_sum",
"serve_deployment_processing_latency_ms_bucket",
"serve_deployment_processing_latency_ms_count",
"serve_deployment_processing_latency_ms_sum",
"serve_deployment_processing_latency_ms",
# gauge
"serve_replica_processing_queries",
@@ -120,6 +120,7 @@ def verify_metrics(expected_metrics, do_assert=False):
     # https://docs.ray.io/en/latest/serve/monitoring.html#metrics
     # Any updates here should be reflected there too.
     expected_metrics.append("serve_num_deployment_http_error_requests")
+    expected_metrics.append("serve_http_request_latency_ms")
 
     @serve.deployment(name="A")
     class A:
