Skip to content

Commit

Permalink
[Metrics] Fix shared memory not being displayed properly (#34301)
Browse files Browse the repository at this point in the history
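Shared memory was recorded incorrectly (the gauge reported total memory instead of shared-memory usage) and displayed incorrectly on the metrics graph (the query was missing the `ray_` metric-name prefix, which I forgot to prepend).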
  • Loading branch information
rkooo567 authored Apr 13, 2023
1 parent d16dd9c commit 688ddf6
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ def max_plus_pending(max_resource, pending_resource):
legend="{{Component}}",
),
Target(
expr="node_mem_shared_bytes{{{global_filters}}}",
expr="sum(ray_node_mem_shared_bytes{{{global_filters}}})",
legend="shared_memory",
),
Target(
Expand Down
2 changes: 1 addition & 1 deletion dashboard/modules/reporter/reporter_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -869,7 +869,7 @@ def _record_stats(self, stats, cluster_stats):
if shm_used:
node_mem_shared = Record(
gauge=METRICS_GAUGES["node_mem_shared_bytes"],
value=mem_total,
value=shm_used,
tags={"ip": ip},
)
records_reported.append(node_mem_shared)
Expand Down
7 changes: 6 additions & 1 deletion dashboard/modules/reporter/tests/test_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,8 @@ def test_case_stats_exist():
"ray_node_mem_used" in metric_names,
"ray_node_mem_available" in metric_names,
"ray_node_mem_total" in metric_names,
"ray_component_cpu_percentage" in metric_names,
"ray_node_mem_total" in metric_names,
"ray_node_mem_shared_bytes" in metric_names,
"ray_component_rss_mb" in metric_names,
"ray_component_uss_mb" in metric_names,
"ray_node_disk_io_read" in metric_names,
Expand Down Expand Up @@ -260,6 +261,10 @@ def test_report_stats():

records = agent._record_stats(STATS_TEMPLATE, cluster_stats)
for record in records:
name = record.gauge.name
val = record.value
if name == "node_mem_shared_bytes":
assert val == STATS_TEMPLATE["shm"]
print(record.gauge.name)
print(record)
assert len(records) == 33
Expand Down

0 comments on commit 688ddf6

Please sign in to comment.