[Core][Enable gcs scheduler 5/n] Adapt gcs scheduler with external modules #28162
Diff in the GcsResourceManager implementation:

@@ -93,15 +93,16 @@ void GcsResourceManager::HandleGetAllAvailableResources(
     if (using_resource_reports) {
       auto resource_iter =
           node_resource_usages_[node_id].resources_available().find(resource_name);
-      if (resource_iter != node_resource_usages_[node_id].resources_available().end()) {
+      if (resource_iter != node_resource_usages_[node_id].resources_available().end() &&
+          resource_iter->second > 0) {
         resource.mutable_resources_available()->insert(
             {resource_name, resource_iter->second});
         continue;
       }
-    } else {
-      const auto &resource_value = node_resources.available.Get(resource_id);
-      resource.mutable_resources_available()->insert(
-          {resource_name, resource_value.Double()});
     }
+    const auto &resource_value = node_resources.available.Get(resource_id);
+    resource.mutable_resources_available()->insert(
+        {resource_name, resource_value.Double()});
   }
   reply->add_resources_list()->CopyFrom(resource);
 }

Reviewer: If I look at the else statement, it seems like we update resources when the value > 0. Is it expected?

Author: Every resource in […]

Follow-up: This […]
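The effect of the new `resource_iter->second > 0` guard is that a resource report showing zero availability no longer short-circuits the loop; the code falls through to the node's static resource view instead. A minimal standalone sketch of that lookup-with-fallback flow, using plain std::unordered_map stand-ins rather than Ray's types:

#include <iostream>
#include <string>
#include <unordered_map>

// Prefer the value from the latest resource report; fall back to the node's
// static view when the report lacks the resource or reports it as zero.
double AvailableFor(const std::string &name,
                    const std::unordered_map<std::string, double> &report,
                    const std::unordered_map<std::string, double> &static_view) {
  auto it = report.find(name);
  if (it != report.end() && it->second > 0) {
    return it->second;  // A fresh, non-zero report wins.
  }
  // Missing from the report, or reported as zero: use the static view.
  return static_view.at(name);
}

int main() {
  const std::unordered_map<std::string, double> report{{"CPU", 0.0}, {"GPU", 2.0}};
  const std::unordered_map<std::string, double> static_view{{"CPU", 8.0}, {"GPU", 4.0}};
  std::cout << AvailableFor("CPU", report, static_view) << "\n";  // 8 (fallback)
  std::cout << AvailableFor("GPU", report, static_view) << "\n";  // 2 (report)
}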
@@ -111,6 +112,12 @@ void GcsResourceManager::HandleGetAllAvailableResources(

 void GcsResourceManager::UpdateFromResourceReport(const rpc::ResourcesData &data) {
   NodeID node_id = NodeID::FromBinary(data.node_id());
+  // We only need to update worker nodes' resource usage. The GCS node itself does
+  // not execute any tasks, so its report can be ignored.
+  if (node_id == local_node_id_) {
+    return;
+  }
   if (RayConfig::instance().gcs_actor_scheduling_enabled()) {
     UpdateNodeNormalTaskResources(node_id, data);
   } else {

Reviewer: You should be able to assert this right? GCS shouldn't ever send out a resource report right?

Author: This part is actually from another feature, which I'll do in the next split PR. So I'll just revert this here.
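Per the exchange above, this guard belongs to a later PR in the series and was reverted here, but the pattern is simple to show in isolation: drop the local node's own report before it reaches the usage bookkeeping. A self-contained sketch with stand-in types (not Ray's NodeID or rpc::ResourcesData):

#include <string>
#include <unordered_map>
#include <utility>

// Stand-ins for Ray's NodeID / rpc::ResourcesData, for illustration only.
struct ResourceReport {
  std::string node_id;
  double cpu_available = 0.0;
};

class UsageTracker {
 public:
  explicit UsageTracker(std::string local_id) : local_node_id_(std::move(local_id)) {}

  void UpdateFromReport(const ResourceReport &report) {
    // The GCS node runs no tasks, so its own report carries no scheduling info.
    if (report.node_id == local_node_id_) {
      return;
    }
    usages_[report.node_id] = report.cpu_available;
  }

 private:
  std::string local_node_id_;
  std::unordered_map<std::string, double> usages_;
};

int main() {
  UsageTracker tracker("gcs-node");
  tracker.UpdateFromReport({"gcs-node", 0.0});  // ignored: local node
  tracker.UpdateFromReport({"worker-1", 4.0});  // recorded
}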
@@ -148,53 +155,69 @@ void GcsResourceManager::HandleReportResourceUsage(
   ++counts_[CountType::REPORT_RESOURCE_USAGE_REQUEST];
 }

+void GcsResourceManager::FillAggregateLoad(
+    const rpc::ResourcesData &resources_data,
+    std::unordered_map<google::protobuf::Map<std::string, double>, rpc::ResourceDemand>
+        *aggregate_load) {
+  auto load = resources_data.resource_load_by_shape();
+  for (const auto &demand : load.resource_demands()) {
+    auto &aggregate_demand = (*aggregate_load)[demand.shape()];
+    aggregate_demand.set_num_ready_requests_queued(
+        aggregate_demand.num_ready_requests_queued() +
+        demand.num_ready_requests_queued());
+    aggregate_demand.set_num_infeasible_requests_queued(
+        aggregate_demand.num_infeasible_requests_queued() +
+        demand.num_infeasible_requests_queued());
+    aggregate_demand.set_backlog_size(aggregate_demand.backlog_size() +
+                                      demand.backlog_size());
+  }
+}
+
 void GcsResourceManager::HandleGetAllResourceUsage(
     const rpc::GetAllResourceUsageRequest &request,
     rpc::GetAllResourceUsageReply *reply,
     rpc::SendReplyCallback send_reply_callback) {
-  if (cluster_task_manager_ && RayConfig::instance().gcs_actor_scheduling_enabled()) {
-    rpc::ResourcesData resources_data;
-    cluster_task_manager_->FillPendingActorInfo(resources_data);
-    node_resource_usages_[local_node_id_].CopyFrom(resources_data);
-  }
   if (!node_resource_usages_.empty()) {
-    auto batch = std::make_shared<rpc::ResourceUsageBatchData>();
+    rpc::ResourceUsageBatchData batch;
     std::unordered_map<google::protobuf::Map<std::string, double>, rpc::ResourceDemand>
         aggregate_load;

     for (const auto &usage : node_resource_usages_) {
       // Aggregate the load reported by each raylet.
-      auto load = usage.second.resource_load_by_shape();
-      for (const auto &demand : load.resource_demands()) {
-        auto &aggregate_demand = aggregate_load[demand.shape()];
-        aggregate_demand.set_num_ready_requests_queued(
-            aggregate_demand.num_ready_requests_queued() +
-            demand.num_ready_requests_queued());
-        aggregate_demand.set_num_infeasible_requests_queued(
-            aggregate_demand.num_infeasible_requests_queued() +
-            demand.num_infeasible_requests_queued());
-        aggregate_demand.set_backlog_size(aggregate_demand.backlog_size() +
-                                          demand.backlog_size());
-      }
-      batch->add_batch()->CopyFrom(usage.second);
+      FillAggregateLoad(usage.second, &aggregate_load);
+      batch.add_batch()->CopyFrom(usage.second);
     }

+    if (cluster_task_manager_) {
+      // Fill the gcs info when gcs actor scheduler is enabled.
+      rpc::ResourcesData gcs_resources_data;
+      cluster_task_manager_->FillPendingActorInfo(gcs_resources_data);
+      // Aggregate the load (pending actor info) of gcs.
+      FillAggregateLoad(gcs_resources_data, &aggregate_load);
+      // We only export gcs's pending info without adding the corresponding
+      // `ResourcesData` to the `batch` list. So if gcs has detected cluster full of
+      // actors, set the dedicated field in the reply.
+      if (gcs_resources_data.cluster_full_of_actors_detected()) {
+        reply->set_cluster_full_of_actors_detected_by_gcs(true);
+      }
+    }
+
     for (const auto &demand : aggregate_load) {
-      auto demand_proto = batch->mutable_resource_load_by_shape()->add_resource_demands();
+      auto demand_proto = batch.mutable_resource_load_by_shape()->add_resource_demands();
       demand_proto->CopyFrom(demand.second);
       for (const auto &resource_pair : demand.first) {
         (*demand_proto->mutable_shape())[resource_pair.first] = resource_pair.second;
       }
     }

     // Update placement group load to heartbeat batch.
     // This is updated only once per second.
     if (placement_group_load_.has_value()) {
       auto placement_group_load = placement_group_load_.value();
-      auto placement_group_load_proto = batch->mutable_placement_group_load();
+      auto placement_group_load_proto = batch.mutable_placement_group_load();
       placement_group_load_proto->CopyFrom(*placement_group_load.get());
     }
-    reply->mutable_resource_usage_data()->CopyFrom(*batch);
+
+    reply->mutable_resource_usage_data()->CopyFrom(batch);
   }

   GCS_RPC_SEND_REPLY(send_reply_callback, reply, Status::OK());
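The refactor extracts the per-shape aggregation into FillAggregateLoad so the same code folds in both raylet reports and the GCS's own pending-actor load. Below is a self-contained sketch of that aggregation, with plain structs and std::map standing in for the protobuf types (Ray's real map is keyed by google::protobuf::Map, which needs a custom hash; std::map avoids that here):

#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <vector>

// A "shape" is one request's resource requirement, e.g. {CPU: 1}.
using Shape = std::map<std::string, double>;

struct Demand {
  Shape shape;
  int64_t num_ready_requests_queued = 0;
  int64_t num_infeasible_requests_queued = 0;
  int64_t backlog_size = 0;
};

// Fold one reporter's demands into the shape-keyed aggregate, mirroring what
// FillAggregateLoad does with rpc::ResourcesData.
void FillAggregateLoad(const std::vector<Demand> &demands,
                       std::map<Shape, Demand> *aggregate_load) {
  for (const auto &demand : demands) {
    Demand &agg = (*aggregate_load)[demand.shape];  // created on first use
    agg.num_ready_requests_queued += demand.num_ready_requests_queued;
    agg.num_infeasible_requests_queued += demand.num_infeasible_requests_queued;
    agg.backlog_size += demand.backlog_size;
  }
}

int main() {
  std::map<Shape, Demand> aggregate_load;
  // The same shape reported by a raylet and by the GCS-side scheduler.
  Demand raylet_demand{{{"CPU", 1.0}}, 2, 0, 5};
  Demand gcs_demand{{{"CPU", 1.0}}, 1, 3, 0};
  FillAggregateLoad({raylet_demand}, &aggregate_load);
  FillAggregateLoad({gcs_demand}, &aggregate_load);

  const Demand &agg = aggregate_load[{{"CPU", 1.0}}];
  std::cout << agg.num_ready_requests_queued << " "       // 3
            << agg.num_infeasible_requests_queued << " "  // 3
            << agg.backlog_size << "\n";                  // 5
}

Keying the aggregate by the full resource shape is what lets demands for identical shapes from different reporters collapse into a single ResourceDemand entry in the reply.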
Diff in the GCS protobuf service definitions:

@@ -175,6 +175,12 @@ message GetAllNodeInfoRequest {}
 message GetAllNodeInfoReply {
   GcsStatus status = 1;
   repeated GcsNodeInfo node_info_list = 2;
+  GcsInfo gcs_info = 3;
 }

+message GcsInfo {
+  repeated TaskSpec infeasible_tasks = 1;
+  repeated TaskSpec ready_tasks = 2;
+}
+
 message ReportHeartbeatRequest {

Reviewer: can you only report necessary fields?

Author: If you take a look at […]
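For context, here is a stand-in sketch (plain C++ structs, not the protoc-generated classes) of what the new field carries: the GCS's own infeasible and ready task specs, per the surrounding discussion presumably pending actor-creation tasks, ride along with the node list:

#include <iostream>
#include <string>
#include <vector>

// Stand-ins for the proto messages above, for illustration only.
struct TaskSpec {
  std::string task_id;
};
struct GcsInfo {
  std::vector<TaskSpec> infeasible_tasks;
  std::vector<TaskSpec> ready_tasks;
};
struct GetAllNodeInfoReply {
  // node_info_list omitted; only the new field is sketched.
  GcsInfo gcs_info;
};

int main() {
  GetAllNodeInfoReply reply;
  reply.gcs_info.ready_tasks.push_back({"pending-actor-creation-task"});
  std::cout << "GCS-side pending tasks: " << reply.gcs_info.infeasible_tasks.size()
            << " infeasible, " << reply.gcs_info.ready_tasks.size() << " ready\n";
}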
@@ -583,6 +589,10 @@ message GetAllResourceUsageRequest {}
 message GetAllResourceUsageReply {
   GcsStatus status = 1;
   ResourceUsageBatchData resource_usage_data = 2;
+  /// True if gcs finds infeasible or pending actor creation tasks
+  /// locally (when gcs actor scheduler is enabled). This field is
+  /// expected to help trigger auto-scaling.
+  bool cluster_full_of_actors_detected_by_gcs = 3;
 }

 // Service for node resource info access.

Reviewer: hmm, is there any reason we can't reuse the ResourceUsageBatchData?
Reviewer: This should go away if you rebase right?

Author: Because there might be pending actors in the gcs server (if the gcs actor scheduler is enabled), we need to not only check whether any worker node has detected cluster full (see line 276-281), but also check the gcs server's report (this part).
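A sketch of the dual check the author describes, with stand-in types rather than Ray's generated protobuf classes: the cluster counts as full of actors if any worker node flagged it, or if the GCS-side scheduler flagged it through the new reply field:

#include <iostream>
#include <vector>

// Stand-ins for rpc::ResourcesData / rpc::GetAllResourceUsageReply.
struct NodeUsage {
  bool cluster_full_of_actors_detected = false;
};
struct GetAllResourceUsageReply {
  std::vector<NodeUsage> batch;
  bool cluster_full_of_actors_detected_by_gcs = false;
};

bool ClusterFullOfActors(const GetAllResourceUsageReply &reply) {
  for (const auto &usage : reply.batch) {
    if (usage.cluster_full_of_actors_detected) {
      return true;  // Worker-side signal.
    }
  }
  return reply.cluster_full_of_actors_detected_by_gcs;  // GCS-side signal.
}

int main() {
  GetAllResourceUsageReply reply;
  reply.cluster_full_of_actors_detected_by_gcs = true;  // set by the GCS scheduler
  std::cout << std::boolalpha << ClusterFullOfActors(reply) << "\n";  // true
}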