Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[libc] Make a dedicated thread for the RPC server #111210

Merged
merged 1 commit into from
Oct 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 19 additions & 9 deletions libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,11 @@
#include "hsa/hsa_ext_amd.h"
#endif

#include <atomic>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <thread>
#include <tuple>
#include <utility>

Expand Down Expand Up @@ -289,18 +291,26 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable,
__atomic_store_n((uint32_t *)&packet->header, header_word, __ATOMIC_RELEASE);
hsa_signal_store_relaxed(queue->doorbell_signal, packet_id);

std::atomic<bool> finished = false;
std::thread server(
[](std::atomic<bool> *finished, rpc_device_t device) {
while (!*finished) {
if (rpc_status_t err = rpc_handle_server(device))
handle_error(err);
}
},
&finished, device);

// Wait until the kernel has completed execution on the device. Periodically
// check the RPC client for work to be performed on the server.
while (hsa_signal_wait_scacquire(
packet->completion_signal, HSA_SIGNAL_CONDITION_EQ, 0,
/*timeout_hint=*/1024, HSA_WAIT_STATE_ACTIVE) != 0)
if (rpc_status_t err = rpc_handle_server(device))
handle_error(err);
while (hsa_signal_wait_scacquire(packet->completion_signal,
HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX,
HSA_WAIT_STATE_BLOCKED) != 0)
;

// Handle the server one more time in case the kernel exited with a pending
// send still in flight.
if (rpc_status_t err = rpc_handle_server(device))
handle_error(err);
finished = true;
if (server.joinable())
server.join();

// Destroy the resources acquired to launch the kernel and return.
if (hsa_status_t err = hsa_amd_memory_pool_free(args))
Expand Down
26 changes: 17 additions & 9 deletions libc/utils/gpu/loader/nvptx/nvptx-loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFObjectFile.h"

#include <atomic>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <thread>
#include <vector>

using namespace llvm;
Expand Down Expand Up @@ -224,24 +226,30 @@ CUresult launch_kernel(CUmodule binary, CUstream stream,
if (print_resource_usage)
print_kernel_resources(binary, kernel_name);

std::atomic<bool> finished = false;
std::thread server(
[](std::atomic<bool> *finished, rpc_device_t device) {
while (!*finished) {
if (rpc_status_t err = rpc_handle_server(device))
handle_error(err);
}
},
&finished, rpc_device);

// Call the kernel with the given arguments.
if (CUresult err = cuLaunchKernel(
function, params.num_blocks_x, params.num_blocks_y,
params.num_blocks_z, params.num_threads_x, params.num_threads_y,
params.num_threads_z, 0, stream, nullptr, args_config))
handle_error(err);

// Wait until the kernel has completed execution on the device. Periodically
// check the RPC client for work to be performed on the server.
while (cuStreamQuery(stream) == CUDA_ERROR_NOT_READY)
if (rpc_status_t err = rpc_handle_server(rpc_device))
handle_error(err);

// Handle the server one more time in case the kernel exited with a pending
// send still in flight.
if (rpc_status_t err = rpc_handle_server(rpc_device))
if (CUresult err = cuStreamSynchronize(stream))
handle_error(err);

finished = true;
if (server.joinable())
server.join();

return CUDA_SUCCESS;
}

Expand Down
Loading