Skip to content

Commit

Permalink
rocr/aie: Support VMEM handle creation
Browse files Browse the repository at this point in the history
Adds support for AllocateMemoryOnly inside the XDNA driver.

Move the IsLocalMemory() check inside the KFD driver
since the XDNA driver can, and needs to, create handles
on system memory buffer objects.

Changed the handle variable name from thunk_handle to user_mode_driver_handle,
which is more descriptive now that non-GPU drivers are supported.

Change-Id: I95db9d575afd1ab0ff2de74cea5175d9a12a721b
  • Loading branch information
atgutier committed Sep 16, 2024
1 parent f678d36 commit 0d77583
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 10 deletions.
5 changes: 5 additions & 0 deletions runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,11 @@ KfdDriver::AllocateMemory(const core::MemoryRegion &mem_region,
kmt_alloc_flags.ui32.NonPaged = 1;
}

if (!m_region.IsLocalMemory() &&
(alloc_flags & core::MemoryRegion::AllocateMemoryOnly)) {
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}

// Allocating a memory handle for virtual memory
kmt_alloc_flags.ui32.NoAddress =
!!(alloc_flags & core::MemoryRegion::AllocateMemoryOnly);
Expand Down
20 changes: 16 additions & 4 deletions runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ XdnaDriver::AllocateMemory(const core::MemoryRegion &mem_region,

amdxdna_drm_get_bo_info get_bo_info_args{0};
drm_gem_close close_bo_args{0};
void *mapped_mem(nullptr);

if (!m_region.IsSystem()) {
return HSA_STATUS_ERROR_INVALID_REGION;
Expand All @@ -162,18 +163,29 @@ XdnaDriver::AllocateMemory(const core::MemoryRegion &mem_region,
return HSA_STATUS_ERROR;
}

/// TODO: For now we always map the memory and keep a mapping from handles
/// to VA memory addresses. Once we can support the separate VMEM call to
/// map handles we can fix this.
if (m_region.kernarg()) {
*mem = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_,
get_bo_info_args.map_offset);
if (*mem == MAP_FAILED) {
mapped_mem = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_,
get_bo_info_args.map_offset);
if (mapped_mem == MAP_FAILED) {
// Close the BO in the case when a mapping fails and we got a BO handle.
ioctl(fd_, DRM_IOCTL_GEM_CLOSE, &close_bo_args);
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}
} else {
*mem = reinterpret_cast<void *>(get_bo_info_args.vaddr);
mapped_mem = reinterpret_cast<void *>(get_bo_info_args.vaddr);
}

if (alloc_flags & core::MemoryRegion::AllocateMemoryOnly) {
*mem = reinterpret_cast<void *>(create_bo_args.handle);
} else {
*mem = mapped_mem;
}

vmem_handle_mappings.emplace(create_bo_args.handle, mapped_mem);

return HSA_STATUS_SUCCESS;
}

Expand Down
7 changes: 7 additions & 0 deletions runtime/hsa-runtime/core/inc/amd_xdna_driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#define HSA_RUNTIME_CORE_INC_AMD_XDNA_DRIVER_H_

#include <memory>
#include <unordered_map>

#include "core/inc/driver.h"
#include "core/inc/memory_region.h"
Expand Down Expand Up @@ -89,6 +90,12 @@ class XdnaDriver : public core::Driver {
hsa_status_t InitDeviceHeap();
hsa_status_t FreeDeviceHeap();

/// TODO: Remove this in the future and rely on the core Runtime
/// object to track handle allocations. Using the VMEM API for mapping XDNA
/// driver handles requires a bit more refactoring. So rely on the XDNA driver
/// to manage some of this for now.
std::unordered_map<uint32_t, void *> vmem_handle_mappings;

/// @brief Virtual address range allocated for the device heap.
///
/// Allocate a large enough space so we can carve out the device heap in
Expand Down
14 changes: 8 additions & 6 deletions runtime/hsa-runtime/core/runtime/runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3111,20 +3111,22 @@ hsa_status_t Runtime::VMemoryHandleCreate(const MemoryRegion* region, size_t siz
uint64_t flags_unused,
hsa_amd_vmem_alloc_handle_t* memoryOnlyHandle) {
const AMD::MemoryRegion* memRegion = static_cast<const AMD::MemoryRegion*>(region);
if (!memRegion->IsLocalMemory()) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

if (!IsMultipleOf(size, memRegion->GetPageSize()))
return HSA_STATUS_ERROR_INVALID_ARGUMENT;

ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);
void* thunk_handle;
hsa_status_t status = region->Allocate(size, alloc_flags, &thunk_handle, 0);
void *user_mode_driver_handle;
hsa_status_t status =
region->Allocate(size, alloc_flags, &user_mode_driver_handle, 0);
if (status == HSA_STATUS_SUCCESS) {
memory_handle_map_.emplace(std::piecewise_construct,
std::forward_as_tuple(thunk_handle),
std::forward_as_tuple(region, size, flags_unused, thunk_handle, alloc_flags));
std::forward_as_tuple(user_mode_driver_handle),
std::forward_as_tuple(region, size, flags_unused,
user_mode_driver_handle,
alloc_flags));

*memoryOnlyHandle = MemoryHandle::Convert(thunk_handle);
*memoryOnlyHandle = MemoryHandle::Convert(user_mode_driver_handle);
}
return status;
}
Expand Down

0 comments on commit 0d77583

Please sign in to comment.