diff --git a/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp b/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp
index 605b0223..adb382d8 100644
--- a/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp
+++ b/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp
@@ -103,6 +103,11 @@ KfdDriver::AllocateMemory(const core::MemoryRegion &mem_region,
     kmt_alloc_flags.ui32.NonPaged = 1;
   }
 
+  if (!m_region.IsLocalMemory() &&
+      (alloc_flags & core::MemoryRegion::AllocateMemoryOnly)) {
+    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
   // Allocating a memory handle for virtual memory
   kmt_alloc_flags.ui32.NoAddress =
       !!(alloc_flags & core::MemoryRegion::AllocateMemoryOnly);
diff --git a/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp b/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp
index 46a7ef5b..b9b62ad6 100644
--- a/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp
+++ b/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp
@@ -136,6 +136,7 @@ XdnaDriver::AllocateMemory(const core::MemoryRegion &mem_region,
   amdxdna_drm_get_bo_info get_bo_info_args{0};
   drm_gem_close close_bo_args{0};
+  void *mapped_mem(nullptr);
 
   if (!m_region.IsSystem()) {
     return HSA_STATUS_ERROR_INVALID_REGION;
   }
@@ -162,18 +163,29 @@ XdnaDriver::AllocateMemory(const core::MemoryRegion &mem_region,
     return HSA_STATUS_ERROR;
   }
 
+  /// TODO: For now we always map the memory and keep a mapping from handles
+  /// to VA memory addresses. Once we can support the separate VMEM call to
+  /// map handles we can fix this.
   if (m_region.kernarg()) {
-    *mem = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_,
-                get_bo_info_args.map_offset);
-    if (*mem == MAP_FAILED) {
+    mapped_mem = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_,
+                      get_bo_info_args.map_offset);
+    if (mapped_mem == MAP_FAILED) {
       // Close the BO in the case when a mapping fails and we got a BO handle.
       ioctl(fd_, DRM_IOCTL_GEM_CLOSE, &close_bo_args);
       return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
     }
   } else {
-    *mem = reinterpret_cast<void *>(get_bo_info_args.vaddr);
+    mapped_mem = reinterpret_cast<void *>(get_bo_info_args.vaddr);
   }
 
+  if (alloc_flags & core::MemoryRegion::AllocateMemoryOnly) {
+    *mem = reinterpret_cast<void *>(create_bo_args.handle);
+  } else {
+    *mem = mapped_mem;
+  }
+
+  vmem_handle_mappings.emplace(create_bo_args.handle, mapped_mem);
+
   return HSA_STATUS_SUCCESS;
 }
diff --git a/runtime/hsa-runtime/core/inc/amd_xdna_driver.h b/runtime/hsa-runtime/core/inc/amd_xdna_driver.h
index e08db0e6..e313eb4b 100644
--- a/runtime/hsa-runtime/core/inc/amd_xdna_driver.h
+++ b/runtime/hsa-runtime/core/inc/amd_xdna_driver.h
@@ -43,6 +43,7 @@
 #define HSA_RUNTIME_CORE_INC_AMD_XDNA_DRIVER_H_
 
 #include
+#include <unordered_map>
 
 #include "core/inc/driver.h"
 #include "core/inc/memory_region.h"
@@ -89,6 +90,12 @@ class XdnaDriver : public core::Driver {
   hsa_status_t InitDeviceHeap();
   hsa_status_t FreeDeviceHeap();
 
+  /// TODO: Remove this in the future and rely on the core Runtime
+  /// object to track handle allocations. Using the VMEM API for mapping XDNA
+  /// driver handles requires a bit more refactoring. So rely on the XDNA driver
+  /// to manage some of this for now.
+  std::unordered_map<uint32_t, void *> vmem_handle_mappings;
+
   /// @brief Virtual address range allocated for the device heap.
   ///
   /// Allocate a large enough space so we can carve out the device heap in
diff --git a/runtime/hsa-runtime/core/runtime/runtime.cpp b/runtime/hsa-runtime/core/runtime/runtime.cpp
index e25cf556..ab53fbe9 100644
--- a/runtime/hsa-runtime/core/runtime/runtime.cpp
+++ b/runtime/hsa-runtime/core/runtime/runtime.cpp
@@ -3111,20 +3111,22 @@ hsa_status_t Runtime::VMemoryHandleCreate(const MemoryRegion* region, size_t siz
                                           uint64_t flags_unused,
                                           hsa_amd_vmem_alloc_handle_t* memoryOnlyHandle) {
   const AMD::MemoryRegion* memRegion = static_cast<const AMD::MemoryRegion*>(region);
-  if (!memRegion->IsLocalMemory()) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
   if (!IsMultipleOf(size, memRegion->GetPageSize())) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
 
   ScopedAcquire lock(&memory_lock_);
-  void* thunk_handle;
-  hsa_status_t status = region->Allocate(size, alloc_flags, &thunk_handle, 0);
+  void *user_mode_driver_handle;
+  hsa_status_t status =
+      region->Allocate(size, alloc_flags, &user_mode_driver_handle, 0);
   if (status == HSA_STATUS_SUCCESS) {
     memory_handle_map_.emplace(std::piecewise_construct,
-                               std::forward_as_tuple(thunk_handle),
-                               std::forward_as_tuple(region, size, flags_unused, thunk_handle, alloc_flags));
+                               std::forward_as_tuple(user_mode_driver_handle),
+                               std::forward_as_tuple(region, size, flags_unused,
+                                                     user_mode_driver_handle,
+                                                     alloc_flags));
 
-    *memoryOnlyHandle = MemoryHandle::Convert(thunk_handle);
+    *memoryOnlyHandle = MemoryHandle::Convert(user_mode_driver_handle);
   }
   return status;
 }
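To make the new bookkeeping concrete, below is a minimal standalone sketch (not part of the patch) of the handle-to-VA tracking that XdnaDriver::AllocateMemory now performs through its vmem_handle_mappings member: when AllocateMemoryOnly is set the caller gets back the BO handle as an opaque value, while the mmap'ed virtual address is kept on the side so a later map or free can recover it. The HandleTable class and its Record/Lookup/Forget methods are hypothetical names chosen purely for illustration.

    #include <cstdint>
    #include <unordered_map>

    // Illustrative stand-in for the vmem_handle_mappings bookkeeping: BO
    // handles act as opaque keys, and the driver remembers the mapped
    // virtual address that backs each handle.
    class HandleTable {
     public:
      // Record the VA that was mapped for a freshly created BO handle.
      void Record(uint32_t bo_handle, void *mapped_va) {
        mappings_.emplace(bo_handle, mapped_va);
      }

      // Translate a BO handle back to its VA; returns nullptr if unknown.
      void *Lookup(uint32_t bo_handle) const {
        auto it = mappings_.find(bo_handle);
        return it == mappings_.end() ? nullptr : it->second;
      }

      // Drop the entry once the BO is closed/freed.
      void Forget(uint32_t bo_handle) { mappings_.erase(bo_handle); }

     private:
      std::unordered_map<uint32_t, void *> mappings_;
    };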