Implement cuda::mr::cuda_managed_memory_resource
Fixes #1515: Implement a memory_resource using `cudaMallocManaged` and `cudaFree`
miscco committed Apr 3, 2024
1 parent 77391d3 commit 975cfde
Showing 6 changed files with 456 additions and 0 deletions.
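For orientation, here is a minimal usage sketch of the new resource (not part of the diff below; the sizes and variable names are illustrative, and depending on the CCCL version the experimental memory resource interface may need to be enabled before including the header):

// #define LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE // may be required, depending on CCCL version
#include <cuda/memory_resource>

int main()
{
  // Default flag is cudaMemAttachGlobal; cudaMemAttachHost is the only other accepted flag.
  cuda::mr::cuda_managed_memory_resource res{};

  // cudaMallocManaged memory is reachable from both host and device.
  void* ptr = res.allocate(4 * sizeof(int));
  static_cast<int*>(ptr)[0] = 42; // host write, no explicit copy needed

  res.deallocate(ptr, 4 * sizeof(int));
  return 0;
}

Any other flag bits are masked off in the constructor, which additionally asserts in debug builds, as shown in the diff below.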
206 changes: 206 additions & 0 deletions libcudacxx/include/cuda/__memory_resource/cuda_managed_memory_resource.h
@@ -0,0 +1,206 @@
//===----------------------------------------------------------------------===//
//
// Part of libcu++, the C++ Standard Library for your entire system,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _CUDA__MEMORY_RESOURCE_CUDA_MANAGED_MEMORY_RESOURCE_H
#define _CUDA__MEMORY_RESOURCE_CUDA_MANAGED_MEMORY_RESOURCE_H

#include <cuda/__cccl_config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#if !defined(_CCCL_COMPILER_MSVC_2017)

#if !defined(_CCCL_CUDA_COMPILER_NVCC) && !defined(_CCCL_CUDA_COMPILER_NVHPC)
# include <cuda_runtime_api.h>
#endif // !_CCCL_CUDA_COMPILER_NVCC && !_CCCL_CUDA_COMPILER_NVHPC

#include <cuda/__memory_resource/get_property.h>
#include <cuda/__memory_resource/properties.h>
#include <cuda/__memory_resource/resource.h>
#include <cuda/__memory_resource/resource_ref.h>
#include <cuda/std/detail/libcxx/include/__new/bad_alloc.h>

#if _CCCL_STD_VER >= 2014

_LIBCUDACXX_BEGIN_NAMESPACE_CUDA_MR

/**
* @brief `cuda_managed_memory_resource` uses cudaMallocManaged / cudaFree for allocation/deallocation.
*/
class cuda_managed_memory_resource
{
private:
unsigned int __flags_ = cudaMemAttachGlobal;

static constexpr unsigned int __available_flags = cudaMemAttachGlobal | cudaMemAttachHost;

public:
constexpr cuda_managed_memory_resource(const unsigned int __flags = cudaMemAttachGlobal) noexcept
: __flags_(__flags & __available_flags)
{
_LIBCUDACXX_ASSERT(__flags_ == __flags, "Unexpected flags passed to cuda_managed_memory_resource");
}

/**
* @brief Allocate CUDA managed memory of size at least \p __bytes.
* @param __bytes The size in bytes of the allocation.
* @param __alignment The requested alignment of the allocation.
* @throw std::bad_alloc if \p __alignment is not a valid alignment.
* @throw cuda::cuda_error with the returned error code if `cudaMallocManaged` fails.
* @return void* Pointer to the newly allocated memory
*/
void* allocate(const size_t __bytes, const size_t __alignment = __default_cuda_malloc_alignment) const
{
// The requested alignment must not exceed the default alignment and must evenly divide it
if (!__is_valid_alignment(__alignment))
{
_CUDA_VSTD_NOVERSION::__throw_bad_alloc();
}

void* __ptr{nullptr};
const ::cudaError_t __status = ::cudaMallocManaged(&__ptr, __bytes, __flags_);
switch (__status)
{
case ::cudaSuccess:
break;
default:
::cudaGetLastError(); // Clear CUDA error state
# ifndef _LIBCUDACXX_NO_EXCEPTIONS
throw cuda::cuda_error{__status, "Failed to allocate memory with cudaMallocManaged."};
# else
_LIBCUDACXX_UNREACHABLE();
# endif
}
return __ptr;
}

/**
* @brief Deallocate memory pointed to by \p __ptr.
* @param __ptr Pointer to be deallocated. Must have been allocated through a call to `allocate`
* @param __bytes The number of bytes that was passed to the `allocate` call that returned \p __ptr.
* @param __alignment The alignment that was passed to the `allocate` call that returned \p __ptr.
*/
void deallocate(void* __ptr, const size_t, const size_t __alignment = __default_cuda_malloc_alignment) const
{
// The requested alignment must not exceed the default alignment and must evenly divide it
_LIBCUDACXX_ASSERT(__is_valid_alignment(__alignment),
"Invalid alignment passed to cuda_managed_memory_resource::deallocate.");
const ::cudaError_t __status = ::cudaFree(__ptr);
(void) __status;
(void) __alignment;
_LIBCUDACXX_ASSERT(__status == cudaSuccess, "cuda_managed_memory_resource::deallocate failed");
}

/**
* @brief Equality comparison with another cuda_managed_memory_resource
* @return Whether both cuda_managed_memory_resource objects were constructed with the same flags
*/
_LIBCUDACXX_NODISCARD_ATTRIBUTE constexpr bool operator==(cuda_managed_memory_resource const& __other) const noexcept
{
return __flags_ == __other.__flags_;
}
# if _CCCL_STD_VER <= 2017
/**
* @brief Inequality comparison with another cuda_managed_memory_resource
* @return Whether both cuda_managed_memory_resource objects were constructed with different flags
*/
_LIBCUDACXX_NODISCARD_ATTRIBUTE constexpr bool operator!=(cuda_managed_memory_resource const& __other) const noexcept
{
return __flags_ != __other.__flags_;
}
# endif // _CCCL_STD_VER <= 2017

/**
* @brief Equality comparison between a cuda_managed_memory_resource and another resource
* @param __lhs The cuda_managed_memory_resource
* @param __rhs The resource to compare to
* @return Result of equality comparison of both resources converted to a resource_ref<>
*/
template <class _Resource>
_LIBCUDACXX_NODISCARD_FRIEND auto
operator==(cuda_managed_memory_resource const& __lhs, _Resource const& __rhs) noexcept
_LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource<cuda_managed_memory_resource, _Resource>)
{
return resource_ref<>{const_cast<cuda_managed_memory_resource&>(__lhs)}
== resource_ref<>{const_cast<_Resource&>(__rhs)};
}
# if _CCCL_STD_VER <= 2017
/**
* @copydoc cuda_managed_memory_resource::operator<_Resource>==(cuda_managed_memory_resource const&, _Resource const&)
*/
template <class _Resource>
_LIBCUDACXX_NODISCARD_FRIEND auto
operator==(_Resource const& __rhs, cuda_managed_memory_resource const& __lhs) noexcept
_LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource<cuda_managed_memory_resource, _Resource>)
{
return resource_ref<>{const_cast<cuda_managed_memory_resource&>(__lhs)}
== resource_ref<>{const_cast<_Resource&>(__rhs)};
}
/**
* @copydoc cuda_managed_memory_resource::operator<_Resource>==(cuda_managed_memory_resource const&, _Resource const&)
*/
template <class _Resource>
_LIBCUDACXX_NODISCARD_FRIEND auto
operator!=(cuda_managed_memory_resource const& __lhs, _Resource const& __rhs) noexcept
_LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource<cuda_managed_memory_resource, _Resource>)
{
return resource_ref<>{const_cast<cuda_managed_memory_resource&>(__lhs)}
!= resource_ref<>{const_cast<_Resource&>(__rhs)};
}
/**
* @copydoc cuda_managed_memory_resource::operator<_Resource>==(cuda_managed_memory_resource const&, _Resource const&)
*/
template <class _Resource>
_LIBCUDACXX_NODISCARD_FRIEND auto
operator!=(_Resource const& __rhs, cuda_managed_memory_resource const& __lhs) noexcept
_LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource<cuda_managed_memory_resource, _Resource>)
{
return resource_ref<>{const_cast<cuda_managed_memory_resource&>(__lhs)}
!= resource_ref<>{const_cast<_Resource&>(__rhs)};
}
# endif // _CCCL_STD_VER <= 2017

/**
* @brief Enables the `managed_memory` property
*/
friend constexpr void get_property(cuda_managed_memory_resource const&, managed_memory) noexcept {}
/**
* @brief Enables the `device_accessible` property
*/
friend constexpr void get_property(cuda_managed_memory_resource const&, device_accessible) noexcept {}
/**
* @brief Enables the `host_accessible` property
*/
friend constexpr void get_property(cuda_managed_memory_resource const&, host_accessible) noexcept {}

/**
* @brief Checks whether the passed in alignment is valid
*/
static constexpr bool __is_valid_alignment(const size_t __alignment) noexcept
{
return __alignment <= __default_cuda_malloc_alignment && (__default_cuda_malloc_alignment % __alignment == 0);
}
};
static_assert(resource_with<cuda_managed_memory_resource, managed_memory>, "");
static_assert(resource_with<cuda_managed_memory_resource, device_accessible>, "");
static_assert(resource_with<cuda_managed_memory_resource, host_accessible>, "");

_LIBCUDACXX_END_NAMESPACE_CUDA_MR

#endif // _CCCL_STD_VER >= 2014

#endif // !_CCCL_COMPILER_MSVC_2017

#endif //_CUDA__MEMORY_RESOURCE_CUDA_MANAGED_MEMORY_RESOURCE_H
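Because the class exposes the device_accessible and host_accessible properties through the get_property friends above, it can also be used behind the library's type-erased resource_ref, just as the cross-resource operator== does. A minimal sketch under the same caveats as the earlier example; the function name erase_example is illustrative:

#include <cuda/memory_resource>

void erase_example()
{
  cuda::mr::cuda_managed_memory_resource res{};

  // resource_ref<> erases the concrete resource type while forwarding
  // allocate/deallocate to the wrapped resource.
  cuda::mr::resource_ref<> ref{res};

  void* ptr = ref.allocate(256, 16); // 16 evenly divides the default alignment
  ref.deallocate(ptr, 256, 16);
}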
5 changes: 5 additions & 0 deletions libcudacxx/include/cuda/__memory_resource/properties.h
@@ -42,6 +42,11 @@ struct device_accessible
struct host_accessible
{};

/// \struct managed_memory
/// \brief The \c managed_memory property signals that the allocated memory is CUDA managed (unified) memory,
/// accessible from both host and device
struct managed_memory
{};

_LIBCUDACXX_END_NAMESPACE_CUDA_MR

#endif // _CCCL_STD_VER >= 2014
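With the new property, generic code can require managed allocations at compile time, mirroring the static_asserts at the bottom of the resource header. A hedged sketch assuming host-side use; the helper name zero_fill is hypothetical:

#include <cuda/memory_resource>
#include <cstddef> // std::size_t, std::max_align_t
#include <cstring> // std::memset

template <class Resource>
void zero_fill(Resource& res, std::size_t bytes)
{
  static_assert(cuda::mr::resource_with<Resource, cuda::mr::managed_memory>,
                "Resource must provide managed memory");
  void* ptr = res.allocate(bytes, alignof(std::max_align_t));
  // Managed memory is host accessible, so the CPU can initialize it directly.
  std::memset(ptr, 0, bytes);
  res.deallocate(ptr, bytes, alignof(std::max_align_t));
}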
1 change: 1 addition & 0 deletions libcudacxx/include/cuda/memory_resource
@@ -92,6 +92,7 @@ class resource_ref {
# pragma system_header
# endif // no system header

#include <cuda/__memory_resource/cuda_managed_memory_resource.h>
#include <cuda/__memory_resource/cuda_memory_resource.h>
#include <cuda/__memory_resource/get_property.h>
#include <cuda/__memory_resource/properties.h>
@@ -0,0 +1,82 @@
//===----------------------------------------------------------------------===//
//
// Part of libcu++, the C++ Standard Library for your entire system,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03, c++11
// UNSUPPORTED: msvc-19.16
// UNSUPPORTED: nvrtc

#include <cuda/memory_resource>
#include <cuda/std/cassert>
#include <cuda/std/cstdint>
#include <cuda/stream_ref>
#include <new> // std::bad_alloc, thrown for invalid alignments

#include "test_macros.h"

void ensure_managed_ptr(void* ptr) {
assert(ptr != nullptr);
cudaPointerAttributes attributes;
cudaError_t status = cudaPointerGetAttributes(&attributes, ptr);
assert(status == cudaSuccess);
assert(attributes.type == cudaMemoryTypeManaged);
}

void test(const unsigned int flag) {
cuda::mr::cuda_managed_memory_resource res{flag};

{ // allocate / deallocate
auto* ptr = res.allocate(42);
static_assert(cuda::std::is_same<decltype(ptr), void*>::value, "");
ensure_managed_ptr(ptr);

res.deallocate(ptr, 42);
}

{ // allocate / deallocate with alignment
auto* ptr = res.allocate(42, 4);
static_assert(cuda::std::is_same<decltype(ptr), void*>::value, "");
ensure_managed_ptr(ptr);

res.deallocate(ptr, 42, 4);
}

#ifndef TEST_HAS_NO_EXCEPTIONS
{ // allocate with invalid alignment: 42 does not evenly divide the default alignment
while (true) {
try {
auto* ptr = res.allocate(5, 42);
(void) ptr;
} catch (const std::bad_alloc&) {
break;
}
assert(false);
}
}

{ // allocate with oversized alignment: 1337 exceeds the default alignment
while (true) {
try {
auto* ptr = res.allocate(5, 1337);
(void) ptr;
} catch (const std::bad_alloc&) {
break;
}
assert(false);
}
}
#endif // TEST_HAS_NO_EXCEPTIONS
}

void test() {
test(cudaMemAttachGlobal);
test(cudaMemAttachHost);
}

int main(int, char**) {
NV_IF_TARGET(NV_IS_HOST, test();)
return 0;
}
