Skip to content

Commit

Permalink
Implement cuda::mr::cuda_managed_memory_resource
Browse files Browse the repository at this point in the history
Fixes Implement a memory_resource using `cudaMallocManaged` and `cudaFree` #1515
  • Loading branch information
miscco committed Apr 12, 2024
1 parent 83a2dd5 commit a79affb
Show file tree
Hide file tree
Showing 7 changed files with 474 additions and 6 deletions.
9 changes: 7 additions & 2 deletions libcudacxx/docs/extended_api/memory_resource/properties.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,14 @@ template<class MemoryResource, class = cuda::std::enable_if_t<cuda::has_property
void function_that_dispatches_to_device(MemoryResource& resource);
```
For now, libcu++ provides two commonly used properties: `cuda::mr::device_accessible` and `cuda::mr::host_accessible`. More properties may be added as the library and the hardware capabilities evolve. However, a user library is free to define as many properties as needed to fully cover its API surface.
For now, libcu++ provides various commonly used properties:
Note that the libcu++ provided properties are stateless. However, properties can also provide stateful information that is retrieved via the `get_property` free function. In order to communicate the desired type of the carried state, a stateful property must define the `value_type` alias. A library can constrain interfaces that require a stateful property with `cuda::has_property_with` as shown in the example below (See [here](https://godbolt.org/z/11sGbr333) for a minimal Compiler Explorer example).
* `cuda::mr::device_accessible` and `cuda::mr::host_accessible` indicate whether memory allocated using a memory resource is accessible from host or device respectively.
* `cuda::mr::managed_memory` indicates that the memory resource allocates CUDA unified memory, which is accessible from both host and device.
More properties may be added as the library and the hardware capabilities evolve. However, a user library is free to define custom properties.
Note that the properties currently provided by libcu++ are stateless. However, properties can also provide stateful information that is retrieved via the `get_property` free function. In order to communicate the desired type of the carried state, a stateful property must define the `value_type` alias. A library can constrain interfaces that require a stateful property with `cuda::has_property_with` as shown in the example below (See [here](https://godbolt.org/z/11sGbr333) for a minimal Compiler Explorer example).
```c++
struct required_alignment{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
//===----------------------------------------------------------------------===//
//
// Part of libcu++, the C++ Standard Library for your entire system,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _CUDA__MEMORY_RESOURCE_CUDA_MANAGED_MEMORY_RESOURCE_H
#define _CUDA__MEMORY_RESOURCE_CUDA_MANAGED_MEMORY_RESOURCE_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#if !defined(_CCCL_COMPILER_MSVC_2017)

# if !defined(_CCCL_CUDA_COMPILER_NVCC) && !defined(_CCCL_CUDA_COMPILER_NVHPC)
# include <cuda_runtime_api.h>
# endif // !_CCCL_CUDA_COMPILER_NVCC && !_CCCL_CUDA_COMPILER_NVHPC

# include <cuda/__memory_resource/get_property.h>
# include <cuda/__memory_resource/properties.h>
# include <cuda/__memory_resource/resource.h>
# include <cuda/__memory_resource/resource_ref.h>
# include <cuda/std/__cuda/api_wrapper.h>
# include <cuda/std/detail/libcxx/include/__new/bad_alloc.h>

# if _CCCL_STD_VER >= 2014

_LIBCUDACXX_BEGIN_NAMESPACE_CUDA_MR

/**
 * @brief `cuda_managed_memory_resource` uses cudaMallocManaged / cudaFree for allocation/deallocation.
 *
 * Memory allocated through this resource is CUDA unified (managed) memory and is
 * accessible from both host and device.
 */
class cuda_managed_memory_resource
{
private:
  // Flags forwarded to ::cudaMallocManaged. Defaults to global attachment.
  unsigned int __flags_ = cudaMemAttachGlobal;

  // The only attach flags accepted by ::cudaMallocManaged.
  static constexpr unsigned int __available_flags = cudaMemAttachGlobal | cudaMemAttachHost;

public:
  /**
   * @brief Constructs a cuda_managed_memory_resource with the given attachment flags.
   * @param __flags Must be `cudaMemAttachGlobal` or `cudaMemAttachHost`. Any other
   *        bits are masked off; the debug assertion flags such invalid input.
   */
  constexpr cuda_managed_memory_resource(const unsigned int __flags = cudaMemAttachGlobal) noexcept
      : __flags_(__flags & __available_flags)
  {
    _LIBCUDACXX_ASSERT(__flags_ == __flags, "Unexpected flags passed to cuda_managed_memory_resource");
  }

  /**
   * @brief Allocate CUDA unified memory of size at least \p __bytes.
   * @param __bytes The size in bytes of the allocation.
   * @param __alignment The requested alignment of the allocation.
   * @throw cuda::std::bad_alloc if \p __alignment is zero, larger than
   *        `default_cuda_malloc_alignment`, or does not evenly divide it.
   * @throw cuda::cuda_error of the returned error code if the allocation fails.
   * @return Pointer to the newly allocated memory
   */
  void* allocate(const size_t __bytes, const size_t __alignment = default_cuda_malloc_alignment) const
  {
    // We need to ensure that the provided alignment matches the minimal provided alignment
    if (!__is_valid_alignment(__alignment))
    {
      _CUDA_VSTD_NOVERSION::__throw_bad_alloc();
    }

    void* __ptr{nullptr};
    _CCCL_TRY_CUDA_API(
      ::cudaMallocManaged, "Failed to allocate memory with cudaMallocManaged.", &__ptr, __bytes, __flags_);
    return __ptr;
  }

  /**
   * @brief Deallocate memory pointed to by \p __ptr.
   * @param __ptr Pointer to be deallocated. Must have been allocated through a call to `allocate`
   * @param __bytes The number of bytes that was passed to the `allocate` call that returned \p __ptr.
   * @param __alignment The alignment that was passed to the `allocate` call that returned \p __ptr.
   */
  void deallocate(void* __ptr, const size_t, const size_t __alignment = default_cuda_malloc_alignment) const
  {
    // We need to ensure that the provided alignment matches the minimal provided alignment
    _LIBCUDACXX_ASSERT(__is_valid_alignment(__alignment),
                       "Invalid alignment passed to cuda_managed_memory_resource::deallocate.");
    _CCCL_ASSERT_CUDA_API(::cudaFree, "cuda_managed_memory_resource::deallocate failed", __ptr);
    (void) __alignment; // only read by the debug assertion above
  }

  /**
   * @brief Equality comparison with another cuda_managed_memory_resource
   * @return Whether both cuda_managed_memory_resource were constructed with the same flags
   */
  _LIBCUDACXX_NODISCARD_ATTRIBUTE constexpr bool operator==(cuda_managed_memory_resource const& __other) const noexcept
  {
    return __flags_ == __other.__flags_;
  }
#  if _CCCL_STD_VER <= 2017
  /**
   * @brief Inequality comparison with another cuda_managed_memory_resource
   * @return Whether both cuda_managed_memory_resource were constructed with different flags
   */
  _LIBCUDACXX_NODISCARD_ATTRIBUTE constexpr bool operator!=(cuda_managed_memory_resource const& __other) const noexcept
  {
    return __flags_ != __other.__flags_;
  }
#  endif // _CCCL_STD_VER <= 2017

  /**
   * @brief Equality comparison between a cuda_managed_memory_resource and another resource
   * @param __lhs The cuda_managed_memory_resource
   * @param __rhs The resource to compare to
   * @return If the underlying types are equality comparable, returns the result of equality comparison of both
   * resources. Otherwise, returns false.
   */
  template <class _Resource>
  _LIBCUDACXX_NODISCARD_FRIEND auto
  operator==(cuda_managed_memory_resource const& __lhs, _Resource const& __rhs) noexcept
    _LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource<cuda_managed_memory_resource, _Resource>)
  {
    return resource_ref<>{const_cast<cuda_managed_memory_resource&>(__lhs)}
        == resource_ref<>{const_cast<_Resource&>(__rhs)};
  }
#  if _CCCL_STD_VER <= 2017
  /**
   * @copydoc operator==(cuda_managed_memory_resource const&, _Resource const&)
   */
  template <class _Resource>
  _LIBCUDACXX_NODISCARD_FRIEND auto
  operator==(_Resource const& __lhs, cuda_managed_memory_resource const& __rhs) noexcept
    _LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource<cuda_managed_memory_resource, _Resource>)
  {
    return resource_ref<>{const_cast<cuda_managed_memory_resource&>(__rhs)}
        == resource_ref<>{const_cast<_Resource&>(__lhs)};
  }
  /**
   * @copydoc operator==(cuda_managed_memory_resource const&, _Resource const&)
   */
  template <class _Resource>
  _LIBCUDACXX_NODISCARD_FRIEND auto
  operator!=(cuda_managed_memory_resource const& __lhs, _Resource const& __rhs) noexcept
    _LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource<cuda_managed_memory_resource, _Resource>)
  {
    return resource_ref<>{const_cast<cuda_managed_memory_resource&>(__lhs)}
        != resource_ref<>{const_cast<_Resource&>(__rhs)};
  }
  /**
   * @copydoc operator==(cuda_managed_memory_resource const&, _Resource const&)
   */
  template <class _Resource>
  _LIBCUDACXX_NODISCARD_FRIEND auto
  operator!=(_Resource const& __lhs, cuda_managed_memory_resource const& __rhs) noexcept
    _LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource<cuda_managed_memory_resource, _Resource>)
  {
    return resource_ref<>{const_cast<cuda_managed_memory_resource&>(__rhs)}
        != resource_ref<>{const_cast<_Resource&>(__lhs)};
  }
#  endif // _CCCL_STD_VER <= 2017

  /**
   * @brief Enables the `managed_memory` property
   */
  friend constexpr void get_property(cuda_managed_memory_resource const&, managed_memory) noexcept {}
  /**
   * @brief Enables the `device_accessible` property
   */
  friend constexpr void get_property(cuda_managed_memory_resource const&, device_accessible) noexcept {}
  /**
   * @brief Enables the `host_accessible` property
   */
  friend constexpr void get_property(cuda_managed_memory_resource const&, host_accessible) noexcept {}

  /**
   * @brief Checks whether the passed in alignment is valid.
   *
   * A zero alignment is rejected explicitly; without the guard the modulo below
   * would be a division by zero (undefined behavior).
   */
  static constexpr bool __is_valid_alignment(const size_t __alignment) noexcept
  {
    return __alignment > 0 && __alignment <= default_cuda_malloc_alignment
        && (default_cuda_malloc_alignment % __alignment == 0);
  }
};
static_assert(resource_with<cuda_managed_memory_resource, managed_memory>, "");
static_assert(resource_with<cuda_managed_memory_resource, device_accessible>, "");
static_assert(resource_with<cuda_managed_memory_resource, host_accessible>, "");

_LIBCUDACXX_END_NAMESPACE_CUDA_MR

# endif // _CCCL_STD_VER >= 2014

#endif // !_CCCL_COMPILER_MSVC_2017

#endif //_CUDA__MEMORY_RESOURCE_CUDA_MANAGED_MEMORY_RESOURCE_H
16 changes: 12 additions & 4 deletions libcudacxx/include/cuda/__memory_resource/properties.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,24 @@ _LIBCUDACXX_BEGIN_NAMESPACE_CUDA_MR
*/
_LIBCUDACXX_INLINE_VAR constexpr size_t default_cuda_malloc_alignment = 256;

/// \struct device_accessible
/// \brief The \c device_accessible property signals that the allocated memory is device accessible
/**
 * @brief The \c device_accessible property signals that the allocated memory is device accessible,
 * i.e. it may be accessed from device code.
 */
struct device_accessible
{};

/// \struct host_accessible
/// \brief The \c host_accessible property signals that the allocated memory is host accessible
/**
 * @brief The \c host_accessible property signals that the allocated memory is host accessible,
 * i.e. it may be accessed from host code.
 */
struct host_accessible
{};

/**
 * @brief The \c managed_memory property signals that the allocated memory is CUDA
 * unified (managed) memory, accessible from both host and device.
 */
struct managed_memory
{};

_LIBCUDACXX_END_NAMESPACE_CUDA_MR

# endif // _CCCL_STD_VER >= 2014
Expand Down
1 change: 1 addition & 0 deletions libcudacxx/include/cuda/memory_resource
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ class resource_ref {
# pragma system_header
# endif // no system header

#include <cuda/__memory_resource/cuda_managed_memory_resource.h>
#include <cuda/__memory_resource/cuda_memory_resource.h>
#include <cuda/__memory_resource/get_property.h>
#include <cuda/__memory_resource/properties.h>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
//===----------------------------------------------------------------------===//
//
// Part of libcu++, the C++ Standard Library for your entire system,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03, c++11
// UNSUPPORTED: msvc-19.16
// UNSUPPORTED: nvrtc

#include <cuda/memory_resource>
#include <cuda/std/cassert>
#include <cuda/std/cstdint>
#include <cuda/stream_ref>

#include "test_macros.h"

// Verifies that `ptr` is a non-null pointer that the CUDA runtime classifies
// as managed (unified) memory.
void ensure_managed_ptr(void* ptr)
{
  assert(ptr != nullptr);

  cudaPointerAttributes attrs{};
  const cudaError_t query_status = cudaPointerGetAttributes(&attrs, ptr);
  assert(query_status == cudaSuccess);
  assert(attrs.type == cudaMemoryTypeManaged);
}

// Exercises cuda_managed_memory_resource constructed with the given
// cudaMallocManaged attach flag: plain and aligned allocate/deallocate round
// trips, plus the bad_alloc paths for invalid alignments.
void test(const unsigned int flag)
{
  cuda::mr::cuda_managed_memory_resource res{flag};

  { // allocate / deallocate with the default alignment
    auto* ptr = res.allocate(42);
    static_assert(cuda::std::is_same<decltype(ptr), void*>::value, "");
    ensure_managed_ptr(ptr);

    res.deallocate(ptr, 42);
  }

  { // allocate / deallocate with an explicit smaller alignment that divides the default
    auto* ptr = res.allocate(42, 4);
    static_assert(cuda::std::is_same<decltype(ptr), void*>::value, "");
    ensure_managed_ptr(ptr);

    res.deallocate(ptr, 42, 4);
  }

#ifndef TEST_HAS_NO_EXCEPTIONS
  { // an alignment that does not evenly divide the default alignment must throw
    try
    {
      auto* ptr = res.allocate(5, 42);
      unused(ptr);
      assert(false); // allocate must not succeed with an invalid alignment
    }
    catch (const cuda::std::bad_alloc&)
    {
      // expected
    }
  }

  { // an alignment larger than the default alignment must throw
    try
    {
      auto* ptr = res.allocate(5, 1337);
      unused(ptr);
      assert(false); // allocate must not succeed with an oversized alignment
    }
    catch (const cuda::std::bad_alloc&)
    {
      // expected
    }
  }
#endif // !TEST_HAS_NO_EXCEPTIONS
}

// Runs the test suite for every attach flag the resource accepts.
void test()
{
  constexpr unsigned int flags[] = {cudaMemAttachGlobal, cudaMemAttachHost};
  for (const unsigned int flag : flags)
  {
    test(flag);
  }
}

int main(int, char**)
{
  // Only run the tests when targeting the host; the body is compiled out for
  // device targets via NV_IF_TARGET.
  NV_IF_TARGET(NV_IS_HOST, test();)
  return 0;
}
Loading

0 comments on commit a79affb

Please sign in to comment.