From 975cfde04baa687c3ea290089a4bdc352fb431d1 Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Tue, 2 Apr 2024 20:35:13 +0200 Subject: [PATCH] Implement `cuda::mr::cuda_managed_memory_resource` Fixes Implement a memory_resource using `cudaMallocManaged` and `cudaFree` #1515 --- .../cuda_managed_memory_resource.h | 206 ++++++++++++++++++ .../cuda/__memory_resource/properties.h | 5 + libcudacxx/include/cuda/memory_resource | 1 + .../allocate.pass.cpp | 82 +++++++ .../equality.pass.cpp | 132 +++++++++++ .../traits.pass.cpp | 30 +++ 6 files changed, 456 insertions(+) create mode 100644 libcudacxx/include/cuda/__memory_resource/cuda_managed_memory_resource.h create mode 100644 libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/allocate.pass.cpp create mode 100644 libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/equality.pass.cpp create mode 100644 libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/traits.pass.cpp diff --git a/libcudacxx/include/cuda/__memory_resource/cuda_managed_memory_resource.h b/libcudacxx/include/cuda/__memory_resource/cuda_managed_memory_resource.h new file mode 100644 index 00000000000..f701d67435b --- /dev/null +++ b/libcudacxx/include/cuda/__memory_resource/cuda_managed_memory_resource.h @@ -0,0 +1,206 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA__MEMORY_RESOURCE_CUDA_MANAGED_MEMORY_RESOURCE_H +#define _CUDA__MEMORY_RESOURCE_CUDA_MANAGED_MEMORY_RESOURCE_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#if !defined(_CCCL_COMPILER_MSVC_2017) + +#if !defined(_CCCL_CUDA_COMPILER_NVCC) && !defined(_CCCL_CUDA_COMPILER_NVHPC) +# include +#endif // !_CCCL_CUDA_COMPILER_NVCC && !_CCCL_CUDA_COMPILER_NVHPC + +#include +#include +#include +#include +#include + +#if _CCCL_STD_VER >= 2014 + +_LIBCUDACXX_BEGIN_NAMESPACE_CUDA_MR + +/** + * @brief `cuda_managed_memory_resource` uses cudaMallocManaged / cudaFree for allocation/deallocation. + */ +class cuda_managed_memory_resource +{ +private: + unsigned int __flags_ = cudaMemAttachGlobal; + + static constexpr unsigned int __available_flags = cudaMemAttachGlobal | cudaMemAttachHost; + +public: + constexpr cuda_managed_memory_resource(const unsigned int __flags = cudaMemAttachGlobal) noexcept + : __flags_(__flags & __available_flags) + { + _LIBCUDACXX_ASSERT(__flags_ == __flags, "Unexpected flags passed to cuda_managed_memory_resource"); + } + + /** + * @brief Allocate device memory of size at least \p __bytes. + * @param __bytes The size in bytes of the allocation. + * @param __alignment The requested alignment of the allocation. + * @throw cuda::cuda_error of the returned error code + * @return void* Pointer to the newly allocated memory + */ + void* allocate(const size_t __bytes, const size_t __alignment = __default_cuda_malloc_alignment) const + { + // We need to ensure that the provided alignment matches the minimal provided alignment + if (!__is_valid_alignment(__alignment)) + { + _CUDA_VSTD_NOVERSION::__throw_bad_alloc(); + } + + void* __ptr{nullptr}; + const ::cudaError_t __status = ::cudaMallocManaged(&__ptr, __bytes, __flags_); + switch (__status) + { + case ::cudaSuccess: + break; + default: + ::cudaGetLastError(); // Clear CUDA error state +# ifndef _LIBCUDACXX_NO_EXCEPTIONS + throw cuda::cuda_error{__status, "Failed to allocate memory with cudaMallocManaged."}; +# else + _LIBCUDACXX_UNREACHABLE(); +# endif + } + return __ptr; + } + + /** + * @brief Deallocate memory pointed to by \p __ptr. + * @param __ptr Pointer to be deallocated. Must have been allocated through a call to `allocate` + * @param __bytes The number of bytes that was passed to the `allocate` call that returned \p __ptr. + * @param __alignment The alignment that was passed to the `allocate` call that returned \p __ptr. + */ + void deallocate(void* __ptr, const size_t, const size_t __alignment = __default_cuda_malloc_alignment) const + { + // We need to ensure that the provided alignment matches the minimal provided alignment + _LIBCUDACXX_ASSERT(__is_valid_alignment(__alignment), + "Invalid alignment passed to cuda_memory_resource::deallocate."); + const ::cudaError_t __status = ::cudaFree(__ptr); + (void) __status; + (void) __alignment; + _LIBCUDACXX_ASSERT(__status == cudaSuccess, "cuda_managed_memory_resource::deallocate failed"); + } + + /** + * @brief Equality comparison with another cuda_managed_memory_resource + * @return Whether both cuda_managed_memory_resource were constructed with the same flags + */ + _LIBCUDACXX_NODISCARD_ATTRIBUTE constexpr bool operator==(cuda_managed_memory_resource const& __other) const noexcept + { + return __flags_ == __other.__flags_; + } +# if _CCCL_STD_VER <= 2017 + /** + * @brief Inequality comparison with another cuda_managed_memory_resource + * @return Whether both cuda_managed_memory_resource were constructed with different flags + */ + _LIBCUDACXX_NODISCARD_ATTRIBUTE constexpr bool operator!=(cuda_managed_memory_resource const& __other) const noexcept + { + return __flags_ != __other.__flags_; + } +# endif // _CCCL_STD_VER <= 2017 + + /** + * @brief Equality comparison between a cuda_memory_resource and another resource + * @param __lhs The cuda_memory_resource + * @param __rhs The resource to compare to + * @return Result of equality comparison of both resources converted to a resource_ref<> + */ + template + _LIBCUDACXX_NODISCARD_FRIEND auto + operator==(cuda_managed_memory_resource const& __lhs, _Resource const& __rhs) noexcept + _LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource) + { + return resource_ref<>{const_cast(__lhs)} + == resource_ref<>{const_cast<_Resource&>(__rhs)}; + } +# if _CCCL_STD_VER <= 2017 + /** + * @copydoc cuda_managed_memory_resource::operator<_Resource>==(cuda_managed_memory_resource const&, _Resource const&) + */ + template + _LIBCUDACXX_NODISCARD_FRIEND auto + operator==(_Resource const& __rhs, cuda_managed_memory_resource const& __lhs) noexcept + _LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource) + { + return resource_ref<>{const_cast(__lhs)} + == resource_ref<>{const_cast<_Resource&>(__rhs)}; + } + /** + * @copydoc cuda_managed_memory_resource::operator<_Resource>==(cuda_managed_memory_resource const&, _Resource const&) + */ + template + _LIBCUDACXX_NODISCARD_FRIEND auto + operator!=(cuda_managed_memory_resource const& __lhs, _Resource const& __rhs) noexcept + _LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource) + { + return resource_ref<>{const_cast(__lhs)} + != resource_ref<>{const_cast<_Resource&>(__rhs)}; + } + /** + * @copydoc cuda_managed_memory_resource::operator<_Resource>==(cuda_managed_memory_resource const&, _Resource const&) + */ + template + _LIBCUDACXX_NODISCARD_FRIEND auto + operator!=(_Resource const& __rhs, cuda_managed_memory_resource const& __lhs) noexcept + _LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource) + { + return resource_ref<>{const_cast(__lhs)} + != resource_ref<>{const_cast<_Resource&>(__rhs)}; + } +# endif // _CCCL_STD_VER <= 2017 + + /** + * @brief Enables the `managed_memory` property + */ + friend constexpr void get_property(cuda_managed_memory_resource const&, managed_memory) noexcept {} + /** + * @brief Enables the `device_accessible` property + */ + friend constexpr void get_property(cuda_managed_memory_resource const&, device_accessible) noexcept {} + /** + * @brief Enables the `host_accessible` property + */ + friend constexpr void get_property(cuda_managed_memory_resource const&, host_accessible) noexcept {} + + /** + * @brief Checks whether the passed in alignment is valid + */ + static constexpr bool __is_valid_alignment(const size_t __alignment) noexcept + { + return __alignment <= __default_cuda_malloc_alignment && (__default_cuda_malloc_alignment % __alignment == 0); + } +}; +static_assert(resource_with, ""); +static_assert(resource_with, ""); +static_assert(resource_with, ""); + +_LIBCUDACXX_END_NAMESPACE_CUDA_MR + +#endif // _CCCL_STD_VER >= 2014 + +#endif // !_CCCL_COMPILER_MSVC_2017 + +#endif //_CUDA__MEMORY_RESOURCE_CUDA_MANAGED_MEMORY_RESOURCE_H diff --git a/libcudacxx/include/cuda/__memory_resource/properties.h b/libcudacxx/include/cuda/__memory_resource/properties.h index 621f8e40764..b7a33e52d80 100644 --- a/libcudacxx/include/cuda/__memory_resource/properties.h +++ b/libcudacxx/include/cuda/__memory_resource/properties.h @@ -42,6 +42,11 @@ struct device_accessible struct host_accessible {}; +/// \struct managed_memory +/// \brief The \c managed_memory property signals that the allocated memory is managed +struct managed_memory +{}; + _LIBCUDACXX_END_NAMESPACE_CUDA_MR #endif // _CCCL_STD_VER >= 2014 diff --git a/libcudacxx/include/cuda/memory_resource b/libcudacxx/include/cuda/memory_resource index 14e776d157b..4a08a76d9b9 100644 --- a/libcudacxx/include/cuda/memory_resource +++ b/libcudacxx/include/cuda/memory_resource @@ -92,6 +92,7 @@ class resource_ref { # pragma system_header # endif // no system header +#include #include #include #include diff --git a/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/allocate.pass.cpp b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/allocate.pass.cpp new file mode 100644 index 00000000000..70ddbcddc34 --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/allocate.pass.cpp @@ -0,0 +1,82 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11 +// UNSUPPORTED: msvc-19.16 +// UNSUPPORTED: nvrtc + +#include +#include +#include +#include + +#include "test_macros.h" + +void ensure_device_ptr(void* ptr) { + assert(ptr != nullptr); + cudaPointerAttributes attributes; + cudaError_t status = cudaPointerGetAttributes (&attributes, ptr); + assert(status == cudaSuccess); + assert(attributes.type == cudaMemoryTypeManaged); +} + +void test(const unsigned int flag) { + cuda::mr::cuda_managed_memory_resource res{flag}; + + { // allocate / deallocate + auto* ptr = res.allocate(42); + static_assert(cuda::std::is_same::value, ""); + ensure_device_ptr(ptr); + + res.deallocate(ptr, 42); + } + + { // allocate / deallocate with alignment + auto* ptr = res.allocate(42, 4); + static_assert(cuda::std::is_same::value, ""); + ensure_device_ptr(ptr); + + res.deallocate(ptr, 42, 4); + } + +#ifndef TEST_HAS_NO_EXCEPTIONS + { // allocate with too small alignment + while(true) { + try { + auto* ptr = res.allocate(5, 42); + } catch(const cuda::error&) { + break; + } + assert(false); + } + } + + { // allocate with non matching alignment + while(true) { + try { + auto* ptr = res.allocate(5, 1337); + } catch(const cuda::error&) { + break; + } + assert(false); + } + } +#endif // TEST_HAS_NO_EXCEPTIONS +} + +void test() { + test(cudaMemAttachGlobal); + test(cudaMemAttachHost); +} + +int main(int, char**) { + NV_IF_TARGET(NV_IS_HOST, test();) + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/equality.pass.cpp b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/equality.pass.cpp new file mode 100644 index 00000000000..faa743ee5f2 --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/equality.pass.cpp @@ -0,0 +1,132 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11 +// UNSUPPORTED: msvc-19.16 +// UNSUPPORTED: nvrtc + +#include +#include +#include +#include + +enum class AccessibilityType { + Device, + Host, +}; + +template +struct resource { + void* allocate(size_t, size_t) { return nullptr; } + void deallocate(void*, size_t, size_t) {} + + bool operator==(const resource&) const { return true; } + bool operator!=(const resource& other) const { return false; } + + template = 0> + friend void get_property(const resource&, + cuda::mr::managed_memory) noexcept {} +}; +static_assert(cuda::mr::resource >, ""); +static_assert(!cuda::mr::resource_with, + cuda::mr::managed_memory>, + ""); +static_assert(cuda::mr::resource >, ""); +static_assert(cuda::mr::resource_with, + cuda::mr::managed_memory>, + ""); + +template +struct async_resource : public resource { + void* allocate_async(size_t, size_t, cuda::stream_ref) { return nullptr; } + void deallocate_async(void*, size_t, size_t, cuda::stream_ref) {} +}; +static_assert( + cuda::mr::async_resource >, ""); +static_assert( + !cuda::mr::async_resource_with, + cuda::mr::managed_memory>, + ""); +static_assert( + cuda::mr::async_resource >, ""); +static_assert( + cuda::mr::async_resource_with, + cuda::mr::managed_memory>, + ""); + +// Ensure that we can only + +void test() { + cuda::mr::cuda_managed_memory_resource first{}; + { // comparison against a plain cuda_managed_memory_resource + cuda::mr::cuda_managed_memory_resource second{}; + assert(first == second); + assert(!(first != second)); + } + + { // comparison against a plain cuda_managed_memory_resource with a different flag set + cuda::mr::cuda_managed_memory_resource second{cudaMemAttachHost}; + assert(!(first == second)); + assert((first != second)); + } + + { // comparison against a cuda_managed_memory_resource wrapped inside a resource_ref + cuda::mr::cuda_managed_memory_resource second{}; + assert(first == cuda::mr::resource_ref{second}); + assert(!(first != cuda::mr::resource_ref{second})); + assert(cuda::mr::resource_ref{second} == first); + assert(!(cuda::mr::resource_ref{second} != first)); + } + + { // comparison against a cuda_managed_memory_resource wrapped inside a resource_ref<> + cuda::mr::cuda_managed_memory_resource second{}; + assert(first == cuda::mr::resource_ref<>{second}); + assert(!(first != cuda::mr::resource_ref<>{second})); + assert(cuda::mr::resource_ref<>{second} == first); + assert(!(cuda::mr::resource_ref<>{second} != first)); + } + + { // comparison against a different resource through resource_ref + resource host_resource{}; + resource device_resource{}; + assert(!(first == host_resource)); + assert(first != host_resource); + assert(!(first == device_resource)); + assert(first != device_resource); + + assert(!(host_resource == first)); + assert(host_resource != first); + assert(!(device_resource == first)); + assert(device_resource != first); + } + + { // comparison against a different resource through resource_ref + async_resource host_async_resource{}; + async_resource device_async_resource{}; + assert(!(first == host_async_resource)); + assert(first != host_async_resource); + assert(!(first == device_async_resource)); + assert(first != device_async_resource); + + assert(!(host_async_resource == first)); + assert(host_async_resource != first); + assert(!(device_async_resource == first)); + assert(device_async_resource != first); + } +} + +int main(int, char**) { + NV_IF_TARGET(NV_IS_HOST, + test(); + ) + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/traits.pass.cpp b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/traits.pass.cpp new file mode 100644 index 00000000000..7e9dfea4ebd --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/traits.pass.cpp @@ -0,0 +1,30 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11 +// UNSUPPORTED: msvc-19.16 +// UNSUPPORTED: nvrtc + +#include +#include + +using resource = cuda::mr::cuda_managed_memory_resource; +static_assert(!cuda::std::is_trivial::value, ""); +static_assert(!cuda::std::is_trivially_default_constructible::value, ""); +static_assert(cuda::std::is_trivially_copy_constructible::value, ""); +static_assert(cuda::std::is_trivially_move_constructible::value, ""); +static_assert(cuda::std::is_trivially_copy_assignable::value, ""); +static_assert(cuda::std::is_trivially_move_assignable::value, ""); +static_assert(cuda::std::is_trivially_destructible::value, ""); +static_assert(!cuda::std::is_empty::value, ""); + +int main(int, char**) { + return 0; +}