From 975cfde04baa687c3ea290089a4bdc352fb431d1 Mon Sep 17 00:00:00 2001
From: Michael Schellenberger Costa <miscco@nvidia.com>
Date: Tue, 2 Apr 2024 20:35:13 +0200
Subject: [PATCH] Implement `cuda::mr::cuda_managed_memory_resource`

Fixes Implement a memory_resource using `cudaMallocManaged` and `cudaFree` #1515
---
 .../cuda_managed_memory_resource.h            | 206 ++++++++++++++++++
 .../cuda/__memory_resource/properties.h       |   5 +
 libcudacxx/include/cuda/memory_resource       |   1 +
 .../allocate.pass.cpp                         |  82 +++++++
 .../equality.pass.cpp                         | 132 +++++++++++
 .../traits.pass.cpp                           |  30 +++
 6 files changed, 456 insertions(+)
 create mode 100644 libcudacxx/include/cuda/__memory_resource/cuda_managed_memory_resource.h
 create mode 100644 libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/allocate.pass.cpp
 create mode 100644 libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/equality.pass.cpp
 create mode 100644 libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/traits.pass.cpp

diff --git a/libcudacxx/include/cuda/__memory_resource/cuda_managed_memory_resource.h b/libcudacxx/include/cuda/__memory_resource/cuda_managed_memory_resource.h
new file mode 100644
index 00000000000..f701d67435b
--- /dev/null
+++ b/libcudacxx/include/cuda/__memory_resource/cuda_managed_memory_resource.h
@@ -0,0 +1,206 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA__MEMORY_RESOURCE_CUDA_MANAGED_MEMORY_RESOURCE_H
+#define _CUDA__MEMORY_RESOURCE_CUDA_MANAGED_MEMORY_RESOURCE_H
+
+#include <cuda/__cccl_config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#if !defined(_CCCL_COMPILER_MSVC_2017)
+
+#if !defined(_CCCL_CUDA_COMPILER_NVCC) && !defined(_CCCL_CUDA_COMPILER_NVHPC)
+#  include <cuda_runtime_api.h>
+#endif // !_CCCL_CUDA_COMPILER_NVCC && !_CCCL_CUDA_COMPILER_NVHPC
+
+#include <cuda/__memory_resource/get_property.h>
+#include <cuda/__memory_resource/properties.h>
+#include <cuda/__memory_resource/resource_ref.h>
+#include <cuda/__memory_resource/resource.h>
+#include <cuda/std/detail/libcxx/include/__new/bad_alloc.h>
+
+#if _CCCL_STD_VER >= 2014
+
+_LIBCUDACXX_BEGIN_NAMESPACE_CUDA_MR
+
+/**
+ * @brief `cuda_managed_memory_resource` uses cudaMallocManaged / cudaFree for allocation/deallocation.
+ */
+class cuda_managed_memory_resource
+{
+private:
+  unsigned int __flags_ = cudaMemAttachGlobal;
+
+  static constexpr unsigned int __available_flags = cudaMemAttachGlobal | cudaMemAttachHost;
+
+public:
+  constexpr cuda_managed_memory_resource(const unsigned int __flags = cudaMemAttachGlobal) noexcept
+      : __flags_(__flags & __available_flags)
+  {
+    _LIBCUDACXX_ASSERT(__flags_ == __flags, "Unexpected flags passed to cuda_managed_memory_resource");
+  }
+
+  /**
+   * @brief Allocate device memory of size at least \p __bytes.
+   * @param __bytes The size in bytes of the allocation.
+   * @param __alignment The requested alignment of the allocation.
+   * @throw cuda::cuda_error of the returned error code
+   * @return void* Pointer to the newly allocated memory
+   */
+  void* allocate(const size_t __bytes, const size_t __alignment = __default_cuda_malloc_alignment) const
+  {
+    // We need to ensure that the provided alignment matches the minimal provided alignment
+    if (!__is_valid_alignment(__alignment))
+    {
+      _CUDA_VSTD_NOVERSION::__throw_bad_alloc();
+    }
+
+    void* __ptr{nullptr};
+    const ::cudaError_t __status = ::cudaMallocManaged(&__ptr, __bytes, __flags_);
+    switch (__status)
+    {
+      case ::cudaSuccess:
+        break;
+      default:
+        ::cudaGetLastError(); // Clear CUDA error state
+#  ifndef _LIBCUDACXX_NO_EXCEPTIONS
+        throw cuda::cuda_error{__status, "Failed to allocate memory with cudaMallocManaged."};
+#  else
+        _LIBCUDACXX_UNREACHABLE();
+#  endif
+    }
+    return __ptr;
+  }
+
+  /**
+   * @brief Deallocate memory pointed to by \p __ptr.
+   * @param __ptr Pointer to be deallocated. Must have been allocated through a call to `allocate`
+   * @param __bytes The number of bytes that was passed to the `allocate` call that returned \p __ptr.
+   * @param __alignment The alignment that was passed to the `allocate` call that returned \p __ptr.
+   */
+  void deallocate(void* __ptr, const size_t, const size_t __alignment = __default_cuda_malloc_alignment) const
+  {
+    // We need to ensure that the provided alignment matches the minimal provided alignment
+    _LIBCUDACXX_ASSERT(__is_valid_alignment(__alignment),
+                       "Invalid alignment passed to cuda_memory_resource::deallocate.");
+    const ::cudaError_t __status = ::cudaFree(__ptr);
+    (void) __status;
+    (void) __alignment;
+    _LIBCUDACXX_ASSERT(__status == cudaSuccess, "cuda_managed_memory_resource::deallocate failed");
+  }
+
+  /**
+   * @brief Equality comparison with another cuda_managed_memory_resource
+   * @return Whether both cuda_managed_memory_resource were constructed with the same flags
+   */
+  _LIBCUDACXX_NODISCARD_ATTRIBUTE constexpr bool operator==(cuda_managed_memory_resource const& __other) const noexcept
+  {
+    return __flags_ == __other.__flags_;
+  }
+#  if _CCCL_STD_VER <= 2017
+  /**
+   * @brief Inequality comparison with another cuda_managed_memory_resource
+   * @return Whether both cuda_managed_memory_resource were constructed with different flags
+   */
+  _LIBCUDACXX_NODISCARD_ATTRIBUTE constexpr bool operator!=(cuda_managed_memory_resource const& __other) const noexcept
+  {
+    return __flags_ != __other.__flags_;
+  }
+#  endif // _CCCL_STD_VER <= 2017
+
+  /**
+   * @brief Equality comparison between a cuda_memory_resource and another resource
+   * @param __lhs The cuda_memory_resource
+   * @param __rhs The resource to compare to
+   * @return Result of equality comparison of both resources converted to a resource_ref<>
+   */
+  template <class _Resource>
+  _LIBCUDACXX_NODISCARD_FRIEND auto
+  operator==(cuda_managed_memory_resource const& __lhs, _Resource const& __rhs) noexcept
+    _LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource<cuda_managed_memory_resource, _Resource>)
+  {
+    return resource_ref<>{const_cast<cuda_managed_memory_resource&>(__lhs)}
+        == resource_ref<>{const_cast<_Resource&>(__rhs)};
+  }
+#  if _CCCL_STD_VER <= 2017
+  /**
+   * @copydoc cuda_managed_memory_resource::operator<_Resource>==(cuda_managed_memory_resource const&, _Resource const&)
+   */
+  template <class _Resource>
+  _LIBCUDACXX_NODISCARD_FRIEND auto
+  operator==(_Resource const& __rhs, cuda_managed_memory_resource const& __lhs) noexcept
+    _LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource<cuda_managed_memory_resource, _Resource>)
+  {
+    return resource_ref<>{const_cast<cuda_managed_memory_resource&>(__lhs)}
+        == resource_ref<>{const_cast<_Resource&>(__rhs)};
+  }
+  /**
+   * @copydoc cuda_managed_memory_resource::operator<_Resource>==(cuda_managed_memory_resource const&, _Resource const&)
+   */
+  template <class _Resource>
+  _LIBCUDACXX_NODISCARD_FRIEND auto
+  operator!=(cuda_managed_memory_resource const& __lhs, _Resource const& __rhs) noexcept
+    _LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource<cuda_managed_memory_resource, _Resource>)
+  {
+    return resource_ref<>{const_cast<cuda_managed_memory_resource&>(__lhs)}
+        != resource_ref<>{const_cast<_Resource&>(__rhs)};
+  }
+  /**
+   * @copydoc cuda_managed_memory_resource::operator<_Resource>==(cuda_managed_memory_resource const&, _Resource const&)
+   */
+  template <class _Resource>
+  _LIBCUDACXX_NODISCARD_FRIEND auto
+  operator!=(_Resource const& __rhs, cuda_managed_memory_resource const& __lhs) noexcept
+    _LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource<cuda_managed_memory_resource, _Resource>)
+  {
+    return resource_ref<>{const_cast<cuda_managed_memory_resource&>(__lhs)}
+        != resource_ref<>{const_cast<_Resource&>(__rhs)};
+  }
+#  endif // _CCCL_STD_VER <= 2017
+
+  /**
+   * @brief Enables the `managed_memory` property
+   */
+  friend constexpr void get_property(cuda_managed_memory_resource const&, managed_memory) noexcept {}
+  /**
+   * @brief Enables the `device_accessible` property
+   */
+  friend constexpr void get_property(cuda_managed_memory_resource const&, device_accessible) noexcept {}
+  /**
+   * @brief Enables the `host_accessible` property
+   */
+  friend constexpr void get_property(cuda_managed_memory_resource const&, host_accessible) noexcept {}
+
+  /**
+   * @brief Checks whether the passed in alignment is valid
+   */
+  static constexpr bool __is_valid_alignment(const size_t __alignment) noexcept
+  {
+    return __alignment <= __default_cuda_malloc_alignment && (__default_cuda_malloc_alignment % __alignment == 0);
+  }
+};
+static_assert(resource_with<cuda_managed_memory_resource, managed_memory>, "");
+static_assert(resource_with<cuda_managed_memory_resource, device_accessible>, "");
+static_assert(resource_with<cuda_managed_memory_resource, host_accessible>, "");
+
+_LIBCUDACXX_END_NAMESPACE_CUDA_MR
+
+#endif // _CCCL_STD_VER >= 2014
+
+#endif // !_CCCL_COMPILER_MSVC_2017
+
+#endif //_CUDA__MEMORY_RESOURCE_CUDA_MANAGED_MEMORY_RESOURCE_H
diff --git a/libcudacxx/include/cuda/__memory_resource/properties.h b/libcudacxx/include/cuda/__memory_resource/properties.h
index 621f8e40764..b7a33e52d80 100644
--- a/libcudacxx/include/cuda/__memory_resource/properties.h
+++ b/libcudacxx/include/cuda/__memory_resource/properties.h
@@ -42,6 +42,11 @@ struct device_accessible
 struct host_accessible
 {};
 
+/// \struct managed_memory
+/// \brief The \c managed_memory property signals that the allocated memory is managed
+struct managed_memory
+{};
+
 _LIBCUDACXX_END_NAMESPACE_CUDA_MR
 
 #endif // _CCCL_STD_VER >= 2014
diff --git a/libcudacxx/include/cuda/memory_resource b/libcudacxx/include/cuda/memory_resource
index 14e776d157b..4a08a76d9b9 100644
--- a/libcudacxx/include/cuda/memory_resource
+++ b/libcudacxx/include/cuda/memory_resource
@@ -92,6 +92,7 @@ class resource_ref {
 #    pragma system_header
 #  endif // no system header
 
+#include <cuda/__memory_resource/cuda_managed_memory_resource.h>
 #include <cuda/__memory_resource/cuda_memory_resource.h>
 #include <cuda/__memory_resource/get_property.h>
 #include <cuda/__memory_resource/properties.h>
diff --git a/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/allocate.pass.cpp b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/allocate.pass.cpp
new file mode 100644
index 00000000000..70ddbcddc34
--- /dev/null
+++ b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/allocate.pass.cpp
@@ -0,0 +1,82 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11
+// UNSUPPORTED: msvc-19.16
+// UNSUPPORTED: nvrtc
+
+#include <cuda/memory_resource>
+#include <cuda/std/cassert>
+#include <cuda/std/cstdint>
+#include <cuda/stream_ref>
+
+#include "test_macros.h"
+
+void ensure_device_ptr(void* ptr) {
+    assert(ptr != nullptr);
+    cudaPointerAttributes attributes;
+    cudaError_t status = cudaPointerGetAttributes (&attributes, ptr);
+    assert(status == cudaSuccess);
+    assert(attributes.type == cudaMemoryTypeManaged);
+}
+
+void test(const unsigned int flag) {
+  cuda::mr::cuda_managed_memory_resource res{flag};
+
+  { // allocate / deallocate
+    auto* ptr = res.allocate(42);
+    static_assert(cuda::std::is_same<decltype(ptr), void*>::value, "");
+    ensure_device_ptr(ptr);
+
+    res.deallocate(ptr, 42);
+  }
+
+  { // allocate / deallocate with alignment
+    auto* ptr = res.allocate(42, 4);
+    static_assert(cuda::std::is_same<decltype(ptr), void*>::value, "");
+    ensure_device_ptr(ptr);
+
+    res.deallocate(ptr, 42, 4);
+  }
+
+#ifndef TEST_HAS_NO_EXCEPTIONS
+  { // allocate with too small alignment
+    while(true) {
+      try {
+        auto* ptr = res.allocate(5, 42);
+      } catch(const cuda::error&) {
+        break;
+      }
+      assert(false);
+    }
+  }
+
+  { // allocate with non matching alignment
+    while(true) {
+      try {
+        auto* ptr = res.allocate(5, 1337);
+      } catch(const cuda::error&) {
+        break;
+      }
+      assert(false);
+    }
+  }
+#endif // TEST_HAS_NO_EXCEPTIONS
+}
+
+void test() {
+  test(cudaMemAttachGlobal);
+  test(cudaMemAttachHost);
+}
+
+int main(int, char**) {
+  NV_IF_TARGET(NV_IS_HOST, test();)
+  return 0;
+}
diff --git a/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/equality.pass.cpp b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/equality.pass.cpp
new file mode 100644
index 00000000000..faa743ee5f2
--- /dev/null
+++ b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/equality.pass.cpp
@@ -0,0 +1,132 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11
+// UNSUPPORTED: msvc-19.16
+// UNSUPPORTED: nvrtc
+
+#include <cuda/memory_resource>
+#include <cuda/std/cassert>
+#include <cuda/std/cstdint>
+#include <cuda/stream_ref>
+
+enum class AccessibilityType {
+  Device,
+  Host,
+};
+
+template <AccessibilityType Accessibilty>
+struct resource {
+  void* allocate(size_t, size_t) { return nullptr; }
+  void deallocate(void*, size_t, size_t) {}
+
+  bool operator==(const resource&) const { return true; }
+  bool operator!=(const resource& other) const { return false; }
+
+  template <AccessibilityType Accessibilty2 = Accessibilty,
+            cuda::std::enable_if_t<Accessibilty2 == AccessibilityType::Device,
+                                   int> = 0>
+  friend void get_property(const resource&,
+                           cuda::mr::managed_memory) noexcept {}
+};
+static_assert(cuda::mr::resource<resource<AccessibilityType::Host> >, "");
+static_assert(!cuda::mr::resource_with<resource<AccessibilityType::Host>,
+                                       cuda::mr::managed_memory>,
+              "");
+static_assert(cuda::mr::resource<resource<AccessibilityType::Device> >, "");
+static_assert(cuda::mr::resource_with<resource<AccessibilityType::Device>,
+                                      cuda::mr::managed_memory>,
+              "");
+
+template <AccessibilityType Accessibilty>
+struct async_resource : public resource<Accessibilty> {
+  void* allocate_async(size_t, size_t, cuda::stream_ref) { return nullptr; }
+  void deallocate_async(void*, size_t, size_t, cuda::stream_ref) {}
+};
+static_assert(
+    cuda::mr::async_resource<async_resource<AccessibilityType::Host> >, "");
+static_assert(
+    !cuda::mr::async_resource_with<async_resource<AccessibilityType::Host>,
+                                   cuda::mr::managed_memory>,
+    "");
+static_assert(
+    cuda::mr::async_resource<async_resource<AccessibilityType::Device> >, "");
+static_assert(
+    cuda::mr::async_resource_with<async_resource<AccessibilityType::Device>,
+                                  cuda::mr::managed_memory>,
+    "");
+
+// Ensure that we can only
+
+void test() {
+  cuda::mr::cuda_managed_memory_resource first{};
+  { // comparison against a plain cuda_managed_memory_resource
+    cuda::mr::cuda_managed_memory_resource second{};
+    assert(first == second);
+    assert(!(first != second));
+  }
+
+  { // comparison against a plain cuda_managed_memory_resource with a different flag set
+    cuda::mr::cuda_managed_memory_resource second{cudaMemAttachHost};
+    assert(!(first == second));
+    assert((first != second));
+  }
+
+  { // comparison against a cuda_managed_memory_resource wrapped inside a resource_ref<managed_memory>
+    cuda::mr::cuda_managed_memory_resource second{};
+    assert(first == cuda::mr::resource_ref<cuda::mr::managed_memory>{second});
+    assert(!(first != cuda::mr::resource_ref<cuda::mr::managed_memory>{second}));
+    assert(cuda::mr::resource_ref<cuda::mr::managed_memory>{second} == first);
+    assert(!(cuda::mr::resource_ref<cuda::mr::managed_memory>{second} != first));
+  }
+
+  { // comparison against a cuda_managed_memory_resource wrapped inside a resource_ref<>
+    cuda::mr::cuda_managed_memory_resource second{};
+    assert(first == cuda::mr::resource_ref<>{second});
+    assert(!(first != cuda::mr::resource_ref<>{second}));
+    assert(cuda::mr::resource_ref<>{second} == first);
+    assert(!(cuda::mr::resource_ref<>{second} != first));
+  }
+
+  { // comparison against a different resource through resource_ref
+    resource<AccessibilityType::Host> host_resource{};
+    resource<AccessibilityType::Device> device_resource{};
+    assert(!(first == host_resource));
+    assert(first != host_resource);
+    assert(!(first == device_resource));
+    assert(first != device_resource);
+
+    assert(!(host_resource == first));
+    assert(host_resource != first);
+    assert(!(device_resource == first));
+    assert(device_resource != first);
+  }
+
+  { // comparison against a different resource through resource_ref
+    async_resource<AccessibilityType::Host> host_async_resource{};
+    async_resource<AccessibilityType::Device> device_async_resource{};
+    assert(!(first == host_async_resource));
+    assert(first != host_async_resource);
+    assert(!(first == device_async_resource));
+    assert(first != device_async_resource);
+
+    assert(!(host_async_resource == first));
+    assert(host_async_resource != first);
+    assert(!(device_async_resource == first));
+    assert(device_async_resource != first);
+  }
+}
+
+int main(int, char**) {
+  NV_IF_TARGET(NV_IS_HOST,
+    test();
+  )
+  return 0;
+}
diff --git a/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/traits.pass.cpp b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/traits.pass.cpp
new file mode 100644
index 00000000000..7e9dfea4ebd
--- /dev/null
+++ b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_managed_memory_resource/traits.pass.cpp
@@ -0,0 +1,30 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11
+// UNSUPPORTED: msvc-19.16
+// UNSUPPORTED: nvrtc
+
+#include <cuda/memory_resource>
+#include <cuda/std/type_traits>
+
+using resource = cuda::mr::cuda_managed_memory_resource;
+static_assert(!cuda::std::is_trivial<resource>::value, "");
+static_assert(!cuda::std::is_trivially_default_constructible<resource>::value, "");
+static_assert(cuda::std::is_trivially_copy_constructible<resource>::value, "");
+static_assert(cuda::std::is_trivially_move_constructible<resource>::value, "");
+static_assert(cuda::std::is_trivially_copy_assignable<resource>::value, "");
+static_assert(cuda::std::is_trivially_move_assignable<resource>::value, "");
+static_assert(cuda::std::is_trivially_destructible<resource>::value, "");
+static_assert(!cuda::std::is_empty<resource>::value, "");
+
+int main(int, char**) {
+  return 0;
+}