Skip to content

Commit

Permalink
Implement cuda::mr::cuda_pinned_memory_resource
Browse files Browse the repository at this point in the history
Fixes #1516: "Implement a memory_resource using cudaMallocHost and cudaFreeHost"
  • Loading branch information
miscco committed Apr 11, 2024
1 parent fa73fae commit c42ff8f
Show file tree
Hide file tree
Showing 6 changed files with 459 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
//===----------------------------------------------------------------------===//
//
// Part of libcu++, the C++ Standard Library for your entire system,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _CUDA__MEMORY_RESOURCE_CUDA_PINNED_MEMORY_RESOURCE_H
#define _CUDA__MEMORY_RESOURCE_CUDA_PINNED_MEMORY_RESOURCE_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#if !defined(_CCCL_COMPILER_MSVC_2017)

# if !defined(_CCCL_CUDA_COMPILER_NVCC) && !defined(_CCCL_CUDA_COMPILER_NVHPC)
# include <cuda_runtime_api.h>
# endif // !_CCCL_CUDA_COMPILER_NVCC && !_CCCL_CUDA_COMPILER_NVHPC

# include <cuda/__memory_resource/get_property.h>
# include <cuda/__memory_resource/properties.h>
# include <cuda/__memory_resource/resource.h>
# include <cuda/__memory_resource/resource_ref.h>
# include <cuda/std/__cuda/api_wrapper.h>
# include <cuda/std/detail/libcxx/include/__new/bad_alloc.h>

# if _CCCL_STD_VER >= 2014

_LIBCUDACXX_BEGIN_NAMESPACE_CUDA_MR

/**
 * @brief `cuda_pinned_memory_resource` uses `cudaMallocHost` / `cudaFreeHost` for allocation / deallocation
 * of page-locked (pinned) host memory.
 */
class cuda_pinned_memory_resource
{
private:
  // Host allocation flags forwarded to ::cudaMallocHost; restricted to the bits in __available_flags.
  unsigned int __flags_ = cudaHostAllocDefault;

  // The complete set of host allocation flags this resource accepts.
  static constexpr unsigned int __available_flags =
    cudaHostAllocDefault | cudaHostAllocPortable | cudaHostAllocMapped | cudaHostAllocWriteCombined;

public:
  /**
   * @brief Constructs a `cuda_pinned_memory_resource` with the given host allocation flags.
   * @param __flags Any combination of `cudaHostAllocDefault`, `cudaHostAllocPortable`, `cudaHostAllocMapped`
   * and `cudaHostAllocWriteCombined`. Unsupported bits are masked out (and trigger an assertion in debug builds).
   */
  constexpr cuda_pinned_memory_resource(const unsigned int __flags = cudaHostAllocDefault) noexcept
      : __flags_(__flags & __available_flags)
  {
    _LIBCUDACXX_ASSERT(__flags_ == __flags, "Unexpected flags passed to cuda_pinned_memory_resource");
  }

  /**
   * @brief Allocate host memory of size at least \p __bytes.
   * @param __bytes The size in bytes of the allocation.
   * @param __alignment The requested alignment of the allocation.
   * @throw cuda::std::bad_alloc if the requested alignment cannot be satisfied.
   * @throw cuda::cuda_error if allocation fails with a CUDA error.
   * @return Pointer to the newly allocated memory
   */
  void* allocate(const size_t __bytes, const size_t __alignment = default_cuda_malloc_host_alignment) const
  {
    // cudaMallocHost only guarantees default_cuda_malloc_host_alignment; reject anything it cannot satisfy.
    if (!__is_valid_alignment(__alignment))
    {
      _CUDA_VSTD_NOVERSION::__throw_bad_alloc();
    }

    void* __ptr{nullptr};
    _CCCL_TRY_CUDA_API(::cudaMallocHost, "Failed to allocate memory with cudaMallocHost.", &__ptr, __bytes, __flags_);
    return __ptr;
  }

  /**
   * @brief Deallocate memory pointed to by \p __ptr.
   * @param __ptr Pointer to be deallocated. Must have been allocated through a call to `allocate`
   * @param __bytes The number of bytes that was passed to the `allocate` call that returned \p __ptr.
   * @param __alignment The alignment that was passed to the `allocate` call that returned \p __ptr.
   */
  void deallocate(void* __ptr, const size_t, const size_t __alignment = default_cuda_malloc_host_alignment) const
  {
    // We need to ensure that the provided alignment matches the minimal provided alignment
    _LIBCUDACXX_ASSERT(__is_valid_alignment(__alignment),
                       "Invalid alignment passed to cuda_pinned_memory_resource::deallocate.");
    _CCCL_ASSERT_CUDA_API(::cudaFreeHost, "cuda_pinned_memory_resource::deallocate failed", __ptr);
    // Silence the unused-parameter warning when assertions are disabled.
    (void) __alignment;
  }

  /**
   * @brief Equality comparison with another cuda_pinned_memory_resource
   * @return Whether both cuda_pinned_memory_resource were constructed with the same flags
   */
  _LIBCUDACXX_NODISCARD_ATTRIBUTE constexpr bool operator==(cuda_pinned_memory_resource const& __other) const noexcept
  {
    return __flags_ == __other.__flags_;
  }
#  if _CCCL_STD_VER <= 2017
  /**
   * @brief Inequality comparison with another cuda_pinned_memory_resource
   * @return Whether both cuda_pinned_memory_resource were constructed with different flags
   */
  _LIBCUDACXX_NODISCARD_ATTRIBUTE constexpr bool operator!=(cuda_pinned_memory_resource const& __other) const noexcept
  {
    return __flags_ != __other.__flags_;
  }
#  endif // _CCCL_STD_VER <= 2017

  /**
   * @brief Equality comparison between a cuda_pinned_memory_resource and another resource
   * @param __lhs The cuda_pinned_memory_resource
   * @param __rhs The resource to compare to
   * @return If the underlying types are equality comparable, returns the result of equality comparison of both
   * resources. Otherwise, returns false.
   */
  template <class _Resource>
  _LIBCUDACXX_NODISCARD_FRIEND auto operator==(cuda_pinned_memory_resource const& __lhs, _Resource const& __rhs) noexcept
    _LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource<cuda_pinned_memory_resource, _Resource>)
  {
    return resource_ref<>{const_cast<cuda_pinned_memory_resource&>(__lhs)}
        == resource_ref<>{const_cast<_Resource&>(__rhs)};
  }
#  if _CCCL_STD_VER <= 2017
  /**
   * @copydoc operator==(cuda_pinned_memory_resource const&, _Resource const&)
   */
  template <class _Resource>
  _LIBCUDACXX_NODISCARD_FRIEND auto operator==(_Resource const& __rhs, cuda_pinned_memory_resource const& __lhs) noexcept
    _LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource<cuda_pinned_memory_resource, _Resource>)
  {
    return resource_ref<>{const_cast<cuda_pinned_memory_resource&>(__lhs)}
        == resource_ref<>{const_cast<_Resource&>(__rhs)};
  }
  /**
   * @copydoc operator==(cuda_pinned_memory_resource const&, _Resource const&)
   */
  template <class _Resource>
  _LIBCUDACXX_NODISCARD_FRIEND auto operator!=(cuda_pinned_memory_resource const& __lhs, _Resource const& __rhs) noexcept
    _LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource<cuda_pinned_memory_resource, _Resource>)
  {
    return resource_ref<>{const_cast<cuda_pinned_memory_resource&>(__lhs)}
        != resource_ref<>{const_cast<_Resource&>(__rhs)};
  }
  /**
   * @copydoc operator==(cuda_pinned_memory_resource const&, _Resource const&)
   */
  template <class _Resource>
  _LIBCUDACXX_NODISCARD_FRIEND auto operator!=(_Resource const& __rhs, cuda_pinned_memory_resource const& __lhs) noexcept
    _LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource<cuda_pinned_memory_resource, _Resource>)
  {
    return resource_ref<>{const_cast<cuda_pinned_memory_resource&>(__lhs)}
        != resource_ref<>{const_cast<_Resource&>(__rhs)};
  }
#  endif // _CCCL_STD_VER <= 2017

  /**
   * @brief Enables the `pinned_memory` property
   */
  friend constexpr void get_property(cuda_pinned_memory_resource const&, pinned_memory) noexcept {}
  /**
   * @brief Enables the `device_accessible` property
   */
  friend constexpr void get_property(cuda_pinned_memory_resource const&, device_accessible) noexcept {}
  /**
   * @brief Enables the `host_accessible` property
   */
  friend constexpr void get_property(cuda_pinned_memory_resource const&, host_accessible) noexcept {}

  /**
   * @brief Checks whether the passed in alignment is valid
   * @return true if \p __alignment is non-zero and a divisor of `default_cuda_malloc_host_alignment`
   */
  static constexpr bool __is_valid_alignment(const size_t __alignment) noexcept
  {
    // The non-zero check guards the modulo below against division by zero for a bogus zero alignment.
    return __alignment != 0 && __alignment <= default_cuda_malloc_host_alignment
        && (default_cuda_malloc_host_alignment % __alignment == 0);
  }
};
// Sanity checks: the resource must model `resource_with` for every property it advertises via get_property.
static_assert(resource_with<cuda_pinned_memory_resource, pinned_memory>, "");
static_assert(resource_with<cuda_pinned_memory_resource, device_accessible>, "");
static_assert(resource_with<cuda_pinned_memory_resource, host_accessible>, "");

_LIBCUDACXX_END_NAMESPACE_CUDA_MR

# endif // _CCCL_STD_VER >= 2014

#endif // !_CCCL_COMPILER_MSVC_2017

#endif //_CUDA__MEMORY_RESOURCE_CUDA_PINNED_MEMORY_RESOURCE_H
6 changes: 6 additions & 0 deletions libcudacxx/include/cuda/__memory_resource/properties.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ struct host_accessible
struct managed_memory
{};

/**
* @brief The \c pinned_memory property signals that the allocated memory is not pageable.
*/
struct pinned_memory
{};

_LIBCUDACXX_END_NAMESPACE_CUDA_MR

# endif // _CCCL_STD_VER >= 2014
Expand Down
1 change: 1 addition & 0 deletions libcudacxx/include/cuda/memory_resource
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ class resource_ref {

#include <cuda/__memory_resource/cuda_managed_memory_resource.h>
#include <cuda/__memory_resource/cuda_memory_resource.h>
#include <cuda/__memory_resource/cuda_pinned_memory_resource.h>
#include <cuda/__memory_resource/get_property.h>
#include <cuda/__memory_resource/properties.h>
#include <cuda/__memory_resource/resource.h>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
//===----------------------------------------------------------------------===//
//
// Part of libcu++, the C++ Standard Library for your entire system,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03, c++11
// UNSUPPORTED: msvc-19.16
// UNSUPPORTED: nvrtc

#include <cuda/memory_resource>
#include <cuda/std/cassert>
#include <cuda/std/cstdint>
#include <cuda/stream_ref>

#include "test_macros.h"

// Checks that `ptr` is a non-null pinned host pointer that is also mapped into the device address space.
void ensure_pinned_host_ptr(void* ptr)
{
  assert(ptr != nullptr);

  cudaPointerAttributes attrs{};
  const cudaError_t status = cudaPointerGetAttributes(&attrs, ptr);
  assert(status == cudaSuccess);

  assert(attrs.type == cudaMemoryTypeHost);
  assert(attrs.devicePointer != nullptr);
}

// Exercises allocate / deallocate of cuda_pinned_memory_resource constructed with `flag`,
// including rejection of invalid alignments via cuda::std::bad_alloc.
void test(const unsigned int flag)
{
  cuda::mr::cuda_pinned_memory_resource res{flag};

  { // allocate / deallocate with the default alignment
    auto* ptr = res.allocate(42);
    static_assert(cuda::std::is_same<decltype(ptr), void*>::value, "");
    ensure_pinned_host_ptr(ptr);

    res.deallocate(ptr, 42);
  }

  { // allocate / deallocate with an explicit (valid) alignment
    auto* ptr = res.allocate(42, 4);
    static_assert(cuda::std::is_same<decltype(ptr), void*>::value, "");
    ensure_pinned_host_ptr(ptr);

    res.deallocate(ptr, 42, 4);
  }

#ifndef TEST_HAS_NO_EXCEPTIONS
  { // allocate with an alignment that is not a divisor of the default alignment
    bool threw = false;
    try
    {
      (void) res.allocate(5, 42);
    }
    catch (const cuda::std::bad_alloc&)
    {
      threw = true;
    }
    assert(threw);
  }

  { // allocate with an alignment larger than the default alignment
    bool threw = false;
    try
    {
      (void) res.allocate(5, 1337);
    }
    catch (const cuda::std::bad_alloc&)
    {
      threw = true;
    }
    assert(threw);
  }
#endif // TEST_HAS_NO_EXCEPTIONS
}

// Runs the allocation tests for every host allocation flag the resource supports.
void test()
{
  constexpr unsigned int flags[] = {
    cudaHostAllocDefault, cudaHostAllocPortable, cudaHostAllocMapped, cudaHostAllocWriteCombined};
  for (const unsigned int flag : flags)
  {
    test(flag);
  }
}

int main(int, char**)
{
  // Pinned host allocation is a host-side operation; only run the tests when compiling for the host.
  NV_IF_TARGET(NV_IS_HOST, test();)
  return 0;
}
Loading

0 comments on commit c42ff8f

Please sign in to comment.