Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CUDA vector_add sample project #2160

Merged
merged 48 commits into from
Aug 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
6634ffb
add the CUDA vector addition sample
ericniebler Jul 4, 2024
2d9f364
Remove unnecessary sample helpers
pciolkosz Aug 1, 2024
e872ca2
Merge remote-tracking branch 'origin/main' into cudax-samples
ericniebler Aug 2, 2024
93a82ce
use a specific cuda architecture instead of `native`
ericniebler Aug 2, 2024
59ea51d
use `cuda::launch` instead of launching the kernel directly
ericniebler Aug 2, 2024
e62220a
use thrust's host_ and device_vector types in the cudax sample for now
ericniebler Aug 3, 2024
adb634d
use a temporary `launch_ex` fn that applies an arg transform
ericniebler Aug 3, 2024
29732bf
minor cleanup
ericniebler Aug 3, 2024
6f13b40
Merge remote-tracking branch 'origin/main' into cudax-samples
ericniebler Aug 7, 2024
dbd7a68
use `__launch_transform` in the `vector_add` sample
ericniebler Aug 7, 2024
9ed5532
mock up a cudax::vector and the in/out annotations
ericniebler Aug 7, 2024
52e6c7e
a working example with vector, in/out, and launch
ericniebler Aug 8, 2024
82db01d
insert a sync stream at the right place
ericniebler Aug 8, 2024
f587bc9
add missing include directory
ericniebler Aug 8, 2024
4732a80
i do not like cmake
ericniebler Aug 8, 2024
c73f856
add missing header
ericniebler Aug 8, 2024
ef0b399
add explicit device selection
ericniebler Aug 8, 2024
4d1ad50
try to fix msvc build break
ericniebler Aug 8, 2024
fad5e66
try again
ericniebler Aug 8, 2024
e817699
Merge remote-tracking branch 'origin/main' into cudax-samples
ericniebler Aug 8, 2024
2500b30
cmake is evil
ericniebler Aug 8, 2024
56247ca
once more with feeling
ericniebler Aug 8, 2024
388d57c
again
ericniebler Aug 8, 2024
1d74986
again
ericniebler Aug 8, 2024
791d13c
ah, enable language CXX
ericniebler Aug 8, 2024
1ff49c6
again
ericniebler Aug 9, 2024
fc27771
try c++ 20
ericniebler Aug 9, 2024
2fc597d
better?
ericniebler Aug 9, 2024
285333e
maybe this?
ericniebler Aug 9, 2024
8a200a2
will it ever end?
ericniebler Aug 9, 2024
600dde1
wassup?
ericniebler Aug 9, 2024
0f2494e
work around msvc non-conformance
ericniebler Aug 9, 2024
87e67f3
very close now i think
ericniebler Aug 9, 2024
b43b90b
use msvc with conforming preprocessor
ericniebler Aug 9, 2024
ab29482
cmake string strangeness
ericniebler Aug 9, 2024
9850ef5
here i go again
ericniebler Aug 9, 2024
da3120d
try c++20
ericniebler Aug 9, 2024
505545b
only require c++20 when using msvc
ericniebler Aug 9, 2024
ca9d544
Replace the mdspan concept emulation with libcu++ one
miscco Aug 9, 2024
6ffa2ae
Fix formatting
miscco Aug 9, 2024
ac8e6d8
Fix issues with concept emulation
miscco Aug 9, 2024
9a13c77
Try and work around issue with nvcc deduction failure
miscco Aug 9, 2024
e2e7354
Drop the whole macro
miscco Aug 9, 2024
4be1ee9
drop more concept emulation
miscco Aug 9, 2024
bdbd29e
Fix one more issue with `is_always_strided`
miscco Aug 9, 2024
457e0d9
Merge branch 'main' into pr/ericniebler/2160
miscco Aug 9, 2024
6656965
Merge remote-tracking branch 'origin/main' into cudax-samples
ericniebler Aug 9, 2024
f9580c8
Merge branch 'cudax-samples' of github.com:ericniebler/cccl into cuda…
ericniebler Aug 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion cudax/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ if (cudax_TOPLEVEL_PROJECT)
cmake_minimum_required(VERSION 3.21)
endif()

project(cudax LANGUAGES CUDA)
project(cudax LANGUAGES CUDA CXX)

option(cudax_ENABLE_INSTALL_RULES "Enable installation of CUDA Experimental." ${cudax_TOPLEVEL_PROJECT})
if (cudax_ENABLE_INSTALL_RULES)
Expand All @@ -25,6 +25,7 @@ endif()

option(cudax_ENABLE_HEADER_TESTING "Test that CUDA Experimental's public headers compile." ON)
option(cudax_ENABLE_TESTING "Build CUDA Experimental's tests." ON)
option(cudax_ENABLE_SAMPLES "Build CUDA Experimental's samples." ON)

include(cmake/cudaxBuildCompilerTargets.cmake)
include(cmake/cudaxBuildTargetList.cmake)
Expand All @@ -41,3 +42,13 @@ if (cudax_ENABLE_TESTING)
enable_testing() # Must be in root directory
add_subdirectory(test)
endif()

if (cudax_ENABLE_SAMPLES)
include(ExternalProject)
ExternalProject_Add(cudax_samples
PREFIX samples
SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/samples"
BUILD_ALWAYS ON
INSTALL_COMMAND cmake -E echo "Skipping install step.")
add_dependencies(cudax.all cudax_samples)
endif()
2 changes: 1 addition & 1 deletion cudax/cmake/cudaxBuildCompilerTargets.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
include("${cudax_SOURCE_DIR}/cmake/AppendOptionIfAvailable.cmake")

function(cudax_build_compiler_targets)
set(cxx_compile_definitions)
set(cxx_compile_definitions LIBCUDACXX_ENABLE_EXCEPTIONS)
set(cxx_compile_options)
set(cuda_compile_options)

Expand Down
1 change: 1 addition & 0 deletions cudax/cmake/cudaxBuildTargetList.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ function(cudax_build_target_list)
file(GLOB_RECURSE all_sources
RELATIVE "${CMAKE_CURRENT_LIST_DIR}"
"${cudax_SOURCE_DIR}/include/cuda/experimental/*.hpp"
"${cudax_SOURCE_DIR}/include/cuda/experimental/*.cuh"
)
add_custom_target(cudax.all SOURCES ${all_sources})

Expand Down
19 changes: 15 additions & 4 deletions cudax/include/cuda/experimental/__detail/utility.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,31 @@ namespace cuda::experimental
{
namespace detail
{
struct __ignore
// This is a helper type that can be used to ignore function arguments.
struct [[maybe_unused]] __ignore
{
template <typename... Args>
_CCCL_HOST_DEVICE constexpr __ignore(Args&&...) noexcept
__ignore() = default;

template <typename _Arg>
_CCCL_HOST_DEVICE constexpr __ignore(_Arg&&) noexcept
{}
};

// Classes can inherit from this type to become immovable.
struct __immovable
{
__immovable() = default;
__immovable(__immovable&&) = delete;
__immovable& operator=(__immovable&&) = delete;
};
} // namespace detail

struct uninit_t
{
explicit uninit_t() = default;
};

inline constexpr uninit_t uninit{};
_CCCL_GLOBAL_CONSTANT uninit_t uninit{};
} // namespace cuda::experimental

#endif // __CUDAX_DETAIL_UTILITY_H
85 changes: 85 additions & 0 deletions cudax/include/cuda/experimental/__launch/param_kind.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
//===----------------------------------------------------------------------===//
//
// Part of CUDA Experimental in CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _CUDAX__LAUNCH_PARAM_KIND
#define _CUDAX__LAUNCH_PARAM_KIND

#include <cuda/__cccl_config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/std/__type_traits/maybe_const.h>

#include <cuda/experimental/__detail/utility.cuh>

namespace cuda::experimental
{
namespace detail
{
enum class __param_kind : unsigned
{
_in = 1,
_out = 2,
_inout = 3
};

_CCCL_NODISCARD _CCCL_HOST_DEVICE inline constexpr __param_kind operator&(__param_kind __a, __param_kind __b) noexcept
{
return __param_kind(unsigned(__a) & unsigned(__b));
}

template <typename _Ty, __param_kind _Kind>
struct _CCCL_NODISCARD __box
{
::cuda::std::__maybe_const<_Kind == __param_kind::_in, _Ty>& __val;
};

struct __in_t
{
template <class _Ty>
__box<_Ty, __param_kind::_in> operator()(const _Ty& __v) const noexcept
{
return {__v};
}
};

struct __out_t
{
template <class _Ty>
__box<_Ty, __param_kind::_out> operator()(_Ty& __v) const noexcept
{
return {__v};
}
};

struct __inout_t
{
template <class _Ty>
__box<_Ty, __param_kind::_inout> operator()(_Ty& __v) const noexcept
{
return {__v};
}
};

} // namespace detail

_CCCL_GLOBAL_CONSTANT detail::__in_t in{};
_CCCL_GLOBAL_CONSTANT detail::__out_t out{};
_CCCL_GLOBAL_CONSTANT detail::__inout_t inout{};

} // namespace cuda::experimental

#endif // _CUDAX__LAUNCH_PARAM_KIND
3 changes: 3 additions & 0 deletions cudax/include/cuda/experimental/launch.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
#ifndef __CUDAX_LAUNCH___
#define __CUDAX_LAUNCH___

#include <cuda/experimental/__launch/configuration.cuh>
#include <cuda/experimental/__launch/launch.cuh>
#include <cuda/experimental/__launch/launch_transform.cuh>
#include <cuda/experimental/__launch/param_kind.cuh>

#endif // __CUDAX_LAUNCH___
76 changes: 76 additions & 0 deletions cudax/samples/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

project(CUDAX_SAMPLES CUDA CXX)

# This example uses the CMake Package Manager (CPM) to simplify fetching CCCL from GitHub
# For more information, see https://github.com/cpm-cmake/CPM.cmake
include(cmake/CPM.cmake)

# We define these as variables so they can be overriden in CI to pull from a PR instead of CCCL `main`
# In your project, these variables are unncessary and you can just use the values directly
set(CCCL_REPOSITORY "nvidia/cccl" CACHE STRING "GitHub repository to fetch CCCL from")
set(CCCL_TAG "main" CACHE STRING "Git tag/branch to fetch from CCCL repository")

# This will automatically clone CCCL from GitHub and make the exported cmake targets available
CPMAddPackage(
NAME CCCL
GITHUB_REPOSITORY ${CCCL_REPOSITORY}
GIT_TAG ${CCCL_TAG}
GIT_SHALLOW ON
OPTIONS "CCCL_ENABLE_UNSTABLE ON"
)

# Default to building for the GPU on the current system
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES 86)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem isn't the architecture value, it's that the way this test is set up, it requires running on a GPU runner, but is ending up on a CPU runner.

@alliepiper can help you get it sorted.

endif()

# Creates a cmake executable target for the main program
add_executable(vector_add vector_add/vector_add.cu)

# "Links" the CCCL::cudax CMake target to the `vector_add` executable. This
# configures everything needed to use CCCL's headers, including setting up
# include paths, compiler flags, etc.
target_link_libraries(vector_add
PUBLIC
CCCL::cudax
CCCL::CCCL
CCCL::Thrust
CCCL::libcudacxx
INTERFACE cudax.compiler_interface
)

# TODO: These are temporary until the main branch catches up with the latest changes
target_compile_definitions(vector_add PUBLIC LIBCUDACXX_ENABLE_EXCEPTIONS)

if ("MSVC" STREQUAL "${CMAKE_CXX_COMPILER_ID}")
# mdspan on windows only works in C++20 mode
target_compile_features(vector_add PUBLIC cxx_std_20)

# cudax requires dim3 to be usable from a constexpr context, and the CUDART headers require
# __cplusplus to be defined for this to work:
target_compile_options(vector_add PRIVATE
$<$<COMPILE_LANGUAGE:CXX>:/Zc:__cplusplus /Zc:preprocessor>
$<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:-Xcompiler=/Zc:__cplusplus -Xcompiler=/Zc:preprocessor>
)
endif()

# This is only relevant for internal testing and not needed by end users.
include(CTest)
enable_testing()
add_test(NAME vector_add COMMAND vector_add)
33 changes: 33 additions & 0 deletions cudax/samples/cmake/CPM.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
set(CPM_DOWNLOAD_VERSION 0.38.1)

if(CPM_SOURCE_CACHE)
set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
elseif(DEFINED ENV{CPM_SOURCE_CACHE})
set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
else()
set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
endif()

# Expand relative path. This is important if the provided path contains a tilde (~)
get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE)

function(download_cpm)
message(STATUS "Downloading CPM.cmake to ${CPM_DOWNLOAD_LOCATION}")
file(DOWNLOAD
https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake
${CPM_DOWNLOAD_LOCATION}
)
endfunction()

if(NOT (EXISTS ${CPM_DOWNLOAD_LOCATION}))
download_cpm()
else()
# resume download if it previously failed
file(READ ${CPM_DOWNLOAD_LOCATION} check)
if("${check}" STREQUAL "")
download_cpm()
endif()
unset(check)
endif()

include(${CPM_DOWNLOAD_LOCATION})
85 changes: 85 additions & 0 deletions cudax/samples/vector_add/param_kind.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
//===----------------------------------------------------------------------===//
//
// Part of CUDA Experimental in CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _CUDAX__LAUNCH_PARAM_KIND
#define _CUDAX__LAUNCH_PARAM_KIND

#include <cuda/__cccl_config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/std/__type_traits/maybe_const.h>

#include <cuda/experimental/__detail/utility.cuh>

namespace cuda::experimental
{
namespace detail
{
enum class __param_kind : unsigned
{
_in = 1,
_out = 2,
_inout = 3
};

_CCCL_NODISCARD _CCCL_HOST_DEVICE inline constexpr __param_kind operator&(__param_kind __a, __param_kind __b) noexcept
{
return __param_kind(unsigned(__a) & unsigned(__b));
}

template <typename _Ty, __param_kind _Kind>
struct _CCCL_NODISCARD __box
{
::cuda::std::__maybe_const<_Kind == __param_kind::_in, _Ty>& __val;
};

struct __in_t
{
template <class _Ty>
__box<_Ty, __param_kind::_in> operator()(const _Ty& __v) const noexcept
{
return {__v};
}
};

struct __out_t
{
template <class _Ty>
__box<_Ty, __param_kind::_out> operator()(_Ty& __v) const noexcept
{
return {__v};
}
};

struct __inout_t
{
template <class _Ty>
__box<_Ty, __param_kind::_inout> operator()(_Ty& __v) const noexcept
{
return {__v};
}
};

} // namespace detail

_CCCL_GLOBAL_CONSTANT detail::__in_t in{};
_CCCL_GLOBAL_CONSTANT detail::__out_t out{};
_CCCL_GLOBAL_CONSTANT detail::__inout_t inout{};

} // namespace cuda::experimental

#endif // _CUDAX__LAUNCH_PARAM_KIND
Loading
Loading