CUDA vector_add sample project (#2160)
Status: Merged. 48 commits; changes shown from 11 commits.
Commits:

- 6634ffb add the CUDA vector addition sample (ericniebler)
- 2d9f364 Remove unnecessary sample helpers (pciolkosz)
- e872ca2 Merge remote-tracking branch 'origin/main' into cudax-samples (ericniebler)
- 93a82ce use a specific cuda architecture instead of `native` (ericniebler)
- 59ea51d use `cuda::launch` instead of launching the kernel directly (ericniebler)
- e62220a use thrust's host_ and device_vector types in the cudax sample for now (ericniebler)
- adb634d use a temporary `launch_ex` fn that applies an arg transform (ericniebler)
- 29732bf minor cleanup (ericniebler)
- 6f13b40 Merge remote-tracking branch 'origin/main' into cudax-samples (ericniebler)
- dbd7a68 use `__launch_transform` in the `vector_add` sample (ericniebler)
- 9ed5532 mock up a cudax::vector and the in/out annotations (ericniebler)
- 52e6c7e a working example with vector, in/out, and launch (ericniebler)
- 82db01d insert a sync stream at the right place (ericniebler)
- f587bc9 add missing include directory (ericniebler)
- 4732a80 i do not like cmake (ericniebler)
- c73f856 add missing header (ericniebler)
- ef0b399 add explicit device selection (ericniebler)
- 4d1ad50 try to fix msvc build break (ericniebler)
- fad5e66 try again (ericniebler)
- e817699 Merge remote-tracking branch 'origin/main' into cudax-samples (ericniebler)
- 2500b30 cmake is evil (ericniebler)
- 56247ca once more with feeling (ericniebler)
- 388d57c again (ericniebler)
- 1d74986 again (ericniebler)
- 791d13c ah, enable language CXX (ericniebler)
- 1ff49c6 again (ericniebler)
- fc27771 try c++ 20 (ericniebler)
- 2fc597d better? (ericniebler)
- 285333e maybe this? (ericniebler)
- 8a200a2 will it ever end? (ericniebler)
- 600dde1 wassup? (ericniebler)
- 0f2494e work around msvc non-conformance (ericniebler)
- 87e67f3 very close now i think (ericniebler)
- b43b90b use msvc with conforming preprocessor (ericniebler)
- ab29482 cmake string strangeness (ericniebler)
- 9850ef5 here i go again (ericniebler)
- da3120d try c++20 (ericniebler)
- 505545b only require c++20 when using msvc (ericniebler)
- ca9d544 Replace the mdspan concept emulation with libcu++ one (miscco)
- 6ffa2ae Fix formatting (miscco)
- ac8e6d8 Fix issues with concept emulation (miscco)
- 9a13c77 Try and work around issue with nvcc deduction failure (miscco)
- e2e7354 Drop the whole macro (miscco)
- 4be1ee9 drop more concept emulation (miscco)
- bdbd29e Fix one more issue with `is_always_strided` (miscco)
- 457e0d9 Merge branch 'main' into pr/ericniebler/2160 (miscco)
- 6656965 Merge remote-tracking branch 'origin/main' into cudax-samples (ericniebler)
- f9580c8 Merge branch 'cudax-samples' of github.com:ericniebler/cccl into cuda… (ericniebler)
File 1 of 3: the sample's top-level CMake script (new file, +53 lines)

```cmake
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

project(CUDAX_SAMPLES CUDA)

# This example uses the CMake Package Manager (CPM) to simplify fetching CCCL from GitHub.
# For more information, see https://github.com/cpm-cmake/CPM.cmake
include(cmake/CPM.cmake)

# We define these as variables so they can be overridden in CI to pull from a PR instead of CCCL `main`.
# In your project, these variables are unnecessary and you can just use the values directly.
set(CCCL_REPOSITORY "nvidia/cccl" CACHE STRING "GitHub repository to fetch CCCL from")
set(CCCL_TAG "main" CACHE STRING "Git tag/branch to fetch from CCCL repository")

# This will automatically clone CCCL from GitHub and make the exported cmake targets available.
CPMAddPackage(
  NAME CCCL
  GITHUB_REPOSITORY ${CCCL_REPOSITORY}
  GIT_TAG ${CCCL_TAG}
)

# Default to building for the GPU on the current system.
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
  set(CMAKE_CUDA_ARCHITECTURES 86)
endif()

# Creates a cmake executable target for the main program.
add_executable(vector_add vector_add/vector_add.cu)
set_property(TARGET vector_add PROPERTY CXX_STANDARD 17)
target_include_directories(vector_add PRIVATE ${CMAKE_SOURCE_DIR}/../include)

# "Links" the CCCL CMake target to the `vector_add` executable. This configures everything needed to use
# CCCL headers, including setting up include paths, compiler flags, etc.
target_link_libraries(vector_add PRIVATE CCCL::CCCL)

# This is only relevant for internal testing and not needed by end users.
include(CTest)
enable_testing()
add_test(NAME vector_add COMMAND vector_add)
```
File 2 of 3: the CPM bootstrap script, `cmake/CPM.cmake` (new file, +33 lines)

```cmake
set(CPM_DOWNLOAD_VERSION 0.38.1)

if(CPM_SOURCE_CACHE)
  set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
elseif(DEFINED ENV{CPM_SOURCE_CACHE})
  set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
else()
  set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
endif()

# Expand relative path. This is important if the provided path contains a tilde (~).
get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE)

function(download_cpm)
  message(STATUS "Downloading CPM.cmake to ${CPM_DOWNLOAD_LOCATION}")
  file(DOWNLOAD
       https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake
       ${CPM_DOWNLOAD_LOCATION}
  )
endfunction()

if(NOT (EXISTS ${CPM_DOWNLOAD_LOCATION}))
  download_cpm()
else()
  # Resume the download if it previously failed and left an empty file behind.
  file(READ ${CPM_DOWNLOAD_LOCATION} check)
  if("${check}" STREQUAL "")
    download_cpm()
  endif()
  unset(check)
endif()

include(${CPM_DOWNLOAD_LOCATION})
```
File 3 of 3: the mock `cuda::experimental::vector` container header (new file, +185 lines)

```cpp
//===----------------------------------------------------------------------===//
//
// Part of CUDA Experimental in CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _CUDAX__CONTAINER_VECTOR
#define _CUDAX__CONTAINER_VECTOR

#include <cuda/__cccl_config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
#  pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
#  pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
#  pragma system_header
#endif // no system header

#include <thrust/device_vector.h>
#include <thrust/host_vector.h>

#include <cuda/std/__type_traits/maybe_const.h>
#include <cuda/std/span>
#include <cuda/stream_ref>

#include <cuda/experimental/__detail/utility.cuh>

#if 1 //_CCCL_STD_VER >= 2017
namespace cuda::experimental
{
using ::cuda::std::span;
using ::thrust::device_vector;
using ::thrust::host_vector;

namespace detail
{
template <typename _Ty>
struct __in_box
{
  const _Ty& __val;
};

template <typename _Ty>
struct __out_box
{
  _Ty& __val;
};
} // namespace detail

template <typename _Ty>
class vector
{
public:
  vector() = default;
  explicit vector(size_t n)
      : __h_(n)
  {}

  _Ty& operator[](size_t i) noexcept
  {
    __dirty_ = true;
    return __h_[i];
  }

  const _Ty& operator[](size_t i) const noexcept
  {
    return __h_[i];
  }

private:
  enum class __param : unsigned
  {
    _in    = 1,
    _out   = 2,
    _inout = 3
  };

  _CCCL_NODISCARD_FRIEND _CCCL_HOST_DEVICE constexpr __param operator&(__param __a, __param __b) noexcept
  {
    return __param(unsigned(__a) & unsigned(__b));
  }

  void sync_host_to_device() const
  {
    if (__dirty_)
    {
      printf("sync_host_to_device\n");
      __d_     = __h_;
      __dirty_ = false;
    }
  }

  void sync_device_to_host()
  {
    printf("sync_device_to_host\n");
    __h_ = __d_;
  }

  template <__param _Param>
  struct __action : detail::__immovable
  {
    static constexpr bool __mut = ((_Param & __param::_out) == __param::_out);
    using __cv_vector           = ::cuda::std::__maybe_const<!__mut, vector>;

    explicit __action(stream_ref __str, __cv_vector& __v) noexcept
        : __str_(__str)
        , __v_(__v)
    {
      printf("action()\n");
      if constexpr ((_Param & __param::_in) == __param::_in)
      {
        __v_.sync_host_to_device();
      }
    }

    ~__action()
    {
      printf("~action()\n");
      if constexpr ((_Param & __param::_out) == __param::_out)
      {
        printf("about to synchronize the stream\n");
        fflush(stdout);
        __str_.wait(); // wait for the kernel to finish
        printf("done synchronizing the stream\n");
        fflush(stdout);
        __v_.sync_device_to_host();
      }
    }

    using __as_kernel_arg = ::cuda::std::span<_Ty>;

    operator ::cuda::std::span<_Ty>()
    {
      printf("to span\n");
      return {__v_.__d_.data().get(), __v_.__d_.size()};
    }

  public:
    stream_ref __str_;
    __cv_vector& __v_;
  };

  _CCCL_NODISCARD_FRIEND __action<__param::_inout>
  __cudax_launch_transform(stream_ref __str, const vector& __v) noexcept
  {
    return __action<__param::_inout>{__str, __v};
  }

  _CCCL_NODISCARD_FRIEND __action<__param::_in>
  __cudax_launch_transform(stream_ref __str, detail::__in_box<vector> __b) noexcept
  {
    return __action<__param::_in>{__str, __b.__val};
  }

  _CCCL_NODISCARD_FRIEND __action<__param::_out>
  __cudax_launch_transform(stream_ref __str, detail::__out_box<vector> __b) noexcept
  {
    return __action<__param::_out>{__str, __b.__val};
  }

  host_vector<_Ty> __h_;
  mutable device_vector<_Ty> __d_{};
  mutable bool __dirty_ = true;
};

template <class _Ty>
detail::__in_box<_Ty> in(const _Ty& __v) noexcept
{
  return {__v};
}

template <class _Ty>
detail::__out_box<_Ty> out(_Ty& __v) noexcept
{
  return {__v};
}

} // namespace cuda::experimental

#endif
#endif
```
Review comment: The problem isn't the architecture value; it's that the way this test is set up, it requires a GPU runner but is ending up on a CPU runner.
@alliepiper can help you get it sorted.