Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CMake CUDA features #9677

Merged
merged 5 commits into from
Oct 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 40 additions & 14 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@ cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
project(xgboost LANGUAGES CXX C VERSION 2.1.0)
include(cmake/Utils.cmake)
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
cmake_policy(SET CMP0022 NEW)
cmake_policy(SET CMP0079 NEW)
cmake_policy(SET CMP0076 NEW)
set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
cmake_policy(SET CMP0063 NEW)

if((${CMAKE_VERSION} VERSION_GREATER 3.13) OR (${CMAKE_VERSION} VERSION_EQUAL 3.13))
cmake_policy(SET CMP0077 NEW)
endif()
# These policies are already set from 3.18 but we still need to set the policy
# default variables here for lower minimum versions in the submodules
set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
chuckatkins marked this conversation as resolved.
Show resolved Hide resolved
set(CMAKE_POLICY_DEFAULT_CMP0076 NEW)
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
set(CMAKE_POLICY_DEFAULT_CMP0079 NEW)

message(STATUS "CMake version ${CMAKE_VERSION}")

Expand Down Expand Up @@ -41,6 +40,8 @@ write_version()
set_default_configuration_release()

#-- Options
include(CMakeDependentOption)

## User options
option(BUILD_C_DOC "Build documentation for C APIs using Doxygen." OFF)
option(USE_OPENMP "Build with OpenMP support." ON)
Expand Down Expand Up @@ -69,8 +70,24 @@ option(USE_CUDA "Build with GPU acceleration" OFF)
option(USE_PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" ON)
option(USE_NCCL "Build with NCCL to enable distributed GPU support." OFF)
option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF)
set(GPU_COMPUTE_VER "" CACHE STRING
"Semicolon separated list of compute versions to be built against, e.g. '35;61'")
if(USE_CUDA)
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES AND NOT DEFINED ENV{CUDAARCHS})
set(GPU_COMPUTE_VER "" CACHE STRING
"Semicolon separated list of compute versions to be built against, e.g. '35;61'")
else()
# Clear any cached values from previous runs
unset(GPU_COMPUTE_VER)
unset(GPU_COMPUTE_VER CACHE)
endif()
endif()
# CUDA device LTO was introduced in CMake v3.25 and requires host LTO to also be enabled. It can still
# be explicitly disabled, allowing for LTO on host only, on both host and device, or on neither;
# device-only LTO is not a supported configuration.
cmake_dependent_option(USE_CUDA_LTO
"Enable link-time optimization for CUDA device code"
"${CMAKE_INTERPROCEDURAL_OPTIMIZATION}"
"CMAKE_VERSION VERSION_GREATER_EQUAL 3.25;USE_CUDA;CMAKE_INTERPROCEDURAL_OPTIMIZATION"
OFF)
## Sanitizers
option(USE_SANITIZER "Use santizer flags" OFF)
option(SANITIZER_PATH "Path to sanitizes.")
Expand Down Expand Up @@ -168,15 +185,24 @@ endif()
if(USE_CUDA)
set(USE_OPENMP ON CACHE BOOL "CUDA requires OpenMP" FORCE)
# `export CXX=` is ignored by CMake CUDA.
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
message(STATUS "Configured CUDA host compiler: ${CMAKE_CUDA_HOST_COMPILER}")
if(NOT DEFINED CMAKE_CUDA_HOST_COMPILER AND NOT DEFINED ENV{CUDAHOSTCXX})
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE FILEPATH
"The compiler executable to use when compiling host code for CUDA or HIP language files.")
mark_as_advanced(CMAKE_CUDA_HOST_COMPILER)
message(STATUS "Configured CUDA host compiler: ${CMAKE_CUDA_HOST_COMPILER}")
endif()

if(NOT DEFINED CMAKE_CUDA_RUNTIME_LIBRARY)
set(CMAKE_CUDA_RUNTIME_LIBRARY Static)
endif()

enable_language(CUDA)
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 11.0)
message(FATAL_ERROR "CUDA version must be at least 11.0!")
endif()
set(GEN_CODE "")
format_gencode_flags("${GPU_COMPUTE_VER}" GEN_CODE)
if(DEFINED GPU_COMPUTE_VER)
compute_cmake_cuda_archs("${GPU_COMPUTE_VER}")
endif()
add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap)

find_package(CUDAToolkit REQUIRED)
Expand Down
62 changes: 26 additions & 36 deletions cmake/Utils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -82,54 +82,42 @@ function(set_default_configuration_release)
endif()
endfunction()

# Generate nvcc compiler flags given a list of architectures
# Generate CMAKE_CUDA_ARCHITECTURES from a list of architectures
# Also generates PTX for the most recent architecture for forwards compatibility
function(format_gencode_flags flags out)
function(compute_cmake_cuda_archs archs)
if(CMAKE_CUDA_COMPILER_VERSION MATCHES "^([0-9]+\\.[0-9]+)")
set(CUDA_VERSION "${CMAKE_MATCH_1}")
endif()
# Set up architecture flags
if(NOT flags)
list(SORT archs)
unset(CMAKE_CUDA_ARCHITECTURES CACHE)
set(CMAKE_CUDA_ARCHITECTURES ${archs})

# Set up defaults based on CUDA version
if(NOT CMAKE_CUDA_ARCHITECTURES)
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
set(flags "50;60;70;80;90")
set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80 90)
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
set(flags "50;60;70;80")
set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80)
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
set(flags "35;50;60;70")
set(CMAKE_CUDA_ARCHITECTURES 35 50 60 70)
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
set(flags "35;50;60;70")
set(CMAKE_CUDA_ARCHITECTURES 35 50 60 70)
else()
set(flags "35;50;60")
set(CMAKE_CUDA_ARCHITECTURES 35 50 60)
endif()
endif()

if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
cmake_policy(SET CMP0104 NEW)
chuckatkins marked this conversation as resolved.
Show resolved Hide resolved
list(GET flags -1 latest_arch)
list(TRANSFORM flags APPEND "-real")
list(APPEND flags ${latest_arch})
set(CMAKE_CUDA_ARCHITECTURES ${flags})
set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" PARENT_SCOPE)
message(STATUS "CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}")
else()
# Generate SASS
foreach(ver ${flags})
set(${out} "${${out}}--generate-code=arch=compute_${ver},code=sm_${ver};")
endforeach()
# Generate PTX for last architecture
list(GET flags -1 ver)
set(${out} "${${out}}--generate-code=arch=compute_${ver},code=compute_${ver};")
set(${out} "${${out}}" PARENT_SCOPE)
message(STATUS "CUDA GEN_CODE: ${GEN_CODE}")
endif()
list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real")
list(TRANSFORM CMAKE_CUDA_ARCHITECTURES REPLACE "([0-9]+)-real" "\\0;\\1-virtual" AT -1)
set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" PARENT_SCOPE)
message(STATUS "CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}")
endfunction()

# Set CUDA related flags to target. Must be used after `compute_cmake_cuda_archs` (formerly `format_gencode_flags`).
function(xgboost_set_cuda_flags target)
target_compile_options(${target} PRIVATE
$<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda>
$<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
$<$<COMPILE_LANGUAGE:CUDA>:${GEN_CODE}>
$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=${OpenMP_CXX_FLAGS}>
$<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all>)

Expand All @@ -138,10 +126,6 @@ function(xgboost_set_cuda_flags target)
$<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread>)
endif()

if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
set_property(TARGET ${target} PROPERTY CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES})
endif()

if(FORCE_COLORED_OUTPUT)
if(FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR
Expand Down Expand Up @@ -176,9 +160,15 @@ function(xgboost_set_cuda_flags target)

set_target_properties(${target} PROPERTIES
CUDA_STANDARD 17
CUDA_STANDARD_REQUIRED ON
CUDA_SEPARABLE_COMPILATION OFF
CUDA_RUNTIME_LIBRARY Static)
CUDA_STANDARD_REQUIRED ON)
if(USE_CUDA_LTO)
set_target_properties(${target} PROPERTIES
INTERPROCEDURAL_OPTIMIZATION ON
CUDA_SEPARABLE_COMPILATION ON)
chuckatkins marked this conversation as resolved.
Show resolved Hide resolved
else()
set_target_properties(${target} PROPERTIES
CUDA_SEPARABLE_COMPILATION OFF)
endif()
endfunction()

macro(xgboost_link_nccl target)
Expand Down
4 changes: 1 addition & 3 deletions tests/ci_build/prune_libnccl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,8 @@ cmake_policy(SET CMP0104 NEW)
set(CMAKE_CUDA_HOST_COMPILER \${CMAKE_CXX_COMPILER})
enable_language(CUDA)
include(../cmake/Utils.cmake)
set(GEN_CODE "")
format_gencode_flags("" GEN_CODE)
compute_cmake_cuda_archs("")
add_library(test OBJECT test.cu)
set_property(TARGET test PROPERTY CUDA_ARCHITECTURES \${CMAKE_CUDA_ARCHITECTURES})
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
EOF

Expand Down
Loading