Skip to content

Commit

Permalink
CMake LTO and CUDA arch (#9677)
Browse files Browse the repository at this point in the history
  • Loading branch information
chuckatkins committed Oct 20, 2023
1 parent 3b86260 commit 83cdf14
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 53 deletions.
54 changes: 40 additions & 14 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@ cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
project(xgboost LANGUAGES CXX C VERSION 2.1.0)
include(cmake/Utils.cmake)
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
cmake_policy(SET CMP0022 NEW)
cmake_policy(SET CMP0079 NEW)
cmake_policy(SET CMP0076 NEW)
set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
cmake_policy(SET CMP0063 NEW)

if((${CMAKE_VERSION} VERSION_GREATER 3.13) OR (${CMAKE_VERSION} VERSION_EQUAL 3.13))
cmake_policy(SET CMP0077 NEW)
endif()
# These policies are already set from 3.18 but we still need to set the policy
# default variables here for lower minimum versions in the submodules
set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
set(CMAKE_POLICY_DEFAULT_CMP0076 NEW)
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
set(CMAKE_POLICY_DEFAULT_CMP0079 NEW)

message(STATUS "CMake version ${CMAKE_VERSION}")

Expand Down Expand Up @@ -41,6 +40,8 @@ write_version()
set_default_configuration_release()

#-- Options
include(CMakeDependentOption)

## User options
option(BUILD_C_DOC "Build documentation for C APIs using Doxygen." OFF)
option(USE_OPENMP "Build with OpenMP support." ON)
Expand Down Expand Up @@ -69,8 +70,24 @@ option(USE_CUDA "Build with GPU acceleration" OFF)
option(USE_PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" ON)
option(USE_NCCL "Build with NCCL to enable distributed GPU support." OFF)
option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF)
set(GPU_COMPUTE_VER "" CACHE STRING
"Semicolon separated list of compute versions to be built against, e.g. '35;61'")
if(USE_CUDA)
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES AND NOT DEFINED ENV{CUDAARCHS})
set(GPU_COMPUTE_VER "" CACHE STRING
"Semicolon separated list of compute versions to be built against, e.g. '35;61'")
else()
# Clear any cached values from previous runs
unset(GPU_COMPUTE_VER)
unset(GPU_COMPUTE_VER CACHE)
endif()
endif()
# CUDA device LTO was introduced in CMake v3.25 and requires host LTO to also be enabled but can still
# be explicitly disabled allowing for LTO on host only, host and device, or neither, but device-only LTO
# is not a supproted configuration
cmake_dependent_option(USE_CUDA_LTO
"Enable link-time optimization for CUDA device code"
"${CMAKE_INTERPROCEDURAL_OPTIMIZATION}"
"CMAKE_VERSION VERSION_GREATER_EQUAL 3.25;USE_CUDA;CMAKE_INTERPROCEDURAL_OPTIMIZATION"
OFF)
## Sanitizers
option(USE_SANITIZER "Use santizer flags" OFF)
option(SANITIZER_PATH "Path to sanitizes.")
Expand Down Expand Up @@ -168,15 +185,24 @@ endif()
if(USE_CUDA)
set(USE_OPENMP ON CACHE BOOL "CUDA requires OpenMP" FORCE)
# `export CXX=' is ignored by CMake CUDA.
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
message(STATUS "Configured CUDA host compiler: ${CMAKE_CUDA_HOST_COMPILER}")
if(NOT DEFINED CMAKE_CUDA_HOST_COMPILER AND NOT DEFINED ENV{CUDAHOSTCXX})
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE FILEPATH
"The compiler executable to use when compiling host code for CUDA or HIP language files.")
mark_as_advanced(CMAKE_CUDA_HOST_COMPILER)
message(STATUS "Configured CUDA host compiler: ${CMAKE_CUDA_HOST_COMPILER}")
endif()

if(NOT DEFINED CMAKE_CUDA_RUNTIME_LIBRARY)
set(CMAKE_CUDA_RUNTIME_LIBRARY Static)
endif()

enable_language(CUDA)
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 11.0)
message(FATAL_ERROR "CUDA version must be at least 11.0!")
endif()
set(GEN_CODE "")
format_gencode_flags("${GPU_COMPUTE_VER}" GEN_CODE)
if(DEFINED GPU_COMPUTE_VER)
compute_cmake_cuda_archs("${GPU_COMPUTE_VER}")
endif()
add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap)

find_package(CUDAToolkit REQUIRED)
Expand Down
62 changes: 26 additions & 36 deletions cmake/Utils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -82,54 +82,42 @@ function(set_default_configuration_release)
endif()
endfunction()

# Generate nvcc compiler flags given a list of architectures
# Generate CMAKE_CUDA_ARCHITECTURES form a list of architectures
# Also generates PTX for the most recent architecture for forwards compatibility
function(format_gencode_flags flags out)
function(compute_cmake_cuda_archs archs)
if(CMAKE_CUDA_COMPILER_VERSION MATCHES "^([0-9]+\\.[0-9]+)")
set(CUDA_VERSION "${CMAKE_MATCH_1}")
endif()
# Set up architecture flags
if(NOT flags)
list(SORT archs)
unset(CMAKE_CUDA_ARCHITECTURES CACHE)
set(CMAKE_CUDA_ARCHITECTURES ${archs})

# Set up defaults based on CUDA varsion
if(NOT CMAKE_CUDA_ARCHITECTURES)
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
set(flags "50;60;70;80;90")
set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80 90)
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
set(flags "50;60;70;80")
set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80)
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
set(flags "35;50;60;70")
set(CMAKE_CUDA_ARCHITECTURES 35 50 60 70)
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
set(flags "35;50;60;70")
set(CMAKE_CUDA_ARCHITECTURES 35 50 60 70)
else()
set(flags "35;50;60")
set(CMAKE_CUDA_ARCHITECTURES 35 50 60)
endif()
endif()

if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
cmake_policy(SET CMP0104 NEW)
list(GET flags -1 latest_arch)
list(TRANSFORM flags APPEND "-real")
list(APPEND flags ${latest_arch})
set(CMAKE_CUDA_ARCHITECTURES ${flags})
set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" PARENT_SCOPE)
message(STATUS "CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}")
else()
# Generate SASS
foreach(ver ${flags})
set(${out} "${${out}}--generate-code=arch=compute_${ver},code=sm_${ver};")
endforeach()
# Generate PTX for last architecture
list(GET flags -1 ver)
set(${out} "${${out}}--generate-code=arch=compute_${ver},code=compute_${ver};")
set(${out} "${${out}}" PARENT_SCOPE)
message(STATUS "CUDA GEN_CODE: ${GEN_CODE}")
endif()
list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real")
list(TRANSFORM CMAKE_CUDA_ARCHITECTURES REPLACE "([0-9]+)-real" "\\0;\\1-virtual" AT -1)
set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" PARENT_SCOPE)
message(STATUS "CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}")
endfunction()

# Set CUDA related flags to target. Must be used after code `format_gencode_flags`.
function(xgboost_set_cuda_flags target)
target_compile_options(${target} PRIVATE
$<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda>
$<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
$<$<COMPILE_LANGUAGE:CUDA>:${GEN_CODE}>
$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=${OpenMP_CXX_FLAGS}>
$<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all>)

Expand All @@ -138,10 +126,6 @@ function(xgboost_set_cuda_flags target)
$<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread>)
endif()

if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
set_property(TARGET ${target} PROPERTY CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES})
endif()

if(FORCE_COLORED_OUTPUT)
if(FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR
Expand Down Expand Up @@ -176,9 +160,15 @@ function(xgboost_set_cuda_flags target)

set_target_properties(${target} PROPERTIES
CUDA_STANDARD 17
CUDA_STANDARD_REQUIRED ON
CUDA_SEPARABLE_COMPILATION OFF
CUDA_RUNTIME_LIBRARY Static)
CUDA_STANDARD_REQUIRED ON)
if(USE_CUDA_LTO)
set_target_properties(${target} PROPERTIES
INTERPROCEDURAL_OPTIMIZATION ON
CUDA_SEPARABLE_COMPILATION ON)
else()
set_target_properties(${target} PROPERTIES
CUDA_SEPARABLE_COMPILATION OFF)
endif()
endfunction()

macro(xgboost_link_nccl target)
Expand Down
4 changes: 1 addition & 3 deletions tests/ci_build/prune_libnccl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,8 @@ cmake_policy(SET CMP0104 NEW)
set(CMAKE_CUDA_HOST_COMPILER \${CMAKE_CXX_COMPILER})
enable_language(CUDA)
include(../cmake/Utils.cmake)
set(GEN_CODE "")
format_gencode_flags("" GEN_CODE)
compute_cmake_cuda_archs("")
add_library(test OBJECT test.cu)
set_property(TARGET test PROPERTY CUDA_ARCHITECTURES \${CMAKE_CUDA_ARCHITECTURES})
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
EOF

Expand Down

0 comments on commit 83cdf14

Please sign in to comment.