Statically link all CUDA toolkit libraries (rapidsai#4881)
This PR ensures cuML statically links all of the CUDA toolkit libraries (not just `cudart`) when a user enables `CUDA_STATIC_RUNTIME`.
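As a usage sketch (not part of the diff; everything here is the standard CMake CLI except the `CUDA_STATIC_RUNTIME` option this PR extends), a user would opt in at configure time:

    cmake -S cpp -B cpp/build -DCUDA_STATIC_RUNTIME=ON
    cmake --build cpp/build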

Authors:
  - Paul Taylor (https://github.com/trxcllnt)

Approvers:
  - Dante Gama Dessavre (https://github.com/dantegd)

URL: rapidsai#4881
trxcllnt authored Sep 8, 2022 · 1 parent 03cf2c7 · commit 00a1351
Showing 6 changed files with 74 additions and 53 deletions.
20 changes: 19 additions & 1 deletion cpp/CMakeLists.txt
@@ -68,6 +68,7 @@ option(ENABLE_CUMLPRIMS_MG "Enable algorithms that use libcumlprims_mg" ON)
 option(NVTX "Enable nvtx markers" OFF)
 option(SINGLEGPU "Disable all mnmg components and comms libraries" OFF)
 option(USE_CCACHE "Cache build artifacts with ccache" OFF)
+option(CUDA_STATIC_RUNTIME "Statically link the CUDA toolkit runtime and libraries" OFF)
 option(CUML_USE_RAFT_STATIC "Build and statically link the RAFT libraries" OFF)
 option(CUML_USE_FAISS_STATIC "Build and statically link the FAISS library for nearest neighbors search on GPU" OFF)
 option(CUML_USE_TREELITE_STATIC "Build and statically link the treelite library" OFF)
@@ -91,6 +92,7 @@ message(VERBOSE "CUML_CPP: Enabling lineinfo in nvcc: ${CUDA_ENABLE_LINE_INFO}")
 message(VERBOSE "CUML_CPP: Enabling nvtx markers: ${NVTX}")
 message(VERBOSE "CUML_CPP: Disabling all mnmg components and comms libraries: ${SINGLEGPU}")
 message(VERBOSE "CUML_CPP: Cache build artifacts with ccache: ${USE_CCACHE}")
+message(VERBOSE "CUML_CPP: Statically link the CUDA toolkit runtime and libraries: ${CUDA_STATIC_RUNTIME}")
 message(VERBOSE "CUML_CPP: Build and statically link RAFT libraries: ${CUML_USE_RAFT_STATIC}")
 message(VERBOSE "CUML_CPP: Build and statically link FAISS library: ${CUML_USE_FAISS_STATIC}")
 message(VERBOSE "CUML_CPP: Build and statically link Treelite library: ${CUML_USE_TREELITE_STATIC}")
@@ -129,13 +131,29 @@ endif()
 ##############################################################################
 # - compiler options ---------------------------------------------------------
 
+set(_ctk_static_suffix "")
+if(CUDA_STATIC_RUNTIME)
+  # If we're statically linking CTK cuBLAS,
+  # we also want to statically link BLAS
+  set(BLA_STATIC ON)
+  set(_ctk_static_suffix "_static")
+  set(_ctk_static_suffix_cufft "_static_nocallback")
+  # Control legacy FindCUDA.cmake behavior too
+  # Remove this after we push it into rapids-cmake:
+  # https://github.com/rapidsai/rapids-cmake/pull/259
+  set(CUDA_USE_STATIC_CUDA_RUNTIME ON)
+endif()
+
 if (NOT DISABLE_OPENMP)
   find_package(OpenMP)
   if(OpenMP_FOUND)
     message(STATUS "CUML_CPP: OpenMP found in ${OPENMP_INCLUDE_DIRS}")
   endif()
 endif()
 
+# CUDA runtime
+rapids_cuda_init_runtime(USE_STATIC ${CUDA_STATIC_RUNTIME})
+
 # * find CUDAToolkit package
 # * determine GPU architectures
 # * enable the CMake CUDA language
@@ -522,7 +540,7 @@ if(BUILD_CUML_CPP_LIBRARY)
       $<$<BOOL:${CUML_USE_RAFT_NN}>:raft::nn>
       $<$<BOOL:${CUML_USE_RAFT_DIST}>:raft::distance>
     PRIVATE
-      $<$<BOOL:${LINK_CUFFT}>:CUDA::cufft>
+      $<$<BOOL:${LINK_CUFFT}>:CUDA::cufft${_ctk_static_suffix_cufft}>
       ${TREELITE_LIBS}
      $<$<BOOL:${treeshap_algo}>:GPUTreeShap::GPUTreeShap>
       ${OpenMP_CXX_LIB_NAMES}
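The `_ctk_static_suffix` variables are the crux of the change: CMake's `FindCUDAToolkit` module exposes parallel imported targets for each toolkit library (e.g. `CUDA::cublas` alongside `CUDA::cublas_static`; cuFFT additionally ships `CUDA::cufft_static_nocallback`, a static variant built without callback support), so appending a suffix variable flips every link line at once, while the CUDA runtime itself is switched by rapids-cmake's `rapids_cuda_init_runtime(USE_STATIC ...)`. A minimal sketch of the pattern, with a hypothetical target name `my_lib`:

    set(_ctk_static_suffix "")
    if(CUDA_STATIC_RUNTIME)
      set(_ctk_static_suffix "_static")
    endif()
    # Resolves to CUDA::cublas when OFF, CUDA::cublas_static when ON.
    target_link_libraries(my_lib PRIVATE CUDA::cublas${_ctk_static_suffix})

Setting `BLA_STATIC ON` in the same branch asks CMake's `FindBLAS` to prefer static BLAS archives, keeping the host-side BLAS dependency consistent with the statically linked cuBLAS.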
4 changes: 2 additions & 2 deletions cpp/examples/symreg/CMakeLists_standalone.txt
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -30,4 +30,4 @@ add_executable(symreg_example symreg_example.cpp)
 set_target_properties(symreg_example PROPERTIES LINKER_LANGUAGE "CUDA")
 
 # Link cuml and cudart
-target_link_libraries(symreg_example cuml::cuml++ CUDA::cudart)
+target_link_libraries(symreg_example cuml::cuml++)
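Dropping `CUDA::cudart` here works because `cuml::cuml++` now carries the correct CUDA runtime flavor (shared or static) in its own link interface, so consumers pick it up transitively. A minimal consumer sketch under that assumption (project and target names hypothetical):

    find_package(cuml REQUIRED)
    add_executable(my_app my_app.cpp)
    # No explicit CUDA::cudart: the runtime flavor is propagated by cuml::cuml++.
    target_link_libraries(my_app PRIVATE cuml::cuml++)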
84 changes: 42 additions & 42 deletions cpp/src/umap/knn_graph/algo.cuh
@@ -48,13 +48,13 @@ void launcher(const raft::handle_t& handle,
 
 // Instantiation for dense inputs, int64_t indices
 template <>
-void launcher(const raft::handle_t& handle,
-              const ML::manifold_dense_inputs_t<float>& inputsA,
-              const ML::manifold_dense_inputs_t<float>& inputsB,
-              ML::knn_graph<int64_t, float>& out,
-              int n_neighbors,
-              const ML::UMAPParams* params,
-              cudaStream_t stream)
+inline void launcher(const raft::handle_t& handle,
+                     const ML::manifold_dense_inputs_t<float>& inputsA,
+                     const ML::manifold_dense_inputs_t<float>& inputsB,
+                     ML::knn_graph<int64_t, float>& out,
+                     int n_neighbors,
+                     const ML::UMAPParams* params,
+                     cudaStream_t stream)
 {
   std::vector<float*> ptrs(1);
   std::vector<int> sizes(1);
@@ -76,25 +76,25 @@ void launcher(const raft::handle_t& handle,
 
 // Instantiation for dense inputs, int indices
 template <>
-void launcher(const raft::handle_t& handle,
-              const ML::manifold_dense_inputs_t<float>& inputsA,
-              const ML::manifold_dense_inputs_t<float>& inputsB,
-              ML::knn_graph<int, float>& out,
-              int n_neighbors,
-              const ML::UMAPParams* params,
-              cudaStream_t stream)
+inline void launcher(const raft::handle_t& handle,
+                     const ML::manifold_dense_inputs_t<float>& inputsA,
+                     const ML::manifold_dense_inputs_t<float>& inputsB,
+                     ML::knn_graph<int, float>& out,
+                     int n_neighbors,
+                     const ML::UMAPParams* params,
+                     cudaStream_t stream)
 {
   throw raft::exception("Dense KNN doesn't yet support 32-bit integer indices");
 }
 
 template <>
-void launcher(const raft::handle_t& handle,
-              const ML::manifold_sparse_inputs_t<int, float>& inputsA,
-              const ML::manifold_sparse_inputs_t<int, float>& inputsB,
-              ML::knn_graph<int, float>& out,
-              int n_neighbors,
-              const ML::UMAPParams* params,
-              cudaStream_t stream)
+inline void launcher(const raft::handle_t& handle,
+                     const ML::manifold_sparse_inputs_t<int, float>& inputsA,
+                     const ML::manifold_sparse_inputs_t<int, float>& inputsB,
+                     ML::knn_graph<int, float>& out,
+                     int n_neighbors,
+                     const ML::UMAPParams* params,
+                     cudaStream_t stream)
 {
   raft::sparse::selection::brute_force_knn(inputsA.indptr,
                                            inputsA.indices,
@@ -119,39 +119,39 @@ void launcher(const raft::handle_t& handle,
 }
 
 template <>
-void launcher(const raft::handle_t& handle,
-              const ML::manifold_sparse_inputs_t<int64_t, float>& inputsA,
-              const ML::manifold_sparse_inputs_t<int64_t, float>& inputsB,
-              ML::knn_graph<int64_t, float>& out,
-              int n_neighbors,
-              const ML::UMAPParams* params,
-              cudaStream_t stream)
+inline void launcher(const raft::handle_t& handle,
+                     const ML::manifold_sparse_inputs_t<int64_t, float>& inputsA,
+                     const ML::manifold_sparse_inputs_t<int64_t, float>& inputsB,
+                     ML::knn_graph<int64_t, float>& out,
+                     int n_neighbors,
+                     const ML::UMAPParams* params,
+                     cudaStream_t stream)
 {
   throw raft::exception("Sparse KNN doesn't support 64-bit integer indices");
 }
 
 template <>
-void launcher(const raft::handle_t& handle,
-              const ML::manifold_precomputed_knn_inputs_t<int64_t, float>& inputsA,
-              const ML::manifold_precomputed_knn_inputs_t<int64_t, float>& inputsB,
-              ML::knn_graph<int64_t, float>& out,
-              int n_neighbors,
-              const ML::UMAPParams* params,
-              cudaStream_t stream)
+inline void launcher(const raft::handle_t& handle,
+                     const ML::manifold_precomputed_knn_inputs_t<int64_t, float>& inputsA,
+                     const ML::manifold_precomputed_knn_inputs_t<int64_t, float>& inputsB,
+                     ML::knn_graph<int64_t, float>& out,
+                     int n_neighbors,
+                     const ML::UMAPParams* params,
+                     cudaStream_t stream)
 {
   out.knn_indices = inputsA.knn_graph.knn_indices;
   out.knn_dists = inputsA.knn_graph.knn_dists;
 }
 
 // Instantiation for precomputed inputs, int indices
 template <>
-void launcher(const raft::handle_t& handle,
-              const ML::manifold_precomputed_knn_inputs_t<int, float>& inputsA,
-              const ML::manifold_precomputed_knn_inputs_t<int, float>& inputsB,
-              ML::knn_graph<int, float>& out,
-              int n_neighbors,
-              const ML::UMAPParams* params,
-              cudaStream_t stream)
+inline void launcher(const raft::handle_t& handle,
+                     const ML::manifold_precomputed_knn_inputs_t<int, float>& inputsA,
+                     const ML::manifold_precomputed_knn_inputs_t<int, float>& inputsB,
+                     ML::knn_graph<int, float>& out,
+                     int n_neighbors,
+                     const ML::UMAPParams* params,
+                     cudaStream_t stream)
 {
   out.knn_indices = inputsA.knn_graph.knn_indices;
   out.knn_dists = inputsA.knn_graph.knn_dists;
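The `inline` additions in this header (and in optimize.cuh and runner.cuh below) are one-definition-rule hygiene rather than an optimization hint: a full template specialization, like a plain function, defined in a header is an ordinary strong definition, so every translation unit that includes the header would otherwise emit a duplicate symbol and the link would fail with multiple-definition errors. A minimal C++ illustration with hypothetical names:

    // header included from several translation units
    template <typename T> void launcher(T);        // declaration: fine as-is
    template <> inline void launcher<int>(int) {}  // full specialization needs inline
    inline void find_ab() {}                       // plain header-defined function too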
2 changes: 1 addition & 1 deletion cpp/src/umap/optimize.cuh
@@ -169,7 +169,7 @@ void optimize_params(T* input,
   } while (tol_grads < 2 && num_iters < max_epochs);
 }
 
-void find_params_ab(UMAPParams* params, cudaStream_t stream)
+inline void find_params_ab(UMAPParams* params, cudaStream_t stream)
 {
   float spread = params->spread;
   float min_dist = params->min_dist;
5 changes: 4 additions & 1 deletion cpp/src/umap/runner.cuh
@@ -86,7 +86,10 @@ __global__ void init_transform(int* indices,
  * a and b, which are based on min_dist and spread
  * parameters.
  */
-void find_ab(UMAPParams* params, cudaStream_t stream) { Optimize::find_params_ab(params, stream); }
+inline void find_ab(UMAPParams* params, cudaStream_t stream)
+{
+  Optimize::find_params_ab(params, stream);
+}
 
 template <typename value_idx, typename value_t, typename umap_inputs, int TPB_X>
 void _get_graph(const raft::handle_t& handle,
12 changes: 6 additions & 6 deletions cpp/test/CMakeLists.txt
@@ -36,12 +36,12 @@ function(ConfigureTest)
     PRIVATE
       ${CUML_CPP_TARGET}
       $<$<BOOL:BUILD_CUML_C_LIBRARY>:${CUML_C_TARGET}>
-      CUDA::cublas
-      CUDA::curand
-      CUDA::cusolver
-      CUDA::cudart
-      CUDA::cusparse
-      $<$<BOOL:${LINK_CUFFT}>:CUDA::cufft>
+      CUDA::cublas${_ctk_static_suffix}
+      CUDA::curand${_ctk_static_suffix}
+      CUDA::cusolver${_ctk_static_suffix}
+      CUDA::cudart${_ctk_static_suffix}
+      CUDA::cusparse${_ctk_static_suffix}
+      $<$<BOOL:${LINK_CUFFT}>:CUDA::cufft${_ctk_static_suffix_cufft}>
       rmm::rmm
       raft::raft
       $<$<BOOL:${CUML_USE_RAFT_NN}>:raft::nn>
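Two mechanisms compose in these link lines: `_ctk_static_suffix` defaults to the empty string, so with `CUDA_STATIC_RUNTIME=OFF` the very same lines still resolve to the shared `CUDA::cublas`, `CUDA::cusolver`, and so on, while the `$<$<BOOL:...>:...>` generator expression drops cuFFT from the link line entirely unless `LINK_CUFFT` is true. A condensed sketch with a hypothetical `my_tests` target:

    target_link_libraries(my_tests PRIVATE
      # "_static" suffix applied only when CUDA_STATIC_RUNTIME=ON
      CUDA::cublas${_ctk_static_suffix}
      # linked only when LINK_CUFFT evaluates to true
      $<$<BOOL:${LINK_CUFFT}>:CUDA::cufft${_ctk_static_suffix_cufft}>
    )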
