diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 860c3ebeac..3cb8493d43 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -68,6 +68,7 @@ option(ENABLE_CUMLPRIMS_MG "Enable algorithms that use libcumlprims_mg" ON) option(NVTX "Enable nvtx markers" OFF) option(SINGLEGPU "Disable all mnmg components and comms libraries" OFF) option(USE_CCACHE "Cache build artifacts with ccache" OFF) +option(CUDA_STATIC_RUNTIME "Statically link the CUDA toolkit runtime and libraries" OFF) option(CUML_USE_RAFT_STATIC "Build and statically link the RAFT libraries" OFF) option(CUML_USE_FAISS_STATIC "Build and statically link the FAISS library for nearest neighbors search on GPU" OFF) option(CUML_USE_TREELITE_STATIC "Build and statically link the treelite library" OFF) @@ -91,6 +92,7 @@ message(VERBOSE "CUML_CPP: Enabling lineinfo in nvcc: ${CUDA_ENABLE_LINE_INFO}") message(VERBOSE "CUML_CPP: Enabling nvtx markers: ${NVTX}") message(VERBOSE "CUML_CPP: Disabling all mnmg components and comms libraries: ${SINGLEGPU}") message(VERBOSE "CUML_CPP: Cache build artifacts with ccache: ${USE_CCACHE}") +message(VERBOSE "CUML_CPP: Statically link the CUDA toolkit runtime and libraries: ${CUDA_STATIC_RUNTIME}") message(VERBOSE "CUML_CPP: Build and statically link RAFT libraries: ${CUML_USE_RAFT_STATIC}") message(VERBOSE "CUML_CPP: Build and statically link FAISS library: ${CUML_USE_FAISS_STATIC}") message(VERBOSE "CUML_CPP: Build and statically link Treelite library: ${CUML_USE_TREELITE_STATIC}") @@ -129,6 +131,19 @@ endif() ############################################################################## # - compiler options --------------------------------------------------------- +set(_ctk_static_suffix "") +if(CUDA_STATIC_RUNTIME) + # If we're statically linking CTK cuBLAS, + # we also want to statically link BLAS + set(BLA_STATIC ON) + set(_ctk_static_suffix "_static") + set(_ctk_static_suffix_cufft "_static_nocallback") + # Control legacy FindCUDA.cmake behavior too + # Remove 
this after we push it into rapids-cmake: + # https://github.com/rapidsai/rapids-cmake/pull/259 + set(CUDA_USE_STATIC_CUDA_RUNTIME ON) +endif() + if (NOT DISABLE_OPENMP) find_package(OpenMP) if(OpenMP_FOUND) @@ -136,6 +151,9 @@ if (NOT DISABLE_OPENMP) endif() endif() +# CUDA runtime +rapids_cuda_init_runtime(USE_STATIC ${CUDA_STATIC_RUNTIME}) + # * find CUDAToolkit package # * determine GPU architectures # * enable the CMake CUDA language @@ -522,7 +540,7 @@ if(BUILD_CUML_CPP_LIBRARY) $<$:raft::nn> $<$:raft::distance> PRIVATE - $<$:CUDA::cufft> + $<$:CUDA::cufft${_ctk_static_suffix_cufft}> ${TREELITE_LIBS} $<$:GPUTreeShap::GPUTreeShap> ${OpenMP_CXX_LIB_NAMES} diff --git a/cpp/examples/symreg/CMakeLists_standalone.txt b/cpp/examples/symreg/CMakeLists_standalone.txt index e79a215cca..eeb6903018 100644 --- a/cpp/examples/symreg/CMakeLists_standalone.txt +++ b/cpp/examples/symreg/CMakeLists_standalone.txt @@ -1,5 +1,5 @@ # -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -30,4 +30,4 @@ add_executable(symreg_example symreg_example.cpp) set_target_properties(symreg_example PROPERTIES LINKER_LANGUAGE "CUDA") -# Link cuml and cudart -target_link_libraries(symreg_example cuml::cuml++ CUDA::cudart) \ No newline at end of file +# Link cuml (the CUDA runtime is no longer listed explicitly) +target_link_libraries(symreg_example cuml::cuml++) diff --git a/cpp/src/umap/knn_graph/algo.cuh b/cpp/src/umap/knn_graph/algo.cuh index 65918d88fb..8b2743a148 100644 --- a/cpp/src/umap/knn_graph/algo.cuh +++ b/cpp/src/umap/knn_graph/algo.cuh @@ -48,13 +48,13 @@ void launcher(const raft::handle_t& handle, // Instantiation for dense inputs, int64_t indices template <> -void launcher(const raft::handle_t& handle, - const ML::manifold_dense_inputs_t& inputsA, - const ML::manifold_dense_inputs_t& inputsB, - ML::knn_graph& out, - int n_neighbors, - const ML::UMAPParams* params, - cudaStream_t stream) +inline void launcher(const raft::handle_t& handle, + const ML::manifold_dense_inputs_t& inputsA, + const ML::manifold_dense_inputs_t& inputsB, + ML::knn_graph& out, + int n_neighbors, + const ML::UMAPParams* params, + cudaStream_t stream) { std::vector ptrs(1); std::vector sizes(1); @@ -76,25 +76,25 @@ void launcher(const raft::handle_t& handle, // Instantiation for dense inputs, int indices template <> -void launcher(const raft::handle_t& handle, - const ML::manifold_dense_inputs_t& inputsA, - const ML::manifold_dense_inputs_t& inputsB, - ML::knn_graph& out, - int n_neighbors, - const ML::UMAPParams* params, - cudaStream_t stream) +inline void launcher(const raft::handle_t& handle, + const ML::manifold_dense_inputs_t& inputsA, + const ML::manifold_dense_inputs_t& inputsB, + ML::knn_graph& out, + int n_neighbors, + const ML::UMAPParams* params, + cudaStream_t stream) { throw raft::exception("Dense KNN doesn't yet support 32-bit integer indices"); } template <> -void launcher(const raft::handle_t& handle, - const ML::manifold_sparse_inputs_t& inputsA, - const ML::manifold_sparse_inputs_t& inputsB, - ML::knn_graph& out, - 
int n_neighbors, - const ML::UMAPParams* params, - cudaStream_t stream) +inline void launcher(const raft::handle_t& handle, + const ML::manifold_sparse_inputs_t& inputsA, + const ML::manifold_sparse_inputs_t& inputsB, + ML::knn_graph& out, + int n_neighbors, + const ML::UMAPParams* params, + cudaStream_t stream) { raft::sparse::selection::brute_force_knn(inputsA.indptr, inputsA.indices, @@ -119,25 +119,25 @@ void launcher(const raft::handle_t& handle, } template <> -void launcher(const raft::handle_t& handle, - const ML::manifold_sparse_inputs_t& inputsA, - const ML::manifold_sparse_inputs_t& inputsB, - ML::knn_graph& out, - int n_neighbors, - const ML::UMAPParams* params, - cudaStream_t stream) +inline void launcher(const raft::handle_t& handle, + const ML::manifold_sparse_inputs_t& inputsA, + const ML::manifold_sparse_inputs_t& inputsB, + ML::knn_graph& out, + int n_neighbors, + const ML::UMAPParams* params, + cudaStream_t stream) { throw raft::exception("Sparse KNN doesn't support 64-bit integer indices"); } template <> -void launcher(const raft::handle_t& handle, - const ML::manifold_precomputed_knn_inputs_t& inputsA, - const ML::manifold_precomputed_knn_inputs_t& inputsB, - ML::knn_graph& out, - int n_neighbors, - const ML::UMAPParams* params, - cudaStream_t stream) +inline void launcher(const raft::handle_t& handle, + const ML::manifold_precomputed_knn_inputs_t& inputsA, + const ML::manifold_precomputed_knn_inputs_t& inputsB, + ML::knn_graph& out, + int n_neighbors, + const ML::UMAPParams* params, + cudaStream_t stream) { out.knn_indices = inputsA.knn_graph.knn_indices; out.knn_dists = inputsA.knn_graph.knn_dists; @@ -145,13 +145,13 @@ void launcher(const raft::handle_t& handle, // Instantiation for precomputed inputs, int indices template <> -void launcher(const raft::handle_t& handle, - const ML::manifold_precomputed_knn_inputs_t& inputsA, - const ML::manifold_precomputed_knn_inputs_t& inputsB, - ML::knn_graph& out, - int n_neighbors, - const 
ML::UMAPParams* params, - cudaStream_t stream) +inline void launcher(const raft::handle_t& handle, + const ML::manifold_precomputed_knn_inputs_t& inputsA, + const ML::manifold_precomputed_knn_inputs_t& inputsB, + ML::knn_graph& out, + int n_neighbors, + const ML::UMAPParams* params, + cudaStream_t stream) { out.knn_indices = inputsA.knn_graph.knn_indices; out.knn_dists = inputsA.knn_graph.knn_dists; diff --git a/cpp/src/umap/optimize.cuh b/cpp/src/umap/optimize.cuh index c275201e0a..438d58d591 100644 --- a/cpp/src/umap/optimize.cuh +++ b/cpp/src/umap/optimize.cuh @@ -169,7 +169,7 @@ void optimize_params(T* input, } while (tol_grads < 2 && num_iters < max_epochs); } -void find_params_ab(UMAPParams* params, cudaStream_t stream) +inline void find_params_ab(UMAPParams* params, cudaStream_t stream) { float spread = params->spread; float min_dist = params->min_dist; diff --git a/cpp/src/umap/runner.cuh b/cpp/src/umap/runner.cuh index af304c3fce..3a89f58613 100644 --- a/cpp/src/umap/runner.cuh +++ b/cpp/src/umap/runner.cuh @@ -86,7 +86,10 @@ __global__ void init_transform(int* indices, * a and b, which are based on min_dist and spread * parameters. 
*/ -void find_ab(UMAPParams* params, cudaStream_t stream) { Optimize::find_params_ab(params, stream); } +inline void find_ab(UMAPParams* params, cudaStream_t stream) +{ + Optimize::find_params_ab(params, stream); +} template void _get_graph(const raft::handle_t& handle, diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index cc4b295bf0..b93606dcc0 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -36,12 +36,12 @@ function(ConfigureTest) PRIVATE ${CUML_CPP_TARGET} $<$:${CUML_C_TARGET}> - CUDA::cublas - CUDA::curand - CUDA::cusolver - CUDA::cudart - CUDA::cusparse - $<$:CUDA::cufft> + CUDA::cublas${_ctk_static_suffix} + CUDA::curand${_ctk_static_suffix} + CUDA::cusolver${_ctk_static_suffix} + CUDA::cudart${_ctk_static_suffix} + CUDA::cusparse${_ctk_static_suffix} + $<$:CUDA::cufft${_ctk_static_suffix_cufft}> rmm::rmm raft::raft $<$:raft::nn>