From e12278be1d532ef7681513ebc78327ef490a1df7 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 26 Jan 2022 13:38:11 -0600 Subject: [PATCH 1/4] Update developer guide to recommend no default stream parameter. --- cpp/docs/DEVELOPER_GUIDE.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md index 5e465ed6991..da3d10d4103 100644 --- a/cpp/docs/DEVELOPER_GUIDE.md +++ b/cpp/docs/DEVELOPER_GUIDE.md @@ -347,9 +347,10 @@ implemented using asynchronous APIs on the default stream (e.g., stream 0). The recommended pattern for doing this is to make the definition of the external API invoke an internal API in the `detail` namespace. The internal `detail` API has the same parameters as the -public API, plus a `rmm::cuda_stream_view` parameter at the end defaulted to -`rmm::cuda_stream_default`. The implementation should be wholly contained in the `detail` API -definition and use only asynchronous versions of CUDA APIs with the stream parameter. +public API, plus a `rmm::cuda_stream_view` parameter at the end with no default value. The public +API will call the detail API and provide `rmm::cuda_stream_default`. The implementation should be +wholly contained in the `detail` API definition and use only asynchronous versions of CUDA APIs +with the stream parameter. In order to make the `detail` API callable from other libcudf functions, it should be exposed in a header placed in the `cudf/cpp/include/detail/` directory. @@ -362,14 +363,14 @@ void external_function(...); // cpp/include/cudf/detail/header.hpp namespace detail{ -void external_function(..., rmm::cuda_stream_view stream = rmm::cuda_stream_default) +void external_function(..., rmm::cuda_stream_view stream) } // namespace detail // cudf/src/implementation.cpp namespace detail{ - // defaulted stream parameter + // use stream parameter in detail implementation void external_function(..., rmm::cuda_stream_view stream){ - // implementation uses stream w/ async APIs + // implementation uses stream with async APIs rmm::device_buffer buff(...,stream); CUDA_TRY(cudaMemcpyAsync(...,stream.value())); kernel<<<..., stream>>>(...); @@ -379,7 +380,7 @@ namespace detail{ void external_function(...){ CUDF_FUNC_RANGE(); // Auto generates NVTX range for lifetime of this function - detail::external_function(...); + detail::external_function(..., rmm::cuda_stream_default); } ``` From 0f14cc13b2a98d67ffa7c64c45d9ded7c52a12ea Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 26 Jan 2022 14:22:04 -0600 Subject: [PATCH 2/4] Apply suggestions from code review Co-authored-by: Nghia Truong --- cpp/docs/DEVELOPER_GUIDE.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md index da3d10d4103..71c413bb194 100644 --- a/cpp/docs/DEVELOPER_GUIDE.md +++ b/cpp/docs/DEVELOPER_GUIDE.md @@ -368,9 +368,9 @@ void external_function(..., rmm::cuda_stream_view stream) // cudf/src/implementation.cpp namespace detail{ - // use stream parameter in detail implementation + // Use stream parameter in detail implementation. void external_function(..., rmm::cuda_stream_view stream){ - // implementation uses stream with async APIs + // Implementation uses stream with async APIs. rmm::device_buffer buff(...,stream); CUDA_TRY(cudaMemcpyAsync(...,stream.value())); kernel<<<..., stream>>>(...); @@ -379,7 +379,7 @@ namespace detail{ } // namespace detail void external_function(...){ - CUDF_FUNC_RANGE(); // Auto generates NVTX range for lifetime of this function + CUDF_FUNC_RANGE(); // Generates NVTX range for lifetime of this function. detail::external_function(..., rmm::cuda_stream_default); } ``` From b3052cd085a4ec5a28389fb53e1b447c08baa8d2 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 26 Jan 2022 15:24:49 -0600 Subject: [PATCH 3/4] Stream goes before memory resource. --- cpp/docs/DEVELOPER_GUIDE.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md index 71c413bb194..6a70c16ad45 100644 --- a/cpp/docs/DEVELOPER_GUIDE.md +++ b/cpp/docs/DEVELOPER_GUIDE.md @@ -347,10 +347,11 @@ implemented using asynchronous APIs on the default stream (e.g., stream 0). The recommended pattern for doing this is to make the definition of the external API invoke an internal API in the `detail` namespace. The internal `detail` API has the same parameters as the -public API, plus a `rmm::cuda_stream_view` parameter at the end with no default value. The public -API will call the detail API and provide `rmm::cuda_stream_default`. The implementation should be -wholly contained in the `detail` API definition and use only asynchronous versions of CUDA APIs -with the stream parameter. +public API, plus a `rmm::cuda_stream_view` parameter at the end with no default value. If the +detail API also accepts a memory resource parameter, the stream parameter should be ideally placed +just *before* the memory resource. The public API will call the detail API and provide +`rmm::cuda_stream_default`. The implementation should be wholly contained in the `detail` API +definition and use only asynchronous versions of CUDA APIs with the stream parameter. In order to make the `detail` API callable from other libcudf functions, it should be exposed in a header placed in the `cudf/cpp/include/detail/` directory. From cf0d6db9bbe14aa800e5ee535f2dbc9c9bced856 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 28 Jan 2022 08:28:49 -0600 Subject: [PATCH 4/4] Apply suggestions from code review Co-authored-by: David Wendt <45795991+davidwendt@users.noreply.github.com> --- cpp/docs/DEVELOPER_GUIDE.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md index 6a70c16ad45..eeebe38d873 100644 --- a/cpp/docs/DEVELOPER_GUIDE.md +++ b/cpp/docs/DEVELOPER_GUIDE.md @@ -369,9 +369,9 @@ void external_function(..., rmm::cuda_stream_view stream) // cudf/src/implementation.cpp namespace detail{ - // Use stream parameter in detail implementation. + // Use the stream parameter in the detail implementation. void external_function(..., rmm::cuda_stream_view stream){ - // Implementation uses stream with async APIs. + // Implementation uses the stream with async APIs. rmm::device_buffer buff(...,stream); CUDA_TRY(cudaMemcpyAsync(...,stream.value())); kernel<<<..., stream>>>(...); @@ -380,7 +380,7 @@ namespace detail{ } // namespace detail void external_function(...){ - CUDF_FUNC_RANGE(); // Generates NVTX range for lifetime of this function. + CUDF_FUNC_RANGE(); // Generates an NVTX range for the lifetime of this function. detail::external_function(..., rmm::cuda_stream_default); } ```