From a34b4599d2304c07bbf4f0256778b0996821025b Mon Sep 17 00:00:00 2001 From: gonidelis Date: Wed, 13 Mar 2024 08:47:50 -0700 Subject: [PATCH 1/8] Add Devcice::FlaggedIf --- cub/cub/device/device_select.cuh | 117 ++++++++++++++++++ .../catch2_test_device_select_flagged_if.cu | 95 ++++++++++++++ 2 files changed, 212 insertions(+) create mode 100644 cub/test/catch2_test_device_select_flagged_if.cu diff --git a/cub/cub/device/device_select.cuh b/cub/cub/device/device_select.cuh index 4b8de544c8d..9c82cb359b2 100644 --- a/cub/cub/device/device_select.cuh +++ b/cub/cub/device/device_select.cuh @@ -697,6 +697,123 @@ struct DeviceSelect stream); } + template + CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t FlaggedIf( + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + FlagIterator d_flags, + OutputIteratorT d_out, + NumSelectedIteratorT d_num_selected_out, + int num_items, + SelectOp select_op, + cudaStream_t stream = 0) + { + using OffsetT = int; // Signed integer type for global offsets + using EqualityOp = NullType; // Equality operator (not used) + + return DispatchSelectIf< + InputIteratorT, + FlagIterator, + OutputIteratorT, + NumSelectedIteratorT, + SelectOp, + EqualityOp, + OffsetT, + false>::Dispatch(d_temp_storage, + temp_storage_bytes, + d_in, + d_flags, + d_out, + d_num_selected_out, + select_op, + EqualityOp(), + num_items, + stream); + } + + template + CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t FlaggedIf( + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + FlagIterator d_flags, + OutputIteratorT d_out, + NumSelectedIteratorT d_num_selected_out, + int num_items, + SelectOp select_op, + cudaStream_t stream, + bool debug_synchronous) + { + CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG + + return FlaggedIf( + d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items, select_op, stream); + } + + template + CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t FlaggedIf( + void* d_temp_storage, + size_t& temp_storage_bytes, + IteratorT d_data, + FlagIterator d_flags, + NumSelectedIteratorT d_num_selected_out, + int num_items, + SelectOp select_op, + cudaStream_t stream = 0) + { + using OffsetT = int; // Signed integer type for global offsets + using EqualityOp = NullType; // Equality operator (not used) + + constexpr bool may_alias = true; + + return DispatchSelectIf< + IteratorT, + FlagIterator, + IteratorT, + NumSelectedIteratorT, + SelectOp, + EqualityOp, + OffsetT, + false, + may_alias>::Dispatch(d_temp_storage, + temp_storage_bytes, + d_data, // in + d_flags, + d_data, // out + d_num_selected_out, + select_op, + EqualityOp(), + num_items, + stream); + } + + template + CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t FlaggedIf( + void* d_temp_storage, + size_t& temp_storage_bytes, + IteratorT d_data, + FlagIterator d_flags, + NumSelectedIteratorT d_num_selected_out, + int num_items, + SelectOp select_op, + cudaStream_t stream, + bool debug_synchronous) + { + CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG + + return FlaggedIf( + d_temp_storage, temp_storage_bytes, d_data, d_flags, d_num_selected_out, num_items, select_op, stream); + } + //! @rst //! Given an input sequence ``d_in`` having runs of consecutive equal-valued keys, //! only the first key from each run is selectively copied to ``d_out``. diff --git a/cub/test/catch2_test_device_select_flagged_if.cu b/cub/test/catch2_test_device_select_flagged_if.cu new file mode 100644 index 00000000000..f794ceb6e72 --- /dev/null +++ b/cub/test/catch2_test_device_select_flagged_if.cu @@ -0,0 +1,95 @@ +/****************************************************************************** + * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +#include + +// #include "catch2_test_helper.h" +#include "catch2_test_launch_helper.h" + +DECLARE_LAUNCH_WRAPPER(cub::DeviceSelect::FlaggedIf, select_flagged_if); + +// %PARAM% TEST_LAUNCH lid 0:1:2 + +struct always_true_t +{ + template + __device__ bool operator()(const T&) const + { + return true; + } +}; + +using all_types = + c2h::type_list>; + +using types = c2h::type_list>; + +CUB_TEST("DeviceSelect::FlaggedIf can run with empty input", "[device][select_flagged_if]", types) +{ + using type = typename c2h::get<0, TestType>; + + constexpr int num_items = 0; + c2h::device_vector in(num_items); + c2h::device_vector out(num_items); + c2h::device_vector flags(num_items); + + // Needs to be device accessible + c2h::device_vector num_selected_out(1, 0); + int* d_num_selected_out = thrust::raw_pointer_cast(num_selected_out.data()); + + select_flagged_if(in.begin(), flags.begin(), out.begin(), d_num_selected_out, num_items, always_true_t{}); + + REQUIRE(num_selected_out[0] == 0); +} + +CUB_TEST("DeviceSelect::If handles all matched", "[device][select_flagged_if]", types) +{ + using type = typename c2h::get<0, TestType>; + + const int num_items = GENERATE_COPY(take(2, random(1, 1000000))); + c2h::device_vector in(num_items); + c2h::device_vector out(num_items); + c2h::device_vector flags(num_items); + c2h::gen(CUB_SEED(2), in); + + // Needs to be device accessible + c2h::device_vector num_selected_out(1, 0); + int* d_first_num_selected_out = thrust::raw_pointer_cast(num_selected_out.data()); + + select_flagged_if(in.begin(), flags.begin(), out.begin(), d_first_num_selected_out, num_items, always_true_t{}); + + REQUIRE(num_selected_out[0] == num_items); + REQUIRE(out == in); +} From 82fcc50eca686c11018a0060e0085a4faba480a3 Mon Sep 17 00:00:00 2001 From: gonidelis Date: Thu, 14 Mar 2024 15:36:21 -0700 Subject: [PATCH 2/8] Add cub::DeviceSelect::FlaggedIf tests --- .../catch2_test_device_select_flagged_if.cu | 202 +++++++++++++++++- 1 file changed, 200 insertions(+), 2 deletions(-) diff --git a/cub/test/catch2_test_device_select_flagged_if.cu b/cub/test/catch2_test_device_select_flagged_if.cu index f794ceb6e72..570a4e008b9 100644 --- a/cub/test/catch2_test_device_select_flagged_if.cu +++ b/cub/test/catch2_test_device_select_flagged_if.cu @@ -27,13 +27,81 @@ #include +#include + // #include "catch2_test_helper.h" #include "catch2_test_launch_helper.h" +#include "thrust/functional.h" + +template +static c2h::host_vector +get_reference(const c2h::device_vector& in, const c2h::device_vector& flags, Pred const& if_predicate) +{ + struct selector + { + const T* ref_begin = nullptr; + const FlagT* flag_begin = nullptr; + Pred const& if_pred; + + constexpr selector(const T* ref, const FlagT* flag, Pred const& pred) noexcept + : ref_begin(ref) + , flag_begin(flag) + , if_pred(pred) + {} + + bool operator()(const T& val) const + { + const auto pos = &val - ref_begin; + return static_cast(if_pred(flag_begin[pos])); + } + }; + + c2h::host_vector reference = in; + c2h::host_vector h_flags = flags; + + const selector pred{ + thrust::raw_pointer_cast(reference.data()), thrust::raw_pointer_cast(h_flags.data()), if_predicate}; + const auto boundary = std::stable_partition(reference.begin(), reference.end(), pred); + reference.erase(boundary, reference.end()); + return reference; +} DECLARE_LAUNCH_WRAPPER(cub::DeviceSelect::FlaggedIf, select_flagged_if); // %PARAM% TEST_LAUNCH lid 0:1:2 +struct is_even +{ + __host__ __device__ bool operator()(int const& elem) const + { + return !(elem % 2); + } +}; + +template +struct less_than_t +{ + T compare; + + explicit __host__ less_than_t(T compare) + : compare(compare) + {} + + __host__ __device__ bool operator()(const T& a) const + { + return a < compare; + } +}; + +struct always_false_t +{ + template + __device__ bool operator()(const T&) const + { + return false; + } +}; + struct always_true_t { template @@ -54,7 +122,8 @@ using all_types = long2, c2h::custom_type_t>; -using types = c2h::type_list>; +using types = c2h:: + type_list>; CUB_TEST("DeviceSelect::FlaggedIf can run with empty input", "[device][select_flagged_if]", types) { @@ -74,7 +143,7 @@ CUB_TEST("DeviceSelect::FlaggedIf can run with empty input", "[device][select_fl REQUIRE(num_selected_out[0] == 0); } -CUB_TEST("DeviceSelect::If handles all matched", "[device][select_flagged_if]", types) +CUB_TEST("DeviceSelect::FlaggedIf handles all matched", "[device][select_flagged_if]", types) { using type = typename c2h::get<0, TestType>; @@ -93,3 +162,132 @@ CUB_TEST("DeviceSelect::If handles all matched", "[device][select_flagged_if]", REQUIRE(num_selected_out[0] == num_items); REQUIRE(out == in); } + +CUB_TEST("DeviceSelect::FlaggedIf handles no matched", "[device][select_flagged_if]", types) +{ + using type = typename c2h::get<0, TestType>; + + const int num_items = GENERATE_COPY(take(2, random(1, 1000000))); + c2h::device_vector in(num_items); + c2h::device_vector out(0); + c2h::gen(CUB_SEED(2), in); + + c2h::device_vector flags(num_items, 0); + + // Needs to be device accessible + c2h::device_vector num_selected_out(1, 0); + int* d_first_num_selected_out = thrust::raw_pointer_cast(num_selected_out.data()); + + select_flagged_if(in.begin(), flags.begin(), out.begin(), d_first_num_selected_out, num_items, always_false_t{}); + + REQUIRE(num_selected_out[0] == 0); +} + +CUB_TEST("DeviceSelect::FlaggedIf does not change input", "[device][select_flagged_if]", types) +{ + using type = typename c2h::get<0, TestType>; + + const int num_items = GENERATE_COPY(take(2, random(1, 1000000))); + c2h::device_vector in(num_items); + c2h::device_vector out(num_items); + c2h::gen(CUB_SEED(2), in); + + c2h::device_vector flags(num_items); + c2h::gen(CUB_SEED(1), flags, 0, 1); + const int num_selected = static_cast(thrust::count(c2h::device_policy, flags.begin(), flags.end(), 0)); + + // Needs to be device accessible + c2h::device_vector num_selected_out(1, 0); + int* d_num_selected_out = thrust::raw_pointer_cast(num_selected_out.data()); + + // copy input first + c2h::device_vector reference = in; + + select_flagged_if(in.begin(), flags.begin(), out.begin(), d_num_selected_out, num_items, is_even{}); + + REQUIRE(num_selected == num_selected_out[0]); + REQUIRE(reference == in); +} + +CUB_TEST("DeviceSelect::FlaggedIf is stable", + "[device][select_flagged_if]", + c2h::type_list>) +{ + using type = typename c2h::get<0, TestType>; + + const int num_items = GENERATE_COPY(take(2, random(1, 1000000))); + c2h::device_vector in(num_items); + c2h::device_vector out(num_items); + c2h::gen(CUB_SEED(2), in); + + c2h::device_vector flags(num_items); + c2h::gen(CUB_SEED(1), flags, 0, 1); + const int num_selected = static_cast(thrust::count(c2h::device_policy, flags.begin(), flags.end(), 0)); + const c2h::host_vector reference = get_reference(in, flags, is_even{}); + + // Needs to be device accessible + c2h::device_vector num_selected_out(1, 0); + int* d_num_selected_out = thrust::raw_pointer_cast(num_selected_out.data()); + + select_flagged_if(in.begin(), flags.begin(), out.begin(), d_num_selected_out, num_items, is_even{}); + + out.resize(num_selected_out[0]); + REQUIRE(num_selected == num_selected_out[0]); + REQUIRE(reference == out); +} + +CUB_TEST("DeviceSelect::FlaggedIf works with iterators", "[device][select_flagged_if]", all_types) +{ + using type = typename c2h::get<0, TestType>; + + const int num_items = GENERATE_COPY(take(2, random(1, 1000000))); + c2h::device_vector in(num_items); + c2h::device_vector out(num_items); + c2h::gen(CUB_SEED(2), in); + + c2h::device_vector flags(num_items); + c2h::gen(CUB_SEED(1), flags, 0, 1); + const int num_selected = static_cast(thrust::count(c2h::device_policy, flags.begin(), flags.end(), 0)); + const c2h::host_vector reference = get_reference(in, flags, is_even{}); + + // Needs to be device accessible + c2h::device_vector num_selected_out(1, 0); + int* d_num_selected_out = thrust::raw_pointer_cast(num_selected_out.data()); + + select_flagged_if(in.data(), flags.begin(), out.data(), d_num_selected_out, num_items, is_even{}); + + out.resize(num_selected_out[0]); + REQUIRE(num_selected == num_selected_out[0]); + REQUIRE(reference == out); +} + +CUB_TEST("DeviceSelect::FlaggedIf works with pointers", "[device][select_flagged_if]", types) +{ + using type = typename c2h::get<0, TestType>; + + const int num_items = GENERATE_COPY(take(2, random(1, 1000000))); + c2h::device_vector in(num_items); + c2h::device_vector out(num_items); + c2h::gen(CUB_SEED(2), in); + + c2h::device_vector flags(num_items); + c2h::gen(CUB_SEED(1), flags, 0, 1); + + const int num_selected = static_cast(thrust::count(c2h::device_policy, flags.begin(), flags.end(), 0)); + const c2h::host_vector reference = get_reference(in, flags, is_even{}); + + // Needs to be device accessible + c2h::device_vector num_selected_out(1, 0); + int *d_num_selected_out = thrust::raw_pointer_cast(num_selected_out.data()); + + select_flagged_if(thrust::raw_pointer_cast(in.data()), + thrust::raw_pointer_cast(flags.data()), + thrust::raw_pointer_cast(out.data()), + d_num_selected_out, + num_items, + is_even{}); + + out.resize(num_selected_out[0]); + REQUIRE(num_selected == num_selected_out[0]); + REQUIRE(reference == out); +} From f5b3634dd2e26edf574d9c354b6bd4aabc5c13cc Mon Sep 17 00:00:00 2001 From: gonidelis Date: Wed, 20 Mar 2024 12:40:32 -0700 Subject: [PATCH 3/8] Add unit tests for DeviceSelect::FlaggedIf --- cub/cub/device/device_select.cuh | 41 ----- .../catch2_test_device_select_flagged_if.cu | 167 +++++++++--------- 2 files changed, 80 insertions(+), 128 deletions(-) diff --git a/cub/cub/device/device_select.cuh b/cub/cub/device/device_select.cuh index 9c82cb359b2..a0593beed5a 100644 --- a/cub/cub/device/device_select.cuh +++ b/cub/cub/device/device_select.cuh @@ -736,29 +736,6 @@ struct DeviceSelect stream); } - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t FlaggedIf( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - FlagIterator d_flags, - OutputIteratorT d_out, - NumSelectedIteratorT d_num_selected_out, - int num_items, - SelectOp select_op, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return FlaggedIf( - d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items, select_op, stream); - } - template CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t FlaggedIf( void* d_temp_storage, @@ -796,24 +773,6 @@ struct DeviceSelect stream); } - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t FlaggedIf( - void* d_temp_storage, - size_t& temp_storage_bytes, - IteratorT d_data, - FlagIterator d_flags, - NumSelectedIteratorT d_num_selected_out, - int num_items, - SelectOp select_op, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return FlaggedIf( - d_temp_storage, temp_storage_bytes, d_data, d_flags, d_num_selected_out, num_items, select_op, stream); - } - //! @rst //! Given an input sequence ``d_in`` having runs of consecutive equal-valued keys, //! only the first key from each run is selectively copied to ``d_out``. diff --git a/cub/test/catch2_test_device_select_flagged_if.cu b/cub/test/catch2_test_device_select_flagged_if.cu index 570a4e008b9..2778b7dc68c 100644 --- a/cub/test/catch2_test_device_select_flagged_if.cu +++ b/cub/test/catch2_test_device_select_flagged_if.cu @@ -28,28 +28,28 @@ #include #include +#include -// #include "catch2_test_helper.h" +#include "catch2_test_helper.h" #include "catch2_test_launch_helper.h" -#include "thrust/functional.h" template static c2h::host_vector -get_reference(const c2h::device_vector& in, const c2h::device_vector& flags, Pred const& if_predicate) +get_reference(c2h::device_vector const& in, c2h::device_vector const& flags, Pred if_predicate) { struct selector { - const T* ref_begin = nullptr; - const FlagT* flag_begin = nullptr; + T const* ref_begin = nullptr; + FlagT const* flag_begin = nullptr; Pred const& if_pred; - constexpr selector(const T* ref, const FlagT* flag, Pred const& pred) noexcept + constexpr selector(T const* ref, FlagT const* flag, Pred const& pred) noexcept : ref_begin(ref) , flag_begin(flag) , if_pred(pred) {} - bool operator()(const T& val) const + bool operator()(T const& val) const { const auto pos = &val - ref_begin; return static_cast(if_pred(flag_begin[pos])); @@ -70,26 +70,32 @@ DECLARE_LAUNCH_WRAPPER(cub::DeviceSelect::FlaggedIf, select_flagged_if); // %PARAM% TEST_LAUNCH lid 0:1:2 -struct is_even +using custom_t = c2h::custom_type_t; + +template +struct is_even_t { - __host__ __device__ bool operator()(int const& elem) const + __host__ __device__ bool operator()(T const& elem) const { return !(elem % 2); } }; -template -struct less_than_t +template <> +struct is_even_t { - T compare; - - explicit __host__ less_than_t(T compare) - : compare(compare) - {} + __host__ __device__ bool operator()(custom_t elem) const + { + return !(elem.key % 2); + } +}; +struct equal_to_default_t +{ + template __host__ __device__ bool operator()(const T& a) const { - return a < compare; + return a == T{}; } }; @@ -112,18 +118,11 @@ struct always_true_t }; using all_types = - c2h::type_list>; - -using types = c2h:: - type_list>; + c2h::type_list; + +using types = c2h::type_list; + +using flag_types = c2h::type_list; CUB_TEST("DeviceSelect::FlaggedIf can run with empty input", "[device][select_flagged_if]", types) { @@ -183,109 +182,103 @@ CUB_TEST("DeviceSelect::FlaggedIf handles no matched", "[device][select_flagged_ REQUIRE(num_selected_out[0] == 0); } -CUB_TEST("DeviceSelect::FlaggedIf does not change input", "[device][select_flagged_if]", types) +CUB_TEST("DeviceSelect::FlaggedIf does not change input and is stable", + "[device][select_flagged_if]", + c2h::type_list, + flag_types) { - using type = typename c2h::get<0, TestType>; + using input_type = typename c2h::get<0, TestType>; + using flag_type = typename c2h::get<1, TestType>; const int num_items = GENERATE_COPY(take(2, random(1, 1000000))); - c2h::device_vector in(num_items); - c2h::device_vector out(num_items); + c2h::device_vector in(num_items); + c2h::device_vector out(num_items); c2h::gen(CUB_SEED(2), in); - c2h::device_vector flags(num_items); - c2h::gen(CUB_SEED(1), flags, 0, 1); - const int num_selected = static_cast(thrust::count(c2h::device_policy, flags.begin(), flags.end(), 0)); + is_even_t is_even{}; + + c2h::device_vector flags(num_items); + c2h::gen(CUB_SEED(1), flags); + const int num_selected = static_cast(thrust::count_if(c2h::device_policy, flags.begin(), flags.end(), is_even)); + const c2h::host_vector reference_out = get_reference(in, flags, is_even); // Needs to be device accessible c2h::device_vector num_selected_out(1, 0); int* d_num_selected_out = thrust::raw_pointer_cast(num_selected_out.data()); // copy input first - c2h::device_vector reference = in; + c2h::device_vector reference_in = in; - select_flagged_if(in.begin(), flags.begin(), out.begin(), d_num_selected_out, num_items, is_even{}); + select_flagged_if(in.begin(), flags.begin(), out.begin(), d_num_selected_out, num_items, is_even); REQUIRE(num_selected == num_selected_out[0]); - REQUIRE(reference == in); -} + REQUIRE(reference_in == in); -CUB_TEST("DeviceSelect::FlaggedIf is stable", - "[device][select_flagged_if]", - c2h::type_list>) -{ - using type = typename c2h::get<0, TestType>; - - const int num_items = GENERATE_COPY(take(2, random(1, 1000000))); - c2h::device_vector in(num_items); - c2h::device_vector out(num_items); - c2h::gen(CUB_SEED(2), in); - - c2h::device_vector flags(num_items); - c2h::gen(CUB_SEED(1), flags, 0, 1); - const int num_selected = static_cast(thrust::count(c2h::device_policy, flags.begin(), flags.end(), 0)); - const c2h::host_vector reference = get_reference(in, flags, is_even{}); - - // Needs to be device accessible - c2h::device_vector num_selected_out(1, 0); - int* d_num_selected_out = thrust::raw_pointer_cast(num_selected_out.data()); - - select_flagged_if(in.begin(), flags.begin(), out.begin(), d_num_selected_out, num_items, is_even{}); + // Ensure that we did not overwrite other elements + const auto boundary = out.begin() + num_selected_out[0]; + REQUIRE(thrust::all_of(c2h::device_policy, boundary, out.end(), equal_to_default_t{})); out.resize(num_selected_out[0]); - REQUIRE(num_selected == num_selected_out[0]); - REQUIRE(reference == out); + REQUIRE(reference_out == out); } -CUB_TEST("DeviceSelect::FlaggedIf works with iterators", "[device][select_flagged_if]", all_types) +CUB_TEST("DeviceSelect::If works with iterators", "[device][select_if]", all_types, flag_types) { - using type = typename c2h::get<0, TestType>; + using input_type = typename c2h::get<0, TestType>; + using flag_type = typename c2h::get<1, TestType>; const int num_items = GENERATE_COPY(take(2, random(1, 1000000))); - c2h::device_vector in(num_items); - c2h::device_vector out(num_items); + c2h::device_vector in(num_items); + c2h::device_vector out(num_items); c2h::gen(CUB_SEED(2), in); - c2h::device_vector flags(num_items); - c2h::gen(CUB_SEED(1), flags, 0, 1); - const int num_selected = static_cast(thrust::count(c2h::device_policy, flags.begin(), flags.end(), 0)); - const c2h::host_vector reference = get_reference(in, flags, is_even{}); + is_even_t is_even{}; + + c2h::device_vector flags(num_items); + c2h::gen(CUB_SEED(1), flags); + const int num_selected = static_cast(thrust::count_if(c2h::device_policy, flags.begin(), flags.end(), is_even)); + const c2h::host_vector reference = get_reference(in, flags, is_even); // Needs to be device accessible c2h::device_vector num_selected_out(1, 0); - int* d_num_selected_out = thrust::raw_pointer_cast(num_selected_out.data()); + int* d_first_num_selected_out = thrust::raw_pointer_cast(num_selected_out.data()); - select_flagged_if(in.data(), flags.begin(), out.data(), d_num_selected_out, num_items, is_even{}); + select_flagged_if(in.begin(), flags.begin(), out.begin(), d_first_num_selected_out, num_items, is_even); out.resize(num_selected_out[0]); REQUIRE(num_selected == num_selected_out[0]); REQUIRE(reference == out); } -CUB_TEST("DeviceSelect::FlaggedIf works with pointers", "[device][select_flagged_if]", types) +CUB_TEST("DeviceSelect::Flagged works with pointers", "[device][select_flagged]", types, flag_types) { - using type = typename c2h::get<0, TestType>; + using input_type = typename c2h::get<0, TestType>; + using flag_type = typename c2h::get<1, TestType>; const int num_items = GENERATE_COPY(take(2, random(1, 1000000))); - c2h::device_vector in(num_items); - c2h::device_vector out(num_items); + c2h::device_vector in(num_items); + c2h::device_vector out(num_items); c2h::gen(CUB_SEED(2), in); - c2h::device_vector flags(num_items); - c2h::gen(CUB_SEED(1), flags, 0, 1); + is_even_t is_even{}; + + c2h::device_vector flags(num_items); + c2h::gen(CUB_SEED(1), flags); - const int num_selected = static_cast(thrust::count(c2h::device_policy, flags.begin(), flags.end(), 0)); - const c2h::host_vector reference = get_reference(in, flags, is_even{}); + const int num_selected = static_cast(thrust::count_if(c2h::device_policy, flags.begin(), flags.end(), is_even)); + const c2h::host_vector reference = get_reference(in, flags, is_even); // Needs to be device accessible c2h::device_vector num_selected_out(1, 0); int *d_num_selected_out = thrust::raw_pointer_cast(num_selected_out.data()); - select_flagged_if(thrust::raw_pointer_cast(in.data()), - thrust::raw_pointer_cast(flags.data()), - thrust::raw_pointer_cast(out.data()), - d_num_selected_out, - num_items, - is_even{}); + select_flagged_if( + thrust::raw_pointer_cast(in.data()), + thrust::raw_pointer_cast(flags.data()), + thrust::raw_pointer_cast(out.data()), + d_num_selected_out, + num_items, + is_even); out.resize(num_selected_out[0]); REQUIRE(num_selected == num_selected_out[0]); From a585e26774ab493a5547a7faaeaa8bda13254596 Mon Sep 17 00:00:00 2001 From: gonidelis Date: Wed, 20 Mar 2024 13:45:52 -0700 Subject: [PATCH 4/8] Add cub::DeviceSelect::FlaggedIf docs and API example test --- cub/cub/device/device_select.cuh | 72 ++++++++++++++++++ cub/test/catch2_test_device_select_api.cu | 89 +++++++++++++++++++++++ 2 files changed, 161 insertions(+) create mode 100644 cub/test/catch2_test_device_select_api.cu diff --git a/cub/cub/device/device_select.cuh b/cub/cub/device/device_select.cuh index a0593beed5a..0e5329b4e9d 100644 --- a/cub/cub/device/device_select.cuh +++ b/cub/cub/device/device_select.cuh @@ -697,6 +697,78 @@ struct DeviceSelect stream); } +//! @rst + //! Uses the ``select_op`` functor applied to ``d_flag`` to selectively copy the + //! corresponding items from ``d_in`` into ``d_out``. + //! The total number of items selected is written to ``d_num_selected_out``. + //! + //! - The type of ``d_flags`` must conform to the requirements of the input + //! argument of the unary predicate ``select_op``. + //! - The return value of ``select_op(d_flags)`` must be castable to ``bool``. + //! - Copies of the selected items are compacted into ``d_out`` and maintain + //! their original relative ordering. + //! - | The range ``[d_out, d_out + *d_num_selected_out)`` shall not overlap + //! | ``[d_in, d_in + num_items)`` nor ``d_num_selected_out`` in any way. + //! - @devicestorage + //! + //! Snippet + //! +++++++++++++++++++++++++++++++++++++++++++++ + //! + //! The code snippet below illustrates the compaction of items selected from an ``int`` device vector. + //! + //! .. literalinclude:: ../../test/catch2_test_device_select_api.cu + //! :language: c++ + //! :dedent: + //! :start-after: example-begin segmented-select-flaggedif + //! :end-before: example-end segmented-select-flaggedif + //! + //! @endrst + //! + //! @tparam InputIteratorT + //! **[inferred]** Random-access input iterator type for reading input items @iterator + //! + //! @tparam FlagIterator + //! **[inferred]** Random-access input iterator type for reading selection flags @iterator + //! + //! @tparam OutputIteratorT + //! **[inferred]** Random-access output iterator type for writing selected items @iterator + //! + //! @tparam NumSelectedIteratorT + //! **[inferred]** Output iterator type for recording the number of items selected @iterator + //! + //! @tparam SelectOp + //! **[inferred]** Selection operator type having member `bool operator()(const T &a)` + //! + //! @param[in] d_temp_storage + //! Device-accessible allocation of temporary storage. When `nullptr`, the + //! required allocation size is written to `temp_storage_bytes` and no work is done. + //! + //! @param[in,out] temp_storage_bytes + //! Reference to size in bytes of `d_temp_storage` allocation + //! + //! @param[in] d_in + //! Pointer to the input sequence of data items + //! + //! @param[in] d_flags + //! Pointer to the input sequence of selection flags + //! + //! @param[out] d_out + //! Pointer to the output sequence of selected data items + //! + //! @param[out] d_num_selected_out + //! Pointer to the output total number of items selected + //! (i.e., length of `d_out`) + //! + //! @param[in] num_items + //! Total number of input items (i.e., length of `d_in`) + //! + //! @param[in] select_op + //! Unary selection operator + //! + //! @param[in] stream + //! @rst + //! **[optional]** CUDA stream to launch kernels within. Default is stream\ :sub:`0`. + //! @endrst template + +#include +#include + +#include + +#include "catch2_test_helper.h" + +struct is_even_t +{ + __host__ __device__ bool operator()(int const& elem) const + { + return !(elem % 2); + } +}; + +CUB_TEST("cub::DeviceSelect::FlaggedIf works with int data elements", "[select][device]") +{ + // example-begin segmented-select-flaggedif + int num_items = 8; + thrust::device_vector d_in = {0, 1, 2, 3, 4, 5, 6, 7}; + // auto d_offsets_it = thrust::raw_pointer_cast(d_offsets.data()); + thrust::device_vector d_flags = {8, 6, 7, 5, 3, 0, 9, 3}; + thrust::device_vector d_out(num_items); + thrust::device_vector d_num_selected_out(num_items); + is_even_t is_even{}; + + // Determine temporary device storage requirements + void* d_temp_storage = nullptr; + size_t temp_storage_bytes = 0; + cub::DeviceSelect::FlaggedIf( + d_temp_storage, + temp_storage_bytes, + d_in.begin(), + d_flags.begin(), + d_out.begin(), + d_num_selected_out.data(), + num_items, + is_even); + + // Allocate temporary storage + cudaMalloc(&d_temp_storage, temp_storage_bytes); + + // Run selection + cub::DeviceSelect::FlaggedIf( + d_temp_storage, + temp_storage_bytes, + d_in.begin(), + d_flags.begin(), + d_out.begin(), + d_num_selected_out.data(), + num_items, + is_even); + + thrust::device_vector expected{0, 1, 5}; + // example-end segmented-select-flaggedif + + d_out.resize(d_num_selected_out[0]); + REQUIRE(d_out == expected); + REQUIRE(d_num_selected_out[0] == (int) expected.size()); +} From 9ef0f0e3ba88ba6b000c56e2d9e5a0a71dc123dd Mon Sep 17 00:00:00 2001 From: gonidelis Date: Fri, 22 Mar 2024 16:38:45 -0700 Subject: [PATCH 5/8] Resolve review comments, add documentation for in-place overload --- cub/cub/device/device_select.cuh | 114 +++++++++++++++++- cub/test/catch2_test_device_select_api.cu | 9 +- .../catch2_test_device_select_flagged_if.cu | 4 +- 3 files changed, 115 insertions(+), 12 deletions(-) diff --git a/cub/cub/device/device_select.cuh b/cub/cub/device/device_select.cuh index 0e5329b4e9d..0c4e0757584 100644 --- a/cub/cub/device/device_select.cuh +++ b/cub/cub/device/device_select.cuh @@ -697,14 +697,14 @@ struct DeviceSelect stream); } -//! @rst - //! Uses the ``select_op`` functor applied to ``d_flag`` to selectively copy the + //! @rst + //! Uses the ``select_op`` functor applied to ``d_flags`` to selectively copy the //! corresponding items from ``d_in`` into ``d_out``. //! The total number of items selected is written to ``d_num_selected_out``. - //! - //! - The type of ``d_flags`` must conform to the requirements of the input - //! argument of the unary predicate ``select_op``. - //! - The return value of ``select_op(d_flags)`` must be castable to ``bool``. + //! + //! - The expression ``select_op(d_flags)`` must be convertible to ``bool`` for + //! every argument ``flag``, where the type of ``flag`` corresponds to the + //! value type of ``FlagIterator``. //! - Copies of the selected items are compacted into ``d_out`` and maintain //! their original relative ordering. //! - | The range ``[d_out, d_out + *d_num_selected_out)`` shall not overlap @@ -719,6 +719,12 @@ struct DeviceSelect //! .. literalinclude:: ../../test/catch2_test_device_select_api.cu //! :language: c++ //! :dedent: + //! :start-after: example-begin segmented-select-iseven + //! :end-before: example-end segmented-select-iseven + //! + //! .. literalinclude:: ../../test/catch2_test_device_select_api.cu + //! :language: c++ + //! :dedent: //! :start-after: example-begin segmented-select-flaggedif //! :end-before: example-end segmented-select-flaggedif //! @@ -808,6 +814,102 @@ struct DeviceSelect stream); } + //! @rst + //! Uses the ``select_op`` functor applied to ``d_flags`` to selectively compact the + //! corresponding items in ``d_data``. + //! The total number of items selected is written to ``d_num_selected_out``. + //! + //! - The expression ``select_op(d_flags)`` must be convertible to ``bool`` for + //! every argument ``flag``, where the type of ``flag`` corresponds to the + //! value type of ``FlagIterator``. + //! - Copies of the selected items are compacted in-place and maintain their original relative ordering. + //! - | The ``d_data`` may equal ``d_flags``. The range ``[d_data, d_data + num_items)`` shall not overlap + //! | ``[d_flags, d_flags + num_items)`` in any other way. + //! - @devicestorage + //! + //! Snippet + //! +++++++++++++++++++++++++++++++++++++++++++++ + //! + //! The code snippet below illustrates the compaction of items selected from an ``int`` device vector. + //! + //! .. code-block:: c++ + //! + //! #include // or equivalently + //! + //! struct is_even_t + //! { + //! __host__ __device__ bool operator()(int const& elem) const + //! { + //! return !(elem % 2); + //! } + //! }; + //! + //! // Declare, allocate, and initialize device-accessible pointers for input, + //! // flags, and output + //! int num_items; // e.g., 8 + //! int *d_data; // e.g., [0, 1, 2, 3, 4, 5, 6, 7] + //! char *d_flags; // e.g., [8, 6, 7, 5, 3, 0, 9, 3] + //! int *d_num_selected_out; // e.g., [ ] + //! ... + //! + //! // Determine temporary device storage requirements + //! void *d_temp_storage = NULL; + //! size_t temp_storage_bytes = 0; + //! cub::DeviceSelect::FlaggedIf( + //! d_temp_storage, temp_storage_bytes, + //! d_in, d_flags, d_num_selected_out, num_items, is_even); + //! + //! // Allocate temporary storage + //! cudaMalloc(&d_temp_storage, temp_storage_bytes); + //! + //! // Run selection + //! cub::DeviceSelect::Flagged( + //! d_temp_storage, temp_storage_bytes, + //! d_in, d_flags, d_num_selected_out, num_items, is_even); + //! + //! // d_data <-- [0, 1, 5] + //! // d_num_selected_out <-- [3] + //! + //! @endrst + //! + //! @tparam IteratorT + //! **[inferred]** Random-access iterator type for reading and writing selected items @iterator + //! + //! @tparam FlagIterator + //! **[inferred]** Random-access input iterator type for reading selection flags @iterator + //! + //! @tparam NumSelectedIteratorT + //! **[inferred]** Output iterator type for recording the number of items selected @iterator + //! + //! @tparam SelectOp + //! **[inferred]** Selection operator type having member `bool operator()(const T &a)` + //! + //! @param[in] d_temp_storage + //! Device-accessible allocation of temporary storage. When `nullptr`, the + //! required allocation size is written to `temp_storage_bytes` and no work is done. + //! + //! @param[in,out] temp_storage_bytes + //! Reference to size in bytes of `d_temp_storage` allocation + //! + //! @param[in,out] d_data + //! Pointer to the sequence of data items + //! + //! @param[in] d_flags + //! Pointer to the input sequence of selection flags + //! + //! @param[out] d_num_selected_out + //! Pointer to the output total number of items selected + //! + //! @param[in] num_items + //! Total number of input items (i.e., length of `d_data`) + //! + //! @param[in] select_op + //! Unary selection operator + //! + //! @param[in] stream + //! @rst + //! **[optional]** CUDA stream to launch kernels within. Default is stream\ :sub:`0`. + //! @endrst template CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t FlaggedIf( void* d_temp_storage, diff --git a/cub/test/catch2_test_device_select_api.cu b/cub/test/catch2_test_device_select_api.cu index c8ce6b3ef55..badec794146 100644 --- a/cub/test/catch2_test_device_select_api.cu +++ b/cub/test/catch2_test_device_select_api.cu @@ -34,6 +34,7 @@ #include "catch2_test_helper.h" +// example-begin segmented-select-iseven struct is_even_t { __host__ __device__ bool operator()(int const& elem) const @@ -41,13 +42,13 @@ struct is_even_t return !(elem % 2); } }; +// example-end segmented-select-iseven CUB_TEST("cub::DeviceSelect::FlaggedIf works with int data elements", "[select][device]") { // example-begin segmented-select-flaggedif - int num_items = 8; - thrust::device_vector d_in = {0, 1, 2, 3, 4, 5, 6, 7}; - // auto d_offsets_it = thrust::raw_pointer_cast(d_offsets.data()); + constexpr int num_items = 8; + thrust::device_vector d_in = {0, 1, 2, 3, 4, 5, 6, 7}; thrust::device_vector d_flags = {8, 6, 7, 5, 3, 0, 9, 3}; thrust::device_vector d_out(num_items); thrust::device_vector d_num_selected_out(num_items); @@ -83,7 +84,7 @@ CUB_TEST("cub::DeviceSelect::FlaggedIf works with int data elements", "[select][ thrust::device_vector expected{0, 1, 5}; // example-end segmented-select-flaggedif + REQUIRE(d_num_selected_out[0] == static_cast(expected.size())); d_out.resize(d_num_selected_out[0]); REQUIRE(d_out == expected); - REQUIRE(d_num_selected_out[0] == (int) expected.size()); } diff --git a/cub/test/catch2_test_device_select_flagged_if.cu b/cub/test/catch2_test_device_select_flagged_if.cu index 2778b7dc68c..546fa07b8e3 100644 --- a/cub/test/catch2_test_device_select_flagged_if.cu +++ b/cub/test/catch2_test_device_select_flagged_if.cu @@ -222,7 +222,7 @@ CUB_TEST("DeviceSelect::FlaggedIf does not change input and is stable", REQUIRE(reference_out == out); } -CUB_TEST("DeviceSelect::If works with iterators", "[device][select_if]", all_types, flag_types) +CUB_TEST("DeviceSelect::FlaggedIf works with iterators", "[device][select_if]", all_types, flag_types) { using input_type = typename c2h::get<0, TestType>; using flag_type = typename c2h::get<1, TestType>; @@ -250,7 +250,7 @@ CUB_TEST("DeviceSelect::If works with iterators", "[device][select_if]", all_typ REQUIRE(reference == out); } -CUB_TEST("DeviceSelect::Flagged works with pointers", "[device][select_flagged]", types, flag_types) +CUB_TEST("DeviceSelect::FlaggedIf works with pointers", "[device][select_flagged]", types, flag_types) { using input_type = typename c2h::get<0, TestType>; using flag_type = typename c2h::get<1, TestType>; From 5e39acceaea21c4aa53aaa4be033e540b0b320db Mon Sep 17 00:00:00 2001 From: gonidelis Date: Mon, 25 Mar 2024 14:39:45 -0700 Subject: [PATCH 6/8] Fix get_reference so that it doesn't use a ref to the value --- cub/cub/device/device_select.cuh | 10 ++-- cub/test/catch2_test_device_select_api.cu | 4 +- .../catch2_test_device_select_flagged_if.cu | 51 +++++++++---------- 3 files changed, 30 insertions(+), 35 deletions(-) diff --git a/cub/cub/device/device_select.cuh b/cub/cub/device/device_select.cuh index 0c4e0757584..2a39e5023f7 100644 --- a/cub/cub/device/device_select.cuh +++ b/cub/cub/device/device_select.cuh @@ -702,9 +702,8 @@ struct DeviceSelect //! corresponding items from ``d_in`` into ``d_out``. //! The total number of items selected is written to ``d_num_selected_out``. //! - //! - The expression ``select_op(d_flags)`` must be convertible to ``bool`` for - //! every argument ``flag``, where the type of ``flag`` corresponds to the - //! value type of ``FlagIterator``. + //! - The expression ``select_op(flag)`` must be convertible to ``bool``, + //! where the type of ``flag`` corresponds to the value type of ``FlagIterator``. //! - Copies of the selected items are compacted into ``d_out`` and maintain //! their original relative ordering. //! - | The range ``[d_out, d_out + *d_num_selected_out)`` shall not overlap @@ -819,9 +818,8 @@ struct DeviceSelect //! corresponding items in ``d_data``. //! The total number of items selected is written to ``d_num_selected_out``. //! - //! - The expression ``select_op(d_flags)`` must be convertible to ``bool`` for - //! every argument ``flag``, where the type of ``flag`` corresponds to the - //! value type of ``FlagIterator``. + //! - The expression ``select_op(flag)`` must be convertible to ``bool``, + //! where the type of ``flag`` corresponds to the value type of ``FlagIterator``. //! - Copies of the selected items are compacted in-place and maintain their original relative ordering. //! - | The ``d_data`` may equal ``d_flags``. The range ``[d_data, d_data + num_items)`` shall not overlap //! | ``[d_flags, d_flags + num_items)`` in any other way. diff --git a/cub/test/catch2_test_device_select_api.cu b/cub/test/catch2_test_device_select_api.cu index badec794146..7910d11f59f 100644 --- a/cub/test/catch2_test_device_select_api.cu +++ b/cub/test/catch2_test_device_select_api.cu @@ -37,9 +37,9 @@ // example-begin segmented-select-iseven struct is_even_t { - __host__ __device__ bool operator()(int const& elem) const + __host__ __device__ bool operator()(int flag) const { - return !(elem % 2); + return !(flag % 2); } }; // example-end segmented-select-iseven diff --git a/cub/test/catch2_test_device_select_flagged_if.cu b/cub/test/catch2_test_device_select_flagged_if.cu index 546fa07b8e3..8f00ecd0eb2 100644 --- a/cub/test/catch2_test_device_select_flagged_if.cu +++ b/cub/test/catch2_test_device_select_flagged_if.cu @@ -27,42 +27,39 @@ #include -#include +#include #include #include "catch2_test_helper.h" #include "catch2_test_launch_helper.h" +template +struct predicate_op_wrapper_t +{ + PredOpT if_pred; + template + __host__ __device__ bool operator()(thrust::tuple tuple) const + { + const auto flag = thrust::get<0>(tuple); + return static_cast(if_pred(flag)); + } +}; + template static c2h::host_vector get_reference(c2h::device_vector const& in, c2h::device_vector const& flags, Pred if_predicate) { - struct selector - { - T const* ref_begin = nullptr; - FlagT const* flag_begin = nullptr; - Pred const& if_pred; - - constexpr selector(T const* ref, FlagT const* flag, Pred const& pred) noexcept - : ref_begin(ref) - , flag_begin(flag) - , if_pred(pred) - {} - - bool operator()(T const& val) const - { - const auto pos = &val - ref_begin; - return static_cast(if_pred(flag_begin[pos])); - } - }; - c2h::host_vector reference = in; c2h::host_vector h_flags = flags; + // Zips flags and items + auto zipped_in_it = thrust::make_zip_iterator(h_flags.cbegin(), reference.cbegin()); + + // Discards the flags part and only keeps the items + auto zipped_out_it = thrust::make_zip_iterator(thrust::make_discard_iterator(), reference.begin()); - const selector pred{ - thrust::raw_pointer_cast(reference.data()), thrust::raw_pointer_cast(h_flags.data()), if_predicate}; - const auto boundary = std::stable_partition(reference.begin(), reference.end(), pred); - reference.erase(boundary, reference.end()); + auto end = + std::copy_if(zipped_in_it, zipped_in_it + in.size(), zipped_out_it, predicate_op_wrapper_t{if_predicate}); + reference.resize(thrust::distance(zipped_out_it, end)); return reference; } @@ -199,8 +196,8 @@ CUB_TEST("DeviceSelect::FlaggedIf does not change input and is stable", c2h::device_vector flags(num_items); c2h::gen(CUB_SEED(1), flags); - const int num_selected = static_cast(thrust::count_if(c2h::device_policy, flags.begin(), flags.end(), is_even)); const c2h::host_vector reference_out = get_reference(in, flags, is_even); + const int num_selected = reference_out.size(); // Needs to be device accessible c2h::device_vector num_selected_out(1, 0); @@ -236,8 +233,8 @@ CUB_TEST("DeviceSelect::FlaggedIf works with iterators", "[device][select_if]", c2h::device_vector flags(num_items); c2h::gen(CUB_SEED(1), flags); - const int num_selected = static_cast(thrust::count_if(c2h::device_policy, flags.begin(), flags.end(), is_even)); const c2h::host_vector reference = get_reference(in, flags, is_even); + const int num_selected = reference.size(); // Needs to be device accessible c2h::device_vector num_selected_out(1, 0); @@ -265,8 +262,8 @@ CUB_TEST("DeviceSelect::FlaggedIf works with pointers", "[device][select_flagged c2h::device_vector flags(num_items); c2h::gen(CUB_SEED(1), flags); - const int num_selected = static_cast(thrust::count_if(c2h::device_policy, flags.begin(), flags.end(), is_even)); const c2h::host_vector reference = get_reference(in, flags, is_even); + const int num_selected = reference.size(); // Needs to be device accessible c2h::device_vector num_selected_out(1, 0); From 69f03e51042de8d98c8f623ff7df05180d59d210 Mon Sep 17 00:00:00 2001 From: gonidelis Date: Mon, 25 Mar 2024 14:48:50 -0700 Subject: [PATCH 7/8] Add deviceSelect flagged_if API test example --- cub/cub/device/device_select.cuh | 46 ++++--------------- cub/test/catch2_test_device_select_api.cu | 30 ++++++++++++ .../catch2_test_device_select_flagged_if.cu | 6 +-- 3 files changed, 43 insertions(+), 39 deletions(-) diff --git a/cub/cub/device/device_select.cuh b/cub/cub/device/device_select.cuh index 2a39e5023f7..dc8539f200b 100644 --- a/cub/cub/device/device_select.cuh +++ b/cub/cub/device/device_select.cuh @@ -830,43 +830,17 @@ struct DeviceSelect //! //! The code snippet below illustrates the compaction of items selected from an ``int`` device vector. //! - //! .. code-block:: c++ - //! - //! #include // or equivalently - //! - //! struct is_even_t - //! { - //! __host__ __device__ bool operator()(int const& elem) const - //! { - //! return !(elem % 2); - //! } - //! }; - //! - //! // Declare, allocate, and initialize device-accessible pointers for input, - //! // flags, and output - //! int num_items; // e.g., 8 - //! int *d_data; // e.g., [0, 1, 2, 3, 4, 5, 6, 7] - //! char *d_flags; // e.g., [8, 6, 7, 5, 3, 0, 9, 3] - //! int *d_num_selected_out; // e.g., [ ] - //! ... - //! - //! // Determine temporary device storage requirements - //! void *d_temp_storage = NULL; - //! size_t temp_storage_bytes = 0; - //! cub::DeviceSelect::FlaggedIf( - //! d_temp_storage, temp_storage_bytes, - //! d_in, d_flags, d_num_selected_out, num_items, is_even); - //! - //! // Allocate temporary storage - //! cudaMalloc(&d_temp_storage, temp_storage_bytes); - //! - //! // Run selection - //! cub::DeviceSelect::Flagged( - //! d_temp_storage, temp_storage_bytes, - //! d_in, d_flags, d_num_selected_out, num_items, is_even); + //! .. literalinclude:: ../../test/catch2_test_device_select_api.cu + //! :language: c++ + //! :dedent: + //! :start-after: example-begin segmented-select-iseven + //! :end-before: example-end segmented-select-iseven //! - //! // d_data <-- [0, 1, 5] - //! // d_num_selected_out <-- [3] + //! .. literalinclude:: ../../test/catch2_test_device_select_api.cu + //! :language: c++ + //! :dedent: + //! :start-after: example-begin segmented-select-flaggedif-inplace + //! :end-before: example-end segmented-select-flaggedif-inplace //! //! @endrst //! diff --git a/cub/test/catch2_test_device_select_api.cu b/cub/test/catch2_test_device_select_api.cu index 7910d11f59f..affdca8b2fb 100644 --- a/cub/test/catch2_test_device_select_api.cu +++ b/cub/test/catch2_test_device_select_api.cu @@ -88,3 +88,33 @@ CUB_TEST("cub::DeviceSelect::FlaggedIf works with int data elements", "[select][ d_out.resize(d_num_selected_out[0]); REQUIRE(d_out == expected); } + +CUB_TEST("cub::DeviceSelect::FlaggedIf in-place works with int data elements", "[select][device]") +{ + // example-begin segmented-select-flaggedif-inplace + constexpr int num_items = 8; + thrust::device_vector d_data = {0, 1, 2, 3, 4, 5, 6, 7}; + thrust::device_vector d_flags = {8, 6, 7, 5, 3, 0, 9, 3}; + thrust::device_vector d_num_selected_out(num_items); + is_even_t is_even{}; + + // Determine temporary device storage requirements + void* d_temp_storage = nullptr; + size_t temp_storage_bytes = 0; + cub::DeviceSelect::FlaggedIf( + d_temp_storage, temp_storage_bytes, d_data.begin(), d_flags.begin(), d_num_selected_out.data(), num_items, is_even); + + // Allocate temporary storage + cudaMalloc(&d_temp_storage, temp_storage_bytes); + + // Run selection + cub::DeviceSelect::FlaggedIf( + d_temp_storage, temp_storage_bytes, d_data.begin(), d_flags.begin(), d_num_selected_out.data(), num_items, is_even); + + thrust::device_vector expected{0, 1, 5}; + // example-end segmented-select-flaggedif-inplace + + REQUIRE(d_num_selected_out[0] == static_cast(expected.size())); + d_data.resize(d_num_selected_out[0]); + REQUIRE(d_data == expected); +} diff --git a/cub/test/catch2_test_device_select_flagged_if.cu b/cub/test/catch2_test_device_select_flagged_if.cu index 8f00ecd0eb2..dac944e197d 100644 --- a/cub/test/catch2_test_device_select_flagged_if.cu +++ b/cub/test/catch2_test_device_select_flagged_if.cu @@ -197,7 +197,7 @@ CUB_TEST("DeviceSelect::FlaggedIf does not change input and is stable", c2h::device_vector flags(num_items); c2h::gen(CUB_SEED(1), flags); const c2h::host_vector reference_out = get_reference(in, flags, is_even); - const int num_selected = reference_out.size(); + const std::size_t num_selected = reference_out.size(); // Needs to be device accessible c2h::device_vector num_selected_out(1, 0); @@ -234,7 +234,7 @@ CUB_TEST("DeviceSelect::FlaggedIf works with iterators", "[device][select_if]", c2h::device_vector flags(num_items); c2h::gen(CUB_SEED(1), flags); const c2h::host_vector reference = get_reference(in, flags, is_even); - const int num_selected = reference.size(); + const std::size_t num_selected = reference.size(); // Needs to be device accessible c2h::device_vector num_selected_out(1, 0); @@ -263,7 +263,7 @@ CUB_TEST("DeviceSelect::FlaggedIf works with pointers", "[device][select_flagged c2h::gen(CUB_SEED(1), flags); const c2h::host_vector reference = get_reference(in, flags, is_even); - const int num_selected = reference.size(); + const std::size_t num_selected = reference.size(); // Needs to be device accessible c2h::device_vector num_selected_out(1, 0); From 62e5bcaedd27d0ae6cdf3a6c3e9cb526b8a6ea7c Mon Sep 17 00:00:00 2001 From: gonidelis Date: Tue, 26 Mar 2024 13:47:49 -0700 Subject: [PATCH 8/8] Consolidate num_selected value types --- cub/test/catch2_test_device_select_flagged_if.cu | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cub/test/catch2_test_device_select_flagged_if.cu b/cub/test/catch2_test_device_select_flagged_if.cu index dac944e197d..fc0494eda76 100644 --- a/cub/test/catch2_test_device_select_flagged_if.cu +++ b/cub/test/catch2_test_device_select_flagged_if.cu @@ -28,8 +28,11 @@ #include #include +#include #include +#include + #include "catch2_test_helper.h" #include "catch2_test_launch_helper.h" @@ -197,7 +200,7 @@ CUB_TEST("DeviceSelect::FlaggedIf does not change input and is stable", c2h::device_vector flags(num_items); c2h::gen(CUB_SEED(1), flags); const c2h::host_vector reference_out = get_reference(in, flags, is_even); - const std::size_t num_selected = reference_out.size(); + const int num_selected = static_cast(reference_out.size()); // Needs to be device accessible c2h::device_vector num_selected_out(1, 0); @@ -234,7 +237,7 @@ CUB_TEST("DeviceSelect::FlaggedIf works with iterators", "[device][select_if]", c2h::device_vector flags(num_items); c2h::gen(CUB_SEED(1), flags); const c2h::host_vector reference = get_reference(in, flags, is_even); - const std::size_t num_selected = reference.size(); + const int num_selected = static_cast(reference.size()); // Needs to be device accessible c2h::device_vector num_selected_out(1, 0); @@ -263,7 +266,7 @@ CUB_TEST("DeviceSelect::FlaggedIf works with pointers", "[device][select_flagged c2h::gen(CUB_SEED(1), flags); const c2h::host_vector reference = get_reference(in, flags, is_even); - const std::size_t num_selected = reference.size(); + const int num_selected = static_cast(reference.size()); // Needs to be device accessible c2h::device_vector num_selected_out(1, 0);