Skip to content

Commit

Permalink
Resolve review comments, add documentation for in-place overload
Browse files Browse the repository at this point in the history
  • Loading branch information
gonidelis committed Mar 22, 2024
1 parent fcaf15b commit 47f3a50
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 12 deletions.
114 changes: 108 additions & 6 deletions cub/cub/device/device_select.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -697,14 +697,14 @@ struct DeviceSelect
stream);
}

//! @rst
//! Uses the ``select_op`` functor applied to ``d_flag`` to selectively copy the
//! @rst
//! Uses the ``select_op`` functor applied to ``d_flags`` to selectively copy the
//! corresponding items from ``d_in`` into ``d_out``.
//! The total number of items selected is written to ``d_num_selected_out``.
//!
//! - The type of ``d_flags`` must conform to the requirements of the input
//! argument of the unary predicate ``select_op``.
//! - The return value of ``select_op(d_flags)`` must be castable to ``bool``.
//!
//! - The expression ``select_op(flag)`` must be convertible to ``bool`` for
//! every argument ``flag``, where the type of ``flag`` corresponds to the
//! value type of ``FlagIterator``.
//! - Copies of the selected items are compacted into ``d_out`` and maintain
//! their original relative ordering.
//! - | The range ``[d_out, d_out + *d_num_selected_out)`` shall not overlap
Expand All @@ -719,6 +719,12 @@ struct DeviceSelect
//! .. literalinclude:: ../../test/catch2_test_device_select_api.cu
//! :language: c++
//! :dedent:
//! :start-after: example-begin segmented-select-iseven
//! :end-before: example-end segmented-select-iseven
//!
//! .. literalinclude:: ../../test/catch2_test_device_select_api.cu
//! :language: c++
//! :dedent:
//! :start-after: example-begin segmented-select-flaggedif
//! :end-before: example-end segmented-select-flaggedif
//!
Expand Down Expand Up @@ -808,6 +814,102 @@ struct DeviceSelect
stream);
}

//! @rst
//! Uses the ``select_op`` functor applied to ``d_flags`` to selectively compact the
//! corresponding items in ``d_data``.
//! The total number of items selected is written to ``d_num_selected_out``.
//!
//! - The expression ``select_op(flag)`` must be convertible to ``bool`` for
//! every argument ``flag``, where the type of ``flag`` corresponds to the
//! value type of ``FlagIterator``.
//! - Copies of the selected items are compacted in-place and maintain their original relative ordering.
//! - | ``d_data`` may equal ``d_flags``. The range ``[d_data, d_data + num_items)`` shall not overlap
//! | ``[d_flags, d_flags + num_items)`` in any other way.
//! - @devicestorage
//!
//! Snippet
//! +++++++++++++++++++++++++++++++++++++++++++++
//!
//! The code snippet below illustrates the compaction of items selected from an ``int`` device vector.
//!
//! .. code-block:: c++
//!
//! #include <cub/cub.cuh> // or equivalently <cub/device/device_select.cuh>
//!
//! struct is_even_t
//! {
//! __host__ __device__ bool operator()(int const& elem) const
//! {
//! return !(elem % 2);
//! }
//! };
//!
//! // Declare, allocate, and initialize device-accessible pointers for the data,
//! // the flags, and the number of selected items
//! int num_items; // e.g., 8
//! int *d_data; // e.g., [0, 1, 2, 3, 4, 5, 6, 7]
//! char *d_flags; // e.g., [8, 6, 7, 5, 3, 0, 9, 3]
//! int *d_num_selected_out; // e.g., [ ]
//! ...
//!
//! // Determine temporary device storage requirements
//! void *d_temp_storage = NULL;
//! size_t temp_storage_bytes = 0;
//! cub::DeviceSelect::FlaggedIf(
//! d_temp_storage, temp_storage_bytes,
//! d_data, d_flags, d_num_selected_out, num_items, is_even_t{});
//!
//! // Allocate temporary storage
//! cudaMalloc(&d_temp_storage, temp_storage_bytes);
//!
//! // Run selection
//! cub::DeviceSelect::FlaggedIf(
//! d_temp_storage, temp_storage_bytes,
//! d_data, d_flags, d_num_selected_out, num_items, is_even_t{});
//!
//! // d_data <-- [0, 1, 5]
//! // d_num_selected_out <-- [3]
//!
//! @endrst
//!
//! @tparam IteratorT
//! **[inferred]** Random-access iterator type for reading and writing selected items @iterator
//!
//! @tparam FlagIterator
//! **[inferred]** Random-access input iterator type for reading selection flags @iterator
//!
//! @tparam NumSelectedIteratorT
//! **[inferred]** Output iterator type for recording the number of items selected @iterator
//!
//! @tparam SelectOp
//! **[inferred]** Selection operator type having member `bool operator()(const T &a)`
//!
//! @param[in] d_temp_storage
//! Device-accessible allocation of temporary storage. When `nullptr`, the
//! required allocation size is written to `temp_storage_bytes` and no work is done.
//!
//! @param[in,out] temp_storage_bytes
//! Reference to size in bytes of `d_temp_storage` allocation
//!
//! @param[in,out] d_data
//! Pointer to the sequence of data items
//!
//! @param[in] d_flags
//! Pointer to the input sequence of selection flags
//!
//! @param[out] d_num_selected_out
//! Pointer to the output total number of items selected
//!
//! @param[in] num_items
//! Total number of input items (i.e., length of `d_data`)
//!
//! @param[in] select_op
//! Unary selection operator
//!
//! @param[in] stream
//! @rst
//! **[optional]** CUDA stream to launch kernels within. Default is stream\ :sub:`0`.
//! @endrst
template <typename IteratorT, typename FlagIterator, typename NumSelectedIteratorT, typename SelectOp>
CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t FlaggedIf(
void* d_temp_storage,
Expand Down
9 changes: 5 additions & 4 deletions cub/test/catch2_test_device_select_api.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,20 +34,21 @@

#include "catch2_test_helper.h"

// example-begin segmented-select-iseven
struct is_even_t
{
  // Unary predicate: yields true exactly when `elem` is divisible by two.
  // Callable from both host and device code.
  __host__ __device__ bool operator()(int const& elem) const
  {
    return (elem % 2) == 0;
  }
};
// example-end segmented-select-iseven

CUB_TEST("cub::DeviceSelect::FlaggedIf works with int data elements", "[select][device]")
{
// example-begin segmented-select-flaggedif
int num_items = 8;
thrust::device_vector<int> d_in = {0, 1, 2, 3, 4, 5, 6, 7};
// auto d_offsets_it = thrust::raw_pointer_cast(d_offsets.data());
constexpr int num_items = 8;
thrust::device_vector<int> d_in = {0, 1, 2, 3, 4, 5, 6, 7};
thrust::device_vector<int> d_flags = {8, 6, 7, 5, 3, 0, 9, 3};
thrust::device_vector<int> d_out(num_items);
thrust::device_vector<int> d_num_selected_out(num_items);
Expand Down Expand Up @@ -83,7 +84,7 @@ CUB_TEST("cub::DeviceSelect::FlaggedIf works with int data elements", "[select][
thrust::device_vector<int> expected{0, 1, 5};
// example-end segmented-select-flaggedif

REQUIRE(d_num_selected_out[0] == static_cast<int>(expected.size()));
d_out.resize(d_num_selected_out[0]);
REQUIRE(d_out == expected);
REQUIRE(d_num_selected_out[0] == (int) expected.size());
}
4 changes: 2 additions & 2 deletions cub/test/catch2_test_device_select_flagged_if.cu
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ CUB_TEST("DeviceSelect::FlaggedIf does not change input and is stable",
REQUIRE(reference_out == out);
}

CUB_TEST("DeviceSelect::If works with iterators", "[device][select_if]", all_types, flag_types)
CUB_TEST("DeviceSelect::FlaggedIf works with iterators", "[device][select_if]", all_types, flag_types)
{
using input_type = typename c2h::get<0, TestType>;
using flag_type = typename c2h::get<1, TestType>;
Expand Down Expand Up @@ -250,7 +250,7 @@ CUB_TEST("DeviceSelect::If works with iterators", "[device][select_if]", all_typ
REQUIRE(reference == out);
}

CUB_TEST("DeviceSelect::Flagged works with pointers", "[device][select_flagged]", types, flag_types)
CUB_TEST("DeviceSelect::FlaggedIf works with pointers", "[device][select_flagged]", types, flag_types)
{
using input_type = typename c2h::get<0, TestType>;
using flag_type = typename c2h::get<1, TestType>;
Expand Down

0 comments on commit 47f3a50

Please sign in to comment.