Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add the DeviceSelect::FlaggedIf algorithm #1533

Merged
merged 8 commits into from
Mar 27, 2024
222 changes: 222 additions & 0 deletions cub/cub/device/device_select.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,228 @@ struct DeviceSelect
stream);
}

//! @rst
//! Uses the ``select_op`` functor applied to ``d_flags`` to selectively copy the
//! corresponding items from ``d_in`` into ``d_out``.
//! The total number of items selected is written to ``d_num_selected_out``.
//!
//! - The expression ``select_op(flag)`` must be convertible to ``bool``,
//! where the type of ``flag`` corresponds to the value type of ``FlagIterator``.
//! - Copies of the selected items are compacted into ``d_out`` and maintain
//! their original relative ordering.
//! - | The range ``[d_out, d_out + *d_num_selected_out)`` shall not overlap
//! | ``[d_in, d_in + num_items)`` nor ``d_num_selected_out`` in any way.
//! - @devicestorage
//!
//! Snippet
//! +++++++++++++++++++++++++++++++++++++++++++++
//!
//! The code snippet below illustrates the compaction of items selected from an ``int`` device vector.
//!
//! .. literalinclude:: ../../test/catch2_test_device_select_api.cu
//! :language: c++
//! :dedent:
//! :start-after: example-begin segmented-select-iseven
//! :end-before: example-end segmented-select-iseven
//!
//! .. literalinclude:: ../../test/catch2_test_device_select_api.cu
//! :language: c++
//! :dedent:
//! :start-after: example-begin segmented-select-flaggedif
//! :end-before: example-end segmented-select-flaggedif
//!
//! @endrst
//!
//! @tparam InputIteratorT
//! **[inferred]** Random-access input iterator type for reading input items @iterator
//!
//! @tparam FlagIterator
gonidelis marked this conversation as resolved.
Show resolved Hide resolved
//! **[inferred]** Random-access input iterator type for reading selection flags @iterator
//!
//! @tparam OutputIteratorT
//! **[inferred]** Random-access output iterator type for writing selected items @iterator
//!
//! @tparam NumSelectedIteratorT
//! **[inferred]** Output iterator type for recording the number of items selected @iterator
//!
//! @tparam SelectOp
//! **[inferred]** Selection operator type having member `bool operator()(const T &a)`
//!
//! @param[in] d_temp_storage
//! Device-accessible allocation of temporary storage. When `nullptr`, the
//! required allocation size is written to `temp_storage_bytes` and no work is done.
//!
//! @param[in,out] temp_storage_bytes
//! Reference to size in bytes of `d_temp_storage` allocation
//!
//! @param[in] d_in
//! Pointer to the input sequence of data items
//!
//! @param[in] d_flags
//! Pointer to the input sequence of selection flags
//!
//! @param[out] d_out
//! Pointer to the output sequence of selected data items
//!
//! @param[out] d_num_selected_out
//! Pointer to the output total number of items selected
//! (i.e., length of `d_out`)
//!
//! @param[in] num_items
//! Total number of input items (i.e., length of `d_in`)
//!
//! @param[in] select_op
//! Unary selection operator
//!
//! @param[in] stream
//! @rst
//! **[optional]** CUDA stream to launch kernels within. Default is stream\ :sub:`0`.
//! @endrst
template <typename InputIteratorT,
typename FlagIterator,
typename OutputIteratorT,
typename NumSelectedIteratorT,
typename SelectOp>
CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t FlaggedIf(
void* d_temp_storage,
size_t& temp_storage_bytes,
InputIteratorT d_in,
FlagIterator d_flags,
OutputIteratorT d_out,
NumSelectedIteratorT d_num_selected_out,
int num_items,
SelectOp select_op,
cudaStream_t stream = 0)
{
using OffsetT = int; // Signed integer type for global offsets
using EqualityOp = NullType; // Equality operator (not used)

return DispatchSelectIf<
InputIteratorT,
FlagIterator,
OutputIteratorT,
NumSelectedIteratorT,
SelectOp,
EqualityOp,
OffsetT,
false>::Dispatch(d_temp_storage,
temp_storage_bytes,
d_in,
d_flags,
d_out,
d_num_selected_out,
select_op,
EqualityOp(),
num_items,
stream);
}

//! @rst
//! Uses the ``select_op`` functor applied to ``d_flags`` to selectively compact the
//! corresponding items in ``d_data``.
//! The total number of items selected is written to ``d_num_selected_out``.
//!
//! - The expression ``select_op(flag)`` must be convertible to ``bool``,
//! where the type of ``flag`` corresponds to the value type of ``FlagIterator``.
//! - Copies of the selected items are compacted in-place and maintain their original relative ordering.
//! - | The ``d_data`` may equal ``d_flags``. The range ``[d_data, d_data + num_items)`` shall not overlap
//! | ``[d_flags, d_flags + num_items)`` in any other way.
//! - @devicestorage
//!
//! Snippet
//! +++++++++++++++++++++++++++++++++++++++++++++
//!
//! The code snippet below illustrates the compaction of items selected from an ``int`` device vector.
//!
//! .. literalinclude:: ../../test/catch2_test_device_select_api.cu
//! :language: c++
//! :dedent:
//! :start-after: example-begin segmented-select-iseven
//! :end-before: example-end segmented-select-iseven
//!
//! .. literalinclude:: ../../test/catch2_test_device_select_api.cu
//! :language: c++
//! :dedent:
//! :start-after: example-begin segmented-select-flaggedif-inplace
//! :end-before: example-end segmented-select-flaggedif-inplace
//!
//! @endrst
//!
//! @tparam IteratorT
//! **[inferred]** Random-access iterator type for reading and writing selected items @iterator
//!
//! @tparam FlagIterator
//! **[inferred]** Random-access input iterator type for reading selection flags @iterator
//!
//! @tparam NumSelectedIteratorT
//! **[inferred]** Output iterator type for recording the number of items selected @iterator
//!
//! @tparam SelectOp
//! **[inferred]** Selection operator type having member `bool operator()(const T &a)`
//!
//! @param[in] d_temp_storage
//! Device-accessible allocation of temporary storage. When `nullptr`, the
//! required allocation size is written to `temp_storage_bytes` and no work is done.
//!
//! @param[in,out] temp_storage_bytes
//! Reference to size in bytes of `d_temp_storage` allocation
//!
//! @param[in,out] d_data
//! Pointer to the sequence of data items
//!
//! @param[in] d_flags
//! Pointer to the input sequence of selection flags
//!
//! @param[out] d_num_selected_out
//! Pointer to the output total number of items selected
//!
//! @param[in] num_items
//! Total number of input items (i.e., length of `d_data`)
//!
//! @param[in] select_op
//! Unary selection operator
//!
//! @param[in] stream
//! @rst
//! **[optional]** CUDA stream to launch kernels within. Default is stream\ :sub:`0`.
//! @endrst
template <typename IteratorT, typename FlagIterator, typename NumSelectedIteratorT, typename SelectOp>
gonidelis marked this conversation as resolved.
Show resolved Hide resolved
CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t FlaggedIf(
void* d_temp_storage,
size_t& temp_storage_bytes,
IteratorT d_data,
FlagIterator d_flags,
NumSelectedIteratorT d_num_selected_out,
int num_items,
SelectOp select_op,
cudaStream_t stream = 0)
{
using OffsetT = int; // Signed integer type for global offsets
using EqualityOp = NullType; // Equality operator (not used)

constexpr bool may_alias = true;

return DispatchSelectIf<
IteratorT,
FlagIterator,
IteratorT,
NumSelectedIteratorT,
SelectOp,
EqualityOp,
OffsetT,
false,
may_alias>::Dispatch(d_temp_storage,
temp_storage_bytes,
d_data, // in
d_flags,
d_data, // out
d_num_selected_out,
select_op,
EqualityOp(),
num_items,
stream);
}

//! @rst
//! Given an input sequence ``d_in`` having runs of consecutive equal-valued keys,
//! only the first key from each run is selectively copied to ``d_out``.
Expand Down
120 changes: 120 additions & 0 deletions cub/test/catch2_test_device_select_api.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/******************************************************************************
gonidelis marked this conversation as resolved.
Show resolved Hide resolved
* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/

#include <cub/device/device_select.cuh>

#include <thrust/device_vector.h>
#include <thrust/equal.h>

#include <cstddef>

#include "catch2_test_helper.h"

// example-begin segmented-select-iseven
struct is_even_t
{
__host__ __device__ bool operator()(int flag) const
{
return !(flag % 2);
}
};
// example-end segmented-select-iseven

CUB_TEST("cub::DeviceSelect::FlaggedIf works with int data elements", "[select][device]")
{
// example-begin segmented-select-flaggedif
constexpr int num_items = 8;
thrust::device_vector<int> d_in = {0, 1, 2, 3, 4, 5, 6, 7};
thrust::device_vector<int> d_flags = {8, 6, 7, 5, 3, 0, 9, 3};
thrust::device_vector<int> d_out(num_items);
thrust::device_vector<int> d_num_selected_out(num_items);
is_even_t is_even{};
gonidelis marked this conversation as resolved.
Show resolved Hide resolved

// Determine temporary device storage requirements
void* d_temp_storage = nullptr;
size_t temp_storage_bytes = 0;
cub::DeviceSelect::FlaggedIf(
d_temp_storage,
temp_storage_bytes,
d_in.begin(),
d_flags.begin(),
d_out.begin(),
d_num_selected_out.data(),
num_items,
is_even);

// Allocate temporary storage
cudaMalloc(&d_temp_storage, temp_storage_bytes);

// Run selection
cub::DeviceSelect::FlaggedIf(
d_temp_storage,
temp_storage_bytes,
d_in.begin(),
d_flags.begin(),
d_out.begin(),
d_num_selected_out.data(),
num_items,
is_even);

thrust::device_vector<int> expected{0, 1, 5};
// example-end segmented-select-flaggedif

REQUIRE(d_num_selected_out[0] == static_cast<int>(expected.size()));
d_out.resize(d_num_selected_out[0]);
REQUIRE(d_out == expected);
}

CUB_TEST("cub::DeviceSelect::FlaggedIf in-place works with int data elements", "[select][device]")
{
// example-begin segmented-select-flaggedif-inplace
constexpr int num_items = 8;
thrust::device_vector<int> d_data = {0, 1, 2, 3, 4, 5, 6, 7};
thrust::device_vector<int> d_flags = {8, 6, 7, 5, 3, 0, 9, 3};
thrust::device_vector<int> d_num_selected_out(num_items);
is_even_t is_even{};

// Determine temporary device storage requirements
void* d_temp_storage = nullptr;
size_t temp_storage_bytes = 0;
cub::DeviceSelect::FlaggedIf(
d_temp_storage, temp_storage_bytes, d_data.begin(), d_flags.begin(), d_num_selected_out.data(), num_items, is_even);

// Allocate temporary storage
cudaMalloc(&d_temp_storage, temp_storage_bytes);

// Run selection
cub::DeviceSelect::FlaggedIf(
d_temp_storage, temp_storage_bytes, d_data.begin(), d_flags.begin(), d_num_selected_out.data(), num_items, is_even);

thrust::device_vector<int> expected{0, 1, 5};
// example-end segmented-select-flaggedif-inplace

REQUIRE(d_num_selected_out[0] == static_cast<int>(expected.size()));
d_data.resize(d_num_selected_out[0]);
REQUIRE(d_data == expected);
}
Loading
Loading