Skip to content

Commit

Permalink
Rework our system header approach to be more error proof (#661)
Browse files Browse the repository at this point in the history
Co-authored-by: Georgii Evtushenko <[email protected]>
  • Loading branch information
miscco and gevtushenko authored Nov 8, 2023
1 parent 9a3df59 commit 774a2a5
Show file tree
Hide file tree
Showing 1,398 changed files with 10,049 additions and 7,243 deletions.
12 changes: 7 additions & 5 deletions cub/cub/agent/agent_adjacent_difference.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,13 @@

#include <cub/config.cuh>

#if defined(_CCCL_COMPILER_NVHPC) && defined(_CCCL_USE_IMPLICIT_SYSTEM_DEADER)
#pragma GCC system_header
#else // ^^^ _CCCL_COMPILER_NVHPC ^^^ / vvv !_CCCL_COMPILER_NVHPC vvv
_CCCL_IMPLICIT_SYSTEM_HEADER
#endif // !_CCCL_COMPILER_NVHPC
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cub/block/block_adjacent_difference.cuh>
#include <cub/block/block_load.cuh>
Expand Down
12 changes: 7 additions & 5 deletions cub/cub/agent/agent_batch_memcpy.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@

#include <cub/config.cuh>

#if defined(_CCCL_COMPILER_NVHPC) && defined(_CCCL_USE_IMPLICIT_SYSTEM_DEADER)
#pragma GCC system_header
#else // ^^^ _CCCL_COMPILER_NVHPC ^^^ / vvv !_CCCL_COMPILER_NVHPC vvv
_CCCL_IMPLICIT_SYSTEM_HEADER
#endif // !_CCCL_COMPILER_NVHPC
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cub/agent/single_pass_scan_operators.cuh>
#include <cub/block/block_exchange.cuh>
Expand Down
46 changes: 24 additions & 22 deletions cub/cub/agent/agent_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@

#include <cub/config.cuh>

#if defined(_CCCL_COMPILER_NVHPC) && defined(_CCCL_USE_IMPLICIT_SYSTEM_DEADER)
#pragma GCC system_header
#else // ^^^ _CCCL_COMPILER_NVHPC ^^^ / vvv !_CCCL_COMPILER_NVHPC vvv
_CCCL_IMPLICIT_SYSTEM_HEADER
#endif // !_CCCL_COMPILER_NVHPC
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cub/block/block_load.cuh>
#include <cub/grid/grid_queue.cuh>
Expand Down Expand Up @@ -266,18 +268,18 @@ struct AgentHistogram
struct _TempStorage
{
// Smem needed for block-privatized smem histogram (with 1 word of padding)
CounterT histograms[NUM_ACTIVE_CHANNELS][PRIVATIZED_SMEM_BINS + 1];
CounterT histograms[NUM_ACTIVE_CHANNELS][PRIVATIZED_SMEM_BINS + 1];

int tile_idx;

// Aliasable storage layout
union Aliasable
{
// Smem needed for loading a tile of samples
typename BlockLoadSampleT::TempStorage sample_load;
typename BlockLoadSampleT::TempStorage sample_load;

// Smem needed for loading a tile of pixels
typename BlockLoadPixelT::TempStorage pixel_load;
typename BlockLoadPixelT::TempStorage pixel_load;

// Smem needed for loading a tile of vecs
typename BlockLoadVecT::TempStorage vec_load;
Expand Down Expand Up @@ -650,7 +652,7 @@ struct AgentHistogram

/**
* @brief Consume a tile of data samples
*
*
* @tparam IS_ALIGNED
* Whether the tile offset is aligned (vec-aligned for single-channel, pixel-aligned for multi-channel)
*
Expand Down Expand Up @@ -691,17 +693,17 @@ struct AgentHistogram

/**
* @brief Consume row tiles. Specialized for work-stealing from queue
*
* @param num_row_pixels
* The number of multi-channel pixels per row in the region of interest
*
* @param num_rows
* @param num_row_pixels
* The number of multi-channel pixels per row in the region of interest
*
* @param num_rows
* The number of rows in the region of interest
*
* @param row_stride_samples
* @param row_stride_samples
* The number of samples between starts of consecutive rows in the region of interest
*
* @param tiles_per_row
* @param tiles_per_row
* Number of image tiles per row
*/
template <bool IS_ALIGNED>
Expand Down Expand Up @@ -752,17 +754,17 @@ struct AgentHistogram

/**
* @brief Consume row tiles. Specialized for even-share (striped across thread blocks)
*
* @param num_row_pixels
*
* @param num_row_pixels
* The number of multi-channel pixels per row in the region of interest
*
* @param num_rows
* @param num_rows
* The number of rows in the region of interest
*
* @param row_stride_samples
* @param row_stride_samples
* The number of samples between starts of consecutive rows in the region of interest
*
* @param tiles_per_row
* @param tiles_per_row
* Number of image tiles per row
*/
template <bool IS_ALIGNED>
Expand Down Expand Up @@ -829,10 +831,10 @@ struct AgentHistogram
/**
* @brief Constructor
*
* @param temp_storage
* @param temp_storage
* Reference to temp_storage
*
* @param d_samples
* @param d_samples
* Input data to reduce
*
* @param num_output_bins
Expand Down
12 changes: 7 additions & 5 deletions cub/cub/agent/agent_merge_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,13 @@

#include <cub/config.cuh>

#if defined(_CCCL_COMPILER_NVHPC) && defined(_CCCL_USE_IMPLICIT_SYSTEM_DEADER)
#pragma GCC system_header
#else // ^^^ _CCCL_COMPILER_NVHPC ^^^ / vvv !_CCCL_COMPILER_NVHPC vvv
_CCCL_IMPLICIT_SYSTEM_HEADER
#endif // !_CCCL_COMPILER_NVHPC
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cub/block/block_load.cuh>
#include <cub/block/block_merge_sort.cuh>
Expand Down
14 changes: 8 additions & 6 deletions cub/cub/agent/agent_radix_sort_downsweep.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,13 @@

#include <cub/config.cuh>

#if defined(_CCCL_COMPILER_NVHPC) && defined(_CCCL_USE_IMPLICIT_SYSTEM_DEADER)
#pragma GCC system_header
#else // ^^^ _CCCL_COMPILER_NVHPC ^^^ / vvv !_CCCL_COMPILER_NVHPC vvv
_CCCL_IMPLICIT_SYSTEM_HEADER
#endif // !_CCCL_COMPILER_NVHPC
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cub/block/block_exchange.cuh>
#include <cub/block/block_load.cuh>
Expand Down Expand Up @@ -131,7 +133,7 @@ struct AgentRadixSortDownsweepPolicy : ScalingType


/**
* @brief AgentRadixSortDownsweep implements a stateful abstraction of CUDA thread blocks for participating in
* @brief AgentRadixSortDownsweep implements a stateful abstraction of CUDA thread blocks for participating in
* device-wide radix sort downsweep .
*
* @tparam AgentRadixSortDownsweepPolicy
Expand Down
12 changes: 7 additions & 5 deletions cub/cub/agent/agent_radix_sort_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,13 @@

#include <cub/config.cuh>

#if defined(_CCCL_COMPILER_NVHPC) && defined(_CCCL_USE_IMPLICIT_SYSTEM_DEADER)
#pragma GCC system_header
#else // ^^^ _CCCL_COMPILER_NVHPC ^^^ / vvv !_CCCL_COMPILER_NVHPC vvv
_CCCL_IMPLICIT_SYSTEM_HEADER
#endif // !_CCCL_COMPILER_NVHPC
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cub/block/block_load.cuh>
#include <cub/block/radix_rank_sort_operations.cuh>
Expand Down
12 changes: 7 additions & 5 deletions cub/cub/agent/agent_radix_sort_onesweep.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@

#include <cub/config.cuh>

#if defined(_CCCL_COMPILER_NVHPC) && defined(_CCCL_USE_IMPLICIT_SYSTEM_DEADER)
#pragma GCC system_header
#else // ^^^ _CCCL_COMPILER_NVHPC ^^^ / vvv !_CCCL_COMPILER_NVHPC vvv
_CCCL_IMPLICIT_SYSTEM_HEADER
#endif // !_CCCL_COMPILER_NVHPC
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cub/block/block_radix_rank.cuh>
#include <cub/block/block_store.cuh>
Expand Down
14 changes: 8 additions & 6 deletions cub/cub/agent/agent_radix_sort_upsweep.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@

#include <cub/config.cuh>

#if defined(_CCCL_COMPILER_NVHPC) && defined(_CCCL_USE_IMPLICIT_SYSTEM_DEADER)
#pragma GCC system_header
#else // ^^^ _CCCL_COMPILER_NVHPC ^^^ / vvv !_CCCL_COMPILER_NVHPC vvv
_CCCL_IMPLICIT_SYSTEM_HEADER
#endif // !_CCCL_COMPILER_NVHPC
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cub/block/block_load.cuh>
#include <cub/block/radix_rank_sort_operations.cuh>
Expand Down Expand Up @@ -510,7 +512,7 @@ struct AgentRadixSortUpsweep
* @brief Extract counts
*
* @param[out] bin_count
* The exclusive prefix sum for the digits
* The exclusive prefix sum for the digits
* [(threadIdx.x * BINS_TRACKED_PER_THREAD) ... (threadIdx.x * BINS_TRACKED_PER_THREAD) + BINS_TRACKED_PER_THREAD - 1]
*/
template <int BINS_TRACKED_PER_THREAD>
Expand Down
12 changes: 7 additions & 5 deletions cub/cub/agent/agent_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@

#include <cub/config.cuh>

#if defined(_CCCL_COMPILER_NVHPC) && defined(_CCCL_USE_IMPLICIT_SYSTEM_DEADER)
#pragma GCC system_header
#else // ^^^ _CCCL_COMPILER_NVHPC ^^^ / vvv !_CCCL_COMPILER_NVHPC vvv
_CCCL_IMPLICIT_SYSTEM_HEADER
#endif // !_CCCL_COMPILER_NVHPC
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <iterator>

Expand Down
12 changes: 7 additions & 5 deletions cub/cub/agent/agent_reduce_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@

#include <cub/config.cuh>

#if defined(_CCCL_COMPILER_NVHPC) && defined(_CCCL_USE_IMPLICIT_SYSTEM_DEADER)
#pragma GCC system_header
#else // ^^^ _CCCL_COMPILER_NVHPC ^^^ / vvv !_CCCL_COMPILER_NVHPC vvv
_CCCL_IMPLICIT_SYSTEM_HEADER
#endif // !_CCCL_COMPILER_NVHPC
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cub/agent/single_pass_scan_operators.cuh>
#include <cub/block/block_discontinuity.cuh>
Expand Down
42 changes: 22 additions & 20 deletions cub/cub/agent/agent_rle.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@

#include <cub/config.cuh>

#if defined(_CCCL_COMPILER_NVHPC) && defined(_CCCL_USE_IMPLICIT_SYSTEM_DEADER)
#pragma GCC system_header
#else // ^^^ _CCCL_COMPILER_NVHPC ^^^ / vvv !_CCCL_COMPILER_NVHPC vvv
_CCCL_IMPLICIT_SYSTEM_HEADER
#endif // !_CCCL_COMPILER_NVHPC
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cub/agent/single_pass_scan_operators.cuh>
#include <cub/block/block_discontinuity.cuh>
Expand Down Expand Up @@ -130,7 +132,7 @@ struct AgentRlePolicy
******************************************************************************/

/**
* @brief AgentRle implements a stateful abstraction of CUDA thread blocks for participating in device-wide run-length-encode
* @brief AgentRle implements a stateful abstraction of CUDA thread blocks for participating in device-wide run-length-encode
*
* @tparam AgentRlePolicyT
* Parameterized AgentRlePolicyT tuning policy type
Expand Down Expand Up @@ -329,22 +331,22 @@ struct AgentRle
//---------------------------------------------------------------------

/**
* @param[in] temp_storage
* @param[in] temp_storage
* Reference to temp_storage
*
* @param[in] d_in
* @param[in] d_in
* Pointer to input sequence of data items
*
* @param[out] d_offsets_out
* @param[out] d_offsets_out
* Pointer to output sequence of run offsets
*
* @param[out] d_lengths_out
* @param[out] d_lengths_out
* Pointer to output sequence of run lengths
*
* @param[in] equality_op
* @param[in] equality_op
* Equality operator
*
* @param[in] num_items
* @param[in] num_items
* Total number of input items
*/
__device__ __forceinline__ AgentRle(TempStorage &temp_storage,
Expand Down Expand Up @@ -731,19 +733,19 @@ struct AgentRle
/**
* @brief Process a tile of input (dynamic chained scan)
*
* @param num_items
* @param num_items
* Total number of global input items
*
* @param num_remaining
* @param num_remaining
* Number of global input items remaining (including this tile)
*
* @param tile_idx
* @param tile_idx
* Tile index
*
* @param tile_offset
* @param tile_offset
* Tile offset
*
* @param &tile_status
* @param &tile_status
* Global list of tile status
*/
template <bool LAST_TILE>
Expand Down Expand Up @@ -953,13 +955,13 @@ struct AgentRle
/**
* @brief Scan tiles of items as part of a dynamic chained scan
*
* @param num_tiles
* @param num_tiles
* Total number of input tiles
*
* @param tile_status
* @param tile_status
* Global list of tile status
*
* @param d_num_runs_out
* @param d_num_runs_out
* Output pointer for total number of runs identified
*
* @tparam NumRunsIteratorT
Expand Down
Loading

0 comments on commit 774a2a5

Please sign in to comment.