remove extra braces, add comments
danhoeflinger committed Aug 6, 2024
1 parent 82f3ee5 commit 36da55e
Showing 2 changed files with 30 additions and 25 deletions.
48 changes: 24 additions & 24 deletions include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h
@@ -840,30 +840,30 @@ __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backen
                                                           _WriteOp{}, __init, _Inclusive{});
         }
     }
-    {
-        using _Assigner = unseq_backend::__scan_assigner;
-        using _NoAssign = unseq_backend::__scan_no_assign;
-        using _UnaryFunctor = unseq_backend::walk_n<_ExecutionPolicy, _UnaryOperation>;
-        using _NoOpFunctor = unseq_backend::walk_n<_ExecutionPolicy, oneapi::dpl::__internal::__no_op>;
-
-        _Assigner __assign_op;
-        _NoAssign __no_assign_op;
-        _NoOpFunctor __get_data_op;
-
-        return __parallel_transform_scan_base(
-            __backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__in_rng),
-            std::forward<_Range2>(__out_rng), __binary_op, __init,
-            // local scan
-            unseq_backend::__scan<_Inclusive, _ExecutionPolicy, _BinaryOperation, _UnaryFunctor, _Assigner, _Assigner,
-                                  _NoOpFunctor, _InitType>{__binary_op, _UnaryFunctor{__unary_op}, __assign_op,
-                                                           __assign_op, __get_data_op},
-            // scan between groups
-            unseq_backend::__scan</*inclusive=*/std::true_type, _ExecutionPolicy, _BinaryOperation, _NoOpFunctor,
-                                  _NoAssign, _Assigner, _NoOpFunctor, unseq_backend::__no_init_value<_Type>>{
-                __binary_op, _NoOpFunctor{}, __no_assign_op, __assign_op, __get_data_op},
-            // global scan
-            unseq_backend::__global_scan_functor<_Inclusive, _BinaryOperation, _InitType>{__binary_op, __init});
-    }
+    //else use legacy scan implementation
+
+    using _Assigner = unseq_backend::__scan_assigner;
+    using _NoAssign = unseq_backend::__scan_no_assign;
+    using _UnaryFunctor = unseq_backend::walk_n<_ExecutionPolicy, _UnaryOperation>;
+    using _NoOpFunctor = unseq_backend::walk_n<_ExecutionPolicy, oneapi::dpl::__internal::__no_op>;
+
+    _Assigner __assign_op;
+    _NoAssign __no_assign_op;
+    _NoOpFunctor __get_data_op;
+
+    return __parallel_transform_scan_base(
+        __backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__in_rng),
+        std::forward<_Range2>(__out_rng), __binary_op, __init,
+        // local scan
+        unseq_backend::__scan<_Inclusive, _ExecutionPolicy, _BinaryOperation, _UnaryFunctor, _Assigner, _Assigner,
+                              _NoOpFunctor, _InitType>{__binary_op, _UnaryFunctor{__unary_op}, __assign_op,
+                                                       __assign_op, __get_data_op},
+        // scan between groups
+        unseq_backend::__scan</*inclusive=*/::std::true_type, _ExecutionPolicy, _BinaryOperation, _NoOpFunctor,
+                              _NoAssign, _Assigner, _NoOpFunctor, unseq_backend::__no_init_value<_Type>>{
+            __binary_op, _NoOpFunctor{}, __no_assign_op, __assign_op, __get_data_op},
+        // global scan
+        unseq_backend::__global_scan_functor<_Inclusive, _BinaryOperation, _InitType>{__binary_op, __init});
 }

 template <typename _SizeType>
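The braces were removable because the specialized (reduce-then-scan) branch already ends in a return, so the legacy path that follows only runs when that branch is not taken. Below is a minimal, self-contained C++ sketch of that dispatch shape; the function names, the size threshold, and the use of std::inclusive_scan for both paths are illustrative stand-ins, not oneDPL's actual heuristics or implementations.

#include <cstddef>
#include <numeric>
#include <vector>

// Hypothetical stand-ins for the two backends chosen by __parallel_transform_scan.
std::vector<int> scan_reduce_then_scan(const std::vector<int>& in)
{
    std::vector<int> out(in.size());
    std::inclusive_scan(in.begin(), in.end(), out.begin());
    return out;
}

std::vector<int> scan_legacy(const std::vector<int>& in)
{
    std::vector<int> out(in.size());
    std::inclusive_scan(in.begin(), in.end(), out.begin());
    return out;
}

std::vector<int> transform_scan_dispatch(const std::vector<int>& in)
{
    if (in.size() >= 4096) // hypothetical condition for taking the specialized path
    {
        return scan_reduce_then_scan(in); // early return, as in the branch above the hunk
    }
    // else use legacy scan implementation -- because the branch above returns,
    // no extra enclosing braces are needed around this fallback.
    return scan_legacy(in);
}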
7 changes: 6 additions & 1 deletion in the second changed file
@@ -162,6 +162,7 @@ __scan_through_elements_helper(const _SubGroup& __sub_group, _GenInput __gen_inp
     bool __is_full_thread = __subgroup_start_idx + __iters_per_item * __sub_group_size <= __n;
     if (__is_full_thread && __is_full_block)
     {
+        // For full block and full thread, we can unroll the loop
         auto __v = __gen_input(__in_rng, __start_idx);
         __sub_group_scan<__sub_group_size, __is_inclusive, __init_present>(__sub_group, __scan_input_transform(__v),
                                                                            __binary_op, __sub_group_carry);
@@ -184,6 +185,9 @@ __scan_through_elements_helper(const _SubGroup& __sub_group, _GenInput __gen_inp
     }
     else if (__is_full_thread)
     {
+        // For full thread but not full block, we can't unroll the loop, but we
+        // can proceed without special casing for partial subgroups.
+
         auto __v = __gen_input(__in_rng, __start_idx);
         __sub_group_scan<__sub_group_size, __is_inclusive, __init_present>(__sub_group, __scan_input_transform(__v),
                                                                            __binary_op, __sub_group_carry);
@@ -204,6 +208,7 @@ __scan_through_elements_helper(const _SubGroup& __sub_group, _GenInput __gen_inp
     }
     else
     {
+        // For partial thread, we need to handle the partial subgroup at the end of the range
         if (__sub_group_id < __active_subgroups)
         {
             auto __iters = oneapi::dpl::__internal::__dpl_ceiling_div(__n - __subgroup_start_idx, __sub_group_size);
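The three comments added above distinguish how many input elements a work-item's loop can assume are in range. As a rough serial analogy (not the SYCL kernel itself, and with hypothetical names), the "full thread" case has a fixed trip count that can be unrolled, while the "partial thread" case must clamp to the end of the range:

#include <cstddef>
#include <vector>

// Serial analogy of the full-thread vs. partial-thread cases (hypothetical simplification;
// the real kernel strides by sub-group size and also distinguishes full blocks).
int sum_chunk(const std::vector<int>& in, std::size_t start, std::size_t iters_per_item)
{
    const std::size_t n = in.size();
    int acc = 0;
    if (start + iters_per_item <= n) // "full thread": fixed trip count, unrollable
    {
        for (std::size_t j = 0; j < iters_per_item; ++j)
            acc += in[start + j];
    }
    else // "partial thread": handle the partial tail at the end of the range
    {
        for (std::size_t j = 0; start + j < n; ++j)
            acc += in[start + j];
    }
    return acc;
}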
@@ -552,7 +557,7 @@ struct __parallel_reduce_then_scan_scan_submitter<
                                                      __carry_last, __remaining_elements);
                 }

-                // steps 3/4) load global carry in from neighbor work-group
+                // steps 3+4) load global carry in from neighbor work-group
                 // and apply to local sub-group prefix carries
                 auto __carry_offset = 0;

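The "steps 3+4" comment refers to the carry-propagation phase of the reduce-then-scan scheme: each work-group picks up the carry produced by the groups to its left and folds it into its local prefix results. A minimal serial model of that structure follows (illustrative only; the block_size parameter and the int/plus specialization are assumptions, and the real kernels run the phases as separate SYCL submissions).

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <vector>

// Serial model of reduce-then-scan: 1) reduce each block, 2) exclusive-scan the block
// sums to get per-block carries, 3+4) load the neighbor carry and apply it while doing
// the local inclusive scan.
std::vector<int> reduce_then_scan(const std::vector<int>& in, std::size_t block_size)
{
    const std::size_t n = in.size();
    const std::size_t num_blocks = (n + block_size - 1) / block_size; // ceiling division

    // Step 1: per-block reduction.
    std::vector<int> block_sums(num_blocks, 0);
    for (std::size_t b = 0; b < num_blocks; ++b)
    {
        const std::size_t end = std::min(n, (b + 1) * block_size);
        for (std::size_t i = b * block_size; i < end; ++i)
            block_sums[b] += in[i];
    }

    // Step 2: exclusive scan of the block sums -> carry-in for each block.
    std::vector<int> carries(num_blocks, 0);
    std::exclusive_scan(block_sums.begin(), block_sums.end(), carries.begin(), 0);

    // Steps 3+4: local inclusive scan per block, seeded with the neighbor carry.
    std::vector<int> out(n);
    for (std::size_t b = 0; b < num_blocks; ++b)
    {
        int carry = carries[b];
        const std::size_t end = std::min(n, (b + 1) * block_size);
        for (std::size_t i = b * block_size; i < end; ++i)
        {
            carry += in[i];
            out[i] = carry;
        }
    }
    return out;
}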

0 comments on commit 36da55e
