From b551ce4ce48120dfa01539a2f9399c7123dec898 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Mon, 5 Aug 2024 13:36:33 -0400 Subject: [PATCH 01/14] Revert "remove copy_if pattern family from reduce_then_scan" This reverts commit 75eeca8712f16ea2b238a17b56657c04a9289e48. --- .../hetero/algorithm_ranges_impl_hetero.h | 16 +- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 156 +++++++++++++++--- .../dpl/pstl/hetero/dpcpp/sycl_traits.h | 38 +++++ .../device_copyable.pass.cpp | 38 +++++ test/support/utils_device_copyable.h | 30 ++++ 5 files changed, 244 insertions(+), 34 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index 1df37f8acbc..7dd0b9537b3 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -384,17 +384,17 @@ template oneapi::dpl::__internal::__difference_t<_Range2> __pattern_copy_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, - _Predicate __pred, _Assign) + _Predicate __pred, _Assign&& __assign) { - using _SizeType = decltype(__rng1.size()); - using _ReduceOp = ::std::plus<_SizeType>; + auto __n = __rng1.size(); + if (__n == 0) + return 0; - unseq_backend::__create_mask<_Predicate, _SizeType> __create_mask_op{__pred}; - unseq_backend::__copy_by_mask<_ReduceOp, _Assign, /*inclusive*/ ::std::true_type, 1> __copy_by_mask_op; + auto __res = oneapi::dpl::__par_backend_hetero::__parallel_copy_if( + _BackendTag{}, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__rng1), + std::forward<_Range2>(__rng2), __n, __pred, std::forward<_Assign>(__assign)); - return __ranges::__pattern_scan_copy(__tag, ::std::forward<_ExecutionPolicy>(__exec), - ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), - __create_mask_op, __copy_by_mask_op); + return __res.get(); //is a blocking call } 
//------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index a818d3d6eed..6cfe1f6c42d 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -555,10 +555,10 @@ struct __parallel_copy_if_static_single_group_submitter<_Size, _ElemsPerItem, _W __internal::__optional_kernel_name<_ScanKernelName...>> { template + typename _UnaryOp, typename _Assign> auto operator()(_Policy&& __policy, _InRng&& __in_rng, _OutRng&& __out_rng, ::std::size_t __n, _InitType __init, - _BinaryOperation __bin_op, _UnaryOp __unary_op) + _BinaryOperation __bin_op, _UnaryOp __unary_op, _Assign __assign) { using _ValueType = ::std::uint16_t; @@ -622,7 +622,8 @@ struct __parallel_copy_if_static_single_group_submitter<_Size, _ElemsPerItem, _W for (::std::uint16_t __idx = __item_id; __idx < __n; __idx += _WGSize) { if (__lacc[__idx]) - __out_rng[__lacc[__idx + __elems_per_wg]] = static_cast<__tuple_type>(__in_rng[__idx]); + __assign(static_cast<__tuple_type>(__in_rng[__idx]), + __out_rng[__lacc[__idx + __elems_per_wg]]); } const ::std::uint16_t __residual = __n % _WGSize; @@ -631,7 +632,8 @@ struct __parallel_copy_if_static_single_group_submitter<_Size, _ElemsPerItem, _W { auto __idx = __residual_start + __item_id; if (__lacc[__idx]) - __out_rng[__lacc[__idx + __elems_per_wg]] = static_cast<__tuple_type>(__in_rng[__idx]); + __assign(static_cast<__tuple_type>(__in_rng[__idx]), + __out_rng[__lacc[__idx + __elems_per_wg]]); } if (__item_id == 0) @@ -796,6 +798,76 @@ struct __simple_write_to_id } }; +template +struct __gen_mask +{ + template + bool + operator()(_InRng&& __in_rng, std::size_t __idx) const + { + return __pred(__in_rng[__idx]); + } + _Predicate __pred; +}; + +template +struct __gen_count_mask +{ + template + _SizeType + operator()(_InRng&& __in_rng, 
_SizeType __idx) const + { + return __gen_mask(std::forward<_InRng>(__in_rng), __idx) ? _SizeType{1} : _SizeType{0}; + } + _GenMask __gen_mask; +}; + +template +struct __gen_expand_count_mask +{ + template + auto + operator()(_InRng&& __in_rng, _SizeType __idx) const + { + // Explicitly creating this element type is necessary to avoid modifying the input data when _InRng is a + // zip_iterator which will return a tuple of references when dereferenced. With this explicit type, we copy + // the values of zipped the input types rather than their references. + using _ElementType = + oneapi::dpl::__internal::__decay_with_tuple_specialization_t>; + _ElementType ele = __in_rng[__idx]; + bool mask = __gen_mask(__in_rng, __idx); + return std::tuple(mask ? _SizeType{1} : _SizeType{0}, mask, ele); + } + _GenMask __gen_mask; +}; + +struct __get_zeroth_element +{ + template + auto& + operator()(_Tp&& __a) const + { + return std::get<0>(std::forward<_Tp>(__a)); + } +}; +template +struct __write_to_idx_if +{ + template + void + operator()(_OutRng&& __out_rng, _SizeType __idx, const ValueType& __v) const + { + // Use of an explicit cast to our internal tuple type is required to resolve conversion issues between our + // internal tuple and std::tuple. If the underlying type is not a tuple, then the type will just be passed through. 
+ using _ConvertedTupleType = + typename oneapi::dpl::__internal::__get_tuple_type(__v))>, + std::decay_t>::__type; + if (std::get<1>(__v)) + __assign(static_cast<_ConvertedTupleType>(std::get<2>(__v)), __out_rng[std::get<0>(__v) - 1 + __offset]); + } + Assign __assign; +}; + template auto @@ -874,9 +946,11 @@ struct __invoke_single_group_copy_if // Specialization for devices that have a max work-group size of at least 1024 static constexpr ::std::uint16_t __targeted_wg_size = 1024; - template <::std::uint16_t _Size, typename _ExecutionPolicy, typename _InRng, typename _OutRng, typename _Pred> + template auto - operator()(_ExecutionPolicy&& __exec, ::std::size_t __n, _InRng&& __in_rng, _OutRng&& __out_rng, _Pred&& __pred) + operator()(_ExecutionPolicy&& __exec, std::size_t __n, _InRng&& __in_rng, _OutRng&& __out_rng, _Pred&& __pred, + _Assign&& __assign) { constexpr ::std::uint16_t __wg_size = ::std::min(_Size, __targeted_wg_size); constexpr ::std::uint16_t __num_elems_per_item = ::oneapi::dpl::__internal::__dpl_ceiling_div(_Size, __wg_size); @@ -895,7 +969,8 @@ struct __invoke_single_group_copy_if return __par_backend_hetero::__parallel_copy_if_static_single_group_submitter< _SizeType, __num_elems_per_item, __wg_size, true, _FullKernelName>()( std::forward<_ExecutionPolicy>(__exec), std::forward<_InRng>(__in_rng), - std::forward<_OutRng>(__out_rng), __n, _InitType{}, _ReduceOp{}, std::forward<_Pred>(__pred)); + std::forward<_OutRng>(__out_rng), __n, _InitType{}, _ReduceOp{}, std::forward<_Pred>(__pred), + std::forward<_Assign>(__assign)); } else { @@ -908,11 +983,31 @@ struct __invoke_single_group_copy_if return __par_backend_hetero::__parallel_copy_if_static_single_group_submitter< _SizeType, __num_elems_per_item, __wg_size, false, _NonFullKernelName>()( std::forward<_ExecutionPolicy>(__exec), std::forward<_InRng>(__in_rng), - std::forward<_OutRng>(__out_rng), __n, _InitType{}, _ReduceOp{}, std::forward<_Pred>(__pred)); + std::forward<_OutRng>(__out_rng), __n, 
_InitType{}, _ReduceOp{}, std::forward<_Pred>(__pred), + std::forward<_Assign>(__assign)); } } }; +template +auto +__parallel_reduce_then_scan_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _GenMask __generate_mask, + _WriteOp __write_op) +{ + using _GenReduceInput = oneapi::dpl::__par_backend_hetero::__gen_count_mask<_GenMask>; + using _ReduceOp = std::plus<_Size>; + using _GenScanInput = oneapi::dpl::__par_backend_hetero::__gen_expand_count_mask<_GenMask>; + using _ScanInputTransform = oneapi::dpl::__par_backend_hetero::__get_zeroth_element; + + return __parallel_transform_reduce_then_scan( + __backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_InRng>(__in_rng), + std::forward<_OutRng>(__out_rng), _GenReduceInput{__generate_mask}, _ReduceOp{}, _GenScanInput{__generate_mask}, + _ScanInputTransform{}, __write_op, oneapi::dpl::unseq_backend::__no_init_value<_Size>{}, + /*_Inclusive=*/std::true_type{}); +} + template auto @@ -920,7 +1015,7 @@ __parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) { - using _ReduceOp = ::std::plus<_Size>; + using _ReduceOp = std::plus<_Size>; using _Assigner = unseq_backend::__scan_assigner; using _NoAssign = unseq_backend::__scan_no_assign; using _MaskAssigner = unseq_backend::__mask_assigner<1>; @@ -937,7 +1032,7 @@ __parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag return __parallel_transform_scan_base( __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), - oneapi::dpl::__ranges::make_zip_view( + oneapi::dpl::__ranges::zip_view( ::std::forward<_InRng>(__in_rng), oneapi::dpl::__ranges::all_view( __mask_buf.get_buffer())), @@ -954,10 +1049,11 @@ __parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag __copy_by_mask_op); } 
-template +template auto __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, - _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _Pred __pred) + _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _Pred __pred, _Assign&& __assign = _Assign{}) { using _SingleGroupInvoker = __invoke_single_group_copy_if<_Size>; @@ -971,30 +1067,38 @@ __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, // The kernel stores n integers for the predicate and another n integers for the offsets const auto __req_slm_size = sizeof(::std::uint16_t) * __n_uniform * 2; - constexpr ::std::uint16_t __single_group_upper_limit = 16384; + constexpr ::std::uint16_t __single_group_upper_limit = 2048; - ::std::size_t __max_wg_size = oneapi::dpl::__internal::__max_work_group_size(__exec); + std::size_t __max_wg_size = oneapi::dpl::__internal::__max_work_group_size(__exec); if (__n <= __single_group_upper_limit && __max_slm_size >= __req_slm_size && __max_wg_size >= _SingleGroupInvoker::__targeted_wg_size) { - using _SizeBreakpoints = - ::std::integer_sequence<::std::uint16_t, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384>; + using _SizeBreakpoints = ::std::integer_sequence<::std::uint16_t, 16, 32, 64, 128, 256, 512, 1024, 2048>; return __par_backend_hetero::__static_monotonic_dispatcher<_SizeBreakpoints>::__dispatch( - _SingleGroupInvoker{}, __n, ::std::forward<_ExecutionPolicy>(__exec), __n, ::std::forward<_InRng>(__in_rng), - ::std::forward<_OutRng>(__out_rng), __pred); + _SingleGroupInvoker{}, __n, std::forward<_ExecutionPolicy>(__exec), __n, std::forward<_InRng>(__in_rng), + std::forward<_OutRng>(__out_rng), __pred, std::forward<_Assign>(__assign)); + } + else if (oneapi::dpl::__par_backend_hetero::__prefer_reduce_then_scan(__exec)) + { + using _GenMask = oneapi::dpl::__par_backend_hetero::__gen_mask<_Pred>; + using _WriteOp = oneapi::dpl::__par_backend_hetero::__write_to_idx_if<0, _Assign>; + + return 
__parallel_reduce_then_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), + std::forward<_InRng>(__in_rng), std::forward<_OutRng>(__out_rng), __n, + _GenMask{__pred}, _WriteOp{std::forward<_Assign>(__assign)}); } else { - using _ReduceOp = ::std::plus<_Size>; - using CreateOp = unseq_backend::__create_mask<_Pred, _Size>; - using CopyOp = unseq_backend::__copy_by_mask<_ReduceOp, oneapi::dpl::__internal::__pstl_assign, - /*inclusive*/ ::std::true_type, 1>; - - return __parallel_scan_copy(__backend_tag, ::std::forward<_ExecutionPolicy>(__exec), - ::std::forward<_InRng>(__in_rng), ::std::forward<_OutRng>(__out_rng), __n, - CreateOp{__pred}, CopyOp{}); + using _ReduceOp = std::plus<_Size>; + using _CreateOp = unseq_backend::__create_mask<_Pred, _Size>; + using _CopyOp = unseq_backend::__copy_by_mask<_ReduceOp, _Assign, + /*inclusive*/ std::true_type, 1>; + + return __parallel_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), + std::forward<_InRng>(__in_rng), std::forward<_OutRng>(__out_rng), __n, + _CreateOp{__pred}, _CopyOp{_ReduceOp{}, std::forward<_Assign>(__assign)}); } } diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h index 2a948e205d1..12b294964b5 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h @@ -245,6 +245,18 @@ namespace oneapi::dpl::__par_backend_hetero template struct __gen_transform_input; +template +struct __gen_mask; + +template +struct __gen_count_mask; + +template +struct __gen_expand_count_mask; + +template +struct __write_to_idx_if; + template struct __early_exit_find_or; @@ -257,6 +269,32 @@ struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__par_backen { }; +template +struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__par_backend_hetero::__gen_mask, _Predicate)> + : oneapi::dpl::__internal::__are_all_device_copyable<_Predicate> +{ +}; + 
+template +struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__par_backend_hetero::__gen_count_mask, _GenMask)> + : oneapi::dpl::__internal::__are_all_device_copyable<_GenMask> +{ +}; + +template +struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__par_backend_hetero::__gen_expand_count_mask, + _GenMask)> + : oneapi::dpl::__internal::__are_all_device_copyable<_GenMask> +{ +}; + +template +struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__par_backend_hetero::__write_to_idx_if, __offset, + Assign)> + : oneapi::dpl::__internal::__are_all_device_copyable +{ +}; + template struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__par_backend_hetero::__early_exit_find_or, _ExecutionPolicy, _Pred)> diff --git a/test/general/implementation_details/device_copyable.pass.cpp b/test/general/implementation_details/device_copyable.pass.cpp index 481a5cb1c05..7c51200e9e8 100644 --- a/test/general/implementation_details/device_copyable.pass.cpp +++ b/test/general/implementation_details/device_copyable.pass.cpp @@ -150,6 +150,25 @@ test_device_copyable() sycl::is_device_copyable_v>, "__gen_transform_input is not device copyable with device copyable types"); + //__gen_mask + static_assert(sycl::is_device_copyable_v>, + "__gen_mask is not device copyable with device copyable types"); + + //__gen_count_mask + static_assert(sycl::is_device_copyable_v>>, + "__gen_count_mask is not device copyable with device copyable types"); + + //__gen_expand_count_mask + static_assert(sycl::is_device_copyable_v>>, + "__gen_expand_count_mask is not device copyable with device copyable types"); + + //__write_to_idx_if + static_assert( + sycl::is_device_copyable_v>, + "__write_to_idx_if is not device copyable with device copyable types"); + // __early_exit_find_or static_assert( sycl::is_device_copyable_v< @@ -353,6 +372,25 @@ test_non_device_copyable() !sycl::is_device_copyable_v>, "__gen_transform_input is device copyable with non 
device copyable types"); + //__gen_mask + static_assert(!sycl::is_device_copyable_v>, + "__gen_mask is device copyable with non device copyable types"); + + //__gen_count_mask + static_assert(!sycl::is_device_copyable_v>>, + "__gen_count_mask is device copyable with non device copyable types"); + + //__gen_expand_count_mask + static_assert(!sycl::is_device_copyable_v>>, + "__gen_expand_count_mask is device copyable with non device copyable types"); + + //__write_to_idx_if + static_assert(!sycl::is_device_copyable_v< + oneapi::dpl::__par_backend_hetero::__write_to_idx_if<0, assign_non_device_copyable>>, + "__write_to_idx_if is device copyable with non device copyable types"); + // __early_exit_find_or static_assert( !sycl::is_device_copyable_v + void + operator()(const _Xp& __x, _Yp&& __y) const + { + std::forward<_Yp>(__y) = __x; + } +}; + +struct assign_device_copyable +{ + assign_device_copyable(const assign_device_copyable& other) { std::cout << "non trivial copy ctor\n"; } + template + void + operator()(const _Xp& __x, _Yp&& __y) const + { + std::forward<_Yp>(__y) = __x; + } +}; + // Device copyable int wrapper struct used in testing as surrogate for values, value types, etc. 
// Intentionally non-trivially copyable to test that device_copyable speciailzation works and we are not // relying on trivial copyability @@ -160,6 +185,11 @@ struct sycl::is_device_copyable : std::true_typ { }; +template <> +struct sycl::is_device_copyable : std::true_type +{ +}; + template <> struct sycl::is_device_copyable : std::true_type { From 3025631cc23fca93e7e550472043d61afba23bc5 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Wed, 7 Aug 2024 13:40:04 -0400 Subject: [PATCH 02/14] better match existing code from ranges impl --- include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 6cfe1f6c42d..0a970e4d21a 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -1033,9 +1033,8 @@ __parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag return __parallel_transform_scan_base( __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__ranges::zip_view( - ::std::forward<_InRng>(__in_rng), - oneapi::dpl::__ranges::all_view( - __mask_buf.get_buffer())), + __in_rng, oneapi::dpl::__ranges::all_view( + __mask_buf.get_buffer())), ::std::forward<_OutRng>(__out_rng), __reduce_op, _InitType{}, // local scan unseq_backend::__scan Date: Wed, 7 Aug 2024 13:41:06 -0400 Subject: [PATCH 03/14] remove unrelated code change Signed-off-by: Dan Hoeflinger --- include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 0a970e4d21a..228c5988c5a 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ 
b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -1015,7 +1015,7 @@ __parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) { - using _ReduceOp = std::plus<_Size>; + using _ReduceOp = ::std::plus<_Size>; using _Assigner = unseq_backend::__scan_assigner; using _NoAssign = unseq_backend::__scan_no_assign; using _MaskAssigner = unseq_backend::__mask_assigner<1>; From afdfce2ea14da4aeebba75c448c26412b7b89e77 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Wed, 7 Aug 2024 15:17:53 -0400 Subject: [PATCH 04/14] standardizing convention on forwarding callables Signed-off-by: Dan Hoeflinger --- .../hetero/algorithm_ranges_impl_hetero.h | 4 ++-- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 20 +++++++++---------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index 7dd0b9537b3..cdb555f1790 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -384,7 +384,7 @@ template oneapi::dpl::__internal::__difference_t<_Range2> __pattern_copy_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, - _Predicate __pred, _Assign&& __assign) + _Predicate __pred, _Assign __assign) { auto __n = __rng1.size(); if (__n == 0) @@ -392,7 +392,7 @@ __pattern_copy_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _R auto __res = oneapi::dpl::__par_backend_hetero::__parallel_copy_if( _BackendTag{}, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__rng1), - std::forward<_Range2>(__rng2), __n, __pred, std::forward<_Assign>(__assign)); + std::forward<_Range2>(__rng2), __n, __pred, __assign); return __res.get(); //is a blocking call } diff 
--git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 228c5988c5a..5fc3c481069 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -850,7 +850,7 @@ struct __get_zeroth_element return std::get<0>(std::forward<_Tp>(__a)); } }; -template +template struct __write_to_idx_if { template @@ -949,8 +949,8 @@ struct __invoke_single_group_copy_if template auto - operator()(_ExecutionPolicy&& __exec, std::size_t __n, _InRng&& __in_rng, _OutRng&& __out_rng, _Pred&& __pred, - _Assign&& __assign) + operator()(_ExecutionPolicy&& __exec, std::size_t __n, _InRng&& __in_rng, _OutRng&& __out_rng, _Pred __pred, + _Assign __assign) { constexpr ::std::uint16_t __wg_size = ::std::min(_Size, __targeted_wg_size); constexpr ::std::uint16_t __num_elems_per_item = ::oneapi::dpl::__internal::__dpl_ceiling_div(_Size, __wg_size); @@ -969,8 +969,7 @@ struct __invoke_single_group_copy_if return __par_backend_hetero::__parallel_copy_if_static_single_group_submitter< _SizeType, __num_elems_per_item, __wg_size, true, _FullKernelName>()( std::forward<_ExecutionPolicy>(__exec), std::forward<_InRng>(__in_rng), - std::forward<_OutRng>(__out_rng), __n, _InitType{}, _ReduceOp{}, std::forward<_Pred>(__pred), - std::forward<_Assign>(__assign)); + std::forward<_OutRng>(__out_rng), __n, _InitType{}, _ReduceOp{}, __pred, __assign); } else { @@ -983,8 +982,7 @@ struct __invoke_single_group_copy_if return __par_backend_hetero::__parallel_copy_if_static_single_group_submitter< _SizeType, __num_elems_per_item, __wg_size, false, _NonFullKernelName>()( std::forward<_ExecutionPolicy>(__exec), std::forward<_InRng>(__in_rng), - std::forward<_OutRng>(__out_rng), __n, _InitType{}, _ReduceOp{}, std::forward<_Pred>(__pred), - std::forward<_Assign>(__assign)); + std::forward<_OutRng>(__out_rng), __n, _InitType{}, _ReduceOp{}, __pred, __assign); } } }; 
@@ -1052,7 +1050,7 @@ template auto __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, - _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _Pred __pred, _Assign&& __assign = _Assign{}) + _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _Pred __pred, _Assign __assign = _Assign{}) { using _SingleGroupInvoker = __invoke_single_group_copy_if<_Size>; @@ -1077,7 +1075,7 @@ __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, return __par_backend_hetero::__static_monotonic_dispatcher<_SizeBreakpoints>::__dispatch( _SingleGroupInvoker{}, __n, std::forward<_ExecutionPolicy>(__exec), __n, std::forward<_InRng>(__in_rng), - std::forward<_OutRng>(__out_rng), __pred, std::forward<_Assign>(__assign)); + std::forward<_OutRng>(__out_rng), __pred, __assign); } else if (oneapi::dpl::__par_backend_hetero::__prefer_reduce_then_scan(__exec)) { @@ -1086,7 +1084,7 @@ __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, return __parallel_reduce_then_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_InRng>(__in_rng), std::forward<_OutRng>(__out_rng), __n, - _GenMask{__pred}, _WriteOp{std::forward<_Assign>(__assign)}); + _GenMask{__pred}, _WriteOp{__assign}); } else { @@ -1097,7 +1095,7 @@ __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, return __parallel_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_InRng>(__in_rng), std::forward<_OutRng>(__out_rng), __n, - _CreateOp{__pred}, _CopyOp{_ReduceOp{}, std::forward<_Assign>(__assign)}); + _CreateOp{__pred}, _CopyOp{_ReduceOp{}, __assign}); } } From 21684aa9535cddb639f0217b76b3d809df84a9ad Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Wed, 7 Aug 2024 15:18:36 -0400 Subject: [PATCH 05/14] ::std:: -> std:: Signed-off-by: Dan Hoeflinger --- include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 5fc3c481069..6a845cd4aa5 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -1071,7 +1071,7 @@ __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, if (__n <= __single_group_upper_limit && __max_slm_size >= __req_slm_size && __max_wg_size >= _SingleGroupInvoker::__targeted_wg_size) { - using _SizeBreakpoints = ::std::integer_sequence<::std::uint16_t, 16, 32, 64, 128, 256, 512, 1024, 2048>; + using _SizeBreakpoints = std::integer_sequence; return __par_backend_hetero::__static_monotonic_dispatcher<_SizeBreakpoints>::__dispatch( _SingleGroupInvoker{}, __n, std::forward<_ExecutionPolicy>(__exec), __n, std::forward<_InRng>(__in_rng), From 2662cf2a6e642f78b54dd37bc9f985fd3a5f4157 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Wed, 7 Aug 2024 15:28:46 -0400 Subject: [PATCH 06/14] removing unused forwarding reference --- include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 6a845cd4aa5..533971ed467 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -803,7 +803,7 @@ struct __gen_mask { template bool - operator()(_InRng&& __in_rng, std::size_t __idx) const + operator()(const _InRng& __in_rng, std::size_t __idx) const { return __pred(__in_rng[__idx]); } From a00304b66d5b486bd8764a5b9f2633128fe6eeee Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Wed, 7 Aug 2024 15:31:31 -0400 Subject: [PATCH 07/14] forwarding one range where possible, removing forwarding reference in another instance Signed-off-by: Dan 
Hoeflinger --- include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 533971ed467..547804d081a 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -835,7 +835,7 @@ struct __gen_expand_count_mask using _ElementType = oneapi::dpl::__internal::__decay_with_tuple_specialization_t>; _ElementType ele = __in_rng[__idx]; - bool mask = __gen_mask(__in_rng, __idx); + bool mask = __gen_mask(std::forward<_InRng>(__in_rng), __idx); return std::tuple(mask ? _SizeType{1} : _SizeType{0}, mask, ele); } _GenMask __gen_mask; @@ -855,7 +855,7 @@ struct __write_to_idx_if { template void - operator()(_OutRng&& __out_rng, _SizeType __idx, const ValueType& __v) const + operator()(const _OutRng& __out_rng, _SizeType __idx, const ValueType& __v) const { // Use of an explicit cast to our internal tuple type is required to resolve conversion issues between our // internal tuple and std::tuple. If the underlying type is not a tuple, then the type will just be passed through. 
From 2c60b7584cede988678fa609fb6f2706756e91f6 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Thu, 15 Aug 2024 16:23:26 -0400 Subject: [PATCH 08/14] fix naming of __prefer_reduce_then_scan -> __is_gpu_with_sg_32 Signed-off-by: Dan Hoeflinger --- include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 547804d081a..8de16bd879c 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -1077,7 +1077,7 @@ __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _SingleGroupInvoker{}, __n, std::forward<_ExecutionPolicy>(__exec), __n, std::forward<_InRng>(__in_rng), std::forward<_OutRng>(__out_rng), __pred, __assign); } - else if (oneapi::dpl::__par_backend_hetero::__prefer_reduce_then_scan(__exec)) + else if (oneapi::dpl::__par_backend_hetero::__is_gpu_with_sg_32(__exec)) { using _GenMask = oneapi::dpl::__par_backend_hetero::__gen_mask<_Pred>; using _WriteOp = oneapi::dpl::__par_backend_hetero::__write_to_idx_if<0, _Assign>; From c767d60fa3d397e43c785d9a24f97d83d803d27b Mon Sep 17 00:00:00 2001 From: Matthew Michel Date: Mon, 19 Aug 2024 15:35:49 -0500 Subject: [PATCH 09/14] Remove unnecessary __decay_with_tuple_specialization_t Signed-off-by: Matthew Michel --- include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 8de16bd879c..1d23b54c4d7 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -832,8 +832,7 @@ struct __gen_expand_count_mask // Explicitly creating this 
element type is necessary to avoid modifying the input data when _InRng is a // zip_iterator which will return a tuple of references when dereferenced. With this explicit type, we copy // the values of zipped the input types rather than their references. - using _ElementType = - oneapi::dpl::__internal::__decay_with_tuple_specialization_t>; + using _ElementType = oneapi::dpl::__internal::__value_t<_InRng>; _ElementType ele = __in_rng[__idx]; bool mask = __gen_mask(std::forward<_InRng>(__in_rng), __idx); return std::tuple(mask ? _SizeType{1} : _SizeType{0}, mask, ele); From 45d90605c01fa4235a73f7e8416e9e04a19f1357 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Wed, 21 Aug 2024 14:25:39 -0400 Subject: [PATCH 10/14] explicit typing of range size Signed-off-by: Dan Hoeflinger --- include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index cdb555f1790..1fd7c4dd48a 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -386,7 +386,7 @@ oneapi::dpl::__internal::__difference_t<_Range2> __pattern_copy_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Predicate __pred, _Assign __assign) { - auto __n = __rng1.size(); + oneapi::dpl::__internal::__difference_t<_Range1> __n = __rng1.size(); if (__n == 0) return 0; From 7fde4c2bf1e7ff0e715176846573df213242d020 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Thu, 22 Aug 2024 08:42:47 -0400 Subject: [PATCH 11/14] idx -> id Signed-off-by: Dan Hoeflinger --- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 22 +++++++++---------- .../dpl/pstl/hetero/dpcpp/sycl_traits.h | 4 ++-- .../device_copyable.pass.cpp | 12 +++++----- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git 
a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 1d23b54c4d7..ec1b5b230ca 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -803,9 +803,9 @@ struct __gen_mask { template bool - operator()(const _InRng& __in_rng, std::size_t __idx) const + operator()(const _InRng& __in_rng, std::size_t __id) const { - return __pred(__in_rng[__idx]); + return __pred(__in_rng[__id]); } _Predicate __pred; }; @@ -815,9 +815,9 @@ struct __gen_count_mask { template _SizeType - operator()(_InRng&& __in_rng, _SizeType __idx) const + operator()(_InRng&& __in_rng, _SizeType __id) const { - return __gen_mask(std::forward<_InRng>(__in_rng), __idx) ? _SizeType{1} : _SizeType{0}; + return __gen_mask(std::forward<_InRng>(__in_rng), __id) ? _SizeType{1} : _SizeType{0}; } _GenMask __gen_mask; }; @@ -827,14 +827,14 @@ struct __gen_expand_count_mask { template auto - operator()(_InRng&& __in_rng, _SizeType __idx) const + operator()(_InRng&& __in_rng, _SizeType __id) const { // Explicitly creating this element type is necessary to avoid modifying the input data when _InRng is a // zip_iterator which will return a tuple of references when dereferenced. With this explicit type, we copy // the values of zipped the input types rather than their references. using _ElementType = oneapi::dpl::__internal::__value_t<_InRng>; - _ElementType ele = __in_rng[__idx]; - bool mask = __gen_mask(std::forward<_InRng>(__in_rng), __idx); + _ElementType ele = __in_rng[__id]; + bool mask = __gen_mask(std::forward<_InRng>(__in_rng), __id); return std::tuple(mask ? 
_SizeType{1} : _SizeType{0}, mask, ele); } _GenMask __gen_mask; @@ -850,17 +850,17 @@ struct __get_zeroth_element } }; template -struct __write_to_idx_if +struct __write_to_id_if { template void - operator()(const _OutRng& __out_rng, _SizeType __idx, const ValueType& __v) const + operator()(const _OutRng& __out_rng, _SizeType __id, const ValueType& __v) const { // Use of an explicit cast to our internal tuple type is required to resolve conversion issues between our // internal tuple and std::tuple. If the underlying type is not a tuple, then the type will just be passed through. using _ConvertedTupleType = typename oneapi::dpl::__internal::__get_tuple_type(__v))>, - std::decay_t>::__type; + std::decay_t>::__type; if (std::get<1>(__v)) __assign(static_cast<_ConvertedTupleType>(std::get<2>(__v)), __out_rng[std::get<0>(__v) - 1 + __offset]); } @@ -1079,7 +1079,7 @@ __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, else if (oneapi::dpl::__par_backend_hetero::__is_gpu_with_sg_32(__exec)) { using _GenMask = oneapi::dpl::__par_backend_hetero::__gen_mask<_Pred>; - using _WriteOp = oneapi::dpl::__par_backend_hetero::__write_to_idx_if<0, _Assign>; + using _WriteOp = oneapi::dpl::__par_backend_hetero::__write_to_id_if<0, _Assign>; return __parallel_reduce_then_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_InRng>(__in_rng), std::forward<_OutRng>(__out_rng), __n, diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h index 12b294964b5..44f562620f0 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h @@ -255,7 +255,7 @@ template struct __gen_expand_count_mask; template -struct __write_to_idx_if; +struct __write_to_id_if; template struct __early_exit_find_or; @@ -289,7 +289,7 @@ struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__par_backen }; template -struct 
sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__par_backend_hetero::__write_to_idx_if, __offset, +struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__par_backend_hetero::__write_to_id_if, __offset, Assign)> : oneapi::dpl::__internal::__are_all_device_copyable { diff --git a/test/general/implementation_details/device_copyable.pass.cpp b/test/general/implementation_details/device_copyable.pass.cpp index 7c51200e9e8..00e84cea005 100644 --- a/test/general/implementation_details/device_copyable.pass.cpp +++ b/test/general/implementation_details/device_copyable.pass.cpp @@ -164,10 +164,10 @@ test_device_copyable() oneapi::dpl::__par_backend_hetero::__gen_mask>>, "__gen_expand_count_mask is not device copyable with device copyable types"); - //__write_to_idx_if + //__write_to_id_if static_assert( - sycl::is_device_copyable_v>, - "__write_to_idx_if is not device copyable with device copyable types"); + sycl::is_device_copyable_v>, + "__write_to_id_if is not device copyable with device copyable types"); // __early_exit_find_or static_assert( @@ -386,10 +386,10 @@ test_non_device_copyable() oneapi::dpl::__par_backend_hetero::__gen_mask>>, "__gen_expand_count_mask is device copyable with non device copyable types"); - //__write_to_idx_if + //__write_to_id_if static_assert(!sycl::is_device_copyable_v< - oneapi::dpl::__par_backend_hetero::__write_to_idx_if<0, assign_non_device_copyable>>, - "__write_to_idx_if is device copyable with non device copyable types"); + oneapi::dpl::__par_backend_hetero::__write_to_id_if<0, assign_non_device_copyable>>, + "__write_to_id_if is device copyable with non device copyable types"); // __early_exit_find_or static_assert( From 303c5cf4cc60fdf20b42bd888d53867ed2be12af Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Thu, 22 Aug 2024 15:46:20 -0400 Subject: [PATCH 12/14] address review feedback Signed-off-by: Dan Hoeflinger --- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 10 +++++----- 
include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index ec1b5b230ca..22b2ced1c14 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -831,7 +831,7 @@ struct __gen_expand_count_mask { // Explicitly creating this element type is necessary to avoid modifying the input data when _InRng is a // zip_iterator which will return a tuple of references when dereferenced. With this explicit type, we copy - // the values of zipped the input types rather than their references. + // the values of zipped input types rather than their references. using _ElementType = oneapi::dpl::__internal::__value_t<_InRng>; _ElementType ele = __in_rng[__id]; bool mask = __gen_mask(std::forward<_InRng>(__in_rng), __id); @@ -849,12 +849,12 @@ struct __get_zeroth_element return std::get<0>(std::forward<_Tp>(__a)); } }; -template +template struct __write_to_id_if { - template + template void - operator()(const _OutRng& __out_rng, _SizeType __id, const ValueType& __v) const + operator()(const _OutRng& __out_rng, _SizeType __id, const _ValueType& __v) const { // Use of an explicit cast to our internal tuple type is required to resolve conversion issues between our // internal tuple and std::tuple. If the underlying type is not a tuple, then the type will just be passed through. 
@@ -864,7 +864,7 @@ struct __write_to_id_if if (std::get<1>(__v)) __assign(static_cast<_ConvertedTupleType>(std::get<2>(__v)), __out_rng[std::get<0>(__v) - 1 + __offset]); } - Assign __assign; + _Assign __assign; }; template struct __gen_expand_count_mask; -template +template struct __write_to_id_if; template @@ -288,10 +288,10 @@ struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__par_backen { }; -template +template struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__par_backend_hetero::__write_to_id_if, __offset, - Assign)> - : oneapi::dpl::__internal::__are_all_device_copyable + _Assign)> + : oneapi::dpl::__internal::__are_all_device_copyable<_Assign> { }; From 75c55c46a6c66597e5548848f61638dcc4f35dc6 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Tue, 27 Aug 2024 13:47:30 -0400 Subject: [PATCH 13/14] fixing output range types Signed-off-by: Dan Hoeflinger --- include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 22b2ced1c14..3aed84386ec 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -854,7 +854,7 @@ struct __write_to_id_if { template void - operator()(const _OutRng& __out_rng, _SizeType __id, const _ValueType& __v) const + operator()(_OutRng& __out_rng, _SizeType __id, const _ValueType& __v) const { // Use of an explicit cast to our internal tuple type is required to resolve conversion issues between our // internal tuple and std::tuple. If the underlying type is not a tuple, then the type will just be passed through.
From 19185180fac14cdd4725e4993cdb62c3c90b00fa Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Fri, 30 Aug 2024 13:51:17 -0400 Subject: [PATCH 14/14] clang-format Signed-off-by: Dan Hoeflinger --- .../oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 2 +- .../general/implementation_details/device_copyable.pass.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 3aed84386ec..f606cd69e9b 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -1090,7 +1090,7 @@ __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, using _ReduceOp = std::plus<_Size>; using _CreateOp = unseq_backend::__create_mask<_Pred, _Size>; using _CopyOp = unseq_backend::__copy_by_mask<_ReduceOp, _Assign, - /*inclusive*/ std::true_type, 1>; + /*inclusive*/ std::true_type, 1>; return __parallel_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_InRng>(__in_rng), std::forward<_OutRng>(__out_rng), __n, diff --git a/test/general/implementation_details/device_copyable.pass.cpp b/test/general/implementation_details/device_copyable.pass.cpp index 00e84cea005..2720c87d879 100644 --- a/test/general/implementation_details/device_copyable.pass.cpp +++ b/test/general/implementation_details/device_copyable.pass.cpp @@ -387,9 +387,9 @@ test_non_device_copyable() "__gen_expand_count_mask is device copyable with non device copyable types"); //__write_to_id_if - static_assert(!sycl::is_device_copyable_v< - oneapi::dpl::__par_backend_hetero::__write_to_id_if<0, assign_non_device_copyable>>, - "__write_to_id_if is device copyable with non device copyable types"); + static_assert( + !sycl::is_device_copyable_v>, + "__write_to_id_if is device copyable with non device copyable types"); // __early_exit_find_or 
static_assert(