From 5c9668fd2e7fd9733337abb3939e711fa8371ac4 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Mon, 5 Aug 2024 13:40:10 -0400 Subject: [PATCH 1/9] Revert "remove partition pattern family from reduce_then_scan" This reverts commit 3d69c3c2c2417a9e9583c045c4a3000139c25ac2. --- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 25 ++++++---- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 46 +++++++++++++++++++ .../dpl/pstl/hetero/dpcpp/sycl_traits.h | 10 ++++ .../device_copyable.pass.cpp | 10 ++++ 4 files changed, 81 insertions(+), 10 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 9fc492006c8..a2805387be7 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -952,19 +952,24 @@ __pattern_partition_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __e return ::std::make_pair(__result1, __result2); using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type; - using _ReduceOp = ::std::plus<_It1DifferenceType>; - unseq_backend::__create_mask<_UnaryPredicate, _It1DifferenceType> __create_mask_op{__pred}; - unseq_backend::__partition_by_mask<_ReduceOp, /*inclusive*/ ::std::true_type> __copy_by_mask_op{_ReduceOp{}}; + _It1DifferenceType __n = __last - __first; - auto __result = __pattern_scan_copy( - __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - __par_backend_hetero::zip( - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__result1), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__result2)), - __create_mask_op, __copy_by_mask_op); + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); + auto __buf1 = __keep1(__first, __last); + + auto __zipped_res = __par_backend_hetero::zip( + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__result1), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__result2)); + + auto __keep2 = + oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, decltype(__zipped_res)>(); + auto __buf2 = __keep2(__zipped_res, __zipped_res + __n); + + auto __result = oneapi::dpl::__par_backend_hetero::__parallel_partition_copy( + _BackendTag{}, std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __pred); - return ::std::make_pair(__result1 + __result.second, __result2 + (__last - __first - __result.second)); + return std::make_pair(__result1 + __result.get(), __result2 + (__last - __first - __result.get())); } //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index f606cd69e9b..f9ca0414b7e 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -867,6 +867,25 @@ struct __write_to_id_if _Assign __assign; }; +template +struct __write_to_idx_if_else +{ + template + void + operator()(_OutRng&& __out_rng, _SizeType __idx, const ValueType& __v) const + { + using _ConvertedTupleType = + typename oneapi::dpl::__internal::__get_tuple_type(__v))>, + std::decay_t>::__type; + if (std::get<1>(__v)) + __assign(static_cast<_ConvertedTupleType>(std::get<2>(__v)), std::get<0>(__out_rng[std::get<0>(__v) - 1])); + else + __assign(static_cast<_ConvertedTupleType>(std::get<2>(__v)), + std::get<1>(__out_rng[__idx - std::get<0>(__v)])); + } + Assign __assign; +}; + template auto @@ -1045,6 +1064,33 @@ __parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag __copy_by_mask_op); } +template +auto +__parallel_partition_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _Range1&& __rng, _Range2&& __result, _UnaryPredicate __pred) +{ + auto __n = __rng.size(); + if (oneapi::dpl::__par_backend_hetero::__prefer_reduce_then_scan(__exec)) + { + using _GenMask = oneapi::dpl::__par_backend_hetero::__gen_mask<_UnaryPredicate>; + using _WriteOp = + oneapi::dpl::__par_backend_hetero::__write_to_idx_if_else; + + return __parallel_reduce_then_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), + std::forward<_Range1>(__rng), std::forward<_Range2>(__result), __n, + _GenMask{__pred}, _WriteOp{}); + } + else + { + using _ReduceOp = std::plus; + using _CreateOp = unseq_backend::__create_mask<_UnaryPredicate, decltype(__n)>; + using _CopyOp = unseq_backend::__partition_by_mask<_ReduceOp, /*inclusive*/ std::true_type>; + + return __parallel_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__rng), + std::forward<_Range2>(__result), __n, _CreateOp{__pred}, _CopyOp{_ReduceOp{}}); + } +} + template auto diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h index d7ec0189b45..7e09be021fb 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h @@ -257,6 +257,9 @@ struct __gen_expand_count_mask; template struct __write_to_id_if; +template +struct __write_to_idx_if_else; + template struct __early_exit_find_or; @@ -295,6 +298,13 @@ struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__par_backen { }; +template +struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__par_backend_hetero::__write_to_idx_if_else, + Assign)> + : oneapi::dpl::__internal::__are_all_device_copyable +{ +}; + template struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__par_backend_hetero::__early_exit_find_or, _ExecutionPolicy, _Pred)> diff --git a/test/general/implementation_details/device_copyable.pass.cpp b/test/general/implementation_details/device_copyable.pass.cpp index 2720c87d879..41428356610 100644 --- a/test/general/implementation_details/device_copyable.pass.cpp +++ b/test/general/implementation_details/device_copyable.pass.cpp @@ -169,6 +169,11 @@ test_device_copyable() sycl::is_device_copyable_v>, "__write_to_id_if is not device copyable with device copyable types"); + //__write_to_idx_if_else + static_assert( + sycl::is_device_copyable_v>, + "__write_to_idx_if_else is not device copyable with device copyable types"); + // __early_exit_find_or static_assert( sycl::is_device_copyable_v< @@ -391,6 +396,11 @@ test_non_device_copyable() !sycl::is_device_copyable_v>, "__write_to_id_if is device copyable with non device copyable types"); + //__write_to_idx_if_else + static_assert(!sycl::is_device_copyable_v< + oneapi::dpl::__par_backend_hetero::__write_to_idx_if_else>, + "__write_to_idx_if_else is device copyable with non device copyable types"); + // __early_exit_find_or static_assert( !sycl::is_device_copyable_v Date: Wed, 7 Aug 2024 15:26:14 -0400 Subject: [PATCH 2/9] clean up to be more clear and not call `get()` twice Signed-off-by: Dan Hoeflinger --- include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index a2805387be7..c7425a3c5b2 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -969,7 +969,9 @@ __pattern_partition_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __e auto __result = oneapi::dpl::__par_backend_hetero::__parallel_partition_copy( _BackendTag{}, std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __pred); - return std::make_pair(__result1 + __result.get(), __result2 + (__last - __first - __result.get())); + _It1DifferenceType __num_true = __result.get(); // blocking call + + return std::make_pair(__result1 + __num_true, __result2 + (__last - __first - __num_true)); } //------------------------------------------------------------------------ From acf4260fa6eb7028b5a4f4f5b082640e3591a104 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Wed, 7 Aug 2024 15:37:17 -0400 Subject: [PATCH 3/9] removing unused forwarding references Signed-off-by: Dan Hoeflinger --- include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index f9ca0414b7e..ae64070d896 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -872,7 +872,7 @@ struct __write_to_idx_if_else { template void - operator()(_OutRng&& __out_rng, _SizeType __idx, const ValueType& __v) const + operator()(const _OutRng& __out_rng, _SizeType __idx, const ValueType& __v) const { using _ConvertedTupleType = typename oneapi::dpl::__internal::__get_tuple_type(__v))>, From 4b3929335d6ba1acf7a57960b1c3c96bbc9f55da Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Wed, 7 Aug 2024 16:37:18 -0400 Subject: [PATCH 4/9] remove unnecessary default Signed-off-by: Dan Hoeflinger --- include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index ae64070d896..a36d6cb266b 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -867,7 +867,7 @@ struct __write_to_id_if _Assign __assign; }; -template +template struct __write_to_idx_if_else { template From a48ac4a385a2fb1154ba9c87343b088fa11a843d Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Thu, 15 Aug 2024 16:46:07 -0400 Subject: [PATCH 5/9] fix naming of __prefer_reduce_then_scan -> __is_gpu_with_sg_32 Signed-off-by: Dan Hoeflinger --- include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index a36d6cb266b..b65d0d5fc88 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -1070,7 +1070,7 @@ __parallel_partition_copy(oneapi::dpl::__internal::__device_backend_tag __backen _Range1&& __rng, _Range2&& __result, _UnaryPredicate __pred) { auto __n = __rng.size(); - if (oneapi::dpl::__par_backend_hetero::__prefer_reduce_then_scan(__exec)) + if (oneapi::dpl::__par_backend_hetero::__is_gpu_with_sg_32(__exec)) { using _GenMask = oneapi::dpl::__par_backend_hetero::__gen_mask<_UnaryPredicate>; using _WriteOp = From fd3c2e8302dcc7ae3e22e0f0db061b311340143e Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Wed, 21 Aug 2024 14:28:44 -0400 Subject: [PATCH 6/9] explicit typing of range size Signed-off-by: Dan Hoeflinger --- include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index b65d0d5fc88..dacb91691a2 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -1069,7 +1069,7 @@ auto __parallel_partition_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, _Range1&& __rng, _Range2&& __result, _UnaryPredicate __pred) { - auto __n = __rng.size(); + oneapi::dpl::__internal::__difference_t<_Range1> __n = __rng.size(); if (oneapi::dpl::__par_backend_hetero::__is_gpu_with_sg_32(__exec)) { using _GenMask = oneapi::dpl::__par_backend_hetero::__gen_mask<_UnaryPredicate>; From a7c4835fa3a2e7af340ea57fb51936956f4cad57 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Thu, 22 Aug 2024 08:47:16 -0400 Subject: [PATCH 7/9] idx -> id Signed-off-by: Dan Hoeflinger --- .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 10 +++++----- include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h | 4 ++-- .../implementation_details/device_copyable.pass.cpp | 12 ++++++------ 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index dacb91691a2..68dbd12bca7 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -868,20 +868,20 @@ struct __write_to_id_if }; template -struct __write_to_idx_if_else +struct __write_to_id_if_else { template void - operator()(const _OutRng& __out_rng, _SizeType __idx, const ValueType& __v) const + operator()(const _OutRng& __out_rng, _SizeType __id, const ValueType& __v) const { using _ConvertedTupleType = typename oneapi::dpl::__internal::__get_tuple_type(__v))>, - std::decay_t>::__type; + std::decay_t>::__type; if (std::get<1>(__v)) __assign(static_cast<_ConvertedTupleType>(std::get<2>(__v)), std::get<0>(__out_rng[std::get<0>(__v) - 1])); else __assign(static_cast<_ConvertedTupleType>(std::get<2>(__v)), - std::get<1>(__out_rng[__idx - std::get<0>(__v)])); + std::get<1>(__out_rng[__id - std::get<0>(__v)])); } Assign __assign; }; @@ -1074,7 +1074,7 @@ __parallel_partition_copy(oneapi::dpl::__internal::__device_backend_tag __backen { using _GenMask = oneapi::dpl::__par_backend_hetero::__gen_mask<_UnaryPredicate>; using _WriteOp = - oneapi::dpl::__par_backend_hetero::__write_to_idx_if_else; + oneapi::dpl::__par_backend_hetero::__write_to_id_if_else; return __parallel_reduce_then_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__rng), std::forward<_Range2>(__result), __n, diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h index 7e09be021fb..56be7cb94ec 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h @@ -258,7 +258,7 @@ template struct __write_to_id_if; template -struct __write_to_idx_if_else; +struct __write_to_id_if_else; template struct __early_exit_find_or; @@ -299,7 +299,7 @@ struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__par_backen }; template -struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__par_backend_hetero::__write_to_idx_if_else, +struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__par_backend_hetero::__write_to_id_if_else, Assign)> : oneapi::dpl::__internal::__are_all_device_copyable { diff --git a/test/general/implementation_details/device_copyable.pass.cpp b/test/general/implementation_details/device_copyable.pass.cpp index 41428356610..548a915b78c 100644 --- a/test/general/implementation_details/device_copyable.pass.cpp +++ b/test/general/implementation_details/device_copyable.pass.cpp @@ -169,10 +169,10 @@ test_device_copyable() sycl::is_device_copyable_v>, "__write_to_id_if is not device copyable with device copyable types"); - //__write_to_idx_if_else + //__write_to_id_if_else static_assert( - sycl::is_device_copyable_v>, - "__write_to_idx_if_else is not device copyable with device copyable types"); + sycl::is_device_copyable_v>, + "__write_to_id_if_else is not device copyable with device copyable types"); // __early_exit_find_or static_assert( @@ -396,10 +396,10 @@ test_non_device_copyable() !sycl::is_device_copyable_v>, "__write_to_id_if is device copyable with non device copyable types"); - //__write_to_idx_if_else + //__write_to_id_if_else static_assert(!sycl::is_device_copyable_v< - oneapi::dpl::__par_backend_hetero::__write_to_idx_if_else>, - "__write_to_idx_if_else is device copyable with non device copyable types"); + oneapi::dpl::__par_backend_hetero::__write_to_id_if_else>, + "__write_to_id_if_else is device copyable with non device copyable types"); // __early_exit_find_or static_assert( From 191201a19902c642dd365c7b5f1a1ba569f5b48f Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Thu, 22 Aug 2024 15:57:25 -0400 Subject: [PATCH 8/9] address review feedback Signed-off-by: Dan Hoeflinger --- .../oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 8 ++++---- include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 68dbd12bca7..9b8fa96c078 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -867,12 +867,12 @@ struct __write_to_id_if _Assign __assign; }; -template +template struct __write_to_id_if_else { - template + template void - operator()(const _OutRng& __out_rng, _SizeType __id, const ValueType& __v) const + operator()(const _OutRng& __out_rng, _SizeType __id, const _ValueType& __v) const { using _ConvertedTupleType = typename oneapi::dpl::__internal::__get_tuple_type(__v))>, @@ -883,7 +883,7 @@ struct __write_to_id_if_else __assign(static_cast<_ConvertedTupleType>(std::get<2>(__v)), std::get<1>(__out_rng[__id - std::get<0>(__v)])); } - Assign __assign; + _Assign __assign; }; template struct __write_to_id_if; -template +template struct __write_to_id_if_else; template @@ -298,10 +298,10 @@ struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__par_backen { }; -template +template struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__par_backend_hetero::__write_to_id_if_else, - Assign)> - : oneapi::dpl::__internal::__are_all_device_copyable + _Assign)> + : oneapi::dpl::__internal::__are_all_device_copyable<_Assign> { }; From bd6aba60ce47a5a1efab6207bf44448203ccf6f6 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Tue, 27 Aug 2024 13:51:32 -0400 Subject: [PATCH 9/9] fixing ouput range type Signed-off-by: Dan Hoeflinger --- include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 9b8fa96c078..374a1df2767 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -872,7 +872,7 @@ struct __write_to_id_if_else { template void - operator()(const _OutRng& __out_rng, _SizeType __id, const _ValueType& __v) const + operator()(_OutRng& __out_rng, _SizeType __id, const _ValueType& __v) const { using _ConvertedTupleType = typename oneapi::dpl::__internal::__get_tuple_type(__v))>,