Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable use of cudaMemcpyAsync for thrust::copy #211

Merged
merged 1 commit into from
Jul 18, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,38 @@
#include <thrust/system/cuda/config.h>
#include <thrust/system/cuda/detail/execution_policy.h>
#include <thrust/system/cuda/detail/transform.h>
#include <thrust/system/cuda/detail/util.h>
#include <thrust/distance.h>
#include <thrust/functional.h>
#include <thrust/type_traits/is_trivially_relocatable.h>

THRUST_NAMESPACE_BEGIN
namespace cuda_cub {

namespace __copy {
// Device-to-device copy overload selected by a trailing
// thrust::detail::true_type tag.
//
// Measures [first, last) with thrust::distance. When the range is non-empty,
// the raw pointers behind `result` and `first` are reinterpreted as pointers
// to the input iterator's value type and passed, together with the element
// count, to trivial_copy_device_to_device. A failing status is reported
// through cuda_cub::throw_on_error.
//
// NOTE(review): trivial_copy_device_to_device is not defined in this view;
// presumably it comes from one of the included cuda/detail headers -- confirm.
//
// Returns `result` advanced by the number of elements in [first, last).
template <class Derived,
          class InputIt,
          class OutputIt>
OutputIt THRUST_RUNTIME_FUNCTION
device_to_device(execution_policy<Derived>& policy,
                 InputIt                    first,
                 InputIt                    last,
                 OutputIt                   result,
                 thrust::detail::true_type)
{
  using value_t = typename thrust::iterator_traits<InputIt>::value_type;

  const auto count = thrust::distance(first, last);

  // An empty (or reversed) range issues no copy at all.
  if (count > 0)
  {
    value_t* const dst =
      reinterpret_cast<value_t*>(thrust::raw_pointer_cast(&*result));
    value_t const* const src =
      reinterpret_cast<value_t const*>(thrust::raw_pointer_cast(&*first));

    const cudaError status =
      trivial_copy_device_to_device(policy, dst, src, count);
    cuda_cub::throw_on_error(status, "__copy:: D->D: failed");
  }

  return result + count;
}

template <class Derived,
class InputIt,
Expand All @@ -47,16 +73,32 @@ namespace __copy {
device_to_device(execution_policy<Derived>& policy,
InputIt first,
InputIt last,
OutputIt result)
OutputIt result,
thrust::detail::false_type)
{
typedef typename thrust::iterator_traits<InputIt>::value_type InputTy;
return cuda_cub::transform(policy,
first,
last,
result,
thrust::identity<InputTy>());
}

// Public device-to-device copy entry point inside namespace __copy.
//
// Forwards to one of the tagged device_to_device overloads. The tag is a
// default-constructed
// is_indirectly_trivially_relocatable_to<InputIt, OutputIt>::type, so overload
// resolution picks the thrust::detail::true_type overload (raw-pointer copy via
// trivial_copy_device_to_device) or the thrust::detail::false_type overload
// (element-wise cuda_cub::transform with thrust::identity).
//
// Returns whatever the selected overload returns.
template <class Derived,
          class InputIt,
          class OutputIt>
OutputIt THRUST_RUNTIME_FUNCTION
device_to_device(execution_policy<Derived>& policy,
                 InputIt                    first,
                 InputIt                    last,
                 OutputIt                   result)
{
  // The only trailing argument is the trait tag. The former
  // thrust::identity<InputTy>() argument belonged to the old direct transform
  // call and names a type this function does not declare.
  return device_to_device(policy,
                          first,
                          last,
                          result,
                          typename is_indirectly_trivially_relocatable_to<InputIt, OutputIt>::type());
}

} // namespace __copy

} // namespace cuda_cub
Expand Down