-
Notifications
You must be signed in to change notification settings - Fork 908
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactor strings column factories (#7397)
This PR refactors strings column factories to eliminate the use of `device_vector` and `std::vector` parameters, and to facilitate more use of `device_uvector` in calls to the factories. This is a small part of #7287 . Multiple versions of `make_strings_columns` take `device_vector` parameters. This PR expands the use of iterator and `device_span` versions to enable switching to `device_uvector` as described in #7287. It also adds new `make_device_uvector_async/sync` utility functions. This will help facilitate safe CUDA stream usage. Authors: - Mark Harris (@harrism) Approvers: - AJ Schmidt (@ajschmidt8) - Jake Hemstad (@jrhemstad) - David (@davidwendt) - Christopher Harris (@cwharris) URL: #7397
- Loading branch information
Showing
40 changed files
with
564 additions
and
404 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,236 @@ | ||
/* | ||
* Copyright (c) 2021, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
/** | ||
* @brief Convenience factories for creating device vectors from host and device spans and containers | ||
* @file vector_factories.hpp | ||
*/ | ||
|
||
#include <cudf/utilities/span.hpp> | ||
|
||
#include <rmm/cuda_stream_view.hpp> | ||
#include <rmm/device_uvector.hpp> | ||
#include <rmm/mr/device/device_memory_resource.hpp> | ||
|
||
namespace cudf { | ||
namespace detail { | ||
|
||
/** | ||
* @brief Asynchronously construct a `device_uvector` containing a deep copy of data from a | ||
* `host_span` | ||
* | ||
* @note This function does not synchronize `stream`. | ||
* | ||
* @tparam T The type of the data to copy | ||
* @param source_data The host_span of data to deep copy | ||
* @param stream The stream on which to allocate memory and perform the copy | ||
* @param mr The memory resource to use for allocating the returned device_uvector | ||
* @return A device_uvector containing the copied data | ||
*/ | ||
template <typename T> | ||
rmm::device_uvector<T> make_device_uvector_async( | ||
host_span<T const> source_data, | ||
rmm::cuda_stream_view stream = rmm::cuda_stream_default, | ||
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) | ||
{ | ||
rmm::device_uvector<T> ret(source_data.size(), stream, mr); | ||
CUDA_TRY(cudaMemcpyAsync(ret.data(), | ||
source_data.data(), | ||
source_data.size() * sizeof(T), | ||
cudaMemcpyDefault, | ||
stream.value())); | ||
return ret; | ||
} | ||
|
||
/** | ||
* @brief Asynchronously construct a `device_uvector` containing a deep copy of data from a host | ||
* container | ||
* | ||
* @note This function does not synchronize `stream`. | ||
* | ||
* @tparam Container The type of the container to copy from | ||
* @tparam T The type of the data to copy | ||
* @param c The input host container from which to copy | ||
* @param stream The stream on which to allocate memory and perform the copy | ||
* @param mr The memory resource to use for allocating the returned device_uvector | ||
* @return A device_uvector containing the copied data | ||
*/ | ||
template <typename Container, | ||
std::enable_if_t< | ||
std::is_convertible<Container, | ||
host_span<typename Container::value_type const>>::value>* = nullptr> | ||
rmm::device_uvector<typename Container::value_type> make_device_uvector_async( | ||
Container const& c, | ||
rmm::cuda_stream_view stream = rmm::cuda_stream_default, | ||
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) | ||
{ | ||
return make_device_uvector_async(host_span<typename Container::value_type const>{c}, stream, mr); | ||
} | ||
|
||
/** | ||
* @brief Asynchronously construct a `device_uvector` containing a deep copy of data from a | ||
* `device_span` | ||
* | ||
* @note This function does not synchronize `stream`. | ||
* | ||
* @tparam T The type of the data to copy | ||
* @param source_data The device_span of data to deep copy | ||
* @param stream The stream on which to allocate memory and perform the copy | ||
* @param mr The memory resource to use for allocating the returned device_uvector | ||
* @return A device_uvector containing the copied data | ||
*/ | ||
template <typename T> | ||
rmm::device_uvector<T> make_device_uvector_async( | ||
device_span<T const> source_data, | ||
rmm::cuda_stream_view stream = rmm::cuda_stream_default, | ||
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) | ||
{ | ||
rmm::device_uvector<T> ret(source_data.size(), stream, mr); | ||
CUDA_TRY(cudaMemcpyAsync(ret.data(), | ||
source_data.data(), | ||
source_data.size() * sizeof(T), | ||
cudaMemcpyDefault, | ||
stream.value())); | ||
return ret; | ||
} | ||
|
||
/** | ||
* @brief Asynchronously construct a `device_uvector` containing a deep copy of data from a device | ||
* container | ||
* | ||
* @note This function does not synchronize `stream`. | ||
* | ||
* @tparam Container The type of the container to copy from | ||
* @tparam T The type of the data to copy | ||
* @param c The input device container from which to copy | ||
* @param stream The stream on which to allocate memory and perform the copy | ||
* @param mr The memory resource to use for allocating the returned device_uvector | ||
* @return A device_uvector containing the copied data | ||
*/ | ||
template < | ||
typename Container, | ||
std::enable_if_t< | ||
std::is_convertible<Container, device_span<typename Container::value_type const>>::value>* = | ||
nullptr> | ||
rmm::device_uvector<typename Container::value_type> make_device_uvector_async( | ||
Container const& c, | ||
rmm::cuda_stream_view stream = rmm::cuda_stream_default, | ||
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) | ||
{ | ||
return make_device_uvector_async( | ||
device_span<typename Container::value_type const>{c}, stream, mr); | ||
} | ||
|
||
/** | ||
* @brief Synchronously construct a `device_uvector` containing a deep copy of data from a | ||
* `host_span` | ||
* | ||
* @note This function synchronizes `stream`. | ||
* | ||
* @tparam T The type of the data to copy | ||
* @param source_data The host_span of data to deep copy | ||
* @param stream The stream on which to allocate memory and perform the copy | ||
* @param mr The memory resource to use for allocating the returned device_uvector | ||
* @return A device_uvector containing the copied data | ||
*/ | ||
template <typename T> | ||
rmm::device_uvector<T> make_device_uvector_sync( | ||
host_span<T const> source_data, | ||
rmm::cuda_stream_view stream = rmm::cuda_stream_default, | ||
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) | ||
{ | ||
auto ret = make_device_uvector_async(source_data, stream, mr); | ||
stream.synchronize(); | ||
return ret; | ||
} | ||
|
||
/** | ||
* @brief Synchronously construct a `device_uvector` containing a deep copy of data from a host | ||
* container | ||
* | ||
* @note This function synchronizes `stream`. | ||
* | ||
* @tparam Container The type of the container to copy from | ||
* @tparam T The type of the data to copy | ||
* @param c The input host container from which to copy | ||
* @param stream The stream on which to allocate memory and perform the copy | ||
* @param mr The memory resource to use for allocating the returned device_uvector | ||
* @return A device_uvector containing the copied data | ||
*/ | ||
template <typename Container, | ||
std::enable_if_t< | ||
std::is_convertible<Container, | ||
host_span<typename Container::value_type const>>::value>* = nullptr> | ||
rmm::device_uvector<typename Container::value_type> make_device_uvector_sync( | ||
Container const& c, | ||
rmm::cuda_stream_view stream = rmm::cuda_stream_default, | ||
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) | ||
{ | ||
return make_device_uvector_sync(host_span<typename Container::value_type const>{c}, stream, mr); | ||
} | ||
|
||
/** | ||
* @brief Synchronously construct a `device_uvector` containing a deep copy of data from a | ||
* `device_span` | ||
* | ||
* @note This function synchronizes `stream`. | ||
* | ||
* @tparam T The type of the data to copy | ||
* @param source_data The device_span of data to deep copy | ||
* @param stream The stream on which to allocate memory and perform the copy | ||
* @param mr The memory resource to use for allocating the returned device_uvector | ||
* @return A device_uvector containing the copied data | ||
*/ | ||
template <typename T> | ||
rmm::device_uvector<T> make_device_uvector_sync( | ||
device_span<T const> source_data, | ||
rmm::cuda_stream_view stream = rmm::cuda_stream_default, | ||
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) | ||
{ | ||
auto ret = make_device_uvector_async(source_data, stream, mr); | ||
stream.synchronize(); | ||
return ret; | ||
} | ||
|
||
/** | ||
* @brief Synchronously construct a `device_uvector` containing a deep copy of data from a device | ||
* container | ||
* | ||
* @note This function synchronizes `stream`. | ||
* | ||
* @tparam Container The type of the container to copy from | ||
* @tparam T The type of the data to copy | ||
* @param c The input device container from which to copy | ||
* @param stream The stream on which to allocate memory and perform the copy | ||
* @param mr The memory resource to use for allocating the returned device_uvector | ||
* @return A device_uvector containing the copied data | ||
*/ | ||
template < | ||
typename Container, | ||
std::enable_if_t< | ||
std::is_convertible<Container, device_span<typename Container::value_type const>>::value>* = | ||
nullptr> | ||
rmm::device_uvector<typename Container::value_type> make_device_uvector_sync( | ||
Container const& c, | ||
rmm::cuda_stream_view stream = rmm::cuda_stream_default, | ||
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) | ||
{ | ||
return make_device_uvector_sync(device_span<typename Container::value_type const>{c}, stream, mr); | ||
} | ||
|
||
} // namespace detail | ||
|
||
} // namespace cudf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.