Refactor strings column factories (#7397)
This PR refactors the strings column factories to eliminate `device_vector` and `std::vector` parameters and to facilitate wider use of `device_uvector` in calls to the factories. This is a small part of #7287 . Multiple overloads of `make_strings_column` take `device_vector` parameters; this PR expands the use of the iterator and `device_span` overloads to enable switching to `device_uvector`, as described in #7287. It also adds new `make_device_uvector_async`/`make_device_uvector_sync` utility functions.

This will help facilitate safe CUDA stream usage.
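
For illustration only, a minimal caller-side sketch (not part of this commit) of the new utilities; the function name `example` and the variable names are hypothetical:

#include <cudf/detail/utilities/vector_factories.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

#include <cstdint>
#include <vector>

void example(rmm::cuda_stream_view stream)
{
  std::vector<int32_t> h_offsets{0, 3, 7, 12};

  // Asynchronous deep copy into a device_uvector. Does not synchronize `stream`,
  // so `h_offsets` must stay alive until the copy has completed.
  auto d_offsets = cudf::detail::make_device_uvector_async(h_offsets, stream);

  // Synchronous variant: `stream` is synchronized before it returns.
  auto d_offsets_sync = cudf::detail::make_device_uvector_sync(h_offsets, stream);

  // A device_uvector converts to cudf::device_span, so results like these can be
  // passed to the device_span overloads of factories such as make_strings_column.
}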

Authors:
  - Mark Harris (@harrism)

Approvers:
  - AJ Schmidt (@ajschmidt8)
  - Jake Hemstad (@jrhemstad)
  - David (@davidwendt)
  - Christopher Harris (@cwharris)

URL: #7397
harrism authored Mar 4, 2021
1 parent 4d0c160 commit e5d0ec9
Showing 40 changed files with 564 additions and 404 deletions.
1 change: 1 addition & 0 deletions conda/recipes/libcudf/meta.yaml
@@ -96,6 +96,7 @@ test:
- test -f $PREFIX/include/cudf/detail/utilities/alignment.hpp
- test -f $PREFIX/include/cudf/detail/utilities/integer_utils.hpp
- test -f $PREFIX/include/cudf/detail/utilities/int_fastdiv.h
+- test -f $PREFIX/include/cudf/detail/utilities/vector_factories.hpp
- test -f $PREFIX/include/cudf/dictionary/detail/concatenate.hpp
- test -f $PREFIX/include/cudf/dictionary/detail/encode.hpp
- test -f $PREFIX/include/cudf/dictionary/detail/merge.hpp
7 changes: 6 additions & 1 deletion cpp/benchmarks/common/generate_benchmark_input.cpp
@@ -26,6 +26,7 @@

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
+#include <rmm/device_vector.hpp>

#include <future>
#include <memory>
@@ -411,7 +412,11 @@ std::unique_ptr<cudf::column> create_random_column<cudf::string_view>(data_profi
row += std::max(run_len - 1, 0);
}
}
-return cudf::make_strings_column(out_col.chars, out_col.offsets, out_col.null_mask);
+
+rmm::device_vector<char> d_chars(out_col.chars);
+rmm::device_vector<cudf::size_type> d_offsets(out_col.offsets);
+rmm::device_vector<cudf::bitmask_type> d_null_mask(out_col.null_mask);
+return cudf::make_strings_column(d_chars, d_offsets, d_null_mask);
}

template <>
3 changes: 1 addition & 2 deletions cpp/benchmarks/copying/shift_benchmark.cu
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2020, NVIDIA CORPORATION.
+* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -23,7 +23,6 @@

#include <benchmark/benchmark.h>

-#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/functional.h>
#include <thrust/sequence.h>
187 changes: 65 additions & 122 deletions cpp/include/cudf/column/column_factories.hpp

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions cpp/include/cudf/detail/utilities/trie.cuh
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2018-2020, NVIDIA CORPORATION.
+* Copyright (c) 2018-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -30,7 +30,7 @@
#include <cuda_runtime.h>
#include <thrust/host_vector.h>

-using cudf::detail::device_span;
+using cudf::device_span;

static constexpr char trie_terminating_character = '\n';

236 changes: 236 additions & 0 deletions cpp/include/cudf/detail/utilities/vector_factories.hpp
@@ -0,0 +1,236 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
* @brief Convenience factories for creating device vectors from host spans
* @file vector_factories.hpp
*/

#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>

namespace cudf {
namespace detail {

/**
* @brief Asynchronously construct a `device_uvector` containing a deep copy of data from a
* `host_span`
*
* @note This function does not synchronize `stream`.
*
* @tparam T The type of the data to copy
* @param source_data The host_span of data to deep copy
* @param stream The stream on which to allocate memory and perform the copy
* @param mr The memory resource to use for allocating the returned device_uvector
* @return A device_uvector containing the copied data
*/
template <typename T>
rmm::device_uvector<T> make_device_uvector_async(
host_span<T const> source_data,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
rmm::device_uvector<T> ret(source_data.size(), stream, mr);
CUDA_TRY(cudaMemcpyAsync(ret.data(),
source_data.data(),
source_data.size() * sizeof(T),
cudaMemcpyDefault,
stream.value()));
return ret;
}

/**
* @brief Asynchronously construct a `device_uvector` containing a deep copy of data from a host
* container
*
* @note This function does not synchronize `stream`.
*
* @tparam Container The type of the container to copy from
* @tparam T The type of the data to copy
* @param c The input host container from which to copy
* @param stream The stream on which to allocate memory and perform the copy
* @param mr The memory resource to use for allocating the returned device_uvector
* @return A device_uvector containing the copied data
*/
template <typename Container,
std::enable_if_t<
std::is_convertible<Container,
host_span<typename Container::value_type const>>::value>* = nullptr>
rmm::device_uvector<typename Container::value_type> make_device_uvector_async(
Container const& c,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
return make_device_uvector_async(host_span<typename Container::value_type const>{c}, stream, mr);
}

/**
* @brief Asynchronously construct a `device_uvector` containing a deep copy of data from a
* `device_span`
*
* @note This function does not synchronize `stream`.
*
* @tparam T The type of the data to copy
* @param source_data The device_span of data to deep copy
* @param stream The stream on which to allocate memory and perform the copy
* @param mr The memory resource to use for allocating the returned device_uvector
* @return A device_uvector containing the copied data
*/
template <typename T>
rmm::device_uvector<T> make_device_uvector_async(
device_span<T const> source_data,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
rmm::device_uvector<T> ret(source_data.size(), stream, mr);
CUDA_TRY(cudaMemcpyAsync(ret.data(),
source_data.data(),
source_data.size() * sizeof(T),
cudaMemcpyDefault,
stream.value()));
return ret;
}

/**
* @brief Asynchronously construct a `device_uvector` containing a deep copy of data from a device
* container
*
* @note This function does not synchronize `stream`.
*
* @tparam Container The type of the container to copy from
* @tparam T The type of the data to copy
* @param c The input device container from which to copy
* @param stream The stream on which to allocate memory and perform the copy
* @param mr The memory resource to use for allocating the returned device_uvector
* @return A device_uvector containing the copied data
*/
template <
typename Container,
std::enable_if_t<
std::is_convertible<Container, device_span<typename Container::value_type const>>::value>* =
nullptr>
rmm::device_uvector<typename Container::value_type> make_device_uvector_async(
Container const& c,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
return make_device_uvector_async(
device_span<typename Container::value_type const>{c}, stream, mr);
}

/**
* @brief Synchronously construct a `device_uvector` containing a deep copy of data from a
* `host_span`
*
* @note This function synchronizes `stream`.
*
* @tparam T The type of the data to copy
* @param source_data The host_span of data to deep copy
* @param stream The stream on which to allocate memory and perform the copy
* @param mr The memory resource to use for allocating the returned device_uvector
* @return A device_uvector containing the copied data
*/
template <typename T>
rmm::device_uvector<T> make_device_uvector_sync(
host_span<T const> source_data,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
auto ret = make_device_uvector_async(source_data, stream, mr);
stream.synchronize();
return ret;
}

/**
* @brief Synchronously construct a `device_uvector` containing a deep copy of data from a host
* container
*
* @note This function synchronizes `stream`.
*
* @tparam Container The type of the container to copy from
* @tparam T The type of the data to copy
* @param c The input host container from which to copy
* @param stream The stream on which to allocate memory and perform the copy
* @param mr The memory resource to use for allocating the returned device_uvector
* @return A device_uvector containing the copied data
*/
template <typename Container,
std::enable_if_t<
std::is_convertible<Container,
host_span<typename Container::value_type const>>::value>* = nullptr>
rmm::device_uvector<typename Container::value_type> make_device_uvector_sync(
Container const& c,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
return make_device_uvector_sync(host_span<typename Container::value_type const>{c}, stream, mr);
}

/**
* @brief Synchronously construct a `device_uvector` containing a deep copy of data from a
* `device_span`
*
* @note This function synchronizes `stream`.
*
* @tparam T The type of the data to copy
* @param source_data The device_span of data to deep copy
* @param stream The stream on which to allocate memory and perform the copy
* @param mr The memory resource to use for allocating the returned device_uvector
* @return A device_uvector containing the copied data
*/
template <typename T>
rmm::device_uvector<T> make_device_uvector_sync(
device_span<T const> source_data,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
auto ret = make_device_uvector_async(source_data, stream, mr);
stream.synchronize();
return ret;
}

/**
* @brief Synchronously construct a `device_uvector` containing a deep copy of data from a device
* container
*
* @note This function synchronizes `stream`.
*
* @tparam Container The type of the container to copy from
* @tparam T The type of the data to copy
* @param c The input device container from which to copy
* @param stream The stream on which to allocate memory and perform the copy
* @param mr The memory resource to use for allocating the returned device_uvector
* @return A device_uvector containing the copied data
*/
template <
typename Container,
std::enable_if_t<
std::is_convertible<Container, device_span<typename Container::value_type const>>::value>* =
nullptr>
rmm::device_uvector<typename Container::value_type> make_device_uvector_sync(
Container const& c,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
return make_device_uvector_sync(device_span<typename Container::value_type const>{c}, stream, mr);
}

} // namespace detail

} // namespace cudf
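
A usage note on the sync variants: because the async factories do not synchronize the stream, copying from a host container that is about to go out of scope requires the sync form. A short sketch (not part of this commit; the function name `copy_temporary` is hypothetical):

#include <cudf/detail/utilities/vector_factories.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

#include <vector>

rmm::device_uvector<float> copy_temporary(rmm::cuda_stream_view stream)
{
  std::vector<float> tmp(1024, 1.0f);  // destroyed when this function returns

  // Unsafe with the async variant: the underlying cudaMemcpyAsync could still be
  // reading `tmp` after it has been destroyed.
  //   return cudf::detail::make_device_uvector_async(tmp, stream);

  // Safe: the sync variant synchronizes `stream` before returning, so the copy
  // has finished while `tmp` is still alive.
  return cudf::detail::make_device_uvector_sync(tmp, stream);
}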
3 changes: 1 addition & 2 deletions cpp/include/cudf/strings/detail/gather.cuh
@@ -112,8 +112,7 @@ std::unique_ptr<cudf::column> gather(
auto const d_out_chars = out_chars_column->mutable_view().template data<char>();

// fill in chars
-cudf::detail::device_span<int32_t const> const d_out_offsets_span(d_out_offsets,
-output_count + 1);
+cudf::device_span<int32_t const> const d_out_offsets_span(d_out_offsets, output_count + 1);
auto const d_in_chars = (strings_count > 0) ? strings.chars().data<char>() : nullptr;
auto gather_chars_fn =
[d_out_offsets_span, begin, d_in_offsets, d_in_chars] __device__(size_type out_char_idx) {
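
For context on `device_span` here, a small sketch (not from this commit; `span_example` is hypothetical) of the two common ways such a view is constructed, from a device container or from a raw pointer and size:

#include <cudf/utilities/span.hpp>

#include <rmm/device_uvector.hpp>

#include <cstdint>

void span_example(rmm::device_uvector<int32_t> const& d_offsets)
{
  // Non-owning view over a whole device container.
  cudf::device_span<int32_t const> all{d_offsets};

  // Equivalent view built from an explicit pointer and size, as in the gather code above.
  cudf::device_span<int32_t const> same{d_offsets.data(), d_offsets.size()};
}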