Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor strings column factories #7397

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
7cfdbd5
Initial attempt at using span/iterators in make_strings_columns (WIP)
harrism Feb 16, 2021
a3a1cfa
Fix zip iterator
harrism Feb 17, 2021
99eea9e
Convert chars, offsets, nulls make_strings_column to iterators
harrism Feb 17, 2021
189fdf5
use device_span version of make_strings_column in scan.cu
harrism Feb 17, 2021
01a02e2
Add another device_span version of make_strings_column
harrism Feb 17, 2021
1b8b200
Use device_span version of make_strings_column in CSV reader_impl.cu
harrism Feb 17, 2021
b33ca20
make_strings_column from std::vector uses spans internally
harrism Feb 23, 2021
f43aeb1
Clean up binops/scan spans.
harrism Feb 23, 2021
f5ecc6e
Merge branch 'branch-0.19' into fea-refactor-strings-factories
harrism Feb 23, 2021
61604d1
Remove errant std::cout
harrism Feb 23, 2021
43a3192
Make span classes part of public interface and refactor strings colum…
harrism Feb 23, 2021
bc55428
Merge branch 'branch-0.19' into fea-refactor-strings-factories
harrism Feb 23, 2021
9962b9b
Add make_device_uvector_* utilities
harrism Feb 24, 2021
75937af
Eliminate std::vector version of make_strings_column factory
harrism Feb 24, 2021
76e67a6
Add vector_factories.hpp to meta.yaml
harrism Feb 24, 2021
164a6b2
Don't put return type in doxygen @return
harrism Feb 24, 2021
1299e01
Remove unnecessary includes and add CUDF_FUNC_RANGE
harrism Feb 24, 2021
f940ee5
Convert strings extract vector to uvector
harrism Feb 24, 2021
7ec1d44
strings findall device_uvector to device_vector
harrism Feb 24, 2021
b9d6972
strings partition device_vector -> uvector
harrism Feb 25, 2021
55d2e4e
device_vector ->uvector in strings split / split_record
harrism Feb 25, 2021
ed0fd9d
device_vector -> uvector in tokenize
harrism Feb 25, 2021
5dc8e4a
Merge branch 'branch-0.19' into fea-refactor-strings-factories
harrism Mar 2, 2021
3fad943
Clean up vector_factories.hpp
harrism Mar 2, 2021
aeff8d2
Add missing const
harrism Mar 2, 2021
0cd009e
Add sync
harrism Mar 2, 2021
71db9af
Docs cleanup
harrism Mar 2, 2021
6596426
Remove unnecessary syncs in test
harrism Mar 2, 2021
4a4c8fc
Copyrights
harrism Mar 3, 2021
8ec27bf
Remove blank line
harrism Mar 3, 2021
8b132c4
Merge branch 'branch-0.19' into fea-refactor-strings-factories
harrism Mar 3, 2021
fb3c023
Fix detail::span
harrism Mar 4, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ test:
- test -f $PREFIX/include/cudf/detail/utilities/alignment.hpp
- test -f $PREFIX/include/cudf/detail/utilities/integer_utils.hpp
- test -f $PREFIX/include/cudf/detail/utilities/int_fastdiv.h
- test -f $PREFIX/include/cudf/detail/utilities/vector_factories.hpp
- test -f $PREFIX/include/cudf/dictionary/detail/concatenate.hpp
- test -f $PREFIX/include/cudf/dictionary/detail/encode.hpp
- test -f $PREFIX/include/cudf/dictionary/detail/merge.hpp
Expand Down
7 changes: 6 additions & 1 deletion cpp/benchmarks/common/generate_benchmark_input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/device_vector.hpp>

#include <future>
#include <memory>
Expand Down Expand Up @@ -411,7 +412,11 @@ std::unique_ptr<cudf::column> create_random_column<cudf::string_view>(data_profi
row += std::max(run_len - 1, 0);
}
}
return cudf::make_strings_column(out_col.chars, out_col.offsets, out_col.null_mask);

rmm::device_vector<char> d_chars(out_col.chars);
rmm::device_vector<cudf::size_type> d_offsets(out_col.offsets);
rmm::device_vector<cudf::bitmask_type> d_null_mask(out_col.null_mask);
return cudf::make_strings_column(d_chars, d_offsets, d_null_mask);
}

template <>
Expand Down
3 changes: 1 addition & 2 deletions cpp/benchmarks/copying/shift_benchmark.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -23,7 +23,6 @@

#include <benchmark/benchmark.h>

#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/functional.h>
#include <thrust/sequence.h>
Expand Down
187 changes: 65 additions & 122 deletions cpp/include/cudf/column/column_factories.hpp

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions cpp/include/cudf/detail/utilities/trie.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2020, NVIDIA CORPORATION.
* Copyright (c) 2018-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -30,7 +30,7 @@
#include <cuda_runtime.h>
#include <thrust/host_vector.h>

using cudf::detail::device_span;
using cudf::device_span;

static constexpr char trie_terminating_character = '\n';

Expand Down
236 changes: 236 additions & 0 deletions cpp/include/cudf/detail/utilities/vector_factories.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
* @brief Convenience factories for creating device vectors from host spans
* @file vector_factories.hpp
*/

#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>

namespace cudf {
namespace detail {

/**
* @brief Asynchronously construct a `device_uvector` containing a deep copy of data from a
* `host_span`
*
* @note This function does not synchronize `stream`.
*
* @tparam T The type of the data to copy
* @param source_data The host_span of data to deep copy
* @param stream The stream on which to allocate memory and perform the copy
* @param mr The memory resource to use for allocating the returned device_uvector
* @return A device_uvector containing the copied data
*/
template <typename T>
rmm::device_uvector<T> make_device_uvector_async(
host_span<T const> source_data,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
rmm::device_uvector<T> ret(source_data.size(), stream, mr);
CUDA_TRY(cudaMemcpyAsync(ret.data(),
source_data.data(),
source_data.size() * sizeof(T),
cudaMemcpyDefault,
stream.value()));
return ret;
}

/**
* @brief Asynchronously construct a `device_uvector` containing a deep copy of data from a host
* container
*
* @note This function does not synchronize `stream`.
*
* @tparam Container The type of the container to copy from
* @tparam T The type of the data to copy
* @param c The input host container from which to copy
* @param stream The stream on which to allocate memory and perform the copy
* @param mr The memory resource to use for allocating the returned device_uvector
* @return A device_uvector containing the copied data
*/
template <typename Container,
std::enable_if_t<
std::is_convertible<Container,
host_span<typename Container::value_type const>>::value>* = nullptr>
rmm::device_uvector<typename Container::value_type> make_device_uvector_async(
Container const& c,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
return make_device_uvector_async(host_span<typename Container::value_type const>{c}, stream, mr);
}

/**
* @brief Asynchronously construct a `device_uvector` containing a deep copy of data from a
* `device_span`
*
* @note This function does not synchronize `stream`.
*
* @tparam T The type of the data to copy
* @param source_data The device_span of data to deep copy
* @param stream The stream on which to allocate memory and perform the copy
* @param mr The memory resource to use for allocating the returned device_uvector
* @return A device_uvector containing the copied data
*/
template <typename T>
rmm::device_uvector<T> make_device_uvector_async(
device_span<T const> source_data,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
rmm::device_uvector<T> ret(source_data.size(), stream, mr);
CUDA_TRY(cudaMemcpyAsync(ret.data(),
source_data.data(),
source_data.size() * sizeof(T),
cudaMemcpyDefault,
stream.value()));
return ret;
}

/**
* @brief Asynchronously construct a `device_uvector` containing a deep copy of data from a device
* container
*
* @note This function does not synchronize `stream`.
*
* @tparam Container The type of the container to copy from
* @tparam T The type of the data to copy
* @param c The input device container from which to copy
* @param stream The stream on which to allocate memory and perform the copy
* @param mr The memory resource to use for allocating the returned device_uvector
* @return A device_uvector containing the copied data
*/
template <
typename Container,
std::enable_if_t<
std::is_convertible<Container, device_span<typename Container::value_type const>>::value>* =
nullptr>
rmm::device_uvector<typename Container::value_type> make_device_uvector_async(
Container const& c,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
return make_device_uvector_async(
device_span<typename Container::value_type const>{c}, stream, mr);
}

/**
* @brief Synchronously construct a `device_uvector` containing a deep copy of data from a
* `host_span`
*
* @note This function synchronizes `stream`.
*
* @tparam T The type of the data to copy
* @param source_data The host_span of data to deep copy
* @param stream The stream on which to allocate memory and perform the copy
* @param mr The memory resource to use for allocating the returned device_uvector
* @return A device_uvector containing the copied data
*/
template <typename T>
rmm::device_uvector<T> make_device_uvector_sync(
host_span<T const> source_data,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
auto ret = make_device_uvector_async(source_data, stream, mr);
stream.synchronize();
return ret;
}

/**
* @brief Synchronously construct a `device_uvector` containing a deep copy of data from a host
* container
*
* @note This function synchronizes `stream`.
*
* @tparam Container The type of the container to copy from
* @tparam T The type of the data to copy
* @param c The input host container from which to copy
* @param stream The stream on which to allocate memory and perform the copy
* @param mr The memory resource to use for allocating the returned device_uvector
* @return A device_uvector containing the copied data
*/
template <typename Container,
std::enable_if_t<
std::is_convertible<Container,
host_span<typename Container::value_type const>>::value>* = nullptr>
rmm::device_uvector<typename Container::value_type> make_device_uvector_sync(
Container const& c,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
return make_device_uvector_sync(host_span<typename Container::value_type const>{c}, stream, mr);
}

/**
* @brief Synchronously construct a `device_uvector` containing a deep copy of data from a
* `device_span`
*
* @note This function synchronizes `stream`.
*
* @tparam T The type of the data to copy
* @param source_data The device_span of data to deep copy
* @param stream The stream on which to allocate memory and perform the copy
* @param mr The memory resource to use for allocating the returned device_uvector
* @return A device_uvector containing the copied data
*/
template <typename T>
rmm::device_uvector<T> make_device_uvector_sync(
device_span<T const> source_data,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
auto ret = make_device_uvector_async(source_data, stream, mr);
stream.synchronize();
return ret;
}

/**
* @brief Synchronously construct a `device_uvector` containing a deep copy of data from a device
* container
*
* @note This function synchronizes `stream`.
*
* @tparam Container The type of the container to copy from
* @tparam T The type of the data to copy
* @param c The input device container from which to copy
* @param stream The stream on which to allocate memory and perform the copy
* @param mr The memory resource to use for allocating the returned device_uvector
* @return A device_uvector containing the copied data
*/
template <
typename Container,
std::enable_if_t<
std::is_convertible<Container, device_span<typename Container::value_type const>>::value>* =
nullptr>
rmm::device_uvector<typename Container::value_type> make_device_uvector_sync(
Container const& c,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
return make_device_uvector_sync(device_span<typename Container::value_type const>{c}, stream, mr);
}

} // namespace detail

} // namespace cudf
3 changes: 1 addition & 2 deletions cpp/include/cudf/strings/detail/gather.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,7 @@ std::unique_ptr<cudf::column> gather(
auto const d_out_chars = out_chars_column->mutable_view().template data<char>();

// fill in chars
cudf::detail::device_span<int32_t const> const d_out_offsets_span(d_out_offsets,
output_count + 1);
cudf::device_span<int32_t const> const d_out_offsets_span(d_out_offsets, output_count + 1);
auto const d_in_chars = (strings_count > 0) ? strings.chars().data<char>() : nullptr;
auto gather_chars_fn =
[d_out_offsets_span, begin, d_in_offsets, d_in_chars] __device__(size_type out_char_idx) {
Expand Down
Loading