Skip to content

Commit

Permalink
Merge branch 'branch-23.12' into add_stream_list_comparison_intersection
Browse files Browse the repository at this point in the history
  • Loading branch information
ttnghia authored Oct 23, 2023
2 parents c354de0 + e8cf0eb commit c7d1869
Show file tree
Hide file tree
Showing 55 changed files with 2,652 additions and 1,401 deletions.
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,7 @@ if(CUDF_BUILD_TESTUTIL)
tests/io/metadata_utilities.cpp
tests/utilities/base_fixture.cpp
tests/utilities/column_utilities.cu
tests/utilities/debug_utilities.cu
tests/utilities/table_utilities.cu
tests/utilities/tdigest_utilities.cu
)
Expand Down
6 changes: 3 additions & 3 deletions cpp/include/cudf/lists/detail/scatter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
#include <cudf/column/column_factories.hpp>
#include <cudf/copying.hpp>
#include <cudf/detail/iterator.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/lists/detail/scatter_helper.cuh>
#include <cudf/lists/list_device_view.cuh>
#include <cudf/null_mask.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
Expand Down Expand Up @@ -130,8 +130,8 @@ std::unique_ptr<column> scatter_impl(rmm::device_uvector<unbound_list_view> cons
std::vector<std::unique_ptr<column>> children;
children.emplace_back(std::move(offsets_column));
children.emplace_back(std::move(child_column));
auto null_mask =
target.has_nulls() ? copy_bitmask(target, stream, mr) : rmm::device_buffer{0, stream, mr};
auto null_mask = target.has_nulls() ? cudf::detail::copy_bitmask(target, stream, mr)
: rmm::device_buffer{0, stream, mr};

// The output column from this function only has null masks copied from the target columns.
// That is still not a correct final null mask for the scatter result.
Expand Down
24 changes: 22 additions & 2 deletions cpp/include/cudf/null_mask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#pragma once

#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/device_buffer.hpp>
Expand Down Expand Up @@ -80,13 +81,15 @@ size_type num_bitmask_words(size_type number_of_bits);
*
* @param size The number of elements to be represented by the mask
* @param state The desired state of the mask
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned device_buffer
* @return A `device_buffer` for use as a null bitmask
* satisfying the desired size and state
*/
rmm::device_buffer create_null_mask(
size_type size,
mask_state state,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -100,8 +103,13 @@ rmm::device_buffer create_null_mask(
* @param begin_bit Index of the first bit to set (inclusive)
* @param end_bit Index of the last bit to set (exclusive)
* @param valid If true set all entries to valid; otherwise, set all to null
* @param stream CUDA stream used for device memory operations and kernel launches
*/
void set_null_mask(bitmask_type* bitmask, size_type begin_bit, size_type end_bit, bool valid);
void set_null_mask(bitmask_type* bitmask,
size_type begin_bit,
size_type end_bit,
bool valid,
rmm::cuda_stream_view stream = cudf::get_default_stream());

/**
* @brief Creates a `device_buffer` from a slice of bitmask defined by a range
Expand All @@ -115,6 +123,7 @@ void set_null_mask(bitmask_type* bitmask, size_type begin_bit, size_type end_bit
* @param mask Bitmask residing in device memory whose bits will be copied
* @param begin_bit Index of the first bit to be copied (inclusive)
* @param end_bit Index of the last bit to be copied (exclusive)
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned device_buffer
* @return A `device_buffer` containing the bits
* `[begin_bit, end_bit)` from `mask`.
Expand All @@ -123,6 +132,7 @@ rmm::device_buffer copy_bitmask(
bitmask_type const* mask,
size_type begin_bit,
size_type end_bit,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -132,12 +142,14 @@ rmm::device_buffer copy_bitmask(
* Returns empty `device_buffer` if the column is not nullable
*
* @param view Column view whose bitmask needs to be copied
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned device_buffer
* @return A `device_buffer` containing the bits
* `[view.offset(), view.offset() + view.size())` from `view`'s bitmask.
*/
rmm::device_buffer copy_bitmask(
column_view const& view,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -148,11 +160,13 @@ rmm::device_buffer copy_bitmask(
* If no column in the table is nullable, an empty bitmask is returned.
*
* @param view The table of columns
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned device_buffer
* @return A pair of resulting bitmask and count of unset bits
*/
std::pair<rmm::device_buffer, size_type> bitmask_and(
table_view const& view,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -163,11 +177,13 @@ std::pair<rmm::device_buffer, size_type> bitmask_and(
* If no column in the table is nullable, an empty bitmask is returned.
*
* @param view The table of columns
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned device_buffer
* @return A pair of resulting bitmask and count of unset bits
*/
std::pair<rmm::device_buffer, size_type> bitmask_or(
table_view const& view,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -183,8 +199,12 @@ std::pair<rmm::device_buffer, size_type> bitmask_or(
* @param bitmask Validity bitmask residing in device memory.
* @param start Index of the first bit to count (inclusive).
* @param stop Index of the last bit to count (exclusive).
* @param stream CUDA stream used for device memory operations and kernel launches
* @return The number of null elements in the specified range.
*/
cudf::size_type null_count(bitmask_type const* bitmask, size_type start, size_type stop);
cudf::size_type null_count(bitmask_type const* bitmask,
size_type start,
size_type stop,
rmm::cuda_stream_view stream = cudf::get_default_stream());
/** @} */ // end of group
} // namespace cudf
88 changes: 48 additions & 40 deletions cpp/include/cudf/strings/combine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,18 +66,20 @@ enum class output_if_empty_list {
*
* @throw cudf::logic_error if separator is not valid.
*
* @param strings Strings for this operation.
* @param input Strings for this operation
* @param separator String that should be inserted between each string.
* Default is an empty string.
* @param narep String that should represent any null strings found.
* @param narep String to replace any null strings found.
* Default of invalid-scalar will ignore any null entries.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New column containing one string.
*/
std::unique_ptr<column> join_strings(
strings_column_view const& strings,
strings_column_view const& input,
string_scalar const& separator = string_scalar(""),
string_scalar const& narep = string_scalar("", false),
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down Expand Up @@ -127,25 +129,25 @@ std::unique_ptr<column> join_strings(
* @throw cudf::logic_error if the number of rows from @p separators and @p strings_columns
* do not match
*
* @param strings_columns List of strings columns to concatenate.
* @param strings_columns List of strings columns to concatenate
* @param separators Strings column that provides the separator for a given row
* @param separator_narep String that should be used in place of a null separator for a given
* row. Default of invalid-scalar means no row separator value replacements.
* Default is an invalid string.
* @param col_narep String that should be used in place of any null strings
* found in any column. Default of invalid-scalar means no null column value replacements.
* Default is an invalid string.
* @param separator_narep String to replace a null separator for a given row.
* Default of invalid-scalar means no row separator value replacements.
* @param col_narep String that should be used in place of any null strings found in any column.
* Default of invalid-scalar means no null column value replacements.
* @param separate_nulls If YES, then the separator is included for null rows
* if `col_narep` is valid.
* @param mr Resource for allocating device memory.
* @return New column with concatenated results.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Resource for allocating device memory
* @return New column with concatenated results
*/
std::unique_ptr<column> concatenate(
table_view const& strings_columns,
strings_column_view const& separators,
string_scalar const& separator_narep = string_scalar("", false),
string_scalar const& col_narep = string_scalar("", false),
separator_on_nulls separate_nulls = separator_on_nulls::YES,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down Expand Up @@ -184,21 +186,23 @@ std::unique_ptr<column> concatenate(
* @throw cudf::logic_error if separator is not valid.
* @throw cudf::logic_error if only one column is specified
*
* @param strings_columns List of string columns to concatenate.
* @param strings_columns List of string columns to concatenate
* @param separator String that should be inserted between each string from each row.
* Default is an empty string.
* @param narep String that should be used in place of any null strings
* found in any column. Default of invalid-scalar means any null entry in any column will
* @param narep String to replace any null strings found in any column.
* Default of invalid-scalar means any null entry in any column will
* produce a null result for that row.
* @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New column with concatenated results.
* @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New column with concatenated results
*/
std::unique_ptr<column> concatenate(
table_view const& strings_columns,
string_scalar const& separator = string_scalar(""),
string_scalar const& narep = string_scalar("", false),
separator_on_nulls separate_nulls = separator_on_nulls::YES,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down Expand Up @@ -243,19 +247,20 @@ std::unique_ptr<column> concatenate(
* @throw cudf::logic_error if the number of rows from `separators` and `lists_strings_column` do
* not match
*
* @param lists_strings_column Column containing lists of strings to concatenate.
* @param separators Strings column that provides separators for concatenation.
* @param separator_narep String that should be used to replace null separator, default is an
* invalid-scalar denoting that rows containing null separator will result in null string in
* the corresponding output rows.
* @param string_narep String that should be used to replace null strings in any non-null list row,
* default is an invalid-scalar denoting that list rows containing null strings will result
* in null string in the corresponding output rows.
* @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid.
* @param empty_list_policy if set to EMPTY_STRING, any input row that is an empty list will
* @param lists_strings_column Column containing lists of strings to concatenate
* @param separators Strings column that provides separators for concatenation
* @param separator_narep String that should be used to replace a null separator.
* Default is an invalid-scalar denoting that rows containing null separator will result in
* a null string in the corresponding output rows.
* @param string_narep String to replace null strings in any non-null list row.
* Default is an invalid-scalar denoting that list rows containing null strings will result
* in a null string in the corresponding output rows.
* @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid
* @param empty_list_policy If set to EMPTY_STRING, any input row that is an empty list will
* result in an empty string. Otherwise, it will result in a null.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New strings column with concatenated results.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New strings column with concatenated results
*/
std::unique_ptr<column> join_list_elements(
lists_column_view const& lists_strings_column,
Expand All @@ -264,6 +269,7 @@ std::unique_ptr<column> join_list_elements(
string_scalar const& string_narep = string_scalar("", false),
separator_on_nulls separate_nulls = separator_on_nulls::YES,
output_if_empty_list empty_list_policy = output_if_empty_list::EMPTY_STRING,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down Expand Up @@ -303,24 +309,26 @@ std::unique_ptr<column> join_list_elements(
* @throw cudf::logic_error if input column is not lists of strings column.
* @throw cudf::logic_error if separator is not valid.
*
* @param lists_strings_column Column containing lists of strings to concatenate.
* @param separator String that should inserted between strings of each list row, default is an
* empty string.
* @param narep String that should be used to replace null strings in any non-null list row, default
* is an invalid-scalar denoting that list rows containing null strings will result in null
* string in the corresponding output rows.
* @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid.
* @param empty_list_policy if set to EMPTY_STRING, any input row that is an empty list will result
* @param lists_strings_column Column containing lists of strings to concatenate
* @param separator String to insert between strings of each list row.
* Default is an empty string.
* @param narep String to replace null strings in any non-null list row.
* Default is an invalid-scalar denoting that list rows containing null strings will result
* in a null string in the corresponding output rows.
* @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid
* @param empty_list_policy If set to EMPTY_STRING, any input row that is an empty list will result
* in an empty string. Otherwise, it will result in a null.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New strings column with concatenated results.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New strings column with concatenated results
*/
std::unique_ptr<column> join_list_elements(
lists_column_view const& lists_strings_column,
string_scalar const& separator = string_scalar(""),
string_scalar const& narep = string_scalar("", false),
separator_on_nulls separate_nulls = separator_on_nulls::YES,
output_if_empty_list empty_list_policy = output_if_empty_list::EMPTY_STRING,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
Expand Down
8 changes: 7 additions & 1 deletion cpp/include/cudf/strings/repeat_strings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,14 @@ namespace strings {
*
* @param input The scalar containing the string to repeat
* @param repeat_times The number of times the input string is repeated
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned string scalar
* @return New string scalar in which the input string is repeated
*/
std::unique_ptr<string_scalar> repeat_string(
string_scalar const& input,
size_type repeat_times,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -81,12 +83,14 @@ std::unique_ptr<string_scalar> repeat_string(
*
* @param input The column containing strings to repeat
* @param repeat_times The number of times each input string is repeated
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned strings column
* @return New column containing the repeated strings
*/
std::unique_ptr<column> repeat_strings(
strings_column_view const& input,
size_type repeat_times,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down Expand Up @@ -115,13 +119,15 @@ std::unique_ptr<column> repeat_strings(
*
* @param input The column containing strings to repeat
* @param repeat_times The column containing numbers of times that the corresponding input strings
* are repeated
* for each row are repeated
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned strings column
* @return New column containing the repeated strings.
*/
std::unique_ptr<column> repeat_strings(
strings_column_view const& input,
column_view const& repeat_times,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
Expand Down
Loading

0 comments on commit c7d1869

Please sign in to comment.