Skip to content

Commit

Permalink
Expose stream parameter in public strings combine APIs (#14281)
Browse files Browse the repository at this point in the history
Add stream parameter to public APIs:

- `cudf::strings::concatenate()` (x2)
- `cudf::strings::join_strings()`
- `cudf::strings::join_list_elements()` (x2)
- `cudf::strings::repeat_string()`
- `cudf::strings::repeat_strings()` (x2)

Also added stream gtests and fixed up some doxygen comments.

Reference #13744

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Mike Wilson (https://github.com/hyperbolic2346)

URL: #14281
  • Loading branch information
davidwendt authored Oct 23, 2023
1 parent 8ae3aab commit e8cf0eb
Show file tree
Hide file tree
Showing 8 changed files with 168 additions and 63 deletions.
88 changes: 48 additions & 40 deletions cpp/include/cudf/strings/combine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,18 +66,20 @@ enum class output_if_empty_list {
*
* @throw cudf::logic_error if separator is not valid.
*
* @param strings Strings for this operation.
* @param input Strings for this operation
* @param separator String that should be inserted between each string.
* Default is an empty string.
* @param narep String that should represent any null strings found.
* @param narep String to replace any null strings found.
* Default of invalid-scalar will ignore any null entries.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New column containing one string.
*/
std::unique_ptr<column> join_strings(
strings_column_view const& strings,
strings_column_view const& input,
string_scalar const& separator = string_scalar(""),
string_scalar const& narep = string_scalar("", false),
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down Expand Up @@ -127,25 +129,25 @@ std::unique_ptr<column> join_strings(
* @throw cudf::logic_error if the number of rows from @p separators and @p strings_columns
* do not match
*
* @param strings_columns List of strings columns to concatenate.
* @param strings_columns List of strings columns to concatenate
* @param separators Strings column that provides the separator for a given row
* @param separator_narep String that should be used in place of a null separator for a given
* row. Default of invalid-scalar means no row separator value replacements.
* Default is an invalid string.
* @param col_narep String that should be used in place of any null strings
* found in any column. Default of invalid-scalar means no null column value replacements.
* Default is an invalid string.
* @param separator_narep String to replace a null separator for a given row.
* Default of invalid-scalar means no row separator value replacements.
* @param col_narep String that should be used in place of any null strings found in any column.
* Default of invalid-scalar means no null column value replacements.
* @param separate_nulls If YES, then the separator is included for null rows
* if `col_narep` is valid.
* @param mr Resource for allocating device memory.
* @return New column with concatenated results.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Resource for allocating device memory
* @return New column with concatenated results
*/
std::unique_ptr<column> concatenate(
table_view const& strings_columns,
strings_column_view const& separators,
string_scalar const& separator_narep = string_scalar("", false),
string_scalar const& col_narep = string_scalar("", false),
separator_on_nulls separate_nulls = separator_on_nulls::YES,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down Expand Up @@ -184,21 +186,23 @@ std::unique_ptr<column> concatenate(
* @throw cudf::logic_error if separator is not valid.
* @throw cudf::logic_error if only one column is specified
*
* @param strings_columns List of string columns to concatenate.
* @param strings_columns List of string columns to concatenate
* @param separator String that should be inserted between each string from each row.
* Default is an empty string.
* @param narep String that should be used in place of any null strings
* found in any column. Default of invalid-scalar means any null entry in any column will
* @param narep String to replace any null strings found in any column.
* Default of invalid-scalar means any null entry in any column will
* produce a null result for that row.
* @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New column with concatenated results.
* @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New column with concatenated results
*/
std::unique_ptr<column> concatenate(
table_view const& strings_columns,
string_scalar const& separator = string_scalar(""),
string_scalar const& narep = string_scalar("", false),
separator_on_nulls separate_nulls = separator_on_nulls::YES,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down Expand Up @@ -243,19 +247,20 @@ std::unique_ptr<column> concatenate(
* @throw cudf::logic_error if the number of rows from `separators` and `lists_strings_column` do
* not match
*
* @param lists_strings_column Column containing lists of strings to concatenate.
* @param separators Strings column that provides separators for concatenation.
* @param separator_narep String that should be used to replace null separator, default is an
* invalid-scalar denoting that rows containing null separator will result in null string in
* the corresponding output rows.
* @param string_narep String that should be used to replace null strings in any non-null list row,
* default is an invalid-scalar denoting that list rows containing null strings will result
* in null string in the corresponding output rows.
* @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid.
* @param empty_list_policy if set to EMPTY_STRING, any input row that is an empty list will
* @param lists_strings_column Column containing lists of strings to concatenate
* @param separators Strings column that provides separators for concatenation
* @param separator_narep String that should be used to replace a null separator.
* Default is an invalid-scalar denoting that rows containing null separator will result in
* a null string in the corresponding output rows.
* @param string_narep String to replace null strings in any non-null list row.
* Default is an invalid-scalar denoting that list rows containing null strings will result
* in a null string in the corresponding output rows.
* @param separate_nulls If YES, then the separator is included for null rows if `string_narep` is valid
* @param empty_list_policy If set to EMPTY_STRING, any input row that is an empty list will
* result in an empty string. Otherwise, it will result in a null.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New strings column with concatenated results.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New strings column with concatenated results
*/
std::unique_ptr<column> join_list_elements(
lists_column_view const& lists_strings_column,
Expand All @@ -264,6 +269,7 @@ std::unique_ptr<column> join_list_elements(
string_scalar const& string_narep = string_scalar("", false),
separator_on_nulls separate_nulls = separator_on_nulls::YES,
output_if_empty_list empty_list_policy = output_if_empty_list::EMPTY_STRING,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down Expand Up @@ -303,24 +309,26 @@ std::unique_ptr<column> join_list_elements(
* @throw cudf::logic_error if input column is not lists of strings column.
* @throw cudf::logic_error if separator is not valid.
*
* @param lists_strings_column Column containing lists of strings to concatenate.
* @param separator String that should inserted between strings of each list row, default is an
* empty string.
* @param narep String that should be used to replace null strings in any non-null list row, default
* is an invalid-scalar denoting that list rows containing null strings will result in null
* string in the corresponding output rows.
* @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid.
* @param empty_list_policy if set to EMPTY_STRING, any input row that is an empty list will result
* @param lists_strings_column Column containing lists of strings to concatenate
* @param separator String to insert between strings of each list row.
* Default is an empty string.
* @param narep String to replace null strings in any non-null list row.
* Default is an invalid-scalar denoting that list rows containing null strings will result
* in a null string in the corresponding output rows.
* @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid
* @param empty_list_policy If set to EMPTY_STRING, any input row that is an empty list will result
* in an empty string. Otherwise, it will result in a null.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New strings column with concatenated results.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New strings column with concatenated results
*/
std::unique_ptr<column> join_list_elements(
lists_column_view const& lists_strings_column,
string_scalar const& separator = string_scalar(""),
string_scalar const& narep = string_scalar("", false),
separator_on_nulls separate_nulls = separator_on_nulls::YES,
output_if_empty_list empty_list_policy = output_if_empty_list::EMPTY_STRING,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
Expand Down
8 changes: 7 additions & 1 deletion cpp/include/cudf/strings/repeat_strings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,14 @@ namespace strings {
*
* @param input The scalar containing the string to repeat
* @param repeat_times The number of times the input string is repeated
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned string scalar
* @return New string scalar in which the input string is repeated
*/
std::unique_ptr<string_scalar> repeat_string(
string_scalar const& input,
size_type repeat_times,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -81,12 +83,14 @@ std::unique_ptr<string_scalar> repeat_string(
*
* @param input The column containing strings to repeat
* @param repeat_times The number of times each input string is repeated
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned strings column
* @return New column containing the repeated strings
*/
std::unique_ptr<column> repeat_strings(
strings_column_view const& input,
size_type repeat_times,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down Expand Up @@ -115,13 +119,15 @@ std::unique_ptr<column> repeat_strings(
*
* @param input The column containing strings to repeat
* @param repeat_times The column containing numbers of times that the corresponding input strings
* are repeated
* for each row are repeated
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned strings column
* @return New column containing the repeated strings.
*/
std::unique_ptr<column> repeat_strings(
strings_column_view const& input,
column_view const& repeat_times,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
Expand Down
14 changes: 5 additions & 9 deletions cpp/src/strings/combine/concatenate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -267,28 +267,24 @@ std::unique_ptr<column> concatenate(table_view const& strings_columns,
string_scalar const& separator,
string_scalar const& narep,
separator_on_nulls separate_nulls,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::concatenate(
strings_columns, separator, narep, separate_nulls, cudf::get_default_stream(), mr);
return detail::concatenate(strings_columns, separator, narep, separate_nulls, stream, mr);
}

std::unique_ptr<column> concatenate(table_view const& strings_columns,
strings_column_view const& separators,
string_scalar const& separator_narep,
string_scalar const& col_narep,
separator_on_nulls separate_nulls,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::concatenate(strings_columns,
separators,
separator_narep,
col_narep,
separate_nulls,
cudf::get_default_stream(),
mr);
return detail::concatenate(
strings_columns, separators, separator_narep, col_narep, separate_nulls, stream, mr);
}

} // namespace strings
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/strings/combine/join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -180,10 +180,11 @@ std::unique_ptr<column> join_strings(strings_column_view const& input,
std::unique_ptr<column> join_strings(strings_column_view const& strings,
string_scalar const& separator,
string_scalar const& narep,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::join_strings(strings, separator, narep, cudf::get_default_stream(), mr);
return detail::join_strings(strings, separator, narep, stream, mr);
}

} // namespace strings
Expand Down
13 changes: 5 additions & 8 deletions cpp/src/strings/combine/join_list_elements.cu
Original file line number Diff line number Diff line change
Expand Up @@ -301,16 +301,12 @@ std::unique_ptr<column> join_list_elements(lists_column_view const& lists_string
string_scalar const& narep,
separator_on_nulls separate_nulls,
output_if_empty_list empty_list_policy,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::join_list_elements(lists_strings_column,
separator,
narep,
separate_nulls,
empty_list_policy,
cudf::get_default_stream(),
mr);
return detail::join_list_elements(
lists_strings_column, separator, narep, separate_nulls, empty_list_policy, stream, mr);
}

std::unique_ptr<column> join_list_elements(lists_column_view const& lists_strings_column,
Expand All @@ -319,6 +315,7 @@ std::unique_ptr<column> join_list_elements(lists_column_view const& lists_string
string_scalar const& string_narep,
separator_on_nulls separate_nulls,
output_if_empty_list empty_list_policy,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
Expand All @@ -328,7 +325,7 @@ std::unique_ptr<column> join_list_elements(lists_column_view const& lists_string
string_narep,
separate_nulls,
empty_list_policy,
cudf::get_default_stream(),
stream,
mr);
}

Expand Down
11 changes: 7 additions & 4 deletions cpp/src/strings/repeat_strings.cu
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ std::unique_ptr<string_scalar> repeat_string(string_scalar const& input,
return in_ptr[idx % str_size];
});

return std::make_unique<string_scalar>(std::move(buff));
return std::make_unique<string_scalar>(std::move(buff), true, stream, mr);
}

namespace {
Expand Down Expand Up @@ -260,26 +260,29 @@ std::unique_ptr<column> repeat_strings(strings_column_view const& input,

std::unique_ptr<string_scalar> repeat_string(string_scalar const& input,
size_type repeat_times,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::repeat_string(input, repeat_times, cudf::get_default_stream(), mr);
return detail::repeat_string(input, repeat_times, stream, mr);
}

std::unique_ptr<column> repeat_strings(strings_column_view const& input,
size_type repeat_times,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::repeat_strings(input, repeat_times, cudf::get_default_stream(), mr);
return detail::repeat_strings(input, repeat_times, stream, mr);
}

std::unique_ptr<column> repeat_strings(strings_column_view const& input,
column_view const& repeat_times,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::repeat_strings(input, repeat_times, cudf::get_default_stream(), mr);
return detail::repeat_strings(input, repeat_times, stream, mr);
}

} // namespace strings
Expand Down
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,7 @@ ConfigureTest(STREAM_DICTIONARY_TEST streams/dictionary_test.cpp STREAM_MODE tes
ConfigureTest(
STREAM_STRINGS_TEST
streams/strings/case_test.cpp
streams/strings/combine_test.cpp
streams/strings/convert_test.cpp
streams/strings/find_test.cpp
streams/strings/replace_test.cpp
Expand Down
Loading

0 comments on commit e8cf0eb

Please sign in to comment.