Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose stream param in transform APIs #16452

Merged
merged 23 commits into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion cpp/include/cudf/transform.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ namespace CUDF_EXPORT cudf {
* @param unary_udf The PTX/CUDA string of the unary function to apply
* @param output_type The output type that is compatible with the output type in the UDF
* @param is_ptx true: the UDF is treated as PTX code; false: the UDF is treated as CUDA code
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return The column resulting from applying the unary function to
* every element of the input
Expand All @@ -56,6 +57,7 @@ std::unique_ptr<column> transform(
std::string const& unary_udf,
data_type output_type,
bool is_ptx,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -65,12 +67,14 @@ std::unique_ptr<column> transform(
* @throws cudf::logic_error if `input.type()` is a non-floating type
*
* @param input An immutable view of the input column of floating-point type
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned bitmask
* @return A pair containing a `device_buffer` with the new bitmask and it's
* null count obtained by replacing `NaN` in `input` with null.
*/
std::pair<std::unique_ptr<rmm::device_buffer>, size_type> nans_to_nulls(
column_view const& input,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -83,12 +87,14 @@ std::pair<std::unique_ptr<rmm::device_buffer>, size_type> nans_to_nulls(
*
* @param table The table used for expression evaluation
* @param expr The root of the expression tree
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource
* @return Output column
*/
std::unique_ptr<column> compute_column(
table_view const& table,
ast::expression const& expr,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -101,13 +107,15 @@ std::unique_ptr<column> compute_column(
* @throws cudf::logic_error if `input.type()` is a non-boolean type
*
* @param input Boolean elements to convert to a bitmask
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned bitmask
* @return A pair containing a `device_buffer` with the new bitmask and it's
* null count obtained from input considering `true` represent `valid`/`1` and
* `false` represent `invalid`/`0`.
*/
std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> bools_to_mask(
column_view const& input,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -130,12 +138,14 @@ std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> bools_to_mask(
* @endcode
*
* @param input Table containing values to be encoded
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned table's device memory
* @return A pair containing the distinct row of the input table in sorter order,
* and a column of integer indices representing the encoded rows.
*/
std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::column>> encode(
cudf::table_view const& input,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -162,12 +172,14 @@ std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::column>> encode(
*
* @param input Column containing values to be encoded
* @param categories Column containing categories
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned table's device memory
* @return A pair containing the owner to all encoded data and a table view into the data
*/
std::pair<std::unique_ptr<column>, table_view> one_hot_encode(
column_view const& input,
column_view const& categories,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -188,13 +200,15 @@ std::pair<std::unique_ptr<column>, table_view> one_hot_encode(
* @param bitmask A device pointer to the bitmask which needs to be converted
* @param begin_bit position of the bit from which the conversion should start
* @param end_bit position of the bit before which the conversion should stop
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned columns' device memory
* @return A boolean column representing the given mask from [begin_bit, end_bit)
*/
std::unique_ptr<column> mask_to_bools(
bitmask_type const* bitmask,
size_type begin_bit,
size_type end_bit,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -219,11 +233,14 @@ std::unique_ptr<column> mask_to_bools(
* row_bit_count(column(x)) >= row_bit_count(gather(column(x)))
*
* @param t The table view to perform the computation on
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned columns' device memory
* @return A 32-bit integer column containing the per-row bit counts
*/
std::unique_ptr<column> row_bit_count(
table_view const& t, rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
table_view const& t,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns an approximate cumulative size in bits of all columns in the `table_view` for
Expand All @@ -240,12 +257,14 @@ std::unique_ptr<column> row_bit_count(
*
* @param t The table view to perform the computation on
* @param segment_length The number of rows in each segment for which the total size is computed
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned columns' device memory
* @return A 32-bit integer column containing the bit counts for each segment of rows
*/
std::unique_ptr<column> segmented_row_bit_count(
table_view const& t,
size_type segment_length,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/interop/to_arrow.cu
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ std::shared_ptr<arrow::Array> dispatch_to_arrow::operator()<bool>(column_view in
arrow::MemoryPool* ar_mr,
rmm::cuda_stream_view stream)
{
auto bitmask = bools_to_mask(input, stream, rmm::mr::get_current_device_resource());
auto bitmask = detail::bools_to_mask(input, stream, rmm::mr::get_current_device_resource());

auto data_buffer = allocate_arrow_buffer(static_cast<int64_t>(bitmask.first->size()), ar_mr);

Expand Down
4 changes: 2 additions & 2 deletions cpp/src/interop/to_arrow_device.cu
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ int dispatch_to_arrow_device::operator()<bool>(cudf::column&& column,
nanoarrow::UniqueArray tmp;
NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_BOOL, column));

auto bitmask = bools_to_mask(column.view(), stream, mr);
auto bitmask = detail::bools_to_mask(column.view(), stream, mr);
auto contents = column.release();
NANOARROW_RETURN_NOT_OK(set_null_mask(contents, tmp.get()));
NANOARROW_RETURN_NOT_OK(
Expand Down Expand Up @@ -442,7 +442,7 @@ int dispatch_to_arrow_device_view::operator()<bool>(ArrowArray* out) const
nanoarrow::UniqueArray tmp;
NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_BOOL, column));

auto bitmask = bools_to_mask(column, stream, mr);
auto bitmask = detail::bools_to_mask(column, stream, mr);
NANOARROW_RETURN_NOT_OK(
set_buffer(std::move(bitmask.first), fixed_width_data_buffer_idx, tmp.get()));
NANOARROW_RETURN_NOT_OK(set_null_mask(column, tmp.get()));
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/interop/to_arrow_host.cu
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ int dispatch_to_arrow_host::operator()<bool>(ArrowArray* out) const
NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_BOOL, column));

NANOARROW_RETURN_NOT_OK(populate_validity_bitmap(ArrowArrayValidityBitmap(tmp.get())));
auto bitmask = bools_to_mask(column, stream, mr);
auto bitmask = detail::bools_to_mask(column, stream, mr);
JayjeetAtGithub marked this conversation as resolved.
Show resolved Hide resolved
NANOARROW_RETURN_NOT_OK(populate_data_buffer(
device_span<uint8_t const>(reinterpret_cast<const uint8_t*>(bitmask.first->data()),
bitmask.first->size()),
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/transform/bools_to_mask.cu
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,10 @@ std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> bools_to_mask(
} // namespace detail

std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> bools_to_mask(
column_view const& input, rmm::device_async_resource_ref mr)
column_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
return detail::bools_to_mask(input, cudf::get_default_stream(), mr);
return detail::bools_to_mask(input, stream, mr);
}

} // namespace cudf
3 changes: 2 additions & 1 deletion cpp/src/transform/compute_column.cu
Original file line number Diff line number Diff line change
Expand Up @@ -138,10 +138,11 @@ std::unique_ptr<column> compute_column(table_view const& table,

std::unique_ptr<column> compute_column(table_view const& table,
ast::expression const& expr,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
return detail::compute_column(table, expr, cudf::get_default_stream(), mr);
return detail::compute_column(table, expr, stream, mr);
}

} // namespace cudf
4 changes: 2 additions & 2 deletions cpp/src/transform/encode.cu
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,10 @@ std::pair<std::unique_ptr<table>, std::unique_ptr<column>> encode(table_view con
} // namespace detail

std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::column>> encode(
cudf::table_view const& input, rmm::device_async_resource_ref mr)
cudf::table_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
return detail::encode(input, cudf::get_default_stream(), mr);
return detail::encode(input, stream, mr);
}

} // namespace cudf
3 changes: 2 additions & 1 deletion cpp/src/transform/mask_to_bools.cu
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,10 @@ std::unique_ptr<column> mask_to_bools(bitmask_type const* bitmask,
std::unique_ptr<column> mask_to_bools(bitmask_type const* bitmask,
size_type begin_bit,
size_type end_bit,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
return detail::mask_to_bools(bitmask, begin_bit, end_bit, cudf::get_default_stream(), mr);
return detail::mask_to_bools(bitmask, begin_bit, end_bit, stream, mr);
}
} // namespace cudf
4 changes: 2 additions & 2 deletions cpp/src/transform/nans_to_nulls.cu
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,10 @@ std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> nans_to_nulls(
} // namespace detail

std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> nans_to_nulls(
column_view const& input, rmm::device_async_resource_ref mr)
column_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
return detail::nans_to_nulls(input, cudf::get_default_stream(), mr);
return detail::nans_to_nulls(input, stream, mr);
}

} // namespace cudf
3 changes: 2 additions & 1 deletion cpp/src/transform/one_hot_encode.cu
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,10 @@ std::pair<std::unique_ptr<column>, table_view> one_hot_encode(column_view const&

std::pair<std::unique_ptr<column>, table_view> one_hot_encode(column_view const& input,
column_view const& categories,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
return detail::one_hot_encode(input, categories, cudf::get_default_stream(), mr);
return detail::one_hot_encode(input, categories, stream, mr);
}
} // namespace cudf
11 changes: 7 additions & 4 deletions cpp/src/transform/row_bit_count.cu
Original file line number Diff line number Diff line change
Expand Up @@ -561,23 +561,26 @@ std::unique_ptr<column> row_bit_count(table_view const& t,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
return segmented_row_bit_count(t, 1, stream, mr);
return detail::segmented_row_bit_count(t, 1, stream, mr);
}

} // namespace detail

std::unique_ptr<column> segmented_row_bit_count(table_view const& t,
size_type segment_length,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
return detail::segmented_row_bit_count(t, segment_length, cudf::get_default_stream(), mr);
return detail::segmented_row_bit_count(t, segment_length, stream, mr);
}

std::unique_ptr<column> row_bit_count(table_view const& t, rmm::device_async_resource_ref mr)
std::unique_ptr<column> row_bit_count(table_view const& t,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
return detail::row_bit_count(t, cudf::get_default_stream(), mr);
return detail::row_bit_count(t, stream, mr);
}

} // namespace cudf
3 changes: 2 additions & 1 deletion cpp/src/transform/transform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,11 @@ std::unique_ptr<column> transform(column_view const& input,
std::string const& unary_udf,
data_type output_type,
bool is_ptx,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
return detail::transform(input, unary_udf, output_type, is_ptx, cudf::get_default_stream(), mr);
return detail::transform(input, unary_udf, output_type, is_ptx, stream, mr);
}

} // namespace cudf
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,7 @@ ConfigureTest(
STREAM_MODE
testing
)
ConfigureTest(STREAM_TRANSFORM_TEST streams/transform_test.cu STREAM_MODE testing)
ConfigureTest(STREAM_UNARY_TEST streams/unary_test.cpp STREAM_MODE testing)

# ##################################################################################################
Expand Down
Loading
Loading