Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Forward-merge branch-23.06 to branch-23.08 #13416

Merged
merged 8 commits into from
May 23, 2023
2 changes: 0 additions & 2 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ requirements:
- cython >=0.29,<0.30
- scikit-build >=0.13.1
- setuptools
- numba >=0.56.4,<0.57
- dlpack >=0.5,<0.6.0a0
- pyarrow =11
- libcudf ={{ version }}
Expand All @@ -69,7 +68,6 @@ requirements:
- numpy >=1.21,<1.24 # Temporarily upper bound numpy to avoid overflow deprecations
- {{ pin_compatible('pyarrow', max_pin='x.x.x') }}
- libcudf {{ version }}
- fastavro >=0.22.0
- {{ pin_compatible('rmm', max_pin='x.x') }}
- fsspec >=0.6.0
- {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}
Expand Down
8 changes: 6 additions & 2 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,12 @@ ConfigureBench(APPLY_BOOLEAN_MASK_BENCH stream_compaction/apply_boolean_mask.cpp
# ##################################################################################################
# * stream_compaction benchmark -------------------------------------------------------------------
ConfigureNVBench(
STREAM_COMPACTION_NVBENCH stream_compaction/distinct.cpp stream_compaction/distinct_count.cpp
stream_compaction/unique.cpp stream_compaction/unique_count.cpp
STREAM_COMPACTION_NVBENCH
stream_compaction/distinct.cpp
stream_compaction/distinct_count.cpp
stream_compaction/stable_distinct.cpp
stream_compaction/unique.cpp
stream_compaction/unique_count.cpp
)

# ##################################################################################################
Expand Down
97 changes: 97 additions & 0 deletions cpp/benchmarks/stream_compaction/stable_distinct.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/rmm_pool_raii.hpp>

#include <cudf/column/column_view.hpp>
#include <cudf/lists/list_view.hpp>
#include <cudf/stream_compaction.hpp>
#include <cudf/types.hpp>

#include <nvbench/nvbench.cuh>

// Associates string labels with `cudf::timestamp_ms`, which is one of the entries in the type axis
// used by the `stable_distinct` benchmark below. Both label arguments use the type's own name.
// NOTE(review): presumably nvbench needs this for any non-builtin type placed in a type axis —
// confirm against the nvbench documentation.
NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms");

/**
 * @brief Benchmarks `cudf::stable_distinct` on a table built from one random column of `Type`.
 *
 * One random column with `NumRows` rows is generated and referenced four times to form the input
 * table. Only column 0 is used as the key column.
 *
 * @tparam Type Element type of the generated column
 * @param state nvbench state that supplies the `NumRows` axis value and runs the measurement
 */
template <typename Type>
void nvbench_stable_distinct(nvbench::state& state, nvbench::type_list<Type>)
{
  auto const col_type  = cudf::type_to_id<Type>();
  auto const row_total = static_cast<cudf::size_type>(state.get_int64("NumRows"));

  // Uniform distribution with bounds 0 and 100, and a null probability of 0.01.
  data_profile profile = data_profile_builder().cardinality(0).null_probability(0.01).distribution(
    col_type, distribution_id::UNIFORM, 0, 100);

  auto const generated = create_random_column(col_type, row_count{row_total}, profile);

  // The same column view is repeated so the table is four columns wide.
  auto const col_view   = generated->view();
  auto const four_wide  = cudf::table_view({col_view, col_view, col_view, col_view});

  auto const default_stream = cudf::get_default_stream();
  state.set_cuda_stream(nvbench::make_cuda_stream_view(default_stream.value()));

  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
    // The result is only held until the end of the iteration; its contents are not inspected.
    auto result = cudf::stable_distinct(four_wide,
                                        {0},
                                        cudf::duplicate_keep_option::KEEP_ANY,
                                        cudf::null_equality::EQUAL,
                                        cudf::nan_equality::ALL_EQUAL);
  });
}

// Element types swept by the `stable_distinct` benchmark.
using data_type = nvbench::type_list<bool, int8_t, int32_t, int64_t, float, cudf::timestamp_ms>;

// Registers `nvbench_stable_distinct` once per entry in `data_type`, with the type axis labelled
// "Type" and the row count swept from 10 thousand to 10 million rows via the "NumRows" axis.
NVBENCH_BENCH_TYPES(nvbench_stable_distinct, NVBENCH_TYPE_AXES(data_type))
.set_name("stable_distinct")
.set_type_axes_names({"Type"})
.add_int64_axis("NumRows", {10'000, 100'000, 1'000'000, 10'000'000});

/**
 * @brief Benchmarks `cudf::stable_distinct` on a single-column table that is either a list column
 * or a non-nested column, so the two cases can be compared.
 *
 * The table is sized in bytes from the `ColumnSize` axis and generated with the null probability
 * given by the `null_probability` axis. Column 0 is the key column.
 *
 * @tparam Type Element type of the generated column (`cudf::list_view` selects the list case)
 * @param state nvbench state that supplies the axis values and runs the measurement
 */
template <typename Type>
void nvbench_stable_distinct_list(nvbench::state& state, nvbench::type_list<Type>)
{
  auto const column_bytes = state.get_int64("ColumnSize");
  auto const null_rate    = state.get_float64("null_probability");
  auto const dtype        = cudf::type_to_id<Type>();

  auto profile_builder = data_profile_builder().null_probability(null_rate);
  if (dtype != cudf::type_id::LIST) {
    // The non-nested column is meant to have the same number of distinct rows as the list column
    // configured in the other branch. With a max list size of 4 and 5 distinct values in the
    // list's child, the list column has 1 + 5 + 5^2 + 5^3 + 5^4 = 781 distinct rows, so this
    // column is intended to have 781 distinct values as well.
    // NOTE(review): if the distribution bounds are inclusive, 0..781 gives 782 values — confirm.
    profile_builder.distribution(dtype, distribution_id::UNIFORM, 0, 781);
  } else {
    // Depth-1 lists: both the list distribution and the INT32 child use bounds 0 and 4.
    profile_builder.distribution(dtype, distribution_id::UNIFORM, 0, 4)
      .distribution(cudf::type_id::INT32, distribution_id::UNIFORM, 0, 4)
      .list_depth(1);
  }

  auto const input = create_random_table({dtype},
                                         table_size_bytes{static_cast<size_t>(column_bytes)},
                                         data_profile{profile_builder},
                                         0);

  auto const default_stream = cudf::get_default_stream();
  state.set_cuda_stream(nvbench::make_cuda_stream_view(default_stream.value()));

  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
    // The result is only held until the end of the iteration; its contents are not inspected.
    auto result = cudf::stable_distinct(*input,
                                        {0},
                                        cudf::duplicate_keep_option::KEEP_ANY,
                                        cudf::null_equality::EQUAL,
                                        cudf::nan_equality::ALL_EQUAL);
  });
}

// Registers `nvbench_stable_distinct_list` for an `int32_t` column and a list column, each run
// with null probabilities 0.0 and 0.1 and a "ColumnSize" of 100'000'000 (used by the benchmark
// as a table size in bytes).
NVBENCH_BENCH_TYPES(nvbench_stable_distinct_list,
NVBENCH_TYPE_AXES(nvbench::type_list<int32_t, cudf::list_view>))
.set_name("stable_distinct_list")
.set_type_axes_names({"Type"})
.add_float64_axis("null_probability", {0.0, 0.1})
.add_int64_axis("ColumnSize", {100'000'000});
19 changes: 2 additions & 17 deletions cpp/include/cudf/detail/stream_compaction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,24 +86,9 @@ std::unique_ptr<table> distinct(table_view const& input,
rmm::mr::device_memory_resource* mr);

/**
* @brief Create a new table without duplicate rows.
* @copydoc cudf::stable_distinct
*
* Given an `input` table_view, each row is copied to the output table to create a set of distinct
* rows. The row order is guaranteed to be preserved as in the input.
*
* If there are duplicate rows, which row to be copied depends on the specified value of the `keep`
* parameter.
*
* This API produces exactly the same set of output rows as `cudf::distinct`.
*
* @param input The input table
* @param keys Vector of indices indicating key columns in the `input` table
* @param keep Copy any, first, last, or none of the found duplicates
* @param nulls_equal Flag to specify whether null elements should be considered as equal
* @param nans_equal Flag to specify whether NaN elements should be considered as equal
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned table
* @return A table containing the resulting distinct rows
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<table> stable_distinct(table_view const& input,
std::vector<size_type> const& keys,
Expand Down
49 changes: 38 additions & 11 deletions cpp/include/cudf/stream_compaction.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -254,22 +254,19 @@ std::unique_ptr<table> unique(
* @brief Create a new table without duplicate rows.
*
* Given an `input` table_view, each row is copied to the output table to create a set of distinct
* rows. If there are duplicate rows, which row to be copied depends on the specified value of
* the `keep` parameter.
* rows. If there are duplicate rows, which row is copied depends on the `keep` parameter.
*
* The order of rows in the output table is not specified.
*
* Performance hint: if the input is pre-sorted, `cudf::unique` can produce an equivalent result
* (i.e., same set of output rows) but with less running time than `cudf::distinct`.
*
* @param[in] input input table_view to copy only distinct rows
* @param[in] keys vector of indices representing key columns from `input`
* @param[in] keep keep any, first, last, or none of the found duplicates
* @param[in] nulls_equal flag to control if nulls are compared equal or not
* @param[in] nans_equal flag to control if floating-point NaN values are compared equal or not
* @param[in] mr Device memory resource used to allocate the returned table's device
* memory
*
* @param input The input table
* @param keys Vector of indices indicating key columns in the `input` table
* @param keep Copy any, first, last, or none of the found duplicates
* @param nulls_equal Flag to specify whether null elements should be considered as equal
* @param nans_equal Flag to specify whether NaN elements should be considered as equal
* @param mr Device memory resource used to allocate the returned table
* @return Table with distinct rows in an unspecified order
*/
std::unique_ptr<table> distinct(
Expand All @@ -280,6 +277,36 @@ std::unique_ptr<table> distinct(
nan_equality nans_equal = nan_equality::ALL_EQUAL,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Create a new table without duplicate rows, preserving input order.
*
* Given an `input` table_view, each row is copied to the output table to create a set of distinct
* rows. The input row order is preserved. If there are duplicate rows, which row is copied depends
* on the `keep` parameter.
*
* This API produces the same output rows as `cudf::distinct`, but with input order preserved.
*
* Note that when `keep` is `KEEP_ANY`, the choice of which duplicate row to keep is arbitrary, but
* the returned table will retain the input order. For example, if the key column contains
* `1, 2, 1` and a values column contains `3, 4, 5`, the resulting values could be `3, 4` or
* `4, 5`, but never `4, 3` or `5, 4`.
*
* @param input The input table
* @param keys Vector of indices indicating key columns in the `input` table
* @param keep Copy any, first, last, or none of the found duplicates
* @param nulls_equal Flag to specify whether null elements should be considered as equal
* @param nans_equal Flag to specify whether NaN elements should be considered as equal
* @param mr Device memory resource used to allocate the returned table
* @return Table with distinct rows, preserving input order
*/
std::unique_ptr<table> stable_distinct(
table_view const& input,
std::vector<size_type> const& keys,
duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY,
null_equality nulls_equal = null_equality::EQUAL,
nan_equality nans_equal = nan_equality::ALL_EQUAL,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Count the number of consecutive groups of equivalent rows in a column.
*
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/io/utilities/data_sink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ class file_sink : public data_sink {

if (detail::cufile_integration::is_kvikio_enabled()) {
_kvikio_file = kvikio::FileHandle(filepath, "w");
CUDF_LOG_INFO("Writing a file using kvikIO, with compatibility mode {}.",
_kvikio_file.is_compat_mode_on() ? "on" : "off");
} else {
_cufile_out = detail::make_cufile_output(filepath);
}
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/io/utilities/datasource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ class file_source : public datasource {
{
if (detail::cufile_integration::is_kvikio_enabled()) {
_kvikio_file = kvikio::FileHandle(filepath);
CUDF_LOG_INFO("Reading a file using kvikIO, with compatibility mode {}.",
_kvikio_file.is_compat_mode_on() ? "on" : "off");
} else {
_cufile_in = detail::make_cufile_input(filepath);
}
Expand Down
37 changes: 27 additions & 10 deletions cpp/src/stream_compaction/stable_distinct.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

#include <thrust/iterator/constant_iterator.h>
#include <thrust/scatter.h>
#include <thrust/uninitialized_fill.h>

namespace cudf::detail {
namespace cudf {
namespace detail {

std::unique_ptr<table> stable_distinct(table_view const& input,
std::vector<size_type> const& keys,
Expand All @@ -45,7 +47,13 @@ std::unique_ptr<table> stable_distinct(table_view const& input,
stream,
rmm::mr::get_current_device_resource());

// Markers to denote which rows to be copied to the output.
// The only difference between this implementation and the unstable version
// is that the stable implementation must retain the input order. The
// distinct indices are not sorted, so we cannot simply copy the rows in the
// order of the distinct indices and retain the input order. Instead, we use
// a boolean mask to indicate which rows to copy to the output. This avoids
// the need to sort the distinct indices, which is slower.

auto const output_markers = [&] {
auto markers = rmm::device_uvector<bool>(input.num_rows(), stream);
thrust::uninitialized_fill(rmm::exec_policy(stream), markers.begin(), markers.end(), false);
Expand All @@ -58,13 +66,22 @@ std::unique_ptr<table> stable_distinct(table_view const& input,
return markers;
}();

return cudf::detail::copy_if(
input,
[output_markers = output_markers.begin()] __device__(auto const idx) {
return *(output_markers + idx);
},
stream,
mr);
return cudf::detail::apply_boolean_mask(
input, cudf::device_span<bool const>(output_markers), stream, mr);
}

} // namespace detail

// Public entry point for `stable_distinct`: forwards every argument unchanged to
// `detail::stable_distinct`, supplying the default stream from `cudf::get_default_stream()`.
std::unique_ptr<table> stable_distinct(table_view const& input,
                                       std::vector<size_type> const& keys,
                                       duplicate_keep_option keep,
                                       null_equality nulls_equal,
                                       nan_equality nans_equal,
                                       rmm::mr::device_memory_resource* mr)
{
  // Kept as the first statement so the macro is in effect for the whole call.
  CUDF_FUNC_RANGE();
  auto const stream = cudf::get_default_stream();
  return detail::stable_distinct(input, keys, keep, nulls_equal, nans_equal, stream, mr);
}

} // namespace cudf::detail
} // namespace cudf
3 changes: 2 additions & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -401,8 +401,9 @@ ConfigureTest(
stream_compaction/apply_boolean_mask_tests.cpp
stream_compaction/distinct_count_tests.cpp
stream_compaction/distinct_tests.cpp
stream_compaction/drop_nulls_tests.cpp
stream_compaction/drop_nans_tests.cpp
stream_compaction/drop_nulls_tests.cpp
stream_compaction/stable_distinct_tests.cpp
stream_compaction/unique_count_tests.cpp
stream_compaction/unique_tests.cpp
)
Expand Down
14 changes: 7 additions & 7 deletions cpp/tests/stream_compaction/distinct_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ TEST_F(DistinctKeepAny, EmptyInputTable)
{
int32s_col col(std::initializer_list<int32_t>{});
cudf::table_view input{{col}};
std::vector<cudf::size_type> key_idx{1, 2};
std::vector<cudf::size_type> key_idx{0};

auto got = cudf::distinct(input, key_idx, KEEP_ANY);
CUDF_TEST_EXPECT_TABLES_EQUAL(input, got->view());
Expand Down Expand Up @@ -1217,11 +1217,11 @@ TEST_F(DistinctKeepAny, StructsOfStructs)
// 8 | { {2, 1}, 5} |

auto s1 = [&] {
auto a = int32s_col{1, 1, XXX, XXX, 2, 1, 1, XXX, 2};
auto b = int32s_col{1, 2, XXX, XXX, 2, 1, 1, XXX, 1};
auto a = int32s_col{1, 1, XXX, XXX, XXX, XXX, 1, XXX, 2};
auto b = int32s_col{1, 2, XXX, XXX, XXX, XXX, 1, XXX, 1};
auto s2 = structs_col{{a, b}, nulls_at({2, 3, 7})};

auto c = int32s_col{5, 4, 6, 4, 3, 3, 5, 4, 5};
auto c = int32s_col{5, 4, 6, 4, XXX, XXX, 5, 4, 5};
std::vector<std::unique_ptr<cudf::column>> s1_children;
s1_children.emplace_back(s2.release());
s1_children.emplace_back(c.release());
Expand Down Expand Up @@ -1270,11 +1270,11 @@ TEST_F(DistinctKeepAny, SlicedStructsOfStructs)
// 8 | { {2, 1}, 5} |

auto s1 = [&] {
auto a = int32s_col{1, 1, 2, 2, 2, 1, 1, 1, 2};
auto b = int32s_col{1, 2, 1, 2, 2, 1, 1, 1, 1};
auto a = int32s_col{1, 1, XXX, XXX, XXX, XXX, 1, XXX, 2};
auto b = int32s_col{1, 2, XXX, XXX, XXX, XXX, 1, XXX, 1};
auto s2 = structs_col{{a, b}, nulls_at({2, 3, 7})};

auto c = int32s_col{5, 4, 6, 4, 3, 3, 5, 4, 5};
auto c = int32s_col{5, 4, 6, 4, XXX, XXX, 5, 4, 5};
std::vector<std::unique_ptr<cudf::column>> s1_children;
s1_children.emplace_back(s2.release());
s1_children.emplace_back(c.release());
Expand Down
Loading