diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 5e7862f4b3b..cd6b3cfdc03 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -173,7 +173,7 @@ ConfigureBench(ITERATOR_BENCH iterator/iterator.cu) # ################################################################################################## # * search benchmark ------------------------------------------------------------------------------ ConfigureBench(SEARCH_BENCH search/search.cpp) -ConfigureNVBench(SEARCH_NVBENCH search/contains.cpp) +ConfigureNVBench(SEARCH_NVBENCH search/contains_scalar.cpp search/contains_table.cpp) # ################################################################################################## # * sort benchmark -------------------------------------------------------------------------------- diff --git a/cpp/benchmarks/search/contains.cpp b/cpp/benchmarks/search/contains_scalar.cpp similarity index 100% rename from cpp/benchmarks/search/contains.cpp rename to cpp/benchmarks/search/contains_scalar.cpp diff --git a/cpp/benchmarks/search/contains_table.cpp b/cpp/benchmarks/search/contains_table.cpp new file mode 100644 index 00000000000..17702d0741c --- /dev/null +++ b/cpp/benchmarks/search/contains_table.cpp @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include +#include + +#include + +#include + +auto constexpr num_unique_elements = 1000; + +template +static void nvbench_contains_table(nvbench::state& state, nvbench::type_list) +{ + auto const size = state.get_int64("table_size"); + auto const dtype = cudf::type_to_id(); + double const null_probability = state.get_float64("null_probability"); + + auto builder = data_profile_builder().null_probability(null_probability); + if (dtype == cudf::type_id::LIST) { + builder.distribution(dtype, distribution_id::UNIFORM, 0, num_unique_elements) + .distribution(cudf::type_id::INT32, distribution_id::UNIFORM, 0, num_unique_elements) + .list_depth(1); + } else { + builder.distribution(dtype, distribution_id::UNIFORM, 0, num_unique_elements); + } + + auto const haystack = create_random_table( + {dtype}, table_size_bytes{static_cast(size)}, data_profile{builder}, 0); + auto const needles = create_random_table( + {dtype}, table_size_bytes{static_cast(size)}, data_profile{builder}, 1); + + auto mem_stats_logger = cudf::memory_stats_logger(); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto const stream_view = rmm::cuda_stream_view{launch.get_stream()}; + [[maybe_unused]] auto const result = + cudf::detail::contains(haystack->view(), + needles->view(), + cudf::null_equality::EQUAL, + cudf::nan_equality::ALL_EQUAL, + stream_view, + rmm::mr::get_current_device_resource()); + }); + + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); +} + +NVBENCH_BENCH_TYPES(nvbench_contains_table, + NVBENCH_TYPE_AXES(nvbench::type_list)) + .set_name("contains_table") + .set_type_axes_names({"type"}) + .add_float64_axis("null_probability", {0.0, 0.1}) + .add_int64_axis("table_size", {10'000, 100'000, 1'000'000, 10'000'000}); diff --git a/cpp/include/cudf/detail/search.hpp b/cpp/include/cudf/detail/search.hpp index 4c4ad7834f4..4277baf3edd 100644 --- 
a/cpp/include/cudf/detail/search.hpp +++ b/cpp/include/cudf/detail/search.hpp @@ -81,6 +81,8 @@ std::unique_ptr contains(column_view const& haystack, * output = { false, true, true } * @endcode * + * @throws cudf::logic_error If column types of haystack and needles don't match + * * @param haystack The table containing the search space * @param needles A table of rows whose existence to check in the search space * @param compare_nulls Control whether nulls should be compared as equal or not diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index e37f0686ac3..43624ba691d 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -26,7 +26,7 @@ #include -#include +#include #include @@ -37,69 +37,59 @@ namespace { using cudf::experimental::row::lhs_index_type; using cudf::experimental::row::rhs_index_type; -using static_map = cuco::static_map>>; - /** - * @brief Check if the given type `T` is a strong index type (i.e., `lhs_index_type` or - * `rhs_index_type`). - * - * @return A boolean value indicating if `T` is a strong index type + * @brief A hasher adapter wrapping both the haystack hasher and the needles hasher */ -template -constexpr auto is_strong_index_type() -{ - return std::is_same_v || std::is_same_v; -} +template +struct hasher_adapter { + hasher_adapter(HaystackHasher const& haystack_hasher, NeedleHasher const& needle_hasher) + : _haystack_hasher{haystack_hasher}, _needle_hasher{needle_hasher} + { + } -/** - * @brief An adapter functor to support strong index types for row hasher that must be operating on - * `cudf::size_type`. 
- */ -template -struct strong_index_hasher_adapter { - strong_index_hasher_adapter(Hasher const& hasher) : _hasher{hasher} {} + __device__ constexpr auto operator()(lhs_index_type idx) const noexcept + { + return _haystack_hasher(static_cast(idx)); + } - template ())> - __device__ constexpr auto operator()(T const idx) const noexcept + __device__ constexpr auto operator()(rhs_index_type idx) const noexcept { - return _hasher(static_cast(idx)); + return _needle_hasher(static_cast(idx)); } private: - Hasher const _hasher; + HaystackHasher const _haystack_hasher; + NeedleHasher const _needle_hasher; }; /** - * @brief An adapter functor to support strong index type for table row comparator that must be - * operating on `cudf::size_type`. + * @brief A comparator adapter wrapping both the self comparator and the two-table comparator */ -template -struct strong_index_comparator_adapter { - strong_index_comparator_adapter(Comparator const& comparator) : _comparator{comparator} {} - - template () && is_strong_index_type())> - __device__ constexpr auto operator()(T const lhs_index, U const rhs_index) const noexcept +template +struct comparator_adapter { + comparator_adapter(SelfEqual const& self_equal, TwoTableEqual const& two_table_equal) + : _self_equal{self_equal}, _two_table_equal{two_table_equal} + { + } + + __device__ constexpr auto operator()(lhs_index_type lhs_index, + lhs_index_type rhs_index) const noexcept { auto const lhs = static_cast(lhs_index); auto const rhs = static_cast(rhs_index); - if constexpr (std::is_same_v || std::is_same_v) { - return _comparator(lhs, rhs); - } else { - // Here we have T == rhs_index_type. - // This is when the indices are provided in wrong order for two table comparator, so we need - // to switch them back to the right order before calling the underlying comparator. 
- return _comparator(rhs, lhs); - } + return _self_equal(lhs, rhs); + } + + __device__ constexpr auto operator()(lhs_index_type lhs_index, + rhs_index_type rhs_index) const noexcept + { + return _two_table_equal(lhs_index, rhs_index); } private: - Comparator const _comparator; + SelfEqual const _self_equal; + TwoTableEqual const _two_table_equal; }; /** @@ -134,38 +124,62 @@ std::pair build_row_bitmask(table_view } /** - * @brief Invoke an `operator()` template with a row equality comparator based on the specified - * `compare_nans` parameter. + * @brief Invokes the given `func` with desired comparators based on the specified `compare_nans` + * parameter + * + * @tparam HasNested Flag indicating whether there are nested columns in haystack or needles + * @tparam Hasher Type of device hash function + * @tparam Func Type of the helper function doing `contains` check * - * @param compare_nans The flag to specify whether NaNs should be compared equal or not + * @param compare_nulls Control whether nulls should be compared as equal or not + * @param compare_nans Control whether floating-point NaNs values should be compared as equal or not + * @param haystack_has_nulls Flag indicating whether haystack has nulls or not + * @param has_any_nulls Flag indicating whether there are nested nulls in either haystack or needles + * @param self_equal Self table comparator + * @param two_table_equal Two table comparator + * @param d_hasher Device hash functor * @param func The input functor to invoke */ -template -void dispatch_nan_comparator(nan_equality compare_nans, Func&& func) +template +void dispatch_nan_comparator( + null_equality compare_nulls, + nan_equality compare_nans, + bool haystack_has_nulls, + bool has_any_nulls, + cudf::experimental::row::equality::self_comparator self_equal, + cudf::experimental::row::equality::two_table_comparator two_table_equal, + Hasher const& d_hasher, + Func&& func) { + // Distinguish probing scheme CG sizes between nested and flat types for
better performance + auto const probing_scheme = [&]() { + if constexpr (HasNested) { + return cuco::experimental::linear_probing<4, Hasher>{d_hasher}; + } else { + return cuco::experimental::linear_probing<1, Hasher>{d_hasher}; + } + }(); + if (compare_nans == nan_equality::ALL_EQUAL) { using nan_equal_comparator = cudf::experimental::row::equality::nan_equal_physical_equality_comparator; - func(nan_equal_comparator{}); + auto const d_self_equal = self_equal.equal_to( + nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, nan_equal_comparator{}); + auto const d_two_table_equal = two_table_equal.equal_to( + nullate::DYNAMIC{has_any_nulls}, compare_nulls, nan_equal_comparator{}); + func(d_self_equal, d_two_table_equal, probing_scheme); } else { using nan_unequal_comparator = cudf::experimental::row::equality::physical_equality_comparator; - func(nan_unequal_comparator{}); + auto const d_self_equal = self_equal.equal_to( + nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, nan_unequal_comparator{}); + auto const d_two_table_equal = two_table_equal.equal_to( + nullate::DYNAMIC{has_any_nulls}, compare_nulls, nan_unequal_comparator{}); + func(d_self_equal, d_two_table_equal, probing_scheme); } } } // namespace -/** - * @brief Check if rows in the given `needles` table exist in the `haystack` table. 
- * - * @param haystack The table containing the search space - * @param needles A table of rows whose existence to check in the search space - * @param compare_nulls Control whether nulls should be compared as equal or not - * @param compare_nans Control whether floating-point NaNs values should be compared as equal or not - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned vector - * @return A vector of bools indicating if each row in `needles` has matching rows in `haystack` - */ rmm::device_uvector contains(table_view const& haystack, table_view const& needles, null_equality compare_nulls, @@ -173,124 +187,97 @@ rmm::device_uvector contains(table_view const& haystack, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto map = static_map(compute_hash_table_size(haystack.num_rows()), - cuco::empty_key{lhs_index_type{std::numeric_limits::max()}}, - cuco::empty_value{detail::JoinNoneValue}, - detail::hash_table_allocator_type{default_allocator{}, stream}, - stream.value()); + CUDF_EXPECTS(cudf::have_same_types(haystack, needles), "Column types mismatch"); auto const haystack_has_nulls = has_nested_nulls(haystack); auto const needles_has_nulls = has_nested_nulls(needles); auto const has_any_nulls = haystack_has_nulls || needles_has_nulls; + auto const preprocessed_needles = + cudf::experimental::row::equality::preprocessed_table::create(needles, stream); auto const preprocessed_haystack = cudf::experimental::row::equality::preprocessed_table::create(haystack, stream); - // Insert row indices of the haystack table as map keys. 
- { - auto const haystack_it = cudf::detail::make_counting_transform_iterator( - size_type{0}, - [] __device__(auto const idx) { return cuco::make_pair(lhs_index_type{idx}, 0); }); - - auto const hasher = cudf::experimental::row::hash::row_hasher(preprocessed_haystack); - auto const d_hasher = - strong_index_hasher_adapter{hasher.device_hasher(nullate::DYNAMIC{has_any_nulls})}; - - auto const comparator = - cudf::experimental::row::equality::self_comparator(preprocessed_haystack); - - // If the haystack table has nulls but they are compared unequal, don't insert them. - // Otherwise, it was known to cause performance issue: - // - https://github.com/rapidsai/cudf/pull/6943 - // - https://github.com/rapidsai/cudf/pull/8277 - if (haystack_has_nulls && compare_nulls == null_equality::UNEQUAL) { - auto const bitmask_buffer_and_ptr = build_row_bitmask(haystack, stream); - auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; - - auto const insert_map = [&](auto const value_comp) { - if (cudf::detail::has_nested_columns(haystack)) { - auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( - nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; - map.insert_if(haystack_it, - haystack_it + haystack.num_rows(), - thrust::counting_iterator(0), // stencil - row_is_valid{row_bitmask_ptr}, - d_hasher, - d_eqcomp, - stream.value()); - } else { - auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( - nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; - map.insert_if(haystack_it, - haystack_it + haystack.num_rows(), - thrust::counting_iterator(0), // stencil - row_is_valid{row_bitmask_ptr}, - d_hasher, - d_eqcomp, - stream.value()); - } - }; - - // Insert only rows that do not have any null at any level. 
- dispatch_nan_comparator(compare_nans, insert_map); - } else { // haystack_doesn't_have_nulls || compare_nulls == null_equality::EQUAL - auto const insert_map = [&](auto const value_comp) { - if (cudf::detail::has_nested_columns(haystack)) { - auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( - nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; - map.insert( - haystack_it, haystack_it + haystack.num_rows(), d_hasher, d_eqcomp, stream.value()); - } else { - auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( - nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; - map.insert( - haystack_it, haystack_it + haystack.num_rows(), d_hasher, d_eqcomp, stream.value()); - } - }; - - dispatch_nan_comparator(compare_nans, insert_map); - } - } + + auto const haystack_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_haystack); + auto const d_haystack_hasher = haystack_hasher.device_hasher(nullate::DYNAMIC{has_any_nulls}); + auto const needle_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_needles); + auto const d_needle_hasher = needle_hasher.device_hasher(nullate::DYNAMIC{has_any_nulls}); + auto const d_hasher = hasher_adapter{d_haystack_hasher, d_needle_hasher}; + + auto const self_equal = cudf::experimental::row::equality::self_comparator(preprocessed_haystack); + auto const two_table_equal = cudf::experimental::row::equality::two_table_comparator( + preprocessed_haystack, preprocessed_needles); // The output vector. auto contained = rmm::device_uvector(needles.num_rows(), stream, mr); - auto const preprocessed_needles = - cudf::experimental::row::equality::preprocessed_table::create(needles, stream); - // Check existence for each row of the needles table in the haystack table. 
- { - auto const needles_it = cudf::detail::make_counting_transform_iterator( - size_type{0}, [] __device__(auto const idx) { return rhs_index_type{idx}; }); - - auto const hasher = cudf::experimental::row::hash::row_hasher(preprocessed_needles); - auto const d_hasher = - strong_index_hasher_adapter{hasher.device_hasher(nullate::DYNAMIC{has_any_nulls})}; - - auto const comparator = cudf::experimental::row::equality::two_table_comparator( - preprocessed_haystack, preprocessed_needles); - - auto const check_contains = [&](auto const value_comp) { - if (cudf::detail::has_nested_columns(haystack) or cudf::detail::has_nested_columns(needles)) { - auto const d_eqcomp = - comparator.equal_to(nullate::DYNAMIC{has_any_nulls}, compare_nulls, value_comp); - map.contains(needles_it, - needles_it + needles.num_rows(), - contained.begin(), - d_hasher, - d_eqcomp, - stream.value()); + auto const haystack_iter = cudf::detail::make_counting_transform_iterator( + size_type{0}, [] __device__(auto idx) { return lhs_index_type{idx}; }); + auto const needles_iter = cudf::detail::make_counting_transform_iterator( + size_type{0}, [] __device__(auto idx) { return rhs_index_type{idx}; }); + + auto const helper_func = + [&](auto const& d_self_equal, auto const& d_two_table_equal, auto const& probing_scheme) { + auto const d_equal = comparator_adapter{d_self_equal, d_two_table_equal}; + + auto set = cuco::experimental::static_set{ + cuco::experimental::extent{compute_hash_table_size(haystack.num_rows())}, + cuco::empty_key{lhs_index_type{-1}}, + d_equal, + probing_scheme, + detail::hash_table_allocator_type{default_allocator{}, stream}, + stream.value()}; + + if (haystack_has_nulls && compare_nulls == null_equality::UNEQUAL) { + auto const bitmask_buffer_and_ptr = build_row_bitmask(haystack, stream); + auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; + + // If the haystack table has nulls but they are compared unequal, don't insert them. 
+ // Otherwise, it was known to cause performance issue: + // - https://github.com/rapidsai/cudf/pull/6943 + // - https://github.com/rapidsai/cudf/pull/8277 + set.insert_if_async(haystack_iter, + haystack_iter + haystack.num_rows(), + thrust::counting_iterator(0), // stencil + row_is_valid{row_bitmask_ptr}, + stream.value()); } else { - auto const d_eqcomp = - comparator.equal_to(nullate::DYNAMIC{has_any_nulls}, compare_nulls, value_comp); - map.contains(needles_it, - needles_it + needles.num_rows(), - contained.begin(), - d_hasher, - d_eqcomp, - stream.value()); + set.insert_async(haystack_iter, haystack_iter + haystack.num_rows(), stream.value()); + } + + if (needles_has_nulls && compare_nulls == null_equality::UNEQUAL) { + auto const bitmask_buffer_and_ptr = build_row_bitmask(needles, stream); + auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; + set.contains_if_async(needles_iter, + needles_iter + needles.num_rows(), + thrust::counting_iterator(0), // stencil + row_is_valid{row_bitmask_ptr}, + contained.begin(), + stream.value()); + } else { + set.contains_async( + needles_iter, needles_iter + needles.num_rows(), contained.begin(), stream.value()); } }; - dispatch_nan_comparator(compare_nans, check_contains); + if (cudf::detail::has_nested_columns(haystack)) { + dispatch_nan_comparator(compare_nulls, + compare_nans, + haystack_has_nulls, + has_any_nulls, + self_equal, + two_table_equal, + d_hasher, + helper_func); + } else { + dispatch_nan_comparator(compare_nulls, + compare_nans, + haystack_has_nulls, + has_any_nulls, + self_equal, + two_table_equal, + d_hasher, + helper_func); } return contained;