From 8f2294f3e809304201564fa19ca50df84b4bdccf Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Thu, 7 Sep 2023 16:58:34 -0700 Subject: [PATCH 01/16] Refactor contains_table with cuco::static_set --- cpp/src/search/contains_table.cu | 250 ++++++++++++++----------------- 1 file changed, 109 insertions(+), 141 deletions(-) diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index e37f0686ac3..628ba2c4c3b 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -26,7 +26,7 @@ #include -#include +#include #include @@ -37,11 +37,6 @@ namespace { using cudf::experimental::row::lhs_index_type; using cudf::experimental::row::rhs_index_type; -using static_map = cuco::static_map>>; - /** * @brief Check if the given type `T` is a strong index type (i.e., `lhs_index_type` or * `rhs_index_type`). @@ -58,48 +53,58 @@ constexpr auto is_strong_index_type() * @brief An adapter functor to support strong index types for row hasher that must be operating on * `cudf::size_type`. */ -template -struct strong_index_hasher_adapter { - strong_index_hasher_adapter(Hasher const& hasher) : _hasher{hasher} {} +template +struct hasher_adapter { + hasher_adapter(HaystackHasher const& haystack_hasher, NeedleHasher const& needle_hasher) + : _haystack_hasher{haystack_hasher}, _needle_hasher{needle_hasher} + { + } + + __device__ constexpr auto operator()(lhs_index_type idx) const noexcept + { + return _haystack_hasher(static_cast(idx)); + } - template ())> - __device__ constexpr auto operator()(T const idx) const noexcept + __device__ constexpr auto operator()(rhs_index_type idx) const noexcept { - return _hasher(static_cast(idx)); + return _needle_hasher(static_cast(idx)); } private: - Hasher const _hasher; + HaystackHasher const _haystack_hasher; + NeedleHasher const _needle_hasher; }; /** * @brief An adapter functor to support strong index type for table row comparator that must be * operating on `cudf::size_type`. */ -template -struct strong_index_comparator_adapter { - strong_index_comparator_adapter(Comparator const& comparator) : _comparator{comparator} {} - - template () && is_strong_index_type())> - __device__ constexpr auto operator()(T const lhs_index, U const rhs_index) const noexcept +template +struct comparator_adapter { + comparator_adapter(SelfComparator const& self_comparator, + TwoTableComparator const& two_table_comparator) + : _self_comparator{self_comparator}, _two_table_comparator{two_table_comparator} + { + } + + __device__ constexpr auto operator()(lhs_index_type lhs_index, + lhs_index_type rhs_index) const noexcept { auto const lhs = static_cast(lhs_index); auto const rhs = static_cast(rhs_index); - if constexpr (std::is_same_v || std::is_same_v) { - return _comparator(lhs, rhs); - } else { - // Here we have T == rhs_index_type. - // This is when the indices are provided in wrong order for two table comparator, so we need - // to switch them back to the right order before calling the underlying comparator. - return _comparator(rhs, lhs); - } + return _self_comparator(lhs, rhs); + } + + __device__ constexpr auto operator()(lhs_index_type lhs_index, + rhs_index_type rhs_index) const noexcept + { + return _two_table_comparator(lhs_index, rhs_index); } private: - Comparator const _comparator; + SelfComparator const _self_comparator; + TwoTableComparator const _two_table_comparator; }; /** @@ -133,6 +138,18 @@ std::pair build_row_bitmask(table_view return std::pair(rmm::device_buffer{0, stream}, nullable_columns.front().null_mask()); } +} // namespace +template +void dispatch( + null_equality compare_nulls, auto self_comp, auto two_table_comp, auto nan_comp, Func func) +{ + auto const d_self_eq = self_comp.equal_to( + nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, nan_comp); + auto const d_two_table_eq = two_table_comp.equal_to( + nullate::DYNAMIC{has_any_nulls}, compare_nulls, nan_comp); + func(d_self_eq, d_two_table_eq); +} + /** * @brief Invoke an `operator()` template with a row equality comparator based on the specified * `compare_nans` parameter. @@ -140,21 +157,25 @@ std::pair build_row_bitmask(table_view * @param compare_nans The flag to specify whether NaNs should be compared equal or not * @param func The input functor to invoke */ -template -void dispatch_nan_comparator(nan_equality compare_nans, Func&& func) +template +void dispatch_nan_comparator(nan_equality compare_nans, + null_equality compare_nulls, + auto self_comp, + auto two_table_comp, + Func func) { if (compare_nans == nan_equality::ALL_EQUAL) { using nan_equal_comparator = cudf::experimental::row::equality::nan_equal_physical_equality_comparator; - func(nan_equal_comparator{}); + dispatch( + compare_nulls, self_comp, two_table_comp, nan_equal_comparator{}, func); } else { using nan_unequal_comparator = cudf::experimental::row::equality::physical_equality_comparator; - func(nan_unequal_comparator{}); + dispatch( + compare_nulls, self_comp, two_table_comp, nan_unequal_comparator{}, func); } } -} // namespace - /** * @brief Check if rows in the given `needles` table exist in the `haystack` table. * @@ -173,124 +194,71 @@ rmm::device_uvector contains(table_view const& haystack, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto map = static_map(compute_hash_table_size(haystack.num_rows()), - cuco::empty_key{lhs_index_type{std::numeric_limits::max()}}, - cuco::empty_value{detail::JoinNoneValue}, - detail::hash_table_allocator_type{default_allocator{}, stream}, - stream.value()); - auto const haystack_has_nulls = has_nested_nulls(haystack); auto const needles_has_nulls = has_nested_nulls(needles); auto const has_any_nulls = haystack_has_nulls || needles_has_nulls; + auto const preprocessed_needles = + cudf::experimental::row::equality::preprocessed_table::create(needles, stream); auto const preprocessed_haystack = cudf::experimental::row::equality::preprocessed_table::create(haystack, stream); - // Insert row indices of the haystack table as map keys. - { - auto const haystack_it = cudf::detail::make_counting_transform_iterator( - size_type{0}, - [] __device__(auto const idx) { return cuco::make_pair(lhs_index_type{idx}, 0); }); - auto const hasher = cudf::experimental::row::hash::row_hasher(preprocessed_haystack); - auto const d_hasher = - strong_index_hasher_adapter{hasher.device_hasher(nullate::DYNAMIC{has_any_nulls})}; + auto const haystack_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_haystack); + auto const d_haystack_hasher = haystack_hasher.device_hasher(nullate::DYNAMIC{has_any_nulls}); + auto const needle_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_needles); + auto const d_needle_hasher = needle_hasher.device_hasher(nullate::DYNAMIC{has_any_nulls}); + auto const d_hasher = hasher_adapter{d_haystack_hasher, d_needle_hasher}; + using hasher_type = decltype(d_hasher); + + auto const self_comparator = + cudf::experimental::row::equality::self_comparator(preprocessed_haystack); + auto const two_table_comparator = cudf::experimental::row::equality::two_table_comparator( + preprocessed_haystack, preprocessed_needles); + + // The output vector. + auto contained = rmm::device_uvector(needles.num_rows(), stream, mr); + + auto const haystack_iter = cudf::detail::make_counting_transform_iterator( + size_type{0}, [] __device__(auto idx) { return lhs_index_type{idx}; }); + auto const needles_iter = cudf::detail::make_counting_transform_iterator( + size_type{0}, [] __device__(auto idx) { return rhs_index_type{idx}; }); + + auto const helper_func = [&](auto const& d_self_equal, auto const& d_two_table_equal) { + auto const d_equal = comparator_adapter{d_self_equal, d_two_table_equal}; - auto const comparator = - cudf::experimental::row::equality::self_comparator(preprocessed_haystack); + auto set = cuco::experimental::static_set{ + cuco::experimental::extent{compute_hash_table_size(haystack.num_rows())}, + cuco::empty_key{lhs_index_type{-1}}, + d_equal, + cuco::experimental::linear_probing<1, hasher_type>{d_hasher}, + detail::hash_table_allocator_type{default_allocator{}, stream}, + stream.value()}; - // If the haystack table has nulls but they are compared unequal, don't insert them. - // Otherwise, it was known to cause performance issue: - // - https://github.com/rapidsai/cudf/pull/6943 - // - https://github.com/rapidsai/cudf/pull/8277 if (haystack_has_nulls && compare_nulls == null_equality::UNEQUAL) { - auto const bitmask_buffer_and_ptr = build_row_bitmask(haystack, stream); - auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; - - auto const insert_map = [&](auto const value_comp) { - if (cudf::detail::has_nested_columns(haystack)) { - auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( - nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; - map.insert_if(haystack_it, - haystack_it + haystack.num_rows(), - thrust::counting_iterator(0), // stencil - row_is_valid{row_bitmask_ptr}, - d_hasher, - d_eqcomp, - stream.value()); - } else { - auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( - nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; - map.insert_if(haystack_it, - haystack_it + haystack.num_rows(), - thrust::counting_iterator(0), // stencil - row_is_valid{row_bitmask_ptr}, - d_hasher, - d_eqcomp, - stream.value()); - } - }; - - // Insert only rows that do not have any null at any level. - dispatch_nan_comparator(compare_nans, insert_map); - } else { // haystack_doesn't_have_nulls || compare_nulls == null_equality::EQUAL - auto const insert_map = [&](auto const value_comp) { - if (cudf::detail::has_nested_columns(haystack)) { - auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( - nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; - map.insert( - haystack_it, haystack_it + haystack.num_rows(), d_hasher, d_eqcomp, stream.value()); - } else { - auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( - nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; - map.insert( - haystack_it, haystack_it + haystack.num_rows(), d_hasher, d_eqcomp, stream.value()); - } - }; - - dispatch_nan_comparator(compare_nans, insert_map); + auto const row_bitmask = build_row_bitmask(haystack, stream).second; + set.insert_if_async(haystack_iter, + haystack_iter + haystack.num_rows(), + thrust::counting_iterator(0), // stencil + row_is_valid{row_bitmask}, + stream.value()); + } else { + set.insert_async(haystack_iter, haystack_iter + haystack.num_rows(), stream.value()); } - } - // The output vector. - auto contained = rmm::device_uvector(needles.num_rows(), stream, mr); + if (needles_has_nulls && compare_nulls == null_equality::UNEQUAL) { + set.contains_if_async( + needles_iter, needles_iter + needles.num_rows(), contained.begin(), stream.value()); + } else { + set.contains_async( + needles_iter, needles_iter + needles.num_rows(), contained.begin(), stream.value()); + } + }; - auto const preprocessed_needles = - cudf::experimental::row::equality::preprocessed_table::create(needles, stream); - // Check existence for each row of the needles table in the haystack table. - { - auto const needles_it = cudf::detail::make_counting_transform_iterator( - size_type{0}, [] __device__(auto const idx) { return rhs_index_type{idx}; }); - - auto const hasher = cudf::experimental::row::hash::row_hasher(preprocessed_needles); - auto const d_hasher = - strong_index_hasher_adapter{hasher.device_hasher(nullate::DYNAMIC{has_any_nulls})}; - - auto const comparator = cudf::experimental::row::equality::two_table_comparator( - preprocessed_haystack, preprocessed_needles); - - auto const check_contains = [&](auto const value_comp) { - if (cudf::detail::has_nested_columns(haystack) or cudf::detail::has_nested_columns(needles)) { - auto const d_eqcomp = - comparator.equal_to(nullate::DYNAMIC{has_any_nulls}, compare_nulls, value_comp); - map.contains(needles_it, - needles_it + needles.num_rows(), - contained.begin(), - d_hasher, - d_eqcomp, - stream.value()); - } else { - auto const d_eqcomp = - comparator.equal_to(nullate::DYNAMIC{has_any_nulls}, compare_nulls, value_comp); - map.contains(needles_it, - needles_it + needles.num_rows(), - contained.begin(), - d_hasher, - d_eqcomp, - stream.value()); - } - }; - - dispatch_nan_comparator(compare_nans, check_contains); + if (haystack_has_nulls) { + if (has_any_nulls) { + dispatch_nan_comparator( + compare_nans, compare_nulls, self_comparator, two_table_comparator, helper_func); + } } return contained; From f2fd99469bc5a105ca5fe274b90e59adb08b985c Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Thu, 7 Sep 2023 16:59:59 -0700 Subject: [PATCH 02/16] Refactor contains_table with cuco::static_set --- cpp/src/search/contains_table.cu | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 628ba2c4c3b..7455bd1b15d 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -138,7 +138,6 @@ std::pair build_row_bitmask(table_view return std::pair(rmm::device_buffer{0, stream}, nullable_columns.front().null_mask()); } -} // namespace template void dispatch( null_equality compare_nulls, auto self_comp, auto two_table_comp, auto nan_comp, Func func) @@ -176,6 +175,8 @@ void dispatch_nan_comparator(nan_equality compare_nans, } } +} // namespace + /** * @brief Check if rows in the given `needles` table exist in the `haystack` table. * @@ -223,7 +224,7 @@ rmm::device_uvector contains(table_view const& haystack, auto const needles_iter = cudf::detail::make_counting_transform_iterator( size_type{0}, [] __device__(auto idx) { return rhs_index_type{idx}; }); - auto const helper_func = [&](auto const& d_self_equal, auto const& d_two_table_equal) { + auto helper_func = [&](auto const& d_self_equal, auto const& d_two_table_equal) { auto const d_equal = comparator_adapter{d_self_equal, d_two_table_equal}; auto set = cuco::experimental::static_set{ From bec3cd66f197a24b249143fb3c82c05c4bacc6b0 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Wed, 13 Sep 2023 14:59:09 -0700 Subject: [PATCH 03/16] Fix logic issues with hashset --- cpp/src/search/contains_table.cu | 136 ++++++++++++++++++------------- 1 file changed, 78 insertions(+), 58 deletions(-) diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 7455bd1b15d..418c4d6bd62 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -38,20 +38,7 @@ using cudf::experimental::row::lhs_index_type; using cudf::experimental::row::rhs_index_type; /** - * @brief Check if the given type `T` is a strong index type (i.e., `lhs_index_type` or - * `rhs_index_type`). - * - * @return A boolean value indicating if `T` is a strong index type - */ -template -constexpr auto is_strong_index_type() -{ - return std::is_same_v || std::is_same_v; -} - -/** - * @brief An adapter functor to support strong index types for row hasher that must be operating on - * `cudf::size_type`. + * @brief An hasher adapter wrapping both haystack hasher and needles hasher */ template struct hasher_adapter { @@ -76,14 +63,12 @@ struct hasher_adapter { }; /** - * @brief An adapter functor to support strong index type for table row comparator that must be - * operating on `cudf::size_type`. + * @brief An comparator adapter wrapping both self comparator and two table comparator */ -template +template struct comparator_adapter { - comparator_adapter(SelfComparator const& self_comparator, - TwoTableComparator const& two_table_comparator) - : _self_comparator{self_comparator}, _two_table_comparator{two_table_comparator} + comparator_adapter(SelfEqual const& self_equal, TwoTableEqual const& two_table_equal) + : _self_equal{self_equal}, _two_table_equal{two_table_equal} { } @@ -93,18 +78,18 @@ struct comparator_adapter { auto const lhs = static_cast(lhs_index); auto const rhs = static_cast(rhs_index); - return _self_comparator(lhs, rhs); + return _self_equal(lhs, rhs); } __device__ constexpr auto operator()(lhs_index_type lhs_index, rhs_index_type rhs_index) const noexcept { - return _two_table_comparator(lhs_index, rhs_index); + return _two_table_equal(lhs_index, rhs_index); } private: - SelfComparator const _self_comparator; - TwoTableComparator const _two_table_comparator; + SelfEqual const _self_equal; + TwoTableEqual const _two_table_equal; }; /** @@ -138,40 +123,50 @@ std::pair build_row_bitmask(table_view return std::pair(rmm::device_buffer{0, stream}, nullable_columns.front().null_mask()); } -template -void dispatch( - null_equality compare_nulls, auto self_comp, auto two_table_comp, auto nan_comp, Func func) -{ - auto const d_self_eq = self_comp.equal_to( - nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, nan_comp); - auto const d_two_table_eq = two_table_comp.equal_to( - nullate::DYNAMIC{has_any_nulls}, compare_nulls, nan_comp); - func(d_self_eq, d_two_table_eq); -} - /** - * @brief Invoke an `operator()` template with a row equality comparator based on the specified - * `compare_nans` parameter. + * @brief Invokes the given `func` with desired comparators based on the specified `compare_nans` + parameter. + * + * @tparam HaystackHasNested Flag indicating whether there are nested columns in haystack + * @tparam HasAnyNested Flag indicating whether there are nested columns in either haystack or + needles + * @tparam Func Type of the helper function doing `contains` check + + * @tparam haystack_has_nulls Flag indicating whether haystack has nulls or not * - * @param compare_nans The flag to specify whether NaNs should be compared equal or not + * @param compare_nulls Control whether nulls should be compared as equal or not + * @param compare_nans Control whether floating-point NaNs values should be compared as equal or not + * @param haystack_has_nulls Flag indicating whether haystack has nulls or not + * @param has_any_nulls Flag indicating whether there are nested nulls is either haystack or needles + * @param self_equal Self table comparator + * @param two_table_equal Two table comparator * @param func The input functor to invoke */ -template -void dispatch_nan_comparator(nan_equality compare_nans, - null_equality compare_nulls, - auto self_comp, - auto two_table_comp, - Func func) +template +void dispatch_nan_comparator( + null_equality compare_nulls, + nan_equality compare_nans, + bool haystack_has_nulls, + bool has_any_nulls, + cudf::experimental::row::equality::self_comparator self_equal, + cudf::experimental::row::equality::two_table_comparator two_table_equal, + Func&& func) { if (compare_nans == nan_equality::ALL_EQUAL) { using nan_equal_comparator = cudf::experimental::row::equality::nan_equal_physical_equality_comparator; - dispatch( - compare_nulls, self_comp, two_table_comp, nan_equal_comparator{}, func); + auto const d_self_equal = self_equal.equal_to( + nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, nan_equal_comparator{}); + auto const d_two_table_equal = two_table_equal.equal_to( + nullate::DYNAMIC{has_any_nulls}, compare_nulls, nan_equal_comparator{}); + func(d_self_equal, d_two_table_equal); } else { using nan_unequal_comparator = cudf::experimental::row::equality::physical_equality_comparator; - dispatch( - compare_nulls, self_comp, two_table_comp, nan_unequal_comparator{}, func); + auto const d_self_equal = self_equal.equal_to( + nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, nan_unequal_comparator{}); + auto const d_two_table_equal = two_table_equal.equal_to( + nullate::DYNAMIC{has_any_nulls}, compare_nulls, nan_unequal_comparator{}); + func(d_self_equal, d_two_table_equal); } } @@ -211,9 +206,8 @@ rmm::device_uvector contains(table_view const& haystack, auto const d_hasher = hasher_adapter{d_haystack_hasher, d_needle_hasher}; using hasher_type = decltype(d_hasher); - auto const self_comparator = - cudf::experimental::row::equality::self_comparator(preprocessed_haystack); - auto const two_table_comparator = cudf::experimental::row::equality::two_table_comparator( + auto const self_equal = cudf::experimental::row::equality::self_comparator(preprocessed_haystack); + auto const two_table_equal = cudf::experimental::row::equality::two_table_comparator( preprocessed_haystack, preprocessed_needles); // The output vector. @@ -224,7 +218,7 @@ rmm::device_uvector contains(table_view const& haystack, auto const needles_iter = cudf::detail::make_counting_transform_iterator( size_type{0}, [] __device__(auto idx) { return rhs_index_type{idx}; }); - auto helper_func = [&](auto const& d_self_equal, auto const& d_two_table_equal) { + auto const helper_func = [&](auto const& d_self_equal, auto const& d_two_table_equal) { auto const d_equal = comparator_adapter{d_self_equal, d_two_table_equal}; auto set = cuco::experimental::static_set{ @@ -247,18 +241,44 @@ rmm::device_uvector contains(table_view const& haystack, } if (needles_has_nulls && compare_nulls == null_equality::UNEQUAL) { - set.contains_if_async( - needles_iter, needles_iter + needles.num_rows(), contained.begin(), stream.value()); + auto const row_bitmask = build_row_bitmask(needles, stream).second; + set.contains_if_async(needles_iter, + needles_iter + needles.num_rows(), + thrust::counting_iterator(0), // stencil + row_is_valid{row_bitmask}, + contained.begin(), + stream.value()); } else { set.contains_async( needles_iter, needles_iter + needles.num_rows(), contained.begin(), stream.value()); } }; - if (haystack_has_nulls) { - if (has_any_nulls) { - dispatch_nan_comparator( - compare_nans, compare_nulls, self_comparator, two_table_comparator, helper_func); + if (cudf::detail::has_nested_columns(haystack)) { + dispatch_nan_comparator(compare_nulls, + compare_nans, + haystack_has_nulls, + has_any_nulls, + self_equal, + two_table_equal, + helper_func); + } else { + if (cudf::detail::has_nested_columns(needles)) { + dispatch_nan_comparator(compare_nulls, + compare_nans, + haystack_has_nulls, + has_any_nulls, + self_equal, + two_table_equal, + helper_func); + } else { + dispatch_nan_comparator(compare_nulls, + compare_nans, + haystack_has_nulls, + has_any_nulls, + self_equal, + two_table_equal, + helper_func); } } From 5ada7884a8b93ec82f97917262adeae31d1bc3e4 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Wed, 13 Sep 2023 15:21:54 -0700 Subject: [PATCH 04/16] Get rid of build_row_bitmask function --- cpp/src/search/contains_table.cu | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 418c4d6bd62..6c89c8e9c6c 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -102,6 +102,7 @@ struct comparator_adapter { * @param stream CUDA stream used for device memory operations and kernel launches * @return A pair of pointer to the output bitmask and the buffer containing the bitmask */ +/* std::pair build_row_bitmask(table_view const& input, rmm::cuda_stream_view stream) { @@ -122,6 +123,9 @@ std::pair build_row_bitmask(table_view return std::pair(rmm::device_buffer{0, stream}, nullable_columns.front().null_mask()); } +// TODO: To doublecheck, under no situation but here we do nested checks with +// `get_nullable_columns`. This seems wrong +*/ /** * @brief Invokes the given `func` with desired comparators based on the specified `compare_nans` @@ -206,8 +210,8 @@ rmm::device_uvector contains(table_view const& haystack, auto const d_hasher = hasher_adapter{d_haystack_hasher, d_needle_hasher}; using hasher_type = decltype(d_hasher); - auto const self_equal = cudf::experimental::row::equality::self_comparator(preprocessed_haystack); - auto const two_table_equal = cudf::experimental::row::equality::two_table_comparator( + auto const self_comp = cudf::experimental::row::equality::self_comparator(preprocessed_haystack); + auto const two_table_comp = cudf::experimental::row::equality::two_table_comparator( preprocessed_haystack, preprocessed_needles); // The output vector. @@ -230,22 +234,24 @@ rmm::device_uvector contains(table_view const& haystack, stream.value()}; if (haystack_has_nulls && compare_nulls == null_equality::UNEQUAL) { - auto const row_bitmask = build_row_bitmask(haystack, stream).second; + auto const row_bitmask = + cudf::detail::bitmask_and(haystack, stream, rmm::mr::get_current_device_resource()).first; set.insert_if_async(haystack_iter, haystack_iter + haystack.num_rows(), thrust::counting_iterator(0), // stencil - row_is_valid{row_bitmask}, + row_is_valid{reinterpret_cast(row_bitmask.data())}, stream.value()); } else { set.insert_async(haystack_iter, haystack_iter + haystack.num_rows(), stream.value()); } if (needles_has_nulls && compare_nulls == null_equality::UNEQUAL) { - auto const row_bitmask = build_row_bitmask(needles, stream).second; + auto const row_bitmask = + cudf::detail::bitmask_and(needles, stream, rmm::mr::get_current_device_resource()).first; set.contains_if_async(needles_iter, needles_iter + needles.num_rows(), thrust::counting_iterator(0), // stencil - row_is_valid{row_bitmask}, + row_is_valid{reinterpret_cast(row_bitmask.data())}, contained.begin(), stream.value()); } else { @@ -259,8 +265,8 @@ rmm::device_uvector contains(table_view const& haystack, compare_nans, haystack_has_nulls, has_any_nulls, - self_equal, - two_table_equal, + self_comp, + two_table_comp, helper_func); } else { if (cudf::detail::has_nested_columns(needles)) { @@ -268,16 +274,16 @@ rmm::device_uvector contains(table_view const& haystack, compare_nans, haystack_has_nulls, has_any_nulls, - self_equal, - two_table_equal, + self_comp, + two_table_comp, helper_func); } else { dispatch_nan_comparator(compare_nulls, compare_nans, haystack_has_nulls, has_any_nulls, - self_equal, - two_table_equal, + self_comp, + two_table_comp, helper_func); } } From 204bd452a9a93a557a981e7f153b23ae72f2d7a5 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Wed, 13 Sep 2023 15:25:02 -0700 Subject: [PATCH 05/16] Minor cleanups: renaming --- cpp/src/search/contains_table.cu | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 6c89c8e9c6c..916b36cb354 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -210,8 +210,8 @@ rmm::device_uvector contains(table_view const& haystack, auto const d_hasher = hasher_adapter{d_haystack_hasher, d_needle_hasher}; using hasher_type = decltype(d_hasher); - auto const self_comp = cudf::experimental::row::equality::self_comparator(preprocessed_haystack); - auto const two_table_comp = cudf::experimental::row::equality::two_table_comparator( + auto const self_equal = cudf::experimental::row::equality::self_comparator(preprocessed_haystack); + auto const two_table_equal = cudf::experimental::row::equality::two_table_comparator( preprocessed_haystack, preprocessed_needles); // The output vector. @@ -265,8 +265,8 @@ rmm::device_uvector contains(table_view const& haystack, compare_nans, haystack_has_nulls, has_any_nulls, - self_comp, - two_table_comp, + self_equal, + two_table_equal, helper_func); } else { if (cudf::detail::has_nested_columns(needles)) { @@ -274,16 +274,16 @@ rmm::device_uvector contains(table_view const& haystack, compare_nans, haystack_has_nulls, has_any_nulls, - self_comp, - two_table_comp, + self_equal, + two_table_equal, helper_func); } else { dispatch_nan_comparator(compare_nulls, compare_nans, haystack_has_nulls, has_any_nulls, - self_comp, - two_table_comp, + self_equal, + two_table_equal, helper_func); } } From 9c7f4f611a0722b24b4890a45a843c1bdfdcba5a Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Thu, 14 Sep 2023 13:46:26 -0700 Subject: [PATCH 06/16] Use build_row_bitmmask instead of bitmask_and --- cpp/src/search/contains_table.cu | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 916b36cb354..46f5c85d639 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -102,7 +102,6 @@ struct comparator_adapter { * @param stream CUDA stream used for device memory operations and kernel launches * @return A pair of pointer to the output bitmask and the buffer containing the bitmask */ -/* std::pair build_row_bitmask(table_view const& input, rmm::cuda_stream_view stream) { @@ -123,9 +122,6 @@ std::pair build_row_bitmask(table_view return std::pair(rmm::device_buffer{0, stream}, nullable_columns.front().null_mask()); } -// TODO: To doublecheck, under no situation but here we do nested checks with -// `get_nullable_columns`. This seems wrong -*/ /** * @brief Invokes the given `func` with desired comparators based on the specified `compare_nans` @@ -234,24 +230,24 @@ rmm::device_uvector contains(table_view const& haystack, stream.value()}; if (haystack_has_nulls && compare_nulls == null_equality::UNEQUAL) { - auto const row_bitmask = - cudf::detail::bitmask_and(haystack, stream, rmm::mr::get_current_device_resource()).first; + auto const bitmask_buffer_and_ptr = build_row_bitmask(haystack, stream); +  auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; set.insert_if_async(haystack_iter, haystack_iter + haystack.num_rows(), thrust::counting_iterator(0), // stencil - row_is_valid{reinterpret_cast(row_bitmask.data())}, + row_is_valid{row_bitmask_ptr}, stream.value()); } else { set.insert_async(haystack_iter, haystack_iter + haystack.num_rows(), stream.value()); } if (needles_has_nulls && compare_nulls == null_equality::UNEQUAL) { - auto const row_bitmask = - cudf::detail::bitmask_and(needles, stream, rmm::mr::get_current_device_resource()).first; + auto const bitmask_buffer_and_ptr = build_row_bitmask(needles, stream); +  auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; set.contains_if_async(needles_iter, needles_iter + needles.num_rows(), thrust::counting_iterator(0), // stencil - row_is_valid{reinterpret_cast(row_bitmask.data())}, + row_is_valid{row_bitmask_ptr}, contained.begin(), stream.value()); } else { From 237fd70b520532e5b587431477921cfac6a736c4 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Thu, 14 Sep 2023 14:20:19 -0700 Subject: [PATCH 07/16] Code formatting --- cpp/src/search/contains_table.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 46f5c85d639..2ec4012111b 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -231,7 +231,7 @@ rmm::device_uvector contains(table_view const& haystack, if (haystack_has_nulls && compare_nulls == null_equality::UNEQUAL) { auto const bitmask_buffer_and_ptr = build_row_bitmask(haystack, stream); -  auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; + auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; set.insert_if_async(haystack_iter, haystack_iter + haystack.num_rows(), thrust::counting_iterator(0), // stencil @@ -243,7 +243,7 @@ rmm::device_uvector contains(table_view const& haystack, if (needles_has_nulls && compare_nulls == null_equality::UNEQUAL) { auto const bitmask_buffer_and_ptr = build_row_bitmask(needles, stream); -  auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; + auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; set.contains_if_async(needles_iter, needles_iter + needles.num_rows(), thrust::counting_iterator(0), // stencil From 0a8b678fbe4356124d5cc993e9313b4c12b5123e Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Mon, 18 Sep 2023 10:12:08 -0700 Subject: [PATCH 08/16] Add comments back --- cpp/src/search/contains_table.cu | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 2ec4012111b..2a214590153 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -232,6 +232,11 @@ rmm::device_uvector contains(table_view const& haystack, if (haystack_has_nulls && compare_nulls == null_equality::UNEQUAL) { auto const bitmask_buffer_and_ptr = build_row_bitmask(haystack, stream); auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; + + // If the haystack table has nulls but they are compared unequal, don't insert them. + // Otherwise, it was known to cause performance issue: + // - https://github.com/rapidsai/cudf/pull/6943 + // - https://github.com/rapidsai/cudf/pull/8277 set.insert_if_async(haystack_iter, haystack_iter + haystack.num_rows(), thrust::counting_iterator(0), // stencil From eca017fe8403cc2b743d081999abde71e76e7d4a Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Mon, 18 Sep 2023 16:18:22 -0700 Subject: [PATCH 09/16] Rename contains benchmark file as contains_scalar --- cpp/benchmarks/CMakeLists.txt | 2 +- cpp/benchmarks/search/{contains.cpp => contains_scalar.cpp} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename cpp/benchmarks/search/{contains.cpp => contains_scalar.cpp} (100%) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 5e7862f4b3b..0279b3af529 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -173,7 +173,7 @@ ConfigureBench(ITERATOR_BENCH iterator/iterator.cu) # ################################################################################################## # * search benchmark ------------------------------------------------------------------------------ ConfigureBench(SEARCH_BENCH search/search.cpp) -ConfigureNVBench(SEARCH_NVBENCH search/contains.cpp) +ConfigureNVBench(SEARCH_NVBENCH search/contains_scalar.cpp) # ################################################################################################## # * sort benchmark -------------------------------------------------------------------------------- diff --git a/cpp/benchmarks/search/contains.cpp b/cpp/benchmarks/search/contains_scalar.cpp similarity index 100% rename from cpp/benchmarks/search/contains.cpp rename to cpp/benchmarks/search/contains_scalar.cpp From fc0980fc19202a9661ff476d5ddad53a4cd9894e Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Mon, 18 Sep 2023 16:56:09 -0700 Subject: [PATCH 10/16] Add contains_table benchmark --- cpp/benchmarks/CMakeLists.txt | 2 +- cpp/benchmarks/search/contains_table.cpp | 67 ++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 cpp/benchmarks/search/contains_table.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 0279b3af529..cd6b3cfdc03 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -173,7 +173,7 @@ ConfigureBench(ITERATOR_BENCH iterator/iterator.cu) # ################################################################################################## # * search benchmark ------------------------------------------------------------------------------ ConfigureBench(SEARCH_BENCH search/search.cpp) -ConfigureNVBench(SEARCH_NVBENCH search/contains_scalar.cpp) +ConfigureNVBench(SEARCH_NVBENCH search/contains_scalar.cpp search/contains_table.cpp) # ################################################################################################## # * sort benchmark -------------------------------------------------------------------------------- diff --git a/cpp/benchmarks/search/contains_table.cpp b/cpp/benchmarks/search/contains_table.cpp new file mode 100644 index 00000000000..256c2396c96 --- /dev/null +++ b/cpp/benchmarks/search/contains_table.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include + +#include + +#include + +auto constexpr num_unique_elements = 1000; + +template +static void nvbench_contains_table(nvbench::state& state, nvbench::type_list) +{ + auto const size = state.get_int64("table_size"); + auto const dtype = cudf::type_to_id(); + double const null_probability = state.get_float64("null_probability"); + + auto builder = data_profile_builder().null_probability(null_probability); + if (dtype == cudf::type_id::LIST) { + builder.distribution(dtype, distribution_id::UNIFORM, 0, num_unique_elements) + .distribution(cudf::type_id::INT32, distribution_id::UNIFORM, 0, num_unique_elements) + .list_depth(1); + } else { + builder.distribution(dtype, distribution_id::UNIFORM, 0, num_unique_elements); + } + + auto const haystack = create_random_table( + {dtype}, table_size_bytes{static_cast(size)}, data_profile{builder}, 0); + auto const needles = create_random_table( + {dtype}, table_size_bytes{static_cast(size)}, data_profile{builder}, 1); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto const stream_view = rmm::cuda_stream_view{launch.get_stream()}; + [[maybe_unused]] auto const result = + cudf::detail::contains(haystack->view(), + needles->view(), + cudf::null_equality::EQUAL, + cudf::nan_equality::ALL_EQUAL, + stream_view, + rmm::mr::get_current_device_resource()); + }); +} + +NVBENCH_BENCH_TYPES(nvbench_contains_table, + NVBENCH_TYPE_AXES(nvbench::type_list)) + .set_name("contains_table") + .set_type_axes_names({"type"}) + .add_float64_axis("null_probability", {0.0, 0.1}) + .add_int64_axis("table_size", {10'000, 100'000, 1'000'000, 10'000'000}); From b2934e56f16f505374afef17db07b5a99f2131d9 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Mon, 18 Sep 2023 17:30:04 -0700 Subject: [PATCH 11/16] Add peak memory usage in contains_table benchmark --- cpp/benchmarks/search/contains_table.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cpp/benchmarks/search/contains_table.cpp b/cpp/benchmarks/search/contains_table.cpp index 256c2396c96..17702d0741c 100644 --- a/cpp/benchmarks/search/contains_table.cpp +++ b/cpp/benchmarks/search/contains_table.cpp @@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -47,6 +48,8 @@ static void nvbench_contains_table(nvbench::state& state, nvbench::type_list(size)}, data_profile{builder}, 1); + auto mem_stats_logger = cudf::memory_stats_logger(); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto const stream_view = rmm::cuda_stream_view{launch.get_stream()}; [[maybe_unused]] auto const result = @@ -57,6 +60,9 @@ static void nvbench_contains_table(nvbench::state& state, nvbench::type_list Date: Tue, 19 Sep 2023 09:34:47 -0700 Subject: [PATCH 12/16] Minor doc cleanups --- cpp/src/search/contains_table.cu | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 2a214590153..162b78e679f 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -125,14 +125,12 @@ std::pair build_row_bitmask(table_view /** * @brief Invokes the given `func` with desired comparators based on the specified `compare_nans` - parameter. + * parameter * * @tparam HaystackHasNested Flag indicating whether there are nested columns in haystack * @tparam HasAnyNested Flag indicating whether there are nested columns in either haystack or - needles + * needles * @tparam Func Type of the helper function doing `contains` check - - * @tparam haystack_has_nulls Flag indicating whether haystack has nulls or not * * @param compare_nulls Control whether nulls should be compared as equal or not * @param compare_nans Control whether floating-point NaNs values should be compared as equal or not From 9553104fd39cb93b89b9fd0a3df9a9129a6ab67c Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Tue, 19 Sep 2023 17:57:48 -0700 Subject: [PATCH 13/16] Distinguish probing scheme CG sizes between nested and flat types for better performance --- cpp/src/search/contains_table.cu | 101 ++++++++++++++++++------------- 1 file changed, 58 insertions(+), 43 deletions(-) diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 162b78e679f..810da88c162 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -130,6 +130,7 @@ std::pair build_row_bitmask(table_view * @tparam HaystackHasNested Flag indicating whether there are nested columns in haystack * @tparam HasAnyNested Flag indicating whether there are nested columns in either haystack or * needles + * @tparam Hasher Type of device hash function * @tparam Func Type of the helper function doing `contains` check * * @param compare_nulls Control whether nulls should be compared as equal or not @@ -138,9 +139,10 @@ std::pair build_row_bitmask(table_view * @param has_any_nulls Flag indicating whether there are nested nulls is either haystack or needles * @param self_equal Self table comparator * @param two_table_equal Two table comparator + * @param d_hasher Device hash functor * @param func The input functor to invoke */ -template +template void dispatch_nan_comparator( null_equality compare_nulls, nan_equality compare_nans, @@ -148,8 +150,18 @@ void dispatch_nan_comparator( bool has_any_nulls, cudf::experimental::row::equality::self_comparator self_equal, cudf::experimental::row::equality::two_table_comparator two_table_equal, + Hasher const& d_hasher, Func&& func) { + // Distinguish probing scheme CG sizes between nested and flat types for better performance + auto const probing_scheme = [&]() { + if constexpr (HaystackHasNested) { + return cuco::experimental::linear_probing<4, Hasher>{d_hasher}; + } else { + return cuco::experimental::linear_probing<1, Hasher>{d_hasher}; + } + }(); + if (compare_nans == nan_equality::ALL_EQUAL) { using nan_equal_comparator = cudf::experimental::row::equality::nan_equal_physical_equality_comparator; @@ -157,14 +169,14 @@ void dispatch_nan_comparator( nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, nan_equal_comparator{}); auto const d_two_table_equal = two_table_equal.equal_to( nullate::DYNAMIC{has_any_nulls}, compare_nulls, nan_equal_comparator{}); - func(d_self_equal, d_two_table_equal); + func(d_self_equal, d_two_table_equal, probing_scheme); } else { using nan_unequal_comparator = cudf::experimental::row::equality::physical_equality_comparator; auto const d_self_equal = self_equal.equal_to( nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, nan_unequal_comparator{}); auto const d_two_table_equal = two_table_equal.equal_to( nullate::DYNAMIC{has_any_nulls}, compare_nulls, nan_unequal_comparator{}); - func(d_self_equal, d_two_table_equal); + func(d_self_equal, d_two_table_equal, probing_scheme); } } @@ -202,7 +214,6 @@ rmm::device_uvector contains(table_view const& haystack, auto const needle_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_needles); auto const d_needle_hasher = needle_hasher.device_hasher(nullate::DYNAMIC{has_any_nulls}); auto const d_hasher = hasher_adapter{d_haystack_hasher, d_needle_hasher}; - using hasher_type = decltype(d_hasher); auto const self_equal = cudf::experimental::row::equality::self_comparator(preprocessed_haystack); auto const two_table_equal = cudf::experimental::row::equality::two_table_comparator( @@ -216,48 +227,49 @@ rmm::device_uvector contains(table_view const& haystack, auto const needles_iter = cudf::detail::make_counting_transform_iterator( size_type{0}, [] __device__(auto idx) { return rhs_index_type{idx}; }); - auto const helper_func = [&](auto const& d_self_equal, auto const& d_two_table_equal) { - auto const d_equal = comparator_adapter{d_self_equal, d_two_table_equal}; - - auto set = cuco::experimental::static_set{ - cuco::experimental::extent{compute_hash_table_size(haystack.num_rows())}, - cuco::empty_key{lhs_index_type{-1}}, - d_equal, - cuco::experimental::linear_probing<1, hasher_type>{d_hasher}, - detail::hash_table_allocator_type{default_allocator{}, stream}, - stream.value()}; - - if (haystack_has_nulls && compare_nulls == null_equality::UNEQUAL) { - auto const bitmask_buffer_and_ptr = build_row_bitmask(haystack, stream); - auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; - - // If the haystack table has nulls but they are compared unequal, don't insert them. - // Otherwise, it was known to cause performance issue: - // - https://github.com/rapidsai/cudf/pull/6943 - // - https://github.com/rapidsai/cudf/pull/8277 - set.insert_if_async(haystack_iter, - haystack_iter + haystack.num_rows(), - thrust::counting_iterator(0), // stencil - row_is_valid{row_bitmask_ptr}, - stream.value()); - } else { - set.insert_async(haystack_iter, haystack_iter + haystack.num_rows(), stream.value()); - } - - if (needles_has_nulls && compare_nulls == null_equality::UNEQUAL) { - auto const bitmask_buffer_and_ptr = build_row_bitmask(needles, stream); - auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; - set.contains_if_async(needles_iter, - needles_iter + needles.num_rows(), + auto const helper_func = + [&](auto const& d_self_equal, auto const& d_two_table_equal, auto const& probing_scheme) { + auto const d_equal = comparator_adapter{d_self_equal, d_two_table_equal}; + + auto set = cuco::experimental::static_set{ + cuco::experimental::extent{compute_hash_table_size(haystack.num_rows())}, + cuco::empty_key{lhs_index_type{-1}}, + d_equal, + probing_scheme, + detail::hash_table_allocator_type{default_allocator{}, stream}, + stream.value()}; + + if (haystack_has_nulls && compare_nulls == null_equality::UNEQUAL) { + auto const bitmask_buffer_and_ptr = build_row_bitmask(haystack, stream); + auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; + + // If the haystack table has nulls but they are compared unequal, don't insert them. + // Otherwise, it was known to cause performance issue: + // - https://github.com/rapidsai/cudf/pull/6943 + // - https://github.com/rapidsai/cudf/pull/8277 + set.insert_if_async(haystack_iter, + haystack_iter + haystack.num_rows(), thrust::counting_iterator(0), // stencil row_is_valid{row_bitmask_ptr}, - contained.begin(), stream.value()); - } else { - set.contains_async( - needles_iter, needles_iter + needles.num_rows(), contained.begin(), stream.value()); - } - }; + } else { + set.insert_async(haystack_iter, haystack_iter + haystack.num_rows(), stream.value()); + } + + if (needles_has_nulls && compare_nulls == null_equality::UNEQUAL) { + auto const bitmask_buffer_and_ptr = build_row_bitmask(needles, stream); + auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; + set.contains_if_async(needles_iter, + needles_iter + needles.num_rows(), + thrust::counting_iterator(0), // stencil + row_is_valid{row_bitmask_ptr}, + contained.begin(), + stream.value()); + } else { + set.contains_async( + needles_iter, needles_iter + needles.num_rows(), contained.begin(), stream.value()); + } + }; if (cudf::detail::has_nested_columns(haystack)) { dispatch_nan_comparator(compare_nulls, @@ -266,6 +278,7 @@ rmm::device_uvector contains(table_view const& haystack, has_any_nulls, self_equal, two_table_equal, + d_hasher, helper_func); } else { if (cudf::detail::has_nested_columns(needles)) { @@ -275,6 +288,7 @@ rmm::device_uvector contains(table_view const& haystack, has_any_nulls, self_equal, two_table_equal, + d_hasher, helper_func); } else { dispatch_nan_comparator(compare_nulls, @@ -283,6 +297,7 @@ rmm::device_uvector contains(table_view const& haystack, has_any_nulls, self_equal, two_table_equal, + d_hasher, helper_func); } } From e1125c3ecd7cb293fa95888b707d8a7ba8b06121 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 22 Sep 2023 09:32:10 -0700 Subject: [PATCH 14/16] Remove redundant docs --- cpp/src/search/contains_table.cu | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 810da88c162..44f0370a179 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -182,17 +182,6 @@ void dispatch_nan_comparator( } // namespace -/** - * @brief Check if rows in the given `needles` table exist in the `haystack` table. - * - * @param haystack The table containing the search space - * @param needles A table of rows whose existence to check in the search space - * @param compare_nulls Control whether nulls should be compared as equal or not - * @param compare_nans Control whether floating-point NaNs values should be compared as equal or not - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned vector - * @return A vector of bools indicating if each row in `needles` has matching rows in `haystack` - */ rmm::device_uvector contains(table_view const& haystack, table_view const& needles, null_equality compare_nulls, From 37f7048d856326ad061a5246f51d8ef4cf59285d Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 22 Sep 2023 09:36:45 -0700 Subject: [PATCH 15/16] Throw if needles and haystack column types mismatch --- cpp/include/cudf/detail/search.hpp | 2 ++ cpp/src/search/contains_table.cu | 2 ++ 2 files changed, 4 insertions(+) diff --git a/cpp/include/cudf/detail/search.hpp b/cpp/include/cudf/detail/search.hpp index 4c4ad7834f4..4277baf3edd 100644 --- a/cpp/include/cudf/detail/search.hpp +++ b/cpp/include/cudf/detail/search.hpp @@ -81,6 +81,8 @@ std::unique_ptr contains(column_view const& haystack, * output = { false, true, true } * @endcode * + * @throws cudf::logic_error If column types of haystack and needles don't match + * * @param haystack The table containing the search space * @param needles A table of rows whose existence to check in the search space * @param compare_nulls Control whether nulls should be compared as equal or not diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 44f0370a179..20febb53224 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -189,6 +189,8 @@ rmm::device_uvector contains(table_view const& haystack, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + CUDF_EXPECTS(cudf::have_same_types(haystack, needles), "Column types mismatch"); + auto const haystack_has_nulls = has_nested_nulls(haystack); auto const needles_has_nulls = has_nested_nulls(needles); auto const has_any_nulls = haystack_has_nulls || needles_has_nulls; From 4f6af5de8070b2cf34b2a21196adb404a6d66721 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 22 Sep 2023 09:49:17 -0700 Subject: [PATCH 16/16] Simplify nested column handling --- cpp/src/search/contains_table.cu | 59 +++++++++++++------------------- 1 file changed, 23 insertions(+), 36 deletions(-) diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 20febb53224..43624ba691d 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -127,9 +127,7 @@ std::pair build_row_bitmask(table_view * @brief Invokes the given `func` with desired comparators based on the specified `compare_nans` * parameter * - * @tparam HaystackHasNested Flag indicating whether there are nested columns in haystack - * @tparam HasAnyNested Flag indicating whether there are nested columns in either haystack or - * needles + * @tparam HasNested Flag indicating whether there are nested columns in haystack or needles * @tparam Hasher Type of device hash function * @tparam Func Type of the helper function doing `contains` check * @@ -142,7 +140,7 @@ std::pair build_row_bitmask(table_view * @param d_hasher Device hash functor * @param func The input functor to invoke */ -template +template void dispatch_nan_comparator( null_equality compare_nulls, nan_equality compare_nans, @@ -155,7 +153,7 @@ void dispatch_nan_comparator( { // Distinguish probing scheme CG sizes between nested and flat types for better performance auto const probing_scheme = [&]() { - if constexpr (HaystackHasNested) { + if constexpr (HasNested) { return cuco::experimental::linear_probing<4, Hasher>{d_hasher}; } else { return cuco::experimental::linear_probing<1, Hasher>{d_hasher}; @@ -165,16 +163,16 @@ void dispatch_nan_comparator( if (compare_nans == nan_equality::ALL_EQUAL) { using nan_equal_comparator = cudf::experimental::row::equality::nan_equal_physical_equality_comparator; - auto const d_self_equal = self_equal.equal_to( + auto const d_self_equal = self_equal.equal_to( nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, nan_equal_comparator{}); - auto const d_two_table_equal = two_table_equal.equal_to( + auto const d_two_table_equal = two_table_equal.equal_to( nullate::DYNAMIC{has_any_nulls}, compare_nulls, nan_equal_comparator{}); func(d_self_equal, d_two_table_equal, probing_scheme); } else { using nan_unequal_comparator = cudf::experimental::row::equality::physical_equality_comparator; - auto const d_self_equal = self_equal.equal_to( + auto const d_self_equal = self_equal.equal_to( nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, nan_unequal_comparator{}); - auto const d_two_table_equal = two_table_equal.equal_to( + auto const d_two_table_equal = two_table_equal.equal_to( nullate::DYNAMIC{has_any_nulls}, compare_nulls, nan_unequal_comparator{}); func(d_self_equal, d_two_table_equal, probing_scheme); } @@ -263,34 +261,23 @@ rmm::device_uvector contains(table_view const& haystack, }; if (cudf::detail::has_nested_columns(haystack)) { - dispatch_nan_comparator(compare_nulls, - compare_nans, - haystack_has_nulls, - has_any_nulls, - self_equal, - two_table_equal, - d_hasher, - helper_func); + dispatch_nan_comparator(compare_nulls, + compare_nans, + haystack_has_nulls, + has_any_nulls, + self_equal, + two_table_equal, + d_hasher, + helper_func); } else { - if (cudf::detail::has_nested_columns(needles)) { - dispatch_nan_comparator(compare_nulls, - compare_nans, - haystack_has_nulls, - has_any_nulls, - self_equal, - two_table_equal, - d_hasher, - helper_func); - } else { - dispatch_nan_comparator(compare_nulls, - compare_nans, - haystack_has_nulls, - has_any_nulls, - self_equal, - two_table_equal, - d_hasher, - helper_func); - } + dispatch_nan_comparator(compare_nulls, + compare_nans, + haystack_has_nulls, + has_any_nulls, + self_equal, + two_table_equal, + d_hasher, + helper_func); } return contained;