diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml
index f8074711b88..005792d187f 100644
--- a/conda/recipes/cudf/meta.yaml
+++ b/conda/recipes/cudf/meta.yaml
@@ -53,7 +53,6 @@ requirements:
     - cython >=0.29,<0.30
     - scikit-build >=0.13.1
     - setuptools
-    - numba >=0.56.4,<0.57
     - dlpack >=0.5,<0.6.0a0
     - pyarrow =11
     - libcudf ={{ version }}
@@ -69,7 +68,6 @@ requirements:
     - numpy >=1.21,<1.24  # Temporarily upper bound numpy to avoid overflow deprecations
     - {{ pin_compatible('pyarrow', max_pin='x.x.x') }}
     - libcudf {{ version }}
-    - fastavro >=0.22.0
     - {{ pin_compatible('rmm', max_pin='x.x') }}
     - fsspec >=0.6.0
     - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}
diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index bdc72cc4535..95f0db895a8 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -153,8 +153,12 @@ ConfigureBench(APPLY_BOOLEAN_MASK_BENCH stream_compaction/apply_boolean_mask.cpp
 # ##################################################################################################
 # * stream_compaction benchmark -------------------------------------------------------------------
 ConfigureNVBench(
-  STREAM_COMPACTION_NVBENCH stream_compaction/distinct.cpp stream_compaction/distinct_count.cpp
-  stream_compaction/unique.cpp stream_compaction/unique_count.cpp
+  STREAM_COMPACTION_NVBENCH
+  stream_compaction/distinct.cpp
+  stream_compaction/distinct_count.cpp
+  stream_compaction/stable_distinct.cpp
+  stream_compaction/unique.cpp
+  stream_compaction/unique_count.cpp
 )
 
 # ##################################################################################################
diff --git a/cpp/benchmarks/stream_compaction/stable_distinct.cpp b/cpp/benchmarks/stream_compaction/stable_distinct.cpp
new file mode 100644
index 00000000000..6a9542c83a6
--- /dev/null
+++ b/cpp/benchmarks/stream_compaction/stable_distinct.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+#include <benchmarks/fixture/rmm_pool_raii.hpp>
+
+#include <cudf/column/column_view.hpp>
+#include <cudf/lists/list_view.hpp>
+#include <cudf/stream_compaction.hpp>
+#include <cudf/types.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms");
+
+template <typename Type>
+void nvbench_stable_distinct(nvbench::state& state, nvbench::type_list<Type>)
+{
+  cudf::size_type const num_rows = state.get_int64("NumRows");  // value of the "NumRows" axis
+
+  data_profile profile = data_profile_builder().cardinality(0).null_probability(0.01).distribution(
+    cudf::type_to_id<Type>(), distribution_id::UNIFORM, 0, 100);
+
+  auto source_column = create_random_column(cudf::type_to_id<Type>(), row_count{num_rows}, profile);
+
+  auto input_column = source_column->view();  // same view reused for every table column
+  auto input_table  = cudf::table_view({input_column, input_column, input_column, input_column});
+
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    auto result = cudf::stable_distinct(input_table,
+                                        {0},  // key column indices
+                                        cudf::duplicate_keep_option::KEEP_ANY,
+                                        cudf::null_equality::EQUAL,
+                                        cudf::nan_equality::ALL_EQUAL);
+  });
+}
+
+using data_type = nvbench::type_list<bool, int8_t, int32_t, int64_t, float, cudf::timestamp_ms>;
+
+NVBENCH_BENCH_TYPES(nvbench_stable_distinct, NVBENCH_TYPE_AXES(data_type))
+  .set_name("stable_distinct")
+  .set_type_axes_names({"Type"})
+  .add_int64_axis("NumRows", {10'000, 100'000, 1'000'000, 10'000'000});
+
+template <typename Type>
+void nvbench_stable_distinct_list(nvbench::state& state, nvbench::type_list<Type>)
+{
+  auto const size               = state.get_int64("ColumnSize");  // used as table size in bytes
+  auto const dtype              = cudf::type_to_id<Type>();
+  double const null_probability = state.get_float64("null_probability");
+
+  auto builder = data_profile_builder().null_probability(null_probability);
+  if (dtype == cudf::type_id::LIST) {
+    builder.distribution(dtype, distribution_id::UNIFORM, 0, 4)
+      .distribution(cudf::type_id::INT32, distribution_id::UNIFORM, 0, 4)
+      .list_depth(1);
+  } else {
+    // We're comparing stable_distinct() on a non-nested column to that on a list column with the
+    // same number of distinct rows. Lists hold up to 4 elements drawn from 5 distinct values, so
+    // the list column has 1 + 5 + 5^2 + 5^3 + 5^4 = 781 distinct rows; we want 781 here as well.
+    // NOTE(review): if bounds are inclusive (as 0..4 -> 5 values implies), 0..781 gives 782.
+    builder.distribution(dtype, distribution_id::UNIFORM, 0, 781);
+  }
+
+  auto const table = create_random_table(
+    {dtype}, table_size_bytes{static_cast<size_t>(size)}, data_profile{builder}, 0);
+
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    auto result = cudf::stable_distinct(*table,
+                                        {0},
+                                        cudf::duplicate_keep_option::KEEP_ANY,
+                                        cudf::null_equality::EQUAL,
+                                        cudf::nan_equality::ALL_EQUAL);
+  });
+}
+
+NVBENCH_BENCH_TYPES(nvbench_stable_distinct_list,
+                    NVBENCH_TYPE_AXES(nvbench::type_list<int32_t, cudf::list_view>))
+  .set_name("stable_distinct_list")
+  .set_type_axes_names({"Type"})
+  .add_float64_axis("null_probability", {0.0, 0.1})
+  .add_int64_axis("ColumnSize", {100'000'000});
diff --git a/cpp/include/cudf/detail/stream_compaction.hpp b/cpp/include/cudf/detail/stream_compaction.hpp
index e0fc7b71cd9..5476000fc29 100644
--- a/cpp/include/cudf/detail/stream_compaction.hpp
+++ b/cpp/include/cudf/detail/stream_compaction.hpp
@@ -86,24 +86,9 @@ std::unique_ptr<table> distinct(table_view const& input,
                                 rmm::mr::device_memory_resource* mr);
 
 /**
- * @brief Create a new table without duplicate rows.
+ * @copydoc cudf::stable_distinct
  *
- * Given an `input` table_view, each row is copied to the output table to create a set of distinct
- * rows. The row order is guaranteed to be preserved as in the input.
- *
- * If there are duplicate rows, which row to be copied depends on the specified value of the `keep`
- * parameter.
- *
- * This API produces exactly the same set of output rows as `cudf::distinct`.
- *
- * @param input The input table
- * @param keys Vector of indices indicating key columns in the `input` table
- * @param keep Copy any, first, last, or none of the found duplicates
- * @param nulls_equal Flag to specify whether null elements should be considered as equal
- * @param nans_equal Flag to specify whether NaN elements should be considered as equal
- * @param stream CUDA stream used for device memory operations and kernel launches
- * @param mr Device memory resource used to allocate the returned table
- * @return A table containing the resulting distinct rows
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<table> stable_distinct(table_view const& input,
                                        std::vector<size_type> const& keys,
diff --git a/cpp/include/cudf/stream_compaction.hpp b/cpp/include/cudf/stream_compaction.hpp
index e2a6b97256f..984e3037cd1 100644
--- a/cpp/include/cudf/stream_compaction.hpp
+++ b/cpp/include/cudf/stream_compaction.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -254,22 +254,19 @@ std::unique_ptr<table> unique(
  * @brief Create a new table without duplicate rows.
  *
  * Given an `input` table_view, each row is copied to the output table to create a set of distinct
- * rows. If there are duplicate rows, which row to be copied depends on the specified value of
- * the `keep` parameter.
+ * rows. If there are duplicate rows, which row is copied depends on the `keep` parameter.
  *
  * The order of rows in the output table is not specified.
  *
  * Performance hint: if the input is pre-sorted, `cudf::unique` can produce an equivalent result
  * (i.e., same set of output rows) but with less running time than `cudf::distinct`.
  *
- * @param[in] input           input table_view to copy only distinct rows
- * @param[in] keys            vector of indices representing key columns from `input`
- * @param[in] keep            keep any, first, last, or none of the found duplicates
- * @param[in] nulls_equal     flag to control if nulls are compared equal or not
- * @param[in] nans_equal      flag to control if floating-point NaN values are compared equal or not
- * @param[in] mr              Device memory resource used to allocate the returned table's device
- *                            memory
- *
+ * @param input The input table
+ * @param keys Vector of indices indicating key columns in the `input` table
+ * @param keep Copy any, first, last, or none of the found duplicates
+ * @param nulls_equal Flag to specify whether null elements should be considered as equal
+ * @param nans_equal Flag to specify whether NaN elements should be considered as equal
+ * @param mr Device memory resource used to allocate the returned table
  * @return Table with distinct rows in an unspecified order
  */
 std::unique_ptr<table> distinct(
@@ -280,6 +277,36 @@ std::unique_ptr<table> distinct(
   nan_equality nans_equal             = nan_equality::ALL_EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief Create a new table without duplicate rows, preserving input order.
+ *
+ * Given an `input` table_view, each row is copied to the output table to create a set of distinct
+ * rows. The input row order is preserved. If there are duplicate rows, which row is copied depends
+ * on the `keep` parameter.
+ *
+ * This API produces the same output rows as `cudf::distinct`, but with input order preserved.
+ *
+ * Note that when `keep` is `KEEP_ANY`, the choice of which duplicate row to keep is arbitrary, but
+ * the returned table will retain the input order. For example, if the key column contains
+ * `1, 2, 1` and a values column contains `3, 4, 5`, the result could contain values `3, 4` or
+ * `4, 5`, but not `4, 3` or `5, 4`.
+ *
+ * @param input The input table
+ * @param keys Vector of indices indicating key columns in the `input` table
+ * @param keep Copy any, first, last, or none of the found duplicates
+ * @param nulls_equal Flag to specify whether null elements should be considered as equal
+ * @param nans_equal Flag to specify whether NaN elements should be considered as equal
+ * @param mr Device memory resource used to allocate the returned table
+ * @return Table with distinct rows, preserving input order
+ */
+std::unique_ptr<table> stable_distinct(
+  table_view const& input,
+  std::vector<size_type> const& keys,
+  duplicate_keep_option keep          = duplicate_keep_option::KEEP_ANY,
+  null_equality nulls_equal           = null_equality::EQUAL,
+  nan_equality nans_equal             = nan_equality::ALL_EQUAL,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /**
  * @brief Count the number of consecutive groups of equivalent rows in a column.
  *
diff --git a/cpp/src/io/utilities/data_sink.cpp b/cpp/src/io/utilities/data_sink.cpp
index 40b70986eca..f8d8702d1ca 100644
--- a/cpp/src/io/utilities/data_sink.cpp
+++ b/cpp/src/io/utilities/data_sink.cpp
@@ -38,6 +38,8 @@ class file_sink : public data_sink {
 
     if (detail::cufile_integration::is_kvikio_enabled()) {
       _kvikio_file = kvikio::FileHandle(filepath, "w");
+      CUDF_LOG_INFO("Writing a file using kvikIO, with compatibility mode {}.",
+                    _kvikio_file.is_compat_mode_on() ? "on" : "off");
     } else {
       _cufile_out = detail::make_cufile_output(filepath);
     }
diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp
index e2cea7a56ff..a457a803e3c 100644
--- a/cpp/src/io/utilities/datasource.cpp
+++ b/cpp/src/io/utilities/datasource.cpp
@@ -42,6 +42,8 @@ class file_source : public datasource {
   {
     if (detail::cufile_integration::is_kvikio_enabled()) {
       _kvikio_file = kvikio::FileHandle(filepath);
+      CUDF_LOG_INFO("Reading a file using kvikIO, with compatibility mode {}.",
+                    _kvikio_file.is_compat_mode_on() ? "on" : "off");
     } else {
       _cufile_in = detail::make_cufile_input(filepath);
     }
diff --git a/cpp/src/stream_compaction/stable_distinct.cu b/cpp/src/stream_compaction/stable_distinct.cu
index d45897930b0..45a2de9288b 100644
--- a/cpp/src/stream_compaction/stable_distinct.cu
+++ b/cpp/src/stream_compaction/stable_distinct.cu
@@ -19,12 +19,14 @@
 #include <cudf/table/table.hpp>
 #include <cudf/table/table_view.hpp>
 #include <cudf/types.hpp>
+#include <cudf/utilities/span.hpp>
 
 #include <thrust/iterator/constant_iterator.h>
 #include <thrust/scatter.h>
 #include <thrust/uninitialized_fill.h>
 
-namespace cudf::detail {
+namespace cudf {
+namespace detail {
 
 std::unique_ptr<table> stable_distinct(table_view const& input,
                                        std::vector<size_type> const& keys,
@@ -45,7 +47,13 @@ std::unique_ptr<table> stable_distinct(table_view const& input,
                                                      stream,
                                                      rmm::mr::get_current_device_resource());
 
-  // Markers to denote which rows to be copied to the output.
+  // The only difference between this implementation and the unstable version
+  // is that the stable implementation must retain the input order. The
+  // distinct indices are not sorted, so we cannot simply copy the rows in the
+  // order of the distinct indices and retain the input order. Instead, we use
+  // a boolean mask to indicate which rows to copy to the output. This avoids
+  // the need to sort the distinct indices, which is slower.
+
   auto const output_markers = [&] {
     auto markers = rmm::device_uvector<bool>(input.num_rows(), stream);
     thrust::uninitialized_fill(rmm::exec_policy(stream), markers.begin(), markers.end(), false);
@@ -58,13 +66,22 @@ std::unique_ptr<table> stable_distinct(table_view const& input,
     return markers;
   }();
 
-  return cudf::detail::copy_if(
-    input,
-    [output_markers = output_markers.begin()] __device__(auto const idx) {
-      return *(output_markers + idx);
-    },
-    stream,
-    mr);
+  return cudf::detail::apply_boolean_mask(
+    input, cudf::device_span<bool const>(output_markers), stream, mr);
+}
+
+}  // namespace detail
+
+std::unique_ptr<table> stable_distinct(table_view const& input,
+                                       std::vector<size_type> const& keys,
+                                       duplicate_keep_option keep,
+                                       null_equality nulls_equal,
+                                       nan_equality nans_equal,
+                                       rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::stable_distinct(  // public API always runs on the default stream
+    input, keys, keep, nulls_equal, nans_equal, cudf::get_default_stream(), mr);
 }
 
-}  // namespace cudf::detail
+}  // namespace cudf
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 7f2807fc30e..1262e065041 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -401,8 +401,9 @@ ConfigureTest(
   stream_compaction/apply_boolean_mask_tests.cpp
   stream_compaction/distinct_count_tests.cpp
   stream_compaction/distinct_tests.cpp
-  stream_compaction/drop_nulls_tests.cpp
   stream_compaction/drop_nans_tests.cpp
+  stream_compaction/drop_nulls_tests.cpp
+  stream_compaction/stable_distinct_tests.cpp
   stream_compaction/unique_count_tests.cpp
   stream_compaction/unique_tests.cpp
 )
diff --git a/cpp/tests/stream_compaction/distinct_tests.cpp b/cpp/tests/stream_compaction/distinct_tests.cpp
index 85955ce7fc9..586792b4b30 100644
--- a/cpp/tests/stream_compaction/distinct_tests.cpp
+++ b/cpp/tests/stream_compaction/distinct_tests.cpp
@@ -126,7 +126,7 @@ TEST_F(DistinctKeepAny, EmptyInputTable)
 {
   int32s_col col(std::initializer_list<int32_t>{});
   cudf::table_view input{{col}};
-  std::vector<cudf::size_type> key_idx{1, 2};
+  std::vector<cudf::size_type> key_idx{0};
 
   auto got = cudf::distinct(input, key_idx, KEEP_ANY);
   CUDF_TEST_EXPECT_TABLES_EQUAL(input, got->view());
@@ -1217,11 +1217,11 @@ TEST_F(DistinctKeepAny, StructsOfStructs)
   // 8 |  { {2, 1}, 5}  |
 
   auto s1 = [&] {
-    auto a  = int32s_col{1, 1, XXX, XXX, 2, 1, 1, XXX, 2};
-    auto b  = int32s_col{1, 2, XXX, XXX, 2, 1, 1, XXX, 1};
+    auto a  = int32s_col{1, 1, XXX, XXX, XXX, XXX, 1, XXX, 2};
+    auto b  = int32s_col{1, 2, XXX, XXX, XXX, XXX, 1, XXX, 1};
     auto s2 = structs_col{{a, b}, nulls_at({2, 3, 7})};
 
-    auto c = int32s_col{5, 4, 6, 4, 3, 3, 5, 4, 5};
+    auto c = int32s_col{5, 4, 6, 4, XXX, XXX, 5, 4, 5};
     std::vector<std::unique_ptr<cudf::column>> s1_children;
     s1_children.emplace_back(s2.release());
     s1_children.emplace_back(c.release());
@@ -1270,11 +1270,11 @@ TEST_F(DistinctKeepAny, SlicedStructsOfStructs)
   // 8 |  { {2, 1}, 5}  |
 
   auto s1 = [&] {
-    auto a  = int32s_col{1, 1, 2, 2, 2, 1, 1, 1, 2};
-    auto b  = int32s_col{1, 2, 1, 2, 2, 1, 1, 1, 1};
+    auto a  = int32s_col{1, 1, XXX, XXX, XXX, XXX, 1, XXX, 2};
+    auto b  = int32s_col{1, 2, XXX, XXX, XXX, XXX, 1, XXX, 1};
     auto s2 = structs_col{{a, b}, nulls_at({2, 3, 7})};
 
-    auto c = int32s_col{5, 4, 6, 4, 3, 3, 5, 4, 5};
+    auto c = int32s_col{5, 4, 6, 4, XXX, XXX, 5, 4, 5};
     std::vector<std::unique_ptr<cudf::column>> s1_children;
     s1_children.emplace_back(s2.release());
     s1_children.emplace_back(c.release());
diff --git a/cpp/tests/stream_compaction/stable_distinct_tests.cpp b/cpp/tests/stream_compaction/stable_distinct_tests.cpp
new file mode 100644
index 00000000000..e28b96fc8be
--- /dev/null
+++ b/cpp/tests/stream_compaction/stable_distinct_tests.cpp
@@ -0,0 +1,1354 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/iterator_utilities.hpp>
+#include <cudf_test/table_utilities.hpp>
+
+#include <cudf/copying.hpp>
+#include <cudf/sorting.hpp>
+#include <cudf/stream_compaction.hpp>
+#include <cudf/table/table.hpp>
+#include <cudf/table/table_view.hpp>
+#include <cudf/types.hpp>
+
+#include <cmath>
+
+auto constexpr null{0};  // null at current level
+auto constexpr XXX{0};   // null pushed down from parent level
+auto constexpr NaN          = std::numeric_limits<double>::quiet_NaN();
+auto constexpr KEEP_ANY     = cudf::duplicate_keep_option::KEEP_ANY;
+auto constexpr KEEP_FIRST   = cudf::duplicate_keep_option::KEEP_FIRST;
+auto constexpr KEEP_LAST    = cudf::duplicate_keep_option::KEEP_LAST;
+auto constexpr KEEP_NONE    = cudf::duplicate_keep_option::KEEP_NONE;
+auto constexpr NULL_EQUAL   = cudf::null_equality::EQUAL;
+auto constexpr NULL_UNEQUAL = cudf::null_equality::UNEQUAL;
+auto constexpr NAN_EQUAL    = cudf::nan_equality::ALL_EQUAL;
+auto constexpr NAN_UNEQUAL  = cudf::nan_equality::UNEQUAL;
+
+using int32s_col  = cudf::test::fixed_width_column_wrapper<int32_t>;
+using floats_col  = cudf::test::fixed_width_column_wrapper<float>;
+using lists_col   = cudf::test::lists_column_wrapper<int32_t>;
+using strings_col = cudf::test::strings_column_wrapper;
+using structs_col = cudf::test::structs_column_wrapper;
+
+using cudf::nan_policy;
+using cudf::null_equality;
+using cudf::null_policy;
+using cudf::test::iterators::no_nulls;
+using cudf::test::iterators::null_at;
+using cudf::test::iterators::nulls_at;
+
+struct StableDistinctKeepAny : public cudf::test::BaseFixture {};  // KEEP_ANY tests
+
+struct StableDistinctKeepFirstLastNone : public cudf::test::BaseFixture {};  // other keep modes
+
+TEST_F(StableDistinctKeepAny, StringKeyColumn)
+{
+  // KEEP_ANY may keep any duplicate, so rows with equivalent keys are identical and
+  // contiguous here; the expected output is then the same whichever row is kept.
+  auto const col = int32s_col{{5, 5, null, null, 5, 8, 1}, nulls_at({2, 3})};
+  auto const keys =
+    strings_col{{"all", "all", "new", "new", "" /*NULL*/, "the", "strings"}, null_at(4)};
+  auto const input   = cudf::table_view{{col, keys}};
+  auto const key_idx = std::vector<cudf::size_type>{1};
+
+  auto const exp_col  = int32s_col{{5, null, 5, 8, 1}, null_at(1)};
+  auto const exp_keys = strings_col{{"all", "new", "" /*NULL*/, "the", "strings"}, null_at(2)};
+  auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+  auto const result = cudf::stable_distinct(input, key_idx, KEEP_ANY);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+}
+
+TEST_F(StableDistinctKeepFirstLastNone, StringKeyColumn)
+{
+  // Rows sharing a key differ, so the expected output shows which duplicate each mode keeps.
+  auto const col = int32s_col{{0, null, 2, 3, 4, 5, 6}, null_at(1)};
+  auto const keys =
+    strings_col{{"all", "new", "new", "all", "" /*NULL*/, "the", "strings"}, null_at(4)};
+  auto const input   = cudf::table_view{{col, keys}};
+  auto const key_idx = std::vector<cudf::size_type>{1};
+
+  // KEEP_FIRST
+  {
+    auto const exp_col  = int32s_col{{0, null, 4, 5, 6}, null_at(1)};
+    auto const exp_keys = strings_col{{"all", "new", "" /*NULL*/, "the", "strings"}, null_at(2)};
+    auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_FIRST);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+
+  // KEEP_LAST
+  {
+    auto const exp_col  = int32s_col{{2, 3, 4, 5, 6}, no_nulls()};
+    auto const exp_keys = strings_col{{"new", "all", "" /*NULL*/, "the", "strings"}, null_at(2)};
+    auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_LAST);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+
+  // KEEP_NONE
+  {
+    auto const exp_col  = int32s_col{{4, 5, 6}, no_nulls()};
+    auto const exp_keys = strings_col{{"" /*NULL*/, "the", "strings"}, null_at(0)};
+    auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_NONE);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+}
+
+TEST_F(StableDistinctKeepAny, EmptyInputTable)
+{
+  int32s_col col(std::initializer_list<int32_t>{});  // zero-row column
+  cudf::table_view input{{col}};
+  std::vector<cudf::size_type> key_idx{0};
+
+  auto got = cudf::stable_distinct(input, key_idx, KEEP_ANY);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(input, got->view());  // output must equal the empty input
+}
+
+TEST_F(StableDistinctKeepAny, NoColumnInputTable)
+{
+  cudf::table_view input{std::vector<cudf::column_view>()};
+  std::vector<cudf::size_type> key_idx{1, 2};  // not valid indices: input has no columns
+
+  auto got = cudf::stable_distinct(input, key_idx, KEEP_ANY);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(input, got->view());
+}
+
+TEST_F(StableDistinctKeepAny, EmptyKeys)
+{
+  int32s_col col{{5, 4, 3, 5, 8, 1}, {1, 0, 1, 1, 1, 1}};
+  int32s_col empty_col{};  // expected output: a single zero-row column
+  cudf::table_view input{{col}};
+  std::vector<cudf::size_type> key_idx{};  // no key columns selected
+
+  auto got = cudf::stable_distinct(input, key_idx, KEEP_ANY);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(cudf::table_view{{empty_col}}, got->view());
+}
+
+TEST_F(StableDistinctKeepAny, NoNullsTable)
+{
+  // KEEP_ANY may keep any duplicate, so rows with equivalent keys are identical and
+  // contiguous here; the expected output is then the same whichever row is kept.
+  auto const col1  = int32s_col{6, 6, 6, 3, 5, 8, 5};
+  auto const col2  = floats_col{6, 6, 6, 3, 4, 9, 4};
+  auto const keys1 = int32s_col{20, 20, 20, 20, 19, 21, 9};
+  auto const keys2 = int32s_col{19, 19, 19, 20, 20, 9, 21};
+
+  auto const input   = cudf::table_view{{col1, col2, keys1, keys2}};
+  auto const key_idx = std::vector<cudf::size_type>{2, 3};
+
+  auto const exp_col1  = int32s_col{6, 3, 5, 8, 5};
+  auto const exp_col2  = floats_col{6, 3, 4, 9, 4};
+  auto const exp_keys1 = int32s_col{20, 20, 19, 21, 9};
+  auto const exp_keys2 = int32s_col{19, 20, 20, 9, 21};
+  auto const expected  = cudf::table_view{{exp_col1, exp_col2, exp_keys1, exp_keys2}};
+
+  auto const result = cudf::stable_distinct(input, key_idx, KEEP_ANY);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+}
+
+TEST_F(StableDistinctKeepAny, NoNullsTableWithNaNs)
+{
+  // KEEP_ANY may keep any duplicate, so rows with equivalent keys are identical and
+  // contiguous here; the expected output is then the same whichever row is kept.
+  auto const col1  = int32s_col{6, 6, 6, 1, 1, 1, 3, 5, 8, 5};
+  auto const col2  = floats_col{6, 6, 6, 1, 1, 1, 3, 4, 9, 4};
+  auto const keys1 = int32s_col{20, 20, 20, 15, 15, 15, 20, 19, 21, 9};
+  auto const keys2 = floats_col{19., 19., 19., NaN, NaN, NaN, 20., 20., 9., 21.};
+
+  auto const input   = cudf::table_view{{col1, col2, keys1, keys2}};
+  auto const key_idx = std::vector<cudf::size_type>{2, 3};
+
+  // NaNs are unequal: every NaN-keyed row is kept.
+  {
+    auto const exp_col1  = int32s_col{6, 1, 1, 1, 3, 5, 8, 5};
+    auto const exp_col2  = floats_col{6, 1, 1, 1, 3, 4, 9, 4};
+    auto const exp_keys1 = int32s_col{20, 15, 15, 15, 20, 19, 21, 9};
+    auto const exp_keys2 = floats_col{19., NaN, NaN, NaN, 20., 20., 9., 21.};
+    auto const expected  = cudf::table_view{{exp_col1, exp_col2, exp_keys1, exp_keys2}};
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_ANY, NULL_EQUAL, NAN_UNEQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+
+  // NaNs are equal: the NaN-keyed rows collapse to one.
+  {
+    auto const exp_col1  = int32s_col{6, 1, 3, 5, 8, 5};
+    auto const exp_col2  = floats_col{6, 1, 3, 4, 9, 4};
+    auto const exp_keys1 = int32s_col{20, 15, 20, 19, 21, 9};
+    auto const exp_keys2 = floats_col{19., NaN, 20., 20., 9., 21.};
+    auto const expected  = cudf::table_view{{exp_col1, exp_col2, exp_keys1, exp_keys2}};
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_ANY, NULL_EQUAL, NAN_EQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+}
+
+TEST_F(StableDistinctKeepFirstLastNone, NoNullsTable)
+{
+  // Rows sharing a key differ, so the expected output shows which duplicate each mode keeps.
+  auto const col1  = int32s_col{0, 1, 2, 3, 4, 5, 6};
+  auto const col2  = floats_col{10, 11, 12, 13, 14, 15, 16};
+  auto const keys1 = int32s_col{20, 20, 20, 20, 19, 21, 9};
+  auto const keys2 = int32s_col{19, 19, 19, 20, 20, 9, 21};
+
+  auto const input   = cudf::table_view{{col1, col2, keys1, keys2}};
+  auto const key_idx = std::vector<cudf::size_type>{2, 3};
+
+  // KEEP_FIRST
+  {
+    auto const exp_col1  = int32s_col{0, 3, 4, 5, 6};
+    auto const exp_col2  = floats_col{10, 13, 14, 15, 16};
+    auto const exp_keys1 = int32s_col{20, 20, 19, 21, 9};
+    auto const exp_keys2 = int32s_col{19, 20, 20, 9, 21};
+    auto const expected  = cudf::table_view{{exp_col1, exp_col2, exp_keys1, exp_keys2}};
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_FIRST);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+
+  // KEEP_LAST
+  {
+    auto const exp_col1  = int32s_col{2, 3, 4, 5, 6};
+    auto const exp_col2  = floats_col{12, 13, 14, 15, 16};
+    auto const exp_keys1 = int32s_col{20, 20, 19, 21, 9};
+    auto const exp_keys2 = int32s_col{19, 20, 20, 9, 21};
+    auto const expected  = cudf::table_view{{exp_col1, exp_col2, exp_keys1, exp_keys2}};
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_LAST);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+
+  // KEEP_NONE
+  {
+    auto const exp_col1  = int32s_col{3, 4, 5, 6};
+    auto const exp_col2  = floats_col{13, 14, 15, 16};
+    auto const exp_keys1 = int32s_col{20, 19, 21, 9};
+    auto const exp_keys2 = int32s_col{20, 20, 9, 21};
+    auto const expected  = cudf::table_view{{exp_col1, exp_col2, exp_keys1, exp_keys2}};
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_NONE);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+}
+
+TEST_F(StableDistinctKeepAny, SlicedNoNullsTable)
+{
+  auto constexpr dont_care = int32_t{0};
+
+  // KEEP_ANY may keep any duplicate, so rows with equivalent keys are identical and
+  // contiguous here; the expected output is then the same whichever row is kept.
+  auto const col1  = int32s_col{dont_care, dont_care, 6, 6, 6, 3, 5, 8, 5, dont_care};
+  auto const col2  = floats_col{dont_care, dont_care, 6, 6, 6, 3, 4, 9, 4, dont_care};
+  auto const keys1 = int32s_col{dont_care, dont_care, 20, 20, 20, 20, 19, 21, 9, dont_care};
+  auto const keys2 = int32s_col{dont_care, dont_care, 19, 19, 19, 20, 20, 9, 21, dont_care};
+
+  auto const input_original = cudf::table_view{{col1, col2, keys1, keys2}};
+  auto const input          = cudf::slice(input_original, {2, 9})[0];  // keeps rows 2..8 only
+  auto const key_idx        = std::vector<cudf::size_type>{2, 3};
+
+  auto const exp_col1  = int32s_col{6, 3, 5, 8, 5};
+  auto const exp_col2  = floats_col{6, 3, 4, 9, 4};
+  auto const exp_keys1 = int32s_col{20, 20, 19, 21, 9};
+  auto const exp_keys2 = int32s_col{19, 20, 20, 9, 21};
+  auto const expected  = cudf::table_view{{exp_col1, exp_col2, exp_keys1, exp_keys2}};
+
+  auto const result = cudf::stable_distinct(input, key_idx, KEEP_ANY);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+}
+
+TEST_F(StableDistinctKeepFirstLastNone, SlicedNoNullsTable)
+{
+  auto constexpr dont_care = int32_t{0};
+
+  // Rows sharing a key differ, so the expected output shows which duplicate each mode keeps.
+  // clang-format off
+  auto const col1  = int32s_col{0, 1, 2, // <- don't care
+                                3, 4, 5, 6, 7, 8, 9, dont_care};
+  auto const col2  = floats_col{10, 11, 12, // <- don't care
+                                13, 14, 15, 16, 17, 18, 19, dont_care};
+  auto const keys1 = int32s_col{20, 20, 20, // <- don't care
+                                20, 20, 20, 20, 19, 21, 9, dont_care};
+  auto const keys2 = int32s_col{19, 19, 19, // <- don't care
+                                19, 19, 19, 20, 20, 9, 21, dont_care};
+  // clang-format on
+  auto const input_original = cudf::table_view{{col1, col2, keys1, keys2}};
+  auto const input          = cudf::slice(input_original, {3, 10})[0];  // keeps rows 3..9 only
+  auto const key_idx        = std::vector<cudf::size_type>{2, 3};
+
+  // KEEP_FIRST
+  {
+    auto const exp_col1  = int32s_col{3, 6, 7, 8, 9};
+    auto const exp_col2  = floats_col{13, 16, 17, 18, 19};
+    auto const exp_keys1 = int32s_col{20, 20, 19, 21, 9};
+    auto const exp_keys2 = int32s_col{19, 20, 20, 9, 21};
+    auto const expected  = cudf::table_view{{exp_col1, exp_col2, exp_keys1, exp_keys2}};
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_FIRST);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+
+  // KEEP_LAST
+  {
+    auto const exp_col1  = int32s_col{5, 6, 7, 8, 9};
+    auto const exp_col2  = floats_col{15, 16, 17, 18, 19};
+    auto const exp_keys1 = int32s_col{20, 20, 19, 21, 9};
+    auto const exp_keys2 = int32s_col{19, 20, 20, 9, 21};
+    auto const expected  = cudf::table_view{{exp_col1, exp_col2, exp_keys1, exp_keys2}};
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_LAST);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+
+  // KEEP_NONE
+  {
+    auto const exp_col1  = int32s_col{6, 7, 8, 9};
+    auto const exp_col2  = floats_col{16, 17, 18, 19};
+    auto const exp_keys1 = int32s_col{20, 19, 21, 9};
+    auto const exp_keys2 = int32s_col{20, 20, 9, 21};
+    auto const expected  = cudf::table_view{{exp_col1, exp_col2, exp_keys1, exp_keys2}};
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_NONE);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+}
+
+TEST_F(StableDistinctKeepAny, InputWithNulls)
+{
+  // KEEP_ANY may retain any row of a key group, so rows sharing a key are identical and
+  // adjacent here; the expected table is then the same whichever row is picked.
+  auto const vals     = int32s_col{5, 4, 4, 1, 1, 8};
+  auto const keys     = int32s_col{{20, null, null, 19, 19, 21}, nulls_at({1, 2})};
+  auto const key_cols = std::vector<cudf::size_type>{1};
+  auto const tbl      = cudf::table_view{{vals, keys}};
+
+  // Nulls compare equal (null-equality argument omitted): the two null rows collapse.
+  {
+    auto const ref_vals = int32s_col{5, 4, 1, 8};
+    auto const ref_keys = int32s_col{{20, null, 19, 21}, null_at(1)};
+    auto const ref_tbl  = cudf::table_view{{ref_vals, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_ANY);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+
+  // Nulls compare unequal: both null rows survive.
+  {
+    auto const ref_vals = int32s_col{5, 4, 4, 1, 8};
+    auto const ref_keys = int32s_col{{20, null, null, 19, 21}, nulls_at({1, 2})};
+    auto const ref_tbl  = cudf::table_view{{ref_vals, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_ANY, NULL_UNEQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+}
+
+TEST_F(StableDistinctKeepAny, InputWithNullsAndNaNs)
+{
+  auto constexpr null{0.0};  // local double that hides the global int `null`
+
+  // KEEP_ANY may retain any row of a key group, so rows sharing a key are identical and
+  // adjacent here; the expected table is then the same whichever row is picked.
+  auto const vals = int32s_col{5, 4, 4, 1, 1, 1, 8, 8, 1};
+  auto const keys = floats_col{{20., null, null, NaN, NaN, NaN, 19., 19., 21.}, nulls_at({1, 2})};
+  auto const tbl  = cudf::table_view{{vals, keys}};
+  auto const key_cols = std::vector<cudf::size_type>{1};
+
+  // NULL_EQUAL + NAN_UNEQUAL: the nulls collapse to one row, every NaN row stays.
+  {
+    auto const ref_vals = int32s_col{5, 4, 1, 1, 1, 8, 1};
+    auto const ref_keys = floats_col{{20., null, NaN, NaN, NaN, 19., 21.}, null_at(1)};
+    auto const ref_tbl  = cudf::table_view{{ref_vals, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_ANY, NULL_EQUAL, NAN_UNEQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+
+  // NULL_EQUAL + NAN_EQUAL: the nulls and the NaNs each collapse to one row.
+  {
+    auto const ref_vals = int32s_col{5, 4, 1, 8, 1};
+    auto const ref_keys = floats_col{{20., null, NaN, 19., 21.}, null_at(1)};
+    auto const ref_tbl  = cudf::table_view{{ref_vals, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_ANY, NULL_EQUAL, NAN_EQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+
+  // NULL_UNEQUAL + NAN_UNEQUAL: every null row and every NaN row stays.
+  {
+    auto const ref_vals = int32s_col{5, 4, 4, 1, 1, 1, 8, 1};
+    auto const ref_keys = floats_col{{20., null, null, NaN, NaN, NaN, 19., 21.}, nulls_at({1, 2})};
+    auto const ref_tbl  = cudf::table_view{{ref_vals, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_ANY, NULL_UNEQUAL, NAN_UNEQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+
+  // NULL_UNEQUAL + NAN_EQUAL: the null rows stay, the NaNs collapse to one row.
+  {
+    auto const ref_vals = int32s_col{5, 4, 4, 1, 8, 1};
+    auto const ref_keys = floats_col{{20., null, null, NaN, 19., 21.}, nulls_at({1, 2})};
+    auto const ref_tbl  = cudf::table_view{{ref_vals, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_ANY, NULL_UNEQUAL, NAN_EQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+}
+
+TEST_F(StableDistinctKeepFirstLastNone, InputWithNullsEqual)
+{
+  // The payload is the row index, which makes the surviving row of each key group visible.
+  auto const ids      = int32s_col{0, 1, 2, 3, 4, 5, 6};
+  auto const keys     = int32s_col{{20, null, null, 19, 21, 19, 22}, nulls_at({1, 2})};
+  auto const key_cols = std::vector<cudf::size_type>{1};
+  auto const tbl      = cudf::table_view{{ids, keys}};
+
+  // KEEP_FIRST: rows 2 (second null) and 5 (second 19) are dropped.
+  {
+    auto const ref_ids  = int32s_col{0, 1, 3, 4, 6};
+    auto const ref_keys = int32s_col{{20, null, 19, 21, 22}, null_at(1)};
+    auto const ref_tbl  = cudf::table_view{{ref_ids, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_FIRST, NULL_EQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+
+  // KEEP_LAST: rows 1 (first null) and 3 (first 19) are dropped.
+  {
+    auto const ref_ids  = int32s_col{0, 2, 4, 5, 6};
+    auto const ref_keys = int32s_col{{20, null, 21, 19, 22}, null_at(1)};
+    auto const ref_tbl  = cudf::table_view{{ref_ids, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_LAST, NULL_EQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+
+  // KEEP_NONE: both null rows and both 19 rows are dropped.
+  {
+    auto const ref_ids  = int32s_col{0, 4, 6};
+    auto const ref_keys = int32s_col{{20, 21, 22}, no_nulls()};
+    auto const ref_tbl  = cudf::table_view{{ref_ids, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_NONE, NULL_EQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+}
+
+TEST_F(StableDistinctKeepFirstLastNone, InputWithNullsUnequal)
+{
+  // The payload is the row index, which makes the surviving row of each key group visible.
+  auto const ids      = int32s_col{0, 1, 2, 3, 4, 5, 6, 7};
+  auto const keys     = int32s_col{{20, null, null, 19, 21, 19, 22, 20}, nulls_at({1, 2})};
+  auto const key_cols = std::vector<cudf::size_type>{1};
+  auto const tbl      = cudf::table_view{{ids, keys}};
+
+  // KEEP_FIRST: rows 5 (second 19) and 7 (second 20) are dropped; both null rows stay.
+  {
+    auto const ref_ids  = int32s_col{0, 1, 2, 3, 4, 6};
+    auto const ref_keys = int32s_col{{20, null, null, 19, 21, 22}, nulls_at({1, 2})};
+    auto const ref_tbl  = cudf::table_view{{ref_ids, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_FIRST, NULL_UNEQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+
+  // KEEP_LAST: rows 0 (first 20) and 3 (first 19) are dropped; both null rows stay.
+  {
+    auto const ref_ids  = int32s_col{1, 2, 4, 5, 6, 7};
+    auto const ref_keys = int32s_col{{null, null, 21, 19, 22, 20}, nulls_at({0, 1})};
+    auto const ref_tbl  = cudf::table_view{{ref_ids, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_LAST, NULL_UNEQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+
+  // KEEP_NONE: every 19 and 20 row is dropped; both null rows stay.
+  {
+    auto const ref_ids  = int32s_col{1, 2, 4, 6};
+    auto const ref_keys = int32s_col{{null, null, 21, 22}, nulls_at({0, 1})};
+    auto const ref_tbl  = cudf::table_view{{ref_ids, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_NONE, NULL_UNEQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+}
+
+TEST_F(StableDistinctKeepFirstLastNone, InputWithNaNsEqual)
+{
+  // The payload is the row index, which makes the surviving row of each key group visible.
+  auto const ids      = int32s_col{0, 1, 2, 3, 4, 5, 6};
+  auto const keys     = floats_col{20., NaN, NaN, 19., 21., 19., 22.};
+  auto const key_cols = std::vector<cudf::size_type>{1};
+  auto const tbl      = cudf::table_view{{ids, keys}};
+
+  // KEEP_FIRST: rows 2 (second NaN) and 5 (second 19) are dropped.
+  {
+    auto const ref_ids  = int32s_col{0, 1, 3, 4, 6};
+    auto const ref_keys = floats_col{20., NaN, 19., 21., 22.};
+    auto const ref_tbl  = cudf::table_view{{ref_ids, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_FIRST, NULL_EQUAL, NAN_EQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+
+  // KEEP_LAST: rows 1 (first NaN) and 3 (first 19) are dropped.
+  {
+    auto const ref_ids  = int32s_col{0, 2, 4, 5, 6};
+    auto const ref_keys = floats_col{20., NaN, 21., 19., 22.};
+    auto const ref_tbl  = cudf::table_view{{ref_ids, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_LAST, NULL_EQUAL, NAN_EQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+
+  // KEEP_NONE: both NaN rows and both 19 rows are dropped.
+  {
+    auto const ref_ids  = int32s_col{0, 4, 6};
+    auto const ref_keys = floats_col{20., 21., 22.};
+    auto const ref_tbl  = cudf::table_view{{ref_ids, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_NONE, NULL_EQUAL, NAN_EQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+}
+
+TEST_F(StableDistinctKeepFirstLastNone, InputWithNaNsUnequal)
+{
+  // The payload is the row index, which makes the surviving row of each key group visible.
+  auto const ids      = int32s_col{0, 1, 2, 3, 4, 5, 6, 7};
+  auto const keys     = floats_col{20., NaN, NaN, 19., 21., 19., 22., 20.};
+  auto const key_cols = std::vector<cudf::size_type>{1};
+  auto const tbl      = cudf::table_view{{ids, keys}};
+
+  // KEEP_FIRST: rows 5 (second 19) and 7 (second 20) are dropped; both NaN rows stay.
+  {
+    auto const ref_ids  = int32s_col{0, 1, 2, 3, 4, 6};
+    auto const ref_keys = floats_col{20., NaN, NaN, 19., 21., 22.};
+    auto const ref_tbl  = cudf::table_view{{ref_ids, ref_keys}};
+
+    auto const deduped =
+      cudf::stable_distinct(tbl, key_cols, KEEP_FIRST, NULL_UNEQUAL, NAN_UNEQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+
+  // KEEP_LAST: rows 0 (first 20) and 3 (first 19) are dropped; both NaN rows stay.
+  {
+    auto const ref_ids  = int32s_col{1, 2, 4, 5, 6, 7};
+    auto const ref_keys = floats_col{NaN, NaN, 21., 19., 22., 20.};
+    auto const ref_tbl  = cudf::table_view{{ref_ids, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_LAST, NULL_UNEQUAL, NAN_UNEQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+
+  // KEEP_NONE: every 19 and 20 row is dropped; both NaN rows stay.
+  {
+    auto const ref_ids  = int32s_col{1, 2, 4, 6};
+    auto const ref_keys = floats_col{NaN, NaN, 21., 22.};
+    auto const ref_tbl  = cudf::table_view{{ref_ids, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_NONE, NULL_UNEQUAL, NAN_UNEQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+}
+
+TEST_F(StableDistinctKeepAny, BasicLists)
+{
+  // KEEP_ANY may retain any row of a key group, so rows sharing a key are identical and
+  // adjacent here; the expected table is then the same whichever row is picked.
+  // clang-format off
+  auto const ids = int32s_col{ 0,  0,   1,   1,      2,      3,      4,      4,      4,   5,   5,      6};
+  auto const keys = lists_col{{}, {}, {1}, {1}, {1, 1}, {1, 2}, {2, 2}, {2, 2}, {2, 2}, {2}, {2}, {2, 1}};
+  // clang-format on
+  auto const key_cols = std::vector<cudf::size_type>{1};
+  auto const tbl      = cudf::table_view{{ids, keys}};
+
+  auto const ref_ids  = int32s_col{0, 1, 2, 3, 4, 5, 6};
+  auto const ref_keys = lists_col{{}, {1}, {1, 1}, {1, 2}, {2, 2}, {2}, {2, 1}};
+  auto const ref_tbl  = cudf::table_view{{ref_ids, ref_keys}};
+
+  auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_ANY);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+}
+
+TEST_F(StableDistinctKeepFirstLastNone, BasicLists)
+{
+  // The payload is the row index, which makes the surviving row of each key group visible.
+  // clang-format off
+  auto const ids = int32s_col{ 0,  1,  2,      3,   4,      5,      6,   7,   8,       9,     10,     11};
+  auto const keys = lists_col{{}, {}, {1}, {1, 1}, {1}, {1, 2}, {2, 2}, {2}, {2}, {2, 1}, {2, 2}, {2, 2}};
+  // clang-format on
+  auto const key_cols = std::vector<cudf::size_type>{1};
+  auto const tbl      = cudf::table_view{{ids, keys}};
+
+  // KEEP_FIRST: the first occurrence of every list value remains.
+  {
+    auto const ref_ids  = int32s_col{0, 2, 3, 5, 6, 7, 9};
+    auto const ref_keys = lists_col{{}, {1}, {1, 1}, {1, 2}, {2, 2}, {2}, {2, 1}};
+    auto const ref_tbl  = cudf::table_view{{ref_ids, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_FIRST);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+
+  // KEEP_LAST: the last occurrence of every list value remains.
+  {
+    auto const ref_ids  = int32s_col{1, 3, 4, 5, 8, 9, 11};
+    auto const ref_keys = lists_col{{}, {1, 1}, {1}, {1, 2}, {2}, {2, 1}, {2, 2}};
+    auto const ref_tbl  = cudf::table_view{{ref_ids, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_LAST);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+
+  // KEEP_NONE: only the list values that occur exactly once remain.
+  {
+    auto const ref_ids  = int32s_col{3, 5, 9};
+    auto const ref_keys = lists_col{{1, 1}, {1, 2}, {2, 1}};
+    auto const ref_tbl  = cudf::table_view{{ref_ids, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_NONE);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+}
+
+TEST_F(StableDistinctKeepAny, SlicedBasicLists)
+{
+  auto constexpr unused = int32_t{0};
+
+  // KEEP_ANY may retain any row of a key group, so rows sharing a key are identical and
+  // adjacent; rows 0, 1 and 12 only pad the columns and fall outside the slice [2, 12).
+  auto const ids  = int32s_col{unused, unused, 1, 1, 2, 3, 4, 4, 4, 5, 5, 6, unused};
+  auto const keys = lists_col{
+    {0, 0}, {0, 0}, {1}, {1}, {1, 1}, {1, 2}, {2, 2}, {2, 2}, {2, 2}, {2}, {2}, {2, 1}, {5, 5}};
+  auto const key_cols = std::vector<cudf::size_type>{1};
+  auto const full_tbl = cudf::table_view{{ids, keys}};
+  auto const tbl      = cudf::slice(full_tbl, {2, 12}).front();
+
+  auto const ref_ids  = int32s_col{1, 2, 3, 4, 5, 6};
+  auto const ref_keys = lists_col{{1}, {1, 1}, {1, 2}, {2, 2}, {2}, {2, 1}};
+  auto const ref_tbl  = cudf::table_view{{ref_ids, ref_keys}};
+
+  auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_ANY);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+}
+
+TEST_F(StableDistinctKeepAny, NullableLists)
+{
+  // KEEP_ANY may retain any row of a key group, so rows sharing a key are identical and
+  // adjacent here; the expected table is then the same whichever row is picked.
+  auto const ids = int32s_col{0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4};
+  auto const keys =
+    lists_col{{{}, {}, {1}, {1}, {2, 2}, {2, 2}, {2, 2}, {2}, {2}, {} /*NULL*/, {} /*NULL*/},
+              nulls_at({9, 10})};
+  auto const key_cols = std::vector<cudf::size_type>{1};
+  auto const tbl      = cudf::table_view{{ids, keys}};
+
+  // Nulls compare equal (null-equality argument omitted): the two null lists collapse.
+  {
+    auto const ref_ids  = int32s_col{0, 1, 2, 3, 4};
+    auto const ref_keys = lists_col{{{}, {1}, {2, 2}, {2}, {} /*NULL*/}, null_at(4)};
+    auto const ref_tbl  = cudf::table_view{{ref_ids, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_ANY);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+
+  // Nulls compare unequal: both null lists survive.
+  {
+    auto const ref_ids = int32s_col{0, 1, 2, 3, 4, 4};
+    auto const ref_keys =
+      lists_col{{{}, {1}, {2, 2}, {2}, {} /*NULL*/, {} /*NULL*/}, nulls_at({4, 5})};
+    auto const ref_tbl = cudf::table_view{{ref_ids, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_ANY, NULL_UNEQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+}
+
+TEST_F(StableDistinctKeepFirstLastNone, ListsWithNullsEqual)
+{
+  // The payload is the row index, which makes the surviving row of each key group visible.
+  auto const ids = int32s_col{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+  auto const keys =
+    lists_col{{{}, {}, {1}, {1}, {2, 2}, {2}, {2}, {} /*NULL*/, {2, 2}, {2, 2}, {} /*NULL*/},
+              nulls_at({7, 10})};
+  auto const key_cols = std::vector<cudf::size_type>{1};
+  auto const tbl      = cudf::table_view{{ids, keys}};
+
+  // KEEP_FIRST: the first occurrence of every key, the null list included, remains.
+  {
+    auto const ref_ids  = int32s_col{0, 2, 4, 5, 7};
+    auto const ref_keys = lists_col{{{}, {1}, {2, 2}, {2}, {} /*NULL*/}, null_at(4)};
+    auto const ref_tbl  = cudf::table_view{{ref_ids, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_FIRST, NULL_EQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+
+  // KEEP_LAST: the last occurrence of every key, the null list included, remains.
+  {
+    auto const ref_ids  = int32s_col{1, 3, 6, 9, 10};
+    auto const ref_keys = lists_col{{{}, {1}, {2}, {2, 2}, {} /*NULL*/}, null_at(4)};
+    auto const ref_tbl  = cudf::table_view{{ref_ids, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_LAST, NULL_EQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+
+  // KEEP_NONE: every key occurs more than once, so the expected table is empty.
+  {
+    auto const ref_ids  = int32s_col{};
+    auto const ref_keys = lists_col{};
+    auto const ref_tbl  = cudf::table_view{{ref_ids, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_NONE, NULL_EQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+}
+
+TEST_F(StableDistinctKeepFirstLastNone, ListsWithNullsUnequal)
+{
+  // The payload is the row index, which makes the surviving row of each key group visible.
+  auto const ids = int32s_col{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+  auto const keys =
+    lists_col{{{}, {}, {1}, {1}, {2, 2}, {2}, {2}, {} /*NULL*/, {2, 2}, {2, 2}, {} /*NULL*/},
+              nulls_at({7, 10})};
+  auto const key_cols = std::vector<cudf::size_type>{1};
+  auto const tbl      = cudf::table_view{{ids, keys}};
+
+  // KEEP_FIRST: first occurrence of every non-null key; both null lists (rows 7, 10) stay.
+  {
+    auto const ref_ids = int32s_col{0, 2, 4, 5, 7, 10};
+    auto const ref_keys =
+      lists_col{{{}, {1}, {2, 2}, {2}, {} /*NULL*/, {} /*NULL*/}, nulls_at({4, 5})};
+    auto const ref_tbl = cudf::table_view{{ref_ids, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_FIRST, NULL_UNEQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+
+  // KEEP_LAST: last occurrence of every non-null key; both null lists (rows 7, 10) stay.
+  {
+    auto const ref_ids = int32s_col{1, 3, 6, 7, 9, 10};
+    auto const ref_keys =
+      lists_col{{{}, {1}, {2}, {} /*NULL*/, {2, 2}, {} /*NULL*/}, nulls_at({3, 5})};
+    auto const ref_tbl = cudf::table_view{{ref_ids, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_LAST, NULL_UNEQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+
+  // KEEP_NONE: all non-null keys repeat, so only the two null lists remain.
+  {
+    auto const ref_ids  = int32s_col{7, 10};
+    auto const ref_keys = lists_col{{lists_col{} /*NULL*/, lists_col{} /*NULL*/}, nulls_at({0, 1})};
+    auto const ref_tbl  = cudf::table_view{{ref_ids, ref_keys}};
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_NONE, NULL_UNEQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(ref_tbl, *deduped);
+  }
+}
+
+TEST_F(StableDistinctKeepAny, ListsOfStructs)
+{
+  // Key rows (lists of 2-field structs). `==`/`!=` relates a row to the next one, nulls equal.
+  // 0.   []                  ==
+  // 1.   []                  !=
+  // 2.   Null                ==
+  // 3.   Null                !=
+  // 4.   [Null, Null]        !=
+  // 5.   [Null]              ==
+  // 6.   [Null]              ==
+  // 7.   [Null]              !=
+  // 8.   [{Null, Null}]      !=
+  // 9.   [{1,'a'}, {2,'b'}]  !=
+  // 10.  [{0,'a'}, {2,'b'}]  !=
+  // 11.  [{0,'a'}, {2,'c'}]  ==
+  // 12.  [{0,'a'}, {2,'c'}]  !=
+  // 13.  [{0,Null}]          ==
+  // 14.  [{0,Null}]          !=
+  // 15.  [{Null, 'b'}]       ==
+  // 16.  [{Null, 'b'}]
+
+  auto const structs = [] {
+    auto child1 =
+      int32s_col{{XXX, XXX, XXX, XXX, XXX, null, 1, 2, 0, 2, 0, 2, 0, 2, 0, 0, null, null},
+                 nulls_at({5, 16, 17})};
+    auto child2 = strings_col{{"" /*XXX*/,
+                               "" /*XXX*/,
+                               "" /*XXX*/,
+                               "" /*XXX*/,
+                               "" /*XXX*/,
+                               "" /*null*/,
+                               "a",
+                               "b",
+                               "a",
+                               "b",
+                               "a",
+                               "c",
+                               "a",
+                               "c",
+                               "" /*null*/,
+                               "" /*null*/,
+                               "b",
+                               "b"},
+                              nulls_at({5, 14, 15})};
+    // Struct elements 0-4 are null as a whole; they are the `Null` entries of list rows 4-7.
+    return structs_col{{child1, child2}, nulls_at({0, 1, 2, 3, 4})};
+  }();
+  // 18 offsets split the 18 struct elements into the 17 list rows listed above.
+  auto const offsets = int32s_col{0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 8, 10, 12, 14, 15, 16, 17, 18};
+  auto const null_it = nulls_at({2, 3});  // list rows 2 and 3 are null
+  // `keys` below is handed a raw pointer into `null_mask`, so the mask must outlive the view.
+  auto [null_mask, null_count] = cudf::test::detail::make_null_mask(null_it, null_it + 17);
+
+  auto const keys = cudf::column_view(cudf::data_type(cudf::type_id::LIST),
+                                      17,  // list rows
+                                      nullptr,
+                                      static_cast<cudf::bitmask_type const*>(null_mask.data()),
+                                      null_count,
+                                      0,
+                                      {offsets, structs});
+  // Payload: rows whose keys are equal (with nulls equal) carry the same value.
+  auto const idx     = int32s_col{1, 1, 2, 2, 3, 4, 4, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10};
+  auto const input   = cudf::table_view{{idx, keys}};
+  auto const key_idx = std::vector<cudf::size_type>{1};
+
+  // Nulls are equal (null-equality argument omitted): one row per `==` run is expected.
+  {
+    auto const expect_map   = int32s_col{0, 2, 4, 5, 8, 9, 10, 11, 13, 15};
+    auto const expect_table = cudf::gather(input, expect_map);
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_ANY);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expect_table, *result);
+  }
+
+  // Nulls are unequal: only rows 1 and 12 (null-free duplicates) are dropped.
+  {
+    auto const expect_map   = int32s_col{0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16};
+    auto const expect_table = cudf::gather(input, expect_map);
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_ANY, NULL_UNEQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expect_table, *result);
+  }
+}
+
+TEST_F(StableDistinctKeepFirstLastNone, ListsOfStructs)
+{
+  // Key rows (lists of 2-field structs). `==`/`!=` relates a row to the next one, nulls equal.
+  // 0.   []                  ==
+  // 1.   []                  !=
+  // 2.   Null                ==
+  // 3.   Null                !=
+  // 4.   [Null, Null]        !=
+  // 5.   [Null]              ==
+  // 6.   [Null]              ==
+  // 7.   [Null]              !=
+  // 8.   [{Null, Null}]      !=
+  // 9.   [{1,'a'}, {2,'b'}]  !=
+  // 10.  [{0,'a'}, {2,'b'}]  !=
+  // 11.  [{0,'a'}, {2,'c'}]  ==
+  // 12.  [{0,'a'}, {2,'c'}]  !=
+  // 13.  [{0,Null}]          ==
+  // 14.  [{0,Null}]          !=
+  // 15.  [{Null, 'b'}]       ==
+  // 16.  [{Null, 'b'}]
+
+  auto const structs = [] {
+    auto child1 =
+      int32s_col{{XXX, XXX, XXX, XXX, XXX, null, 1, 2, 0, 2, 0, 2, 0, 2, 0, 0, null, null},
+                 nulls_at({5, 16, 17})};
+    auto child2 = strings_col{{"" /*XXX*/,
+                               "" /*XXX*/,
+                               "" /*XXX*/,
+                               "" /*XXX*/,
+                               "" /*XXX*/,
+                               "" /*null*/,
+                               "a",
+                               "b",
+                               "a",
+                               "b",
+                               "a",
+                               "c",
+                               "a",
+                               "c",
+                               "" /*null*/,
+                               "" /*null*/,
+                               "b",
+                               "b"},
+                              nulls_at({5, 14, 15})};
+    // Struct elements 0-4 are null as a whole; they are the `Null` entries of list rows 4-7.
+    return structs_col{{child1, child2}, nulls_at({0, 1, 2, 3, 4})};
+  }();
+  // 18 offsets split the 18 struct elements into the 17 list rows listed above.
+  auto const offsets = int32s_col{0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 8, 10, 12, 14, 15, 16, 17, 18};
+  auto const null_it = nulls_at({2, 3});  // list rows 2 and 3 are null
+  // `keys` below is handed a raw pointer into `null_mask`, so the mask must outlive the view.
+  auto [null_mask, null_count] = cudf::test::detail::make_null_mask(null_it, null_it + 17);
+
+  auto const keys = cudf::column_view(cudf::data_type(cudf::type_id::LIST),
+                                      17,  // list rows
+                                      nullptr,
+                                      static_cast<cudf::bitmask_type const*>(null_mask.data()),
+                                      null_count,
+                                      0,
+                                      {offsets, structs});
+  // Payload is the row index, so the gather maps below name the surviving rows directly.
+  auto const idx     = int32s_col{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  auto const input   = cudf::table_view{{idx, keys}};
+  auto const key_idx = std::vector<cudf::size_type>{1};
+
+  // KEEP_FIRST (null-equality argument omitted): the first row of each `==` run.
+  {
+    auto const expect_map   = int32s_col{0, 2, 4, 5, 8, 9, 10, 11, 13, 15};
+    auto const expect_table = cudf::gather(input, expect_map);
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_FIRST);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expect_table, *result);
+  }
+
+  // KEEP_LAST: the last row of each `==` run.
+  {
+    auto const expect_map   = int32s_col{1, 3, 4, 7, 8, 9, 10, 12, 14, 16};
+    auto const expect_table = cudf::gather(input, expect_map);
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_LAST);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expect_table, *result);
+  }
+
+  // KEEP_NONE: only the rows that belong to no `==` run (rows 4, 8, 9 and 10).
+  {
+    auto const expect_map   = int32s_col{4, 8, 9, 10};
+    auto const expect_table = cudf::gather(input, expect_map);
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_NONE);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expect_table, *result);
+  }
+}
+
+TEST_F(StableDistinctKeepAny, SlicedListsOfStructs)
+{
+  // Key rows (lists of 2-field structs). `==`/`!=` relates a row to the next one, nulls equal.
+  // 0.   []                  ==                <- Don't care
+  // 1.   []                  !=                <- Don't care
+  // 2.   Null                ==                <- Don't care
+  // 3.   Null                !=                <- Don't care
+  // 4.   [Null, Null]        !=                <- Don't care
+  // 5.   [Null]              ==                <- Don't care
+  // 6.   [Null]              ==                <- Don't care
+  // 7.   [Null]              !=                <- Don't care
+  // 8.   [{Null, Null}]      !=
+  // 9.   [{1,'a'}, {2,'b'}]  !=
+  // 10.  [{0,'a'}, {2,'b'}]  !=
+  // 11.  [{0,'a'}, {2,'c'}]  ==
+  // 12.  [{0,'a'}, {2,'c'}]  !=
+  // 13.  [{0,Null}]          ==
+  // 14.  [{0,Null}]          !=
+  // 15.  [{Null, 'b'}]       ==                <- Don't care
+  // 16.  [{Null, 'b'}]                         <- Don't care
+
+  auto const structs = [] {
+    auto child1 =
+      int32s_col{{XXX, XXX, XXX, XXX, XXX, null, 1, 2, 0, 2, 0, 2, 0, 2, 0, 0, null, null},
+                 nulls_at({5, 16, 17})};
+    auto child2 = strings_col{{"" /*XXX*/,
+                               "" /*XXX*/,
+                               "" /*XXX*/,
+                               "" /*XXX*/,
+                               "" /*XXX*/,
+                               "" /*null*/,
+                               "a",
+                               "b",
+                               "a",
+                               "b",
+                               "a",
+                               "c",
+                               "a",
+                               "c",
+                               "" /*null*/,
+                               "" /*null*/,
+                               "b",
+                               "b"},
+                              nulls_at({5, 14, 15})};
+    // Struct elements 0-4 are null as a whole; they are the `Null` entries of list rows 4-7.
+    return structs_col{{child1, child2}, nulls_at({0, 1, 2, 3, 4})};
+  }();
+  // 18 offsets split the 18 struct elements into the 17 list rows listed above.
+  auto const offsets = int32s_col{0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 8, 10, 12, 14, 15, 16, 17, 18};
+  auto const null_it = nulls_at({2, 3});  // list rows 2 and 3 are null
+  // `keys` below is handed a raw pointer into `null_mask`, so the mask must outlive the view.
+  auto [null_mask, null_count] = cudf::test::detail::make_null_mask(null_it, null_it + 17);
+
+  auto const keys = cudf::column_view(cudf::data_type(cudf::type_id::LIST),
+                                      17,  // list rows
+                                      nullptr,
+                                      static_cast<cudf::bitmask_type const*>(null_mask.data()),
+                                      null_count,
+                                      0,
+                                      {offsets, structs});
+  // Payload: rows whose keys are equal (with nulls equal) carry the same value.
+  auto const idx            = int32s_col{1, 1, 2, 2, 3, 4, 4, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10};
+  auto const input_original = cudf::table_view{{idx, keys}};
+  auto const input          = cudf::slice(input_original, {8, 15})[0];  // rows 8-14 only
+  auto const key_idx        = std::vector<cudf::size_type>{1};
+
+  // Nulls are equal. NOTE(review): compared with EQUIVALENT here, EQUAL in the unsliced tests.
+  {
+    auto const expect_map   = int32s_col{8, 9, 10, 11, 13};  // rows of input_original
+    auto const expect_table = cudf::gather(input_original, expect_map);
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_ANY);
+    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*expect_table, *result);
+  }
+
+  // Nulls are unequal: row 14 is kept in addition, as its key contains a null.
+  {
+    auto const expect_map   = int32s_col{8, 9, 10, 11, 13, 14};  // rows of input_original
+    auto const expect_table = cudf::gather(input_original, expect_map);
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_ANY, NULL_UNEQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*expect_table, *result);
+  }
+}
+
+TEST_F(StableDistinctKeepAny, ListsOfEmptyStructs)
+{
+  // KEEP_ANY may keep any row of a group of equal keys, so equal keys are adjacent and share
+  // one payload value. Below, `==`/`!=` relates a row's key to the next row's (nulls equal).
+
+  // 0.  []             ==
+  // 1.  []             !=
+  // 2.  Null           ==
+  // 3.  Null           !=
+  // 4.  [Null, Null]   ==
+  // 5.  [Null, Null]   ==
+  // 6.  [Null, Null]   !=
+  // 7.  [Null]         ==
+  // 8.  [Null]         !=
+  // 9.  [{}]           ==
+  // 10. [{}]           !=
+  // 11. [{}, {}]       ==
+  // 12. [{}, {}]
+  // Struct child: 14 field-less elements; elements 0-7 are null, 8-13 are the `{}` entries.
+  auto const structs_null_it = nulls_at({0, 1, 2, 3, 4, 5, 6, 7});
+  auto [structs_null_mask, structs_null_count] =
+    cudf::test::detail::make_null_mask(structs_null_it, structs_null_it + 14);
+  auto const structs =
+    cudf::column_view(cudf::data_type(cudf::type_id::STRUCT),
+                      14,  // struct elements
+                      nullptr,
+                      static_cast<cudf::bitmask_type const*>(structs_null_mask.data()),
+                      structs_null_count);
+  // 14 offsets split the struct elements into 13 list rows; list rows 2 and 3 are null.
+  auto const offsets       = int32s_col{0, 0, 0, 0, 0, 2, 4, 6, 7, 8, 9, 10, 12, 14};
+  auto const lists_null_it = nulls_at({2, 3});
+  auto [lists_null_mask, lists_null_count] =
+    cudf::test::detail::make_null_mask(lists_null_it, lists_null_it + 13);
+  auto const keys =
+    cudf::column_view(cudf::data_type(cudf::type_id::LIST),
+                      13,  // list rows
+                      nullptr,
+                      static_cast<cudf::bitmask_type const*>(lists_null_mask.data()),
+                      lists_null_count,
+                      0,
+                      {offsets, structs});
+
+  auto const idx     = int32s_col{1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6};
+  auto const input   = cudf::table_view{{idx, keys}};
+  auto const key_idx = std::vector<cudf::size_type>{1};
+
+  // Nulls are equal (null-equality argument omitted): one row per `==` run is expected.
+  {
+    auto const expect_map   = int32s_col{0, 2, 4, 7, 9, 11};
+    auto const expect_table = cudf::gather(input, expect_map);
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_ANY);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expect_table, *result);
+  }
+
+  // Nulls are unequal: only rows 1, 10 and 12 (null-free duplicates) are dropped.
+  {
+    auto const expect_map   = int32s_col{0, 2, 3, 4, 5, 6, 7, 8, 9, 11};
+    auto const expect_table = cudf::gather(input, expect_map);
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_ANY, NULL_UNEQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expect_table, *result);
+  }
+}
+
+TEST_F(StableDistinctKeepAny, EmptyDeepList)
+{
+  // KEEP_ANY may retain any row of a key group, so rows sharing a key are identical and
+  // adjacent here; the expected table is then the same whichever row is picked.
+
+  // Keys form a List<List<int>> column in which every list is empty:
+  //
+  // 0. []
+  // 1. []
+  // 2. Null
+  // 3. Null
+
+  auto const keys =
+    lists_col{{lists_col{}, lists_col{}, lists_col{}, lists_col{}}, nulls_at({2, 3})};
+
+  auto const vals     = int32s_col{1, 1, 2, 2};
+  auto const key_cols = std::vector<cudf::size_type>{1};
+  auto const tbl      = cudf::table_view{{vals, keys}};
+
+  // Nulls compare equal (null-equality argument omitted): one `[]` row and one null row.
+  {
+    auto const kept_rows = int32s_col{0, 2};
+    auto const ref_tbl   = cudf::gather(tbl, kept_rows);
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_ANY);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*ref_tbl, *deduped);
+  }
+
+  // Nulls compare unequal: one `[]` row and both null rows.
+  {
+    auto const kept_rows = int32s_col{0, 2, 3};
+    auto const ref_tbl   = cudf::gather(tbl, kept_rows);
+
+    auto const deduped = cudf::stable_distinct(tbl, key_cols, KEEP_ANY, NULL_UNEQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*ref_tbl, *deduped);
+  }
+}
+
+TEST_F(StableDistinctKeepAny, StructsOfStructs)
+{
+  // KEEP_ANY is nondeterministic, so rows with equivalent keys are placed in
+  // contiguous groups of identical rows to keep the expected output unique.
+
+  //  +-----------------+
+  //  |  s1{s2{a,b}, c} |
+  //  +-----------------+
+  // 0 |  { {1, 1}, 5}  |
+  // 1 |  { {1, 1}, 5}  |  // Same as 0
+  // 2 |  { {1, 2}, 4}  |
+  // 3 |  { Null,   6}  |
+  // 4 |  { Null,   4}  |
+  // 5 |  { Null,   4}  |  // Same as 4
+  // 6 |  Null          |
+  // 7 |  Null          |  // Same as 6
+  // 8 |  { {2, 1}, 5}  |
+
+  auto s1 = [&] {
+    auto a  = int32s_col{1, 1, 1, XXX, XXX, XXX, XXX, XXX, 2};
+    auto b  = int32s_col{1, 1, 2, XXX, XXX, XXX, XXX, XXX, 1};
+    auto s2 = structs_col{{a, b}, nulls_at({3, 4, 5})};
+
+    auto c = int32s_col{5, 5, 4, 6, 4, 4, XXX, XXX, 5};
+    std::vector<std::unique_ptr<cudf::column>> s1_children;
+    s1_children.emplace_back(s2.release());
+    s1_children.emplace_back(c.release());
+    auto const null_it = nulls_at({6, 7});
+    return structs_col(std::move(s1_children), std::vector<bool>{null_it, null_it + 9});
+  }();
+
+  auto const idx     = int32s_col{0, 0, 2, 3, 4, 4, 6, 6, 8};
+  auto const input   = cudf::table_view{{idx, s1}};
+  auto const key_idx = std::vector<cudf::size_type>{1};
+
+  // Nulls are equal: one row per group of the table above is expected.
+  {
+    auto const expect_map   = int32s_col{0, 2, 3, 4, 6, 8};
+    auto const expect_table = cudf::gather(input, expect_map);
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_ANY);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expect_table, *result);
+  }
+
+  // Nulls are unequal: rows 4-7 are all expected; 5 and 7 are gathered as copies of 4 and 6.
+  {
+    auto const expect_map   = int32s_col{0, 2, 3, 4, 4, 6, 6, 8};
+    auto const expect_table = cudf::gather(input, expect_map);
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_ANY, NULL_UNEQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expect_table, *result);
+  }
+}
+
+TEST_F(StableDistinctKeepAny, SlicedStructsOfStructs)
+{
+  // KEEP_ANY is nondeterministic, so rows with equivalent keys are placed in
+  // contiguous groups of identical rows to keep the expected output unique.
+
+  //  +-----------------+
+  //  |  s1{s2{a,b}, c} |
+  //  +-----------------+
+  // 0 |  { {1, 1}, 5}  |
+  // 1 |  { {1, 1}, 5}  |  // Same as 0
+  // 2 |  { {1, 2}, 4}  |
+  // 3 |  { Null,   6}  |
+  // 4 |  { Null,   4}  |
+  // 5 |  { Null,   4}  |  // Same as 4
+  // 6 |  Null          |
+  // 7 |  Null          |  // Same as 6
+  // 8 |  { {2, 1}, 5}  |
+
+  auto s1 = [&] {
+    auto a  = int32s_col{1, 1, 1, XXX, XXX, XXX, XXX, XXX, 2};
+    auto b  = int32s_col{1, 1, 2, XXX, XXX, XXX, XXX, XXX, 1};
+    auto s2 = structs_col{{a, b}, nulls_at({3, 4, 5})};
+
+    auto c = int32s_col{5, 5, 4, 6, 4, 4, XXX, XXX, 5};
+    std::vector<std::unique_ptr<cudf::column>> s1_children;
+    s1_children.emplace_back(s2.release());
+    s1_children.emplace_back(c.release());
+    auto const null_it = nulls_at({6, 7});
+    return structs_col(std::move(s1_children), std::vector<bool>{null_it, null_it + 9});
+  }();
+
+  auto const idx            = int32s_col{0, 0, 2, 3, 4, 4, 6, 6, 8};
+  auto const input_original = cudf::table_view{{idx, s1}};
+  auto const input          = cudf::slice(input_original, {1, 7})[0];
+  auto const key_idx        = std::vector<cudf::size_type>{1};
+
+  // Nulls are equal. Map indices refer to rows of the unsliced input; row 5 duplicates row 4.
+  {
+    auto const expect_map   = int32s_col{1, 2, 3, 4, 6};
+    auto const expect_table = cudf::gather(input_original, expect_map);
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_ANY);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expect_table, *result);
+  }
+
+  // Nulls are unequal: rows 4 and 5 are both expected; 5 is gathered as a copy of row 4.
+  {
+    auto const expect_map   = int32s_col{1, 2, 3, 4, 4, 6};
+    auto const expect_table = cudf::gather(input_original, expect_map);
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_ANY, NULL_UNEQUAL);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expect_table, *result);
+  }
+}
+
+TEST_F(StableDistinctKeepAny, StructsOfLists)
+{
+  // KEEP_ANY is nondeterministic, so rows with equivalent keys are placed in
+  // contiguous groups of identical rows to keep the expected output unique.
+
+  auto const idx  = int32s_col{1, 1, 2, 3, 4, 4, 4, 5, 5, 6};
+  auto const keys = [] {
+    // The three child columns hold the same lists, row for row.
+    auto child1 = lists_col{{1}, {1}, {1, 1}, {1, 2}, {2, 2}, {2, 2}, {2, 2}, {2}, {2}, {2, 1}};
+    auto child2 = lists_col{{1}, {1}, {1, 1}, {1, 2}, {2, 2}, {2, 2}, {2, 2}, {2}, {2}, {2, 1}};
+    auto child3 = lists_col{{1}, {1}, {1, 1}, {1, 2}, {2, 2}, {2, 2}, {2, 2}, {2}, {2}, {2, 1}};
+    return structs_col{{child1, child2, child3}};
+  }();
+
+  auto const input   = cudf::table_view{{idx, keys}};
+  auto const key_idx = std::vector<cudf::size_type>{1};
+
+  auto const exp_idx  = int32s_col{1, 2, 3, 4, 5, 6};
+  auto const exp_keys = [] {
+    auto child1 = lists_col{{1}, {1, 1}, {1, 2}, {2, 2}, {2}, {2, 1}};
+    auto child2 = lists_col{{1}, {1, 1}, {1, 2}, {2, 2}, {2}, {2, 1}};
+    auto child3 = lists_col{{1}, {1, 1}, {1, 2}, {2, 2}, {2}, {2, 1}};
+    return structs_col{{child1, child2, child3}};
+  }();
+  auto const expected = cudf::table_view{{exp_idx, exp_keys}};
+
+  auto const result = cudf::stable_distinct(input, key_idx, KEEP_ANY);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+}
+
+TEST_F(StableDistinctKeepFirstLastNone, StructsOfLists)
+{
+  auto const idx  = int32s_col{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+  auto const keys = [] {
+    // The three child columns hold the same lists; duplicate keys are not contiguous here.
+    auto child1 = lists_col{{1}, {1, 1}, {1}, {1, 2}, {2, 2}, {2}, {2}, {2, 1}, {2, 2}, {2, 2}};
+    auto child2 = lists_col{{1}, {1, 1}, {1}, {1, 2}, {2, 2}, {2}, {2}, {2, 1}, {2, 2}, {2, 2}};
+    auto child3 = lists_col{{1}, {1, 1}, {1}, {1, 2}, {2, 2}, {2}, {2}, {2, 1}, {2, 2}, {2, 2}};
+    return structs_col{{child1, child2, child3}};
+  }();
+
+  auto const input   = cudf::table_view{{idx, keys}};
+  auto const key_idx = std::vector<cudf::size_type>{1};
+
+  // KEEP_FIRST: the first occurrence of every key is expected, in input order.
+  {
+    auto const expect_map   = int32s_col{0, 1, 3, 4, 5, 7};
+    auto const expect_table = cudf::gather(input, expect_map);
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_FIRST);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expect_table, *result);
+  }
+
+  // KEEP_LAST: the last occurrence of every key is expected, in input order.
+  {
+    auto const expect_map   = int32s_col{1, 2, 3, 6, 7, 9};
+    auto const expect_table = cudf::gather(input, expect_map);
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_LAST);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expect_table, *result);
+  }
+
+  // KEEP_NONE: only keys that occur exactly once are expected.
+  {
+    auto const expect_map   = int32s_col{1, 3, 7};
+    auto const expect_table = cudf::gather(input, expect_map);
+
+    auto const result = cudf::stable_distinct(input, key_idx, KEEP_NONE);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expect_table, *result);
+  }
+}
+
+TEST_F(StableDistinctKeepAny, SlicedStructsOfLists)
+{
+  // KEEP_ANY is nondeterministic, so rows with equivalent keys are placed in
+  // contiguous groups of identical rows to keep the expected output unique.
+
+  auto constexpr dont_care = int32_t{0};
+
+  auto const idx  = int32s_col{dont_care, dont_care, 1, 1, 2, 3, 4, 4, 4, 5, 5, 6, dont_care};
+  auto const keys = [] {
+    // The three child columns hold the same lists; rows 0, 1 and 12 are absent from `expected`.
+    auto child1 = lists_col{
+      {0, 0}, {0, 0}, {1}, {1}, {1, 1}, {1, 2}, {2, 2}, {2, 2}, {2, 2}, {2}, {2}, {2, 1}, {5, 5}};
+    auto child2 = lists_col{
+      {0, 0}, {0, 0}, {1}, {1}, {1, 1}, {1, 2}, {2, 2}, {2, 2}, {2, 2}, {2}, {2}, {2, 1}, {5, 5}};
+    auto child3 = lists_col{
+      {0, 0}, {0, 0}, {1}, {1}, {1, 1}, {1, 2}, {2, 2}, {2, 2}, {2, 2}, {2}, {2}, {2, 1}, {5, 5}};
+    return structs_col{{child1, child2, child3}};
+  }();
+
+  auto const input_original = cudf::table_view{{idx, keys}};
+  auto const input          = cudf::slice(input_original, {2, 12})[0];
+  auto const key_idx        = std::vector<cudf::size_type>{1};
+
+  auto const exp_idx  = int32s_col{1, 2, 3, 4, 5, 6};
+  auto const exp_keys = [] {
+    auto child1 = lists_col{{1}, {1, 1}, {1, 2}, {2, 2}, {2}, {2, 1}};
+    auto child2 = lists_col{{1}, {1, 1}, {1, 2}, {2, 2}, {2}, {2, 1}};
+    auto child3 = lists_col{{1}, {1, 1}, {1, 2}, {2, 2}, {2}, {2, 1}};
+    return structs_col{{child1, child2, child3}};
+  }();
+  auto const expected = cudf::table_view{{exp_idx, exp_keys}};
+
+  auto const result = cudf::stable_distinct(input, key_idx, KEEP_ANY);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+}
diff --git a/java/src/main/java/ai/rapids/cudf/ColumnVector.java b/java/src/main/java/ai/rapids/cudf/ColumnVector.java
index 4fb20618737..e4da1c2d367 100644
--- a/java/src/main/java/ai/rapids/cudf/ColumnVector.java
+++ b/java/src/main/java/ai/rapids/cudf/ColumnVector.java
@@ -50,10 +50,11 @@ public interface EventHandler {
      *
      * @note the callback is invoked with this `ColumnVector`'s lock held.
      *
+     * @param cv - a reference to the ColumnVector we are closing
      * @param refCount - the updated ref count for this ColumnVector at the time
      *                 of invocation
      */
-    void onClosed(int refCount);
+    void onClosed(ColumnVector cv, int refCount);
   }
 
   private static final Logger log = LoggerFactory.getLogger(ColumnVector.class);
@@ -260,7 +261,7 @@ public synchronized void close() {
     refCount--;
     offHeap.delRef();
     if (eventHandler != null) {
-      eventHandler.onClosed(refCount);
+      eventHandler.onClosed(this, refCount);
     }
     if (refCount == 0) {
       offHeap.clean(false);
diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
index 20fae236d5b..93003828f34 100644
--- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
+++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
@@ -6730,7 +6730,10 @@ void testColumnViewWithNonEmptyNullsIsCleared() {
   public void testEventHandlerIsCalledForEachClose() {
     final AtomicInteger onClosedWasCalled = new AtomicInteger(0);
     try (ColumnVector cv = ColumnVector.fromInts(1,2,3,4)) {
-      cv.setEventHandler(refCount -> onClosedWasCalled.incrementAndGet());
+      cv.setEventHandler((col, refCount) -> {
+        assertEquals(cv, col);
+        onClosedWasCalled.incrementAndGet();
+      });
     }
     assertEquals(1, onClosedWasCalled.get());
   }
@@ -6744,7 +6747,9 @@ public void testEventHandlerIsNotCalledIfNotSet() {
     assertEquals(0, onClosedWasCalled.get());
 
     try (ColumnVector cv = ColumnVector.fromInts(1,2,3,4)) {
-      cv.setEventHandler(refCount -> onClosedWasCalled.incrementAndGet());
+      cv.setEventHandler((col, refCount) -> {
+        onClosedWasCalled.incrementAndGet();
+      });
       cv.setEventHandler(null);
     }
     assertEquals(0, onClosedWasCalled.get());
diff --git a/python/cudf/cudf/_lib/cpp/stream_compaction.pxd b/python/cudf/cudf/_lib/cpp/stream_compaction.pxd
index 61efd040807..bba2d1ffb7c 100644
--- a/python/cudf/cudf/_lib/cpp/stream_compaction.pxd
+++ b/python/cudf/cudf/_lib/cpp/stream_compaction.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
@@ -20,6 +20,7 @@ from cudf._lib.cpp.types cimport (
 cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" \
         nogil:
     ctypedef enum duplicate_keep_option:
+        KEEP_ANY 'cudf::duplicate_keep_option::KEEP_ANY'
         KEEP_FIRST 'cudf::duplicate_keep_option::KEEP_FIRST'
         KEEP_LAST 'cudf::duplicate_keep_option::KEEP_LAST'
         KEEP_NONE 'cudf::duplicate_keep_option::KEEP_NONE'
@@ -33,13 +34,14 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" \
         column_view boolean_mask
     ) except +
 
-    cdef unique_ptr[table] unique(
-        table_view source_table,
-        vector[size_type] keys,
-        duplicate_keep_option keep,
-        null_equality nulls_equal) except +
-
     cdef size_type distinct_count(
         column_view source_table,
         null_policy null_handling,
         nan_policy nan_handling) except +
+
+    cdef unique_ptr[table] stable_distinct(
+        table_view input,
+        vector[size_type] keys,
+        duplicate_keep_option keep,
+        null_equality nulls_equal,
+    ) except +
diff --git a/python/cudf/cudf/_lib/stream_compaction.pyx b/python/cudf/cudf/_lib/stream_compaction.pyx
index 143999e52ef..4422ad83885 100644
--- a/python/cudf/cudf/_lib/stream_compaction.pyx
+++ b/python/cudf/cudf/_lib/stream_compaction.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 
 from cudf.core.buffer import acquire_spill_lock
 
@@ -9,22 +9,19 @@ from libcpp.vector cimport vector
 
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column_view cimport column_view
-from cudf._lib.cpp.sorting cimport stable_sort_by_key as cpp_stable_sort_by_key
 from cudf._lib.cpp.stream_compaction cimport (
     apply_boolean_mask as cpp_apply_boolean_mask,
     distinct_count as cpp_distinct_count,
     drop_nulls as cpp_drop_nulls,
     duplicate_keep_option,
-    unique as cpp_unique,
+    stable_distinct as cpp_stable_distinct,
 )
 from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.cpp.types cimport (
     nan_policy,
     null_equality,
-    null_order,
     null_policy,
-    order,
     size_type,
 )
 from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
@@ -145,41 +142,13 @@ def drop_duplicates(list columns,
         if nulls_are_equal
         else null_equality.UNEQUAL
     )
-
-    cdef vector[order] column_order = (
-        vector[order](
-            cpp_keys.size(),
-            order.ASCENDING
-        )
-    )
-    cdef vector[null_order] null_precedence = (
-        vector[null_order](
-            cpp_keys.size(),
-            null_order.BEFORE
-        )
-    )
-
     cdef table_view source_table_view = table_view_from_columns(columns)
-    cdef table_view keys_view = source_table_view.select(cpp_keys)
-    cdef unique_ptr[table] sorted_source_table
     cdef unique_ptr[table] c_result
 
     with nogil:
-        # cudf::unique keeps unique rows in each consecutive group of
-        # equivalent rows. To match the behavior of pandas.DataFrame.
-        # drop_duplicates, users need to stable sort the input first
-        # and then invoke cudf::unique.
-        sorted_source_table = move(
-            cpp_stable_sort_by_key(
-                source_table_view,
-                keys_view,
-                column_order,
-                null_precedence
-            )
-        )
         c_result = move(
-            cpp_unique(
-                sorted_source_table.get().view(),
+            cpp_stable_distinct(
+                source_table_view,
                 cpp_keys,
                 cpp_keep_option,
                 cpp_nulls_equal
diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index a2e3bc44f3a..1fe30179001 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -1476,7 +1476,12 @@ def __array_function__(self, func, types, args, kwargs):
             if cudf_func is func:
                 return NotImplemented
             else:
-                return cudf_func(*args, **kwargs)
+                result = cudf_func(*args, **kwargs)
+                if fname == "unique":
+                    # NumPy expects a sorted result for `unique`, which is not
+                    # guaranteed by cudf.Index.unique.
+                    result = result.sort_values()
+                return result
 
         else:
             return NotImplemented
diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index 39332807139..d28851f4ace 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -1042,8 +1042,15 @@ def data_array_view(
     ) -> cuda.devicearray.DeviceNDArray:
         return self.codes.data_array_view(mode=mode)
 
-    def unique(self, preserve_order=False) -> CategoricalColumn:
-        codes = self.as_numerical.unique(preserve_order=preserve_order)
+    def unique(self, preserve_order=True) -> CategoricalColumn:
+        if preserve_order is not True:
+            warnings.warn(
+                "The preserve_order argument is deprecated. It will be "
+                "removed in a future version. As of now, unique always "
+                "preserves order regardless of the argument's value.",
+                FutureWarning,
+            )
+        codes = self.as_numerical.unique()
         return column.build_categorical_column(
             categories=self.categories,
             codes=column.build_column(codes.base_data, dtype=codes.dtype),
@@ -1397,9 +1404,7 @@ def _concat(
         head = next((obj for obj in objs if obj.valid_count), objs[0])
 
         # Combine and de-dupe the categories
-        cats = column.concat_columns([o.categories for o in objs]).unique(
-            preserve_order=True
-        )
+        cats = column.concat_columns([o.categories for o in objs]).unique()
         objs = [o._set_categories(cats, is_unique=True) for o in objs]
         codes = [o.codes for o in objs]
 
@@ -1538,10 +1543,7 @@ def _set_categories(
 
         # Ensure new_categories is unique first
         if not (is_unique or new_cats.is_unique):
-            # drop_duplicates() instead of unique() to preserve order
-            new_cats = cudf.Series(new_cats)._column.unique(
-                preserve_order=True
-            )
+            new_cats = cudf.Series(new_cats)._column.unique()
 
         cur_codes = self.codes
         max_cat_size = (
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 607bf83ff6c..255ac2582af 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1021,17 +1021,16 @@ def as_categorical_column(self, dtype, **kwargs) -> ColumnBase:
                 ordered=dtype.ordered,
             )
 
-        cats = self.unique().astype(self.dtype)
+        # Categories must be unique and sorted in ascending order.
+        cats = self.unique().sort_by_values()[0].astype(self.dtype)
         label_dtype = min_unsigned_type(len(cats))
         labels = self._label_encoding(
             cats=cats, dtype=label_dtype, na_sentinel=cudf.Scalar(1)
         )
-
         # columns include null index in factorization; remove:
         if self.has_nulls():
             cats = cats.dropna(drop_nan=False)
             min_type = min_unsigned_type(len(cats), 8)
-            labels = labels - 1
             if cudf.dtype(min_type).itemsize < labels.dtype.itemsize:
                 labels = labels.astype(min_type)
 
@@ -1132,25 +1131,17 @@ def searchsorted(
             values, side, ascending=ascending, na_position=na_position
         )
 
-    def unique(self, preserve_order=False) -> ColumnBase:
+    def unique(self, preserve_order=True) -> ColumnBase:
         """
         Get unique values in the data
         """
-        # TODO: We could avoid performing `drop_duplicates` for
-        # columns with values that already are unique.
-        # Few things to note before we can do this optimization is
-        # the following issue resolved:
-        # https://github.com/rapidsai/cudf/issues/5286
-        if preserve_order:
-            ind = as_column(cupy.arange(0, len(self)))
-
-            # dedup based on the column of data only
-            ind, col = drop_duplicates([ind, self], keys=[1])
-
-            # sort col based on ind
-            map = ind.argsort()
-            return col.take(map)
-
+        if preserve_order is not True:
+            warnings.warn(
+                "The preserve_order argument is deprecated. It will be "
+                "removed in a future version. As of now, unique always "
+                "preserves order regardless of the argument's value.",
+                FutureWarning,
+            )
         return drop_duplicates([self], keep="first")[0]
 
     def serialize(self) -> Tuple[dict, list]:
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index eb6685861d4..5fc4870105b 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -3165,34 +3165,46 @@ def diff(self, periods=1, axis=0):
 
     @_cudf_nvtx_annotate
     def drop_duplicates(
-        self, subset=None, keep="first", inplace=False, ignore_index=False
+        self,
+        subset=None,
+        keep="first",
+        inplace=False,
+        ignore_index=False,
     ):
         """
-        Return DataFrame with duplicate rows removed, optionally only
-        considering certain subset of columns.
+        Return DataFrame with duplicate rows removed.
+
+        Considering certain columns is optional. Indexes, including time
+        indexes, are ignored.
 
         Parameters
         ----------
         subset : column label or sequence of labels, optional
             Only consider certain columns for identifying duplicates, by
             default use all of the columns.
-        keep : {'first', 'last', False}, default 'first'
+        keep : {'first', 'last', ``False``}, default 'first'
             Determines which duplicates (if any) to keep.
-            - ``first`` : Drop duplicates except for the first occurrence.
-            - ``last`` : Drop duplicates except for the last occurrence.
-            - False : Drop all duplicates.
-        inplace : bool, default False
+            - 'first' : Drop duplicates except for the first occurrence.
+            - 'last' : Drop duplicates except for the last occurrence.
+            - ``False`` : Drop all duplicates.
+        inplace : bool, default ``False``
             Whether to drop duplicates in place or to return a copy.
-        ignore_index : bool, default False
-            If True, the resulting axis will be labeled 0, 1, …, n - 1.
+        ignore_index : bool, default ``False``
+            If True, the resulting axis will be labeled 0, 1, ..., n - 1.
 
         Returns
         -------
         DataFrame or None
             DataFrame with duplicates removed or None if ``inplace=True``.
 
+        See Also
+        --------
+        DataFrame.value_counts: Count unique combinations of columns.
+
         Examples
         --------
+        Consider a dataset containing ramen ratings.
+
         >>> import cudf
         >>> df = cudf.DataFrame({
         ...     'brand': ['Yum Yum', 'Yum Yum', 'Indomie', 'Indomie', 'Indomie'],
@@ -3207,36 +3219,34 @@ def drop_duplicates(
         3  Indomie  pack    15.0
         4  Indomie  pack     5.0
 
-        By default, it removes duplicate rows based
-        on all columns. Note that order of
-        the rows being returned is not guaranteed
-        to be sorted.
+        By default, it removes duplicate rows based on all columns.
 
         >>> df.drop_duplicates()
              brand style  rating
+        0  Yum Yum   cup     4.0
         2  Indomie   cup     3.5
-        4  Indomie  pack     5.0
         3  Indomie  pack    15.0
-        0  Yum Yum   cup     4.0
+        4  Indomie  pack     5.0
 
-        To remove duplicates on specific column(s),
-        use `subset`.
+        To remove duplicates on specific column(s), use ``subset``.
 
         >>> df.drop_duplicates(subset=['brand'])
              brand style  rating
-        2  Indomie   cup     3.5
         0  Yum Yum   cup     4.0
+        2  Indomie   cup     3.5
 
-        To remove duplicates and keep last occurrences, use `keep`.
+        To remove duplicates and keep last occurrences, use ``keep``.
 
         >>> df.drop_duplicates(subset=['brand', 'style'], keep='last')
              brand style  rating
+        1  Yum Yum   cup     4.0
         2  Indomie   cup     3.5
         4  Indomie  pack     5.0
-        1  Yum Yum   cup     4.0
         """  # noqa: E501
         outdf = super().drop_duplicates(
-            subset=subset, keep=keep, ignore_index=ignore_index
+            subset=subset,
+            keep=keep,
+            ignore_index=ignore_index,
         )
 
         return self._mimic_inplace(outdf, inplace=inplace)
@@ -7693,7 +7703,7 @@ def _find_common_dtypes_and_categories(non_null_columns, dtypes):
             # Combine and de-dupe the categories
             categories[idx] = cudf.Series(
                 concat_columns([col.categories for col in cols])
-            )._column.unique(preserve_order=True)
+            )._column.unique()
             # Set the column dtype to the codes' dtype. The categories
             # will be re-assigned at the end
             dtypes[idx] = min_scalar_type(len(categories[idx]))
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 245c93ff17e..b7faed1dfc3 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -308,7 +308,7 @@ def dtypes(self):
         2  object  int64
         3  object  int64
         """
-        index = self.grouping.keys.unique().to_pandas()
+        index = self.grouping.keys.unique().sort_values().to_pandas()
         return pd.DataFrame(
             {
                 name: [self.obj._dtypes[name]] * len(index)
@@ -678,7 +678,7 @@ def _head_tail(self, n, *, take_head: bool, preserve_order: bool):
             # subsample the gather map from the full input ordering,
             # rather than permuting the gather map of the output.
             _, (ordering,), _ = self._groupby.groups(
-                [arange(0, self.obj._data.nrows)]
+                [arange(0, len(self.obj))]
             )
             # Invert permutation from original order to groups on the
             # subset of entries we want.
@@ -864,25 +864,27 @@ def ngroup(self, ascending=True):
         5    0
         dtype: int64
         """
-        num_groups = len(index := self.grouping.keys.unique())
+        index = self.grouping.keys.unique().sort_values()
+        num_groups = len(index)
         _, has_null_group = bitmask_or([*index._columns])
 
         if ascending:
-            if has_null_group:
-                group_ids = cudf.Series._from_data(
-                    {None: cp.arange(-1, num_groups - 1)}
-                )
-            else:
-                group_ids = cudf.Series._from_data(
-                    {None: cp.arange(num_groups)}
-                )
+            # Count ascending from 0 to num_groups - 1
+            group_ids = cudf.Series._from_data({None: cp.arange(num_groups)})
+        elif has_null_group:
+            # Count descending from num_groups - 1 to 0, but subtract one more
+            # for the null group making it num_groups - 2 to -1.
+            group_ids = cudf.Series._from_data(
+                {None: cp.arange(num_groups - 2, -2, -1)}
+            )
         else:
+            # Count descending from num_groups - 1 to 0
             group_ids = cudf.Series._from_data(
                 {None: cp.arange(num_groups - 1, -1, -1)}
             )
 
         if has_null_group:
-            group_ids.iloc[0] = cudf.NA
+            group_ids.iloc[-1] = cudf.NA
 
         group_ids._index = index
         return self._broadcast(group_ids)
@@ -1065,7 +1067,7 @@ def _grouped(self):
             column_names=self.obj._column_names,
             index_names=self.obj._index_names,
         )
-        group_names = grouped_keys.unique()
+        group_names = grouped_keys.unique().sort_values()
         return (group_names, offsets, grouped_keys, grouped_values)
 
     def _normalize_aggs(
@@ -2270,11 +2272,29 @@ def _mimic_pandas_order(
         """
         # TODO: copy metadata after this method is a common pattern, should
         # merge in this method.
-        _, order_cols, _ = self._groupby.groups(
-            [arange(0, result._data.nrows)]
-        )
-        gather_map = order_cols[0].argsort()
-        result = result.take(gather_map)
+
+        # This function is used to reorder the results of scan-based
+        # groupbys which have the same output size as input size.
+        # However, if the grouping key has NAs and dropna=True, the
+        # result coming back from libcudf has null_count fewer rows than
+        # the input, so we must produce an ordering from the full
+        # input range.
+        _, (ordering,), _ = self._groupby.groups([arange(0, len(self.obj))])
+        if self._dropna and any(
+            c.has_nulls(include_nan=True) > 0
+            for c in self.grouping._key_columns
+        ):
+            # Scan aggregations with null/nan keys put nulls in the
+            # corresponding output rows in pandas, to do that here
+            # expand the result by reindexing.
+            ri = cudf.RangeIndex(0, len(self.obj))
+            result.index = cudf.Index(ordering)
+            # This reorders and expands
+            result = result.reindex(ri)
+        else:
+            # Just reorder according to the groupings
+            result = result.take(ordering.argsort())
+        # Now produce the actual index we first thought of
         result.index = self.obj.index
         return result
 
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 519b84faea0..8bdf0938dfb 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -3028,6 +3028,10 @@ def __init__(
         if copy:
             data = column.as_column(data, dtype=dtype).copy()
         kwargs = _setdefault_name(data, name=name)
+
+        if closed is None:
+            closed = "right"
+
         if isinstance(data, IntervalColumn):
             data = data
         elif isinstance(data, pd.Series) and (is_interval_dtype(data.dtype)):
diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py
index 4b784ac7b20..2055ecc96a0 100644
--- a/python/cudf/cudf/core/reshape.py
+++ b/python/cudf/cudf/core/reshape.py
@@ -1138,7 +1138,7 @@ def _get_unique(column, dummy_na):
     if isinstance(column, cudf.core.column.CategoricalColumn):
         unique = column.categories
     else:
-        unique = column.unique()
+        unique = column.unique().sort_by_values()[0]
     if not dummy_na:
         if np.issubdtype(unique.dtype, np.floating):
             unique = unique.nans_to_nulls()
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 0e8481dd820..a99eda6bd0b 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -1151,7 +1151,12 @@ def __array_function__(self, func, types, args, kwargs):
         try:
             # Apply a Series method if one exists.
             if cudf_func := getattr(Series, func.__name__, None):
-                return cudf_func(*args, **kwargs)
+                result = cudf_func(*args, **kwargs)
+                if func.__name__ == "unique":
+                    # NumPy expects a sorted result for `unique`, which is not
+                    # guaranteed by cudf.Series.unique.
+                    result = result.sort_values()
+                return result
 
             # Assume that cupy subpackages match numpy and search the
             # corresponding cupy submodule based on the func's __module__.
@@ -1718,20 +1723,20 @@ def drop_duplicates(self, keep="first", inplace=False, ignore_index=False):
         to be sorted.
 
         >>> s.drop_duplicates()
-        3    beetle
+        0      lama
         1       cow
+        3    beetle
         5     hippo
-        0      lama
         Name: animal, dtype: object
 
         The value 'last' for parameter `keep` keeps the last occurrence
         for each set of duplicated entries.
 
         >>> s.drop_duplicates(keep='last')
-        3    beetle
         1       cow
-        5     hippo
+        3    beetle
         4      lama
+        5     hippo
         Name: animal, dtype: object
 
         The value `False` for parameter `keep` discards all sets
@@ -1740,8 +1745,8 @@ def drop_duplicates(self, keep="first", inplace=False, ignore_index=False):
 
         >>> s.drop_duplicates(keep=False, inplace=True)
         >>> s
-        3    beetle
         1       cow
+        3    beetle
         5     hippo
         Name: animal, dtype: object
         """
@@ -2887,9 +2892,9 @@ def unique(self):
         6       c
         dtype: object
         >>> series.unique()
-        0    <NA>
-        1       a
-        2       b
+        0       a
+        1       b
+        2    <NA>
         3       c
         dtype: object
         """
diff --git a/python/cudf/cudf/tests/indexes/test_interval.py b/python/cudf/cudf/tests/indexes/test_interval.py
index 06777c8e6af..f80f6d8bb72 100644
--- a/python/cudf/cudf/tests/indexes/test_interval.py
+++ b/python/cudf/cudf/tests/indexes/test_interval.py
@@ -1 +1,18 @@
 # Copyright (c) 2023, NVIDIA CORPORATION.
+import pandas as pd
+import pyarrow as pa
+
+import cudf
+from cudf.testing._utils import assert_eq
+
+
+def test_interval_constructor_default_closed():
+    """An IntervalIndex built without `closed` defaults to right-closed."""
+    index = cudf.IntervalIndex([pd.Interval(0, 1)])
+    assert index.closed == index.dtype.closed == "right"
+
+
+def test_interval_to_arrow():
+    intervals = [pd.Interval(0, 1)]
+    from_pandas = pa.Array.from_pandas(pd.IntervalIndex(intervals))
+    assert_eq(from_pandas, cudf.IntervalIndex(intervals).to_arrow())
diff --git a/python/cudf/cudf/tests/test_array_function.py b/python/cudf/cudf/tests/test_array_function.py
index 65874c94b93..a355ebb40b2 100644
--- a/python/cudf/cudf/tests/test_array_function.py
+++ b/python/cudf/cudf/tests/test_array_function.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+# Copyright (c) 2018-2023, NVIDIA CORPORATION.
 import numpy as np
 import pandas as pd
 import pytest
@@ -94,15 +94,26 @@ def test_array_func_missing_cudf_dataframe(pd_df, func):
         func(cudf_df)
 
 
-# we only implement sum among all numpy non-ufuncs
 @pytest.mark.skipif(missing_arrfunc_cond, reason=missing_arrfunc_reason)
 @pytest.mark.parametrize("np_ar", [np.random.random(100)])
-@pytest.mark.parametrize("func", [lambda x: np.sum(x), lambda x: np.dot(x, x)])
+@pytest.mark.parametrize(
+    "func",
+    [
+        lambda x: np.mean(x),
+        lambda x: np.sum(x),
+        lambda x: np.var(x, ddof=1),
+        lambda x: np.unique(x),
+        lambda x: np.dot(x, x),
+    ],
+)
 def test_array_func_cudf_index(np_ar, func):
     cudf_index = cudf.core.index.as_index(cudf.Series(np_ar))
     expect = func(np_ar)
     got = func(cudf_index)
-    assert_eq(expect, got)
+    if np.isscalar(expect):
+        assert_eq(expect, got)
+    else:
+        assert_eq(expect, got.to_numpy())
 
 
 @pytest.mark.skipif(missing_arrfunc_cond, reason=missing_arrfunc_reason)
diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py
index 7651382e2ac..e2af85b9e73 100644
--- a/python/cudf/cudf/tests/test_groupby.py
+++ b/python/cudf/cudf/tests/test_groupby.py
@@ -2131,6 +2131,35 @@ def test_groupby_rank_fails():
         gdf.groupby(["a"]).rank(method="min", axis=1)
 
 
+@pytest.mark.parametrize(
+    "with_nan", [False, True], ids=["just-NA", "also-NaN"]
+)
+@pytest.mark.parametrize("dropna", [False, True], ids=["keepna", "dropna"])
+@pytest.mark.parametrize(
+    "duplicate_index", [False, True], ids=["rangeindex", "dupindex"]
+)
+def test_groupby_scan_null_keys(with_nan, dropna, duplicate_index):
+    """Grouped cumsum matches pandas when the key column contains nulls."""
+    # float32 turns the missing keys into NaN; nullable Int32 keeps them NA.
+    key_dtype = "float32" if with_nan else "Int32"
+    keys = pd.Series(
+        [None, 1, 2, None, 3, None, 3, 1, None, 1], dtype=key_dtype
+    )
+    pdf = pd.DataFrame({"key": keys, "value": range(10)})
+
+    if duplicate_index:
+        # Swap the default RangeIndex for labels that repeat.
+        pdf.index = [1, 2, 3, 1, 3, 2, 4, 1, 6, 10]
+
+    gdf = cudf.from_pandas(pdf)
+
+    # Run the identical grouped scan through both libraries.
+    def grouped_cumsum(frame):
+        return frame.groupby("key", dropna=dropna).cumsum()
+
+    assert_eq(grouped_cumsum(pdf), grouped_cumsum(gdf))
+
+
 def test_groupby_mix_agg_scan():
     err_msg = "Cannot perform both aggregation and scan in one operation"
     func = ["cumsum", "sum"]
@@ -3231,3 +3260,19 @@ def test_head_tail(self, df, n, take_head, expected, preserve_order):
         else:
             actual = df.groupby("a").tail(n=n, preserve_order=preserve_order)
         assert_eq(actual, expected)
+
+
+def test_head_tail_empty():
+    """head()/tail() on a column-less grouped frame match pandas."""
+    # See GH #13397.
+    labels = [1, 2, 3]
+    pandas_frame = pd.DataFrame({}, index=labels)
+    cudf_frame = cudf.DataFrame({}, index=labels)
+
+    for method in ("head", "tail"):
+        # Build fresh groupers each round, keyed by a Series of the labels.
+        pandas_grouped = pandas_frame.groupby(pd.Series(labels))
+        cudf_grouped = cudf_frame.groupby(cudf.Series(labels))
+        expected = getattr(pandas_grouped, method)()
+        got = getattr(cudf_grouped, method)()
+        assert_eq(expected, got)
diff --git a/python/cudf/cudf/tests/test_onehot.py b/python/cudf/cudf/tests/test_onehot.py
index e5ca2e028c3..6d5bfde7740 100644
--- a/python/cudf/cudf/tests/test_onehot.py
+++ b/python/cudf/cudf/tests/test_onehot.py
@@ -8,7 +8,7 @@
 
 import cudf
 from cudf import DataFrame
-from cudf.testing import _utils as utils
+from cudf.testing._utils import assert_eq
 
 pytestmark = pytest.mark.spilling
 
@@ -31,14 +31,14 @@ def test_get_dummies(data, index):
     with pytest.warns(FutureWarning):
         encoded_actual = cudf.get_dummies(gdf, prefix="test")
 
-    utils.assert_eq(
+    assert_eq(
         encoded_expected,
         encoded_actual,
         check_dtype=len(data) != 0,
     )
     encoded_actual = cudf.get_dummies(gdf, prefix="test", dtype=np.uint8)
 
-    utils.assert_eq(
+    assert_eq(
         encoded_expected,
         encoded_actual,
         check_dtype=len(data) != 0,
@@ -59,7 +59,7 @@ def test_onehot_get_dummies_multicol(n_cols):
     with pytest.warns(FutureWarning):
         encoded_actual = cudf.get_dummies(gdf, prefix="test")
 
-    utils.assert_eq(encoded_expected, encoded_actual)
+    assert_eq(encoded_expected, encoded_actual)
 
 
 @pytest.mark.parametrize("nan_as_null", [True, False])
@@ -75,7 +75,7 @@ def test_onehost_get_dummies_dummy_na(nan_as_null, dummy_na):
     if dummy_na and nan_as_null:
         got = got.rename(columns={"a_null": "a_nan"})[expected.columns]
 
-    utils.assert_eq(expected, got)
+    assert_eq(expected, got)
 
 
 @pytest.mark.parametrize(
@@ -115,7 +115,7 @@ def test_get_dummies_prefix_sep(prefix, prefix_sep):
             gdf, prefix=prefix, prefix_sep=prefix_sep
         )
 
-    utils.assert_eq(encoded_expected, encoded_actual)
+    assert_eq(encoded_expected, encoded_actual)
 
 
 def test_get_dummies_with_nan():
@@ -124,55 +124,55 @@ def test_get_dummies_with_nan():
     )
     expected = cudf.DataFrame(
         {
-            "a_null": [0, 0, 0, 1],
             "a_1.0": [1, 0, 0, 0],
             "a_2.0": [0, 1, 0, 0],
             "a_nan": [0, 0, 1, 0],
+            "a_null": [0, 0, 0, 1],
         },
         dtype="uint8",
     )
     with pytest.warns(FutureWarning):
         actual = cudf.get_dummies(df, dummy_na=True, columns=["a"])
 
-    utils.assert_eq(expected, actual)
+    assert_eq(expected, actual)
 
 
 @pytest.mark.parametrize(
     "data",
     [
-        cudf.Series(["abc", "l", "a", "abc", "z", "xyz"]),
-        cudf.Index([None, 1, 2, 3.3, None, 0.2]),
-        cudf.Series([0.1, 2, 3, None, np.nan]),
-        cudf.Series([23678, 324, 1, 324], name="abc"),
+        lambda: cudf.Series(["abc", "l", "a", "abc", "z", "xyz"]),
+        lambda: cudf.Index([None, 1, 2, 3.3, None, 0.2]),
+        lambda: cudf.Series([0.1, 2, 3, None, np.nan]),
+        lambda: cudf.Series([23678, 324, 1, 324], name="abc"),
     ],
 )
 @pytest.mark.parametrize("prefix_sep", ["-", "#"])
 @pytest.mark.parametrize("prefix", [None, "hi"])
 @pytest.mark.parametrize("dtype", ["uint8", "int16"])
 def test_get_dummies_array_like(data, prefix_sep, prefix, dtype):
-    actual = cudf.get_dummies(
-        data, prefix=prefix, prefix_sep=prefix_sep, dtype=dtype
-    )
-    if isinstance(data, (cudf.Series, cudf.BaseIndex)):
-        pd_data = data.to_pandas()
-    else:
-        pd_data = data
+    data = data()
+    pd_data = data.to_pandas()
 
     expected = pd.get_dummies(
         pd_data, prefix=prefix, prefix_sep=prefix_sep, dtype=dtype
     )
-    utils.assert_eq(expected, actual)
+
+    actual = cudf.get_dummies(
+        data, prefix=prefix, prefix_sep=prefix_sep, dtype=dtype
+    )
+
+    assert_eq(expected, actual)
 
 
 def test_get_dummies_array_like_with_nan():
     ser = cudf.Series([0.1, 2, 3, None, np.nan], nan_as_null=False)
     expected = cudf.DataFrame(
         {
-            "a_null": [0, 0, 0, 1, 0],
             "a_0.1": [1, 0, 0, 0, 0],
             "a_2.0": [0, 1, 0, 0, 0],
             "a_3.0": [0, 0, 1, 0, 0],
             "a_nan": [0, 0, 0, 0, 1],
+            "a_null": [0, 0, 0, 1, 0],
         },
         dtype="uint8",
     )
@@ -181,4 +181,4 @@ def test_get_dummies_array_like_with_nan():
             ser, dummy_na=True, prefix="a", prefix_sep="_"
         )
 
-    utils.assert_eq(expected, actual)
+    assert_eq(expected, actual)
diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py
index 816eb6468b0..2bddd93ccb8 100644
--- a/python/cudf/cudf/tests/test_string.py
+++ b/python/cudf/cudf/tests/test_string.py
@@ -1103,8 +1103,7 @@ def test_string_unique(item):
     gs = cudf.Series(item)
     # Pandas `unique` returns a numpy array
     pres = pd.Series(ps.unique())
-    # cudf returns sorted unique with `None` placed before other strings
-    pres = pres.sort_values(na_position="first").reset_index(drop=True)
+    # cudf returns a cudf.Series
     gres = gs.unique()
     assert_eq(pres, gres)
 
diff --git a/python/dask_cudf/dask_cudf/tests/test_groupby.py b/python/dask_cudf/dask_cudf/tests/test_groupby.py
index cfb951901d3..84a821aaf79 100644
--- a/python/dask_cudf/dask_cudf/tests/test_groupby.py
+++ b/python/dask_cudf/dask_cudf/tests/test_groupby.py
@@ -1,7 +1,5 @@
 # Copyright (c) 2021-2023, NVIDIA CORPORATION.
 
-import contextlib
-
 import numpy as np
 import pandas as pd
 import pytest
@@ -78,7 +76,18 @@ def test_groupby_basic(series, aggregation, pdf):
 
 # TODO: explore adding support with `.agg()`
 @pytest.mark.parametrize("series", [True, False])
-@pytest.mark.parametrize("aggregation", ["cumsum", "cumcount"])
+@pytest.mark.parametrize(
+    "aggregation",
+    [
+        "cumsum",
+        pytest.param(
+            "cumcount",
+            marks=pytest.mark.xfail(
+                reason="https://github.com/rapidsai/cudf/issues/13390"
+            ),
+        ),
+    ],
+)
 def test_groupby_cumulative(aggregation, pdf, series):
     gdf = cudf.DataFrame.from_pandas(pdf)
     ddf = dask_cudf.from_cudf(gdf, npartitions=5)
@@ -90,17 +99,10 @@ def test_groupby_cumulative(aggregation, pdf, series):
         gdf_grouped = gdf_grouped.xx
         ddf_grouped = ddf_grouped.xx
 
-    if pdf.isna().sum().any():
-        # https://github.com/rapidsai/cudf/issues/12055
-        gdf_grouped = gdf.groupby("xx")
-        context = pytest.raises(ValueError)
-    else:
-        context = contextlib.nullcontext()
-    with context:
-        a = getattr(gdf_grouped, aggregation)()
-        b = getattr(ddf_grouped, aggregation)()
+    a = getattr(gdf_grouped, aggregation)()
+    b = getattr(ddf_grouped, aggregation)()
 
-        dd.assert_eq(a, b)
+    dd.assert_eq(a, b)
 
 
 @pytest.mark.parametrize("aggregation", OPTIMIZED_AGGS)
@@ -136,7 +138,6 @@ def test_groupby_agg(func, aggregation, pdf):
 
 @pytest.mark.parametrize("split_out", [1, 3])
 def test_groupby_agg_empty_partition(tmpdir, split_out):
-
     # Write random and empty cudf DataFrames
     # to two distinct files.
     df = cudf.datasets.randomdata()
@@ -496,7 +497,6 @@ def test_groupby_mean_sort_false():
 
 
 def test_groupby_reset_index_dtype():
-
     # Make sure int8 dtype is properly preserved
     # Through various cudf/dask_cudf ops
     #