rapidsai · rapids-bot · Sep 12, 2022 · Aug 26, 2021 · Aug 27, 2021 · Jan 12, 2022
diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
@@ -113,7 +113,8 @@ outputs:
         - test -f $PREFIX/include/cudf/detail/transpose.hpp
         - test -f $PREFIX/include/cudf/detail/unary.hpp
         - test -f $PREFIX/include/cudf/detail/utilities/alignment.hpp
-        - test -f $PREFIX/include/cudf/detail/utilities/column.hpp
+        - test -f $PREFIX/include/cudf/detail/utilities/dremel.hpp
+        - test -f $PREFIX/include/cudf/detail/utilities/linked_column.hpp
         - test -f $PREFIX/include/cudf/detail/utilities/int_fastdiv.h
         - test -f $PREFIX/include/cudf/detail/utilities/integer_utils.hpp
         - test -f $PREFIX/include/cudf/detail/utilities/vector_factories.hpp

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -237,6 +237,7 @@ add_library(
   src/column/column_factories.cpp
   src/column/column_factories.cu
   src/column/column_view.cpp
+  src/column/dremel.cu
   src/copying/concatenate.cu
   src/copying/contiguous_split.cu
   src/copying/copy.cpp

@@ -165,7 +165,7 @@ ConfigureNVBench(SEARCH_NVBENCH search/contains.cpp)
 # ##################################################################################################
 # * sort benchmark --------------------------------------------------------------------------------
 ConfigureBench(SORT_BENCH sort/rank.cpp sort/sort.cpp sort/sort_strings.cpp)
-ConfigureNVBench(SORT_NVBENCH sort/sort_structs.cpp)
+ConfigureNVBench(SORT_NVBENCH sort/sort_lists.cpp sort/sort_structs.cpp)
 
 # ##################################################################################################
 # * quantiles benchmark

@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+#include <benchmarks/fixture/rmm_pool_raii.hpp>
+
+#include <cudf/detail/sorting.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+/**
+ * @brief Benchmark body: runs `cudf::detail::sorted_order` on a randomly generated table that
+ * holds a single LIST column.
+ *
+ * The table's size in bytes, the list nesting depth and the null frequency are read from the
+ * "size_bytes", "depth" and "null_frequency" axes of the benchmark state.
+ *
+ * @param state NVBench state supplying the axis values and driving the measurement
+ */
+void nvbench_sort_lists(nvbench::state& state)
+{
+  cudf::rmm_pool_raii pool_raii;
+
+  // Pull the axis values for this benchmark configuration.
+  auto const num_bytes  = static_cast<size_t>(state.get_int64("size_bytes"));
+  auto const list_depth = static_cast<cudf::size_type>(state.get_int64("depth"));
+  double const null_freq = state.get_float64("null_frequency");
+
+  // Describe the random input: uniformly distributed list lengths in [0, 5].
+  data_profile profile;
+  profile.set_distribution_params(cudf::type_id::LIST, distribution_id::UNIFORM, 0, 5);
+  profile.set_list_depth(list_depth);
+  profile.set_null_frequency(null_freq);
+
+  auto const input_table =
+    create_random_table({cudf::type_id::LIST}, table_size_bytes{num_bytes}, profile);
+
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    // Run the sort on the stream NVBench hands us; the result is discarded.
+    rmm::cuda_stream_view bench_stream{launch.get_stream()};
+    cudf::detail::sorted_order(
+      *input_table, {}, {}, bench_stream, rmm::mr::get_current_device_resource());
+  });
+}
+
+NVBENCH_BENCH(nvbench_sort_lists)
+  .set_name("sort_list")
+  .add_int64_power_of_two_axis("size_bytes", {10, 18, 24, 28})  // exponents: 2^10 .. 2^28 bytes
+  .add_int64_axis("depth", {1, 4})                              // list nesting depth
+  .add_float64_axis("null_frequency", {0, 0.2});                // fed to the data profile
diff --git a/cpp/include/cudf/detail/utilities/dremel.hpp b/cpp/include/cudf/detail/utilities/dremel.hpp
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cudf/column/column.hpp>
+
+#include <rmm/device_uvector.hpp>
+
+namespace cudf::detail {
+
+/**
+ * @brief Device view for `dremel_data`.
+ *
+ * Holds raw pointers to the buffers of a `dremel_data` object together with copies of its
+ * scalar members. It is produced by the conversion operator of `dremel_data`, which fills the
+ * members in declaration order.
+ *
+ * @see the `dremel_data` struct for more info.
+ */
+struct dremel_device_view {
+  size_type* offsets;        // data pointer of `dremel_data::dremel_offsets`
+  uint8_t* rep_levels;       // data pointer of `dremel_data::rep_level`
+  uint8_t* def_levels;       // data pointer of `dremel_data::def_level`
+  size_type leaf_data_size;  // copy of `dremel_data::leaf_data_size`
+  uint8_t max_def_level;     // copy of `dremel_data::max_def_level`
+};
+
+/**
+ * @brief Container for the Dremel encoding of one nested type column.
+ *
+ * Stores the per-row offsets and the repetition and definition level arrays in
+ * `rmm::device_uvector` buffers, and converts implicitly to a `dremel_device_view` that points
+ * at those buffers.
+ *
+ * @see get_dremel_data() for more info.
+ */
+struct dremel_data {
+  rmm::device_uvector<size_type> dremel_offsets;
+  rmm::device_uvector<uint8_t> rep_level;
+  rmm::device_uvector<uint8_t> def_level;
+
+  size_type leaf_data_size;
+  uint8_t max_def_level;
+
+  /**
+   * @brief Builds a `dremel_device_view` that refers to this object's buffers.
+   */
+  operator dremel_device_view()
+  {
+    auto* const offsets_ptr = dremel_offsets.data();
+    auto* const rep_ptr     = rep_level.data();
+    auto* const def_ptr     = def_level.data();
+    return dremel_device_view{offsets_ptr, rep_ptr, def_ptr, leaf_data_size, max_def_level};
+  }
+};
+
+/**
+ * @brief Get the dremel offsets and repetition and definition levels for a LIST column
+ *
+ * Dremel is a query system created by Google for ad hoc data analysis. The Dremel engine is
+ * described in depth in the paper "Dremel: Interactive Analysis of Web-Scale
+ * Datasets" (https://research.google/pubs/pub36632/). One of the key components of Dremel
+ * is an encoding that converts record-like data into a columnar store for efficient memory
+ * accesses. The Parquet file format uses Dremel encoding to handle nested data, so libcudf
+ * requires some facilities for working with this encoding. Furthermore, libcudf leverages
+ * Dremel encoding as a means for performing lexicographic comparisons of nested columns.
+ *
+ * Dremel encoding is built around two concepts, the repetition and definition levels.
+ * Since describing them thoroughly is out of scope for this docstring, here are a couple of
+ * blogs that provide useful background:
+ * http://www.goldsborough.me/distributed-systems/2019/05/18/21-09-00-a_look_at_dremel/
+ * https://akshays-blog.medium.com/wrapping-head-around-repetition-and-definition-levels-in-dremel-powering-bigquery-c1a33c9695da
+ *
+ * The remainder of this documentation assumes familiarity with the Dremel concepts.
+ *
+ * Dremel offsets are the per row offsets into the repetition and definition level arrays for a
+ * column.
+ * Example:
+ * ```
+ * col            = {{1, 2, 3}, { }, {5, 6}}
+ * dremel_offsets = { 0,         3,   4,  6}
+ * rep_level      = { 0, 1, 1,   0,   0, 1}
+ * def_level      = { 1, 1, 1,   0,   1, 1}
+ * ```
+ *
+ * The repetition and definition level values are ideally computed using a recursive call over a
+ * nested structure but in order to better utilize GPU resources, this function calculates them
+ * with a bottom up merge method.
+ *
+ * Given a LIST column of type `List<List<int>>` like so:
+ * ```
+ * col = {
+ *    [],
+ *    [[], [1, 2, 3], [4, 5]],
+ *    [[]]
+ * }
+ * ```
+ * We can represent it in cudf format with two levels of offsets like this:
+ * ```
+ * Level 0 offsets = {0, 0, 3, 4}
+ * Level 1 offsets = {0, 0, 3, 5, 5}
+ * Values          = {1, 2, 3, 4, 5}
+ * ```
+ * The desired result of this function is the repetition and definition level values that
+ * correspond to the data values:
+ * ```
+ * col = {[], [[], [1, 2, 3], [4, 5]], [[]]}
+ * def = { 0,   1,  2, 2, 2,   2, 2,     1 }
+ * rep = { 0,   0,  1, 2, 2,   1, 2,     0 }
+ * ```
+ *
+ * Since repetition and definition levels arrays contain a value for each empty list, the size of
+ * the rep/def level array can be given by
+ * ```
+ * rep_level.size() = size of leaf column + number of empty lists in level 0
+ *                                        + number of empty lists in level 1 ...
+ * ```
+ *
+ * We start with finding the empty lists in the penultimate level and merging them with the
+ * indices of the leaf level. The values for the merge are the definition and repetition levels:
+ * ```
+ * empty lists at level 1       = {0, 5}
+ * definition values at level 1 = {1, 1}
+ * repetition values at level 1 = {1, 1}
+ * indices at leaf              = {0, 1, 2, 3, 4}
+ * definition values at leaf    = {2, 2, 2, 2, 2}
+ * repetition values at leaf    = {2, 2, 2, 2, 2}
+ *
+ * merged def values            = {1, 2, 2, 2, 2, 2, 1}
+ * merged rep values            = {1, 2, 2, 2, 2, 2, 1}
+ * ```
+ *
+ * The size of the rep/def values is now larger than the leaf values and the offsets need to be
+ * adjusted in order to point to the correct start indices. We do this with an exclusive scan over
+ * the indices of offsets of empty lists and adding to existing offsets.
+ * ```
+ * Level 1 new offsets = {0, 1, 4, 6, 7}
+ * ```
+ * Repetition values at the beginning of a list need to be decremented. We use the new offsets to
+ * scatter the rep value.
+ * ```
+ * merged rep values  = {1, 2, 2, 2, 2, 2, 1}
+ * scatter (1, new offsets)
+ * new offsets        = {0, 1,       4,    6, 7}
+ * new rep values     = {1, 1, 2, 2, 1, 2, 1}
+ * ```
+ *
+ * Similarly, we keep merging upwards until the level 0 offsets have been processed.
+ *
+ * STRUCT COLUMNS :
+ * In case of struct columns, we don't have to merge struct levels with their children because a
+ * struct is the same size as its children. e.g. for a column `struct<int, float>`, if the row `i`
+ * is null, then the children columns `int` and `float` are also null at `i`. They also have the
+ * null entry represented in their respective null masks. So for any case of strictly struct based
+ * nesting, we can get the definition levels merely by iterating over the nesting for the same row.
+ *
+ * In case struct and lists are intermixed, the definition levels of all the contiguous struct
+ * levels can be constructed using the aforementioned iterative method. Only when we reach a list
+ * level, we need to do a merge with the subsequent level.
+ *
+ * So, for a column like `struct<list<int>>`, we are going to merge between the levels `struct<list`
+ * and `int`.
+ * For a column like `list<struct<int>>`, we are going to merge between `list` and `struct<int>`.
+ *
+ * In general, one nesting level is the list level and any struct level that precedes it.
+ *
+ * A few more examples to visualize the partitioning of column hierarchy into nesting levels:
+ * (L is list, S is struct, i is integer(leaf data level), angle brackets omitted)
+ * ```
+ * 1. LSi     = L   Si
+ *              - | --
+ *
+ * 2. LLSi    = L   L   Si
+ *              - | - | --
+ *
+ * 3. SSLi    = SSL   i
+ *              --- | -
+ *
+ * 4. LLSLSSi = L   L   SL   SSi
+ *              - | - | -- | ---
+ * ```
+ *
+ * @param h_col Column of LIST type
+ * @param nullability Pre-determined nullability at each list level. Empty means infer from
+ * `h_col`
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ *
+ * @return A struct containing dremel data
+ */
+dremel_data get_dremel_data(column_view h_col,
+                            std::vector<uint8_t> nullability,
+                            rmm::cuda_stream_view stream);
+
+}  // namespace cudf::detail
diff --git a/cpp/include/cudf/detail/utilities/column.hpp → ...e/cudf/detail/utilities/linked_column.hpp b/cpp/include/cudf/detail/utilities/column.hpp → ...e/cudf/detail/utilities/linked_column.hpp