Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support exporting cut values #9356

Merged
merged 11 commits into from
Jul 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions include/xgboost/c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -810,7 +810,7 @@ XGB_DLL int XGDMatrixNumCol(DMatrixHandle handle,
*/
XGB_DLL int XGDMatrixNumNonMissing(DMatrixHandle handle, bst_ulong *out);

/*!
/**
* \brief Get the predictors from DMatrix as CSR matrix for testing. If this is a
* quantized DMatrix, quantized values are returned instead.
*
Expand All @@ -819,8 +819,10 @@ XGB_DLL int XGDMatrixNumNonMissing(DMatrixHandle handle, bst_ulong *out);
* XGBoost. This is to avoid allocating a huge memory buffer that can not be freed until
* exiting the thread.
*
* @since 1.7.0
*
* \param handle the handle to the DMatrix
* \param config Json configuration string. At the moment it should be an empty document,
* \param config JSON configuration string. At the moment it should be an empty document,
* preserved for future use.
* \param out_indptr indptr of output CSR matrix.
* \param out_indices Column index of output CSR matrix.
Expand All @@ -831,6 +833,24 @@ XGB_DLL int XGDMatrixNumNonMissing(DMatrixHandle handle, bst_ulong *out);
XGB_DLL int XGDMatrixGetDataAsCSR(DMatrixHandle const handle, char const *config,
bst_ulong *out_indptr, unsigned *out_indices, float *out_data);

/**
* @brief Export the quantile cuts used for training histogram-based models like `hist` and
* `approx`. Useful for model compression.
*
* @since 2.0.0
*
* @param handle the handle to the DMatrix
* @param config JSON configuration string. At the moment it should be an empty document,
* preserved for future use.
*
* @param out_indptr indptr of output CSC matrix represented by a JSON encoded
* __(cuda_)array_interface__.
* @param out_data Data value of CSC matrix represented by a JSON encoded
* __(cuda_)array_interface__.
*/
XGB_DLL int XGDMatrixGetQuantileCut(DMatrixHandle const handle, char const *config,
char const **out_indptr, char const **out_data);

/** @} */ // End of DMatrix

/**
Expand Down
86 changes: 22 additions & 64 deletions include/xgboost/data.h
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ struct BatchParam {
BatchParam(bst_bin_t max_bin, common::Span<float> hessian, bool regenerate)
: max_bin{max_bin}, hess{hessian}, regen{regenerate} {}

bool ParamNotEqual(BatchParam const& other) const {
[[nodiscard]] bool ParamNotEqual(BatchParam const& other) const {
// Check non-floating parameters.
bool cond = max_bin != other.max_bin;
// Check sparse thresh.
Expand All @@ -293,11 +293,11 @@ struct BatchParam {

return cond;
}
bool Initialized() const { return max_bin != 0; }
[[nodiscard]] bool Initialized() const { return max_bin != 0; }
/**
* \brief Make a copy of self for DMatrix to describe how its existing index was generated.
*/
BatchParam MakeCache() const {
[[nodiscard]] BatchParam MakeCache() const {
auto p = *this;
// These parameters have nothing to do with how the gradient index was generated in the
// first place.
Expand All @@ -319,7 +319,7 @@ struct HostSparsePageView {
static_cast<Inst::index_type>(size)};
}

size_t Size() const { return offset.size() == 0 ? 0 : offset.size() - 1; }
[[nodiscard]] size_t Size() const { return offset.size() == 0 ? 0 : offset.size() - 1; }
};

/*!
Expand All @@ -337,7 +337,7 @@ class SparsePage {
/*! \brief an instance of sparse vector in the batch */
using Inst = common::Span<Entry const>;

HostSparsePageView GetView() const {
[[nodiscard]] HostSparsePageView GetView() const {
return {offset.ConstHostSpan(), data.ConstHostSpan()};
}

Expand All @@ -353,12 +353,12 @@ class SparsePage {
virtual ~SparsePage() = default;

/*! \return Number of instances in the page. */
inline size_t Size() const {
[[nodiscard]] size_t Size() const {
return offset.Size() == 0 ? 0 : offset.Size() - 1;
}

/*! \return estimation of memory cost of this page */
inline size_t MemCostBytes() const {
[[nodiscard]] size_t MemCostBytes() const {
return offset.Size() * sizeof(size_t) + data.Size() * sizeof(Entry);
}

Expand All @@ -376,7 +376,7 @@ class SparsePage {
base_rowid = row_id;
}

SparsePage GetTranspose(int num_columns, int32_t n_threads) const;
[[nodiscard]] SparsePage GetTranspose(int num_columns, int32_t n_threads) const;

/**
* \brief Sort the column index.
Expand All @@ -385,7 +385,7 @@ class SparsePage {
/**
* \brief Check whether the column index is sorted.
*/
bool IsIndicesSorted(int32_t n_threads) const;
[[nodiscard]] bool IsIndicesSorted(int32_t n_threads) const;
/**
* \brief Reindex the column index with an offset.
*/
Expand Down Expand Up @@ -440,49 +440,7 @@ class SortedCSCPage : public SparsePage {
explicit SortedCSCPage(SparsePage page) : SparsePage(std::move(page)) {}
};

class EllpackPageImpl;
/*!
* \brief A page stored in ELLPACK format.
*
* This class uses the PImpl idiom (https://en.cppreference.com/w/cpp/language/pimpl) to avoid
* including CUDA-specific implementation details in the header.
*/
class EllpackPage {
public:
/*!
* \brief Default constructor.
*
* This is used in the external memory case. An empty ELLPACK page is constructed with its content
* set later by the reader.
*/
EllpackPage();

/*!
* \brief Constructor from an existing DMatrix.
*
* This is used in the in-memory case. The ELLPACK page is constructed from an existing DMatrix
* in CSR format.
*/
explicit EllpackPage(Context const* ctx, DMatrix* dmat, const BatchParam& param);

/*! \brief Destructor. */
~EllpackPage();

EllpackPage(EllpackPage&& that);

/*! \return Number of instances in the page. */
size_t Size() const;

/*! \brief Set the base row id for this page. */
void SetBaseRowId(std::size_t row_id);

const EllpackPageImpl* Impl() const { return impl_.get(); }
EllpackPageImpl* Impl() { return impl_.get(); }

private:
std::unique_ptr<EllpackPageImpl> impl_;
};

class EllpackPage;
class GHistIndexMatrix;

template<typename T>
Expand All @@ -492,7 +450,7 @@ class BatchIteratorImpl {
virtual ~BatchIteratorImpl() = default;
virtual const T& operator*() const = 0;
virtual BatchIteratorImpl& operator++() = 0;
virtual bool AtEnd() const = 0;
[[nodiscard]] virtual bool AtEnd() const = 0;
virtual std::shared_ptr<T const> Page() const = 0;
};

Expand All @@ -519,12 +477,12 @@ class BatchIterator {
return !impl_->AtEnd();
}

bool AtEnd() const {
[[nodiscard]] bool AtEnd() const {
CHECK(impl_ != nullptr);
return impl_->AtEnd();
}

std::shared_ptr<T const> Page() const {
[[nodiscard]] std::shared_ptr<T const> Page() const {
return impl_->Page();
}

Expand Down Expand Up @@ -563,15 +521,15 @@ class DMatrix {
this->Info().SetInfo(ctx, key, StringView{interface_str});
}
/*! \brief meta information of the dataset */
virtual const MetaInfo& Info() const = 0;
[[nodiscard]] virtual const MetaInfo& Info() const = 0;

/*! \brief Get thread local memory for returning data from DMatrix. */
XGBAPIThreadLocalEntry& GetThreadLocal() const;
[[nodiscard]] XGBAPIThreadLocalEntry& GetThreadLocal() const;
/**
* \brief Get the context object of this DMatrix. The context is created during construction of
* DMatrix with user specified `nthread` parameter.
*/
virtual Context const* Ctx() const = 0;
[[nodiscard]] virtual Context const* Ctx() const = 0;

/**
* \brief Gets batches. Use range based for loop over BatchSet to access individual batches.
Expand All @@ -583,16 +541,16 @@ class DMatrix {
template <typename T>
BatchSet<T> GetBatches(Context const* ctx, const BatchParam& param);
template <typename T>
bool PageExists() const;
[[nodiscard]] bool PageExists() const;

// the following are column meta data, should be able to answer them fast.
/*! \return Whether the data columns single column block. */
virtual bool SingleColBlock() const = 0;
[[nodiscard]] virtual bool SingleColBlock() const = 0;
/*! \brief virtual destructor */
virtual ~DMatrix();

/*! \brief Whether the matrix is dense. */
bool IsDense() const {
[[nodiscard]] bool IsDense() const {
return Info().num_nonzero_ == Info().num_row_ * Info().num_col_;
}

Expand Down Expand Up @@ -695,9 +653,9 @@ class DMatrix {
BatchParam const& param) = 0;
virtual BatchSet<ExtSparsePage> GetExtBatches(Context const* ctx, BatchParam const& param) = 0;

virtual bool EllpackExists() const = 0;
virtual bool GHistIndexExists() const = 0;
virtual bool SparsePageExists() const = 0;
[[nodiscard]] virtual bool EllpackExists() const = 0;
[[nodiscard]] virtual bool GHistIndexExists() const = 0;
[[nodiscard]] virtual bool SparsePageExists() const = 0;
};

template <>
Expand Down
75 changes: 75 additions & 0 deletions python-package/xgboost/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""Core XGBoost Library."""
import copy
import ctypes
import importlib.util
import json
import os
import re
Expand Down Expand Up @@ -381,6 +382,54 @@ def c_array(
return (ctype * len(values))(*values)


def from_array_interface(interface: dict) -> NumpyOrCupy:
    """Convert array interface to numpy or cupy array"""

    class Array:  # pylint: disable=too-few-public-methods
        """Wrapper type for communicating with numpy and cupy."""

        _interface: Optional[dict] = None

        @property
        def __array_interface__(self) -> Optional[dict]:
            return self._interface

        @__array_interface__.setter
        def __array_interface__(self, interface: dict) -> None:
            # numpy requires `shape`, `data` (and `strides`, when present) to be
            # tuples; a JSON-decoded interface carries them as lists.
            spec = copy.copy(interface)
            spec["shape"] = tuple(spec["shape"])
            spec["data"] = tuple(spec["data"])
            if spec.get("strides", None) is not None:
                spec["strides"] = tuple(spec["strides"])
            self._interface = spec

        @property
        def __cuda_array_interface__(self) -> Optional[dict]:
            return self.__array_interface__

        @__cuda_array_interface__.setter
        def __cuda_array_interface__(self, interface: dict) -> None:
            self.__array_interface__ = interface

    wrapper = Array()

    if "stream" in interface:
        # A CUDA stream is present, so this is a __cuda_array_interface__ and
        # the buffer lives on the device; cupy is needed to read it.
        if importlib.util.find_spec("cupy") is None:
            raise ImportError("`cupy` is required for handling CUDA buffer.")

        import cupy as cp  # pylint: disable=import-error

        wrapper.__cuda_array_interface__ = interface
        return cp.array(wrapper, copy=True)

    wrapper.__array_interface__ = interface
    return np.array(wrapper, copy=True)


def _prediction_output(
shape: CNumericPtr, dims: c_bst_ulong, predts: CFloatPtr, is_cuda: bool
) -> NumpyOrCupy:
Expand Down Expand Up @@ -1060,6 +1109,32 @@ def get_data(self) -> scipy.sparse.csr_matrix:
)
return ret

def get_quantile_cut(self) -> Tuple[np.ndarray, np.ndarray]:
    """Get quantile cuts for quantization."""
    n_features = self.num_col()

    sindptr = ctypes.c_char_p()
    sdata = ctypes.c_char_p()
    _check_call(
        _LIB.XGDMatrixGetQuantileCut(
            self.handle, make_jcargs(), ctypes.byref(sindptr), ctypes.byref(sdata)
        )
    )
    assert sindptr.value is not None
    assert sdata.value is not None

    # Both outputs come back as JSON-encoded (cuda_)array_interface documents.
    indptr = from_array_interface(json.loads(sindptr.value))
    data = from_array_interface(json.loads(sdata.value))

    # Sanity-check the CSC layout: one indptr entry per feature plus one, and
    # the data buffer holds exactly indptr[-1] cut values.
    assert indptr.size == n_features + 1
    assert indptr.dtype == np.uint64
    assert data.size == indptr[-1]
    assert data.dtype == np.float32
    return indptr, data

def num_row(self) -> int:
"""Get the number of rows in the DMatrix."""
ret = c_bst_ulong()
Expand Down
8 changes: 8 additions & 0 deletions python-package/xgboost/testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,14 @@ def make_batches(
return X, y, w


def make_regression(
    n_samples: int, n_features: int, use_cupy: bool
) -> Tuple[ArrayLike, ArrayLike, ArrayLike]:
    """Make a simple regression dataset."""
    # Generate a single batch and unpack it into plain arrays.
    batches = make_batches(n_samples, n_features, 1, use_cupy)
    return batches[0][0], batches[1][0], batches[2][0]


def make_batches_sparse(
n_samples_per_batch: int, n_features: int, n_batches: int, sparsity: float
) -> Tuple[List[sparse.csr_matrix], List[np.ndarray], List[np.ndarray]]:
Expand Down
Loading