[MT-TREE] Support prediction cache and model slicing. #8968

Merged
merged 14 commits on Mar 27, 2023
3 changes: 0 additions & 3 deletions demo/json-model/json_parser.py
@@ -162,9 +162,6 @@ def __init__(self, model: dict) -> None:

# Load the trees
self.num_trees = int(model_shape["num_trees"])
self.leaf_size = int(model_shape["size_leaf_vector"])
# Right now XGBoost doesn't support vector leaf yet
assert self.leaf_size == 0, str(self.leaf_size)

trees: List[Tree] = []
for i in range(self.num_trees):
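Dropping the assertion means the demo parser no longer rejects models whose leaves store vectors. A minimal sketch of how the relevant shape fields can be read from a saved JSON model; the `model.json` path is illustrative, and the field layout follows the schema touched by this PR:

```python
import json

# A model previously saved with booster.save_model("model.json"); path is illustrative.
with open("model.json", "r") as fd:
    model = json.load(fd)["learner"]["gradient_booster"]["model"]

num_trees = int(model["gbtree_model_param"]["num_trees"])
# size_leaf_vector is a per-tree property; the old demo asserted it was 0,
# while vector-leaf trees can now report a larger value.
leaf_sizes = {int(t["tree_param"]["size_leaf_vector"]) for t in model["trees"]}
print(f"num_trees={num_trees}, size_leaf_vector values={leaf_sizes}")
```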
23 changes: 2 additions & 21 deletions doc/model.schema
@@ -19,23 +19,7 @@
"type": "object",
"properties": {
"tree_param": {
"type": "object",
"properties": {
"num_nodes": {
"type": "string"
},
"size_leaf_vector": {
"type": "string"
},
"num_feature": {
"type": "string"
}
},
"required": [
"num_nodes",
"num_feature",
"size_leaf_vector"
]
"$ref": "#/definitions/tree_param"
},
"id": {
"type": "integer"
@@ -170,14 +154,11 @@
},
"num_parallel_tree": {
"type": "string"
},
"size_leaf_vector": {
"type": "string"
}
},
"required": [
"num_trees",
"size_leaf_vector"
"num_parallel_tree"
]
},
"tree_param": {
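With `tree_param` factored into a shared definition and `size_leaf_vector` dropped from the gbtree model parameters, a saved model can be checked against the updated schema directly. A small sketch using the third-party `jsonschema` package; file paths are illustrative:

```python
import json

import jsonschema  # third-party: pip install jsonschema

with open("doc/model.schema", "r") as fd:
    schema = json.load(fd)
with open("model.json", "r") as fd:
    model = json.load(fd)

# Raises jsonschema.exceptions.ValidationError if the model does not conform.
jsonschema.validate(instance=model, schema=schema)
print("model.json conforms to doc/model.schema")
```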
12 changes: 11 additions & 1 deletion include/xgboost/base.h
@@ -113,8 +113,18 @@ using bst_row_t = std::size_t; // NOLINT
using bst_node_t = std::int32_t; // NOLINT
/*! \brief Type for ranking group index. */
using bst_group_t = std::uint32_t; // NOLINT
/*! \brief Type for indexing into output targets. */
/**
* \brief Type for indexing into output targets.
*/
using bst_target_t = std::uint32_t; // NOLINT
/**
* \brief Type for indexing boosted layers.
*/
using bst_layer_t = std::int32_t; // NOLINT
/**
* \brief Type for indexing trees.
*/
using bst_tree_t = std::int32_t; // NOLINT

namespace detail {
/*! \brief Implementation of gradient statistics pair. Template specialisation
49 changes: 23 additions & 26 deletions include/xgboost/gbm.h
@@ -59,16 +59,16 @@ class GradientBooster : public Model, public Configurable {
* \param fo output stream
*/
virtual void Save(dmlc::Stream* fo) const = 0;
/*!
/**
* \brief Slice a model using boosting index. The slice m:n indicates taking all trees
* that were fit during the boosting rounds m, (m+1), (m+2), ..., (n-1).
* \param layer_begin Beginning of boosted tree layer used for prediction.
* \param layer_end End of booster layer. 0 means do not limit trees.
* \param out Output gradient booster
* \param begin Beginning of boosted tree layer used for prediction.
* \param end End of booster layer. 0 means do not limit trees.
* \param out Output gradient booster
*/
virtual void Slice(int32_t /*layer_begin*/, int32_t /*layer_end*/, int32_t /*step*/,
virtual void Slice(bst_layer_t /*begin*/, bst_layer_t /*end*/, bst_layer_t /*step*/,
GradientBooster* /*out*/, bool* /*out_of_bound*/) const {
LOG(FATAL) << "Slice is not supported by current booster.";
LOG(FATAL) << "Slice is not supported by the current booster.";
}
/*! \brief Return number of boosted rounds.
*/
@@ -88,34 +88,31 @@ class GradientBooster : public Model, public Configurable {
virtual void DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
PredictionCacheEntry*, ObjFunction const* obj) = 0;

/*!
* \brief generate predictions for given feature matrix
* \param dmat feature matrix
/**
* \brief Generate predictions for given feature matrix
*
* \param dmat The feature matrix.
* \param out_preds output vector to hold the predictions
* \param training Whether the prediction value is used for training. For dart booster
* drop out is performed during training.
* \param layer_begin Beginning of boosted tree layer used for prediction.
* \param layer_end End of booster layer. 0 means do not limit trees.
* \param begin Beginning of boosted tree layer used for prediction.
* \param end End of booster layer. 0 means do not limit trees.
*/
virtual void PredictBatch(DMatrix* dmat,
PredictionCacheEntry* out_preds,
bool training,
unsigned layer_begin,
unsigned layer_end) = 0;
virtual void PredictBatch(DMatrix* dmat, PredictionCacheEntry* out_preds, bool training,
bst_layer_t begin, bst_layer_t end) = 0;

/*!
/**
* \brief Inplace prediction.
*
* \param p_fmat A proxy DMatrix that contains the data and related
* meta info.
* \param missing Missing value in the data.
* \param [in,out] out_preds The output preds.
* \param layer_begin (Optional) Beginning of boosted tree layer used for prediction.
* \param layer_end (Optional) End of booster layer. 0 means do not limit trees.
* \param p_fmat A proxy DMatrix that contains the data and related meta info.
* \param missing Missing value in the data.
* \param [in,out] out_preds The output preds.
* \param begin (Optional) Beginning of boosted tree layer used for prediction.
* \param end (Optional) End of booster layer. 0 means do not limit trees.
*/
virtual void InplacePredict(std::shared_ptr<DMatrix>, float, PredictionCacheEntry*, uint32_t,
uint32_t) const {
LOG(FATAL) << "Inplace predict is not supported by current booster.";
virtual void InplacePredict(std::shared_ptr<DMatrix>, float, PredictionCacheEntry*, bst_layer_t,
bst_layer_t) const {
LOG(FATAL) << "Inplace predict is not supported by the current booster.";
}
/*!
* \brief online prediction function, predict score for one instance at a time
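The layer semantics documented above are what the Python `Booster` exposes through slicing: `booster[m:n]` keeps the trees fit during rounds m through n-1 (each round contributes `num_parallel_tree` trees per output group). A minimal sketch on synthetic data:

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.normal(size=(256, 8))
y = 2.0 * X[:, 0] + rng.normal(scale=0.1, size=256)
dtrain = xgb.DMatrix(X, label=y)

booster = xgb.train({"max_depth": 3, "eta": 0.3}, dtrain, num_boost_round=8)

# Keep only the trees fit during boosting rounds 2, 3 and 4.
sliced = booster[2:5]
assert sliced.num_boosted_rounds() == 3

# output_margin returns the raw sum of the selected trees plus the base score.
print(sliced.predict(dtrain, output_margin=True)[:3])
```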
8 changes: 4 additions & 4 deletions include/xgboost/learner.h
@@ -9,7 +9,7 @@
#define XGBOOST_LEARNER_H_

#include <dmlc/io.h> // for Serializable
#include <xgboost/base.h> // for bst_feature_t, bst_target_t, bst_float, Args, GradientPair
#include <xgboost/base.h> // for bst_feature_t, bst_target_t, bst_float, Args, GradientPair, ..
#include <xgboost/context.h> // for Context
#include <xgboost/linalg.h> // for Tensor, TensorView
#include <xgboost/metric.h> // for Metric
@@ -229,7 +229,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
*/
virtual void GetFeatureTypes(std::vector<std::string>* ft) const = 0;

/*!
/**
* \brief Slice the model.
*
* See InplacePredict for layer parameters.
@@ -239,8 +239,8 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
*
* \return a sliced model.
*/
virtual Learner *Slice(int32_t begin_layer, int32_t end_layer, int32_t step,
bool *out_of_bound) = 0;
virtual Learner* Slice(bst_layer_t begin, bst_layer_t end, bst_layer_t step,
bool* out_of_bound) = 0;
/*!
* \brief dump the model in the requested format
* \param fmap feature map that may help give interpretations of feature
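`Learner::Slice` also takes a step, which the Python bindings map onto extended slice syntax, while `out_of_bound` lets the caller report a slice that runs past the trained layers instead of aborting in the C++ core. A short, self-contained illustration of step slicing:

```python
import numpy as np
import xgboost as xgb

X = np.random.default_rng(1).normal(size=(128, 4))
y = X.sum(axis=1)
dtrain = xgb.DMatrix(X, label=y)
booster = xgb.train({"max_depth": 2}, dtrain, num_boost_round=10)

# Every second layer from rounds 0..9, i.e. rounds 0, 2, 4, 6 and 8.
every_other = booster[0:10:2]
assert every_other.num_boosted_rounds() == 5
```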
4 changes: 2 additions & 2 deletions include/xgboost/tree_updater.h
@@ -85,8 +85,8 @@ class TreeUpdater : public Configurable {
* the prediction cache. If true, the prediction cache will have been
* updated by the time this function returns.
*/
virtual bool UpdatePredictionCache(const DMatrix * /*data*/,
linalg::VectorView<float> /*out_preds*/) {
virtual bool UpdatePredictionCache(const DMatrix* /*data*/,
linalg::MatrixView<float> /*out_preds*/) {
return false;
}

5 changes: 5 additions & 0 deletions python-package/xgboost/core.py
@@ -17,6 +17,7 @@
Any,
Callable,
Dict,
Generator,
Iterable,
List,
Optional,
@@ -1755,6 +1756,10 @@ def __getitem__(self, val: Union[int, tuple, slice]) -> "Booster":
sliced.handle = sliced_handle
return sliced

def __iter__(self) -> Generator["Booster", None, None]:
for i in range(0, self.num_boosted_rounds()):
yield self[i]

def save_config(self) -> str:
"""Output internal parameter configuration of Booster as a JSON
string.
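The new `__iter__` makes a trained `Booster` iterable as a sequence of single-round boosters, which is handy for per-round inspection. A small usage sketch on synthetic data:

```python
import numpy as np
import xgboost as xgb

X = np.random.default_rng(2).normal(size=(128, 4))
y = X[:, 0] - X[:, 1]
dtrain = xgb.DMatrix(X, label=y)
booster = xgb.train({"max_depth": 2, "eta": 0.5}, dtrain, num_boost_round=5)

# Each element is a Booster holding exactly one boosted round.
for i, layer in enumerate(booster):
    margin = layer.predict(dtrain, output_margin=True)
    print(f"round {i}: mean raw prediction {margin.mean():.4f}")
```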
2 changes: 1 addition & 1 deletion src/gbm/gblinear.cc
@@ -148,7 +148,7 @@ class GBLinear : public GradientBooster {
}

void PredictBatch(DMatrix* p_fmat, PredictionCacheEntry* predts, bool /*training*/,
uint32_t layer_begin, uint32_t) override {
bst_layer_t layer_begin, bst_layer_t) override {
monitor_.Start("PredictBatch");
LinearCheckLayer(layer_begin);
auto* out_preds = &predts->predictions;
Expand Down