Implement fit stump. #8607

Merged: 7 commits, merged Jan 3, 2023
2 changes: 2 additions & 0 deletions R-package/src/Makevars.in
@@ -55,6 +55,7 @@ OBJECTS= \
$(PKGROOT)/src/predictor/cpu_predictor.o \
$(PKGROOT)/src/tree/constraints.o \
$(PKGROOT)/src/tree/param.o \
$(PKGROOT)/src/tree/fit_stump.o \
$(PKGROOT)/src/tree/tree_model.o \
$(PKGROOT)/src/tree/tree_updater.o \
$(PKGROOT)/src/tree/updater_approx.o \
@@ -85,6 +86,7 @@ OBJECTS= \
$(PKGROOT)/src/common/pseudo_huber.o \
$(PKGROOT)/src/common/quantile.o \
$(PKGROOT)/src/common/random.o \
$(PKGROOT)/src/common/stats.o \
$(PKGROOT)/src/common/survival_util.o \
$(PKGROOT)/src/common/threading_utils.o \
$(PKGROOT)/src/common/timer.o \
2 changes: 2 additions & 0 deletions R-package/src/Makevars.win
@@ -55,6 +55,7 @@ OBJECTS= \
$(PKGROOT)/src/predictor/cpu_predictor.o \
$(PKGROOT)/src/tree/constraints.o \
$(PKGROOT)/src/tree/param.o \
$(PKGROOT)/src/tree/fit_stump.o \
$(PKGROOT)/src/tree/tree_model.o \
$(PKGROOT)/src/tree/tree_updater.o \
$(PKGROOT)/src/tree/updater_approx.o \
@@ -85,6 +86,7 @@ OBJECTS= \
$(PKGROOT)/src/common/pseudo_huber.o \
$(PKGROOT)/src/common/quantile.o \
$(PKGROOT)/src/common/random.o \
$(PKGROOT)/src/common/stats.o \
$(PKGROOT)/src/common/survival_util.o \
$(PKGROOT)/src/common/threading_utils.o \
$(PKGROOT)/src/common/timer.o \
2 changes: 2 additions & 0 deletions include/xgboost/base.h
@@ -134,6 +134,8 @@ using bst_row_t = std::size_t; // NOLINT
using bst_node_t = int32_t; // NOLINT
/*! \brief Type for ranking group index. */
using bst_group_t = uint32_t; // NOLINT
/*! \brief Type for indexing target variables. */
using bst_target_t = std::size_t; // NOLINT

namespace detail {
/*! \brief Implementation of gradient statistics pair. Template specialisation
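For illustration only (not part of the diff): a minimal sketch of how the new `bst_target_t` alias could be used to size per-target buffers; the function name is hypothetical.

```cpp
#include <vector>

#include "xgboost/base.h"  // bst_target_t

// Hypothetical helper: one accumulator per target, indexed by bst_target_t.
std::vector<double> MakePerTargetBuffer(xgboost::bst_target_t n_targets) {
  return std::vector<double>(n_targets, 0.0);
}
```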
26 changes: 24 additions & 2 deletions include/xgboost/linalg.h
@@ -15,6 +15,7 @@

#include <algorithm>
#include <cassert>
#include <cinttypes> // std::int32_t
#include <limits>
#include <string>
#include <tuple>
@@ -388,9 +389,9 @@ class TensorView {
* \brief Create a tensor with data, shape and strides. Don't use this constructor if
* stride can be calculated from shape.
*/
template <typename I, int32_t D>
template <typename I, std::int32_t D>
LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], I const (&stride)[D],
int32_t device)
std::int32_t device)
: data_{data}, ptr_{data_.data()}, device_{device} {
static_assert(D == kDim, "Invalid shape & stride.");
detail::UnrollLoop<D>([&](auto i) {
@@ -833,6 +834,27 @@ class Tensor {
int32_t DeviceIdx() const { return data_.DeviceIdx(); }
};

template <typename T>
using Vector = Tensor<T, 1>;

template <typename T, typename... Index>
auto Constant(Context const *ctx, T v, Index &&...index) {
Tensor<T, sizeof...(Index)> t;
t.SetDevice(ctx->gpu_id);
t.Reshape(index...);
t.Data()->Fill(std::move(v));
return t;
}


/**
* \brief Like `np.zeros`, return a new array of given shape and type, filled with zeros.
*/
template <typename T, typename... Index>
auto Zeros(Context const *ctx, Index &&...index) {
return Constant(ctx, static_cast<T>(0), index...);
}

// Only first axis is supported for now.
template <typename T, int32_t D>
void Stack(Tensor<T, D> *l, Tensor<T, D> const &r) {
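A minimal usage sketch of the new `Vector` alias and the `Constant`/`Zeros` helpers added above; it assumes a default (CPU) `Context` and is not part of the diff.

```cpp
#include "xgboost/context.h"
#include "xgboost/linalg.h"

namespace xgboost {
void LinalgHelpersExample() {
  Context ctx;  // assumed to default to CPU (gpu_id == -1)
  // 2x3 tensor with every element set to 1.5f, allocated on ctx's device.
  auto filled = linalg::Constant(&ctx, 1.5f, 2, 3);
  // 1-D tensor (i.e. linalg::Vector<float>) of 4 zeros, like np.zeros(4).
  linalg::Vector<float> zeros = linalg::Zeros<float>(&ctx, 4);
  // filled.Shape() == {2, 3}, zeros.Size() == 4.
}
}  // namespace xgboost
```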
2 changes: 1 addition & 1 deletion include/xgboost/objective.h
@@ -93,7 +93,7 @@ class ObjFunction : public Configurable {
* \brief Return number of targets for input matrix. Right now XGBoost supports only
* multi-target regression.
*/
virtual uint32_t Targets(MetaInfo const& info) const {
virtual bst_target_t Targets(MetaInfo const& info) const {
if (info.labels.Shape(1) > 1) {
LOG(FATAL) << "multioutput is not supported by current objective function";
}
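A hedged sketch (not taken from the PR) of how a multi-target objective might override the retyped `Targets()`; the class name is hypothetical and the remaining `ObjFunction` overrides are omitted.

```cpp
#include <algorithm>  // std::max

#include "xgboost/data.h"       // MetaInfo
#include "xgboost/objective.h"  // ObjFunction

namespace xgboost {
class HypotheticalMultiTargetObj : public ObjFunction {
 public:
  // One target per label column; at least one even for an empty DMatrix.
  bst_target_t Targets(MetaInfo const& info) const override {
    return std::max(static_cast<bst_target_t>(1), info.labels.Shape(1));
  }
  // ... GetGradient(), Configure(), etc. omitted in this sketch ...
};
}  // namespace xgboost
```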
1 change: 1 addition & 0 deletions src/common/host_device_vector.cc
@@ -172,6 +172,7 @@ void HostDeviceVector<T>::SetDevice(int) const {}
template class HostDeviceVector<bst_float>;
template class HostDeviceVector<double>;
template class HostDeviceVector<GradientPair>;
template class HostDeviceVector<GradientPairPrecise>;
template class HostDeviceVector<int32_t>; // bst_node_t
template class HostDeviceVector<uint8_t>;
template class HostDeviceVector<FeatureType>;
1 change: 1 addition & 0 deletions src/common/host_device_vector.cu
@@ -404,6 +404,7 @@ void HostDeviceVector<T>::Resize(size_t new_size, T v) {
template class HostDeviceVector<bst_float>;
template class HostDeviceVector<double>;
template class HostDeviceVector<GradientPair>;
template class HostDeviceVector<GradientPairPrecise>;
template class HostDeviceVector<int32_t>; // bst_node_t
template class HostDeviceVector<uint8_t>;
template class HostDeviceVector<FeatureType>;
9 changes: 2 additions & 7 deletions src/common/numeric.cc
@@ -3,10 +3,8 @@
*/
#include "numeric.h"

#include <numeric> // std::accumulate
#include <type_traits> // std::is_same

#include "threading_utils.h" // MemStackAllocator, ParallelFor, DefaultMaxThreads
#include "xgboost/context.h" // Context
#include "xgboost/host_device_vector.h" // HostDeviceVector

@@ -15,14 +13,11 @@ namespace common {
double Reduce(Context const* ctx, HostDeviceVector<float> const& values) {
if (ctx->IsCPU()) {
auto const& h_values = values.ConstHostVector();
MemStackAllocator<double, DefaultMaxThreads()> result_tloc(ctx->Threads(), 0);
ParallelFor(h_values.size(), ctx->Threads(),
[&](auto i) { result_tloc[omp_get_thread_num()] += h_values[i]; });
auto result = std::accumulate(result_tloc.cbegin(), result_tloc.cend(), 0.0);
auto result = cpu_impl::Reduce(ctx, h_values.cbegin(), h_values.cend(), 0.0);
static_assert(std::is_same<decltype(result), double>::value, "");
return result;
}
return cuda::Reduce(ctx, values);
return cuda_impl::Reduce(ctx, values);
}
} // namespace common
} // namespace xgboost
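An illustrative sketch (not part of the diff) of the public `common::Reduce` entry point after the refactor; it assumes a CPU `Context`, and the `SumValues` name is hypothetical.

```cpp
#include "common/numeric.h"  // path relative to src/
#include "xgboost/context.h"
#include "xgboost/host_device_vector.h"

double SumValues() {
  xgboost::Context ctx;  // CPU by default in this sketch
  xgboost::HostDeviceVector<float> values{1.0f, 2.0f, 3.5f};
  // Dispatches to cpu_impl::Reduce on CPU and cuda_impl::Reduce on GPU.
  return xgboost::common::Reduce(&ctx, values);  // 6.5
}
```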
12 changes: 5 additions & 7 deletions src/common/numeric.cu
@@ -2,24 +2,22 @@
* Copyright 2022 by XGBoost Contributors
*/
#include <thrust/execution_policy.h>
#include <thrust/functional.h> // thrust:plus

#include "device_helpers.cuh" // dh::Reduce, safe_cuda, dh::XGBCachingDeviceAllocator
#include "device_helpers.cuh" // dh::Reduce, dh::XGBCachingDeviceAllocator
#include "numeric.h"
#include "xgboost/context.h" // Context
#include "xgboost/host_device_vector.h" // HostDeviceVector

namespace xgboost {
namespace common {
namespace cuda {
namespace cuda_impl {
double Reduce(Context const* ctx, HostDeviceVector<float> const& values) {
values.SetDevice(ctx->gpu_id);
auto const d_values = values.ConstDeviceSpan();
dh::XGBCachingDeviceAllocator<char> alloc;
auto res = dh::Reduce(thrust::cuda::par(alloc), d_values.data(),
d_values.data() + d_values.size(), 0.0, thrust::plus<double>{});
return res;
return dh::Reduce(thrust::cuda::par(alloc), dh::tcbegin(d_values), dh::tcend(d_values), 0.0,
thrust::plus<float>{});
}
} // namespace cuda
} // namespace cuda_impl
} // namespace common
} // namespace xgboost
22 changes: 19 additions & 3 deletions src/common/numeric.h
@@ -95,17 +95,33 @@ void PartialSum(int32_t n_threads, InIt begin, InIt end, T init, OutIt out_it) {
exc.Rethrow();
}

namespace cuda {
namespace cuda_impl {
double Reduce(Context const* ctx, HostDeviceVector<float> const& values);
#if !defined(XGBOOST_USE_CUDA)
inline double Reduce(Context const*, HostDeviceVector<float> const&) {
AssertGPUSupport();
return 0;
}
#endif // !defined(XGBOOST_USE_CUDA)
} // namespace cuda
} // namespace cuda_impl

/**
* \brief Reduction with iterator. init must be additive identity. (0 for primitive types)
*/
namespace cpu_impl {
template <typename It, typename V = typename It::value_type>
V Reduce(Context const* ctx, It first, It second, V const& init) {
size_t n = std::distance(first, second);
common::MemStackAllocator<V, common::DefaultMaxThreads()> result_tloc(ctx->Threads(), init);
common::ParallelFor(n, ctx->Threads(),
[&](auto i) { result_tloc[omp_get_thread_num()] += first[i]; });
auto result = std::accumulate(result_tloc.cbegin(), result_tloc.cbegin() + ctx->Threads(), init);
return result;
}
} // namespace cpu_impl

/**
* \brief Reduction with summation.
* \brief Reduction on host device vector.
*/
double Reduce(Context const* ctx, HostDeviceVector<float> const& values);

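A small sketch (not in the diff) of calling the new iterator-based `cpu_impl::Reduce` directly; per the comment above, `init` must be the additive identity because it seeds every thread-local accumulator.

```cpp
#include <vector>

#include "common/numeric.h"  // cpu_impl::Reduce, path relative to src/
#include "xgboost/context.h"

double SumQuarters() {
  xgboost::Context ctx;
  std::vector<double> values(1024, 0.25);
  // init = 0.0 (additive identity); a non-zero init would be added once per
  // thread-local slot and once more in the final accumulate.
  return xgboost::common::cpu_impl::Reduce(&ctx, values.cbegin(), values.cend(), 0.0);
  // == 256.0
}
```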
2 changes: 1 addition & 1 deletion src/common/quantile.cu
@@ -641,7 +641,7 @@ void SketchContainer::MakeCuts(HistogramCuts* p_cuts) {
thrust::equal_to<bst_feature_t>{},
[] __device__(auto l, auto r) { return l.value > r.value ? l : r; });
dh::CopyDeviceSpanToVector(&max_values, dh::ToSpan(d_max_values));
auto max_it = common::MakeIndexTransformIter([&](auto i) {
auto max_it = MakeIndexTransformIter([&](auto i) {
if (IsCat(h_feature_types, i)) {
return max_values[i].value;
}
64 changes: 64 additions & 0 deletions src/common/stats.cc
@@ -0,0 +1,64 @@
/*!
* Copyright 2022 by XGBoost Contributors
*/
#include "stats.h"

#include <numeric> // std::accumulate

#include "common.h" // OptionalWeights
#include "threading_utils.h" // ParallelFor, MemStackAllocator
#include "transform_iterator.h" // MakeIndexTransformIter
#include "xgboost/context.h" // Context
#include "xgboost/host_device_vector.h" // HostDeviceVector
#include "xgboost/linalg.h" // Tensor, UnravelIndex, Apply
#include "xgboost/logging.h" // CHECK_EQ

namespace xgboost {
namespace common {
float Median(Context const* ctx, linalg::Tensor<float, 2> const& t,
HostDeviceVector<float> const& weights) {
CHECK_LE(t.Shape(1), 1) << "Matrix is not yet supported.";
if (!ctx->IsCPU()) {
weights.SetDevice(ctx->gpu_id);
auto opt_weights = OptionalWeights(weights.ConstDeviceSpan());
auto t_v = t.View(ctx->gpu_id);
return cuda_impl::Median(ctx, t_v, opt_weights);
}

auto opt_weights = OptionalWeights(weights.ConstHostSpan());
auto t_v = t.HostView();
auto iter = common::MakeIndexTransformIter(
[&](size_t i) { return linalg::detail::Apply(t_v, linalg::UnravelIndex(i, t_v.Shape())); });
float q{0};
if (opt_weights.Empty()) {
q = common::Quantile(0.5, iter, iter + t_v.Size());
} else {
CHECK_NE(t_v.Shape(1), 0);
auto w_it = common::MakeIndexTransformIter([&](size_t i) {
auto sample_idx = i / t_v.Shape(1);
return opt_weights[sample_idx];
});
q = common::WeightedQuantile(0.5, iter, iter + t_v.Size(), w_it);
}
return q;
}

void Mean(Context const* ctx, linalg::Vector<float> const& v, linalg::Vector<float>* out) {
v.SetDevice(ctx->gpu_id);
out->SetDevice(ctx->gpu_id);
out->Reshape(1);

if (ctx->IsCPU()) {
auto h_v = v.HostView();
float n = v.Size();
MemStackAllocator<float, DefaultMaxThreads()> tloc(ctx->Threads(), 0.0f);
ParallelFor(v.Size(), ctx->Threads(),
[&](auto i) { tloc[omp_get_thread_num()] += h_v(i) / n; });
auto ret = std::accumulate(tloc.cbegin(), tloc.cend(), .0f);
out->HostView()(0) = ret;
} else {
cuda_impl::Mean(ctx, v.View(ctx->gpu_id), out->View(ctx->gpu_id));
}
}
} // namespace common
} // namespace xgboost
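A usage sketch of the new `common::Mean` helper (illustrative, not from the PR); it reuses the `linalg::Constant` helper introduced above and works on CPU or GPU depending on the context.

```cpp
#include "common/stats.h"  // Mean, path relative to src/
#include "xgboost/context.h"
#include "xgboost/linalg.h"

float MeanOfConstantVector(xgboost::Context const* ctx) {
  // Vector of eight 2.0f values on the context's device.
  auto v = xgboost::linalg::Constant(ctx, 2.0f, 8);
  xgboost::linalg::Vector<float> out;
  xgboost::common::Mean(ctx, v, &out);  // out is reshaped to a single element
  return out.HostView()(0);             // 2.0f
}
```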
15 changes: 13 additions & 2 deletions src/common/stats.cu
@@ -13,7 +13,7 @@

namespace xgboost {
namespace common {
namespace cuda {
namespace cuda_impl {
float Median(Context const* ctx, linalg::TensorView<float const, 2> t,
common::OptionalWeights weights) {
HostDeviceVector<size_t> segments{0, t.Size()};
@@ -42,6 +42,17 @@ float Median(Context const* ctx, linalg::TensorView<float const, 2> t,
CHECK_EQ(quantile.Size(), 1);
return quantile.HostVector().front();
}
} // namespace cuda

void Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::VectorView<float> out) {
float n = v.Size();
auto it = dh::MakeTransformIterator<float>(
thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) { return v(i) / n; });
std::size_t bytes;
CHECK_EQ(out.Size(), 1);
cub::DeviceReduce::Sum(nullptr, bytes, it, out.Values().data(), v.Size());
dh::TemporaryArray<char> temp{bytes};
cub::DeviceReduce::Sum(temp.data().get(), bytes, it, out.Values().data(), v.Size());
}
} // namespace cuda_impl
} // namespace common
} // namespace xgboost
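A standalone sketch of the two-pass `cub::DeviceReduce::Sum` pattern used by `Mean` above: the first call with a null temp pointer only sizes the scratch buffer, the second performs the reduction. Assumes nvcc/CUDA; the function name is hypothetical and error checking is omitted.

```cpp
#include <cub/cub.cuh>
#include <thrust/device_vector.h>

#include <cstddef>  // std::size_t

float SumOnDevice(thrust::device_vector<float> const& in) {
  thrust::device_vector<float> out(1);
  std::size_t bytes = 0;
  // First pass: d_temp_storage == nullptr, so only `bytes` is written.
  cub::DeviceReduce::Sum(nullptr, bytes, in.data().get(), out.data().get(),
                         static_cast<int>(in.size()));
  thrust::device_vector<char> temp(bytes);
  // Second pass: run the reduction with the allocated scratch space.
  cub::DeviceReduce::Sum(temp.data().get(), bytes, in.data().get(), out.data().get(),
                         static_cast<int>(in.size()));
  return out[0];  // copies the single-element result back to the host
}
```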
47 changes: 15 additions & 32 deletions src/common/stats.h
@@ -8,10 +8,11 @@
#include <limits>
#include <vector>

#include "common.h" // AssertGPUSupport
#include "common.h" // AssertGPUSupport, OptionalWeights
#include "transform_iterator.h" // MakeIndexTransformIter
#include "xgboost/context.h" // Context
#include "xgboost/linalg.h"
#include "xgboost/logging.h" // CHECK_GE

namespace xgboost {
namespace common {
@@ -93,43 +94,25 @@ float WeightedQuantile(double alpha, Iter begin, Iter end, WeightIter weights) {
return val(idx);
}

namespace cuda {
float Median(Context const* ctx, linalg::TensorView<float const, 2> t,
common::OptionalWeights weights);
namespace cuda_impl {
float Median(Context const* ctx, linalg::TensorView<float const, 2> t, OptionalWeights weights);
void Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::VectorView<float> out);

#if !defined(XGBOOST_USE_CUDA)
inline float Median(Context const*, linalg::TensorView<float const, 2>, common::OptionalWeights) {
AssertGPUSupport();
inline float Median(Context const*, linalg::TensorView<float const, 2>, OptionalWeights) {
common::AssertGPUSupport();
return 0;
}
inline void Mean(Context const*, linalg::VectorView<float const>, linalg::VectorView<float>) {
common::AssertGPUSupport();
}
#endif // !defined(XGBOOST_USE_CUDA)
} // namespace cuda
} // namespace cuda_impl

inline float Median(Context const* ctx, linalg::Tensor<float, 2> const& t,
HostDeviceVector<float> const& weights) {
if (!ctx->IsCPU()) {
weights.SetDevice(ctx->gpu_id);
auto opt_weights = OptionalWeights(weights.ConstDeviceSpan());
auto t_v = t.View(ctx->gpu_id);
return cuda::Median(ctx, t_v, opt_weights);
}
float Median(Context const* ctx, linalg::Tensor<float, 2> const& t,
HostDeviceVector<float> const& weights);

auto opt_weights = OptionalWeights(weights.ConstHostSpan());
auto t_v = t.HostView();
auto iter = common::MakeIndexTransformIter(
[&](size_t i) { return linalg::detail::Apply(t_v, linalg::UnravelIndex(i, t_v.Shape())); });
float q{0};
if (opt_weights.Empty()) {
q = common::Quantile(0.5, iter, iter + t_v.Size());
} else {
CHECK_NE(t_v.Shape(1), 0);
auto w_it = common::MakeIndexTransformIter([&](size_t i) {
auto sample_idx = i / t_v.Shape(1);
return opt_weights[sample_idx];
});
q = common::WeightedQuantile(0.5, iter, iter + t_v.Size(), w_it);
}
return q;
}
void Mean(Context const* ctx, linalg::Vector<float> const& v, linalg::Vector<float>* out);
} // namespace common
} // namespace xgboost
#endif // XGBOOST_COMMON_STATS_H_
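An illustrative call to the now out-of-line `Median` declaration (a sketch, not part of the PR); it assumes a CPU `Context`, and the empty weight vector selects the unweighted quantile path.

```cpp
#include <cstddef>  // std::size_t

#include "common/stats.h"  // Median, path relative to src/
#include "xgboost/context.h"
#include "xgboost/host_device_vector.h"
#include "xgboost/linalg.h"

float MedianOfColumn(xgboost::Context const* ctx) {
  // Single-column matrix; multi-column input is still rejected.
  xgboost::linalg::Tensor<float, 2> t;
  t.Reshape(5, 1);
  auto h = t.HostView();
  float const data[] = {1.0f, 9.0f, 3.0f, 7.0f, 5.0f};
  for (std::size_t i = 0; i < 5; ++i) {
    h(i, 0) = data[i];
  }
  xgboost::HostDeviceVector<float> weights;  // empty => unweighted median
  return xgboost::common::Median(ctx, t, weights);  // 5.0f
}
```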
1 change: 1 addition & 0 deletions src/objective/adaptive.cc
@@ -10,6 +10,7 @@
#include "../common/numeric.h"
#include "../common/stats.h"
#include "../common/threading_utils.h"
#include "../common/transform_iterator.h" // MakeIndexTransformIter
#include "xgboost/tree_model.h"

namespace xgboost {