Release 2.0.0 oneapi (#5)
* initial, not ready for work

* fixes for obj functions

* fix some compilation problems

* fix some errors

* fixes

* improve context

* plugin compiled and sometimes works

* fix the errors. tests passed

* fix compilation error w/o oneapi

* black

* README update

---------

Co-authored-by: Dmitry Razdoburdin <>
razdoburdin authored Oct 10, 2023
1 parent 66ee89d commit 23b9c61
Showing 41 changed files with 5,233 additions and 417 deletions.
4 changes: 4 additions & 0 deletions CMakeLists.txt
@@ -170,6 +170,10 @@ if (USE_CUDA)
find_package(CUDAToolkit REQUIRED)
endif (USE_CUDA)

if (PLUGIN_UPDATER_ONEAPI)
target_compile_definitions(xgboost PRIVATE -DXGBOOST_USE_ONEAPI=1)
endif (PLUGIN_UPDATER_ONEAPI)

if (FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR
(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")))
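For context, the `XGBOOST_USE_ONEAPI=1` definition added above is what lets C++ translation units in the main library branch on oneAPI availability at compile time. A minimal sketch of such a guard (illustrative only; the function name is hypothetical and not part of this commit):

```cpp
// Illustrative sketch: how code built into the xgboost target can branch on
// the XGBOOST_USE_ONEAPI macro defined above. ReportOneAPISupport() is a
// hypothetical name, not part of this diff.
#include <iostream>

void ReportOneAPISupport() {
#if defined(XGBOOST_USE_ONEAPI)
  std::cout << "Built with the oneAPI updater plugin.\n";
#else
  std::cout << "Built without oneAPI support.\n";
#endif
}
```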
97 changes: 92 additions & 5 deletions include/xgboost/context.h
@@ -22,19 +22,29 @@ struct CUDAContext;
struct DeviceSym {
static auto constexpr CPU() { return "cpu"; }
static auto constexpr CUDA() { return "cuda"; }
static auto constexpr SYCL_default() { return "sycl"; }
static auto constexpr SYCL_CPU() { return "sycl:cpu"; }
static auto constexpr SYCL_GPU() { return "sycl:gpu"; }
};

/**
* @brief A type for device ordinal. The type is packed into 32-bit for efficient use in
* viewing types like `linalg::TensorView`.
*/
constexpr static bst_d_ordinal_t kDefaultOrdinal = -1;
struct DeviceOrd {
- enum Type : std::int16_t { kCPU = 0, kCUDA = 1 } device{kCPU};
- // CUDA device ordinal.
- bst_d_ordinal_t ordinal{-1};
+ enum Type : std::int16_t { kCPU = 0, kCUDA = 1, kSyclDefault = 2, kSyclCPU = 3, kSyclGPU = 4 } device{kCPU};
+ // CUDA or SYCL device ordinal.
+ bst_d_ordinal_t ordinal{kDefaultOrdinal};

[[nodiscard]] bool IsCUDA() const { return device == kCUDA; }
[[nodiscard]] bool IsCPU() const { return device == kCPU; }
[[nodiscard]] bool IsSyclDefault() const { return device == kSyclDefault; }
[[nodiscard]] bool IsSyclCPU() const { return device == kSyclCPU; }
[[nodiscard]] bool IsSyclGPU() const { return device == kSyclGPU; }
[[nodiscard]] bool IsSycl() const { return (IsSyclDefault() ||
IsSyclCPU() ||
IsSyclGPU()); }

DeviceOrd() = default;
constexpr DeviceOrd(Type type, bst_d_ordinal_t ord) : device{type}, ordinal{ord} {}
@@ -47,14 +57,35 @@ struct DeviceOrd {
/**
* @brief Constructor for CPU.
*/
- [[nodiscard]] constexpr static auto CPU() { return DeviceOrd{kCPU, -1}; }
+ [[nodiscard]] constexpr static auto CPU() { return DeviceOrd{kCPU, kDefaultOrdinal}; }
/**
* @brief Constructor for CUDA device.
*
* @param ordinal CUDA device ordinal.
*/
[[nodiscard]] static auto CUDA(bst_d_ordinal_t ordinal) { return DeviceOrd{kCUDA, ordinal}; }

/**
* @brief Constructor for SYCL.
*
* @param ordinal SYCL device ordinal.
*/
[[nodiscard]] constexpr static auto SYCL_default(bst_d_ordinal_t ordinal = kDefaultOrdinal) { return DeviceOrd{kSyclDefault, ordinal}; }

/**
* @brief Constructor for SYCL CPU.
*
* @param ordinal SYCL CPU device ordinal.
*/
[[nodiscard]] constexpr static auto SYCL_CPU(bst_d_ordinal_t ordinal = kDefaultOrdinal) { return DeviceOrd{kSyclCPU, ordinal}; }

/**
* @brief Constructor for SYCL GPU.
*
* @param ordinal SYCL GPU device ordinal.
*/
[[nodiscard]] constexpr static auto SYCL_GPU(bst_d_ordinal_t ordinal = kDefaultOrdinal) { return DeviceOrd{kSyclGPU, ordinal}; }

[[nodiscard]] bool operator==(DeviceOrd const& that) const {
return device == that.device && ordinal == that.ordinal;
}
@@ -68,6 +99,12 @@ struct DeviceOrd {
return DeviceSym::CPU();
case DeviceOrd::kCUDA:
return DeviceSym::CUDA() + (':' + std::to_string(ordinal));
case DeviceOrd::kSyclDefault:
return DeviceSym::SYCL_default() + (':' + std::to_string(ordinal));
case DeviceOrd::kSyclCPU:
return DeviceSym::SYCL_CPU() + (':' + std::to_string(ordinal));
case DeviceOrd::kSyclGPU:
return DeviceSym::SYCL_GPU() + (':' + std::to_string(ordinal));
default: {
LOG(FATAL) << "Unknown device.";
return "";
@@ -135,6 +172,25 @@ struct Context : public XGBoostParameter<Context> {
* @brief Is XGBoost running on a CUDA device?
*/
[[nodiscard]] bool IsCUDA() const { return Device().IsCUDA(); }
/**
* @brief Is XGBoost running on the default SYCL device?
*/
[[nodiscard]] bool IsSyclDefault() const { return Device().IsSyclDefault(); }
/**
* @brief Is XGBoost running on a SYCL CPU?
*/
[[nodiscard]] bool IsSyclCPU() const { return Device().IsSyclCPU(); }
/**
* @brief Is XGBoost running on a SYCL GPU?
*/
[[nodiscard]] bool IsSyclGPU() const { return Device().IsSyclGPU(); }
/**
* @brief Is XGBoost running on any SYCL device?
*/
[[nodiscard]] bool IsSycl() const { return IsSyclDefault()
|| IsSyclCPU()
|| IsSyclGPU(); }

/**
* @brief Get the current device and ordinal.
*/
@@ -171,6 +227,29 @@ struct Context : public XGBoostParameter<Context> {
/**
* @brief Call function based on the current device.
*/
template <typename CPUFn, typename CUDAFn, typename SYCLFn>
decltype(auto) DispatchDevice(CPUFn&& cpu_fn, CUDAFn&& cuda_fn, SYCLFn&& sycl_fn) const {
static_assert(std::is_same_v<std::invoke_result_t<CPUFn>, std::invoke_result_t<CUDAFn>>);
switch (this->Device().device) {
case DeviceOrd::kCPU:
return cpu_fn();
case DeviceOrd::kCUDA:
return cuda_fn();
case DeviceOrd::kSyclDefault:
return sycl_fn();
case DeviceOrd::kSyclCPU:
return sycl_fn();
case DeviceOrd::kSyclGPU:
return sycl_fn();
default:
// Do not use the device name as this is likely an internal error, the name
// wouldn't be valid.
LOG(FATAL) << "Unknown device type:"
<< static_cast<std::underlying_type_t<DeviceOrd::Type>>(this->Device().device);
break;
}
return std::invoke_result_t<CPUFn>();
}
template <typename CPUFn, typename CUDAFn>
decltype(auto) DispatchDevice(CPUFn&& cpu_fn, CUDAFn&& cuda_fn) const {
static_assert(std::is_same_v<std::invoke_result_t<CPUFn>, std::invoke_result_t<CUDAFn>>);
@@ -179,6 +258,12 @@ struct Context : public XGBoostParameter<Context> {
return cpu_fn();
case DeviceOrd::kCUDA:
return cuda_fn();
case DeviceOrd::kSyclDefault:
LOG(FATAL) << "The requested feature is not implemented for sycl yet";
case DeviceOrd::kSyclCPU:
LOG(FATAL) << "The requested feature is not implemented for sycl yet";
case DeviceOrd::kSyclGPU:
LOG(FATAL) << "The requested feature is not implemented for sycl yet";
default:
// Do not use the device name as this is likely an internal error, the name
// wouldn't be valid.
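The three-callable overload above routes every SYCL variant (default, CPU, GPU) to the same `sycl_fn`, while the two-callable overload now aborts with `LOG(FATAL)` when a SYCL device is selected. A hypothetical call site (illustrative, not part of this commit):

```cpp
// Hypothetical call site for the new three-callable DispatchDevice overload;
// all SYCL device types are routed to the last lambda.
#include "xgboost/context.h"

float DeviceTag(xgboost::Context const* ctx) {
  return ctx->DispatchDevice(
      [] { return 1.0f; },   // CPU path
      [] { return 2.0f; },   // CUDA path
      [] { return 3.0f; });  // any SYCL device
}
```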
@@ -213,7 +298,9 @@ struct Context : public XGBoostParameter<Context> {
void SetDeviceOrdinal(Args const& kwargs);
Context& SetDevice(DeviceOrd d) {
this->device_ = d;
- this->gpu_id = d.ordinal;  // this can be removed once we move away from `gpu_id`.
+ if (d.IsCUDA()) {
+   this->gpu_id = d.ordinal;  // this can be removed once we move away from `gpu_id`.
+ }
this->device = d.Name();
return *this;
}
4 changes: 2 additions & 2 deletions include/xgboost/linalg.h
@@ -596,13 +596,13 @@ auto MakeTensorView(Context const *ctx, common::Span<T> data, S &&...shape) {

template <typename T, typename... S>
auto MakeTensorView(Context const *ctx, HostDeviceVector<T> *data, S &&...shape) {
- auto span = ctx->IsCPU() ? data->HostSpan() : data->DeviceSpan();
+ auto span = ctx->IsCUDA() ? data->DeviceSpan() : data->HostSpan();
return MakeTensorView(ctx->gpu_id, span, std::forward<S>(shape)...);
}

template <typename T, typename... S>
auto MakeTensorView(Context const *ctx, HostDeviceVector<T> const *data, S &&...shape) {
- auto span = ctx->IsCPU() ? data->ConstHostSpan() : data->ConstDeviceSpan();
+ auto span = ctx->IsCUDA() ? data->ConstDeviceSpan() : data->ConstHostSpan();
return MakeTensorView(ctx->gpu_id, span, std::forward<S>(shape)...);
}

4 changes: 4 additions & 0 deletions plugin/CMakeLists.txt
@@ -4,7 +4,11 @@ endif (PLUGIN_DENSE_PARSER)

if (PLUGIN_UPDATER_ONEAPI)
add_library(oneapi_plugin OBJECT
${xgboost_SOURCE_DIR}/plugin/updater_oneapi/hist_util_oneapi.cc
${xgboost_SOURCE_DIR}/plugin/updater_oneapi/regression_obj_oneapi.cc
${xgboost_SOURCE_DIR}/plugin/updater_oneapi/multiclass_obj_oneapi.cc
${xgboost_SOURCE_DIR}/plugin/updater_oneapi/updater_quantile_hist_oneapi.cc
${xgboost_SOURCE_DIR}/plugin/updater_oneapi/device_manager_oneapi.cc
${xgboost_SOURCE_DIR}/plugin/updater_oneapi/predictor_oneapi.cc)
target_include_directories(oneapi_plugin
PRIVATE
26 changes: 8 additions & 18 deletions plugin/updater_oneapi/README.md
@@ -2,30 +2,20 @@
This plugin adds support for the OneAPI programming model to XGBoost's tree construction and prediction algorithms.

## Usage
- Specify the 'objective' parameter as one of the following options to offload computation of objective function on OneAPI device.
+ Specify the 'device' parameter as one of the following options to offload model training and inference to a OneAPI device.

### Algorithms
- | objective | Description |
+ | device | Description |
| --- | --- |
- | reg:squarederror_oneapi | regression with squared loss |
- | reg:squaredlogerror_oneapi | regression with root mean squared logarithmic loss |
- | reg:logistic_oneapi | logistic regression for probability regression task |
- | binary:logistic_oneapi | logistic regression for binary classification task |
- | binary:logitraw_oneapi | logistic regression for classification, output score before logistic transformation |
-
- Specify the 'predictor' parameter as one of the following options to offload prediction stage on OneAPI device.
-
- ### Algorithms
- | predictor | Description |
- | --- | --- |
- | predictor_oneapi | prediction using OneAPI device |
-
- Please note that parameter names are not finalized and can be changed during further integration of OneAPI support.
+ | sycl | use the default SYCL device |
+ | sycl:gpu | use the default SYCL GPU |
+ | sycl:cpu | use the default SYCL CPU |
+ | sycl:gpu:N | use SYCL GPU number N |
+ | sycl:cpu:N | use SYCL CPU number N |

Python example:
```python
- param['predictor'] = 'predictor_oneapi'
- param['objective'] = 'reg:squarederror_oneapi'
+ param['device'] = 'sycl:gpu:0'
```

## Dependencies