Skip to content

Commit

Permalink
Merge pull request #5361 from bdice/clang-format-update
Browse files Browse the repository at this point in the history
Update clang-format to 16.0.1.
  • Loading branch information
jolorunyomi authored Apr 25, 2023
2 parents 452f90f + 3b9520d commit 8a84f3c
Show file tree
Hide file tree
Showing 124 changed files with 3,310 additions and 3,783 deletions.
11 changes: 6 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ repos:
types_or: [python, cython]
exclude: thirdparty
additional_dependencies: [flake8-force]
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v16.0.1
hooks:
- id: clang-format
types_or: [c, c++, cuda]
args: ["-fallback-style=none", "-style=file", "-i"]
- repo: https://github.com/codespell-project/codespell
rev: v2.2.2
hooks:
Expand All @@ -33,11 +39,6 @@ repos:
entry: '(category=|\s)DeprecationWarning[,)]'
language: pygrep
types_or: [python, cython]
- id: clang-format
name: clang-format
entry: python ./cpp/scripts/run-clang-format.py
language: python
additional_dependencies: [clang-format==11.1.0]
- id: copyright-check
name: copyright-check
entry: python ./ci/checks/copyright.py --fix-in-place
Expand Down
2 changes: 1 addition & 1 deletion BUILD.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ To install cuML from source, ensure the following dependencies are met:
5. Cython (>= 0.29)
6. gcc (>= 9.0)
7. BLAS - Any BLAS compatible with cmake's [FindBLAS](https://cmake.org/cmake/help/v3.14/module/FindBLAS.html). Note that BLAS has to be installed in the same folder system as cmake; for example, if you are using a conda-installed cmake, the BLAS implementation should also be installed in the conda environment.
8. clang-format (= 11.1.0) - enforces uniform C++ coding style; required to build cuML from source. The packages `clang=8` and `clang-tools=8` from the conda-forge channel should be sufficient, if you are on conda. If not using conda, install the right version using your OS package manager.
8. clang-format (= 16.0.1) - enforces uniform C++ coding style; required to build cuML from source. The packages `clang=16` and `clang-tools=16` from the conda-forge channel should be sufficient, if you are on conda. If not using conda, install the right version using your OS package manager.
9. NCCL (>=2.4)
10. UCX [optional] (>= 1.7) - enables point-to-point messaging in the cuML standard communicator. This is necessary for many multi-node multi-GPU cuML algorithms to function.

Expand Down
2 changes: 1 addition & 1 deletion cpp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ The `test` directory has subdirectories that reflect this distinction between th
1. cmake (>= 3.23.1)
2. CUDA (>= 11.0)
3. gcc (>=9.3.0)
4. clang-format (= 11.1.0) - enforces uniform C++ coding style; required to build cuML from source. The packages `clang=11` and `clang-tools=11` from the conda-forge channel should be sufficient, if you are on conda. If not using conda, install the right version using your OS package manager.
4. clang-format (= 16.0.1) - enforces uniform C++ coding style; required to build cuML from source. The packages `clang=16` and `clang-tools=16` from the conda-forge channel should be sufficient, if you are on conda. If not using conda, install the right version using your OS package manager.

### Building cuML:

Expand Down
34 changes: 17 additions & 17 deletions cpp/bench/sg/fil.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -43,16 +43,16 @@ class FIL : public RegressionFixture<float> {

public:
FIL(const std::string& name, const Params& p)
/*
fitting to linear combinations in "y" normally yields trees that check
values of all significant columns, as well as their linear
combinations in "X". During inference, the exact threshold
values do not affect speed. The distribution of column popularity does
not affect speed barring lots of uninformative columns in succession.
Hence, this method represents real datasets well enough for both
classification and regression.
*/
: RegressionFixture<float>(name, p.data, p.blobs), model(p.model), p_rest(p)
/*
fitting to linear combinations in "y" normally yields trees that check
values of all significant columns, as well as their linear
combinations in "X". During inference, the exact threshold
values do not affect speed. The distribution of column popularity does
not affect speed barring lots of uninformative columns in succession.
Hence, this method represents real datasets well enough for both
classification and regression.
*/
: RegressionFixture<float>(name, p.data, p.blobs), model(p.model), p_rest(p)
{
}

Expand Down Expand Up @@ -140,12 +140,12 @@ std::vector<Params> getInputs()
Params p;
p.data.rowMajor = true;
p.blobs = {.n_informative = -1, // Just a placeholder value, anyway changed below
.effective_rank = -1, // Just a placeholder value, anyway changed below
.bias = 0.f,
.tail_strength = 0.1,
.noise = 0.01,
.shuffle = false,
.seed = 12345ULL};
.effective_rank = -1, // Just a placeholder value, anyway changed below
.bias = 0.f,
.tail_strength = 0.1,
.noise = 0.01,
.shuffle = false,
.seed = 12345ULL};

p.rf = set_rf_params(10, /*max_depth */
(1 << 20), /* max_leaves */
Expand Down
170 changes: 70 additions & 100 deletions cpp/bench/sg/filex.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@
* limitations under the License.
*/

#include <cuml/fil/fil.h>
#include <cuml/experimental/fil/infer_kind.hpp>
#include <cuml/experimental/fil/treelite_importer.hpp>
#include <cuml/experimental/fil/detail/raft_proto/device_type.hpp>
#include <cuml/experimental/fil/infer_kind.hpp>
#include <cuml/experimental/fil/tree_layout.hpp>
#include <cuml/experimental/fil/treelite_importer.hpp>
#include <cuml/fil/fil.h>

#include "benchmark.cuh"
#include <chrono>
Expand Down Expand Up @@ -49,7 +49,7 @@ class FILEX : public RegressionFixture<float> {

public:
FILEX(const std::string& name, const Params& p)
: RegressionFixture<float>(name, p.data, p.blobs), model(p.model), p_rest(p)
: RegressionFixture<float>(name, p.data, p.blobs), model(p.model), p_rest(p)
{
}

Expand All @@ -59,7 +59,7 @@ class FILEX : public RegressionFixture<float> {
if (!params.rowMajor) { state.SkipWithError("FIL only supports row-major inputs"); }
// create model
ML::RandomForestRegressorF rf_model;
auto* mPtr = &rf_model;
auto* mPtr = &rf_model;
auto train_nrows = std::min(params.nrows, 1000);
fit(*handle, mPtr, data.X.data(), train_nrows, params.ncols, data.y.data(), p_rest.rf);
handle->sync_stream(stream);
Expand All @@ -73,8 +73,7 @@ class FILEX : public RegressionFixture<float> {
false,
raft_proto::device_type::gpu,
0,
stream
);
stream);

ML::fil::treelite_params_t tl_params = {
.algo = ML::fil::algo_t::NAIVE,
Expand All @@ -86,10 +85,10 @@ class FILEX : public RegressionFixture<float> {
.n_items = 0,
.pforest_shape_str = nullptr};
ML::fil::forest_variant forest_variant;
auto optimal_chunk_size = 1;
auto optimal_storage_type = p_rest.storage;
auto optimal_algo_type = ML::fil::algo_t::NAIVE;
auto optimal_layout = ML::experimental::fil::tree_layout::breadth_first;
auto optimal_chunk_size = 1;
auto optimal_storage_type = p_rest.storage;
auto optimal_algo_type = ML::fil::algo_t::NAIVE;
auto optimal_layout = ML::experimental::fil::tree_layout::breadth_first;
auto allowed_storage_types = std::vector<ML::fil::storage_type_t>{};
if (p_rest.storage == ML::fil::storage_type_t::DENSE) {
allowed_storage_types.push_back(ML::fil::storage_type_t::DENSE);
Expand Down Expand Up @@ -119,14 +118,7 @@ class FILEX : public RegressionFixture<float> {
tl_params.algo = algo_type;
for (auto layout : allowed_layouts) {
filex_model = ML::experimental::fil::import_from_treelite_handle(
model,
layout,
128,
false,
raft_proto::device_type::gpu,
0,
stream
);
model, layout, 128, false, raft_proto::device_type::gpu, 0, stream);
for (auto chunk_size = 1; chunk_size <= 32; chunk_size *= 2) {
if (!p_rest.use_experimental) {
tl_params.threads_per_tree = chunk_size;
Expand All @@ -139,104 +131,83 @@ class FILEX : public RegressionFixture<float> {
for (int i = 0; i < p_rest.predict_repetitions; i++) {
// Create FIL forest
if (p_rest.use_experimental) {
filex_model.predict(
*handle,
data.y.data(),
data.X.data(),
params.nrows,
raft_proto::device_type::gpu,
raft_proto::device_type::gpu,
ML::experimental::fil::infer_kind::default_kind,
chunk_size
);
filex_model.predict(*handle,
data.y.data(),
data.X.data(),
params.nrows,
raft_proto::device_type::gpu,
raft_proto::device_type::gpu,
ML::experimental::fil::infer_kind::default_kind,
chunk_size);
} else {
ML::fil::predict(*handle,
forest,
data.y.data(),
data.X.data(),
params.nrows,
false);
ML::fil::predict(
*handle, forest, data.y.data(), data.X.data(), params.nrows, false);
}
}
handle->sync_stream();
handle->sync_stream_pool();
auto end = std::chrono::high_resolution_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(
end - start
).count();
auto elapsed =
std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
if (elapsed < min_time) {
min_time = elapsed;
optimal_chunk_size = chunk_size;
min_time = elapsed;
optimal_chunk_size = chunk_size;
optimal_storage_type = storage_type;
optimal_algo_type = algo_type;
optimal_layout = layout;
optimal_algo_type = algo_type;
optimal_layout = layout;
}

// Clean up from FIL
if (!p_rest.use_experimental) {
ML::fil::free(*handle, forest);
}
}
if (!p_rest.use_experimental) {
break;
if (!p_rest.use_experimental) { ML::fil::free(*handle, forest); }
}
if (!p_rest.use_experimental) { break; }
}
if (p_rest.use_experimental) {
break;
}
}
if (p_rest.use_experimental) {
break;
if (p_rest.use_experimental) { break; }
}
if (p_rest.use_experimental) { break; }
}

// Build optimal FIL tree
tl_params.storage_type = optimal_storage_type;
tl_params.algo = optimal_algo_type;
tl_params.storage_type = optimal_storage_type;
tl_params.algo = optimal_algo_type;
tl_params.threads_per_tree = optimal_chunk_size;
ML::fil::from_treelite(*handle, &forest_variant, model, &tl_params);
forest = std::get<ML::fil::forest_t<float>>(forest_variant);
forest = std::get<ML::fil::forest_t<float>>(forest_variant);
filex_model = ML::experimental::fil::import_from_treelite_handle(
model,
optimal_layout,
128,
false,
raft_proto::device_type::gpu,
0,
stream
);
model, optimal_layout, 128, false, raft_proto::device_type::gpu, 0, stream);

handle->sync_stream();
handle->sync_stream_pool();

// only time prediction
this->loopOnState(state, [this, &filex_model, optimal_chunk_size]() {
for (int i = 0; i < p_rest.predict_repetitions; i++) {
if (p_rest.use_experimental) {
filex_model.predict(
*handle,
this->data.y.data(),
this->data.X.data(),
this->params.nrows,
raft_proto::device_type::gpu,
raft_proto::device_type::gpu,
ML::experimental::fil::infer_kind::default_kind,
optimal_chunk_size
);
handle->sync_stream();
handle->sync_stream_pool();
} else {
ML::fil::predict(*this->handle,
this->forest,
this->data.y.data(),
this->data.X.data(),
this->params.nrows,
false);
handle->sync_stream();
handle->sync_stream_pool();
this->loopOnState(
state,
[this, &filex_model, optimal_chunk_size]() {
for (int i = 0; i < p_rest.predict_repetitions; i++) {
if (p_rest.use_experimental) {
filex_model.predict(*handle,
this->data.y.data(),
this->data.X.data(),
this->params.nrows,
raft_proto::device_type::gpu,
raft_proto::device_type::gpu,
ML::experimental::fil::infer_kind::default_kind,
optimal_chunk_size);
handle->sync_stream();
handle->sync_stream_pool();
} else {
ML::fil::predict(*this->handle,
this->forest,
this->data.y.data(),
this->data.X.data(),
this->params.nrows,
false);
handle->sync_stream();
handle->sync_stream_pool();
}
}
}
}, true);
},
true);
}

void allocateBuffers(const ::benchmark::State& state) override { Base::allocateBuffers(state); }
Expand Down Expand Up @@ -269,12 +240,12 @@ std::vector<Params> getInputs()
Params p;
p.data.rowMajor = true;
p.blobs = {.n_informative = -1, // Just a placeholder value, anyway changed below
.effective_rank = -1, // Just a placeholder value, anyway changed below
.bias = 0.f,
.tail_strength = 0.1,
.noise = 0.01,
.shuffle = false,
.seed = 12345ULL};
.effective_rank = -1, // Just a placeholder value, anyway changed below
.bias = 0.f,
.tail_strength = 0.1,
.noise = 0.01,
.shuffle = false,
.seed = 12345ULL};

p.rf = set_rf_params(10, /*max_depth */
(1 << 20), /* max_leaves */
Expand Down Expand Up @@ -306,8 +277,7 @@ std::vector<Params> getInputs()
{(int)1e6, 20, 1, 10, 10000, storage_type_t::DENSE, false},
{(int)1e6, 20, 1, 10, 10000, storage_type_t::DENSE, true},
{(int)1e6, 200, 1, 10, 1000, storage_type_t::DENSE, false},
{(int)1e6, 200, 1, 10, 1000, storage_type_t::DENSE, true}
};
{(int)1e6, 200, 1, 10, 1000, storage_type_t::DENSE, true}};
for (auto& i : var_params) {
p.data.nrows = i.nrows;
p.data.ncols = i.ncols;
Expand All @@ -326,6 +296,6 @@ std::vector<Params> getInputs()

ML_BENCH_REGISTER(Params, FILEX, "", getInputs());

} // end namespace fil
} // namespace filex
} // end namespace Bench
} // end namespace ML
15 changes: 7 additions & 8 deletions cpp/bench/sg/rf_classifier.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -30,8 +30,7 @@ struct Params {
};

template <typename D>
struct RFClassifierModel {
};
struct RFClassifierModel {};

template <>
struct RFClassifierModel<float> {
Expand Down Expand Up @@ -86,11 +85,11 @@ std::vector<Params> getInputs()
std::vector<Params> out;
Params p;
p.data.rowMajor = false;
p.blobs = {10.0, // cluster_std
false, // shuffle
-10.0, // center_box_min
10.0, // center_box_max
2152953ULL}; // seed
p.blobs = {10.0, // cluster_std
false, // shuffle
-10.0, // center_box_min
10.0, // center_box_max
2152953ULL}; // seed

p.rf = set_rf_params(10, /*max_depth */
(1 << 20), /* max_leaves */
Expand Down
Loading

0 comments on commit 8a84f3c

Please sign in to comment.