Skip to content

Commit

Permalink
Updating Clang Version to 11.0.0 (rapidsai#4029)
Browse files Browse the repository at this point in the history
Follow-up PR to rapidsai/cudf#6695, applying the same changes to `rapidsai/cuml`.

Depends on: rapidsai/integration#304

Authors:
  - Conor Hoekstra (https://github.com/codereport)

Approvers:
  - William Hicks (https://github.com/wphicks)
  - AJ Schmidt (https://github.com/ajschmidt8)
  - Robert Maynard (https://github.com/robertmaynard)
  - Dante Gama Dessavre (https://github.com/dantegd)

URL: rapidsai#4029
  • Loading branch information
codereport authored Jul 9, 2021
1 parent a52672e commit 1db7e9d
Show file tree
Hide file tree
Showing 482 changed files with 54,289 additions and 48,732 deletions.
1 change: 0 additions & 1 deletion ci/checks/style.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ cd $WORKSPACE
export GIT_DESCRIBE_TAG=`git describe --tags`
export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`
conda install "ucx-py=0.21.*" "ucx-proc=*=gpu"
conda install -c conda-forge clang=8.0.1 clang-tools=8.0.1

# Run flake8 and get results/return code
FLAKE=`flake8 --config=python/setup.cfg`
Expand Down
95 changes: 51 additions & 44 deletions cpp/.clang-format
Original file line number Diff line number Diff line change
@@ -1,72 +1,78 @@
---
# Refer to the following link for an explanation of each parameter:
# https://releases.llvm.org/11.0.0/tools/clang/docs/ClangFormatStyleOptions.html
Language: Cpp
# BasedOnStyle: Google
Language: Cpp
# BasedOnStyle: Google
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
AlignConsecutiveAssignments: true
AlignConsecutiveBitFields: true
AlignConsecutiveDeclarations: false
AlignConsecutiveMacros: true
AlignEscapedNewlines: Left
AlignOperands: true
AlignOperands: true
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortBlocksOnASingleLine: true
AllowShortCaseLabelsOnASingleLine: true
AllowShortEnumsOnASingleLine: true
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: true
AllowShortLambdasOnASingleLine: true
AllowShortLoopsOnASingleLine: false
# This is deprecated
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
BinPackArguments: true
BinPackParameters: true
BinPackArguments: false
BinPackParameters: false
BraceWrapping:
AfterClass: false
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
# Disable the splits below; otherwise they just add to the vertical length of source files!
SplitEmptyFunction: false
SplitEmptyRecord: false
SplitEmptyNamespace: false
BreakAfterJavaFieldAnnotations: false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeBraces: WebKit
BreakBeforeInheritanceComma: false
BreakInheritanceList: BeforeColon
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakInheritanceList: BeforeColon
BreakStringLiterals: true
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
ColumnLimit: 100
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
# Kept the below 2 to be the same as `IndentWidth` to keep everything uniform
ConstructorInitializerIndentWidth: 2
ContinuationIndentWidth: 2
Cpp11BracedListStyle: true
DerivePointerAlignment: true
DisableFormat: false
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IncludeBlocks: Preserve
IncludeCategories:
IncludeBlocks: Preserve
IncludeCategories:
- Regex: '^<ext/.*\.h>'
Priority: 2
- Regex: '^<.*\.h>'
Expand Down Expand Up @@ -100,9 +106,9 @@ PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
RawStringFormats:
- Language: Cpp
Delimiters:
RawStringFormats:
- Language: Cpp
Delimiters:
- cc
- CC
- cpp
Expand All @@ -111,7 +117,7 @@ RawStringFormats:
- 'c++'
- 'C++'
CanonicalDelimiter: ''
- Language: TextProto
- Language: TextProto
Delimiters:
- pb
- PB
Expand All @@ -126,10 +132,10 @@ RawStringFormats:
- ParseTextOrDie
- ParseTextProtoOrDie
CanonicalDelimiter: ''
BasedOnStyle: google
BasedOnStyle: google
# Caution: enabling comment reflow can mess up the formatting of doxygen comments (note that it is turned on below).
ReflowComments: false
SortIncludes: true
ReflowComments: true
SortIncludes: true
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: true
Expand All @@ -139,19 +145,20 @@ SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceBeforeSquareBrackets: false
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInAngles: false
SpacesInConditionalStatement: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
# We are C++14, but clang-format puts this under `Cpp11` itself
Standard: Cpp11
StatementMacros:
Standard: c++17
StatementMacros:
- Q_UNUSED
- QT_REQUIRE_VERSION
# Be consistent with indent-width, even for people who use tab for indentation!
TabWidth: 2
UseTab: Never
...
TabWidth: 2
UseTab: Never
69 changes: 35 additions & 34 deletions cpp/bench/common/ml_benchmark.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ struct CudaEventTimer {
* the L2 cache flush.
* @param s CUDA stream we are measuring time on.
*/
CudaEventTimer(::benchmark::State& st, char* ptr, int l2CacheSize,
cudaStream_t s)
: state(&st), stream(s) {
CudaEventTimer(::benchmark::State& st, char* ptr, int l2CacheSize, cudaStream_t s)
: state(&st), stream(s)
{
CUDA_CHECK(cudaEventCreate(&start));
CUDA_CHECK(cudaEventCreate(&stop));
// flush L2?
Expand All @@ -67,7 +67,8 @@ struct CudaEventTimer {
* the benchmark::State object provided to the ctor will be set to the
* value given by `cudaEventElapsedTime()`.
*/
~CudaEventTimer() {
~CudaEventTimer()
{
CUDA_CHECK_NO_THROW(cudaEventRecord(stop, stream));
CUDA_CHECK_NO_THROW(cudaEventSynchronize(stop));
float milliseconds = 0.0f;
Expand All @@ -87,21 +88,21 @@ struct CudaEventTimer {
/** Main fixture to be inherited and used by all other c++ benchmarks in cuml */
class Fixture : public ::benchmark::Fixture {
public:
Fixture(const std::string& name,
std::shared_ptr<raft::mr::device::allocator> _alloc)
: ::benchmark::Fixture(), d_alloc(_alloc) {
Fixture(const std::string& name, std::shared_ptr<raft::mr::device::allocator> _alloc)
: ::benchmark::Fixture(), d_alloc(_alloc)
{
SetName(name.c_str());
}
Fixture() = delete;

void SetUp(const ::benchmark::State& state) override {
void SetUp(const ::benchmark::State& state) override
{
CUDA_CHECK(cudaStreamCreate(&stream));
allocateBuffers(state);
int devId = 0;
CUDA_CHECK(cudaGetDevice(&devId));
l2CacheSize = 0;
CUDA_CHECK(
cudaDeviceGetAttribute(&l2CacheSize, cudaDevAttrL2CacheSize, devId));
CUDA_CHECK(cudaDeviceGetAttribute(&l2CacheSize, cudaDevAttrL2CacheSize, devId));
if (l2CacheSize > 0) {
alloc(scratchBuffer, l2CacheSize, false);
} else {
Expand All @@ -110,23 +111,21 @@ class Fixture : public ::benchmark::Fixture {
CUDA_CHECK(cudaStreamSynchronize(stream));
}

void TearDown(const ::benchmark::State& state) override {
void TearDown(const ::benchmark::State& state) override
{
CUDA_CHECK(cudaStreamSynchronize(stream));
if (l2CacheSize > 0) {
dealloc(scratchBuffer, l2CacheSize);
}
if (l2CacheSize > 0) { dealloc(scratchBuffer, l2CacheSize); }
deallocateBuffers(state);
CUDA_CHECK(cudaStreamSynchronize(stream));
CUDA_CHECK(cudaStreamDestroy(stream));
}

// to keep compiler happy
void SetUp(::benchmark::State& st) override {
SetUp(const_cast<const ::benchmark::State&>(st));
}
void SetUp(::benchmark::State& st) override { SetUp(const_cast<const ::benchmark::State&>(st)); }

// to keep compiler happy
void TearDown(::benchmark::State& st) override {
void TearDown(::benchmark::State& st) override
{
TearDown(const_cast<const ::benchmark::State&>(st));
}

Expand All @@ -137,14 +136,15 @@ class Fixture : public ::benchmark::Fixture {
virtual void allocateBuffers(const ::benchmark::State& state) {}
virtual void deallocateBuffers(const ::benchmark::State& state) {}

void BenchmarkCase(::benchmark::State& state) {
void BenchmarkCase(::benchmark::State& state)
{
runBenchmark(state);
generateMetrics(state);
}

template <typename Lambda>
void loopOnState(::benchmark::State& state, Lambda benchmarkFunc,
bool flushL2 = true) {
void loopOnState(::benchmark::State& state, Lambda benchmarkFunc, bool flushL2 = true)
{
char* buff;
int size;
if (flushL2) {
Expand All @@ -161,16 +161,16 @@ class Fixture : public ::benchmark::Fixture {
}

template <typename T>
void alloc(T*& ptr, size_t len, bool init = false) {
void alloc(T*& ptr, size_t len, bool init = false)
{
auto nBytes = len * sizeof(T);
ptr = (T*)d_alloc->allocate(nBytes, stream);
if (init) {
CUDA_CHECK(cudaMemsetAsync(ptr, 0, nBytes, stream));
}
ptr = (T*)d_alloc->allocate(nBytes, stream);
if (init) { CUDA_CHECK(cudaMemsetAsync(ptr, 0, nBytes, stream)); }
}

template <typename T>
void dealloc(T* ptr, size_t len) {
void dealloc(T* ptr, size_t len)
{
d_alloc->deallocate(ptr, len * sizeof(T), stream);
}

Expand All @@ -183,17 +183,18 @@ class Fixture : public ::benchmark::Fixture {
namespace internal {
template <typename Params, typename Class>
struct Registrar {
Registrar(const std::vector<Params>& paramsList, const std::string& testClass,
const std::string& testName) {
Registrar(const std::vector<Params>& paramsList,
const std::string& testClass,
const std::string& testName)
{
int counter = 0;
for (const auto& param : paramsList) {
std::stringstream oss;
oss << testClass;
if (!testName.empty()) oss << "/" << testName;
oss << "/" << counter;
auto testFullName = oss.str();
auto* b = ::benchmark::internal::RegisterBenchmarkInternal(
new Class(testFullName, param));
auto* b = ::benchmark::internal::RegisterBenchmarkInternal(new Class(testFullName, param));
///@todo: expose a currying-like interface to the final macro
b->UseManualTime();
b->Unit(benchmark::kMillisecond);
Expand Down Expand Up @@ -222,9 +223,9 @@ struct Registrar {
* a statically populated vector or from the result of
* calling a function
*/
#define ML_BENCH_REGISTER(ParamsClass, TestClass, TestName, params) \
static MLCommon::Bench::internal::Registrar<ParamsClass, TestClass> \
BENCHMARK_PRIVATE_NAME(registrar)(params, #TestClass, TestName)
#define ML_BENCH_REGISTER(ParamsClass, TestClass, TestName, params) \
static MLCommon::Bench::internal::Registrar<ParamsClass, TestClass> BENCHMARK_PRIVATE_NAME( \
registrar)(params, #TestClass, TestName)

} // end namespace Bench
} // end namespace MLCommon
25 changes: 15 additions & 10 deletions cpp/bench/prims/add.cu
Original file line number Diff line number Diff line change
Expand Up @@ -29,33 +29,38 @@ struct AddParams {
template <typename T>
struct AddBench : public Fixture {
AddBench(const std::string& name, const AddParams& p)
: Fixture(name, std::shared_ptr<raft::mr::device::allocator>(
new raft::mr::device::default_allocator)),
params(p) {}
: Fixture(
name,
std::shared_ptr<raft::mr::device::allocator>(new raft::mr::device::default_allocator)),
params(p)
{
}

protected:
void allocateBuffers(const ::benchmark::State& state) override {
void allocateBuffers(const ::benchmark::State& state) override
{
alloc(ptr0, params.len, true);
alloc(ptr1, params.len, true);
}

void deallocateBuffers(const ::benchmark::State& state) override {
void deallocateBuffers(const ::benchmark::State& state) override
{
dealloc(ptr0, params.len);
dealloc(ptr1, params.len);
}

void runBenchmark(::benchmark::State& state) override {
loopOnState(state, [this]() {
raft::linalg::add(ptr0, ptr0, ptr1, params.len, stream);
});
void runBenchmark(::benchmark::State& state) override
{
loopOnState(state, [this]() { raft::linalg::add(ptr0, ptr0, ptr1, params.len, stream); });
}

private:
AddParams params;
T *ptr0, *ptr1;
}; // struct AddBench

static std::vector<AddParams> getInputs() {
static std::vector<AddParams> getInputs()
{
return {
{256 * 1024 * 1024},
{256 * 1024 * 1024 + 2},
Expand Down
Loading

0 comments on commit 1db7e9d

Please sign in to comment.