Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updating Clang Version to 11.0.0 #4029

Merged
merged 14 commits into from
Jul 9, 2021
  •  
  •  
  •  
1 change: 0 additions & 1 deletion ci/checks/style.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ cd $WORKSPACE
export GIT_DESCRIBE_TAG=`git describe --tags`
export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`
conda install "ucx-py=0.21.*" "ucx-proc=*=gpu"
conda install -c conda-forge clang=8.0.1 clang-tools=8.0.1

# Run flake8 and get results/return code
FLAKE=`flake8 --config=python/setup.cfg`
Expand Down
95 changes: 51 additions & 44 deletions cpp/.clang-format
Original file line number Diff line number Diff line change
@@ -1,72 +1,78 @@
---
# Refer to the following link for an explanation of each parameter:
# https://releases.llvm.org/11.0.0/tools/clang/docs/ClangFormatStyleOptions.html
Language: Cpp
# BasedOnStyle: Google
Language: Cpp
# BasedOnStyle: Google
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
AlignConsecutiveAssignments: true
AlignConsecutiveBitFields: true
AlignConsecutiveDeclarations: false
AlignConsecutiveMacros: true
AlignEscapedNewlines: Left
AlignOperands: true
AlignOperands: true
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortBlocksOnASingleLine: true
AllowShortCaseLabelsOnASingleLine: true
AllowShortEnumsOnASingleLine: true
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: true
AllowShortLambdasOnASingleLine: true
AllowShortLoopsOnASingleLine: false
# This is deprecated
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
BinPackArguments: true
BinPackParameters: true
BinPackArguments: false
BinPackParameters: false
BraceWrapping:
AfterClass: false
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
# Disable the splits below; otherwise they just add to the vertical length of source files!
SplitEmptyFunction: false
SplitEmptyRecord: false
SplitEmptyNamespace: false
BreakAfterJavaFieldAnnotations: false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeBraces: WebKit
BreakBeforeInheritanceComma: false
BreakInheritanceList: BeforeColon
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakInheritanceList: BeforeColon
BreakStringLiterals: true
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
ColumnLimit: 100
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
# Kept the below 2 to be the same as `IndentWidth` to keep everything uniform
ConstructorInitializerIndentWidth: 2
ContinuationIndentWidth: 2
Cpp11BracedListStyle: true
DerivePointerAlignment: true
DisableFormat: false
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IncludeBlocks: Preserve
IncludeCategories:
IncludeBlocks: Preserve
IncludeCategories:
- Regex: '^<ext/.*\.h>'
Priority: 2
- Regex: '^<.*\.h>'
Expand Down Expand Up @@ -100,9 +106,9 @@ PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
RawStringFormats:
- Language: Cpp
Delimiters:
RawStringFormats:
- Language: Cpp
Delimiters:
- cc
- CC
- cpp
Expand All @@ -111,7 +117,7 @@ RawStringFormats:
- 'c++'
- 'C++'
CanonicalDelimiter: ''
- Language: TextProto
- Language: TextProto
Delimiters:
- pb
- PB
Expand All @@ -126,10 +132,10 @@ RawStringFormats:
- ParseTextOrDie
- ParseTextProtoOrDie
CanonicalDelimiter: ''
BasedOnStyle: google
BasedOnStyle: google
# Enabling comment reflow messes up the formatting of doxygen comments!
ReflowComments: false
SortIncludes: true
ReflowComments: true
SortIncludes: true
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: true
Expand All @@ -139,19 +145,20 @@ SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceBeforeSquareBrackets: false
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInAngles: false
SpacesInConditionalStatement: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
# We are C++14, but clang-format puts this under `Cpp11` itself
Standard: Cpp11
StatementMacros:
Standard: c++17
StatementMacros:
- Q_UNUSED
- QT_REQUIRE_VERSION
# Be consistent with indent-width, even for people who use tab for indentation!
TabWidth: 2
UseTab: Never
...
TabWidth: 2
UseTab: Never
69 changes: 35 additions & 34 deletions cpp/bench/common/ml_benchmark.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ struct CudaEventTimer {
* the L2 cache flush.
* @param s CUDA stream we are measuring time on.
*/
CudaEventTimer(::benchmark::State& st, char* ptr, int l2CacheSize,
cudaStream_t s)
: state(&st), stream(s) {
CudaEventTimer(::benchmark::State& st, char* ptr, int l2CacheSize, cudaStream_t s)
: state(&st), stream(s)
{
CUDA_CHECK(cudaEventCreate(&start));
CUDA_CHECK(cudaEventCreate(&stop));
// flush L2?
Expand All @@ -67,7 +67,8 @@ struct CudaEventTimer {
* the benchmark::State object provided to the ctor will be set to the
* value given by `cudaEventElapsedTime()`.
*/
~CudaEventTimer() {
~CudaEventTimer()
{
CUDA_CHECK_NO_THROW(cudaEventRecord(stop, stream));
CUDA_CHECK_NO_THROW(cudaEventSynchronize(stop));
float milliseconds = 0.0f;
Expand All @@ -87,21 +88,21 @@ struct CudaEventTimer {
/** Main fixture to be inherited and used by all other c++ benchmarks in cuml */
class Fixture : public ::benchmark::Fixture {
public:
Fixture(const std::string& name,
std::shared_ptr<raft::mr::device::allocator> _alloc)
: ::benchmark::Fixture(), d_alloc(_alloc) {
Fixture(const std::string& name, std::shared_ptr<raft::mr::device::allocator> _alloc)
: ::benchmark::Fixture(), d_alloc(_alloc)
{
SetName(name.c_str());
}
Fixture() = delete;

void SetUp(const ::benchmark::State& state) override {
void SetUp(const ::benchmark::State& state) override
{
CUDA_CHECK(cudaStreamCreate(&stream));
allocateBuffers(state);
int devId = 0;
CUDA_CHECK(cudaGetDevice(&devId));
l2CacheSize = 0;
CUDA_CHECK(
cudaDeviceGetAttribute(&l2CacheSize, cudaDevAttrL2CacheSize, devId));
CUDA_CHECK(cudaDeviceGetAttribute(&l2CacheSize, cudaDevAttrL2CacheSize, devId));
if (l2CacheSize > 0) {
alloc(scratchBuffer, l2CacheSize, false);
} else {
Expand All @@ -110,23 +111,21 @@ class Fixture : public ::benchmark::Fixture {
CUDA_CHECK(cudaStreamSynchronize(stream));
}

void TearDown(const ::benchmark::State& state) override {
void TearDown(const ::benchmark::State& state) override
{
CUDA_CHECK(cudaStreamSynchronize(stream));
if (l2CacheSize > 0) {
dealloc(scratchBuffer, l2CacheSize);
}
if (l2CacheSize > 0) { dealloc(scratchBuffer, l2CacheSize); }
deallocateBuffers(state);
CUDA_CHECK(cudaStreamSynchronize(stream));
CUDA_CHECK(cudaStreamDestroy(stream));
}

// to keep compiler happy
void SetUp(::benchmark::State& st) override {
SetUp(const_cast<const ::benchmark::State&>(st));
}
void SetUp(::benchmark::State& st) override { SetUp(const_cast<const ::benchmark::State&>(st)); }

// to keep compiler happy
void TearDown(::benchmark::State& st) override {
void TearDown(::benchmark::State& st) override
{
TearDown(const_cast<const ::benchmark::State&>(st));
}

Expand All @@ -137,14 +136,15 @@ class Fixture : public ::benchmark::Fixture {
virtual void allocateBuffers(const ::benchmark::State& state) {}
virtual void deallocateBuffers(const ::benchmark::State& state) {}

void BenchmarkCase(::benchmark::State& state) {
void BenchmarkCase(::benchmark::State& state)
{
runBenchmark(state);
generateMetrics(state);
}

template <typename Lambda>
void loopOnState(::benchmark::State& state, Lambda benchmarkFunc,
bool flushL2 = true) {
void loopOnState(::benchmark::State& state, Lambda benchmarkFunc, bool flushL2 = true)
{
char* buff;
int size;
if (flushL2) {
Expand All @@ -161,16 +161,16 @@ class Fixture : public ::benchmark::Fixture {
}

template <typename T>
void alloc(T*& ptr, size_t len, bool init = false) {
void alloc(T*& ptr, size_t len, bool init = false)
{
auto nBytes = len * sizeof(T);
ptr = (T*)d_alloc->allocate(nBytes, stream);
if (init) {
CUDA_CHECK(cudaMemsetAsync(ptr, 0, nBytes, stream));
}
ptr = (T*)d_alloc->allocate(nBytes, stream);
if (init) { CUDA_CHECK(cudaMemsetAsync(ptr, 0, nBytes, stream)); }
}

template <typename T>
void dealloc(T* ptr, size_t len) {
void dealloc(T* ptr, size_t len)
{
d_alloc->deallocate(ptr, len * sizeof(T), stream);
}

Expand All @@ -183,17 +183,18 @@ class Fixture : public ::benchmark::Fixture {
namespace internal {
template <typename Params, typename Class>
struct Registrar {
Registrar(const std::vector<Params>& paramsList, const std::string& testClass,
const std::string& testName) {
Registrar(const std::vector<Params>& paramsList,
const std::string& testClass,
const std::string& testName)
{
int counter = 0;
for (const auto& param : paramsList) {
std::stringstream oss;
oss << testClass;
if (!testName.empty()) oss << "/" << testName;
oss << "/" << counter;
auto testFullName = oss.str();
auto* b = ::benchmark::internal::RegisterBenchmarkInternal(
new Class(testFullName, param));
auto* b = ::benchmark::internal::RegisterBenchmarkInternal(new Class(testFullName, param));
///@todo: expose a currying-like interface to the final macro
b->UseManualTime();
b->Unit(benchmark::kMillisecond);
Expand Down Expand Up @@ -222,9 +223,9 @@ struct Registrar {
* a statically populated vector or from the result of
* calling a function
*/
#define ML_BENCH_REGISTER(ParamsClass, TestClass, TestName, params) \
static MLCommon::Bench::internal::Registrar<ParamsClass, TestClass> \
BENCHMARK_PRIVATE_NAME(registrar)(params, #TestClass, TestName)
#define ML_BENCH_REGISTER(ParamsClass, TestClass, TestName, params) \
static MLCommon::Bench::internal::Registrar<ParamsClass, TestClass> BENCHMARK_PRIVATE_NAME( \
registrar)(params, #TestClass, TestName)

} // end namespace Bench
} // end namespace MLCommon
25 changes: 15 additions & 10 deletions cpp/bench/prims/add.cu
Original file line number Diff line number Diff line change
Expand Up @@ -29,33 +29,38 @@ struct AddParams {
template <typename T>
struct AddBench : public Fixture {
AddBench(const std::string& name, const AddParams& p)
: Fixture(name, std::shared_ptr<raft::mr::device::allocator>(
new raft::mr::device::default_allocator)),
params(p) {}
: Fixture(
name,
std::shared_ptr<raft::mr::device::allocator>(new raft::mr::device::default_allocator)),
params(p)
{
}

protected:
void allocateBuffers(const ::benchmark::State& state) override {
void allocateBuffers(const ::benchmark::State& state) override
{
alloc(ptr0, params.len, true);
alloc(ptr1, params.len, true);
}

void deallocateBuffers(const ::benchmark::State& state) override {
void deallocateBuffers(const ::benchmark::State& state) override
{
dealloc(ptr0, params.len);
dealloc(ptr1, params.len);
}

void runBenchmark(::benchmark::State& state) override {
loopOnState(state, [this]() {
raft::linalg::add(ptr0, ptr0, ptr1, params.len, stream);
});
void runBenchmark(::benchmark::State& state) override
{
loopOnState(state, [this]() { raft::linalg::add(ptr0, ptr0, ptr1, params.len, stream); });
}

private:
AddParams params;
T *ptr0, *ptr1;
}; // struct AddBench

static std::vector<AddParams> getInputs() {
static std::vector<AddParams> getInputs()
{
return {
{256 * 1024 * 1024},
{256 * 1024 * 1024 + 2},
Expand Down
Loading