Updating Clang Version to 11.0.0 (rapidsai#4029)

Follow-up PR to rapidsai/cudf#6695, performing the same changes for `rapidsai/cuml`. Depends on: rapidsai/integration#304. Authors: - Conor Hoekstra (https://github.com/codereport) Approvers: - William Hicks (https://github.com/wphicks) - AJ Schmidt (https://github.com/ajschmidt8) - Robert Maynard (https://github.com/robertmaynard) - Dante Gama Dessavre (https://github.com/dantegd) URL: rapidsai#4029
vimarsh6739 · Jul 9, 2021 · 1db7e9d · 1db7e9d
1 parent a52672e
commit 1db7e9d
Show file tree

Hide file tree

Showing 482 changed files with 54,289 additions and 48,732 deletions.
diff --git a/ci/checks/style.sh b/ci/checks/style.sh
@@ -14,7 +14,6 @@ cd $WORKSPACE
 export GIT_DESCRIBE_TAG=`git describe --tags`
 export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`
 conda install "ucx-py=0.21.*" "ucx-proc=*=gpu"
-conda install -c conda-forge clang=8.0.1 clang-tools=8.0.1
 
 # Run flake8 and get results/return code
 FLAKE=`flake8 --config=python/setup.cfg`

diff --git a/cpp/.clang-format b/cpp/.clang-format
@@ -1,72 +1,78 @@
 ---
 # Refer to the following link for an explanation of each parameter:
 #   https://releases.llvm.org/11.0.0/tools/clang/docs/ClangFormatStyleOptions.html
-Language:        Cpp
-# BasedOnStyle:  Google
+Language: Cpp
+# BasedOnStyle: Google
 AccessModifierOffset: -1
 AlignAfterOpenBracket: Align
-AlignConsecutiveAssignments: false
+AlignConsecutiveAssignments: true
+AlignConsecutiveBitFields: true
 AlignConsecutiveDeclarations: false
+AlignConsecutiveMacros: true
 AlignEscapedNewlines: Left
-AlignOperands:   true
+AlignOperands: true
 AlignTrailingComments: true
+AllowAllArgumentsOnNextLine: true
+AllowAllConstructorInitializersOnNextLine: true
 AllowAllParametersOfDeclarationOnNextLine: true
-AllowShortBlocksOnASingleLine: false
-AllowShortCaseLabelsOnASingleLine: false
+AllowShortBlocksOnASingleLine: true 
+AllowShortCaseLabelsOnASingleLine: true
+AllowShortEnumsOnASingleLine: true
 AllowShortFunctionsOnASingleLine: All
 AllowShortIfStatementsOnASingleLine: true
-AllowShortLoopsOnASingleLine: true
+AllowShortLambdasOnASingleLine: true
+AllowShortLoopsOnASingleLine: false
 # This is deprecated
 AlwaysBreakAfterDefinitionReturnType: None
 AlwaysBreakAfterReturnType: None
 AlwaysBreakBeforeMultilineStrings: true
 AlwaysBreakTemplateDeclarations: Yes
-BinPackArguments: true
-BinPackParameters: true
+BinPackArguments:  false       
+BinPackParameters: false
 BraceWrapping:
-  AfterClass:      false
+  AfterClass:            false
   AfterControlStatement: false
-  AfterEnum:       false
-  AfterFunction:   false
-  AfterNamespace:  false
-  AfterObjCDeclaration: false
-  AfterStruct:     false
-  AfterUnion:      false
-  AfterExternBlock: false
-  BeforeCatch:     false
-  BeforeElse:      false
-  IndentBraces:    false
+  AfterEnum:             false
+  AfterFunction:         false
+  AfterNamespace:        false
+  AfterObjCDeclaration:  false
+  AfterStruct:           false
+  AfterUnion:            false
+  AfterExternBlock:      false
+  BeforeCatch:           false
+  BeforeElse:            false
+  IndentBraces:          false
   # disabling the below splits, else, they'll just add to the vertical length of source files!
   SplitEmptyFunction: false
   SplitEmptyRecord: false
   SplitEmptyNamespace: false
+BreakAfterJavaFieldAnnotations: false
 BreakBeforeBinaryOperators: None
-BreakBeforeBraces: Attach
+BreakBeforeBraces: WebKit
 BreakBeforeInheritanceComma: false
-BreakInheritanceList: BeforeColon
 BreakBeforeTernaryOperators: true
 BreakConstructorInitializersBeforeComma: false
 BreakConstructorInitializers: BeforeColon
-BreakAfterJavaFieldAnnotations: false
+BreakInheritanceList: BeforeColon
 BreakStringLiterals: true
-ColumnLimit:     80
-CommentPragmas:  '^ IWYU pragma:'
+ColumnLimit: 100
+CommentPragmas: '^ IWYU pragma:'
 CompactNamespaces: false
 ConstructorInitializerAllOnOneLineOrOnePerLine: true
 # Kept the below 2 to be the same as `IndentWidth` to keep everything uniform
 ConstructorInitializerIndentWidth: 2
 ContinuationIndentWidth: 2
 Cpp11BracedListStyle: true
-DerivePointerAlignment: true
-DisableFormat:   false
+DerivePointerAlignment: false
+DisableFormat: false
 ExperimentalAutoDetectBinPacking: false
 FixNamespaceComments: true
-ForEachMacros:   
+ForEachMacros:
   - foreach
   - Q_FOREACH
   - BOOST_FOREACH
-IncludeBlocks:   Preserve
-IncludeCategories: 
+IncludeBlocks: Preserve
+IncludeCategories:
   - Regex:           '^<ext/.*\.h>'
     Priority:        2
   - Regex:           '^<.*\.h>'
@@ -100,9 +106,9 @@ PenaltyBreakTemplateDeclaration: 10
 PenaltyExcessCharacter: 1000000
 PenaltyReturnTypeOnItsOwnLine: 200
 PointerAlignment: Left
-RawStringFormats: 
-  - Language:        Cpp
-    Delimiters:      
+RawStringFormats:
+  - Language: Cpp
+    Delimiters:
       - cc
       - CC
       - cpp
@@ -111,7 +117,7 @@ RawStringFormats:
       - 'c++'
       - 'C++'
     CanonicalDelimiter: ''
-  - Language:        TextProto
+  - Language: TextProto
     Delimiters:
       - pb
       - PB
@@ -126,10 +132,10 @@ RawStringFormats:
       - ParseTextOrDie
       - ParseTextProtoOrDie
     CanonicalDelimiter: ''
-    BasedOnStyle:    google
+    BasedOnStyle: google
 # NOTE: comment reflow is enabled below; be aware that reflow can disturb the formatting of doxygen comments!
-ReflowComments:  false
-SortIncludes:    true
+ReflowComments: true
+SortIncludes: true
 SortUsingDeclarations: true
 SpaceAfterCStyleCast: false
 SpaceAfterTemplateKeyword: true
@@ -139,19 +145,20 @@ SpaceBeforeCtorInitializerColon: true
 SpaceBeforeInheritanceColon: true
 SpaceBeforeParens: ControlStatements
 SpaceBeforeRangeBasedForLoopColon: true
+SpaceBeforeSquareBrackets: false
+SpaceInEmptyBlock: false
 SpaceInEmptyParentheses: false
 SpacesBeforeTrailingComments: 2
-SpacesInAngles:  false
+SpacesInAngles: false
+SpacesInConditionalStatement: false
 SpacesInContainerLiterals: true
 SpacesInCStyleCastParentheses: false
 SpacesInParentheses: false
 SpacesInSquareBrackets: false
-# We are C++14, but clang-format puts this under `Cpp11` itself
-Standard:        Cpp11
-StatementMacros: 
+Standard: c++17
+StatementMacros:
   - Q_UNUSED
   - QT_REQUIRE_VERSION
 # Be consistent with indent-width, even for people who use tab for indentation!
-TabWidth:        2
-UseTab:          Never
-...
+TabWidth: 2
+UseTab: Never
diff --git a/cpp/bench/common/ml_benchmark.hpp b/cpp/bench/common/ml_benchmark.hpp
@@ -48,9 +48,9 @@ struct CudaEventTimer {
    *                    the L2 cache flush.
    * @param s           CUDA stream we are measuring time on.
    */
-  CudaEventTimer(::benchmark::State& st, char* ptr, int l2CacheSize,
-                 cudaStream_t s)
-    : state(&st), stream(s) {
+  CudaEventTimer(::benchmark::State& st, char* ptr, int l2CacheSize, cudaStream_t s)
+    : state(&st), stream(s)
+  {
     CUDA_CHECK(cudaEventCreate(&start));
     CUDA_CHECK(cudaEventCreate(&stop));
     // flush L2?
@@ -67,7 +67,8 @@ struct CudaEventTimer {
    *       the benchmark::State object provided to the ctor will be set to the
    *       value given by `cudaEventElapsedTime()`.
    */
-  ~CudaEventTimer() {
+  ~CudaEventTimer()
+  {
     CUDA_CHECK_NO_THROW(cudaEventRecord(stop, stream));
     CUDA_CHECK_NO_THROW(cudaEventSynchronize(stop));
     float milliseconds = 0.0f;
@@ -87,21 +88,21 @@ struct CudaEventTimer {
 /** Main fixture to be inherited and used by all other c++ benchmarks in cuml */
 class Fixture : public ::benchmark::Fixture {
  public:
-  Fixture(const std::string& name,
-          std::shared_ptr<raft::mr::device::allocator> _alloc)
-    : ::benchmark::Fixture(), d_alloc(_alloc) {
+  Fixture(const std::string& name, std::shared_ptr<raft::mr::device::allocator> _alloc)
+    : ::benchmark::Fixture(), d_alloc(_alloc)
+  {
     SetName(name.c_str());
   }
   Fixture() = delete;
 
-  void SetUp(const ::benchmark::State& state) override {
+  void SetUp(const ::benchmark::State& state) override
+  {
     CUDA_CHECK(cudaStreamCreate(&stream));
     allocateBuffers(state);
     int devId = 0;
     CUDA_CHECK(cudaGetDevice(&devId));
     l2CacheSize = 0;
-    CUDA_CHECK(
-      cudaDeviceGetAttribute(&l2CacheSize, cudaDevAttrL2CacheSize, devId));
+    CUDA_CHECK(cudaDeviceGetAttribute(&l2CacheSize, cudaDevAttrL2CacheSize, devId));
     if (l2CacheSize > 0) {
       alloc(scratchBuffer, l2CacheSize, false);
     } else {
@@ -110,23 +111,21 @@ class Fixture : public ::benchmark::Fixture {
     CUDA_CHECK(cudaStreamSynchronize(stream));
   }
 
-  void TearDown(const ::benchmark::State& state) override {
+  void TearDown(const ::benchmark::State& state) override
+  {
     CUDA_CHECK(cudaStreamSynchronize(stream));
-    if (l2CacheSize > 0) {
-      dealloc(scratchBuffer, l2CacheSize);
-    }
+    if (l2CacheSize > 0) { dealloc(scratchBuffer, l2CacheSize); }
     deallocateBuffers(state);
     CUDA_CHECK(cudaStreamSynchronize(stream));
     CUDA_CHECK(cudaStreamDestroy(stream));
   }
 
   // to keep compiler happy
-  void SetUp(::benchmark::State& st) override {
-    SetUp(const_cast<const ::benchmark::State&>(st));
-  }
+  void SetUp(::benchmark::State& st) override { SetUp(const_cast<const ::benchmark::State&>(st)); }
 
   // to keep compiler happy
-  void TearDown(::benchmark::State& st) override {
+  void TearDown(::benchmark::State& st) override
+  {
     TearDown(const_cast<const ::benchmark::State&>(st));
   }
 
@@ -137,14 +136,15 @@ class Fixture : public ::benchmark::Fixture {
   virtual void allocateBuffers(const ::benchmark::State& state) {}
   virtual void deallocateBuffers(const ::benchmark::State& state) {}
 
-  void BenchmarkCase(::benchmark::State& state) {
+  void BenchmarkCase(::benchmark::State& state)
+  {
     runBenchmark(state);
     generateMetrics(state);
   }
 
   template <typename Lambda>
-  void loopOnState(::benchmark::State& state, Lambda benchmarkFunc,
-                   bool flushL2 = true) {
+  void loopOnState(::benchmark::State& state, Lambda benchmarkFunc, bool flushL2 = true)
+  {
     char* buff;
     int size;
     if (flushL2) {
@@ -161,16 +161,16 @@ class Fixture : public ::benchmark::Fixture {
   }
 
   template <typename T>
-  void alloc(T*& ptr, size_t len, bool init = false) {
+  void alloc(T*& ptr, size_t len, bool init = false)
+  {
     auto nBytes = len * sizeof(T);
-    ptr = (T*)d_alloc->allocate(nBytes, stream);
-    if (init) {
-      CUDA_CHECK(cudaMemsetAsync(ptr, 0, nBytes, stream));
-    }
+    ptr         = (T*)d_alloc->allocate(nBytes, stream);
+    if (init) { CUDA_CHECK(cudaMemsetAsync(ptr, 0, nBytes, stream)); }
   }
 
   template <typename T>
-  void dealloc(T* ptr, size_t len) {
+  void dealloc(T* ptr, size_t len)
+  {
     d_alloc->deallocate(ptr, len * sizeof(T), stream);
   }
 
@@ -183,17 +183,18 @@ class Fixture : public ::benchmark::Fixture {
 namespace internal {
 template <typename Params, typename Class>
 struct Registrar {
-  Registrar(const std::vector<Params>& paramsList, const std::string& testClass,
-            const std::string& testName) {
+  Registrar(const std::vector<Params>& paramsList,
+            const std::string& testClass,
+            const std::string& testName)
+  {
     int counter = 0;
     for (const auto& param : paramsList) {
       std::stringstream oss;
       oss << testClass;
       if (!testName.empty()) oss << "/" << testName;
       oss << "/" << counter;
       auto testFullName = oss.str();
-      auto* b = ::benchmark::internal::RegisterBenchmarkInternal(
-        new Class(testFullName, param));
+      auto* b = ::benchmark::internal::RegisterBenchmarkInternal(new Class(testFullName, param));
       ///@todo: expose a currying-like interface to the final macro
       b->UseManualTime();
       b->Unit(benchmark::kMillisecond);
@@ -222,9 +223,9 @@ struct Registrar {
  *                    a statically populated vector or from the result of
  *                    calling a function
  */
-#define ML_BENCH_REGISTER(ParamsClass, TestClass, TestName, params)   \
-  static MLCommon::Bench::internal::Registrar<ParamsClass, TestClass> \
-    BENCHMARK_PRIVATE_NAME(registrar)(params, #TestClass, TestName)
+#define ML_BENCH_REGISTER(ParamsClass, TestClass, TestName, params)                           \
+  static MLCommon::Bench::internal::Registrar<ParamsClass, TestClass> BENCHMARK_PRIVATE_NAME( \
+    registrar)(params, #TestClass, TestName)
 
 }  // end namespace Bench
 }  // end namespace MLCommon
diff --git a/cpp/bench/prims/add.cu b/cpp/bench/prims/add.cu
@@ -29,33 +29,38 @@ struct AddParams {
 template <typename T>
 struct AddBench : public Fixture {
   AddBench(const std::string& name, const AddParams& p)
-    : Fixture(name, std::shared_ptr<raft::mr::device::allocator>(
-                      new raft::mr::device::default_allocator)),
-      params(p) {}
+    : Fixture(
+        name,
+        std::shared_ptr<raft::mr::device::allocator>(new raft::mr::device::default_allocator)),
+      params(p)
+  {
+  }
 
  protected:
-  void allocateBuffers(const ::benchmark::State& state) override {
+  void allocateBuffers(const ::benchmark::State& state) override
+  {
     alloc(ptr0, params.len, true);
     alloc(ptr1, params.len, true);
   }
 
-  void deallocateBuffers(const ::benchmark::State& state) override {
+  void deallocateBuffers(const ::benchmark::State& state) override
+  {
     dealloc(ptr0, params.len);
     dealloc(ptr1, params.len);
   }
 
-  void runBenchmark(::benchmark::State& state) override {
-    loopOnState(state, [this]() {
-      raft::linalg::add(ptr0, ptr0, ptr1, params.len, stream);
-    });
+  void runBenchmark(::benchmark::State& state) override
+  {
+    loopOnState(state, [this]() { raft::linalg::add(ptr0, ptr0, ptr1, params.len, stream); });
   }
 
  private:
   AddParams params;
   T *ptr0, *ptr1;
 };  // struct AddBench
 
-static std::vector<AddParams> getInputs() {
+static std::vector<AddParams> getInputs()
+{
   return {
     {256 * 1024 * 1024},
     {256 * 1024 * 1024 + 2},