RIVOS: initial ci

rivosinc · Feb 21, 2024 · 05a892a · 05a892a
1 parent 3020196
commit 05a892a
Show file tree

Hide file tree

Showing 14 changed files with 258 additions and 21 deletions.
diff --git a/CMake/resolve_dependency_modules/folly/CMakeLists.txt b/CMake/resolve_dependency_modules/folly/CMakeLists.txt
@@ -28,13 +28,18 @@ message(STATUS "Building Folly from source")
 if(gflags_SOURCE STREQUAL "BUNDLED")
   set(glog_patch && git apply ${CMAKE_CURRENT_LIST_DIR}/folly-gflags-glog.patch)
 endif()
+if(VELOX_ENABLE_GPU) # GPU builds additionally apply the folly __CUDACC__ patch.
+  set(cudacc_patch && git apply ${CMAKE_CURRENT_LIST_DIR}/folly-cudacc.patch)
+endif()
+
+set(VELOX_FOLLY_PATCH_COMMAND git apply ${CMAKE_CURRENT_LIST_DIR}/folly-no-export.patch
+                                ${glog_patch} ${cudacc_patch}) # Optional patches are chained with "&&"; a variable that was never set expands to nothing.
 
 FetchContent_Declare(
   folly
   URL ${VELOX_FOLLY_SOURCE_URL}
   URL_HASH ${VELOX_FOLLY_BUILD_SHA256_CHECKSUM}
-  PATCH_COMMAND git apply ${CMAKE_CURRENT_LIST_DIR}/folly-no-export.patch
-                ${glog_patch})
+  PATCH_COMMAND ${VELOX_FOLLY_PATCH_COMMAND})
 
 if(ON_APPLE_M1)
   # folly will wrongly assume x86_64 if this is not set

diff --git a/CMake/resolve_dependency_modules/folly/folly-cudacc.patch b/CMake/resolve_dependency_modules/folly/folly-cudacc.patch
@@ -0,0 +1,68 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# Copyright (c) Rivos, Inc. and its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+--- a/folly/Conv.h
++++ b/folly/Conv.h
+@@ -286,8 +286,8 @@ namespace detail {
+ template <class... T>
+ using LastElement = type_pack_element_t<sizeof...(T) - 1, T...>;
+
+-#ifdef _MSC_VER
+-// MSVC can't quite figure out the LastElementImpl::call() stuff
++#if defined(_MSC_VER) || defined(__CUDACC__)
++// MSVC and NVCC can't quite figure out the LastElementImpl::call() stuff
+ // in the base implementation, so we have to use tuples instead,
+ // which result in significantly more templates being compiled,
+ // though the runtime performance is the same.
+--- a/folly/synchronization/RelaxedAtomic.h
++++ b/folly/synchronization/RelaxedAtomic.h
+@@ -98,7 +98,7 @@ struct relaxed_atomic_base : protected std::atomic<T> {
+ };
+
+ template <typename T>
+-struct relaxed_atomic_integral_base : private relaxed_atomic_base<T> {
++struct relaxed_atomic_integral_base : protected relaxed_atomic_base<T> {
+  private:
+   using atomic = std::atomic<T>;
+   using base = relaxed_atomic_base<T>;
+@@ -108,7 +108,9 @@ struct relaxed_atomic_integral_base : private relaxed_atomic_base<T> {
+
+   using base::relaxed_atomic_base;
+   using base::operator=;
++#ifndef __CUDACC__
+   using base::operator T;
++#endif
+   using base::compare_exchange_strong;
+   using base::compare_exchange_weak;
+   using base::exchange;
+@@ -206,7 +208,9 @@ struct relaxed_atomic : detail::relaxed_atomic_base<T> {
+
+   using base::relaxed_atomic_base;
+   using base::operator=;
++#ifndef __CUDACC__
+   using base::operator T;
++#endif
+ };
+
+ template <typename T>
+@@ -220,7 +224,9 @@ struct relaxed_atomic<T*> : detail::relaxed_atomic_base<T*> {
+
+   using detail::relaxed_atomic_base<T*>::relaxed_atomic_base;
+   using base::operator=;
++#ifndef __CUDACC__
+   using base::operator T*;
++#endif
+
+   T* fetch_add(std::ptrdiff_t arg) noexcept {
+     return atomic::fetch_add(arg, std::memory_order_relaxed);
diff --git a/rivos/gitlab-ci.yml b/rivos/gitlab-ci.yml
@@ -0,0 +1,72 @@
+# Copyright (c) 2022 by Rivos Inc.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+include:
+  - project: 'rv/it/int/rivos-sdk'
+    ref: rivos/main
+    file: '/packager/gitlab-ci-packaging-helper.yml'
+
+variables:
+  GIT_STRATEGY: clone
+  GIT_SUBMODULE_STRATEGY: recursive
+  RIG_PATH: "/rivos/rig"
+
+.common-build: # Shared script template; nvidia-build pulls it in through !reference.
+  script:
+    - > # Step 1: install the toolchain and library packages used by the build.
+      apt-get update -qq &&
+      DEBIAN_FRONTEND=noninteractive
+      apt-get install -qq -y
+      bison
+      build-essential
+      flex
+      libboost-atomic-dev
+      libboost-context-dev
+      libboost-date-time-dev
+      libboost-filesystem-dev
+      libboost-program-options-dev
+      libboost-regex-dev
+      libboost-system-dev
+      libboost-thread-dev
+      libdouble-conversion-dev
+      libevent-dev
+      libgmock-dev
+      liblzo2-dev
+      tzdata
+      ninja-build
+      cmake
+      git
+      libssl-dev
+      curl
+      libz-dev
+      liblz4-dev
+      libsnappy-dev
+      libzstd-dev
+    - > # Step 2: configure a Release build with testing, benchmarks and GPU support switched on.
+      cmake -S . -B build
+      -GNinja
+      -DCMAKE_BUILD_TYPE=Release
+      -DCMAKE_INSTALL_PREFIX=install
+      -DCMAKE_POSITION_INDEPENDENT_CODE=ON
+      -DVELOX_BUILD_MINIMAL=OFF
+      -DVELOX_BUILD_TESTING=ON
+      -DVELOX_ENABLE_BENCHMARKS=ON
+      -DVELOX_ENABLE_GPU=ON
+      -DCMAKE_CUDA_ARCHITECTURES="90"
+    - ninja -C build install # Step 3: build all targets and run the install step.
+
+nvidia-build: # Build job that runs the .common-build script inside the CUDA 12.2.2 devel image.
+  stage: build
+  image: nvidia/cuda:12.2.2-devel-ubuntu22.04
+  tags: ["nvidia-a30"] # NOTE(review): the runner tag names an A30 while .common-build sets CMAKE_CUDA_ARCHITECTURES="90" - confirm the intended target GPU.
+  dependencies: ["prepare-version"] # presumably provided by the included packaging helper - verify.
+  script:
+    - !reference [.common-build, script]
+  artifacts:
+    when: always # Keep artifacts even when the job fails.
+    paths:
+      - build/Testing
+      - install
+    reports:
+      junit: build/Testing/*.xml # NOTE(review): no step in this file runs tests, so these reports may be absent - confirm.
diff --git a/scripts/setup-helper-functions.sh b/scripts/setup-helper-functions.sh
@@ -99,9 +99,9 @@ function get_cxx_flags {
         CPU_ARCH="arm64"
       fi
 
-    # On MacOs prevent the flood of translation visibility settings warnings.
-    ADDITIONAL_FLAGS="-fvisibility=hidden -fvisibility-inlines-hidden"
-    else [ "$OS" = "Linux" ];
+      # On MacOs prevent the flood of translation visibility settings warnings.
+      ADDITIONAL_FLAGS="-fvisibility=hidden -fvisibility-inlines-hidden"
+    elif [ "$OS" = "Linux" ]; then
 
       local CPU_CAPABILITIES
       CPU_CAPABILITIES=$(cat /proc/cpuinfo | grep flags | head -n 1| awk '{print tolower($0)}')
@@ -133,8 +133,11 @@ function get_cxx_flags {
     "aarch64")
       echo -n "-mcpu=neoverse-n1 -std=c++17 $ADDITIONAL_FLAGS"
     ;;
-  *)
-    echo -n "Architecture not supported!"
+
+    *)
+      echo "Architecture not supported: CPU_ARCH=$CPU_ARCH" 1>&2
+      exit 1
+    ;;
   esac
 
 }

diff --git a/velox/core/PlanNode.h b/velox/core/PlanNode.h
@@ -2404,3 +2404,11 @@ struct fmt::formatter<facebook::velox::core::JoinType> : formatter<int> {
     return formatter<int>::format(static_cast<int>(s), ctx);
   }
 };
+
+/// Formats an AggregationNode::Step as the name returned by
+/// mapAggregationStepToName(). Inherits the std::string formatter so the
+/// usual string format specs apply.
+template <>
+struct fmt::formatter<facebook::velox::core::AggregationNode::Step>
+    : formatter<std::string> {
+  // const-qualified so the formatter can be invoked through a const object,
+  // which newer fmt releases require of user-defined formatters.
+  auto format(
+      facebook::velox::core::AggregationNode::Step s,
+      format_context& ctx) const {
+    return formatter<std::string>::format(
+        facebook::velox::core::mapAggregationStepToName(s), ctx);
+  }
+};
diff --git a/velox/experimental/gpu/tests/CMakeLists.txt b/velox/experimental/gpu/tests/CMakeLists.txt
@@ -14,5 +14,3 @@
 
 add_executable(velox_gpu_hash_table_test HashTableTest.cu)
 target_link_libraries(velox_gpu_hash_table_test Folly::folly gflags::gflags)
-set_target_properties(velox_gpu_hash_table_test PROPERTIES CUDA_ARCHITECTURES
-                                                           native)
diff --git a/velox/experimental/wave/common/CMakeLists.txt b/velox/experimental/wave/common/CMakeLists.txt
@@ -15,8 +15,6 @@
 add_library(velox_wave_common GpuArena.cpp Buffer.cpp Cuda.cu Exception.cpp
                               Type.cpp)
 
-set_target_properties(velox_wave_common PROPERTIES CUDA_ARCHITECTURES native)
-
 target_link_libraries(velox_wave_common velox_exception velox_common_base
                       velox_type)
 

diff --git a/velox/experimental/wave/common/Type.cpp b/velox/experimental/wave/common/Type.cpp
@@ -50,4 +50,33 @@ PhysicalType fromCpuType(const Type& type) {
   return ans;
 }
 
+// Returns the display name of 'kind'. Only the enumerators handled in the
+// switch have a name; any other value reaches VELOX_UNREACHABLE().
+std::string PhysicalType::kindString(Kind kind) {
+  const char* label = nullptr;
+  switch (kind) {
+    case kInt8:
+      label = "Int8";
+      break;
+    case kInt16:
+      label = "Int16";
+      break;
+    case kInt32:
+      label = "Int32";
+      break;
+    case kInt64:
+      label = "Int64";
+      break;
+    case kInt128:
+      label = "Int128";
+      break;
+    case kFloat32:
+      label = "Float32";
+      break;
+    case kFloat64:
+      label = "Float64";
+      break;
+    case kString:
+      label = "String";
+      break;
+    case kArray:
+      label = "Array";
+      break;
+    case kMap:
+      label = "Map";
+      break;
+    case kRow:
+      label = "Row";
+      break;
+  }
+
+  // No case matched: same failure path as falling out of the switch.
+  if (label == nullptr) {
+    VELOX_UNREACHABLE();
+  }
+  return label;
+}
+
 } // namespace facebook::velox::wave
diff --git a/velox/experimental/wave/common/Type.h b/velox/experimental/wave/common/Type.h
@@ -17,6 +17,11 @@
 #pragma once
 
 #include <cstdint>
+#include <string>
+#include <fmt/format.h>
+#if FMT_VERSION >= 100100
+#include <fmt/std.h>
+#endif
 
 namespace facebook::velox {
 class Type;
@@ -40,8 +45,21 @@ struct PhysicalType {
   } kind;
   int32_t numChildren;
   PhysicalType** children;
+
+  static std::string kindString(Kind kind);
 };
 
 PhysicalType fromCpuType(const Type&);
 
 } // namespace facebook::velox::wave
+
+/// Formats a wave PhysicalType::Kind as the name returned by
+/// PhysicalType::kindString(). Inherits the std::string formatter so the
+/// usual string format specs apply.
+template <>
+struct fmt::formatter<facebook::velox::wave::PhysicalType::Kind>
+    : formatter<std::string> {
+  // const-qualified so the formatter can be invoked through a const object,
+  // which newer fmt releases require of user-defined formatters.
+  auto format(
+      facebook::velox::wave::PhysicalType::Kind s,
+      format_context& ctx) const {
+    return formatter<std::string>::format(
+        facebook::velox::wave::PhysicalType::kindString(s), ctx);
+  }
+};
diff --git a/velox/experimental/wave/common/tests/CMakeLists.txt b/velox/experimental/wave/common/tests/CMakeLists.txt
@@ -15,9 +15,6 @@
 add_executable(velox_wave_common_test GpuArenaTest.cpp CudaTest.cpp CudaTest.cu
                                       BlockTest.cpp BlockTest.cu)
 
-set_target_properties(velox_wave_common_test PROPERTIES CUDA_ARCHITECTURES
-                                                        native)
-
 add_test(velox_wave_common_test velox_wave_common_test)
 
 target_link_libraries(

diff --git a/velox/experimental/wave/exec/CMakeLists.txt b/velox/experimental/wave/exec/CMakeLists.txt
@@ -26,8 +26,6 @@ add_library(
   Wave.cpp
   Project.cpp)
 
-set_target_properties(velox_wave_exec PROPERTIES CUDA_ARCHITECTURES native)
-
 target_link_libraries(velox_wave_exec velox_wave_vector velox_wave_common
                       velox_exception velox_common_base velox_exec)
 

diff --git a/velox/experimental/wave/exec/ExprKernel.h b/velox/experimental/wave/exec/ExprKernel.h
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <cstdint>
+#include "velox/common/base/Exceptions.h"
 #include "velox/experimental/wave/common/Cuda.h"
 #include "velox/experimental/wave/exec/ErrorCode.h"
 #include "velox/experimental/wave/vector/Operand.h"
@@ -62,6 +63,45 @@ enum class OpCode {
 
 };
 
+/// Writes the name of 'opcode' to 'out' and returns 'out'. Only the
+/// enumerators handled in the switch are named; any other value (for example
+/// one synthesized with OP_MIX that matches none of them) reaches
+/// VELOX_UNREACHABLE().
+inline std::ostream& operator<<(std::ostream& out, const OpCode& opcode) {
+  const char* label = nullptr;
+  switch (opcode) {
+    case OpCode::kFilter:
+      label = "Filter";
+      break;
+    case OpCode::kWrap:
+      label = "Wrap";
+      break;
+    case OpCode::kPlus:
+      label = "Plus";
+      break;
+    case OpCode::kMinus:
+      label = "Minus";
+      break;
+    case OpCode::kTimes:
+      label = "Times";
+      break;
+    case OpCode::kDivide:
+      label = "Divide";
+      break;
+    case OpCode::kEquals:
+      label = "Equals";
+      break;
+    case OpCode::kLT:
+      label = "LT";
+      break;
+    case OpCode::kLTE:
+      label = "LTE";
+      break;
+    case OpCode::kGT:
+      label = "GT";
+      break;
+    case OpCode::kGTE:
+      label = "GTE";
+      break;
+    case OpCode::kNE:
+      label = "NE";
+      break;
+  }
+
+  // No case matched: same failure path as falling out of the switch.
+  if (label == nullptr) {
+    VELOX_UNREACHABLE();
+  }
+  return out << label;
+}
+
+/// Returns, as a std::string, the text that operator<< writes for 'opcode'.
+inline std::string mapOpCodeToName(const OpCode& opcode) {
+  std::ostringstream buffer;
+  buffer << opcode;
+  return buffer.str();
+}
+
 #define OP_MIX(op, t) \
   static_cast<OpCode>(static_cast<int32_t>(t) + 8 * static_cast<int32_t>(op))
 
@@ -151,3 +191,11 @@ class WaveKernelStream : public Stream {
 };
 
 } // namespace facebook::velox::wave
+
+/// Formats a wave OpCode as the name returned by mapOpCodeToName(). Inherits
+/// the std::string formatter so the usual string format specs apply.
+template <>
+struct fmt::formatter<facebook::velox::wave::OpCode> : formatter<std::string> {
+  // const-qualified so the formatter can be invoked through a const object,
+  // which newer fmt releases require of user-defined formatters.
+  auto format(facebook::velox::wave::OpCode o, format_context& ctx) const {
+    return formatter<std::string>::format(
+        facebook::velox::wave::mapOpCodeToName(o), ctx);
+  }
+};
diff --git a/velox/experimental/wave/exec/tests/CMakeLists.txt b/velox/experimental/wave/exec/tests/CMakeLists.txt
@@ -14,8 +14,6 @@
 
 add_executable(velox_wave_exec_test FilterProjectTest.cpp Main.cpp)
 
-set_target_properties(velox_wave_exec_test PROPERTIES CUDA_ARCHITECTURES native)
-
 add_test(velox_wave_exec_test velox_wave_exec_test)
 
 target_link_libraries(

diff --git a/velox/experimental/wave/vector/tests/CMakeLists.txt b/velox/experimental/wave/vector/tests/CMakeLists.txt
@@ -14,9 +14,6 @@
 
 add_executable(velox_wave_vector_test VectorTest.cpp)
 
-set_target_properties(velox_wave_vector_test PROPERTIES CUDA_ARCHITECTURES
-                                                        native)
-
 add_test(veloxwave__vector_test velox_wave_vector_test)
 
 target_link_libraries(