Skip to content

Commit

Permalink
[onnxruntime] Update to v1.18.1, redesign portfile (#227)
Browse files Browse the repository at this point in the history
* [onnxruntime] update to v1.18.1
* [onnxruntime] replace vcpkg_deps.cmake with external_deps.cmake
* [onnxruntime] resurrect training feature
* ci: test onnxruntime[training] in azure, circleci
  • Loading branch information
luncliff authored Aug 10, 2024
1 parent e8bc8c4 commit 18acf49
Show file tree
Hide file tree
Showing 17 changed files with 831 additions and 2,572 deletions.
3 changes: 2 additions & 1 deletion .circleci/port-linux.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ grpc[codegen]
flatbuffers
abseil
liblzma
onnxruntime
onnx[disable-static-registration]
onnxruntime[training]
30 changes: 10 additions & 20 deletions ports/onnxruntime/fix-clang-cl-simd-compile.patch
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
diff --git a/cmake/onnxruntime_mlas.cmake b/cmake/onnxruntime_mlas.cmake
index bee83ff07c..04b4cf42b7 100644
index 682dcfc..405c65b 100644
--- a/cmake/onnxruntime_mlas.cmake
+++ b/cmake/onnxruntime_mlas.cmake
@@ -159,15 +159,27 @@ function(setup_mlas_source_for_windows)
@@ -158,15 +158,31 @@ function(setup_mlas_source_for_windows)
)
set_source_files_properties(${mlas_platform_srcs_avx2} PROPERTIES COMPILE_FLAGS "/arch:AVX2")

Expand All @@ -16,6 +16,9 @@ index bee83ff07c..04b4cf42b7 100644
+ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ set_source_files_properties(${mlas_platform_srcs_avx512} PROPERTIES COMPILE_FLAGS "/arch:AVX512")
+ set_source_files_properties(${mlas_platform_srcs_amx} PROPERTIES COMPILE_FLAGS "/arch:AVX512 -mamx-tile -mamx-int8")
+ # https://clang.llvm.org/docs/UsersManual.html#cpu-architectures-features-and-limitations
+ set_source_files_properties(${MLAS_SRC_DIR}/qgemm_kernel_sse.cpp PROPERTIES COMPILE_FLAGS "-march=x86-64")
+ set_source_files_properties(${MLAS_SRC_DIR}/qgemm_kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "-march=x86-64-v2")
+ endif()
+
target_sources(onnxruntime_mlas PRIVATE
Expand All @@ -25,14 +28,15 @@ index bee83ff07c..04b4cf42b7 100644
- ${MLAS_SRC_DIR}/qgemm_kernel_amx.cpp
+ ${mlas_platform_srcs_avx512}
+ ${mlas_platform_srcs_amx}
+ # ...
${MLAS_SRC_DIR}/qgemm_kernel_avx2.cpp
${MLAS_SRC_DIR}/qgemm_kernel_sse.cpp
${MLAS_SRC_DIR}/qgemm_kernel_sse41.cpp
- ${MLAS_SRC_DIR}/intrinsics/avx512/quantize_avx512f.cpp
${MLAS_SRC_DIR}/amd64/QgemmU8S8KernelAmx.asm
${MLAS_SRC_DIR}/amd64/QgemmU8S8KernelAvx2.asm
${MLAS_SRC_DIR}/amd64/QgemmU8U8KernelAvx2.asm
@@ -205,9 +217,15 @@ function(setup_mlas_source_for_windows)
${MLAS_SRC_DIR}/sqnbitgemm_kernel_avx2.cpp
${MLAS_SRC_DIR}/sqnbitgemm_kernel_avx512.cpp
${MLAS_SRC_DIR}/sqnbitgemm_kernel_avx512vnni.cpp
@@ -208,9 +224,15 @@ function(setup_mlas_source_for_windows)
${MLAS_SRC_DIR}/amd64/ErfKernelFma3.asm
)
if (NOT onnxruntime_ORT_MINIMAL_BUILD)
Expand All @@ -48,20 +52,6 @@ index bee83ff07c..04b4cf42b7 100644
+ target_sources(onnxruntime_mlas PRIVATE ${onnxruntime_mlas_q4gemm_avx512})
endif()
else()
target_sources(onnxruntime_mlas PRIVATE
diff --git a/cmake/onnxruntime_mlas.cmake b/cmake/onnxruntime_mlas.cmake
index 5c294b5..a466c77 100644
--- a/cmake/onnxruntime_mlas.cmake
+++ b/cmake/onnxruntime_mlas.cmake
@@ -169,6 +169,9 @@ function(setup_mlas_source_for_windows)
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set_source_files_properties(${mlas_platform_srcs_avx512} PROPERTIES COMPILE_FLAGS "/arch:AVX512")
set_source_files_properties(${mlas_platform_srcs_amx} PROPERTIES COMPILE_FLAGS "/arch:AVX512 -mamx-tile -mamx-int8")
+ # https://clang.llvm.org/docs/UsersManual.html#cpu-architectures-features-and-limitations
+ set_source_files_properties(${MLAS_SRC_DIR}/qgemm_kernel_sse.cpp PROPERTIES COMPILE_FLAGS "-march=x86-64")
+ set_source_files_properties(${MLAS_SRC_DIR}/qgemm_kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "-march=x86-64-v2")
endif()

target_sources(onnxruntime_mlas PRIVATE
diff --git a/onnxruntime/core/mlas/lib/qgemm_kernel_sse41.cpp b/onnxruntime/core/mlas/lib/qgemm_kernel_sse41.cpp
index 68931c5..6c095bd 100644
Expand Down
37 changes: 37 additions & 0 deletions ports/onnxruntime/fix-cmake-cuda.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index f829cea..bda2d4c 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -715,6 +715,13 @@ if (onnxruntime_USE_CUDA)
if (onnxruntime_USE_CUDA_NHWC_OPS)
add_compile_definitions(ENABLE_CUDA_NHWC_OPS)
endif()
+ # Give the generator more hints, using FindCUDAToolkit.cmake
+ find_package(CUDAToolkit REQUIRED)
+ if(CMAKE_GENERATOR MATCHES "Visual Studio")
+ set(CMAKE_VS_PLATFORM_TOOLSET_CUDA "${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}")
+ set(CMAKE_VS_PLATFORM_TOOLSET_CUDA_CUSTOM_DIR "${CUDAToolkit_TARGET_DIR}/")
+ endif()
+ get_filename_component(CMAKE_CUDA_COMPILER "${CUDAToolkit_NVCC_EXECUTABLE}" ABSOLUTE)
enable_language(CUDA)
message( STATUS "CMAKE_CUDA_COMPILER_VERSION: ${CMAKE_CUDA_COMPILER_VERSION}")

diff --git a/cmake/onnxruntime_providers_cuda.cmake b/cmake/onnxruntime_providers_cuda.cmake
index 1346a9c..fd60dd8 100644
--- a/cmake/onnxruntime_providers_cuda.cmake
+++ b/cmake/onnxruntime_providers_cuda.cmake
@@ -211,8 +211,12 @@
target_link_libraries(${target} PRIVATE CUDA::cuda_driver)
endif()

- include(cutlass)
- target_include_directories(${target} PRIVATE ${cutlass_SOURCE_DIR}/include ${cutlass_SOURCE_DIR}/examples ${cutlass_SOURCE_DIR}/tools/util/include)
+ find_package(NvidiaCutlass REQUIRED)
+ target_link_libraries(${target} PRIVATE nvidia::cutlass::cutlass)
+ if(MSVC)
+ # CUTLASS_CONSTEXPR_IF_CXX17 must be constexpr. Correct the __cplusplus value with MSVC
+ target_compile_options(${target} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /Zc:__cplusplus>)
+ endif()

target_include_directories(${target} PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${eigen_INCLUDE_DIRS} ${TVM_INCLUDES}
PUBLIC ${CUDAToolkit_INCLUDE_DIRS})
43 changes: 43 additions & 0 deletions ports/onnxruntime/fix-cmake-training.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
diff --git a/cmake/tensorboard/compat/proto/CMakeLists.txt b/cmake/tensorboard/compat/proto/CMakeLists.txt
index ad31e40..3e80a6e 100644
--- a/cmake/tensorboard/compat/proto/CMakeLists.txt
+++ b/cmake/tensorboard/compat/proto/CMakeLists.txt
@@ -1,14 +1,21 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

-FetchContent_Declare(
- tensorboard
- URL ${DEP_URL_tensorboard}
- URL_HASH SHA1=${DEP_SHA1_tensorboard}
-)
-FetchContent_MakeAvailable(tensorboard)
-
-set(TENSORBOARD_ROOT ${tensorboard_SOURCE_DIR})
+if(onnxruntime_USE_VCPKG)
+ if(NOT DEFINED TENSORBOARD_ROOT)
+ message(FATAL_ERROR "TENSORBOARD_ROOT not defined")
+ endif()
+ find_path(PROTOBUF_IMPORT_DIR NAMES "google/protobuf/api.proto" REQUIRED)
+else()
+ FetchContent_Declare(
+ tensorboard
+ URL ${DEP_URL_tensorboard}
+ URL_HASH SHA1=${DEP_SHA1_tensorboard}
+ )
+ FetchContent_MakeAvailable(tensorboard)
+ set(TENSORBOARD_ROOT ${tensorboard_SOURCE_DIR})
+ set(PROTOBUF_IMPORT_DIR ${protobuf_SOURCE_DIR}/src)
+endif()

# tensorboard protos
file(GLOB_RECURSE tensorboard_proto_srcs CONFIGURE_DEPENDS
@@ -16,7 +23,7 @@ file(GLOB_RECURSE tensorboard_proto_srcs CONFIGURE_DEPENDS
)

add_library(tensorboard STATIC ${tensorboard_proto_srcs})
-onnxruntime_protobuf_generate(APPEND_PATH IMPORT_DIRS ${tensorboard_SOURCE_DIR} ${protobuf_SOURCE_DIR}/src TARGET tensorboard)
+onnxruntime_protobuf_generate(APPEND_PATH IMPORT_DIRS ${TENSORBOARD_ROOT} ${PROTOBUF_IMPORT_DIR} TARGET tensorboard)
onnxruntime_add_include_to_target(tensorboard ${PROTOBUF_LIB})
target_include_directories(tensorboard PRIVATE ${PROJECT_BINARY_DIR})
add_dependencies(tensorboard ${onnxruntime_EXTERNAL_DEPENDENCIES})
72 changes: 11 additions & 61 deletions ports/onnxruntime/fix-cmake.patch
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,15 @@ diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index 8edbb6ad6f..4987d5af5a 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -38,6 +38,7 @@ include(CheckLanguage)
@@ -38,6 +38,8 @@ include(CheckLanguage)
include(CMakeDependentOption)
include(FetchContent)
include(CheckFunctionExists)
+include(GNUInstallDirs) # onnxruntime_providers_* require CMAKE_INSTALL_* variables
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/external")

# TODO: update this once all system adapt c++20
if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
@@ -583,7 +584,7 @@ get_filename_component(ORTTRAINING_ROOT "${ORTTRAINING_ROOT}" ABSOLUTE)
get_filename_component(REPO_ROOT "${REPO_ROOT}" ABSOLUTE)
set(ONNXRUNTIME_INCLUDE_DIR ${REPO_ROOT}/include/onnxruntime)

-include(external/onnxruntime_external_deps.cmake)
+include(external/onnxruntime_vcpkg_deps.cmake)

set(ORT_WARNING_FLAGS)
if (WIN32)
@@ -956,7 +975,7 @@ function(onnxruntime_set_compile_flags target_name)
target_compile_definitions(${target_name} PRIVATE ORT_NEURAL_SPEED)
endif()

- set_target_properties(${target_name} PROPERTIES COMPILE_WARNING_AS_ERROR ON)
+ set_target_properties(${target_name} PROPERTIES COMPILE_WARNING_AS_ERROR OFF)
if (onnxruntime_USE_CUDA)
# Suppress a "conversion_function_not_usable" warning in gsl/span
target_compile_options(${target_name} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcudafe \"--diag_suppress=conversion_function_not_usable\">")
diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake
index e15c8a046d..d3a2e64bb9 100644
--- a/cmake/onnxruntime.cmake
Expand Down Expand Up @@ -66,50 +49,17 @@ index 66df115..bd313d8 100644

if (MSVC)
# The warning means the type of two integral values around a binary operator is narrow than their result.
@@ -959,6 +959,9 @@ target_compile_definitions(onnx_test_data_proto PRIVATE "-DONNX_API=")
diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake
index 2ae07d5..9ee95a7 100644
--- a/cmake/onnxruntime_unittests.cmake
+++ b/cmake/onnxruntime_unittests.cmake
@@ -962,7 +962,8 @@ target_compile_definitions(onnx_test_data_proto PRIVATE "-DONNX_API=")
onnxruntime_add_include_to_target(onnx_test_data_proto onnx_proto)
target_include_directories(onnx_test_data_proto PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
set_target_properties(onnx_test_data_proto PROPERTIES FOLDER "ONNXRuntimeTest")
+if(NOT DEFINED onnx_SOURCE_DIR)
+ find_path(onnx_SOURCE_DIR NAMES "onnx/onnx-ml.proto3" "onnx/onnx-ml.proto" REQUIRED)
+endif()
onnxruntime_protobuf_generate(APPEND_PATH IMPORT_DIRS ${onnx_SOURCE_DIR} TARGET onnx_test_data_proto)
-onnxruntime_protobuf_generate(APPEND_PATH IMPORT_DIRS ${onnx_SOURCE_DIR} TARGET onnx_test_data_proto)
+find_path(ONNX_IMPORT_DIR NAMES onnx/onnx-ml.proto REQUIRED)
+onnxruntime_protobuf_generate(APPEND_PATH IMPORT_DIRS ${ONNX_IMPORT_DIR} TARGET onnx_test_data_proto)

#
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index f829cea..bda2d4c 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -715,6 +715,13 @@ if (onnxruntime_USE_CUDA)
if (onnxruntime_USE_CUDA_NHWC_OPS)
add_compile_definitions(ENABLE_CUDA_NHWC_OPS)
endif()
+ # Give more hints for the generator, with FindCUDAToolkit.cmake
+ find_package(CUDAToolkit REQUIRED)
+ if(CMAKE_GENERATOR MATCHES "Visual Studio")
+ set(CMAKE_VS_PLATFORM_TOOLSET_CUDA "${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}")
+ set(CMAKE_VS_PLATFORM_TOOLSET_CUDA_CUSTOM_DIR "${CUDAToolkit_TARGET_DIR}/")
+ endif()
+ get_filename_component(CMAKE_CUDA_COMPILER "${CUDAToolkit_NVCC_EXECUTABLE}" ABSOLUTE)
enable_language(CUDA)
message( STATUS "CMAKE_CUDA_COMPILER_VERSION: ${CMAKE_CUDA_COMPILER_VERSION}")

diff --git a/cmake/onnxruntime_providers_cuda.cmake b/cmake/onnxruntime_providers_cuda.cmake
index 1346a9c..fd60dd8 100644
--- a/cmake/onnxruntime_providers_cuda.cmake
+++ b/cmake/onnxruntime_providers_cuda.cmake
@@ -211,8 +211,12 @@
target_link_libraries(${target} PRIVATE CUDA::cuda_driver)
endif()

- include(cutlass)
- target_include_directories(${target} PRIVATE ${cutlass_SOURCE_DIR}/include ${cutlass_SOURCE_DIR}/examples ${cutlass_SOURCE_DIR}/tools/util/include)
+ find_package(NvidiaCutlass REQUIRED)
+ target_link_libraries(${target} PRIVATE nvidia::cutlass::cutlass)
+ if(MSVC)
+ # CUTLASS_CONSTEXPR_IF_CXX17 must be constexpr. Correct the __cplusplus value with MSVC
+ target_compile_options(${target} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /Zc:__cplusplus>)
+ endif()

target_include_directories(${target} PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${eigen_INCLUDE_DIRS} ${TVM_INCLUDES}
PUBLIC ${CUDAToolkit_INCLUDE_DIRS})
# onnxruntime_ir_graph test data
16 changes: 0 additions & 16 deletions ports/onnxruntime/fix-llvm-rc-unicode.patch

This file was deleted.

Loading

0 comments on commit 18acf49

Please sign in to comment.