From 5b96e90463b70f06b794c195bef9eea04b10a558 Mon Sep 17 00:00:00 2001 From: Stella Laurenzo Date: Sun, 17 Mar 2024 15:01:14 -0700 Subject: [PATCH 1/2] Enable LTO optimization by default for runtime releases. This is done by generalizing the primordial `IREE_SIZE_OPTIMIZED` flag into a `IREE_RUNTIME_OPTIMIZATION_PROFILE` that: * Can enable 'lto' or 'size'. * Is scoped to just the runtime targets. * Minimally does the right thing for 'size' on Linux vs just on Windows (not the goal of this patch but drops ~300KB from binary sizes when enabled). The compile time delta for a clean build of the runtime in full LTO vs regular mode was not measured precisely but is in the noise (i.e. <1m). As such, just enabling by default for Python release binaries. Others can be enabled via: `-DIREE_RUNTIME_OPTIMIZATION_PROFILE=lto`, which is recommended for benchmarking, etc. Progress on #898. --- CMakeLists.txt | 5 + build_tools/cmake/build_runtime.sh | 2 + build_tools/cmake/build_runtime_small.sh | 2 + build_tools/cmake/external_cc_library.cmake | 2 + build_tools/cmake/iree_cc_binary.cmake | 1 + build_tools/cmake/iree_cc_library.cmake | 2 + build_tools/cmake/iree_copts.cmake | 137 ++++++++++++++----- build_tools/cmake/iree_setup_toolchain.cmake | 43 ++++++ runtime/CMakeLists.txt | 16 +++ runtime/setup.py | 5 + 10 files changed, 177 insertions(+), 38 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7603dd57789d..6bd7261bb6cb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -123,6 +123,11 @@ option(IREE_COMPILER_BUILD_SHARED_LIBS "Enables BUILD_SHARED_LIBS CMake mode for # CI coverage is established. option(BUILD_SHARED_LIBS "Instructs CMake to build libraries as shared if possible" OFF) +# Control of LTO settings for the runtime build. +set(IREE_RUNTIME_OPTIMIZATION_PROFILE "" CACHE STRING + "Build optimization profile to apply. One of '', 'lto', 'size'.") +set(IREE_LTO_MODE "full" CACHE STRING "LTO type, 'thin' or 'full'. 
Only consulted on clang-like compilers.") + #------------------------------------------------------------------------------- # IREE command-line tooling configuration #------------------------------------------------------------------------------- diff --git a/build_tools/cmake/build_runtime.sh b/build_tools/cmake/build_runtime.sh index 7db354201eb7..4fb41fff0c5e 100755 --- a/build_tools/cmake/build_runtime.sh +++ b/build_tools/cmake/build_runtime.sh @@ -28,6 +28,8 @@ args=( "-DPython3_EXECUTABLE=${IREE_PYTHON3_EXECUTABLE}" "-DPYTHON_EXECUTABLE=${IREE_PYTHON3_EXECUTABLE}" "-DCMAKE_BUILD_TYPE=RelWithDebInfo" + "-DIREE_RUNTIME_OPTIMIZATION_PROFILE=lto" + "-DIREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX=ON" "-DIREE_BUILD_COMPILER=OFF" ) diff --git a/build_tools/cmake/build_runtime_small.sh b/build_tools/cmake/build_runtime_small.sh index 88cb2c98ca1c..eb456e2aff26 100755 --- a/build_tools/cmake/build_runtime_small.sh +++ b/build_tools/cmake/build_runtime_small.sh @@ -22,5 +22,7 @@ source build_tools/cmake/setup_build.sh -DPYTHON_EXECUTABLE="${IREE_PYTHON3_EXECUTABLE}" \ -DCMAKE_BUILD_TYPE=MinSizeRel \ -DIREE_SIZE_OPTIMIZED=ON \ + -DIREE_RUNTIME_OPTIMIZATION_PROFILE=size \ + -DIREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX=ON \ -DIREE_BUILD_COMPILER=OFF "${CMAKE_BIN?}" --build "${BUILD_DIR}" -- -k 0 diff --git a/build_tools/cmake/external_cc_library.cmake b/build_tools/cmake/external_cc_library.cmake index 02ae7ffe50fa..a49ca65a9e11 100644 --- a/build_tools/cmake/external_cc_library.cmake +++ b/build_tools/cmake/external_cc_library.cmake @@ -134,6 +134,8 @@ function(external_cc_library) PRIVATE ${_RULE_COPTS} ${IREE_DEFAULT_COPTS} + INTERFACE + ${IREE_INTERFACE_COPTS} ) target_link_options(${_NAME} PRIVATE diff --git a/build_tools/cmake/iree_cc_binary.cmake b/build_tools/cmake/iree_cc_binary.cmake index 2e8af2f44da8..c1b6025d033b 100644 --- a/build_tools/cmake/iree_cc_binary.cmake +++ b/build_tools/cmake/iree_cc_binary.cmake @@ -124,6 +124,7 @@ function(iree_cc_binary) 
target_compile_options(${_NAME} PRIVATE ${IREE_DEFAULT_COPTS} + ${IREE_INTERFACE_COPTS} ${_RULE_COPTS} ) target_link_options(${_NAME} diff --git a/build_tools/cmake/iree_cc_library.cmake b/build_tools/cmake/iree_cc_library.cmake index a9cc1e9b0756..980f8ff0b7a7 100644 --- a/build_tools/cmake/iree_cc_library.cmake +++ b/build_tools/cmake/iree_cc_library.cmake @@ -230,6 +230,8 @@ function(iree_cc_library) PRIVATE ${IREE_DEFAULT_COPTS} ${_RULE_COPTS} + INTERFACE + ${IREE_INTERFACE_COPTS} ) target_link_options(${_NAME} PRIVATE diff --git a/build_tools/cmake/iree_copts.cmake b/build_tools/cmake/iree_copts.cmake index 10342b4bf9df..0f792e1bf7a2 100644 --- a/build_tools/cmake/iree_copts.cmake +++ b/build_tools/cmake/iree_copts.cmake @@ -415,48 +415,109 @@ if(EMSCRIPTEN AND IREE_EXTERNAL_WEBGPU_HAL_DRIVER_FOUND) endif() #------------------------------------------------------------------------------- -# Size-optimized build flags +# Flag sets used different optimization profiles. #------------------------------------------------------------------------------- -# TODO(#898): add a dedicated size-constrained configuration. -if(IREE_SIZE_OPTIMIZED) - iree_select_compiler_opts(IREE_SIZE_OPTIMIZED_DEFAULT_COPTS - MSVC_OR_CLANG_CL - "/GS-" - "/GL" - "/Gw" - "/Gy" - "/DNDEBUG" - "/Os" - "/Oy" - "/Zi" - "/c" - ) - iree_select_compiler_opts(IREE_SIZE_OPTIMIZED_DEFAULT_LINKOPTS - MSVC_OR_CLANG_CL - "-DEBUG:FULL" - "-LTCG" - "-opt:ref,icf" - ) - # TODO(#898): make this only impact the runtime (IREE_RUNTIME_DEFAULT_...). 
- # These flags come from iree/base/config.h: - set(IREE_DEFAULT_COPTS - "${IREE_DEFAULT_COPTS}" - "${IREE_SIZE_OPTIMIZED_DEFAULT_COPTS}" - "-DIREE_STATUS_MODE=0" - "-DIREE_STATISTICS_ENABLE=0" - "-DIREE_HAL_MODULE_STRING_UTIL_ENABLE=0" - "-DIREE_HAL_COMMAND_BUFFER_VALIDATION_ENABLE=0" - "-DIREE_VM_BACKTRACE_ENABLE=0" - "-DIREE_VM_BYTECODE_VERIFICATION_ENABLE=0" - "-DIREE_VM_EXT_F32_ENABLE=0" - "-DIREE_VM_EXT_F64_ENABLE=0" +iree_select_compiler_opts(IREE_LTO_COPTS + CLANG + "-flto=${IREE_LTO_MODE}" + GCC + "-flto" + "-fuse-linker-plugin" + MSVC_OR_CLANG_CL + "/GL" +) + +iree_select_compiler_opts(IREE_LTO_LINKOPTS + CLANG + "-flto=${IREE_LTO_MODE}" + GCC + "-flto" + MSVC_OR_CLANG_CL + "-LTCG" +) + +iree_select_compiler_opts(IREE_SIZE_OPTIMIZED_DEFAULT_COPTS + MSVC_OR_CLANG_CL + "/GS-" + "/Gw" + "/Gy" + "/DNDEBUG" + "/Os" + "/Oy" + "/Zi" + "/c" +) +iree_select_compiler_opts(IREE_SIZE_OPTIMIZED_DEFAULT_LINKOPTS + MSVC_OR_CLANG_CL + "-DEBUG:FULL" + "-opt:ref,icf" +) + +# Function which enables various optimization options for a sub-tree by +# modifying the IREE_DEFAULT_COPTS and IREE_DEFAULT_LINKOPTS that targets +# created after this point use. +# +# Available profiles: +# "lto": Applies options to enable link time code generation. +# "size": Applies a variety of options to minimize the size of the runtime, +# generally at the expense of features but not performance. This implies +# LTO. +# +# Parameters: +# PROFILE_NAME: Name of a supported profile or falsey for none. +# SIZE_INTERFACE_COPTS: Additional IREE_INTERFACE_COPTS to add for the +# "size" profile. +function(iree_enable_optimization_options) + cmake_parse_arguments( + _RULE + "" + "PROFILE_NAME" + "SIZE_INTERFACE_COPTS" + ${ARGN} ) - set(IREE_DEFAULT_LINKOPTS - "${IREE_DEFAULT_LINKOPTS}" - "${IREE_SIZE_OPTIMIZED_DEFAULT_LINKOPTS}" + + if(NOT _RULE_PROFILE_NAME) + # Do nothing. 
+ return() + endif() + + set(_ADDL_COPTS) + set(_ADDL_INTERFACE_COPTS) + set(_ADDL_LINKOPTS) + + if(_RULE_PROFILE_NAME STREQUAL "lto") + set(_ADDL_COPTS ${IREE_LTO_COPTS}) + set(_ADDL_LINKOPTS ${IREE_LTO_LINKOPTS}) + elseif(_RULE_PROFILE_NAME STREQUAL "size") + # Size optimized assumes LTO. + # Size optimized often also elides logging and various status reporting, + # which can result in unused-but-set-variable style warnings. Disable those. + iree_select_compiler_opts(_ADDL_COPTS + ALL + ${IREE_LTO_COPTS} + ${IREE_SIZE_OPTIMIZED_DEFAULT_COPTS} + CLANG_OR_GCC + -Wno-unused-but-set-variable + ) + set(_ADDL_INTERFACE_COPTS "${_RULE_SIZE_INTERFACE_COPTS}") + set(_ADDL_LINKOPTS + ${IREE_LTO_LINKOPTS} + ${IREE_SIZE_OPTIMIZED_DEFAULT_LINKOPTS} + ) + else() + message(FATAL_ERROR "Unrecognized size optimization profile name '${_RULE_PROFILE_NAME}'. Expected one of 'lto', 'size'") + endif() + + message(STATUS "Enabled optimization profile '${_RULE_PROFILE_NAME}' for targets under ${CMAKE_CURRENT_SOURCE_DIR}: \n" + " COPTS: ${_ADDL_COPTS}\n" + " INTERFACE COPTS: ${_ADDL_INTERFACE_COPTS}\n" + " LINKOPTS: ${_ADDL_LINKOPTS}" ) -endif() + set(IREE_DEFAULT_COPTS "${IREE_DEFAULT_COPTS};${_ADDL_COPTS}" PARENT_SCOPE) + set(IREE_INTERFACE_COPTS "${IREE_INTERFACE_COPTS};${_ADDL_INTERFACE_COPTS}" PARENT_SCOPE) + set(IREE_DEFAULT_LINKOPTS "${IREE_DEFAULT_LINKOPTS};${_ADDL_LINKOPTS}" PARENT_SCOPE) +endfunction() #------------------------------------------------------------------------------- # Compiler: Clang/LLVM diff --git a/build_tools/cmake/iree_setup_toolchain.cmake b/build_tools/cmake/iree_setup_toolchain.cmake index d046dddc4927..a38aef0b2258 100644 --- a/build_tools/cmake/iree_setup_toolchain.cmake +++ b/build_tools/cmake/iree_setup_toolchain.cmake @@ -22,6 +22,49 @@ endfunction() # explicitly or through global properties. Please don't add to it without # a very good reason. 
macro(iree_setup_toolchain) + #------------------------------------------------------------------------------- + # Force LTO compatible tools. + #------------------------------------------------------------------------------- + + # On older (i.e. gcc 9.x era) systems, the compiler and system toolchains + # were not compatible for general LTO use, and they were further not + # compatible amongst themselves. + # As an aid to CIs, we provide an option which will force toolchain specific + # binutils and linkers only if running on Linux. This lets us use the same + # runtime build scripts across platforms without further shenanigans. + # This is a hack and should be rolled back once 2020 era systems are not in + # use. + # Users should not use this. If they have such an old system, configure CMake + # to use toolchain specific tools. + option(IREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX "Forces use of toolchain specific LTO compatible binutils if on Linux" OFF) + mark_as_advanced(IREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX) + if(IREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX AND CMAKE_SYSTEM_NAME STREQUAL "Linux") + if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + message(STATUS "Running on an old Linux with -DIREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX: Forcing llvm-ar, llvm-nm, llvm-ranlib, and ld.lld") + find_program(IREE_CMAKE_LTO_AR llvm-ar REQUIRED) + find_program(IREE_CMAKE_LTO_RANLIB llvm-ranlib REQUIRED) + find_program(IREE_CMAKE_LTO_NM llvm-nm REQUIRED) + set(IREE_USE_LINKER "lld") + elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + message(STATUS "Running on an old Linux with -DIREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX: Forcing gcc-ar, gcc-nm, gcc-ranlib, and ld.gold") + find_program(IREE_CMAKE_LTO_AR gcc-ar REQUIRED) + find_program(IREE_CMAKE_LTO_RANLIB gcc-ranlib REQUIRED) + find_program(IREE_CMAKE_LTO_NM gcc-nm REQUIRED) + set(IREE_USE_LINKER "gold") + endif() + + set(IREE_ENABLE_LLD OFF) + find_program(IREE_CMAKE_LTO_LD ld.${IREE_USE_LINKER} REQUIRED) + 
mark_as_advanced(IREE_CMAKE_LTO_AR IREE_CMAKE_LTO_RANLIB IREE_CMAKE_LTO_NM IREE_CMAKE_LTO_LD) + + set(CMAKE_AR ${IREE_CMAKE_LTO_AR} CACHE FILEPATH "Forcing LTO ar instead of ar" FORCE) + set(CMAKE_AR ${IREE_CMAKE_LTO_AR}) + set(CMAKE_NM ${IREE_CMAKE_LTO_NM} CACHE FILEPATH "Forcing LTO nm instead of nm" FORCE) + set(CMAKE_NM ${IREE_CMAKE_LTO_NM}) + set(CMAKE_RANLIB ${IREE_CMAKE_LTO_RANLIB} CACHE FILEPATH "Forcing LTO ranlib instead of ranlib" FORCE) + set(CMAKE_RANLIB ${IREE_CMAKE_LTO_RANLIB}) + endif() + #----------------------------------------------------------------------------- # Supports dynamic library loading. #----------------------------------------------------------------------------- diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index 8ee250b1f217..4d48d72e4bc6 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -4,6 +4,22 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +iree_enable_optimization_options( + PROFILE_NAME + "${IREE_RUNTIME_OPTIMIZATION_PROFILE}" + # TODO: These options should be separated between those required as + # INTERFACE and those that can be private (i.e. to the runtime). + SIZE_INTERFACE_COPTS + "-DIREE_STATUS_MODE=0" + "-DIREE_STATISTICS_ENABLE=0" + "-DIREE_HAL_MODULE_STRING_UTIL_ENABLE=0" + "-DIREE_HAL_COMMAND_BUFFER_VALIDATION_ENABLE=0" + "-DIREE_VM_BACKTRACE_ENABLE=0" + "-DIREE_VM_BYTECODE_VERIFICATION_ENABLE=0" + "-DIREE_VM_EXT_F32_ENABLE=0" + "-DIREE_VM_EXT_F64_ENABLE=0" +) + # Must include runtime plugins before processing the runtime sources so that # the static link list can be set. 
iree_include_cmake_plugin_dirs( diff --git a/runtime/setup.py b/runtime/setup.py index cf77f43bfae0..cbaee8e42194 100644 --- a/runtime/setup.py +++ b/runtime/setup.py @@ -72,6 +72,10 @@ def combine_dicts(*ds): "*** Tracy tools not enabled (enable with IREE_RUNTIME_BUILD_TRACY_TOOLS=ON)", file=sys.stderr, ) +# Default to LTO builds for our python releases. +IREE_RUNTIME_OPTIMIZATION_PROFILE = os.getenv( + "IREE_RUNTIME_OPTIMIZATION_PROFILE", "lto" +) def check_pip_version(): @@ -264,6 +268,7 @@ def build_configuration(cmake_build_dir, cmake_install_dir, extra_cmake_args=()) cmake_args = [ "-GNinja", "--log-level=VERBOSE", + f"-DIREE_RUNTIME_OPTIMIZATION_PROFILE={IREE_RUNTIME_OPTIMIZATION_PROFILE}", "-DIREE_BUILD_PYTHON_BINDINGS=ON", "-DIREE_BUILD_COMPILER=OFF", "-DIREE_BUILD_SAMPLES=OFF", From e6df8e6c511862d1c9a9162e90453bbeb24fe73f Mon Sep 17 00:00:00 2001 From: Scott Todd Date: Mon, 18 Mar 2024 10:18:35 -0700 Subject: [PATCH 2/2] Update XFAIL sets in ONNX test suite. --- .../external_test_suite/config_cpu_llvm_sync.json | 5 ++++- .../external_test_suite/config_gpu_vulkan.json | 10 ++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/experimental/regression_suite/external_test_suite/config_cpu_llvm_sync.json b/experimental/regression_suite/external_test_suite/config_cpu_llvm_sync.json index 946c1526961b..d578270d95ae 100644 --- a/experimental/regression_suite/external_test_suite/config_cpu_llvm_sync.json +++ b/experimental/regression_suite/external_test_suite/config_cpu_llvm_sync.json @@ -794,9 +794,11 @@ "test_clip_default_int8_min", "test_clip_default_int8_min_expanded", "test_constant_pad", + "test_constantofshape_float_ones", "test_constantofshape_int_shape_zero", "test_constantofshape_int_zeros", "test_div_uint8", + "test_dropout_default_mask_ratio", "test_elu_default", "test_gather_0", "test_gather_1", @@ -834,13 +836,14 @@ "test_pow_types_float32_uint64", "test_qlinearconv", "test_qlinearmatmul_2D_int8_float16", + 
"test_qlinearmatmul_2D_int8_float32", "test_qlinearmatmul_3D_int8_float16", "test_qlinearmatmul_3D_int8_float32", "test_qlinearmatmul_3D_uint8_float16", - "test_qlinearmatmul_2D_int8_float32", "test_qlinearmatmul_3D_uint8_float32", "test_quantizelinear", "test_range_int32_type_negative_delta", + "test_reduce_min_empty_set", "test_scatter_elements_with_negative_indices", "test_selu_default", "test_shape", diff --git a/experimental/regression_suite/external_test_suite/config_gpu_vulkan.json b/experimental/regression_suite/external_test_suite/config_gpu_vulkan.json index e0ceda7a2b80..2b21bef48fe5 100644 --- a/experimental/regression_suite/external_test_suite/config_gpu_vulkan.json +++ b/experimental/regression_suite/external_test_suite/config_gpu_vulkan.json @@ -795,15 +795,17 @@ "test_castlike_FLOAT_to_BFLOAT16_expanded", "test_castlike_FLOAT_to_DOUBLE", "test_castlike_FLOAT_to_DOUBLE_expanded", - "test_clip_default_int8_min", - "test_clip_default_int8_min_expanded", "test_clip_default_int8_inbounds", "test_clip_default_int8_max", "test_clip_default_int8_max_expanded", + "test_clip_default_int8_min", + "test_clip_default_int8_min_expanded", "test_constant_pad", + "test_constantofshape_float_ones", "test_constantofshape_int_shape_zero", "test_constantofshape_int_zeros", "test_div_uint8", + "test_dropout_default_mask_ratio", "test_elu_default", "test_gather_0", "test_gather_1", @@ -840,12 +842,12 @@ "test_pow_types_float32_uint32", "test_pow_types_float32_uint64", "test_qlinearconv", + "test_qlinearmatmul_2D_int8_float16", + "test_qlinearmatmul_2D_int8_float32", "test_qlinearmatmul_3D_int8_float16", "test_qlinearmatmul_3D_int8_float32", "test_qlinearmatmul_3D_uint8_float16", "test_qlinearmatmul_3D_uint8_float32", - "test_qlinearmatmul_2D_int8_float16", - "test_qlinearmatmul_2D_int8_float32", "test_quantizelinear", "test_range_int32_type_negative_delta", "test_scatter_elements_with_negative_indices",