Skip to content

Commit

Permalink
Enable LTO optimization by default for runtime releases. (#16811)
Browse files Browse the repository at this point in the history
This is done by generalizing the primordial `IREE_SIZE_OPTIMIZED` flag
into a `IREE_RUNTIME_OPTIMIZATION_PROFILE` that:

* Can enable 'lto' or 'size'.
* Is scoped to just the runtime targets.
* Minimally does the right thing for 'size' on Linux as well, rather than
only on Windows (not the goal of this patch, but it drops ~300KB from binary
sizes when enabled).

The compile time delta for a clean build of the runtime in full LTO vs
regular mode was not measured precisely but is in the noise (i.e. <1m).
As such, this patch simply enables it by default for Python release binaries.

Others can be enabled via: `-DIREE_RUNTIME_OPTIMIZATION_PROFILE=lto`,
which is recommended for benchmarking, etc.

Note that this removes the use of the CMake option
`IREE_SIZE_OPTIMIZED`. It was never even declared properly as an option
and didn't do the same class of thing across Windows/Linux. This has
been fixed and it can be enabled via
`-DIREE_RUNTIME_OPTIMIZATION_PROFILE=size`. Note that, as was already the
case on Windows, this implies LTO. If the old behavior without LTO is
desired, we can add a profile for that.

Progress on #898.

---------

Co-authored-by: Scott Todd <[email protected]>
  • Loading branch information
stellaraccident and ScottTodd committed Mar 18, 2024
1 parent ee32fc7 commit b61a918
Show file tree
Hide file tree
Showing 12 changed files with 187 additions and 43 deletions.
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,11 @@ option(IREE_COMPILER_BUILD_SHARED_LIBS "Enables BUILD_SHARED_LIBS CMake mode for
# CI coverage is established.
option(BUILD_SHARED_LIBS "Instructs CMake to build libraries as shared if possible" OFF)

# Optimization settings for the runtime build.
#
# IREE_RUNTIME_OPTIMIZATION_PROFILE selects the optimization profile applied
# to the runtime. The empty default selects no profile; 'lto' and 'size' are
# the other accepted values.
set(IREE_RUNTIME_OPTIMIZATION_PROFILE "" CACHE STRING
"Build optimization profile to apply. One of '', 'lto', 'size'.")
# IREE_LTO_MODE picks the LTO flavor ('thin' or 'full', default 'full'). Per
# its help text it is only consulted on clang-like compilers.
set(IREE_LTO_MODE "full" CACHE STRING "LTO type, 'thin' or 'full'. Only consulted on clang-like compilers.")

#-------------------------------------------------------------------------------
# IREE command-line tooling configuration
#-------------------------------------------------------------------------------
Expand Down
2 changes: 2 additions & 0 deletions build_tools/cmake/build_runtime.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ args=(
"-DPython3_EXECUTABLE=${IREE_PYTHON3_EXECUTABLE}"
"-DPYTHON_EXECUTABLE=${IREE_PYTHON3_EXECUTABLE}"
"-DCMAKE_BUILD_TYPE=RelWithDebInfo"
"-DIREE_RUNTIME_OPTIMIZATION_PROFILE=lto"
"-DIREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX=ON"
"-DIREE_BUILD_COMPILER=OFF"
)

Expand Down
2 changes: 2 additions & 0 deletions build_tools/cmake/build_runtime_small.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,7 @@ source build_tools/cmake/setup_build.sh
-DPYTHON_EXECUTABLE="${IREE_PYTHON3_EXECUTABLE}" \
-DCMAKE_BUILD_TYPE=MinSizeRel \
-DIREE_RUNTIME_OPTIMIZATION_PROFILE=size \
-DIREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX=ON \
-DIREE_BUILD_COMPILER=OFF
"${CMAKE_BIN?}" --build "${BUILD_DIR}" -- -k 0
2 changes: 2 additions & 0 deletions build_tools/cmake/external_cc_library.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ function(external_cc_library)
PRIVATE
${_RULE_COPTS}
${IREE_DEFAULT_COPTS}
INTERFACE
${IREE_INTERFACE_COPTS}
)
target_link_options(${_NAME}
PRIVATE
Expand Down
1 change: 1 addition & 0 deletions build_tools/cmake/iree_cc_binary.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ function(iree_cc_binary)
target_compile_options(${_NAME}
PRIVATE
${IREE_DEFAULT_COPTS}
${IREE_INTERFACE_COPTS}
${_RULE_COPTS}
)
target_link_options(${_NAME}
Expand Down
2 changes: 2 additions & 0 deletions build_tools/cmake/iree_cc_library.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,8 @@ function(iree_cc_library)
PRIVATE
${IREE_DEFAULT_COPTS}
${_RULE_COPTS}
INTERFACE
${IREE_INTERFACE_COPTS}
)
target_link_options(${_NAME}
PRIVATE
Expand Down
137 changes: 99 additions & 38 deletions build_tools/cmake/iree_copts.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -415,48 +415,109 @@ if(EMSCRIPTEN AND IREE_EXTERNAL_WEBGPU_HAL_DRIVER_FOUND)
endif()

#-------------------------------------------------------------------------------
# Size-optimized build flags
# Flag sets used by different optimization profiles.
#-------------------------------------------------------------------------------

# TODO(#898): add a dedicated size-constrained configuration.
if(IREE_SIZE_OPTIMIZED)
iree_select_compiler_opts(IREE_SIZE_OPTIMIZED_DEFAULT_COPTS
MSVC_OR_CLANG_CL
"/GS-"
"/GL"
"/Gw"
"/Gy"
"/DNDEBUG"
"/Os"
"/Oy"
"/Zi"
"/c"
)
iree_select_compiler_opts(IREE_SIZE_OPTIMIZED_DEFAULT_LINKOPTS
MSVC_OR_CLANG_CL
"-DEBUG:FULL"
"-LTCG"
"-opt:ref,icf"
)
# TODO(#898): make this only impact the runtime (IREE_RUNTIME_DEFAULT_...).
# These flags come from iree/base/config.h:
set(IREE_DEFAULT_COPTS
"${IREE_DEFAULT_COPTS}"
"${IREE_SIZE_OPTIMIZED_DEFAULT_COPTS}"
"-DIREE_STATUS_MODE=0"
"-DIREE_STATISTICS_ENABLE=0"
"-DIREE_HAL_MODULE_STRING_UTIL_ENABLE=0"
"-DIREE_HAL_COMMAND_BUFFER_VALIDATION_ENABLE=0"
"-DIREE_VM_BACKTRACE_ENABLE=0"
"-DIREE_VM_BYTECODE_VERIFICATION_ENABLE=0"
"-DIREE_VM_EXT_F32_ENABLE=0"
"-DIREE_VM_EXT_F64_ENABLE=0"
# Compile options that turn on link-time optimization (LTO). The options are
# grouped under compiler keywords (CLANG, GCC, MSVC_OR_CLANG_CL) that
# iree_select_compiler_opts presumably uses to pick the set matching the
# active compiler (helper not shown here; confirm against its definition).
# For Clang, the LTO flavor comes from the IREE_LTO_MODE variable.
iree_select_compiler_opts(IREE_LTO_COPTS
CLANG
"-flto=${IREE_LTO_MODE}"
GCC
"-flto"
"-fuse-linker-plugin"
MSVC_OR_CLANG_CL
"/GL"
)

# Link options that pair with IREE_LTO_COPTS, using the same per-compiler
# grouping. For Clang, the same IREE_LTO_MODE value is passed at link time.
iree_select_compiler_opts(IREE_LTO_LINKOPTS
CLANG
"-flto=${IREE_LTO_MODE}"
GCC
"-flto"
MSVC_OR_CLANG_CL
"-LTCG"
)

# Extra compile options used by the "size" optimization profile. Only an
# MSVC_OR_CLANG_CL group is listed, so no options are contributed here for
# other compilers. The LTO flags themselves are not part of this set; they
# live in IREE_LTO_COPTS.
iree_select_compiler_opts(IREE_SIZE_OPTIMIZED_DEFAULT_COPTS
MSVC_OR_CLANG_CL
"/GS-"
"/Gw"
"/Gy"
"/DNDEBUG"
"/Os"
"/Oy"
"/Zi"
"/c"
)
# Extra link options used by the "size" optimization profile, again only for
# the MSVC_OR_CLANG_CL group. LTO link flags live in IREE_LTO_LINKOPTS.
iree_select_compiler_opts(IREE_SIZE_OPTIMIZED_DEFAULT_LINKOPTS
MSVC_OR_CLANG_CL
"-DEBUG:FULL"
"-opt:ref,icf"
)

# Enables an optimization profile for a sub-tree by extending, in the caller's
# scope, the IREE_DEFAULT_COPTS, IREE_INTERFACE_COPTS and
# IREE_DEFAULT_LINKOPTS variables that targets created after this point use.
#
# Available profiles:
#   "lto": Applies options to enable link time code generation.
#   "size": Applies a variety of options to minimize the size of the runtime,
#     generally at the expense of features but not performance. This implies
#     LTO.
#
# Parameters:
#   PROFILE_NAME: Name of a supported profile or falsey for none. When falsey,
#     the function returns without touching any variable.
#   SIZE_INTERFACE_COPTS: Additional IREE_INTERFACE_COPTS to add for the
#     "size" profile. Ignored for every other profile.
#
# Errors:
#   Stops configuration with FATAL_ERROR when PROFILE_NAME is set to anything
#   other than "lto" or "size".
function(iree_enable_optimization_options)
  cmake_parse_arguments(
    _RULE
    ""
    "PROFILE_NAME"
    "SIZE_INTERFACE_COPTS"
    ${ARGN}
  )

  if(NOT _RULE_PROFILE_NAME)
    # No profile requested: leave the caller's option lists untouched.
    return()
  endif()

  set(_ADDL_COPTS)
  set(_ADDL_INTERFACE_COPTS)
  set(_ADDL_LINKOPTS)

  if(_RULE_PROFILE_NAME STREQUAL "lto")
    set(_ADDL_COPTS ${IREE_LTO_COPTS})
    set(_ADDL_LINKOPTS ${IREE_LTO_LINKOPTS})
  elseif(_RULE_PROFILE_NAME STREQUAL "size")
    # Size optimized assumes LTO.
    # Size optimized often also elides logging and various status reporting,
    # which can result in unused-but-set-variable style warnings. Disable
    # those on Clang/GCC.
    iree_select_compiler_opts(_ADDL_COPTS
      ALL
        ${IREE_LTO_COPTS}
        ${IREE_SIZE_OPTIMIZED_DEFAULT_COPTS}
      CLANG_OR_GCC
        -Wno-unused-but-set-variable
    )
    set(_ADDL_INTERFACE_COPTS "${_RULE_SIZE_INTERFACE_COPTS}")
    set(_ADDL_LINKOPTS
      ${IREE_LTO_LINKOPTS}
      ${IREE_SIZE_OPTIMIZED_DEFAULT_LINKOPTS}
    )
  else()
    # The profile is not necessarily a "size" one, so do not call it that.
    message(FATAL_ERROR "Unrecognized optimization profile name '${_RULE_PROFILE_NAME}'. Expected one of 'lto', 'size'")
  endif()

  message(STATUS "Enabled optimization profile '${_RULE_PROFILE_NAME}' for targets under ${CMAKE_CURRENT_SOURCE_DIR}: \n"
    "  COPTS: ${_ADDL_COPTS}\n"
    "  INTERFACE COPTS: ${_ADDL_INTERFACE_COPTS}\n"
    "  LINKOPTS: ${_ADDL_LINKOPTS}"
  )

  # Append with list(APPEND) rather than "${A};${B}" string concatenation so
  # that an empty side does not introduce empty list elements, then publish
  # the extended lists to the calling scope.
  list(APPEND IREE_DEFAULT_COPTS ${_ADDL_COPTS})
  list(APPEND IREE_INTERFACE_COPTS ${_ADDL_INTERFACE_COPTS})
  list(APPEND IREE_DEFAULT_LINKOPTS ${_ADDL_LINKOPTS})
  set(IREE_DEFAULT_COPTS "${IREE_DEFAULT_COPTS}" PARENT_SCOPE)
  set(IREE_INTERFACE_COPTS "${IREE_INTERFACE_COPTS}" PARENT_SCOPE)
  set(IREE_DEFAULT_LINKOPTS "${IREE_DEFAULT_LINKOPTS}" PARENT_SCOPE)
endfunction()

#-------------------------------------------------------------------------------
# Compiler: Clang/LLVM
Expand Down
43 changes: 43 additions & 0 deletions build_tools/cmake/iree_setup_toolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,49 @@ endfunction()
# explicitly or through global properties. Please don't add to it without
# a very good reason.
macro(iree_setup_toolchain)
#-------------------------------------------------------------------------------
# Force LTO compatible tools.
#-------------------------------------------------------------------------------

# On older (i.e. gcc 9.x era) systems, the compiler and system toolchains
# were not compatible for general LTO use, and they were further not
# compatible amongst themselves.
# As an aid to CIs, we provide an option which will force toolchain specific
# binutils and linkers only if running on Linux. This lets us use the same
# runtime build scripts across platforms without further shenanigans.
# This is a hack and should be rolled back once 2020 era systems are not in
# use.
# Users should not use this. If they have such an old system, configure CMake
# to use toolchain specific tools.
option(IREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX "Forces use of toolchain specific LTO compatible binutils if on Linux" OFF)
mark_as_advanced(IREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX)
if(IREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
  # Pick the ar/ranlib/nm/linker family that matches the C++ compiler.
  if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
    message(STATUS "Running on an old Linux with -DIREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX: Forcing llvm-ar, llvm-nm, llvm-ranlib, and ld.lld")
    find_program(IREE_CMAKE_LTO_AR llvm-ar REQUIRED)
    find_program(IREE_CMAKE_LTO_RANLIB llvm-ranlib REQUIRED)
    find_program(IREE_CMAKE_LTO_NM llvm-nm REQUIRED)
    set(IREE_USE_LINKER "lld")
  elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    message(STATUS "Running on an old Linux with -DIREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX: Forcing gcc-ar, gcc-nm, gcc-ranlib, and ld.gold")
    find_program(IREE_CMAKE_LTO_AR gcc-ar REQUIRED)
    find_program(IREE_CMAKE_LTO_RANLIB gcc-ranlib REQUIRED)
    find_program(IREE_CMAKE_LTO_NM gcc-nm REQUIRED)
    set(IREE_USE_LINKER "gold")
  else()
    # Without this branch the tool variables below would be empty: the linker
    # lookup would search for a bare "ld." and CMAKE_AR/NM/RANLIB would be
    # forced to empty values. Fail with an actionable message instead.
    message(FATAL_ERROR "-DIREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX only supports the Clang and GNU C++ compilers, but CMAKE_CXX_COMPILER_ID is '${CMAKE_CXX_COMPILER_ID}'. Disable the option and configure LTO compatible tools manually.")
  endif()

  # NOTE(review): presumably disabled so that the separate IREE_ENABLE_LLD
  # handling does not also try to select a linker; confirm against its use.
  set(IREE_ENABLE_LLD OFF)
  find_program(IREE_CMAKE_LTO_LD ld.${IREE_USE_LINKER} REQUIRED)
  mark_as_advanced(IREE_CMAKE_LTO_AR IREE_CMAKE_LTO_RANLIB IREE_CMAKE_LTO_NM IREE_CMAKE_LTO_LD)

  # Each tool is set twice: once as a forced cache entry and once as a normal
  # variable, because a normal variable of the same name takes precedence
  # over the cache entry when the variable is read.
  set(CMAKE_AR ${IREE_CMAKE_LTO_AR} CACHE FILEPATH "Forcing LTO ar instead of ar" FORCE)
  set(CMAKE_AR ${IREE_CMAKE_LTO_AR})
  set(CMAKE_NM ${IREE_CMAKE_LTO_NM} CACHE FILEPATH "Forcing LTO nm instead of nm" FORCE)
  set(CMAKE_NM ${IREE_CMAKE_LTO_NM})
  set(CMAKE_RANLIB ${IREE_CMAKE_LTO_RANLIB} CACHE FILEPATH "Forcing LTO ranlib instead of ranlib" FORCE)
  set(CMAKE_RANLIB ${IREE_CMAKE_LTO_RANLIB})
endif()

#-----------------------------------------------------------------------------
# Supports dynamic library loading.
#-----------------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -794,9 +794,11 @@
"test_clip_default_int8_min",
"test_clip_default_int8_min_expanded",
"test_constant_pad",
"test_constantofshape_float_ones",
"test_constantofshape_int_shape_zero",
"test_constantofshape_int_zeros",
"test_div_uint8",
"test_dropout_default_mask_ratio",
"test_elu_default",
"test_gather_0",
"test_gather_1",
Expand Down Expand Up @@ -834,13 +836,14 @@
"test_pow_types_float32_uint64",
"test_qlinearconv",
"test_qlinearmatmul_2D_int8_float16",
"test_qlinearmatmul_2D_int8_float32",
"test_qlinearmatmul_3D_int8_float16",
"test_qlinearmatmul_3D_int8_float32",
"test_qlinearmatmul_3D_uint8_float16",
"test_qlinearmatmul_2D_int8_float32",
"test_qlinearmatmul_3D_uint8_float32",
"test_quantizelinear",
"test_range_int32_type_negative_delta",
"test_reduce_min_empty_set",
"test_scatter_elements_with_negative_indices",
"test_selu_default",
"test_shape",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -795,15 +795,17 @@
"test_castlike_FLOAT_to_BFLOAT16_expanded",
"test_castlike_FLOAT_to_DOUBLE",
"test_castlike_FLOAT_to_DOUBLE_expanded",
"test_clip_default_int8_min",
"test_clip_default_int8_min_expanded",
"test_clip_default_int8_inbounds",
"test_clip_default_int8_max",
"test_clip_default_int8_max_expanded",
"test_clip_default_int8_min",
"test_clip_default_int8_min_expanded",
"test_constant_pad",
"test_constantofshape_float_ones",
"test_constantofshape_int_shape_zero",
"test_constantofshape_int_zeros",
"test_div_uint8",
"test_dropout_default_mask_ratio",
"test_elu_default",
"test_gather_0",
"test_gather_1",
Expand Down Expand Up @@ -840,12 +842,12 @@
"test_pow_types_float32_uint32",
"test_pow_types_float32_uint64",
"test_qlinearconv",
"test_qlinearmatmul_2D_int8_float16",
"test_qlinearmatmul_2D_int8_float32",
"test_qlinearmatmul_3D_int8_float16",
"test_qlinearmatmul_3D_int8_float32",
"test_qlinearmatmul_3D_uint8_float16",
"test_qlinearmatmul_3D_uint8_float32",
"test_qlinearmatmul_2D_int8_float16",
"test_qlinearmatmul_2D_int8_float32",
"test_quantizelinear",
"test_range_int32_type_negative_delta",
"test_scatter_elements_with_negative_indices",
Expand Down
16 changes: 16 additions & 0 deletions runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,22 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

# Apply the optimization profile chosen through the
# IREE_RUNTIME_OPTIMIZATION_PROFILE variable before any runtime targets are
# declared below. An empty value selects no profile.
iree_enable_optimization_options(
PROFILE_NAME
"${IREE_RUNTIME_OPTIMIZATION_PROFILE}"
# TODO: These options should be separated between those required as
# INTERFACE and those that can be private (i.e. to the runtime).
# The defines below are only passed along for the "size" profile; each one
# sets the named IREE_* configuration macro to 0.
SIZE_INTERFACE_COPTS
"-DIREE_STATUS_MODE=0"
"-DIREE_STATISTICS_ENABLE=0"
"-DIREE_HAL_MODULE_STRING_UTIL_ENABLE=0"
"-DIREE_HAL_COMMAND_BUFFER_VALIDATION_ENABLE=0"
"-DIREE_VM_BACKTRACE_ENABLE=0"
"-DIREE_VM_BYTECODE_VERIFICATION_ENABLE=0"
"-DIREE_VM_EXT_F32_ENABLE=0"
"-DIREE_VM_EXT_F64_ENABLE=0"
)

# Must include runtime plugins before processing the runtime sources so that
# the static link list can be set.
iree_include_cmake_plugin_dirs(
Expand Down
5 changes: 5 additions & 0 deletions runtime/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ def combine_dicts(*ds):
"*** Tracy tools not enabled (enable with IREE_RUNTIME_BUILD_TRACY_TOOLS=ON)",
file=sys.stderr,
)
# Python release builds use the "lto" runtime optimization profile unless the
# IREE_RUNTIME_OPTIMIZATION_PROFILE environment variable says otherwise.
_PROFILE_ENV_VAR = "IREE_RUNTIME_OPTIMIZATION_PROFILE"
IREE_RUNTIME_OPTIMIZATION_PROFILE = os.environ.get(_PROFILE_ENV_VAR, "lto")


def check_pip_version():
Expand Down Expand Up @@ -264,6 +268,7 @@ def build_configuration(cmake_build_dir, cmake_install_dir, extra_cmake_args=())
cmake_args = [
"-GNinja",
"--log-level=VERBOSE",
f"-DIREE_RUNTIME_OPTIMIZATION_PROFILE={IREE_RUNTIME_OPTIMIZATION_PROFILE}",
"-DIREE_BUILD_PYTHON_BINDINGS=ON",
"-DIREE_BUILD_COMPILER=OFF",
"-DIREE_BUILD_SAMPLES=OFF",
Expand Down

0 comments on commit b61a918

Please sign in to comment.