Skip to content

Commit

Permalink
Enable LTO optimization by default for runtime releases.
Browse files Browse the repository at this point in the history
This is done by generalizing the primordial `IREE_SIZE_OPTIMIZED` flag into a `IREE_RUNTIME_OPTIMIZATION_PROFILE` that:

* Can enable 'lto' or 'size'.
* Is scoped to just the runtime targets.
* Minimally does the right thing for 'size' on Linux as well, rather than only on Windows (not the goal of this patch, but it drops ~300KB from binary sizes when enabled).

The compile time delta for a clean build of the runtime in full LTO vs regular mode was not measured precisely but is in the noise (i.e. <1m). As such, just enabling by default for Python release binaries.

Others can be enabled via: `-DIREE_RUNTIME_OPTIMIZATION_PROFILE=lto`, which is recommended for benchmarking, etc.

Progress on #898.
  • Loading branch information
stellaraccident committed Mar 17, 2024
1 parent 2395046 commit 5b96e90
Show file tree
Hide file tree
Showing 10 changed files with 177 additions and 38 deletions.
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,11 @@ option(IREE_COMPILER_BUILD_SHARED_LIBS "Enables BUILD_SHARED_LIBS CMake mode for
# CI coverage is established.
option(BUILD_SHARED_LIBS "Instructs CMake to build libraries as shared if possible" OFF)

# Control of LTO settings for the runtime build.
#
# IREE_RUNTIME_OPTIMIZATION_PROFILE: optimization profile name; one of ''
#   (none), 'lto' or 'size'.
# IREE_LTO_MODE: LTO flavor ('thin' or 'full'); only consulted on clang-like
#   compilers.
#
# The STRINGS properties advertise the allowed values to cmake-gui/ccmake; they
# do not change the configured values.
set(IREE_RUNTIME_OPTIMIZATION_PROFILE "" CACHE STRING
    "Build optimization profile to apply. One of '', 'lto', 'size'.")
set_property(CACHE IREE_RUNTIME_OPTIMIZATION_PROFILE PROPERTY STRINGS "" "lto" "size")
set(IREE_LTO_MODE "full" CACHE STRING "LTO type, 'thin' or 'full'. Only consulted on clang-like compilers.")
set_property(CACHE IREE_LTO_MODE PROPERTY STRINGS "full" "thin")

#-------------------------------------------------------------------------------
# IREE command-line tooling configuration
#-------------------------------------------------------------------------------
Expand Down
2 changes: 2 additions & 0 deletions build_tools/cmake/build_runtime.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ args=(
"-DPython3_EXECUTABLE=${IREE_PYTHON3_EXECUTABLE}"
"-DPYTHON_EXECUTABLE=${IREE_PYTHON3_EXECUTABLE}"
"-DCMAKE_BUILD_TYPE=RelWithDebInfo"
"-DIREE_RUNTIME_OPTIMIZATION_PROFILE=lto"
"-DIREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX=ON"
"-DIREE_BUILD_COMPILER=OFF"
)

Expand Down
2 changes: 2 additions & 0 deletions build_tools/cmake/build_runtime_small.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,7 @@ source build_tools/cmake/setup_build.sh
-DPYTHON_EXECUTABLE="${IREE_PYTHON3_EXECUTABLE}" \
-DCMAKE_BUILD_TYPE=MinSizeRel \
-DIREE_SIZE_OPTIMIZED=ON \
-DIREE_RUNTIME_OPTIMIZATION_PROFILE=size \
-DIREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX=ON \
-DIREE_FORCE_GCC_BINUTILS_ON_LINUX=ON \
-DIREE_BUILD_COMPILER=OFF
"${CMAKE_BIN?}" --build "${BUILD_DIR}" -- -k 0
2 changes: 2 additions & 0 deletions build_tools/cmake/external_cc_library.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ function(external_cc_library)
PRIVATE
${_RULE_COPTS}
${IREE_DEFAULT_COPTS}
INTERFACE
${IREE_INTERFACE_COPTS}
)
target_link_options(${_NAME}
PRIVATE
Expand Down
1 change: 1 addition & 0 deletions build_tools/cmake/iree_cc_binary.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ function(iree_cc_binary)
target_compile_options(${_NAME}
PRIVATE
${IREE_DEFAULT_COPTS}
${IREE_INTERFACE_COPTS}
${_RULE_COPTS}
)
target_link_options(${_NAME}
Expand Down
2 changes: 2 additions & 0 deletions build_tools/cmake/iree_cc_library.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,8 @@ function(iree_cc_library)
PRIVATE
${IREE_DEFAULT_COPTS}
${_RULE_COPTS}
INTERFACE
${IREE_INTERFACE_COPTS}
)
target_link_options(${_NAME}
PRIVATE
Expand Down
137 changes: 99 additions & 38 deletions build_tools/cmake/iree_copts.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -415,48 +415,109 @@ if(EMSCRIPTEN AND IREE_EXTERNAL_WEBGPU_HAL_DRIVER_FOUND)
endif()

#-------------------------------------------------------------------------------
# Size-optimized build flags
# Flag sets used by the different optimization profiles.
#-------------------------------------------------------------------------------

# TODO(#898): add a dedicated size-constrained configuration.
if(IREE_SIZE_OPTIMIZED)
iree_select_compiler_opts(IREE_SIZE_OPTIMIZED_DEFAULT_COPTS
MSVC_OR_CLANG_CL
"/GS-"
"/GL"
"/Gw"
"/Gy"
"/DNDEBUG"
"/Os"
"/Oy"
"/Zi"
"/c"
)
iree_select_compiler_opts(IREE_SIZE_OPTIMIZED_DEFAULT_LINKOPTS
MSVC_OR_CLANG_CL
"-DEBUG:FULL"
"-LTCG"
"-opt:ref,icf"
)
# TODO(#898): make this only impact the runtime (IREE_RUNTIME_DEFAULT_...).
# These flags come from iree/base/config.h:
set(IREE_DEFAULT_COPTS
"${IREE_DEFAULT_COPTS}"
"${IREE_SIZE_OPTIMIZED_DEFAULT_COPTS}"
"-DIREE_STATUS_MODE=0"
"-DIREE_STATISTICS_ENABLE=0"
"-DIREE_HAL_MODULE_STRING_UTIL_ENABLE=0"
"-DIREE_HAL_COMMAND_BUFFER_VALIDATION_ENABLE=0"
"-DIREE_VM_BACKTRACE_ENABLE=0"
"-DIREE_VM_BYTECODE_VERIFICATION_ENABLE=0"
"-DIREE_VM_EXT_F32_ENABLE=0"
"-DIREE_VM_EXT_F64_ENABLE=0"
# Compile options that enable link-time optimization, listed per compiler
# family. On Clang the LTO flavor is taken from IREE_LTO_MODE; the GCC and
# MSVC/clang-cl entries are fixed.
iree_select_compiler_opts(IREE_LTO_COPTS
CLANG
"-flto=${IREE_LTO_MODE}"
GCC
"-flto"
"-fuse-linker-plugin"
MSVC_OR_CLANG_CL
"/GL"
)

# Link options matching IREE_LTO_COPTS, again listed per compiler family.
iree_select_compiler_opts(IREE_LTO_LINKOPTS
CLANG
"-flto=${IREE_LTO_MODE}"
GCC
"-flto"
MSVC_OR_CLANG_CL
"-LTCG"
)

# Compile options for size-optimized builds. Only MSVC/clang-cl entries are
# listed here; the LTO switch (/GL) lives in IREE_LTO_COPTS instead.
iree_select_compiler_opts(IREE_SIZE_OPTIMIZED_DEFAULT_COPTS
MSVC_OR_CLANG_CL
"/GS-"
"/Gw"
"/Gy"
"/DNDEBUG"
"/Os"
"/Oy"
"/Zi"
"/c"
)
# Link options for size-optimized builds. Only MSVC/clang-cl entries are
# listed here; the LTO switch (-LTCG) lives in IREE_LTO_LINKOPTS instead.
iree_select_compiler_opts(IREE_SIZE_OPTIMIZED_DEFAULT_LINKOPTS
MSVC_OR_CLANG_CL
"-DEBUG:FULL"
"-opt:ref,icf"
)

# Function which enables various optimization options for a sub-tree by
# extending the IREE_DEFAULT_COPTS, IREE_INTERFACE_COPTS and
# IREE_DEFAULT_LINKOPTS variables in the caller's scope, so that targets
# created after this point use them.
#
# Available profiles:
#   "lto": Applies options to enable link time code generation.
#   "size": Applies a variety of options to minimize the size of the runtime,
#     generally at the expense of features but not performance. This implies
#     LTO.
#
# Parameters:
#   PROFILE_NAME: Name of a supported profile or falsey for none. When falsey
#     the function returns without touching any variable. Any other
#     unrecognized name is a fatal configure error.
#   SIZE_INTERFACE_COPTS: Additional IREE_INTERFACE_COPTS to add for the
#     "size" profile. Ignored for every other profile.
function(iree_enable_optimization_options)
  cmake_parse_arguments(
    _RULE
    ""
    "PROFILE_NAME"
    "SIZE_INTERFACE_COPTS"
    ${ARGN}
  )

  if(NOT _RULE_PROFILE_NAME)
    # Do nothing.
    return()
  endif()

  set(_ADDL_COPTS)
  set(_ADDL_INTERFACE_COPTS)
  set(_ADDL_LINKOPTS)

  if(_RULE_PROFILE_NAME STREQUAL "lto")
    set(_ADDL_COPTS ${IREE_LTO_COPTS})
    set(_ADDL_LINKOPTS ${IREE_LTO_LINKOPTS})
  elseif(_RULE_PROFILE_NAME STREQUAL "size")
    # Size optimized assumes LTO.
    # Size optimized often also elides logging and various status reporting,
    # which can result in unused-but-set-variable style warnings. Disable those.
    iree_select_compiler_opts(_ADDL_COPTS
      ALL
        ${IREE_LTO_COPTS}
        ${IREE_SIZE_OPTIMIZED_DEFAULT_COPTS}
      CLANG_OR_GCC
        -Wno-unused-but-set-variable
    )
    set(_ADDL_INTERFACE_COPTS "${_RULE_SIZE_INTERFACE_COPTS}")
    set(_ADDL_LINKOPTS
      ${IREE_LTO_LINKOPTS}
      ${IREE_SIZE_OPTIMIZED_DEFAULT_LINKOPTS}
    )
  else()
    message(FATAL_ERROR "Unrecognized optimization profile name '${_RULE_PROFILE_NAME}'. Expected one of 'lto', 'size'")
  endif()

  message(STATUS "Enabled optimization profile '${_RULE_PROFILE_NAME}' for targets under ${CMAKE_CURRENT_SOURCE_DIR}: \n"
    " COPTS: ${_ADDL_COPTS}\n"
    " INTERFACE COPTS: ${_ADDL_INTERFACE_COPTS}\n"
    " LINKOPTS: ${_ADDL_LINKOPTS}"
  )

  # Extend local copies with list(APPEND) so that an empty base list or an
  # empty addition never leaves stray empty elements behind, then publish the
  # results to the caller's scope.
  list(APPEND IREE_DEFAULT_COPTS ${_ADDL_COPTS})
  list(APPEND IREE_INTERFACE_COPTS ${_ADDL_INTERFACE_COPTS})
  list(APPEND IREE_DEFAULT_LINKOPTS ${_ADDL_LINKOPTS})
  set(IREE_DEFAULT_COPTS "${IREE_DEFAULT_COPTS}" PARENT_SCOPE)
  set(IREE_INTERFACE_COPTS "${IREE_INTERFACE_COPTS}" PARENT_SCOPE)
  set(IREE_DEFAULT_LINKOPTS "${IREE_DEFAULT_LINKOPTS}" PARENT_SCOPE)
endfunction()

#-------------------------------------------------------------------------------
# Compiler: Clang/LLVM
Expand Down
43 changes: 43 additions & 0 deletions build_tools/cmake/iree_setup_toolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,49 @@ endfunction()
# explicitly or through global properties. Please don't add to it without
# a very good reason.
macro(iree_setup_toolchain)
#-------------------------------------------------------------------------------
# Force LTO compatible tools.
#-------------------------------------------------------------------------------

# On older (i.e. gcc 9.x era) systems, the compiler and system toolchains
# were not compatible for general LTO use, and they were further not
# compatible amongst themselves.
# As an aid to CIs, we provide an option which will force toolchain specific
# binutils and linkers only if running on Linux. This lets us use the same
# runtime build scripts across platforms without further shenanigans.
# This is a hack and should be rolled back once 2020 era systems are not in
# use.
# Users should not use this. If they have such an old system, configure CMake
# to use toolchain specific tools.
option(IREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX "Forces use of toolchain specific LTO compatible binutils if on Linux" OFF)
mark_as_advanced(IREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX)
if(IREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
  if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
    message(STATUS "Running on an old Linux with -DIREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX: Forcing llvm-ar, llvm-nm, llvm-ranlib, and ld.lld")
    find_program(IREE_CMAKE_LTO_AR llvm-ar REQUIRED)
    find_program(IREE_CMAKE_LTO_RANLIB llvm-ranlib REQUIRED)
    find_program(IREE_CMAKE_LTO_NM llvm-nm REQUIRED)
    set(IREE_USE_LINKER "lld")
  elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    message(STATUS "Running on an old Linux with -DIREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX: Forcing gcc-ar, gcc-nm, gcc-ranlib, and ld.gold")
    find_program(IREE_CMAKE_LTO_AR gcc-ar REQUIRED)
    find_program(IREE_CMAKE_LTO_RANLIB gcc-ranlib REQUIRED)
    find_program(IREE_CMAKE_LTO_NM gcc-nm REQUIRED)
    set(IREE_USE_LINKER "gold")
  else()
    # No tool set is known for other compilers. Fail here with a clear message
    # rather than continuing with the IREE_CMAKE_LTO_* variables (and possibly
    # IREE_USE_LINKER) unset, which would force CMAKE_AR/NM/RANLIB to empty.
    message(FATAL_ERROR "IREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX only supports the Clang and GNU compilers (got '${CMAKE_CXX_COMPILER_ID}'). Configure CMake with toolchain specific binutils and linker instead.")
  endif()

  # NOTE(review): IREE_ENABLE_LLD is presumably switched off so that the
  # IREE_USE_LINKER value chosen above is the only linker selection - confirm.
  set(IREE_ENABLE_LLD OFF)
  find_program(IREE_CMAKE_LTO_LD ld.${IREE_USE_LINKER} REQUIRED)
  mark_as_advanced(IREE_CMAKE_LTO_AR IREE_CMAKE_LTO_RANLIB IREE_CMAKE_LTO_NM IREE_CMAKE_LTO_LD)

  # Each tool is written twice: once into the cache (FORCE overrides any
  # previously cached tool) and once as a normal variable of the same name.
  set(CMAKE_AR ${IREE_CMAKE_LTO_AR} CACHE FILEPATH "Forcing LTO ar instead of ar" FORCE)
  set(CMAKE_AR ${IREE_CMAKE_LTO_AR})
  set(CMAKE_NM ${IREE_CMAKE_LTO_NM} CACHE FILEPATH "Forcing LTO nm instead of nm" FORCE)
  set(CMAKE_NM ${IREE_CMAKE_LTO_NM})
  set(CMAKE_RANLIB ${IREE_CMAKE_LTO_RANLIB} CACHE FILEPATH "Forcing LTO ranlib instead of ranlib" FORCE)
  set(CMAKE_RANLIB ${IREE_CMAKE_LTO_RANLIB})
endif()

#-----------------------------------------------------------------------------
# Supports dynamic library loading.
#-----------------------------------------------------------------------------
Expand Down
16 changes: 16 additions & 0 deletions runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,22 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

# Apply the optimization profile named by IREE_RUNTIME_OPTIMIZATION_PROFILE.
# The -D definitions passed as SIZE_INTERFACE_COPTS are the extra interface
# compile options to add for the "size" profile; each one sets a runtime
# feature macro to 0.
iree_enable_optimization_options(
PROFILE_NAME
"${IREE_RUNTIME_OPTIMIZATION_PROFILE}"
# TODO: These options should be separated between those required as
# INTERFACE and those that can be private (i.e. to the runtime).
SIZE_INTERFACE_COPTS
"-DIREE_STATUS_MODE=0"
"-DIREE_STATISTICS_ENABLE=0"
"-DIREE_HAL_MODULE_STRING_UTIL_ENABLE=0"
"-DIREE_HAL_COMMAND_BUFFER_VALIDATION_ENABLE=0"
"-DIREE_VM_BACKTRACE_ENABLE=0"
"-DIREE_VM_BYTECODE_VERIFICATION_ENABLE=0"
"-DIREE_VM_EXT_F32_ENABLE=0"
"-DIREE_VM_EXT_F64_ENABLE=0"
)

# Must include runtime plugins before processing the runtime sources so that
# the static link list can be set.
iree_include_cmake_plugin_dirs(
Expand Down
5 changes: 5 additions & 0 deletions runtime/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ def combine_dicts(*ds):
"*** Tracy tools not enabled (enable with IREE_RUNTIME_BUILD_TRACY_TOOLS=ON)",
file=sys.stderr,
)
# Default to LTO builds for our python releases. Setting the
# IREE_RUNTIME_OPTIMIZATION_PROFILE environment variable overrides the "lto"
# default; the value is forwarded to CMake as
# -DIREE_RUNTIME_OPTIMIZATION_PROFILE=<value>.
IREE_RUNTIME_OPTIMIZATION_PROFILE = os.getenv(
"IREE_RUNTIME_OPTIMIZATION_PROFILE", "lto"
)


def check_pip_version():
Expand Down Expand Up @@ -264,6 +268,7 @@ def build_configuration(cmake_build_dir, cmake_install_dir, extra_cmake_args=())
cmake_args = [
"-GNinja",
"--log-level=VERBOSE",
f"-DIREE_RUNTIME_OPTIMIZATION_PROFILE={IREE_RUNTIME_OPTIMIZATION_PROFILE}",
"-DIREE_BUILD_PYTHON_BINDINGS=ON",
"-DIREE_BUILD_COMPILER=OFF",
"-DIREE_BUILD_SAMPLES=OFF",
Expand Down

0 comments on commit 5b96e90

Please sign in to comment.