Skip to content

Commit

Permalink
Warn when --iree-llvmcpu-target-cpu defaults to "generic". (#18682)
Browse files Browse the repository at this point in the history
Progress on #18561. This
introduces a warning (which we intend to promote to an error in the
future) when targeting a generic CPU without explicitly asking for it.
This addresses a performance footgun as that IREE default results in low
performance.

Along the way this grew into a substantial change to e2e testing rules:
- `TARGET_CPU` and `TARGET_CPU_FEATURES` arguments are gone (were
redundant with `COMPILER_FLAGS`).
- For `TARGET_CPU_FEATURES_VARIANTS`, the special value `"default"` is
renamed to `"generic"` and a new value `"host"` is also supported.

Example warning (this is customized to the target architecture, here
x86):

```
/home/benoit/matmul_i8.mlir:0:0: warning: while creating CPU target: 
Defaulting to targeting a generic CPU for the target architecture will result in poor performance. Please specify a target CPU and/or a target CPU feature set. If it is intended to target a generic CPU, specify "generic" as the CPU.

This can be done in two ways:
1. With command-line flags:
    --iree-llvmcpu-target-cpu=...
    --iree-llvmcpu-target-cpu-features=...
2. Within the IR:
    #hal.executable.target< ... , cpu="...", cpu_features="...">

In the rest of this message, these fields are referred to as just `cpu` and `cpu_features`.

Examples:

    cpu=generic
        Target a generic CPU of the target architecture. The generated code will have poor performance, but will run on any CPU.

    cpu=host
        Target the host CPU. The generated code will have optimal performance on the host CPU but will crash on other CPUs not supporting the same CPU features.

    cpu="name"
        Target a specific CPU. This is mostly used on x86. The accepted values are the same as in Clang command lines.
        List of accepted x86 CPUs: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, znver5, x86-64, x86-64-v2, x86-64-v3, x86-64-v4

    cpu_features="+feature1,..."
        Target a CPU supporting the comma-separated of (+-prefixed) features. The accepted values are the same as in Clang command lines.
```

---------

Signed-off-by: Benoit Jacob <[email protected]>
  • Loading branch information
bjacob authored Oct 17, 2024
1 parent fecccdc commit 0c6a151
Show file tree
Hide file tree
Showing 34 changed files with 437 additions and 284 deletions.
34 changes: 7 additions & 27 deletions build_tools/bazel/iree_check_test.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ def iree_check_test(
input_type = None,
runner_args = [],
tags = [],
target_cpu_features = None,
timeout = None,
**kwargs):
"""Creates an iree-check-module test for the specified source file.
Expand All @@ -43,14 +42,10 @@ def iree_check_test(
driver and input file are passed automatically.
tags: additional tags to apply to the generated test. Tag
"driver=DRIVER" and "target=TARGET" are added automatically.
target_cpu_features: currently unimplemented (must be empty), will
eventually allow specifying target CPU features.
timeout: timeout for the generated tests.
**kwargs: any additional attributes to pass to the underlying native_test.
"""

if target_cpu_features:
fail("target_cpu_features must currently be empty")
input_type_flags = []
if input_type:
input_type_flags = ["--iree-input-type=%s" % input_type]
Expand Down Expand Up @@ -91,7 +86,6 @@ def iree_check_single_backend_test_suite(
input_type = None,
runner_args = [],
tags = [],
target_cpu_features = None,
timeout = None,
**kwargs):
"""Creates a test suite of iree-check-module tests for a single backend/driver pair.
Expand All @@ -112,8 +106,6 @@ def iree_check_single_backend_test_suite(
iree-check-module tests. The driver and input file are passed
automatically. To use different runner_args per test, create a
separate suite or iree_check_test.
target_cpu_features: currently unimplemented (must be empty), will
eventually allow specifying target CPU features.
tags: tags to apply to the generated tests. Note that as in standard test
suites, manual is treated specially and will also apply to the test
suite itself.
Expand All @@ -130,13 +122,6 @@ def iree_check_single_backend_test_suite(
if target_backend == "rocm" or driver == "hip":
return

# We haven't implemented this so far because we have been using target_cpu_features so far only
# for aarch64 targets, for which we use the CMake build. To future people implementing this:
# target_cpu_features should be a list, and here it should be joined into a comma-separated
# string to be passed to --iree-llvmcpu-target-cpu-features
if target_cpu_features:
fail("target_cpu_features must currently be empty")

tests = []
for src in srcs:
test_name = "_".join([name, src])
Expand Down Expand Up @@ -196,21 +181,16 @@ def iree_check_test_suite(
tags: tags to apply to the generated tests. Note that as in standard test
suites, manual is treated specially and will also apply to the test
suite itself.
target_cpu_features_variants: list of target cpu features variants.
Currently unimplemented in Bazel due to difficulty of specializing
to target architecture in Bazel. The following describes the
semantics that this should have if implemented. Each
entry is either "default" for the architecture defaults, or a colon-
separated triple "arch:name:cpu_features" where "arch" filters
for a target CPU architecture (in IREE_ARCH format), "name" is a
short name for the CPU features set (used to generate target names)
and cpu_features is a comma-separated list of LLVM target attributes
to enable. Example:
x86_64:avx2_fma:+avx,+avx2,+fma
target_cpu_features_variants: ignored, assumed to be ["generic"] in this
Bazel implementation. See the CMake implementation for what this does
in general.
**kwargs: any additional attributes to pass to the underlying tests and
test suite.
"""

# Like CMake, default to "generic". Unlike CMake, do not honor other values.
generic_flags = compiler_flags + ["--iree-llvmcpu-target-cpu=generic"]

# We could have complicated argument override logic for runner_args and such, or... the client
# could just create a test suite. The latter seems simpler and more readable.
tests = []
Expand All @@ -221,7 +201,7 @@ def iree_check_test_suite(
srcs = srcs,
driver = driver,
target_backend = backend,
compiler_flags = compiler_flags,
compiler_flags = generic_flags,
input_type = input_type,
runner_args = runner_args,
tags = tags,
Expand Down
31 changes: 8 additions & 23 deletions build_tools/bazel/iree_e2e_generated_runner_test.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ def iree_e2e_runner_test(
compiler_flags = [],
runner_args = [],
tags = [],
target_cpu_features = None,
timeout = None,
**kwargs):
"""Creates a test using a specified test runner program.
Expand All @@ -44,23 +43,17 @@ def iree_e2e_runner_test(
added automatically.
test_runner: test runner program to run.
timeout: timeout for the generated tests.
target_cpu_features: target CPU features. Only for llvm-cpu backend.
**kwargs: any additional attributes to pass to the underlying tests and
test suite.
"""

if target_cpu_features:
fail("target_cpu_features must currently be empty")

iree_bytecode_module(
name = name + "_%s_module" % test_type,
module_name = tests_vmfb,
src = tests_src,
flags = [
"--iree-hal-target-backends=%s" % target_backend,
] + ([
"--iree-llvmcpu-target-cpu-features=%s" % target_cpu_features,
] if target_cpu_features else []) + compiler_flags,
] + compiler_flags,
visibility = ["//visibility:private"],
testonly = True,
**kwargs
Expand Down Expand Up @@ -106,7 +99,6 @@ def iree_single_backend_e2e_runner_test(
compiler_flags = [],
runner_args = [],
tags = [],
target_cpu_features = None,
timeout = None,
**kwargs):
"""Generates an iree_e2e_runner_test using a custom python generator script.
Expand All @@ -133,7 +125,6 @@ def iree_single_backend_e2e_runner_test(
added automatically.
test_runner: test runner program to run.
timeout: timeout for the generated tests.
target_cpu_features: target CPU features. Only for llvm-cpu backend.
**kwargs: any additional attributes to pass to the underlying tests and
test suite.
"""
Expand Down Expand Up @@ -171,7 +162,6 @@ def iree_single_backend_e2e_runner_test(
runner_args = runner_args,
tags = tags,
timeout = timeout,
target_cpu_features = target_cpu_features,
**kwargs
)

Expand Down Expand Up @@ -209,20 +199,15 @@ def iree_generated_e2e_runner_test(
added automatically.
test_runner: test runner program to run.
timeout: timeout for the generated tests.
target_cpu_features_variants: list of target cpu features variants.
Currently unimplemented in Bazel due to difficulty of specializing
to target architecture in Bazel. The following describes the
semantics that this should have if implemented. Each
entry is either "default" for the architecture defaults, or a colon-
separated triple "arch:name:cpu_features" where "arch" filters
for a target CPU architecture (in IREE_ARCH format), "name" is a
short name for the CPU features set (used to generate target names)
and cpu_features is a comma-separated list of LLVM target attributes
to enable. Example:
x86_64:avx2_fma:+avx,+avx2,+fma
target_cpu_features_variants: ignored, assumed to be ["generic"] in this
Bazel implementation. See the CMake implementation for what this does
in general.
**kwargs: any additional attributes to pass to the underlying tests and test suite.
"""

# Like CMake, default to "generic". Unlike CMake, do not honor other values.
generic_flags = compiler_flags + ["--iree-llvmcpu-target-cpu=generic"]

tests = []
for backend, driver in target_backends_and_drivers:
# CUDA/ROCm backend/driver not supported by Bazel build.
Expand All @@ -237,7 +222,7 @@ def iree_generated_e2e_runner_test(
driver = driver,
target_backend = backend,
generator_args = generator_args,
compiler_flags = compiler_flags,
compiler_flags = generic_flags,
runner_args = runner_args,
tags = tags,
timeout = timeout,
Expand Down
5 changes: 0 additions & 5 deletions build_tools/bazel_to_cmake/bazel_to_cmake_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -769,7 +769,6 @@ def iree_check_single_backend_test_suite(
target_backends_and_drivers=None,
runner_args=None,
tags=None,
target_cpu_features=None,
timeout=None,
**kwargs,
):
Expand All @@ -787,9 +786,6 @@ def iree_check_single_backend_test_suite(
input_type_block = self._convert_string_arg_block("INPUT_TYPE", input_type)
runner_args_block = self._convert_string_list_block("RUNNER_ARGS", runner_args)
labels_block = self._convert_string_list_block("LABELS", tags)
target_cpu_features_block = self._convert_string_arg_block(
"TARGET_CPU_FEATURES", target_cpu_features
)
timeout_block = self._convert_timeout_arg_block("TIMEOUT", timeout)

self._converter.body += (
Expand All @@ -802,7 +798,6 @@ def iree_check_single_backend_test_suite(
f"{input_type_block}"
f"{runner_args_block}"
f"{labels_block}"
f"{target_cpu_features_block}"
f"{timeout_block}"
f")\n\n"
)
Expand Down
48 changes: 20 additions & 28 deletions build_tools/cmake/iree_check_test.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@ endfunction()
# "driver=${DRIVER}" are added automatically.
# MODULE_FILE_NAME: Optional, specifies the absolute path to the filename
# to use for the generated IREE module (.vmfb).
# TARGET_CPU_FEATURES: If specified, a string passed as argument to
# --iree-llvmcpu-target-cpu-features.
# DEPENDS: Optional. Additional dependencies beyond SRC and the tools.
# INPUT_TYPE: The value for the --iree-input-type= flag. Also disables tests
# if no compiled support for that configuration.
Expand All @@ -53,7 +51,7 @@ function(iree_check_test)
_RULE
""
"NAME;SRC;TARGET_BACKEND;DRIVER;MODULE_FILE_NAME;INPUT_TYPE"
"COMPILER_FLAGS;RUNNER_ARGS;LABELS;TARGET_CPU_FEATURES;DEPENDS;TIMEOUT"
"COMPILER_FLAGS;RUNNER_ARGS;LABELS;DEPENDS;TIMEOUT"
${ARGN}
)

Expand Down Expand Up @@ -162,9 +160,6 @@ function(iree_check_test)
if(_RULE_INPUT_TYPE)
list(APPEND _BASE_COMPILER_FLAGS "--iree-input-type=${_RULE_INPUT_TYPE}")
endif()
if(_RULE_TARGET_CPU_FEATURES)
list(APPEND _BASE_COMPILER_FLAGS "--iree-llvmcpu-target-cpu-features=${_RULE_TARGET_CPU_FEATURES}")
endif()
if(_NORMALIZED_TARGET_BACKEND STREQUAL "ROCM")
list(APPEND _BASE_COMPILER_FLAGS "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}")
endif()
Expand Down Expand Up @@ -242,8 +237,6 @@ endfunction()
# different args per test, create a separate suite or iree_check_test.
# LABELS: Additional labels to apply to the generated tests. The package path
# is added automatically.
# TARGET_CPU_FEATURES: If specified, a string passed as argument to
# --iree-llvmcpu-target-cpu-features.
# DEPENDS: Optional. Additional dependencies beyond SRC and the tools.
# INPUT_TYPE: The value for the --iree-input-type= flag. Also disables tests
# if no compiled support for that configuration.
Expand All @@ -256,7 +249,7 @@ function(iree_check_single_backend_test_suite)
_RULE
""
"NAME;TARGET_BACKEND;DRIVER;INPUT_TYPE"
"SRCS;COMPILER_FLAGS;RUNNER_ARGS;LABELS;TARGET_CPU_FEATURES;DEPENDS;TIMEOUT"
"SRCS;COMPILER_FLAGS;RUNNER_ARGS;LABELS;DEPENDS;TIMEOUT"
${ARGN}
)

Expand Down Expand Up @@ -284,7 +277,6 @@ function(iree_check_single_backend_test_suite)
INPUT_TYPE ${_RULE_INPUT_TYPE}
RUNNER_ARGS ${_RULE_RUNNER_ARGS}
LABELS ${_RULE_LABELS}
TARGET_CPU_FEATURES ${_RULE_TARGET_CPU_FEATURES}
DEPENDS ${_RULE_DEPENDS}
TIMEOUT ${_RULE_TIMEOUT}
)
Expand All @@ -309,7 +301,6 @@ function(iree_check_single_backend_test_suite)
INPUT_TYPE ${_RULE_INPUT_TYPE}
RUNNER_ARGS ${_RULE_RUNNER_ARGS}
LABELS ${_RULE_LABELS}
TARGET_CPU_FEATURES ${_RULE_TARGET_CPU_FEATURES}
DEPENDS ${_RULE_DEPENDS}
TIMEOUT ${_RULE_TIMEOUT}
)
Expand All @@ -328,7 +319,6 @@ function(iree_check_single_backend_test_suite)
INPUT_TYPE ${_RULE_INPUT_TYPE}
RUNNER_ARGS ${_RULE_RUNNER_ARGS}
LABELS ${_RULE_LABELS}
TARGET_CPU_FEATURES ${_RULE_TARGET_CPU_FEATURES}
DEPENDS ${_RULE_DEPENDS}
TIMEOUT ${_RULE_TIMEOUT}
)
Expand All @@ -342,9 +332,7 @@ endfunction()
# This function has 3 output-params: variables that it sets with PARENT_SCOPE:
# _ENABLED, _FEATURES_NAME, _FEATURES.
#
# "default" is handled specially. _ENABLED is always set to "TRUE" and
# _FEATURES_NAME and _FEATURES are set to
# the empty string.
# "generic" and "host" are handled specially, as CPU names.
#
# Other values are parsed as "arch:features_name:features". The `arch`
# component is matched with `IREE_ARCH`, `_ENABLED` is set to "TRUE" if and
Expand All @@ -354,23 +342,28 @@ endfunction()
#
# Examples:
#
# default:
# _ENABLED="TRUE" unconditionally,
# other output strings are "".
# generic:
# _ENABLED="TRUE" unconditionally, _FEATURES_NAME="generic", _FEATURES="".
#
# host:
# _ENABLED="TRUE" unconditionally, _FEATURES_NAME="host", _FEATURES="".
#
# aarch64:dotprod:+dotprod:
# _ENABLED="TRUE" if the target architecture is aarch64, and in that case:
# _FEATURES_NAME="dotprod".
# _FEATURES="+dotprod".
function(parse_target_cpu_features_variant _VARIANT_STRING _ENABLED_VAR
_FEATURES_NAME_VAR _FEATURES_VAR)
_FEATURES_NAME_VAR _COMPILER_FLAGS_VAR)
set("${_ENABLED_VAR}" FALSE PARENT_SCOPE)
set("${_FEATURES_NAME_VAR}" "" PARENT_SCOPE)
set("${_FEATURES_VAR}" "" PARENT_SCOPE)
if("${_VARIANT_STRING}" STREQUAL "default")
set("${_COMPILER_FLAGS_VAR}" "" PARENT_SCOPE)
if("${_VARIANT_STRING}" STREQUAL "generic" OR "${_VARIANT_STRING}" STREQUAL "host")
set("${_ENABLED_VAR}" TRUE PARENT_SCOPE)
set("${_FEATURES_NAME_VAR}" "${_VARIANT_STRING}" PARENT_SCOPE)
set("${_COMPILER_FLAGS_VAR}" "--iree-llvmcpu-target-cpu=${_VARIANT_STRING}" PARENT_SCOPE)
return()
endif()

# Interpret _VARIANT_STRING as a CMake list (;-separated).
string(REPLACE ":" ";" _COMPONENTS "${_VARIANT_STRING}")
list(LENGTH _COMPONENTS _NUM_COMPONENTS)
Expand All @@ -385,7 +378,7 @@ function(parse_target_cpu_features_variant _VARIANT_STRING _ENABLED_VAR
if(_FILTER_ARCH STREQUAL IREE_ARCH)
set("${_ENABLED_VAR}" TRUE PARENT_SCOPE)
set("${_FEATURES_NAME_VAR}" "${_FEATURES_NAME}" PARENT_SCOPE)
set("${_FEATURES_VAR}" "${_FEATURES}" PARENT_SCOPE)
set("${_COMPILER_FLAGS_VAR}" "--iree-llvmcpu-target-cpu-features=${_FEATURES}" PARENT_SCOPE)
endif()
endfunction()

Expand Down Expand Up @@ -413,8 +406,8 @@ endfunction()
# LABELS: Additional labels to apply to the generated tests. The package path is
# added automatically.
# TARGET_CPU_FEATURES_VARIANTS: list of target cpu features variants. Each
# entry is either "default" for the architecture defaults, or a colon-
# separated triple "arch:name:cpu_features" where "arch" filters
# entry is either "generic" for the architecture defaults, or "host" for
# the host CPU, or a colon-separated triple "arch:name:cpu_features" where "arch" filters
# for a target CPU architecture (in IREE_ARCH format), "name" is a
# short name for the CPU features set (used to generate target names)
# and cpu_features is a comma-separated list of LLVM target attributes
Expand Down Expand Up @@ -443,7 +436,7 @@ function(iree_check_test_suite)
if(_RULE_TARGET_CPU_FEATURES_VARIANTS)
set(_TARGET_CPU_FEATURES_VARIANTS "${_RULE_TARGET_CPU_FEATURES_VARIANTS}")
else()
set(_TARGET_CPU_FEATURES_VARIANTS "default")
set(_TARGET_CPU_FEATURES_VARIANTS "generic")
endif()

if(NOT DEFINED _RULE_TARGET_BACKENDS AND NOT DEFINED _RULE_DRIVERS)
Expand All @@ -466,7 +459,7 @@ function(iree_check_test_suite)
list(GET _RULE_DRIVERS ${_INDEX} _DRIVER)
foreach(_VARIANT_STRING IN LISTS _TARGET_CPU_FEATURES_VARIANTS)
parse_target_cpu_features_variant("${_VARIANT_STRING}"
_ENABLED _TARGET_CPU_FEATURES_NAME _TARGET_CPU_FEATURES)
_ENABLED _TARGET_CPU_FEATURES_NAME _VARIANT_COMPILER_FLAGS)
if(NOT _ENABLED)
# The current entry is disabled on the target CPU architecture.
continue()
Expand All @@ -488,12 +481,11 @@ function(iree_check_test_suite)
${_DRIVER}
COMPILER_FLAGS
${_RULE_COMPILER_FLAGS}
${_VARIANT_COMPILER_FLAGS}
RUNNER_ARGS
${_RULE_RUNNER_ARGS}
LABELS
${_LABELS}
TARGET_CPU_FEATURES
${_TARGET_CPU_FEATURES}
TIMEOUT
${_RULE_TIMEOUT}
INPUT_TYPE
Expand Down
Loading

0 comments on commit 0c6a151

Please sign in to comment.