From cbe7fd0c35243f4aa72ff76b15eac96c6884d5bd Mon Sep 17 00:00:00 2001 From: Phil Ramsey Date: Wed, 8 Nov 2023 17:47:39 +0000 Subject: [PATCH] i#5365: Build core unit tests with SVE enabled Build most core tests with SVE flags and high optimisation (-O3), if building on an AARCH64 SVE machine. Tests which fail when built with -O3 are not included. Add some error checking to a few tests to allow the -O3 build and update template (expected output) files as necessary. Issue: #5365 --- suite/tests/CMakeLists.txt | 94 +++++++++++++++++++++++++------------- 1 file changed, 61 insertions(+), 33 deletions(-) diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index ada39aa6273..c4da5a13064 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -279,6 +279,9 @@ if (UNIX) string(REGEX REPLACE "-Wall" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") # some tests rely on specific "nop;nop" patterns that optimization ruins # we should probably move the -O from top level into core/CMakeLists.txt + # + # -O3 is selectively re-added to some tests using the add_sve_flags() or + # optimize() functions string(REGEX REPLACE "-O[0-9]? " " " CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") else (UNIX) # W2 is default (we're using W3). 
We should also replace @@ -474,35 +477,53 @@ function(append_link_flags target newflags) LINK_FLAGS "${cur_ldflags} ${newflags}") endfunction(append_link_flags) -set( sve_tests - simple_app api.ir api.ir_negative api.ir_v81 api.ir_v82 api.ir_v83 api.ir_v84 api.ir_v86 - api.ir_sve api.ir_sve2 api.ir-static api.drdecode common.broadfun common.fib common.nzcv - common.getretaddr common.segfault common.allasm_aarch64_isa common.allasm_aarch64_cache - allasm_aarch64_prefetch allasm_aarch64_flush libutil.frontend_test libutil.drconfig_test - client.call-retarget client.modules client.annotation-concurrency client.partial_module_map - client.execfault client.events client.events_cpp client.timer client.mangle_suspend - client.syscall-mod client.signal client.cleancallsig client.file_io client.cleancall-opt-1 - client.inline client.null_instrument client.large_options client.stolen-reg-index - client.gonative client.drmgr-test client.drx_buf-test client.drbbdup-drwrap-test - client.drbbdup-emul-test client.process-id client.drreg-test client.low_on_memory client.tls - client.drx-scattergather client.drwrap-test client.drwrap-drreg-test drstatecmp-fuzz-app - client.drutil-test client.stolen-reg client.ldstex api.dis-a64 api.reenc-a64 api.opnd - api.detach api.detach_state api.detach_signal api.detach_spawn api.detach_spawn_stress_FLAKY - api.detach_spawn_quick_exit api.ibl-stress api.ibl-stress-aarch64-far-link_LONG - api.static_startstop api.static_noclient api.static_noinit api.static_detach - api.static_prepop api.static_reattach_client_flags api.static_crash api.static_sideline_FLAKY - api.static_symbols api.static_maps_mixup_yesvars api.static_maps_mixup_novars_FLAKY - api.thread_churn client.app_args client.destructor builtin_prefetch tool.multiproc - stride_benchmark tool.fib_plus tool.heap_test tool.drcacheoff.gencode linux.eintr - linux.execve-sub linux.execve-null linux.execve-config linux.execv linux.execve-rec - linux.exit linux.fork linux.fork-sleep 
linux.infinite linux.longjmp linux.prctl linux.mmap - linux.zero-length-mem-ranges linux.sigaction linux.syscall_pwait linux.sigaction_nosignals - linux.thread linux.threadexit linux.threadexit2 linux.signalfd linux.alarm - linux.signal_racesys linux.signal_pre_syscall linux.bad-signal-stack linux.sigsuspend - linux.sigmask linux.mangle_asynch linux.app_tls linux.readlink - linux.fib-conflict linux.fib-static linux.fib-pie linux.vfork pthreads.pthreads - pthreads.pthreads_exit pthreads.ptsig pthreads.pthreads_fork_FLAKY security-linux.trampoline - linux.infloop linux.rseq_disable security-common.codemod security-common.ret_noncall_trace +# It would be nice to get rid of the +# string(REGEX REPLACE "-O[0-9]? " " " CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") +# line above and instead selectively remove -O3 from tests that cannot +# support it. +# This was investigated and proved to be non-trivial as the client tests +# use _DR_set_compile_flags() to set the compile flags whereas the core +# tests use set_cflags(). _DR_set_compile_flags() is a public function and we +# didn't want to add a blocklist in there. +# For now we create a list of tests that can be built with -O3. +# This is for AARCH64 UNIX only. 
+# TODO change this allowlist to a blocklist +set(sve_tests + simple_app api.ir api.ir_negative api.ir_v81 api.ir_v82 api.ir_v83 api.ir_v84 + api.ir_v86 api.ir_sve api.ir_sve2 api.ir-static api.drdecode common.broadfun + common.fib common.nzcv common.getretaddr common.segfault + common.allasm_aarch64_isa common.allasm_aarch64_cache allasm_aarch64_prefetch + allasm_aarch64_flush libutil.frontend_test libutil.drconfig_test + client.call-retarget client.modules client.annotation-concurrency + client.partial_module_map client.execfault client.events client.events_cpp + client.timer client.mangle_suspend client.syscall-mod client.signal + client.cleancallsig client.file_io client.cleancall-opt-1 client.inline + client.null_instrument client.large_options client.stolen-reg-index + client.gonative client.drmgr-test client.drx_buf-test + client.drbbdup-drwrap-test client.drbbdup-emul-test client.process-id + client.drreg-test client.low_on_memory client.tls client.drx-scattergather + client.drwrap-test client.drwrap-drreg-test drstatecmp-fuzz-app + client.drutil-test client.stolen-reg client.ldstex api.dis-a64 api.reenc-a64 + api.opnd api.detach api.detach_state api.detach_signal api.detach_spawn + api.detach_spawn_stress_FLAKY api.detach_spawn_quick_exit api.ibl-stress + api.ibl-stress-aarch64-far-link_LONG api.static_startstop api.static_noclient + api.static_noinit api.static_detach api.static_prepop + api.static_reattach_client_flags api.static_crash api.static_sideline_FLAKY + api.static_symbols api.static_maps_mixup_yesvars + api.static_maps_mixup_novars_FLAKY api.thread_churn client.app_args + client.destructor builtin_prefetch tool.multiproc stride_benchmark + tool.fib_plus tool.heap_test tool.drcacheoff.gencode linux.eintr + linux.execve-sub linux.execve-null linux.execve-config linux.execv + linux.execve-rec linux.exit linux.fork linux.fork-sleep linux.infinite + linux.longjmp linux.prctl linux.mmap linux.zero-length-mem-ranges + linux.sigaction 
linux.syscall_pwait linux.sigaction_nosignals linux.thread + linux.threadexit linux.threadexit2 linux.signalfd linux.alarm + linux.signal_racesys linux.signal_pre_syscall linux.bad-signal-stack + linux.sigsuspend linux.sigmask linux.mangle_asynch linux.app_tls + linux.readlink linux.fib-conflict linux.fib-static linux.fib-pie linux.vfork + pthreads.pthreads pthreads.pthreads_exit pthreads.ptsig + pthreads.pthreads_fork_FLAKY security-linux.trampoline linux.infloop + linux.rseq_disable security-common.codemod security-common.ret_noncall_trace security-common.retnonexisting security-common.TestAllocWE security-common.TestMemProtChg_FLAKY client.drx-test ) @@ -512,6 +533,9 @@ function(add_sve_flags target) if (target IN_LIST sve_tests) target_compile_options(${target} PRIVATE -march=armv8.4-a+crypto+rcpc+sha3+sm4+sve+rng+ssbs+nodotprod + # reinstate the -O3 flag which is removed with the + # string(REGEX REPLACE "-O[0-9]? " " " CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + # line above -O3) endif() endif() @@ -597,9 +621,9 @@ function(add_exe test source) add_dependencies(${test} ${gen_asm_tgt}) endif () - if (AARCH64) - add_sve_flags(${test}) - endif(AARCH64) + # AARCH64 AND UNIX only + add_sve_flags(${test}) + endfunction(add_exe) # normal app @@ -1746,7 +1770,11 @@ function(use_MD_not_MTd source_file) endif () endfunction() -# optimize the target +# the -O3 flag is removed above with the +# string(REGEX REPLACE "-O[0-9]? " " " CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") +# line. -O3 is then selectively re-added with this function or add_sve_flags(). +# It will be desirable to one day remove the "REGEX REPLACE" line and just +# remove -O3 for targets that cannot support it. function(optimize target) if (UNIX) append_property_string(TARGET ${target} COMPILE_FLAGS "-O3")