diff --git a/.github/workflows/ci-docs.yml b/.github/workflows/ci-docs.yml index 2f3c06c496f..1be585beebe 100644 --- a/.github/workflows/ci-docs.yml +++ b/.github/workflows/ci-docs.yml @@ -90,7 +90,7 @@ jobs: # We only use a non-zero build # when making multiple manual builds in one day. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=9.93.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=9.94.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi diff --git a/.github/workflows/ci-package.yml b/.github/workflows/ci-package.yml index 55d5f35625c..08c0c9711e1 100644 --- a/.github/workflows/ci-package.yml +++ b/.github/workflows/ci-package.yml @@ -102,7 +102,7 @@ jobs: # We only use a non-zero build # when making multiple manual builds in one day. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=9.93.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=9.94.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -194,7 +194,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=9.93.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=9.94.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -282,7 +282,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=9.93.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=9.94.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -370,7 +370,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=9.93.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=9.94.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -450,7 +450,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=9.93.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=9.94.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -535,7 +535,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER="9.93.$((`git log -n 1 --format=%ct` / (60*60*24)))" + export VERSION_NUMBER="9.94.$((`git log -n 1 --format=%ct` / (60*60*24)))" export PREFIX="cronbuild-" else export VERSION_NUMBER=${{ github.event.inputs.version }} diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f3b5dce1b2..f640fce640f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -568,7 +568,7 @@ endif (EXISTS "${PROJECT_SOURCE_DIR}/.svn") # N.B.: When updating this, update all the default versions in ci-package.yml # and ci-docs.yml. We should find a way to share (xref i#1565). 
-set(VERSION_NUMBER_DEFAULT "9.93.${VERSION_NUMBER_PATCHLEVEL}") +set(VERSION_NUMBER_DEFAULT "9.94.${VERSION_NUMBER_PATCHLEVEL}") # do not store the default VERSION_NUMBER in the cache to prevent a stale one # from preventing future version updates in a pre-existing build dir set(VERSION_NUMBER "" CACHE STRING "Version number: leave empty for default") @@ -1381,7 +1381,7 @@ math(EXPR VERSION_NUMBER_INTEGER # 5.0 broke backcompat in drsyms and xmm opnd sizes # 4.1 broke backcompat in drsyms + 64-bit core (opcodes + reachability) # 4.0 broke backcompat in drmgr, drsyms, drinjectlib, and dr_get_milliseconds() -set(OLDEST_COMPATIBLE_VERSION_DEFAULT "990") +set(OLDEST_COMPATIBLE_VERSION_DEFAULT "994") set(OLDEST_COMPATIBLE_VERSION "" CACHE STRING "Oldest compatible version: leave empty for default") if ("${OLDEST_COMPATIBLE_VERSION}" STREQUAL "") diff --git a/api/docs/release.dox b/api/docs/release.dox index 1f756800102..670a372793b 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -183,6 +183,11 @@ changes: their precise counterparts int64_t and uint64_t. - The #dynamorio::drmemtrace::memref_t structure has a new field appended for holding the actual target of each indirect branch. + - Increased the size of dr_simd_t to accommodate AArch64's Scalable Vector + Extension (SVE) as well as adding two new dr_simd_t instances to + #dr_mcontext_t: SVE predicate registers svep[] and the SVE first-fault + register, ffr. This is a significant binary compatibility change and will + require re-building clients built before SVE was added. Further non-compatibility-affecting changes include: - Added new drmemtrace option -L0_filter_until_instrs which enables filtering @@ -279,6 +284,9 @@ Further non-compatibility-affecting changes include: - Added a new drmemtrace analysis tool: syscall_mix, to count frequency of system calls in a trace. This tool works in both the online and offline modes of drmemtrace. + - Added proc_get_vector_length_bytes() for AArch64. This returns the current + vector length on all ARMv8 hardware including hardware which supports the + Scalable Vector Extension (SVE). **************************************************
diff --git a/api/samples/memtrace_simple.c b/api/samples/memtrace_simple.c index ba05d67884a..3227c46c2f3 100644 --- a/api/samples/memtrace_simple.c +++ b/api/samples/memtrace_simple.c @@ -121,6 +121,10 @@ static int tls_idx; #define MINSERT instrlist_meta_preinsert +#ifdef AARCH64 +static bool reported_sg_warning = false; +#endif + static void memtrace(void *drcontext) { @@ -314,13 +318,47 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *wher DR_ASSERT(instr_is_app(instr_operands)); for (i = 0; i < instr_num_srcs(instr_operands); i++) { - if (opnd_is_memory_reference(instr_get_src(instr_operands, i))) - instrument_mem(drcontext, bb, where, instr_get_src(instr_operands, i), false); + const opnd_t src = instr_get_src(instr_operands, i); + if (opnd_is_memory_reference(src)) { +#ifdef AARCH64 + /* TODO i#5844: Memory references involving SVE registers are not + * supported yet. To be implemented as part of scatter/gather work. + */ + if (opnd_is_base_disp(src) && + (reg_is_z(opnd_get_base(src)) || reg_is_z(opnd_get_index(src)))) { + if (!reported_sg_warning) { + dr_fprintf(STDERR, + "WARNING: Scatter/gather is not supported, results will " + "be inaccurate\n"); + reported_sg_warning = true; + } + continue; + } +#endif + instrument_mem(drcontext, bb, where, src, false); + } } for (i = 0; i < instr_num_dsts(instr_operands); i++) { - if (opnd_is_memory_reference(instr_get_dst(instr_operands, i))) - instrument_mem(drcontext, bb, where, instr_get_dst(instr_operands, i), true); + const opnd_t dst = instr_get_dst(instr_operands, i); + if (opnd_is_memory_reference(dst)) { +#ifdef AARCH64 + /* TODO i#5844: Memory references involving SVE registers are not + * supported yet. To be implemented as part of scatter/gather work. + */ + if (opnd_is_base_disp(dst) && + (reg_is_z(opnd_get_base(dst)) || reg_is_z(opnd_get_index(dst)))) { + if (!reported_sg_warning) { + dr_fprintf(STDERR, + "WARNING: Scatter/gather is not supported, results will " + "be inaccurate\n"); + reported_sg_warning = true; + } + continue; + } +#endif + instrument_mem(drcontext, bb, where, dst, true); + } } /* insert code to call clean_call for processing the buffer */ diff --git a/api/samples/memval_simple.c b/api/samples/memval_simple.c index d22b869456b..15a7539c308 100644 --- a/api/samples/memval_simple.c +++ b/api/samples/memval_simple.c @@ -104,6 +104,10 @@ static int tls_idx; static drx_buf_t *write_buffer; static drx_buf_t *trace_buffer; +#ifdef AARCH64 +static bool reported_sg_warning = false; +#endif + /* Requires that hex_buf be at least as long as 2*memref->size + 1. */ static char * write_hexdump(char *hex_buf, byte *write_base, mem_ref_t *mem_ref) @@ -322,14 +326,31 @@ handle_post_write(void *drcontext, instrlist_t *ilist, instr_t *where, reg_id_t * this. */ for (i = 0; i < instr_num_dsts(prev_instr); ++i) { - if (opnd_is_memory_reference(instr_get_dst(prev_instr, i))) { + const opnd_t dst = instr_get_dst(prev_instr, i); + if (opnd_is_memory_reference(dst)) { if (seen_memref) { DR_ASSERT_MSG(false, "Found inst with multiple memory destinations"); break; } + +#ifdef AARCH64 + /* TODO i#5844: Memory references involving SVE registers are not + * supported yet. To be implemented as part of scatter/gather work. 
+ */ + if (opnd_is_base_disp(dst) && + (reg_is_z(opnd_get_base(dst)) || reg_is_z(opnd_get_index(dst)))) { + if (!reported_sg_warning) { + dr_fprintf(STDERR, + "WARNING: Scatter/gather is not supported, results " + "will be inaccurate\n"); + reported_sg_warning = true; + } + continue; + } +#endif + seen_memref = true; - instrument_post_write(drcontext, ilist, where, instr_get_dst(prev_instr, i), - prev_instr, reg_addr); + instrument_post_write(drcontext, ilist, where, dst, prev_instr, reg_addr); } } } @@ -377,14 +398,29 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *wher * we assume no instruction has multiple distinct memory destination operands. */ for (i = 0; i < instr_num_dsts(instr_operands); ++i) { - if (opnd_is_memory_reference(instr_get_dst(instr_operands, i))) { + const opnd_t dst = instr_get_dst(instr_operands, i); + if (opnd_is_memory_reference(dst)) { if (seen_memref) { DR_ASSERT_MSG(false, "Found inst with multiple memory destinations"); break; } - data->reg_addr = instrument_pre_write(drcontext, bb, where, - data->last_opcode, instr_operands, - instr_get_dst(instr_operands, i)); +#ifdef AARCH64 + /* TODO i#5844: Memory references involving SVE registers are not + * supported yet. To be implemented as part of scatter/gather work. + */ + if (opnd_is_base_disp(dst) && + (reg_is_z(opnd_get_base(dst)) || reg_is_z(opnd_get_index(dst)))) { + if (!reported_sg_warning) { + dr_fprintf(STDERR, + "WARNING: Scatter/gather is not supported, results " + "will be inaccurate\n"); + reported_sg_warning = true; + } + continue; + } +#endif + data->reg_addr = instrument_pre_write( + drcontext, bb, where, data->last_opcode, instr_operands, dst); seen_memref = true; } } diff --git a/clients/drcachesim/tests/burst_gencode.cpp b/clients/drcachesim/tests/burst_gencode.cpp index 0a7e4e392eb..ef3828dab16 100644 --- a/clients/drcachesim/tests/burst_gencode.cpp +++ b/clients/drcachesim/tests/burst_gencode.cpp @@ -198,6 +198,8 @@ class code_generator_t { #ifdef X86 replace = INSTR_CREATE_lahf(dc); #elif defined(AARCH64) + // OP_psb requires SPE feature. + proc_set_feature(FEATURE_SPE, true); replace = INSTR_CREATE_psb_csync(dc); #elif defined(ARM) replace = INSTR_CREATE_yield(dc); diff --git a/clients/drcachesim/tracer/tracer.cpp b/clients/drcachesim/tracer/tracer.cpp index 117fc317db3..1b3365cf5f8 100644 --- a/clients/drcachesim/tracer/tracer.cpp +++ b/clients/drcachesim/tracer/tracer.cpp @@ -175,6 +175,10 @@ static void *trace_thread_cb_user_data; static bool thread_filtering_enabled; bool attached_midway; +#ifdef AARCH64 +static bool reported_sg_warning = false; +#endif + static bool bbdup_instr_counting_enabled() { @@ -1304,18 +1308,50 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst /* insert code to add an entry for each memory reference opnd */ for (i = 0; i < instr_num_srcs(instr_operands); i++) { - if (opnd_is_memory_reference(instr_get_src(instr_operands, i))) { - adjust = instrument_memref( - drcontext, ud, bb, where, reg_ptr, adjust, instr_operands, - instr_get_src(instr_operands, i), i, false, pred, mode); + const opnd_t src = instr_get_src(instr_operands, i); + if (opnd_is_memory_reference(src)) { +#ifdef AARCH64 + /* TODO i#5844: Memory references involving SVE registers are not + * supported yet. To be implemented as part of scatter/gather work. 
+ */ + if (opnd_is_base_disp(src) && + (reg_is_z(opnd_get_base(src)) || reg_is_z(opnd_get_index(src)))) { + if (!reported_sg_warning) { + NOTIFY( + 0, + "WARNING: Scatter/gather is not supported, results will be " + "inaccurate\n"); + reported_sg_warning = true; + } + continue; + } +#endif + adjust = instrument_memref(drcontext, ud, bb, where, reg_ptr, adjust, + instr_operands, src, i, false, pred, mode); } } for (i = 0; i < instr_num_dsts(instr_operands); i++) { - if (opnd_is_memory_reference(instr_get_dst(instr_operands, i))) { - adjust = instrument_memref( - drcontext, ud, bb, where, reg_ptr, adjust, instr_operands, - instr_get_dst(instr_operands, i), i, true, pred, mode); + const opnd_t dst = instr_get_dst(instr_operands, i); + if (opnd_is_memory_reference(dst)) { +#ifdef AARCH64 + /* TODO i#5844: Memory references involving SVE registers are not + * supported yet. To be implemented as part of scatter/gather work. + */ + if (opnd_is_base_disp(dst) && + (reg_is_z(opnd_get_base(dst)) || reg_is_z(opnd_get_index(dst)))) { + if (!reported_sg_warning) { + NOTIFY( + 0, + "WARNING: Scatter/gather is not supported, results will be " + "inaccurate\n"); + reported_sg_warning = true; + } + continue; + } +#endif + adjust = instrument_memref(drcontext, ud, bb, where, reg_ptr, adjust, + instr_operands, dst, i, true, pred, mode); } } if (adjust != 0) diff --git a/clients/drdisas/drdisas.cpp b/clients/drdisas/drdisas.cpp index 89204513852..77c96e7fcfb 100644 --- a/clients/drdisas/drdisas.cpp +++ b/clients/drdisas/drdisas.cpp @@ -148,7 +148,7 @@ main(int argc, const char *argv[]) #endif #ifdef AARCH64 - dr_set_sve_vl(op_sve_vl.get_value()); + dr_set_sve_vector_length(op_sve_vl.get_value()); #endif // XXX i#4021: arm not yet supported. diff --git a/core/arch/aarch64/aarch64.asm b/core/arch/aarch64/aarch64.asm index 232247ec8ac..1fbd09c0406 100644 --- a/core/arch/aarch64/aarch64.asm +++ b/core/arch/aarch64/aarch64.asm @@ -47,14 +47,7 @@ START_FILE #endif /* sizeof(priv_mcontext_t) rounded up to a multiple of 16 */ -#define PRIV_MCONTEXT_SIZE 800 - -/* offset of priv_mcontext_t in dr_mcontext_t */ -#define PRIV_MCONTEXT_OFFSET 16 - -#if PRIV_MCONTEXT_OFFSET < 16 || PRIV_MCONTEXT_OFFSET % 16 != 0 -# error PRIV_MCONTEXT_OFFSET -#endif +#define PRIV_MCONTEXT_SIZE 3424 /* offsetof(spill_state_t, r0) */ #define spill_state_r0_OFFSET 0 @@ -76,7 +69,7 @@ START_FILE /* offsetof(priv_mcontext_t, simd) */ #define simd_OFFSET (16 * ARG_SZ*2 + 32) /* offsetof(dcontext_t, dstack) */ -#define dstack_OFFSET 0x368 +#define dstack_OFFSET 0xda8 /* offsetof(dcontext_t, is_exiting) */ #define is_exiting_OFFSET (dstack_OFFSET+1*ARG_SZ) /* offsetof(struct tlsdesc_t, arg) */ @@ -252,6 +245,9 @@ save_priv_mcontext_helper: st1 {v20.2d-v23.2d}, [x4], #64 st1 {v24.2d-v27.2d}, [x4], #64 st1 {v28.2d-v31.2d}, [x4], #64 + /* TODO i#5365: Save Z/P regs as well? Will require runtime check of + * ID_AA64PFR0_EL1 for FEAT_SVE. + */ ret DECLARE_EXPORTED_FUNC(dr_app_start) diff --git a/core/arch/aarch64/clean_call_opt.c b/core/arch/aarch64/clean_call_opt.c index 44b83b186ed..c95a4bd3047 100644 --- a/core/arch/aarch64/clean_call_opt.c +++ b/core/arch/aarch64/clean_call_opt.c @@ -183,8 +183,7 @@ analyze_callee_regs_usage(dcontext_t *dcontext, callee_info_t *ci) memset(ci->reg_used, 0, sizeof(bool) * DR_NUM_GPR_REGS); ci->num_simd_used = 0; /* num_opmask_used is not applicable to ARM/AArch64. 
*/ - ASSERT(proc_num_simd_registers() == MCXT_NUM_SIMD_SLOTS); - memset(ci->simd_used, 0, sizeof(bool) * proc_num_simd_registers()); + memset(ci->simd_used, 0, sizeof(bool) * MCXT_NUM_SIMD_SLOTS); ci->write_flags = false; num_regparm = MIN(ci->num_args, NUM_REGPARM); @@ -200,7 +199,6 @@ analyze_callee_regs_usage(dcontext_t *dcontext, callee_info_t *ci) } for (instr = instrlist_first(ilist); instr != NULL; instr = instr_get_next(instr)) { - /* General purpose registers */ for (i = 0; i < DR_NUM_GPR_REGS; i++) { reg_id_t reg = DR_REG_START_GPR + (reg_id_t)i; @@ -213,9 +211,12 @@ analyze_callee_regs_usage(dcontext_t *dcontext, callee_info_t *ci) } } - /* SIMD register usage */ - for (i = 0; i < proc_num_simd_registers(); i++) { - if (!ci->simd_used[i] && instr_uses_reg(instr, (DR_REG_Q0 + (reg_id_t)i))) { + /* SIMD/SVE register usage. */ + for (i = 0; i < MCXT_NUM_SIMD_SVE_SLOTS; i++) { + if (!ci->simd_used[i] && + instr_uses_reg(instr, + (proc_has_feature(FEATURE_SVE) ? DR_REG_Z0 : DR_REG_Q0) + + (reg_id_t)i)) { LOG(THREAD, LOG_CLEANCALL, 2, "CLEANCALL: callee " PFX " uses VREG%d at " PFX "\n", ci->start, i, instr_get_app_pc(instr)); @@ -224,6 +225,32 @@ analyze_callee_regs_usage(dcontext_t *dcontext, callee_info_t *ci) } } + if (proc_has_feature(FEATURE_SVE)) { + /* SVE predicate register usage */ + for (i = MCXT_NUM_SIMD_SVE_SLOTS; + i < (MCXT_NUM_SIMD_SVE_SLOTS + MCXT_NUM_SVEP_SLOTS); i++) { + const uint reg_idx = i - MCXT_NUM_SIMD_SVE_SLOTS; + if (!ci->simd_used[i] && + instr_uses_reg(instr, DR_REG_P0 + (reg_id_t)reg_idx)) { + LOG(THREAD, LOG_CLEANCALL, 2, + "CLEANCALL: callee " PFX " uses P%d at " PFX "\n", ci->start, + reg_idx, instr_get_app_pc(instr)); + ci->simd_used[i] = true; + ci->num_simd_used++; + } + } + + /* SVE FFR register usage */ + const uint ffr_index = MCXT_NUM_SIMD_SVE_SLOTS + MCXT_NUM_SVEP_SLOTS; + if (!ci->simd_used[ffr_index] && instr_uses_reg(instr, DR_REG_FFR)) { + LOG(THREAD, LOG_CLEANCALL, 2, + "CLEANCALL: callee " PFX " uses FFR at " PFX "\n", ci->start, + instr_get_app_pc(instr)); + ci->simd_used[ffr_index] = true; + ci->num_simd_used++; + } + } + /* NZCV register usage */ if (!ci->write_flags && TESTANY(EFLAGS_WRITE_ARITH, @@ -476,7 +503,7 @@ insert_inline_reg_save(dcontext_t *dcontext, clean_call_info_t *cci, instrlist_t insert_get_mcontext_base(dcontext, ilist, where, ci->spill_reg); insert_save_inline_registers(dcontext, ilist, where, cci->reg_skip, DR_REG_START_GPR, - true, (void *)ci); + GPR_REG_TYPE, (void *)ci); /* Save nzcv */ if (!cci->skip_save_flags && ci->write_flags) { @@ -512,7 +539,7 @@ insert_inline_reg_restore(dcontext_t *dcontext, clean_call_info_t *cci, } insert_restore_inline_registers(dcontext, ilist, where, cci->reg_skip, DR_REG_X0, - true, (void *)ci); + GPR_REG_TYPE, (void *)ci); /* Restore reg used for unprotected_context_t pointer. 
*/ PRE(ilist, where, diff --git a/core/arch/aarch64/emit_utils.c b/core/arch/aarch64/emit_utils.c index de66c0f4a4b..db64f5875ec 100644 --- a/core/arch/aarch64/emit_utils.c +++ b/core/arch/aarch64/emit_utils.c @@ -574,7 +574,7 @@ void append_restore_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) { int i; - /* add x1, x(dcxt), #(off) */ + /* add x1, x(dcxt), #(offset simd) */ APP(ilist, XINST_CREATE_add_2src(dcontext, opnd_create_reg(DR_REG_X1), opnd_create_reg(REG_DCXT), @@ -587,6 +587,67 @@ append_restore_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) opnd_create_reg(DR_REG_Q0 + i + 1), opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0, i * 16, OPSZ_32))); } + if (proc_has_feature(FEATURE_SVE)) { + for (i = 0; i < 32; i++) { + /* ldr z(i), [x1, #(i mul vl)] + * From the SVE manual: + * "Load a vector register from a memory address generated by a + * 64-bit scalar base, plus an immediate offset in the range -256 + * to 255 which is multiplied by the current vector register size + * in bytes." + */ + APP(ilist, + INSTR_CREATE_ldr( + dcontext, opnd_create_reg(DR_REG_Z0 + i), + opnd_create_base_disp( + DR_REG_X1, DR_REG_NULL, 0, i * proc_get_vector_length_bytes(), + opnd_size_from_bytes(proc_get_vector_length_bytes())))); + } + /* add x1, x(dcxt), #(offset svep) */ + APP(ilist, + XINST_CREATE_add_2src(dcontext, opnd_create_reg(DR_REG_X1), + opnd_create_reg(REG_DCXT), + OPND_CREATE_INTPTR(offsetof(priv_mcontext_t, svep)))); + /* No need to load DR_REG_P15 because it will be used as a temporary + * register for FFR load below, then restored from svep afterwards. + */ + for (i = 0; i < 15; i++) { + /* ldr p(i), [x1, #(i mul vl)] */ + APP(ilist, + INSTR_CREATE_ldr( + dcontext, opnd_create_reg(DR_REG_P0 + i), + opnd_create_base_disp( + DR_REG_X1, DR_REG_NULL, 0, + i * (proc_get_vector_length_bytes() / 8), + opnd_size_from_bytes(proc_get_vector_length_bytes() / 8)))); + } + /* There is no load instruction for the first-fault register (FFR). Use + * a temporary predicate register to load: + * add x2, x(dcxt), #(offset ffr) + * ldr p15, [x2, #(ffr)] + * wrffr p15.b + * ldr p15, [x1, #(15 mul vl)] + */ + APP(ilist, + XINST_CREATE_add_2src(dcontext, opnd_create_reg(DR_REG_X2), + opnd_create_reg(REG_DCXT), + OPND_CREATE_INTPTR(offsetof(priv_mcontext_t, ffr)))); + APP(ilist, + INSTR_CREATE_ldr( + dcontext, opnd_create_reg(DR_REG_P15), + opnd_create_base_disp( + DR_REG_X2, DR_REG_NULL, 0, 0, + opnd_size_from_bytes(proc_get_vector_length_bytes() / 8)))); + APP(ilist, + INSTR_CREATE_wrffr_sve(dcontext, + opnd_create_reg_element_vector(DR_REG_P15, OPSZ_1))); + APP(ilist, + INSTR_CREATE_ldr( + dcontext, opnd_create_reg(DR_REG_P15), + opnd_create_base_disp( + DR_REG_X1, DR_REG_NULL, 0, 15 * (proc_get_vector_length_bytes() / 8), + opnd_size_from_bytes(proc_get_vector_length_bytes() / 8)))); + } } /* Append instructions to restore gpr on fcache enter, to be executed @@ -730,13 +791,78 @@ append_save_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute) opnd_create_reg(REG_DCXT), OPND_CREATE_INTPTR(offsetof(priv_mcontext_t, simd)))); for (i = 0; i < 32; i += 2) { - /* stp q(i), q(i + 1), [x1, #(i * 16)] */ + /* stp q(i), q(i + 1), [x1, #(i * 16)] + * From the AArch64 manual: + * "The signed immediate byte offset is a multiple of 16 in the range + * -1024 to 1008, defaulting to 0 and encoded in the imm7 field as + * /16." 
+ */ APP(ilist, INSTR_CREATE_stp( dcontext, opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0, i * 16, OPSZ_32), opnd_create_reg(DR_REG_Q0 + i), opnd_create_reg(DR_REG_Q0 + i + 1))); } + if (proc_has_feature(FEATURE_SVE)) { + for (i = 0; i < 32; i++) { + /* str z(i), [x1, #(i mul vl)] + * "Store a vector register to a memory address generated by a + * 64-bit scalar base, plus an immediate offset in the range -256 + * to 255 which is multiplied by the current vector register size + * in bytes." + */ + APP(ilist, + INSTR_CREATE_str( + dcontext, + opnd_create_base_disp( + DR_REG_X1, DR_REG_NULL, 0, i * proc_get_vector_length_bytes(), + opnd_size_from_bytes(proc_get_vector_length_bytes())), + opnd_create_reg(DR_REG_Z0 + i))); + } + /* add x1, x(dcxt), #(off) */ + APP(ilist, + XINST_CREATE_add_2src(dcontext, opnd_create_reg(DR_REG_X1), + opnd_create_reg(REG_DCXT), + OPND_CREATE_INTPTR(offsetof(priv_mcontext_t, svep)))); + for (i = 0; i < 16; i++) { + /* str p(i), [x1, #(i mul vl)] */ + APP(ilist, + INSTR_CREATE_str( + dcontext, + opnd_create_base_disp( + DR_REG_X1, DR_REG_NULL, 0, + i * (proc_get_vector_length_bytes() / 8), + opnd_size_from_bytes(proc_get_vector_length_bytes() / 8)), + opnd_create_reg(DR_REG_P0 + i))); + } + /* There is no store instruction for the first-fault register (FFR). Use + * a temporary predicate register to store: + * rdffr p15.b + * add x2, x(dcxt), #(offset ffr) + * str p15, [x2, #(ffr)] + * ldr p15, [x1, #(15 mul vl)] + */ + APP(ilist, + INSTR_CREATE_rdffr_sve(dcontext, + opnd_create_reg_element_vector(DR_REG_P15, OPSZ_1))); + APP(ilist, + XINST_CREATE_add_2src(dcontext, opnd_create_reg(DR_REG_X2), + opnd_create_reg(REG_DCXT), + OPND_CREATE_INTPTR(offsetof(priv_mcontext_t, ffr)))); + APP(ilist, + INSTR_CREATE_str( + dcontext, + opnd_create_base_disp( + DR_REG_X2, DR_REG_NULL, 0, 0, + opnd_size_from_bytes(proc_get_vector_length_bytes() / 8)), + opnd_create_reg(DR_REG_P15))); + APP(ilist, + INSTR_CREATE_ldr( + dcontext, opnd_create_reg(DR_REG_P15), + opnd_create_base_disp( + DR_REG_X1, DR_REG_NULL, 0, 15 * (proc_get_vector_length_bytes() / 8), + opnd_size_from_bytes(proc_get_vector_length_bytes() / 8)))); + } } /* Scratch reg0 is holding exit stub. */ diff --git a/core/arch/aarch64/proc.c b/core/arch/aarch64/proc.c index 7538c70e05d..813e28257b4 100644 --- a/core/arch/aarch64/proc.c +++ b/core/arch/aarch64/proc.c @@ -38,6 +38,8 @@ static int num_simd_saved; static int num_simd_registers; +static int num_svep_registers; +static int num_ffr_registers; static int num_opmask_registers; #ifndef DR_HOST_NOT_TARGET @@ -101,12 +103,43 @@ get_processor_specific_info(void) cpu_info.features.flags_aa64zfr0 = isa_features[AA64ZFR0]; cpu_info.features.flags_aa64pfr1 = isa_features[AA64PFR1]; -# if !defined(DR_HOST_NOT_TARGET) && defined(SVE) - /* TODO i#3044: Vector length will be set by reading value from h/w. */ - CLIENT_ASSERT(false, "TODO i#3044: SVE requires initialisation of vector length!"); -# elif !defined(STANDALONE_DECODER) || defined(DR_HOST_NOT_TARGET) - /* Set SVE vector length for unit tests. */ - dr_set_sve_vl(256); + /* The SVE vector length is set to: + * - A value read from the host hardware. + * or: + * - 32 bytes, 256 bits. + * Which of the above depends on: + * - SVE or non-SVE AArch64 or x86 host h/w. + * and: + * - Release or development test build. 
+     */
+# if !defined(DR_HOST_NOT_TARGET)
+    if (proc_has_feature(FEATURE_SVE)) {
+#  if !defined(BUILD_TESTS)
+        uint64 vl;
+        /* This RDVL instruction is inserted as raw hex because we don't build
+         * with SVE enabled: i.e. not -march=armv8-a+sve, so that we can run a
+         * single DynamoRIO release on both SVE and non-SVE h/w.
+         * TODO i#5365: Ideally this should be generated by INSTR_CREATE_rdvl()
+         * and executed at startup time with other initialisation code.
+         */
+        asm(".inst 0x04bf5020\n" /* rdvl x0, #1 */
+            "mov %0, x0"
+            : "=r"(vl)
+            :
+            : "x0");
+        cpu_info.sve_vector_length_bytes = vl;
+        dr_set_sve_vector_length(vl * 8);
+#  else
+        cpu_info.sve_vector_length_bytes = 32;
+        dr_set_sve_vector_length(256);
+#  endif
+    } else {
+        cpu_info.sve_vector_length_bytes = 32;
+        dr_set_sve_vector_length(256);
+    }
+# else
+    /* Set SVE vector length for unit testing the off-line decoder. */
+    dr_set_sve_vector_length(256);
 # endif
     }
 # endif
@@ -120,8 +153,10 @@
 void
 proc_init_arch(void)
 {
-    num_simd_saved = MCXT_NUM_SIMD_SLOTS;
-    num_simd_registers = MCXT_NUM_SIMD_SLOTS;
+    num_simd_saved = MCXT_NUM_SIMD_SVE_SLOTS;
+    num_simd_registers = MCXT_NUM_SIMD_SVE_SLOTS;
+    num_svep_registers = MCXT_NUM_SVEP_SLOTS;
+    num_ffr_registers = MCXT_NUM_FFR_SLOTS;
     num_opmask_registers = MCXT_NUM_OPMASK_SLOTS;
 
     /* When DR_HOST_NOT_TARGET, get_cache_line_size returns false and does
@@ -198,54 +233,81 @@
 #define GET_FEAT_VAL(FEATURE) (((ushort)FEATURE) & 0x000F)
 #define GET_FEAT_NSFLAG(FEATURE) ((((ushort)FEATURE) & 0x8000) >> 15)
 
+void
+proc_set_feature(feature_bit_t f, bool enable)
+{
+    uint64 *freg_val = 0;
+    ushort feat_nibble = GET_FEAT_NIBPOS(f);
+    uint64 feat_nsflag = GET_FEAT_NSFLAG(f);
+    uint64 feat_val = GET_FEAT_VAL(f);
+
+    feature_reg_idx_t feat_reg = GET_FEAT_REG(f);
+    switch (feat_reg) {
+    case AA64ISAR0: {
+        freg_val = &cpu_info.features.flags_aa64isar0;
+        break;
+    }
+    case AA64ISAR1: {
+        freg_val = &cpu_info.features.flags_aa64isar1;
+        break;
+    }
+    case AA64PFR0: {
+        freg_val = &cpu_info.features.flags_aa64pfr0;
+        break;
+    }
+    case AA64MMFR1: {
+        freg_val = &cpu_info.features.flags_aa64mmfr1;
+        break;
+    }
+    case AA64DFR0: {
+        freg_val = &cpu_info.features.flags_aa64dfr0;
+        break;
+    }
+    case AA64ZFR0: {
+        freg_val = &cpu_info.features.flags_aa64zfr0;
+        break;
+    }
+    case AA64PFR1: {
+        freg_val = &cpu_info.features.flags_aa64pfr1;
+        break;
+    }
+    default: CLIENT_ASSERT(false, "proc_set_feature: invalid feature register");
+    }
+
+    /* Clear the current feature state. */
+    *freg_val &= ~(0xFULL << (feat_nibble * 4));
+    if (enable) {
+        /* Write the feature value into the feature nibble. */
+        *freg_val |= feat_val << (feat_nibble * 4);
+    } else if (feat_nsflag == 0xF) {
+        /* If the not-set flag is 0xF, then that needs manually setting.
+         */
+        *freg_val |= feat_nsflag << (feat_nibble * 4);
+    }
+}
+
+void
+enable_all_test_cpu_features()
+{
+    const feature_bit_t features[] = {
+        FEATURE_LSE,    FEATURE_RDM,        FEATURE_FP16,    FEATURE_DotProd,
+        FEATURE_SVE,    FEATURE_LOR,        FEATURE_FHM,     FEATURE_SM3,
+        FEATURE_SM4,    FEATURE_SHA512,     FEATURE_SHA3,    FEATURE_RAS,
+        FEATURE_SPE,    FEATURE_PAUTH,      FEATURE_LRCPC,   FEATURE_LRCPC2,
+        FEATURE_BF16,   FEATURE_I8MM,       FEATURE_F64MM,   FEATURE_FlagM,
+        FEATURE_JSCVT,  FEATURE_DPB,        FEATURE_DPB2,    FEATURE_SVE2,
+        FEATURE_SVEAES, FEATURE_SVEBitPerm, FEATURE_SVESHA3, FEATURE_SVESM4,
+        FEATURE_MTE
+    };
+    for (int i = 0; i < BUFFER_SIZE_ELEMENTS(features); ++i) {
+        proc_set_feature(features[i], true);
+    }
+    dr_set_sve_vector_length(256);
+}
+
 bool
 proc_has_feature(feature_bit_t f)
 {
 #ifndef DR_HOST_NOT_TARGET
-    /* Pretend features are supported for codec tests run on h/w which does not
-     * support all features.
-     */
-# if defined(BUILD_TESTS)
-    switch (f) {
-    case FEATURE_LSE:
-    case FEATURE_RDM:
-    case FEATURE_FP16:
-    case FEATURE_DotProd:
-    case FEATURE_SVE:
-    case FEATURE_LOR:
-    case FEATURE_FHM:
-    case FEATURE_SM3:
-    case FEATURE_SM4:
-    case FEATURE_SHA512:
-    case FEATURE_SHA3:
-    case FEATURE_RAS:
-    case FEATURE_SPE:
-    case FEATURE_PAUTH:
-    case FEATURE_LRCPC:
-    case FEATURE_LRCPC2:
-    case FEATURE_BF16:
-    case FEATURE_I8MM:
-    case FEATURE_F64MM:
-    case FEATURE_FlagM:
-    case FEATURE_JSCVT:
-    case FEATURE_DPB:
-    case FEATURE_DPB2:
-    case FEATURE_SVE2:
-    case FEATURE_SVEAES:
-    case FEATURE_SVEBitPerm:
-    case FEATURE_SVESHA3:
-    case FEATURE_SVESM4:
-    case FEATURE_MTE: return true;
-
-    case FEATURE_AESX:
-    case FEATURE_PMULL:
-    case FEATURE_SHA1:
-    case FEATURE_SHA256:
-    case FEATURE_CRC32:
-    case FEATURE_FlagM2:
-    case FEATURE_RNG: break;
-    }
-# endif
     ushort feat_nibble, feat_val, freg_nibble, feat_nsflag;
     uint64 freg_val = 0;
 
@@ -335,7 +397,8 @@ DR_API
 int
 proc_num_simd_registers(void)
 {
-    return num_simd_registers;
+    return num_simd_registers +
+        (proc_has_feature(FEATURE_SVE) ? (num_svep_registers + num_ffr_registers) : 0);
 }
 
 DR_API
diff --git a/core/arch/aarchxx/mangle.c b/core/arch/aarchxx/mangle.c
index 171636724b1..9806b65a2ab 100644
--- a/core/arch/aarchxx/mangle.c
+++ b/core/arch/aarchxx/mangle.c
@@ -103,29 +103,38 @@ insert_clear_eflags(dcontext_t *dcontext, clean_call_info_t *cci, instrlist_t *i
 #ifdef AARCH64
 /* Maximum positive immediate offset for STP/LDP with 64 bit registers. */
 # define MAX_STP_OFFSET 504
+/* Maximum positive immediate offset for SVE STR/LDR with Z/P registers. */
+# define MAX_SVE_STR_OFFSET 255
 
 /* Creates a memory reference for registers saved/restored to memory. */
 static opnd_t
-create_base_disp_for_save_restore(uint base_reg, bool is_single_reg, bool is_gpr,
+create_base_disp_for_save_restore(uint base_reg, bool is_single_reg, reg_type_t rtype,
                                   uint num_saved, callee_info_t *ci)
 {
    /* opsz depends on the kind of register and whether a single register or
     * a pair of registers is saved/restored using stp/ldp.
     */
-    uint opsz;
-    if (is_gpr) {
-        if (is_single_reg)
-            opsz = OPSZ_8;
-        else
-            opsz = OPSZ_16;
-    } else {
-        if (is_single_reg)
-            opsz = OPSZ_16;
-        else
-            opsz = OPSZ_32;
+    uint opsz = OPSZ_NA;
+    uint offset = 0;
+    switch (rtype) {
+    case GPR_REG_TYPE:
+        opsz = is_single_reg ? OPSZ_8 : OPSZ_16;
+        offset = num_saved * sizeof(reg_t);
+        break;
+    case SIMD_REG_TYPE:
+        opsz = is_single_reg ?
OPSZ_16 : OPSZ_32; + offset = num_saved * 16; + break; + case SVE_ZREG_TYPE: + opsz = opnd_size_from_bytes(proc_get_vector_length_bytes()); + offset = num_saved * proc_get_vector_length_bytes(); + break; + case SVE_PREG_TYPE: + opsz = opnd_size_from_bytes(proc_get_vector_length_bytes() / 8); + offset = num_saved * (proc_get_vector_length_bytes() / 8); + break; + default: ASSERT_NOT_REACHED(); } - - uint offset = num_saved * (is_gpr ? sizeof(reg_t) : sizeof(dr_simd_t)); return opnd_create_base_disp(base_reg, DR_REG_NULL, 0, offset, opsz); } @@ -144,15 +153,17 @@ create_load_or_store_instr(dcontext_t *dcontext, reg_id_t reg, opnd_t mem, bool * is odd. Optionally takes reg_skip into account. */ static void -insert_save_or_restore_registers(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, - bool *reg_skip, reg_id_t base_reg, reg_id_t first_reg, - bool save, bool is_gpr, - opnd_t (*get_mem_opnd)(uint base_reg, bool is_single_reg, - bool is_gpr, uint num_saved, - callee_info_t *ci), - callee_info_t *ci) +insert_save_or_restore_gpr_simd_registers( + dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, bool *reg_skip, + reg_id_t base_reg, reg_id_t first_reg, bool save, reg_type_t rtype, + opnd_t (*get_mem_opnd)(uint base_reg, bool is_single_reg, reg_type_t rtype, + uint num_saved, callee_info_t *ci), + callee_info_t *ci) { - uint i, reg1 = UINT_MAX, num_regs = is_gpr ? 30 : 32; + ASSERT(rtype == GPR_REG_TYPE || rtype == SIMD_REG_TYPE); + + uint i, reg1 = UINT_MAX, + num_regs = (rtype == GPR_REG_TYPE) ? 30 : MCXT_NUM_SIMD_SVE_SLOTS; uint saved_regs = 0; instr_t *new_instr; /* Use stp/ldp to save/restore as many register pairs to memory, skipping @@ -166,7 +177,7 @@ insert_save_or_restore_registers(dcontext_t *dcontext, instrlist_t *ilist, instr reg1 = i; else { opnd_t mem1 = - get_mem_opnd(base_reg, false /* is_single_reg */, is_gpr, + get_mem_opnd(base_reg, /*is_single_reg=*/false, rtype, /* When creating save/restore instructions * for inlining, we need the register id * to compute the address. @@ -180,7 +191,7 @@ insert_save_or_restore_registers(dcontext_t *dcontext, instrlist_t *ilist, instr create_load_or_store_instr(dcontext, first_reg + reg1, mem1, save)); opnd_t mem2 = - get_mem_opnd(base_reg, false /* is_single_reg */, is_gpr, + get_mem_opnd(base_reg, /*is_single_reg=*/false, rtype, /* When creating save/restore instructions * for inlining, we need the register id * to compute the address. @@ -211,34 +222,173 @@ insert_save_or_restore_registers(dcontext_t *dcontext, instrlist_t *ilist, instr */ if (reg1 != UINT_MAX) { opnd_t mem = - get_mem_opnd(base_reg, true /* is_single_reg */, is_gpr, + get_mem_opnd(base_reg, /*is_single_reg=*/true, rtype, ci != NULL ? 
first_reg + (reg_id_t)reg1 : saved_regs, ci);
         PRE(ilist, instr,
             create_load_or_store_instr(dcontext, first_reg + reg1, mem, save));
     }
 }
 
+static void
+insert_save_or_restore_svep_registers(
+    dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, bool *reg_skip,
+    reg_id_t base_reg, bool save,
+    opnd_t (*get_mem_opnd)(uint base_reg, bool is_single_reg, reg_type_t rtype,
+                           uint num_saved, callee_info_t *ci),
+    callee_info_t *ci)
+{
+    uint i, saved_regs = 0;
+    for (i = 0; i < MCXT_NUM_SVEP_SLOTS; i++) {
+        if (reg_skip != NULL && reg_skip[MCXT_NUM_SIMD_SVE_SLOTS + i])
+            continue;
+
+        opnd_t mem =
+            get_mem_opnd(base_reg, /*is_single_reg=*/true, SVE_PREG_TYPE, saved_regs, ci);
+        /* disp should never be greater than MAX_SVE_STR_OFFSET because it
+         * is the immediate multiplied by the current vector register size
+         * in bytes: STR <Pt>, [<Xn|SP>{, #<imm>, MUL VL}] and we only go up
+         * num_regs registers.
+         */
+        ASSERT(opnd_get_disp(mem) / proc_get_vector_length_bytes() <= MAX_SVE_STR_OFFSET);
+        PRE(ilist, instr, create_load_or_store_instr(dcontext, DR_REG_P0 + i, mem, save));
+        saved_regs++;
+    }
+}
+
+static void
+insert_save_or_restore_sve_registers(
+    dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, bool *reg_skip,
+    reg_id_t base_reg, reg_id_t first_reg, bool save, reg_type_t rtype,
+    opnd_t (*get_mem_opnd)(uint base_reg, bool is_single_reg, reg_type_t rtype,
+                           uint num_saved, callee_info_t *ci),
+    callee_info_t *ci)
+{
+    ASSERT(rtype == SVE_ZREG_TYPE);
+    ASSERT(first_reg == DR_REG_Z0);
+    ASSERT(MCXT_NUM_FFR_SLOTS == 1);
+
+    // SVE Z registers.
+    uint i, saved_regs = 0;
+    for (i = 0; i < MCXT_NUM_SIMD_SVE_SLOTS; i++) {
+        if (reg_skip != NULL && reg_skip[i])
+            continue;
+
+        opnd_t mem =
+            get_mem_opnd(base_reg, /*is_single_reg=*/true, SVE_ZREG_TYPE, saved_regs, ci);
+        /* disp should never be greater than MAX_SVE_STR_OFFSET because it
+         * is the immediate multiplied by the current vector register size
+         * in bytes: STR <Zt>, [<Xn|SP>{, #<imm>, MUL VL}] and we only go up
+         * MCXT_NUM_SIMD_SVE_SLOTS registers.
+         */
+        ASSERT(opnd_get_disp(mem) / proc_get_vector_length_bytes() <= MAX_SVE_STR_OFFSET);
+        PRE(ilist, instr, create_load_or_store_instr(dcontext, DR_REG_Z0 + i, mem, save));
+        saved_regs++;
+    }
+
+    /* add base_reg, base_reg, #(SVE register offset) */
+    PRE(ilist, instr,
+        XINST_CREATE_add(dcontext, opnd_create_reg(base_reg),
+                         OPND_CREATE_INT16(MCXT_NUM_SIMD_SVE_SLOTS * sizeof(dr_simd_t))));
+
+    /* The FFR register cannot be loaded or stored directly, as the ldr/str
+     * register operand has to be a predicate, which means that the FFR save
+     * has to come after the predicate saves, and vice versa when loading.
+     *
+     * Save Seq:
+     * - Save preds
+     * - Save FFR to P15
+     * - Store P15 to x0 (offset 16 to skip past preds)
+     *
+     * Load Seq:
+     * - Read x0 to P15 (offset 16 to skip past preds)
+     * - Write P15 to FFR
+     * - Restore preds
+     */
+    const bool handle_ffr =
+        reg_skip == NULL || !reg_skip[MCXT_NUM_SIMD_SVE_SLOTS + MCXT_NUM_SVEP_SLOTS];
+    // SVE P and FFR registers.
+    if (save) {
+        insert_save_or_restore_svep_registers(dcontext, ilist, instr, reg_skip, base_reg,
+                                              save, get_mem_opnd, ci);
+
+        if (handle_ffr) {
+            PRE(ilist, instr,
+                INSTR_CREATE_rdffr_sve(
+                    dcontext, opnd_create_reg_element_vector(DR_REG_P15, OPSZ_1)));
+            opnd_t mem =
+                get_mem_opnd(base_reg, /*is_single_reg=*/true, SVE_PREG_TYPE, 16, ci);
+            PRE(ilist, instr,
+                create_load_or_store_instr(dcontext, DR_REG_P15, mem, save));
+        }
+    } else {
+        if (handle_ffr) {
+            opnd_t mem =
+                get_mem_opnd(base_reg, /*is_single_reg=*/true, SVE_PREG_TYPE, 16, ci);
+            PRE(ilist, instr,
+                create_load_or_store_instr(dcontext, DR_REG_P15, mem, save));
+            PRE(ilist, instr,
+                INSTR_CREATE_wrffr_sve(
+                    dcontext, opnd_create_reg_element_vector(DR_REG_P15, OPSZ_1)));
+        }
+
+        insert_save_or_restore_svep_registers(dcontext, ilist, instr, reg_skip, base_reg,
+                                              save, get_mem_opnd, ci);
+    }
+}
+
+static void
+insert_save_or_restore_registers(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr,
+                                 bool *reg_skip, reg_id_t base_reg, reg_id_t first_reg,
+                                 bool save, reg_type_t rtype,
+                                 opnd_t (*get_mem_opnd)(uint base_reg, bool is_single_reg,
+                                                        reg_type_t rtype, uint num_saved,
+                                                        callee_info_t *ci),
+                                 callee_info_t *ci)
+{
+    switch (rtype) {
+    case GPR_REG_TYPE:
+    case SIMD_REG_TYPE:
+        insert_save_or_restore_gpr_simd_registers(dcontext, ilist, instr, reg_skip,
+                                                  base_reg, first_reg, save, rtype,
+                                                  get_mem_opnd, ci);
+        break;
+    case SVE_ZREG_TYPE:
+        insert_save_or_restore_sve_registers(dcontext, ilist, instr, reg_skip, base_reg,
+                                             first_reg, save, rtype, get_mem_opnd, ci);
+        break;
+    case SVE_PREG_TYPE:
+        /* SVE Z, P and FFR registers are saved/restored sequentially in
+         * insert_save_or_restore_sve_registers(). At this top-level call layer
+         * we use SVE_ZREG_TYPE to indicate all of the SVE register bank.
+ */ + CLIENT_ASSERT(false, + "internal error, use SVE_ZREG_TYPE for top level save/restore of " + "SVE registers."); + default: ASSERT_NOT_REACHED(); + } +} + static void insert_save_registers(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, - bool *reg_skip, reg_id_t base_reg, reg_id_t first_reg, bool is_gpr) + bool *reg_skip, reg_id_t base_reg, reg_id_t first_reg, + reg_type_t rtype) { insert_save_or_restore_registers(dcontext, ilist, instr, reg_skip, base_reg, - first_reg, true /* save */, is_gpr, + first_reg, true /* save */, rtype, create_base_disp_for_save_restore, NULL); } static void insert_restore_registers(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, bool *reg_skip, reg_id_t base_reg, reg_id_t first_reg, - bool is_gpr) + reg_type_t rtype) { insert_save_or_restore_registers(dcontext, ilist, instr, reg_skip, base_reg, - first_reg, false /* restore */, is_gpr, + first_reg, false /* restore */, rtype, create_base_disp_for_save_restore, NULL); } static opnd_t -inline_get_mem_opnd(uint base_reg, bool is_single_reg, bool is_gpr, uint reg_id, +inline_get_mem_opnd(uint base_reg, bool is_single_reg, reg_type_t rtype, uint reg_id, callee_info_t *ci) { return callee_info_slot_opnd(ci, SLOT_REG, reg_id); @@ -246,19 +396,21 @@ inline_get_mem_opnd(uint base_reg, bool is_single_reg, bool is_gpr, uint reg_id, void insert_save_inline_registers(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, - bool *reg_skip, reg_id_t first_reg, bool is_gpr, void *ci) + bool *reg_skip, reg_id_t first_reg, reg_type_t rtype, + void *ci) { insert_save_or_restore_registers(dcontext, ilist, instr, reg_skip, 0, first_reg, - true /* save */, is_gpr, inline_get_mem_opnd, + true /* save */, rtype, inline_get_mem_opnd, (callee_info_t *)ci); } void insert_restore_inline_registers(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, - bool *reg_skip, reg_id_t first_reg, bool is_gpr, void *ci) + bool *reg_skip, reg_id_t first_reg, reg_type_t rtype, + void *ci) { insert_save_or_restore_registers(dcontext, ilist, instr, reg_skip, 0, first_reg, - false /* restore */, is_gpr, inline_get_mem_opnd, + false /* restore */, rtype, inline_get_mem_opnd, (callee_info_t *)ci); } @@ -283,12 +435,9 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, _IF_AARCH64(bool out_of_line)) { uint dstack_offs = 0; -#ifdef AARCH64 - uint max_offs; -#endif + if (cci == NULL) cci = &default_clean_call_info; - ASSERT(proc_num_simd_registers() == MCXT_NUM_SIMD_SLOTS); if (cci->preserve_mcontext || cci->num_simd_skip != proc_num_simd_registers()) { /* FIXME i#1551: once we add skipping of regs, need to keep shape here. * Also, num_opmask_skip is not applicable to ARM/AArch64. @@ -296,6 +445,11 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, } /* FIXME i#1551: once we have cci->num_simd_skip, skip this if possible */ #ifdef AARCH64 + ASSERT(proc_num_simd_registers() == + (MCXT_NUM_SIMD_SVE_SLOTS + + (proc_has_feature(FEATURE_SVE) ? (MCXT_NUM_SVEP_SLOTS + MCXT_NUM_FFR_SLOTS) + : 0))); + /* X0 is used to hold the stack pointer. */ cci->reg_skip[DR_REG_X0 - DR_REG_START_GPR] = false; /* X1 and X2 are used to save and restore the status and control registers. */ @@ -304,8 +458,6 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, /* X11 is used to calculate the target address of the clean call. 
*/ cci->reg_skip[DR_REG_X11 - DR_REG_START_GPR] = false; - max_offs = get_clean_call_switch_stack_size(); - /* For out-of-line clean calls, the stack pointer is adjusted before jumping * to this code. */ @@ -313,16 +465,16 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, /* sub sp, sp, #clean_call_switch_stack_size */ PRE(ilist, instr, XINST_CREATE_sub(dcontext, opnd_create_reg(DR_REG_SP), - OPND_CREATE_INT16(max_offs))); + OPND_CREATE_INT16(get_clean_call_switch_stack_size()))); } /* Push GPRs. */ insert_save_registers(dcontext, ilist, instr, cci->reg_skip, DR_REG_SP, DR_REG_X0, - true /* is_gpr */); + GPR_REG_TYPE); dstack_offs += 32 * XSP_SZ; - /* mov x0, sp */ + /* mov x0, sp (add %sp $0x0000 lsl $0x00 -> %x0) */ PRE(ilist, instr, XINST_CREATE_move(dcontext, opnd_create_reg(DR_REG_X0), opnd_create_reg(DR_REG_SP))); @@ -339,7 +491,7 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, opnd_create_reg(DR_REG_X30), opnd_create_reg(DR_REG_X0))); } - /* add x0, x0, #dstack_offs */ + /* add x0, x0, #dstack_offs (add %x0 $0x0100 lsl $0x00 -> %x0) */ PRE(ilist, instr, XINST_CREATE_add(dcontext, opnd_create_reg(DR_REG_X0), OPND_CREATE_INT16(dstack_offs))); @@ -347,6 +499,7 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, /* save the push_pc operand to the priv_mcontext_t.pc field */ if (!(cci->skip_save_flags)) { if (opnd_is_immed_int(push_pc)) { + /* movz $0x0000 lsl $0x00 -> %x1 */ PRE(ilist, instr, XINST_CREATE_load_int(dcontext, opnd_create_reg(DR_REG_X1), push_pc)); } else { @@ -359,7 +512,7 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, OPND_CREATE_MEM64(DR_REG_SP, REG_OFFSET(push_pc_reg)))); } - /* str x1, [sp, #dstack_offset] */ + /* str x1, [sp, #dstack_offset] (str %x1 -> +0x0100(%sp)[8byte]) */ PRE(ilist, instr, INSTR_CREATE_str(dcontext, OPND_CREATE_MEM64(DR_REG_SP, dstack_offs), opnd_create_reg(DR_REG_X1))); @@ -368,24 +521,25 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, dstack_offs += XSP_SZ; /* Save flag values using x1, x2. */ - /* mrs x1, nzcv */ + /* mrs x1, nzcv (mrs %nzcv -> %x1) + */ PRE(ilist, instr, INSTR_CREATE_mrs(dcontext, opnd_create_reg(DR_REG_X1), opnd_create_reg(DR_REG_NZCV))); - /* mrs x2, fpcr */ + /* mrs x2, fpcr (mrs %fpcr -> %x2) */ PRE(ilist, instr, INSTR_CREATE_mrs(dcontext, opnd_create_reg(DR_REG_X2), opnd_create_reg(DR_REG_FPCR))); - /* stp w1, w2, [x0, #8] */ + /* stp w1, w2, [x0, #8] (stp %w1 %w2 -> +0x08(%x0)[8byte]) */ PRE(ilist, instr, INSTR_CREATE_stp(dcontext, OPND_CREATE_MEM64(DR_REG_X0, 8), opnd_create_reg(DR_REG_W1), opnd_create_reg(DR_REG_W2))); - /* mrs x1, fpsr */ + /* mrs x1, fpsr (mrs %fpsr -> %x1) */ PRE(ilist, instr, INSTR_CREATE_mrs(dcontext, opnd_create_reg(DR_REG_X1), opnd_create_reg(DR_REG_FPSR))); - /* str w1, [x0, #16] */ + /* str w1, [x0, #16] (str %w1 -> +0x10(%x0)[4byte]) */ PRE(ilist, instr, INSTR_CREATE_str(dcontext, OPND_CREATE_MEM32(DR_REG_X0, 16), opnd_create_reg(DR_REG_W1))); @@ -401,12 +555,17 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, XINST_CREATE_add(dcontext, opnd_create_reg(DR_REG_X0), OPND_CREATE_INT16(dstack_offs - 32 * XSP_SZ))); - /* Push SIMD registers. 
*/ - insert_save_registers(dcontext, ilist, instr, cci->simd_skip, DR_REG_X0, DR_REG_Q0, - false /* is_gpr */); + if (proc_has_feature(FEATURE_SVE)) { + /* Save the SVE regs */ + insert_save_registers(dcontext, ilist, instr, cci->simd_skip, DR_REG_X0, + DR_REG_Z0, SVE_ZREG_TYPE); + } else { + /* Save the SIMD registers. */ + insert_save_registers(dcontext, ilist, instr, cci->simd_skip, DR_REG_X0, + DR_REG_Q0, SIMD_REG_TYPE); + } - dstack_offs += (proc_num_simd_registers() * sizeof(dr_simd_t)); - ASSERT(proc_num_simd_registers() == MCXT_NUM_SIMD_SLOTS); + dstack_offs += MCXT_NUM_SIMD_SLOTS * sizeof(dr_simd_t); /* Restore the registers we used. */ /* ldp x0, x1, [sp] */ @@ -419,7 +578,6 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, opnd_create_base_disp(DR_REG_SP, DR_REG_NULL, 0, REG_OFFSET(DR_REG_X2), OPSZ_8))); #else - /* vstmdb always does writeback */ PRE(ilist, instr, INSTR_CREATE_vstmdb(dcontext, OPND_CREATE_MEMLIST(DR_REG_SP), SIMD_REG_LIST_LEN, @@ -520,18 +678,23 @@ insert_pop_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, instrlist XINST_CREATE_move(dcontext, opnd_create_reg(DR_REG_X0), opnd_create_reg(DR_REG_SP))); - current_offs = get_clean_call_switch_stack_size() - - proc_num_simd_registers() * sizeof(dr_simd_t); - ASSERT(proc_num_simd_registers() == MCXT_NUM_SIMD_SLOTS); + current_offs = + get_clean_call_switch_stack_size() - (MCXT_NUM_SIMD_SLOTS * sizeof(dr_simd_t)); /* add x0, x0, current_offs */ PRE(ilist, instr, XINST_CREATE_add(dcontext, opnd_create_reg(DR_REG_X0), OPND_CREATE_INT32(current_offs))); - /* Pop SIMD registers. */ - insert_restore_registers(dcontext, ilist, instr, cci->simd_skip, DR_REG_X0, DR_REG_Q0, - false /* is_gpr */); + if (proc_has_feature(FEATURE_SVE)) { + /* Restore the SVE regs */ + insert_restore_registers(dcontext, ilist, instr, cci->simd_skip, DR_REG_X0, + DR_REG_Z0, SVE_ZREG_TYPE); + } else { + /* Restore the SIMD registers. */ + insert_restore_registers(dcontext, ilist, instr, cci->simd_skip, DR_REG_X0, + DR_REG_Q0, SIMD_REG_TYPE); + } /* mov x0, sp */ PRE(ilist, instr, @@ -553,11 +716,11 @@ insert_pop_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, instrlist INSTR_CREATE_ldp(dcontext, opnd_create_reg(DR_REG_W1), opnd_create_reg(DR_REG_W2), OPND_CREATE_MEM64(DR_REG_X0, 8))); - /* msr nzcv, w1 */ + /* msr nzcv, x1 */ PRE(ilist, instr, INSTR_CREATE_msr(dcontext, opnd_create_reg(DR_REG_NZCV), opnd_create_reg(DR_REG_X1))); - /* msr fpcr, w2 */ + /* msr fpcr, x2 */ PRE(ilist, instr, INSTR_CREATE_msr(dcontext, opnd_create_reg(DR_REG_FPCR), opnd_create_reg(DR_REG_X2))); @@ -567,7 +730,7 @@ insert_pop_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, instrlist INSTR_CREATE_ldr(dcontext, opnd_create_reg(DR_REG_W1), OPND_CREATE_MEM32(DR_REG_X0, 16))); - /* msr fpsr, w1 */ + /* msr fpsr, x1 */ PRE(ilist, instr, INSTR_CREATE_msr(dcontext, opnd_create_reg(DR_REG_FPSR), opnd_create_reg(DR_REG_X1))); @@ -575,14 +738,14 @@ insert_pop_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, instrlist /* Pop GPRs */ insert_restore_registers(dcontext, ilist, instr, cci->reg_skip, DR_REG_SP, DR_REG_X0, - true /* is_gpr */); + GPR_REG_TYPE); /* For out-of-line clean calls, X30 is restored after jumping back from this * code, because it is used for the return address. 
*/
     if (!out_of_line) {
         /* Recover x30 */
-        /* ldr w3, [x0, #16] */
+        /* ldr x30, [sp, #x30_offset] */
         PRE(ilist, instr,
             INSTR_CREATE_ldr(dcontext, opnd_create_reg(DR_REG_X30),
                              OPND_CREATE_MEM64(DR_REG_SP, REG_OFFSET(DR_REG_X30))));
diff --git a/core/arch/arch.c b/core/arch/arch.c
index 730caf19892..a7237327f16 100644
--- a/core/arch/arch.c
+++ b/core/arch/arch.c
@@ -914,7 +914,8 @@ arch_profile_exit()
 #endif /* WINDOWS_PC_SAMPLE */
 
 /* arch-specific atexit cleanup */
-void d_r_arch_exit(IF_WINDOWS_ELSE_NP(bool detach_stacked_callbacks, void))
+void
+d_r_arch_exit(IF_WINDOWS_ELSE_NP(bool detach_stacked_callbacks, void))
 {
     /* we only need to unprotect shared_code for profile extraction
      * so we do it there to also cover the fast exit path
@@ -1984,7 +1985,8 @@ fcache_return_routine_ex(dcontext_t *dcontext _IF_X86_64(gencode_mode_t mode))
     return (cache_pc)code->fcache_return;
 }
 
-cache_pc fcache_return_coarse_routine(IF_X86_64_ELSE(gencode_mode_t mode, void))
+cache_pc
+fcache_return_coarse_routine(IF_X86_64_ELSE(gencode_mode_t mode, void))
 {
     generated_code_t *code = get_shared_gencode(GLOBAL_DCONTEXT _IF_X86_64(mode));
     ASSERT(DYNAMO_OPTION(coarse_units));
@@ -1994,7 +1996,8 @@ cache_pc fcache_return_coarse_routine(IF_X86_64_ELSE(gencode_mode_t mode, void))
     return (cache_pc)code->fcache_return_coarse;
 }
 
-cache_pc trace_head_return_coarse_routine(IF_X86_64_ELSE(gencode_mode_t mode, void))
+cache_pc
+trace_head_return_coarse_routine(IF_X86_64_ELSE(gencode_mode_t mode, void))
 {
     generated_code_t *code = get_shared_gencode(GLOBAL_DCONTEXT _IF_X86_64(mode));
     ASSERT(DYNAMO_OPTION(coarse_units));
@@ -2769,7 +2772,8 @@ fcache_enter_shared_routine(dcontext_t *dcontext)
                 SHARED_GENCODE_MATCH_THREAD(dcontext)->fcache_enter);
 }
 
-cache_pc fcache_return_shared_routine(IF_X86_64_ELSE(gencode_mode_t mode, void))
+cache_pc
+fcache_return_shared_routine(IF_X86_64_ELSE(gencode_mode_t mode, void))
 {
     generated_code_t *code = get_shared_gencode(GLOBAL_DCONTEXT _IF_X86_64(mode));
     ASSERT(USE_SHARED_GENCODE());
@@ -2780,7 +2784,8 @@ cache_pc fcache_return_shared_routine(IF_X86_64_ELSE(gencode_mode_t mode, void))
 }
 
 #ifdef TRACE_HEAD_CACHE_INCR
-cache_pc trace_head_incr_shared_routine(IF_X86_64_ELSE(gencode_mode_t mode, void))
+cache_pc
+trace_head_incr_shared_routine(IF_X86_64_ELSE(gencode_mode_t mode, void))
 {
     generated_code_t *code = get_shared_gencode(GLOBAL_DCONTEXT _IF_X86_64(mode));
     ASSERT(USE_SHARED_GENCODE());
@@ -3545,6 +3550,18 @@ priv_mcontext_to_dr_mcontext(dr_mcontext_t *dst, priv_mcontext_t *src)
      */
     if (dst->size > sizeof(dr_mcontext_t))
         return false;
+#if defined(AARCH64)
+    /* We could support binary compatibility for clients built before the
+     * addition of AArch64's SVE support, by evaluating the machine context's
+     * user set-size field. But we currently do not, preferring to detect
+     * incompatibility and asserting or returning false.
+ */ + if (TEST(DR_MC_MULTIMEDIA, dst->flags) && dst->size != sizeof(dr_mcontext_t)) { + CLIENT_ASSERT( + false, "A pre-SVE client is running on an Arm AArch64 SVE DynamoRIO build!"); + return false; + } +#endif if (TESTALL(DR_MC_ALL, dst->flags) && dst->size == sizeof(dr_mcontext_t)) { *(priv_mcontext_t *)(&MCXT_FIRST_REG_FIELD(dst)) = *src; } else { @@ -3628,7 +3645,7 @@ priv_mcontext_to_dr_mcontext(dr_mcontext_t *dst, priv_mcontext_t *src) return false; memcpy(&dst->opmask, &src->opmask, sizeof(dst->opmask)); } -#else +#elif defined(AARCHXX) /* FIXME i#1551: NYI on ARM */ ASSERT_NOT_IMPLEMENTED(false); #endif @@ -3811,14 +3828,20 @@ dump_mcontext(priv_mcontext_t *context, file_t f, bool dump_xml) #elif defined(AARCHXX) { int i, j; +# ifdef AARCH64 + int words = proc_has_feature(FEATURE_SVE) ? 16 : 4; +# else + int words = 4; +# endif /* XXX: should be proc_num_simd_saved(). */ for (i = 0; i < proc_num_simd_registers(); i++) { print_file(f, dump_xml ? "\t\tqd= \"0x" : "\tq%-3d= 0x", i); - for (j = 0; j < 4; j++) { + for (j = 0; j < words; j++) { print_file(f, "%08x ", context->simd[i].u32[j]); } print_file(f, dump_xml ? "\"\n" : "\n"); } + /* TODO i#5365: SVE predicate registers and FFR dump. */ } #endif diff --git a/core/arch/arch.h b/core/arch/arch.h index 79efddd9614..d159b2fe16c 100644 --- a/core/arch/arch.h +++ b/core/arch/arch.h @@ -669,14 +669,18 @@ void convert_to_near_rel(dcontext_t *dcontext, instr_t *instr); instr_t * convert_to_near_rel_meta(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr); + #ifdef AARCH64 +typedef enum { GPR_REG_TYPE, SIMD_REG_TYPE, SVE_ZREG_TYPE, SVE_PREG_TYPE } reg_type_t; + void insert_save_inline_registers(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, - bool *reg_skip, reg_id_t first_reg, bool is_gpr, void *ci); + bool *reg_skip, reg_id_t first_reg, reg_type_t rtype, + void *ci); void insert_restore_inline_registers(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, - bool *reg_skip, reg_id_t first_reg, bool is_gpr, + bool *reg_skip, reg_id_t first_reg, reg_type_t rtype, void *ci); #endif diff --git a/core/arch/proc.h b/core/arch/proc.h index 316abccf140..e4b204edd05 100644 --- a/core/arch/proc.h +++ b/core/arch/proc.h @@ -71,6 +71,7 @@ typedef struct _cpu_info_t { uint vendor; #ifdef AARCHXX uint architecture; + uint sve_vector_length_bytes; #endif uint family; uint type; diff --git a/core/arch/proc_api.h b/core/arch/proc_api.h index ed1b6e5b5c8..ebbb87a2b15 100644 --- a/core/arch/proc_api.h +++ b/core/arch/proc_api.h @@ -466,6 +466,25 @@ DR_API bool proc_has_feature(feature_bit_t feature); +#if defined(AARCH64) && defined(BUILD_TESTS) +DR_API +/** + * Allows overriding the available state of CPU features. + * This is only for unit testing and offline decode, and must be called after + * proc_init_arch() (e.g. after dr_standalone_init() or dr_app_setup()). + */ +void +proc_set_feature(feature_bit_t f, bool enable); + +DR_API +/** + * Uses proc_set_feature() to forcibly enable CPU features for unit testing and offline + * decode. + */ +void +enable_all_test_cpu_features(); +#endif + DR_API /** * Returns all 4 32-bit feature values on X86 and architectural feature @@ -500,6 +519,19 @@ DR_API const char * proc_get_cache_size_str(cache_size_t size); +#ifdef AARCHXX +DR_API +/** + * Returns the size in bytes of the SVE registers' vector length set by the + * AArch64 hardware implementor. 
Length can be from 128 to 2048 bits in + * multiples of 128 bits: + * 128 256 384 512 640 768 896 1024 1152 1280 1408 1536 1664 1792 1920 2048 + * Currently DynamoRIO supports implementations of up to 512 bits. + */ +uint +proc_get_vector_length_bytes(void); +#endif + DR_API /** * Returns the size in bytes needed for a buffer for saving the x87 floating point state. diff --git a/core/arch/proc_shared.c b/core/arch/proc_shared.c index 8394d379947..e74e83fbb48 100644 --- a/core/arch/proc_shared.c +++ b/core/arch/proc_shared.c @@ -66,6 +66,7 @@ static ptr_uint_t mask; /* bits that should be 0 to be cache-line-aligned */ cpu_info_t cpu_info = { VENDOR_UNKNOWN, #ifdef AARCHXX 0, + 0, #endif 0, 0, @@ -195,6 +196,12 @@ proc_get_architecture(void) { return cpu_info.architecture; } + +uint +proc_get_vector_length_bytes(void) +{ + return cpu_info.sve_vector_length_bytes; +} #endif features_t * diff --git a/core/globals.h b/core/globals.h index 4cb7b6a9ef3..8fc73e2f1d5 100644 --- a/core/globals.h +++ b/core/globals.h @@ -702,7 +702,7 @@ extern thread_id_t global_try_tid; typedef struct { /* WARNING: if you change the offsets of any of these fields, - * you must also change the offsets in / + * you must also change the offsets in / */ priv_mcontext_t mcontext; /* real machine context (in globals_shared.h + mcxtx.h) */ #ifdef UNIX diff --git a/core/ir/aarch64/codec.c b/core/ir/aarch64/codec.c index d7e8f626d44..c8169b84f16 100644 --- a/core/ir/aarch64/codec.c +++ b/core/ir/aarch64/codec.c @@ -1021,7 +1021,7 @@ get_elements_in_sve_vector(aarch64_reg_offset element_size) { const uint element_length = opnd_size_in_bits(get_opnd_size_from_offset(element_size)); - return opnd_size_in_bits(OPSZ_SVE_VL) / element_length; + return opnd_size_in_bits(OPSZ_SVE_VL_BYTES) / element_length; } /******************************************************************************* @@ -5180,7 +5180,7 @@ decode_opnd_svemem_gpr_simm6_vl(uint enc, int opcode, byte *pc, OUT opnd_t *opnd const int offset = extract_int(enc, 16, 6); IF_RETURN_FALSE(offset < -32 || offset > 31) const reg_id_t rn = decode_reg(extract_uint(enc, 5, 5), true, true); - const opnd_size_t mem_transfer = op_is_prefetch(opcode) ? OPSZ_0 : OPSZ_SVE_VL; + const opnd_size_t mem_transfer = op_is_prefetch(opcode) ? OPSZ_0 : OPSZ_SVE_VL_BYTES; /* As specified in the AArch64 SVE reference manual for contiguous prefetch * instructions, the immediate index value is a vector index into memory, NOT @@ -5189,7 +5189,7 @@ decode_opnd_svemem_gpr_simm6_vl(uint enc, int opcode, byte *pc, OUT opnd_t *opnd * memory displacement. So when creating the address operand here, it should be * multiplied by the current vector register length in bytes. */ - int vl_bytes = dr_get_sve_vl() / 8; + int vl_bytes = dr_get_sve_vector_length() / 8; *opnd = opnd_create_base_disp(rn, DR_REG_NULL, 0, offset * vl_bytes, mem_transfer); return true; @@ -5199,7 +5199,7 @@ static inline bool encode_opnd_svemem_gpr_simm6_vl(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out) { - const opnd_size_t mem_transfer = op_is_prefetch(opcode) ? OPSZ_0 : OPSZ_SVE_VL; + const opnd_size_t mem_transfer = op_is_prefetch(opcode) ? OPSZ_0 : OPSZ_SVE_VL_BYTES; if (!opnd_is_base_disp(opnd) || opnd_get_index(opnd) != DR_REG_NULL || opnd_get_size(opnd) != mem_transfer) return false; @@ -5210,7 +5210,7 @@ encode_opnd_svemem_gpr_simm6_vl(uint enc, int opcode, byte *pc, opnd_t opnd, * vector length at the IR level, transformed to a vector index in the * encoding. 
*/ - int vl_bytes = dr_get_sve_vl() / 8; + int vl_bytes = dr_get_sve_vector_length() / 8; if ((opnd_get_disp(opnd) % vl_bytes) != 0) return false; int disp = opnd_get_disp(opnd) / vl_bytes; @@ -5329,7 +5329,7 @@ decode_opnd_svemem_gpr_simm9_vl(uint enc, int opcode, byte *pc, OUT opnd_t *opnd bool is_vector = TEST(1u << 14, enc); /* Transfer size depends on whether we are transferring a Z or a P register. */ - opnd_size_t memory_transfer_size = is_vector ? OPSZ_SVE_VL : OPSZ_SVE_PL; + opnd_size_t memory_transfer_size = is_vector ? OPSZ_SVE_VL_BYTES : OPSZ_SVE_PL_BYTES; /* As specified in the AArch64 SVE reference manual for unpredicated vector * register load LDR and store STR instructions, the immediate index value is a @@ -5339,7 +5339,7 @@ decode_opnd_svemem_gpr_simm9_vl(uint enc, int opcode, byte *pc, OUT opnd_t *opnd * address operand here, it should be multiplied by the current vector or * predicate register length in bytes. */ - int vl_bytes = dr_get_sve_vl() / 8; + int vl_bytes = dr_get_sve_vector_length() / 8; int pl_bytes = vl_bytes / 8; int mul_len = is_vector ? vl_bytes : pl_bytes; *opnd = @@ -5359,7 +5359,7 @@ encode_opnd_svemem_gpr_simm9_vl(uint enc, int opcode, byte *pc, opnd_t opnd, bool is_vector = TEST(1u << 14, enc); /* Transfer size depends on whether we are transferring a Z or a P register. */ - opnd_size_t memory_transfer_size = is_vector ? OPSZ_SVE_VL : OPSZ_SVE_PL; + opnd_size_t memory_transfer_size = is_vector ? OPSZ_SVE_VL_BYTES : OPSZ_SVE_PL_BYTES; if (!opnd_is_base_disp(opnd) || opnd_get_size(opnd) != memory_transfer_size) return false; @@ -5367,7 +5367,7 @@ encode_opnd_svemem_gpr_simm9_vl(uint enc, int opcode, byte *pc, opnd_t opnd, * vector or predicate length at the IR level, transformed to a vector or * predicate index in the encoding. */ - int vl_bytes = dr_get_sve_vl() / 8; + int vl_bytes = dr_get_sve_vector_length() / 8; int pl_bytes = vl_bytes / 8; if (is_vector) { if ((opnd_get_disp(opnd) % vl_bytes) != 0) @@ -5512,7 +5512,7 @@ decode_opnd_svemem_gpr_simm4_vl_xreg(uint enc, int opcode, byte *pc, OUT opnd_t { const uint register_count = BITS(enc, 22, 21) + 1; const opnd_size_t transfer_size = - opnd_size_from_bytes((register_count * dr_get_sve_vl()) / 8); + opnd_size_from_bytes((register_count * dr_get_sve_vector_length()) / 8); return decode_svemem_gpr_simm4(enc, transfer_size, register_count, opnd); } @@ -5523,7 +5523,7 @@ encode_opnd_svemem_gpr_simm4_vl_xreg(uint enc, int opcode, byte *pc, opnd_t opnd { const uint register_count = BITS(enc, 22, 21) + 1; const opnd_size_t transfer_size = - opnd_size_from_bytes((register_count * dr_get_sve_vl()) / 8); + opnd_size_from_bytes((register_count * dr_get_sve_vector_length()) / 8); return encode_svemem_gpr_simm4(enc, transfer_size, register_count, opnd, enc_out); } @@ -9695,10 +9695,5 @@ uint encode_common(byte *pc, instr_t *i, decode_info_t *di) { ASSERT(((ptr_int_t)pc & 3) == 0); - -#if defined(DR_HOST_NOT_TARGET) || defined(STANDALONE_DECODER) - dr_set_sve_vl(256); -#endif - return encoder_v80(pc, i, di); } diff --git a/core/ir/aarch64/codec.h b/core/ir/aarch64/codec.h index e28b6bc656b..4fe2eaa54c8 100644 --- a/core/ir/aarch64/codec.h +++ b/core/ir/aarch64/codec.h @@ -58,12 +58,8 @@ encode_common(byte *pc, instr_t *i, decode_info_t *di); ((((uint32)(_enc)) >> (bitmin)) & (uint32)MASK((bitmax) - (bitmin) + 1)) #if !defined(DR_HOST_NOT_TARGET) && !defined(STANDALONE_DECODER) -/* TODO i#3044: Vector length will be read from cpuinfo, e.g. 
- * opnd_size_from_bytes(proc_get_vector_length())); - * Setting to fixed size for now in order to pass unit tests. - */ -# define OPSZ_SVE_VL opnd_size_from_bytes(dr_get_sve_vl() / 8) -# define OPSZ_SVE_PL opnd_size_from_bytes((dr_get_sve_vl() / 8) / 8) +# define OPSZ_SVE_VL_BYTES opnd_size_from_bytes(proc_get_vector_length_bytes()) +# define OPSZ_SVE_PL_BYTES opnd_size_from_bytes(proc_get_vector_length_bytes() / 8) #else /* SVE vector length for off-line decoder set using -vl option with drdisas, * e.g. @@ -72,8 +68,8 @@ encode_common(byte *pc, instr_t *i, decode_info_t *di); * 85865e6b ldr +0x37(%x19)[32byte] -> %z11 * $ */ -# define OPSZ_SVE_VL opnd_size_from_bytes(dr_get_sve_vl() / 8) -# define OPSZ_SVE_PL opnd_size_from_bytes((dr_get_sve_vl() / 8) / 8) +# define OPSZ_SVE_VL_BYTES opnd_size_from_bytes(dr_get_sve_vector_length() / 8) +# define OPSZ_SVE_PL_BYTES opnd_size_from_bytes((dr_get_sve_vector_length() / 8) / 8) #endif #define RETURN_FALSE \ diff --git a/core/ir/aarch64/decode.c b/core/ir/aarch64/decode.c index ef006a8d807..e056a7ef9d3 100644 --- a/core/ir/aarch64/decode.c +++ b/core/ir/aarch64/decode.c @@ -185,7 +185,7 @@ decode_first_opcode_byte(int opcode) const instr_info_t * opcode_to_encoding_info(uint opc, dr_isa_mode_t isa_mode) { - /* We do not use instr_info_t encoding info on AArch64. */ + /* We do not use instr_info_t encoding info on AArch64. FIXME i#1569 */ ASSERT_NOT_REACHED(); return NULL; } diff --git a/core/ir/aarch64/instr_create_api.h b/core/ir/aarch64/instr_create_api.h index 75a38c8db16..7a699c55f28 100644 --- a/core/ir/aarch64/instr_create_api.h +++ b/core/ir/aarch64/instr_create_api.h @@ -10974,10 +10974,10 @@ * DR_EXTEND_UXTX, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) * For the [\, \.D, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 64), 0) * For the [\, \.S, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) */ #define INSTR_CREATE_ldff1b_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ldff1b, Zt, Rn, Pg) @@ -11007,16 +11007,16 @@ * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) * For the [\, \.D, LSL #3] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 3) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 3) * For the [\, \.D] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) * For the [\, \.D, \ #3] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 3) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 3) * For the [\, \.D, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) */ #define INSTR_CREATE_ldff1d_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ldff1d, Zt, Rn, Pg) @@ -11054,22 +11054,22 @@ * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) * For the [\, \.D, LSL #1] variant: * 
opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 1) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 1) * For the [\, \.D] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) * For the [\, \.D, \ #1] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 1) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 1) * For the [\, \.D, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) * For the [\, \.S, \ #1] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 1) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 1) * For the [\, \.S, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) */ #define INSTR_CREATE_ldff1h_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ldff1h, Zt, Rn, Pg) @@ -11103,13 +11103,13 @@ * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) * For the [\, \.D] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 64), 0) * For the [\, \.D, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 64), 0) * For the [\, \.S, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) */ #define INSTR_CREATE_ldff1sb_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ldff1sb, Zt, Rn, Pg) @@ -11146,22 +11146,22 @@ * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) * For the [\, \.D, LSL #1] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 1) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 1) * For the [\, \.D] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) * For the [\, \.D, \ #1] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 1) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 1) * For the [\, \.D, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) * For the [\, \.S, \ #1] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 1) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 1) * For the [\, 
\.S, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) */ #define INSTR_CREATE_ldff1sh_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ldff1sh, Zt, Rn, Pg) @@ -11193,22 +11193,22 @@ * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) * For the [\, \.D, LSL #2] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 2) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 2) * For the [\, \.D] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) * For the [\, \.D, \ #2] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 2) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 2) * For the [\, \.D, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) * For the [\, \.S, \ #2] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 2) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 8), 2) * For the [\, \.S, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 8), 0) */ #define INSTR_CREATE_ldff1sw_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ldff1sw, Zt, Rn, Pg) @@ -11322,16 +11322,16 @@ * DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1) * For the B element size [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 8)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) * For the H element size [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 16)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) * For the S element size [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 32)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 32)) * For the D element size [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 64)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 64)) * For the [\.S{, #\}] variant: * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_4, * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) @@ -11340,13 +11340,13 @@ * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) * For the [\, \.D] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 64), 0) * For the [\, \.D, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 64), 0) * For the [\, \.S, \] variant: * 
opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) */ #define INSTR_CREATE_ld1b_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ld1b, Zt, Rn, Pg) @@ -11488,13 +11488,13 @@ * DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1) * For the H element size [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 16)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) * For the S element size [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 32)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 32)) * For the D element size [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 64)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 64)) * For the [\.S{, #\}] variant: * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_4, * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) @@ -11503,13 +11503,13 @@ * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) * For the [\, \.D] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 64), 0) * For the [\, \.D, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 64), 0) * For the [\, \.S, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) */ #define INSTR_CREATE_ld1sb_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ld1sb, Zt, Rn, Pg) @@ -11529,10 +11529,9 @@ * constructed with the function: * For the [\, \] variant: * opnd_create_base_disp_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, 0, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8)) - * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 8)) + * DR_EXTEND_UXTX, 0, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / + * 8)) For the [\{, #\, MUL VL}] variant: opnd_create_base_disp(Rn, + * DR_REG_NULL, 0, imm, opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) */ #define INSTR_CREATE_ldnt1b_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ldnt1b, Zt, Rn, Pg) @@ -11560,8 +11559,8 @@ * DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / (8 * opnd_size_to_bytes(Ts)))) - * For the [\.S{, #\}] variant: + * opnd_size_from_bytes(dr_get_sve_vector_length() / (8 * + * opnd_size_to_bytes(Ts)))) For the [\.S{, #\}] variant: * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_4, * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) * For the [\.D{, #\}] variant: @@ -11569,13 +11568,13 @@ * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) * For the [\, \.D] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 64), 0) 
* For the [\, \.D, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 64), 0) * For the [\, \.S, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) */ #define INSTR_CREATE_st1b_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_st1b, Rn, Zt, Pg) @@ -11595,10 +11594,9 @@ * constructed with the function: * For the [\, \] variant: * opnd_create_base_disp_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, 0, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8)) - * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 8)) + * DR_EXTEND_UXTX, 0, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / + * 8)) For the [\{, #\, MUL VL}] variant: opnd_create_base_disp(Rn, + * DR_REG_NULL, 0, imm, opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) */ #define INSTR_CREATE_stnt1b_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_stnt1b, Rn, Zt, Pg) @@ -12023,10 +12021,10 @@ * constructed with the function: * For the [\, \] variant: * opnd_create_base_disp_aarch64(Rn, Rm, DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(2 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8))) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(2 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_ld2b_sve_pred(dc, Zt, Pg, Rn) \ instr_create_2dst_2src(dc, OP_ld2b, Zt, opnd_create_increment_reg(Zt, 1), Rn, Pg) @@ -12046,10 +12044,10 @@ * constructed with the function: * For the [\, \] variant: * opnd_create_base_disp_aarch64(Rn, Rm, DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(3 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8))) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(3 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_ld3b_sve_pred(dc, Zt, Pg, Rn) \ instr_create_3dst_2src(dc, OP_ld3b, Zt, opnd_create_increment_reg(Zt, 1), \ @@ -12070,10 +12068,10 @@ * constructed with the function: * For the [\, \] variant: * opnd_create_base_disp_aarch64(Rn, Rm, DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(4 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8))) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(4 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_ld4b_sve_pred(dc, Zt, Pg, Rn) \ instr_create_4dst_2src(dc, OP_ld4b, Zt, opnd_create_increment_reg(Zt, 1), \ @@ -12095,10 +12093,10 @@ * constructed with the function: * For the [\, \] variant: * opnd_create_base_disp_aarch64(Rn, Rm, DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(2 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8))) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(2 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_st2b_sve_pred(dc, Zt, Pg, Rn) \ 
instr_create_1dst_3src(dc, OP_st2b, Rn, Zt, opnd_create_increment_reg(Zt, 1), Pg) @@ -12118,10 +12116,10 @@ * constructed with the function: * For the [\, \] variant: * opnd_create_base_disp_aarch64(Rn, Rm, DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(3 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8))) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(3 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_st3b_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_4src(dc, OP_st3b, Rn, Zt, opnd_create_increment_reg(Zt, 1), \ @@ -12142,10 +12140,10 @@ * constructed with the function: * For the [\, \] variant: * opnd_create_base_disp_aarch64(Rn, Rm, DR_EXTEND_UXTX, 0, 0, 0, - * opnd_size_from_bytes(4 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8))) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(4 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_st4b_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_5src(dc, OP_st4b, Rn, Zt, opnd_create_increment_reg(Zt, 1), \ @@ -12185,35 +12183,35 @@ * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) * For the [\, \.D, LSL #1] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 1) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 1) * For the [\, \.D] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) * For the [\, \.D, \ #1] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 1) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 1) * For the [\, \.D, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) * For the [\, \.S, \ #1] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 1) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 1) * For the [\, \.S, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) * For the [\, \, LSL #1] variants: * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() + * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() * /8/16/32), 1) * For the H element size [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 8)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) * For the S element size [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 16)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) * For the D element size [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * 
opnd_size_from_bytes(dr_get_sve_vl() / 32)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 32)) */ #define INSTR_CREATE_ld1h_sve_pred(dc, Zt, Pg, Zn) \ instr_create_1dst_2src(dc, OP_ld1h, Zt, Zn, Pg) @@ -12249,32 +12247,32 @@ * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) * For the [\, \.D, LSL #1] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 1) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 1) * For the [\, \.D] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) * For the [\, \.D, \ #1] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 1) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 1) * For the [\, \.D, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) * For the [\, \.S, \ #1] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 1) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 1) * For the [\, \.S, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) * For the [\, \, LSL #1] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() + * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() * / 16/32), 1) depending on Zt's element size. 
* For the S element size [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 16)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) * For the D element size [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 32)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 32)) */ #define INSTR_CREATE_ld1sh_sve_pred(dc, Zt, Pg, Zn) \ instr_create_1dst_2src(dc, OP_ld1sh, Zt, Zn, Pg) @@ -12310,32 +12308,32 @@ * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) * For the [\, \.D, LSL #2] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 2) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 2) * For the [\, \.D] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) * For the [\, \.D, \ #2] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 2) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 2) * For the [\, \.D, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) * For the [\, \.S, \ #2] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 2) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 8), 2) * For the [\, \.S, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 8), 0) * For the [\, \, LSL #2] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() + * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() * / 8/16), 2) depending on Zt's element size. 
* For the S element size [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 8)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) * For the D element size [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 16)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) */ #define INSTR_CREATE_ld1w_sve_pred(dc, Zt, Pg, Zn) \ instr_create_1dst_2src(dc, OP_ld1w, Zt, Zn, Pg) @@ -12363,22 +12361,22 @@ * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 0) * For the [\, \.D, LSL #3] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 3) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 3) * For the [\, \.D] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) * For the [\, \.D, \ #3] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 3) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 3) * For the [\, \.D, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) * For the variant \, \, LSL #3]: * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, - * true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 3) + * true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 8), 3) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 8)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) */ #define INSTR_CREATE_ld1d_sve_pred(dc, Zt, Pg, Zn) \ instr_create_1dst_2src(dc, OP_ld1d, Zt, Zn, Pg) @@ -12402,11 +12400,11 @@ * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) * For the [\, \, LSL #2] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() + * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() * / 16), 2) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 16)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) */ #define INSTR_CREATE_ld1sw_sve_pred(dc, Zt, Pg, Zn) \ instr_create_1dst_2src(dc, OP_ld1sw, Zt, Zn, Pg) @@ -12440,29 +12438,29 @@ * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) * For the [\, \.D, LSL #1] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 1) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 1) * For the [\, \.D] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) * For the [\, \.D, \ #1] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 1) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 1) * For the [\, \.D, \] variant: * 
opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 32), 0) * For the [\, \.S, \ #1] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 1) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 1) * For the [\, \.S, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) * For the [\, \, LSL #1] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() + * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() * /8/16/32), 1) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / opnd_size_to_bytes(Ts))) + * opnd_size_from_bytes(dr_get_sve_vector_length() / opnd_size_to_bytes(Ts))) */ #define INSTR_CREATE_st1h_sve_pred(dc, Zt, Pg, Zn) \ instr_create_1dst_2src(dc, OP_st1h, Zn, Zt, Pg) @@ -12496,29 +12494,30 @@ * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) * For the [\, \.D, LSL #2] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 2) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 2) * For the [\, \.D] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) * For the [\, \.D, \ #2] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 2) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 2) * For the [\, \.D, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 16), 0) * For the [\, \.S, \ #2] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 2) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 8), 2) * For the [\, \.S, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_4, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 8), 0) * For the [\, \, LSL #2] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() + * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() * / 8/16), 2) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / (8 * opnd_size_to_bytes(Ts)))) + * opnd_size_from_bytes(dr_get_sve_vector_length() / (8 * + * opnd_size_to_bytes(Ts)))) */ #define INSTR_CREATE_st1w_sve_pred(dc, Zt, Pg, Zn) \ instr_create_1dst_2src(dc, OP_st1w, Zn, Zt, Pg) @@ -12546,22 +12545,22 @@ * DR_EXTEND_UXTX, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 0) * For the [\, \.D, LSL #3] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 3) + * true, 0, 
opnd_size_from_bytes(dr_get_sve_vector_length() / 8), 3) * For the [\, \.D] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 8), 0) * For the [\, \.D, \ #3] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * true, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 3) + * true, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 8), 3) * For the [\, \.D, \] variant: * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, extend, - * 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 0) + * 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() / 8), 0) * For the [\, \, LSL #3] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 3) - * For the [\{, #\, MUL VL}] variant: - * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / (8 * opnd_size_to_bytes(Ts)))) + * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() + * / 8), 3) For the [\{, #\, MUL VL}] variant: opnd_create_base_disp(Rn, + * DR_REG_NULL, 0, imm, opnd_size_from_bytes(dr_get_sve_vector_length() / (8 * + * opnd_size_to_bytes(Ts)))) */ #define INSTR_CREATE_st1d_sve_pred(dc, Zt, Pg, Zn) \ instr_create_1dst_2src(dc, OP_st1d, Zn, Zt, Pg) @@ -12581,10 +12580,10 @@ * constructed with the function: * For the [\, \, LSL #3] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(2 * (dr_get_sve_vl() / 8)), 3) + * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8)), 3) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(2 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_ld2d_sve_pred(dc, Zt, Pg, Rn) \ instr_create_2dst_2src(dc, OP_ld2d, Zt, opnd_create_increment_reg(Zt, 1), Rn, Pg) @@ -12604,10 +12603,10 @@ * constructed with the function: * For the [\, \, LSL #1] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(2 * (dr_get_sve_vl() / 8)), 1) + * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8)), 1) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(2 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_ld2h_sve_pred(dc, Zt, Pg, Rn) \ instr_create_2dst_2src(dc, OP_ld2h, Zt, opnd_create_increment_reg(Zt, 1), Rn, Pg) @@ -12627,11 +12626,11 @@ * constructed with the function: * For the [\, \, LSL #2] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(2 * (dr_get_sve_vl() / 8)), 2) + * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8)), 2) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(dr_get_sve_vl() / 4)) - * opnd_size_from_bytes(2 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 4)) + * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_ld2w_sve_pred(dc, Zt, Pg, Rn) \ instr_create_2dst_2src(dc, OP_ld2w, Zt, opnd_create_increment_reg(Zt, 1), Rn, Pg) @@ -12651,10 +12650,10 @@ * constructed with the function: * For the [\, \, LSL #3] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, 
DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(3 * (dr_get_sve_vl() / 8)), 3) + * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8)), 3) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(3 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_ld3d_sve_pred(dc, Zt, Pg, Rn) \ instr_create_3dst_2src(dc, OP_ld3d, Zt, opnd_create_increment_reg(Zt, 1), \ @@ -12675,10 +12674,10 @@ * constructed with the function: * For the [\, \, LSL #1] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(3 * (dr_get_sve_vl() / 8)), 1) + * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8)), 1) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(3 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_ld3h_sve_pred(dc, Zt, Pg, Rn) \ instr_create_3dst_2src(dc, OP_ld3h, Zt, opnd_create_increment_reg(Zt, 1), \ @@ -12699,10 +12698,10 @@ * constructed with the function: * For the [\, \, LSL #2] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(3 * (dr_get_sve_vl() / 8)), 2) + * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8)), 2) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(3 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_ld3w_sve_pred(dc, Zt, Pg, Rn) \ instr_create_3dst_2src(dc, OP_ld3w, Zt, opnd_create_increment_reg(Zt, 1), \ @@ -12723,10 +12722,10 @@ * constructed with the function: * For the [\, \, LSL #3] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(4 * (dr_get_sve_vl() / 8)), 3) + * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8)), 3) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(4 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_ld4d_sve_pred(dc, Zt, Pg, Rn) \ instr_create_4dst_2src(dc, OP_ld4d, Zt, opnd_create_increment_reg(Zt, 1), \ @@ -12748,10 +12747,10 @@ * constructed with the function: * For the [\, \, LSL #1] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(4 * (dr_get_sve_vl() / 8)), 1) + * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8)), 1) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(4 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_ld4h_sve_pred(dc, Zt, Pg, Rn) \ instr_create_4dst_2src(dc, OP_ld4h, Zt, opnd_create_increment_reg(Zt, 1), \ @@ -12773,10 +12772,10 @@ * constructed with the function: * For the [\, \, LSL #2] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(4 * (dr_get_sve_vl() / 8)), 2) + * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8)), 2) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(4 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_ld4w_sve_pred(dc, Zt, Pg, Rn) \ 
instr_create_4dst_2src(dc, OP_ld4w, Zt, opnd_create_increment_reg(Zt, 1), \ @@ -12798,11 +12797,11 @@ * constructed with the function: * For the [\, \, LSL #3] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() + * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() * / 8), 3) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 8)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) */ #define INSTR_CREATE_ldnt1d_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ldnt1d, Zt, Rn, Pg) @@ -12822,11 +12821,11 @@ * constructed with the function: * For the [\, \, LSL #1] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() + * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() * / 8), 1) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 8)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) */ #define INSTR_CREATE_ldnt1h_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ldnt1h, Zt, Rn, Pg) @@ -12846,11 +12845,11 @@ * constructed with the function: * For the [\, \, LSL #2] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() + * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() * / 8), 2) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 8)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) */ #define INSTR_CREATE_ldnt1w_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ldnt1w, Zt, Rn, Pg) @@ -12870,10 +12869,10 @@ * constructed with the function: * For the [\, \, LSL #3] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(2 * (dr_get_sve_vl() / 8)), 3) + * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8)), 3) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(2 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_st2d_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_3src(dc, OP_st2d, Rn, Zt, opnd_create_increment_reg(Zt, 1), Pg) @@ -12893,10 +12892,10 @@ * constructed with the function: * For the [\, \, LSL #1] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(2 * (dr_get_sve_vl() / 8)), 1) + * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8)), 1) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(2 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_st2h_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_3src(dc, OP_st2h, Rn, Zt, opnd_create_increment_reg(Zt, 1), Pg) @@ -12916,10 +12915,10 @@ * constructed with the function: * For the [\, \, LSL #2] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(2 * (dr_get_sve_vl() / 8)), 2) + * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8)), 2) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(2 * 
(dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(2 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_st2w_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_3src(dc, OP_st2w, Rn, Zt, opnd_create_increment_reg(Zt, 1), Pg) @@ -12939,10 +12938,10 @@ * constructed with the function: * For the [\, \, LSL #3] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(3 * (dr_get_sve_vl() / 8)), 3) + * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8)), 3) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(3 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_st3d_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_4src(dc, OP_st3d, Rn, Zt, opnd_create_increment_reg(Zt, 1), \ @@ -12963,10 +12962,10 @@ * constructed with the function: * For the [\, \, LSL #1] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(3 * (dr_get_sve_vl() / 8)), 1) + * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8)), 1) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(3 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_st3h_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_4src(dc, OP_st3h, Rn, Zt, opnd_create_increment_reg(Zt, 1), \ @@ -12987,10 +12986,10 @@ * constructed with the function: * For the [\, \, LSL #2] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(3 * (dr_get_sve_vl() / 8)), 2) + * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8)), 2) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(3 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(3 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_st3w_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_4src(dc, OP_st3w, Rn, Zt, opnd_create_increment_reg(Zt, 1), \ @@ -13011,10 +13010,10 @@ * constructed with the function: * For the [\, \, LSL #3] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(4 * (dr_get_sve_vl() / 8)), 3) + * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8)), 3) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(4 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_st4d_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_5src(dc, OP_st4d, Rn, Zt, opnd_create_increment_reg(Zt, 1), \ @@ -13036,10 +13035,10 @@ * constructed with the function: * For the [\, \, LSL #1] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * opnd_size_from_bytes(4 * (dr_get_sve_vl() / 8)), 1) + * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8)), 1) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(4 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_st4h_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_5src(dc, OP_st4h, Rn, Zt, opnd_create_increment_reg(Zt, 1), \ @@ -13061,10 +13060,10 @@ * constructed with the function: * For the [\, \, LSL #2] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, DR_EXTEND_UXTX, true, 0, 0, - * 
opnd_size_from_bytes(4 * (dr_get_sve_vl() / 8)), 2) + * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8)), 2) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm4, - * opnd_size_from_bytes(4 * (dr_get_sve_vl() / 8))) + * opnd_size_from_bytes(4 * (dr_get_sve_vector_length() / 8))) */ #define INSTR_CREATE_st4w_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_5src(dc, OP_st4w, Rn, Zt, opnd_create_increment_reg(Zt, 1), \ @@ -13086,12 +13085,12 @@ * constructed with the function: * For the [\, \, LSL #3] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() + * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() * / 8), 3) * For the [\, \, LSL #3] variant: * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 8)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) */ #define INSTR_CREATE_stnt1d_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_stnt1d, Rn, Zt, Pg) @@ -13111,11 +13110,11 @@ * constructed with the function: * For the [\, \, LSL #1] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() + * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() * / 8), 1) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 8)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) */ #define INSTR_CREATE_stnt1h_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_stnt1h, Rn, Zt, Pg) @@ -13135,11 +13134,11 @@ * constructed with the function: * For the [\, \, LSL #2] variant: * opnd_create_base_disp_shift_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() + * DR_EXTEND_UXTX, true, 0, 0, opnd_size_from_bytes(dr_get_sve_vector_length() * / 8), 2) * For the [\{, #\, MUL VL}] variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 8)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) */ #define INSTR_CREATE_stnt1w_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_stnt1w, Rn, Zt, Pg) @@ -13161,16 +13160,16 @@ * constructed with the function: * For the B element size variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 8)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) * For the H element size variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 16)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) * For the S element size variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 32)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 32)) * For the D element size variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 64)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 64)) */ #define INSTR_CREATE_ldnf1b_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ldnf1b, Zt, Rn, Pg) @@ -13188,7 +13187,7 @@ * \param Rn The first source base register with an immediate offset, * constructed with the function: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 8)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) */ #define INSTR_CREATE_ldnf1d_sve_pred(dc, Zt, Pg, Rn) \ 
instr_create_1dst_2src(dc, OP_ldnf1d, Zt, Rn, Pg) @@ -13209,13 +13208,13 @@ * constructed with the function: * For the H element size variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 8)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) * For the S element size variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 16)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) * For the D element size variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 32)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 32)) */ #define INSTR_CREATE_ldnf1h_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ldnf1h, Zt, Rn, Pg) @@ -13236,13 +13235,13 @@ * constructed with the function: * For the H element size variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 16)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) * For the S element size variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 32)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 32)) * For the D element size variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 64)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 64)) */ #define INSTR_CREATE_ldnf1sb_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ldnf1sb, Zt, Rn, Pg) @@ -13262,10 +13261,10 @@ * constructed with the function: * For the S element size variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 16)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) * For the D element size variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 32)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 32)) */ #define INSTR_CREATE_ldnf1sh_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ldnf1sh, Zt, Rn, Pg) @@ -13283,7 +13282,7 @@ * \param Rn The first source base register with an immediate offset, * constructed with the function: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 16)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) */ #define INSTR_CREATE_ldnf1sw_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ldnf1sw, Zt, Rn, Pg) @@ -13303,10 +13302,10 @@ * constructed with the function: * For the S element size variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 8)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 8)) * For the D element size variant: * opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm, - * opnd_size_from_bytes(dr_get_sve_vl() / 16)) + * opnd_size_from_bytes(dr_get_sve_vector_length() / 16)) */ #define INSTR_CREATE_ldnf1w_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ldnf1w, Zt, Rn, Pg) diff --git a/core/ir/decode_shared.c b/core/ir/decode_shared.c index ab47cd928d0..5c5475179dd 100644 --- a/core/ir/decode_shared.c +++ b/core/ir/decode_shared.c @@ -180,7 +180,7 @@ int sve_veclens[] = { 128, 256, 384, 512, 640, 768, 896, 1024, 1152, 1280, 1408, 1536, 1664, 1792, 1920, 2048 }; void -dr_set_sve_vl(int vl) +dr_set_sve_vector_length(int vl) { /* TODO i#3044: Vector length will be read from h/w when running on SVE. 
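     * Until then the value passed in (e.g. from drdisas's -vl option) is
     * matched against the sve_veclens list above by the loop below.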
*/ for (int i = 0; i < sizeof(sve_veclens); i++) { @@ -193,7 +193,7 @@ dr_set_sve_vl(int vl) } int -dr_get_sve_vl(void) +dr_get_sve_vector_length(void) { return sve_veclen; } diff --git a/core/ir/disassemble_shared.c b/core/ir/disassemble_shared.c index 936bad963b6..19241476ca2 100644 --- a/core/ir/disassemble_shared.c +++ b/core/ir/disassemble_shared.c @@ -1188,7 +1188,7 @@ internal_instr_disassemble(char *buf, size_t bufsz, size_t *sofar INOUT, return; } else if (instr_opcode_valid(instr)) { #ifdef AARCH64 - /* We do not use instr_info_t encoding info on AArch64. */ + /* We do not use instr_info_t encoding info on AArch64. FIXME i#1569 */ name = get_opcode_name(instr_get_opcode(instr)); #else const instr_info_t *info = instr_get_instr_info(instr); diff --git a/core/ir/encode_api.h b/core/ir/encode_api.h index 912d9c22a63..48e669be79a 100644 --- a/core/ir/encode_api.h +++ b/core/ir/encode_api.h @@ -80,13 +80,13 @@ dr_get_isa_mode(void *drcontext); * running on SVE. */ void -dr_set_sve_vl(int vl); +dr_set_sve_vector_length(int vl); /** * Read AArch64 Scalable Vector Extension's vector length, in bits. */ int -dr_get_sve_vl(void); +dr_get_sve_vector_length(void); enum { #ifdef X86 diff --git a/core/ir/instr_shared.c b/core/ir/instr_shared.c index 4e0ca8ae09e..66c00c74957 100644 --- a/core/ir/instr_shared.c +++ b/core/ir/instr_shared.c @@ -377,11 +377,17 @@ private_instr_encode(dcontext_t *dcontext, instr_t *instr, bool always_cache) if (nxt == NULL) { nxt = instr_encode_ignore_reachability(dcontext, instr, buf); if (nxt == NULL) { +#ifdef AARCH64 + /* We do not use instr_info_t encoding info on AArch64. FIXME i#1569 */ + SYSLOG_INTERNAL_WARNING("cannot encode %s", + get_opcode_name(instr_get_opcode(instr))); +#else SYSLOG_INTERNAL_WARNING("cannot encode %s", opcode_to_encoding_info(instr->opcode, instr_get_isa_mode(instr) _IF_ARM(false)) ->name); +#endif if (!TEST(INSTR_IS_NOALLOC_STRUCT, instr->flags)) heap_reachable_free(dcontext, buf, MAX_INSTR_LENGTH HEAPACCT(ACCT_IR)); return 0; @@ -910,8 +916,7 @@ instr_get_eflags(instr_t *instr, dr_opnd_query_flags_t flags) encoded = true; len = private_instr_encode(dcontext, instr, true /*cache*/); if (len == 0) { - if (!instr_is_label(instr)) - CLIENT_ASSERT(false, "instr_get_eflags: invalid instr"); + CLIENT_ASSERT(instr_is_label(instr), "instr_get_eflags: invalid instr"); return 0; } } @@ -1805,6 +1810,35 @@ instr_uses_reg(instr_t *instr, reg_id_t reg) bool instr_reg_in_dst(instr_t *instr, reg_id_t reg) { +#ifdef AARCH64 + /* FFR does not appear in any operand; it is implied by the instruction type or + * accessed via SVE predicate registers. + */ + if (reg == DR_REG_FFR) { + switch (instr_get_opcode(instr)) { + case OP_setffr: + case OP_wrffr: + + case OP_ldff1b: + case OP_ldff1d: + case OP_ldff1h: + case OP_ldff1sb: + case OP_ldff1sh: + case OP_ldff1sw: + case OP_ldff1w: + + case OP_ldnf1b: + case OP_ldnf1d: + case OP_ldnf1h: + case OP_ldnf1sb: + case OP_ldnf1sh: + case OP_ldnf1sw: + case OP_ldnf1w: return true; + default: break; + } + } +#endif + int i; for (i = 0; i < instr_num_dsts(instr); i++) { if (opnd_uses_reg(instr_get_dst(instr, i), reg)) @@ -1822,6 +1856,19 @@ instr_reg_in_src(instr_t *instr, reg_id_t reg) if (instr_get_opcode(instr) == OP_nop_modrm) return false; #endif + +#ifdef AARCH64 + /* FFR does not appear in any operand; it is implied by the instruction type or + * accessed via SVE predicate registers.
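+ * For example, rdffr Pd.B names only Pd as an operand yet reads FFR as an implicit source.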
+ */ + if (reg == DR_REG_FFR) { + switch (instr_get_opcode(instr)) { + case OP_rdffr: + case OP_rdffrs: return true; + default: break; + } + } +#endif for (i = 0; i < instr_num_srcs(instr); i++) { if (opnd_uses_reg(instr_get_src(instr, i), reg)) return true; diff --git a/core/ir/opnd_shared.c b/core/ir/opnd_shared.c index 8b9a0997ce6..982362e9f66 100644 --- a/core/ir/opnd_shared.c +++ b/core/ir/opnd_shared.c @@ -2759,8 +2759,13 @@ reg_get_size(reg_id_t reg) return OPSZ_4; if (reg >= DR_REG_MDCCSR_EL0 && reg <= DR_REG_SPSR_FIQ) return OPSZ_8; - if (reg >= DR_REG_Z0 && reg <= DR_REG_Z31) + if (reg >= DR_REG_Z0 && reg <= DR_REG_Z31) { +# if !defined(DR_HOST_NOT_TARGET) && !defined(STANDALONE_DECODER) + return opnd_size_from_bytes(proc_get_vector_length_bytes()); +# else return OPSZ_SCALABLE; +# endif + } if ((reg >= DR_REG_P0 && reg <= DR_REG_P15) || reg == DR_REG_FFR) return OPSZ_SCALABLE_PRED; if (reg == DR_REG_CNTVCT_EL0) diff --git a/core/lib/globals_api.h b/core/lib/globals_api.h index ae6e08390fc..5891206228a 100644 --- a/core/lib/globals_api.h +++ b/core/lib/globals_api.h @@ -664,19 +664,25 @@ typedef uint64 dr_opmask_t; #if defined(AARCHXX) /** - * 128-bit ARM SIMD Vn register. - * In AArch64, align to 16 bytes for better performance. - * In AArch32, we're not using any uint64 fields here to avoid alignment - * padding in sensitive structs. We could alternatively use pragma pack. + * 512-bit ARM Scalable Vector Extension (SVE) vector registers Zn and + * predicate registers Pn. The low 128 bits of Zn overlap with the existing + * ARM Advanced SIMD (NEON) Vn registers. The SVE specification defines the + * following valid vector lengths: + * 128 256 384 512 640 768 896 1024 1152 1280 1408 1536 1664 1792 1920 2048 + * We currently support a 512-bit maximum due to DR's stack size limitation + * (the machine context is stored on the stack). In AArch64, align to 16 bytes + * for better performance. In AArch32, we're not using any uint64 fields here + * to avoid alignment padding in sensitive structs. We could alternatively use + * pragma pack. */ # ifdef X64 typedef union ALIGN_VAR(16) _dr_simd_t { - byte b; /**< Bottom 8 bits of Vn == Bn. */ - ushort h; /**< Bottom 16 bits of Vn == Hn. */ - uint s; /**< Bottom 32 bits of Vn == Sn. */ - uint d[2]; /**< Bottom 64 bits of Vn == Dn as d[1]:d[0]. */ - uint q[4]; /**< 128-bit Qn as q[3]:q[2]:q[1]:q[0]. */ - uint u32[4]; /**< The full 128-bit register. */ + byte b; /**< Byte (8-bit, Bn) scalar element of Vn, Zn, or Pn. */ + ushort h; /**< Halfword (16-bit, Hn) scalar element of Vn, Zn, or Pn. */ + uint s; /**< Singleword (32-bit, Sn) scalar element of Vn, Zn, or Pn. */ + uint64 d; /**< Doubleword (64-bit, Dn) scalar element of Vn, Zn, or Pn. */ + uint q[4]; /**< The full 128-bit Vn register, Qn as q[3]:q[2]:q[1]:q[0]. */ + uint u32[16]; /**< The full 512-bit Zn, Pn, and FFR registers. */ } dr_simd_t; # else typedef union _dr_simd_t { @@ -686,16 +692,26 @@ typedef union _dr_simd_t { } dr_simd_t; # endif # ifdef X64 -# define MCXT_NUM_SIMD_SLOTS \ - 32 /**< Number of 128-bit SIMD Vn slots in dr_mcontext_t \ */ +# define MCXT_NUM_SIMD_SVE_SLOTS \ + 32 /**< Number of SIMD Vn/Zn slots in dr_mcontext_t. \ */ +# define MCXT_NUM_SVEP_SLOTS 16 /**< Number of SVE predicate Pn slots in dr_mcontext_t. */ +# define MCXT_NUM_FFR_SLOTS \ + 1 /**< Number of first-fault register slots in dr_mcontext_t. */ + /** Total number of SIMD register slots in dr_mcontext_t.
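+ * On AArch64 this totals 32 + 16 + 1 = 49 slots.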
*/ +# define MCXT_NUM_SIMD_SLOTS \ + (MCXT_NUM_SIMD_SVE_SLOTS + MCXT_NUM_SVEP_SLOTS + MCXT_NUM_FFR_SLOTS) # else -# define MCXT_NUM_SIMD_SLOTS \ - 16 /**< Number of 128-bit SIMD Vn slots in dr_mcontext_t \ */ +# define MCXT_NUM_SIMD_SLOTS \ + 16 /**< Number of 128-bit SIMD Vn slots in dr_mcontext_t. \ */ +/* 32-bit ARM does not have these slots, but they are defined for compatibility. + */ +# define MCXT_NUM_SVEP_SLOTS 0 +# define MCXT_NUM_FFR_SLOTS 0 # endif -# define PRE_SIMD_PADDING \ - 0 /**< Bytes of padding before xmm/ymm dr_mcontext_t slots \ */ +# define PRE_SIMD_PADDING \ + 0 /**< Bytes of padding before xmm/ymm dr_mcontext_t slots. \ */ # define MCXT_NUM_OPMASK_SLOTS \ 0 /**< Number of 16-64-bit OpMask Kn slots in dr_mcontext_t, \ diff --git a/core/lib/mcxtx_api.h b/core/lib/mcxtx_api.h index d7f36cd2a6f..e02543783a0 100644 --- a/core/lib/mcxtx_api.h +++ b/core/lib/mcxtx_api.h @@ -129,13 +129,36 @@ uint cpsr; /**< The current program status registers in AArch32. */ }; /**< The anonymous union of alternative names for apsr/cpsr register. */ # endif /* 64/32-bit */ + +# ifdef X64 /* 64-bit */ + /** + * The Arm AArch64 SIMD (DR_REG_Q0->DR_REG_Q31) and Scalable Vector + * Extension (SVE) vector registers (DR_REG_Z0->DR_REG_Z31). + */ + dr_simd_t simd[MCXT_NUM_SIMD_SVE_SLOTS]; + /** + * The Arm AArch64 Scalable Vector Extension (SVE) predicate registers + * DR_REG_P0 to DR_REG_P15. + */ + dr_simd_t svep[MCXT_NUM_SVEP_SLOTS]; + /** + * The Arm AArch64 Scalable Vector Extension (SVE) first-fault register + * DR_REG_FFR, used by SVE vector load instructions. + */ + dr_simd_t ffr; +# else + /* + * For the Arm AArch32 SIMD registers, we would probably be ok if we did + * not preserve the callee-saved registers (q4-q7 == d8-d15) but to be safe + * we preserve them all. We do not need anything more than word alignment + * for OP_vldm/OP_vstm, and dr_simd_t has no fields larger than 32 bits, so + * we have no padding. + */ /** - * The SIMD registers. We would probably be ok if we did not preserve the - * callee-saved registers (q4-q7 == d8-d15) but to be safe we preserve them - * all. We do not need anything more than word alignment for OP_vldm/OP_vstm, - * and dr_simd_t has no fields larger than 32 bits, so we have no padding. + * The Arm AArch32 SIMD registers. */ dr_simd_t simd[MCXT_NUM_SIMD_SLOTS]; +# endif #elif defined(X86) /* Our inlined ibl uses eax-edx, so we place them together to fit * on the same 32-byte cache line; yet we also want to simplify diff --git a/core/unix/include/sigcontext.h b/core/unix/include/sigcontext.h index ba0837af6a8..b4acbbdcfb0 100644 --- a/core/unix/include/sigcontext.h +++ b/core/unix/include/sigcontext.h @@ -357,6 +357,19 @@ struct fpsimd_context { __u32 fpcr; __uint128_t vregs[32]; }; + +/* TODO i#5365: Storage of sve_context in kernel_sigcontext_t.__reserved, see + * above. See also sigcontext_to_mcontext_simd() and + * mcontext_to_sigcontext_simd(). + */ + +# define SVE_MAGIC 0x53564501 + +struct sve_context { + struct _aarch64_ctx head; + __u16 vl; + __u16 __reserved[3]; +}; # endif #endif /* AARCH64 */ diff --git a/core/unix/os.c b/core/unix/os.c index ed7981d64b5..d5133bf1646 100644 --- a/core/unix/os.c +++ b/core/unix/os.c @@ -4039,6 +4039,12 @@ client_thread_run(void) dcontext_t *dcontext; byte *xsp; GET_STACK_PTR(xsp); +# ifdef AARCH64 + /* AArch64's Scalable Vector Extension (SVE) requires more space on the + * stack. Align to a page boundary, matching the alignment assumed in get_clone_record().
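+ * E.g., with 4KiB pages an xsp of 0x7f1234567e58 aligns back to 0x7f1234567000.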
+ */ + xsp = (app_pc)ALIGN_BACKWARD(xsp, PAGE_SIZE); +# endif void *crec = get_clone_record((reg_t)xsp); /* i#2335: we support setup separate from start, and we want to allow a client * to create a client thread during init, but we do not support that thread diff --git a/core/unix/signal.c b/core/unix/signal.c index 6ff6a40269d..0cbc9433721 100644 --- a/core/unix/signal.c +++ b/core/unix/signal.c @@ -909,8 +909,8 @@ set_clone_record_fields(void *record, reg_t app_thread_xsp, app_pc continuation_ * * CAUTION: don't use a lot of stack in this routine as it gets invoked on the * dstack from new_thread_setup - this is because this routine assumes - * no more than a page of dstack has been used so far since the clone - * system call was done. + * no more than a page of dstack for X86 and 2 pages of dstack for + * AArch64 have been used so far since the clone system call was done. */ void * get_clone_record(reg_t xsp) @@ -924,14 +924,20 @@ get_clone_record(reg_t xsp) /* The (size of the clone record + * stack used by new_thread_start (only for setting up priv_mcontext_t) + * stack used by new_thread_setup before calling get_clone_record()) - * is less than a page. This is verified by the assert below. If it does - * exceed a page, it won't happen at random during runtime, but in a - * predictable way during development, which will be caught by the assert. - * The current usage is about 800 bytes for clone_record + - * sizeof(priv_mcontext_t) + few words in new_thread_setup before - * get_clone_record() is called. + * is less than a page for X86 and 2 pages for AArch64. This is verified by + * the assert below. Exceeding that limit won't happen at random during + * runtime, but in a predictable way during development, where the assert + * will catch it. + * + * The current usage is about 800 bytes (X86) or 1920 bytes (AArch64) for + * the clone_record + sizeof(priv_mcontext_t) + a few words in + * new_thread_setup before get_clone_record() is called. */ +#ifdef AARCH64 + dstack_base = (byte *)ALIGN_FORWARD(xsp, PAGE_SIZE) + PAGE_SIZE; +#else dstack_base = (byte *)ALIGN_FORWARD(xsp, PAGE_SIZE); +#endif record = (clone_record_t *)(dstack_base - sizeof(clone_record_t)); /* dstack_base and the dstack in the clone record should be the same. */ diff --git a/core/unix/signal_linux_aarch64.c b/core/unix/signal_linux_aarch64.c index e44558247c0..585365930a2 100644 --- a/core/unix/signal_linux_aarch64.c +++ b/core/unix/signal_linux_aarch64.c @@ -76,8 +76,11 @@ sigcontext_to_mcontext_simd(priv_mcontext_t *mc, sig_full_cxt_t *sc_full) ASSERT(fpc->head.size == sizeof(struct fpsimd_context)); mc->fpsr = fpc->fpsr; mc->fpcr = fpc->fpcr; - ASSERT(sizeof(mc->simd) == sizeof(fpc->vregs)); + ASSERT((sizeof(mc->simd->q) * MCXT_NUM_SIMD_SVE_SLOTS) == sizeof(fpc->vregs)); memcpy(&mc->simd, &fpc->vregs, sizeof(mc->simd)); + /* TODO i#5365: memcpy(&mc->simd->u32,...) + * See also sve_context in core/unix/include/sigcontext.h. + */ } void @@ -91,8 +94,11 @@ mcontext_to_sigcontext_simd(sig_full_cxt_t *sc_full, priv_mcontext_t *mc) fpc->head.size = sizeof(struct fpsimd_context); fpc->fpsr = mc->fpsr; fpc->fpcr = mc->fpcr; - ASSERT(sizeof(fpc->vregs) == sizeof(mc->simd)); + ASSERT(sizeof(fpc->vregs) == (sizeof(mc->simd->q) * MCXT_NUM_SIMD_SVE_SLOTS)); memcpy(&fpc->vregs, &mc->simd, sizeof(fpc->vregs)); + /* TODO i#5365: memcpy(..., &mc->simd->u32) + * See also sve_context in core/unix/include/sigcontext.h.
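+ * A full SVE-aware copy would move proc_get_vector_length_bytes() bytes per Zn via the u32 field rather than the 16-byte vregs entries.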
+ */ next->magic = 0; next->size = 0; } diff --git a/ext/drstatecmp/drstatecmp.c b/ext/drstatecmp/drstatecmp.c index 8f3a01fca1a..a1fea81071f 100644 --- a/ext/drstatecmp/drstatecmp.c +++ b/ext/drstatecmp/drstatecmp.c @@ -483,7 +483,8 @@ drstatecmp_check_simd_value #elif defined(AARCHXX) (void *tag, dr_simd_t *value, dr_simd_t *expected) { - if (memcmp(value, expected, sizeof(dr_simd_t))) + size_t vl = proc_get_vector_length_bytes(); + if (memcmp(value, expected, vl)) drstatecmp_report_error("SIMD mismatch", tag); } #elif defined(RISCV64) @@ -616,7 +617,11 @@ drstatecmp_check_machine_state(dr_mcontext_t *mc_instrumented, dr_mcontext_t *mc #endif drstatecmp_check_gpr_value("xsp", tag, mc_instrumented->xsp, mc_expected->xsp); +#ifdef AARCH64 + for (int i = 0; i < MCXT_NUM_SIMD_SVE_SLOTS; i++) { +#else for (int i = 0; i < MCXT_NUM_SIMD_SLOTS; i++) { +#endif drstatecmp_check_simd_value(tag, &mc_instrumented->simd[i], &mc_expected->simd[i]); } diff --git a/suite/runsuite_wrapper.pl b/suite/runsuite_wrapper.pl index 7d8d8df958e..eb9485d530d 100755 --- a/suite/runsuite_wrapper.pl +++ b/suite/runsuite_wrapper.pl @@ -49,6 +49,7 @@ my $mydir = dirname(abs_path($0)); my $is_CI = 0; my $is_aarchxx = $Config{archname} =~ /(aarch64)|(arm)/; +my $is_x86_64 = $Config{archname} =~ /x86_64/; my $is_long = $ENV{'CI_TRIGGER'} eq 'push' && $ENV{'CI_BRANCH'} eq 'refs/heads/master'; # Forward args to runsuite.cmake: @@ -348,6 +349,13 @@ } else { $issue_no = "#2417"; } + } elsif ($is_x86_64 && ($ENV{'DYNAMORIO_CROSS_AARCHXX_LINUX_ONLY'} eq 'yes') && $args =~ /64_only/) { + # These AArch64 cross-compiled tests fail on x86-64 QEMU but pass + # on native AArch64 hardware. + $ignore_failures_64{'code_api|client.drx_buf-test'} = 1; + $ignore_failures_64{'code_api|sample.memval_simple'} = 1; + $ignore_failures_64{'code_api|client.drreg-test'} = 1; + $issue_no = "#6260"; } elsif ($^O eq 'darwin') { %ignore_failures_32 = ('code_api|common.decode-bad' => 1, # i#3127 'code_api|linux.signal0000' => 1, # i#3127 diff --git a/suite/tests/api/dis-a64.c b/suite/tests/api/dis-a64.c index f673a259524..762feaf70ff 100644 --- a/suite/tests/api/dis-a64.c +++ b/suite/tests/api/dis-a64.c @@ -35,6 +35,7 @@ #include "configure.h" #include "dr_api.h" + #include #include #include @@ -260,6 +261,8 @@ main(int argc, char *argv[]) return 0; } + enable_all_test_cpu_features(); + if (strcmp(argv[1], "-d") == 0) { run_decode(dc, argv[2]); dr_standalone_exit(); diff --git a/suite/tests/api/ir_aarch64_sve.c b/suite/tests/api/ir_aarch64_sve.c index 688965f985f..81d42f0e009 100644 --- a/suite/tests/api/ir_aarch64_sve.c +++ b/suite/tests/api/ir_aarch64_sve.c @@ -20540,6 +20540,8 @@ main(int argc, char *argv[]) bool test_result; instr_t *instr; + enable_all_test_cpu_features(); + RUN_INSTR_TEST(add_sve_pred); RUN_INSTR_TEST(add_sve_shift); RUN_INSTR_TEST(add_sve); diff --git a/suite/tests/api/ir_aarch64_sve2.c b/suite/tests/api/ir_aarch64_sve2.c index 8fb7d185369..56c9810de66 100644 --- a/suite/tests/api/ir_aarch64_sve2.c +++ b/suite/tests/api/ir_aarch64_sve2.c @@ -8371,6 +8371,8 @@ main(int argc, char *argv[]) bool test_result; instr_t *instr; + enable_all_test_cpu_features(); + RUN_INSTR_TEST(aesd_sve); RUN_INSTR_TEST(aese_sve); RUN_INSTR_TEST(bcax_sve); diff --git a/suite/tests/api/ir_aarch64_v81.c b/suite/tests/api/ir_aarch64_v81.c index bd58f89ad1f..4d9485c2da6 100644 --- a/suite/tests/api/ir_aarch64_v81.c +++ b/suite/tests/api/ir_aarch64_v81.c @@ -362,6 +362,8 @@ main(int argc, char *argv[]) bool test_result; instr_t *instr; + 
enable_all_test_cpu_features(); + RUN_INSTR_TEST(sqrdmlsh_scalar); RUN_INSTR_TEST(sqrdmlsh_scalar_idx); RUN_INSTR_TEST(sqrdmlsh_vector); diff --git a/suite/tests/api/ir_aarch64_v82.c b/suite/tests/api/ir_aarch64_v82.c index 50d33d88116..77876dc4864 100644 --- a/suite/tests/api/ir_aarch64_v82.c +++ b/suite/tests/api/ir_aarch64_v82.c @@ -5583,6 +5583,8 @@ main(int argc, char *argv[]) bool test_result; instr_t *instr; + enable_all_test_cpu_features(); + RUN_INSTR_TEST(fcvtas_vector); RUN_INSTR_TEST(fcvtas_scalar); RUN_INSTR_TEST(fcvtau_vector); diff --git a/suite/tests/api/ir_aarch64_v83.c b/suite/tests/api/ir_aarch64_v83.c index e53b8fd1d74..64b029ce71d 100644 --- a/suite/tests/api/ir_aarch64_v83.c +++ b/suite/tests/api/ir_aarch64_v83.c @@ -616,6 +616,8 @@ main(int argc, char *argv[]) bool test_result; instr_t *instr; + enable_all_test_cpu_features(); + RUN_INSTR_TEST(fcadd_vector); RUN_INSTR_TEST(fcmla_vector); RUN_INSTR_TEST(fcmla_vector_idx); diff --git a/suite/tests/api/ir_aarch64_v84.c b/suite/tests/api/ir_aarch64_v84.c index 516237371bf..b4b310106b5 100644 --- a/suite/tests/api/ir_aarch64_v84.c +++ b/suite/tests/api/ir_aarch64_v84.c @@ -283,6 +283,8 @@ main(int argc, char *argv[]) bool test_result; instr_t *instr; + enable_all_test_cpu_features(); + /* ARMv8.4-RCPC */ RUN_INSTR_TEST(ldapur); RUN_INSTR_TEST(ldapurb); diff --git a/suite/tests/api/ir_aarch64_v86.c b/suite/tests/api/ir_aarch64_v86.c index 46e6905807e..d8616343ff7 100644 --- a/suite/tests/api/ir_aarch64_v86.c +++ b/suite/tests/api/ir_aarch64_v86.c @@ -698,6 +698,8 @@ main(int argc, char *argv[]) bool test_result; instr_t *instr; + enable_all_test_cpu_features(); + RUN_INSTR_TEST(bfcvt); RUN_INSTR_TEST(bfcvtn2_vector); RUN_INSTR_TEST(bfcvtn_vector); diff --git a/suite/tests/api/opnd-a64.c b/suite/tests/api/opnd-a64.c index f07a2b73bac..5278196e87f 100644 --- a/suite/tests/api/opnd-a64.c +++ b/suite/tests/api/opnd-a64.c @@ -59,18 +59,37 @@ test_get_size() // Check sizes of FP/SIMD regs. for (int i = 0; i < proc_num_simd_registers(); i++) { - ASSERT(reg_get_size((reg_id_t)DR_REG_H0 + i) == OPSZ_2); - ASSERT(reg_get_size((reg_id_t)DR_REG_S0 + i) == OPSZ_4); - ASSERT(reg_get_size((reg_id_t)DR_REG_D0 + i) == OPSZ_8); - ASSERT(reg_get_size((reg_id_t)DR_REG_Q0 + i) == OPSZ_16); + if (i < MCXT_NUM_SIMD_SVE_SLOTS) { + ASSERT(reg_get_size((reg_id_t)DR_REG_H0 + i) == OPSZ_2); + ASSERT(reg_get_size((reg_id_t)DR_REG_S0 + i) == OPSZ_4); + ASSERT(reg_get_size((reg_id_t)DR_REG_D0 + i) == OPSZ_8); + ASSERT(reg_get_size((reg_id_t)DR_REG_Q0 + i) == OPSZ_16); + } } - // Check sizes of SVE vector regs. + opnd_size_t opsz_vl = OPSZ_NA; + if (proc_has_feature(FEATURE_SVE)) { + /* Check sizes of the SVE vector and predicate registers. Read the vector + * length directly from the hardware and compare it with the OPSZ_ value + * reg_get_size() returns. + */ + uint64 vl; + /* Read the vector length from the SVE hardware. */ + asm(".inst 0x04bf5020\n" /* rdvl x0, #1 */ + "mov %0, x0" + : "=r"(vl) + : + : "x0"); + opsz_vl = opnd_size_from_bytes(vl); + } else { + /* Expect reg_get_size() to report the default 256-bit vector length on + * non-SVE hardware. */ + opsz_vl = OPSZ_32; + } for (uint i = 0; i < 32; i++) { - ASSERT(reg_get_size((reg_id_t)DR_REG_Z0 + i) == OPSZ_SCALABLE); + ASSERT(reg_get_size((reg_id_t)DR_REG_Z0 + i) == opsz_vl); } - // Check sizes of SVE predicate regs. + /* TODO i#5365: Check sizes of SVE predicate regs.
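+ * Each Pn holds one bit per vector byte, i.e. the vector length in bytes divided by 8 (8 bytes at a 512-bit VL).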
*/ for (uint i = 0; i < 16; i++) { ASSERT(reg_get_size((reg_id_t)DR_REG_P0 + i) == OPSZ_SCALABLE_PRED); } @@ -287,6 +306,12 @@ test_opnd_invert_immed_int() int main(int argc, char *argv[]) { + /* Required for proc_init() -> proc_init_arch() to establish the vector + * length on SVE h/w. This is validated against the direct read of the + * vector length using the SVE RDVL instruction in test_get_size() above. + */ + dr_standalone_init(); + test_get_size(); test_opnd_compute_address(); diff --git a/suite/tests/client-interface/cleancall-opt-shared.h b/suite/tests/client-interface/cleancall-opt-shared.h index de2595ceb6e..6d2fc746a7b 100644 --- a/suite/tests/client-interface/cleancall-opt-shared.h +++ b/suite/tests/client-interface/cleancall-opt-shared.h @@ -286,8 +286,17 @@ mcontexts_equal(dr_mcontext_t *mc_a, dr_mcontext_t *mc_b, int func_index) return false; } #elif defined(AARCH64) - for (i = 0; i < proc_num_simd_registers(); i++) { - if (memcmp(&mc_a->simd[i], &mc_b->simd[i], sizeof(dr_simd_t)) != 0) + size_t vl = proc_get_vector_length_bytes(); + for (i = 0; i < MCXT_NUM_SIMD_SVE_SLOTS; i++) { + if (memcmp(&mc_a->simd[i], &mc_b->simd[i], vl) != 0) + return false; + } + if (proc_has_feature(FEATURE_SVE)) { + for (i = 0; i < MCXT_NUM_SVEP_SLOTS; i++) { + if (memcmp(&mc_a->svep[i], &mc_b->svep[i], vl / 8) != 0) + return false; + } + if (memcmp(&mc_a->ffr, &mc_b->ffr, vl / 8) != 0) return false; } #endif @@ -312,7 +321,11 @@ dump_diff_mcontexts(void) after_reg, diff_str); } +#ifdef X86 dr_fprintf(STDERR, "Printing XMM regs:\n"); +#elif defined(AARCH64) + dr_fprintf(STDERR, "Printing SIMD/SVE regs:\n"); +#endif /* XXX i#1312: check if test can get extended to AVX-512. */ for (i = 0; i < proc_num_simd_registers(); i++) { #ifdef X86 @@ -340,12 +353,27 @@ dump_diff_mcontexts(void) after_reg.u32[6], after_reg.u32[7]); } #elif defined(AARCH64) - dr_simd_t before_reg = before_mcontext.simd[i]; - dr_simd_t after_reg = after_mcontext.simd[i]; - size_t mmsz = sizeof(dr_simd_t); + const size_t mmsz = proc_get_vector_length_bytes(); + dr_simd_t before_reg, after_reg; + char reg_name[4]; + if (i >= (MCXT_NUM_SIMD_SVE_SLOTS + MCXT_NUM_SVEP_SLOTS)) { + strcpy(reg_name, "FFR"); + before_reg = before_mcontext.ffr; + after_reg = after_mcontext.ffr; + } else if (i >= MCXT_NUM_SIMD_SVE_SLOTS) { + dr_snprintf(reg_name, 4, "P%2d", i - MCXT_NUM_SIMD_SVE_SLOTS); + before_reg = before_mcontext.svep[i - MCXT_NUM_SIMD_SVE_SLOTS]; + after_reg = after_mcontext.svep[i - MCXT_NUM_SIMD_SVE_SLOTS]; + } else { + dr_snprintf(reg_name, 4, "Z%2d", i); + before_reg = before_mcontext.simd[i]; + after_reg = after_mcontext.simd[i]; + } + const char *diff_str = (memcmp(&before_reg, &after_reg, mmsz) == 0 ? "" : " <- DIFFERS"); + - dr_fprintf(STDERR, "xmm%2d before: %08x%08x%08x%08x", i, before_reg.u32[0], + dr_fprintf(STDERR, "%s before: %08x%08x%08x%08x", reg_name, before_reg.u32[0], before_reg.u32[1], before_reg.u32[2], before_reg.u32[3]); dr_fprintf(STDERR, " after: %08x%08x%08x%08x", after_reg.u32[0], after_reg.u32[1], after_reg.u32[2], after_reg.u32[3]);
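Taken together, the renamed vector-length query and the widened dr_simd_t define how a client walks the new SVE state in dr_mcontext_t. A minimal sketch against the AArch64 (X64) build configured above; show_sve_mcontext is a hypothetical helper for illustration, not part of this patch:

#include "dr_api.h"

/* Hypothetical helper: print the current vector length and the low word of
 * each SVE register slot. Assumes an AArch64 build where dr_mcontext_t has
 * the new simd[], svep[] and ffr fields and the context was filled in with
 * DR_MC_MULTIMEDIA set in its flags.
 */
static void
show_sve_mcontext(dr_mcontext_t *mc)
{
    int vl_bits = dr_get_sve_vector_length(); /* Renamed from dr_get_sve_vl(). */
    size_t vl_bytes = proc_get_vector_length_bytes();
    dr_printf("VL = %d bits (%d bytes)\n", vl_bits, (int)vl_bytes);
    /* Z registers occupy the first MCXT_NUM_SIMD_SVE_SLOTS slots. */
    for (int i = 0; i < MCXT_NUM_SIMD_SVE_SLOTS; i++)
        dr_printf("z%d[31:0] = 0x%08x\n", i, mc->simd[i].u32[0]);
    /* Predicate registers hold one bit per vector byte: vl_bytes / 8 each. */
    for (int i = 0; i < MCXT_NUM_SVEP_SLOTS; i++)
        dr_printf("p%d[31:0] = 0x%08x\n", i, mc->svep[i].u32[0]);
    dr_printf("ffr[31:0] = 0x%08x\n", mc->ffr.u32[0]);
}

This is the same per-slot walk that mcontexts_equal() performs above, comparing vl bytes per Zn slot and vl / 8 bytes per predicate and FFR slot.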