From a54610b8de4d4533793007bb78e53184d90bb9b5 Mon Sep 17 00:00:00 2001 From: Jack Gallagher Date: Fri, 26 Jan 2024 16:06:29 +0000 Subject: [PATCH 01/15] i#6585: Add drcachesim vector length trace marker Adds a new trace marker TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH to drcachesim that indicates the current vector length for architectures which have a dynamic vector length that can't be statically determined from the instruction. The marker is emitted as part of the thread header when running on AArch64 with SVE support, but in the future could also be used to track changes in the vector length after prctl(PR_SVE_SET_VL, ..) system calls. Some SVE load and store instructions such as ``` LDR <Zt>, [<Xn|SP>{, #<imm>, MUL VL}]``` or ``` ST1D { <Zt>.D }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]``` scale the immediate offset based on the hardware vector length so knowing the correct vector length for the traced application is important to properly decode and analyse these instructions. Fixes: #6585 --- clients/drcachesim/common/trace_entry.h | 7 ++ ...sm-scattergather-vl-view-aarch64.templatex | 9 ++ .../tests/allasm_scattergather_aarch64.asm | 115 +++++++++--------- ...sm-scattergather-vl-view-aarch64.templatex | 15 +++ .../drcachesim/tests/offline-view.templatex | 9 ++ clients/drcachesim/tests/view_test.cpp | 59 +++++---- clients/drcachesim/tools/view.cpp | 4 + clients/drcachesim/tracer/instru_offline.cpp | 6 + clients/drcachesim/tracer/instru_online.cpp | 6 + clients/drcachesim/tracer/raw2trace.cpp | 35 ++++-- clients/drcachesim/tracer/raw2trace.h | 1 + suite/tests/CMakeLists.txt | 20 +++ 12 files changed, 194 insertions(+), 92 deletions(-) create mode 100644 clients/drcachesim/tests/allasm-scattergather-vl-view-aarch64.templatex create mode 100644 clients/drcachesim/tests/offline-allasm-scattergather-vl-view-aarch64.templatex diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h index a34a2963225..e45018f4a2c 100644 --- a/clients/drcachesim/common/trace_entry.h +++ 
b/clients/drcachesim/common/trace_entry.h @@ -612,6 +612,13 @@ typedef enum { */ TRACE_MARKER_TYPE_CONTEXT_SWITCH_END, + /** + * The marker is used to indicate the current vector length in bytes for + * architectures with a dynamic vector length, for example the AArch64 SVE vector + * length. This does not apply to fixed length vector architectures. + */ + TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH, + // ... // These values are reserved for future built-in marker types. // ... diff --git a/clients/drcachesim/tests/allasm-scattergather-vl-view-aarch64.templatex b/clients/drcachesim/tests/allasm-scattergather-vl-view-aarch64.templatex new file mode 100644 index 00000000000..e609dea3c8f --- /dev/null +++ b/clients/drcachesim/tests/allasm-scattergather-vl-view-aarch64.templatex @@ -0,0 +1,9 @@ +.* +#if __ARM_FEATURE_SVE_BITS == 128 +.* +#elif __ARM_FEATURE_SVE_BITS == 256 +.* +#elif __ARM_FEATURE_SVE_BITS == 512 +.* +#endif +.* diff --git a/clients/drcachesim/tests/allasm_scattergather_aarch64.asm b/clients/drcachesim/tests/allasm_scattergather_aarch64.asm index 658e12a40a0..8e08ceee19b 100644 --- a/clients/drcachesim/tests/allasm_scattergather_aarch64.asm +++ b/clients/drcachesim/tests/allasm_scattergather_aarch64.asm @@ -292,75 +292,75 @@ test_scalar_plus_scalar: test_scalar_plus_immediate: - ld1b DEST_REG1.b, B_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 16 - ld1b DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 8 - ld1b DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 4 - ld1b DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 2 - ldnt1b DEST_REG1.b, B_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 16 - ld1sb DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 8 - ld1sb DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 4 - ld1sb DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 2 - ld1h DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 8 - ld1h DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 4 - ld1h DEST_REG1.d, D_MASK_REG/z, 
[BUFFER_REG, #0, mul vl] // 2 - ldnt1h DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 8 - ld1sh DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 4 - ld1sh DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 2 - ld1w DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 4 - ld1w DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 2 - ldnt1w DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 4 - ld1sw DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 2 - ld1d DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 2 - ldnt1d DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 2 + ld1b DEST_REG1.b, B_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 16 + ld1b DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 8 + ld1b DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 4 + ld1b DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2 + ldnt1b DEST_REG1.b, B_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 16 + ld1sb DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 8 + ld1sb DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 4 + ld1sb DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2 + ld1h DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 8 + ld1h DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 4 + ld1h DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2 + ldnt1h DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 8 + ld1sh DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 4 + ld1sh DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2 + ld1w DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 4 + ld1w DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2 + ldnt1w DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 4 + ld1sw DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2 + ld1d DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2 + ldnt1d DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2 // Total: 104 - ld2b { DEST_REG1.b, DEST_REG2.b }, B_MASK_REG/z, [BUFFER_REG, #0, mul 
vl] // 32 - ld2h { DEST_REG1.h, DEST_REG2.h }, H_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 16 - ld2w { DEST_REG1.s, DEST_REG2.s }, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 8 - ld2d { DEST_REG1.d, DEST_REG2.d }, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 4 + ld2b { DEST_REG1.b, DEST_REG2.b }, B_MASK_REG/z, [BUFFER_REG, #2, mul vl] // 32 + ld2h { DEST_REG1.h, DEST_REG2.h }, H_MASK_REG/z, [BUFFER_REG, #2, mul vl] // 16 + ld2w { DEST_REG1.s, DEST_REG2.s }, S_MASK_REG/z, [BUFFER_REG, #2, mul vl] // 8 + ld2d { DEST_REG1.d, DEST_REG2.d }, D_MASK_REG/z, [BUFFER_REG, #2, mul vl] // 4 // Total: 60 - ld3b { DEST_REG1.b, DEST_REG2.b, DEST_REG3.b }, B_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 48 - ld3h { DEST_REG1.h, DEST_REG2.h, DEST_REG3.h }, H_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 24 - ld3w { DEST_REG1.s, DEST_REG2.s, DEST_REG3.s }, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 12 - ld3d { DEST_REG1.d, DEST_REG2.d, DEST_REG3.d }, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 6 + ld3b { DEST_REG1.b, DEST_REG2.b, DEST_REG3.b }, B_MASK_REG/z, [BUFFER_REG, #3, mul vl] // 48 + ld3h { DEST_REG1.h, DEST_REG2.h, DEST_REG3.h }, H_MASK_REG/z, [BUFFER_REG, #3, mul vl] // 24 + ld3w { DEST_REG1.s, DEST_REG2.s, DEST_REG3.s }, S_MASK_REG/z, [BUFFER_REG, #3, mul vl] // 12 + ld3d { DEST_REG1.d, DEST_REG2.d, DEST_REG3.d }, D_MASK_REG/z, [BUFFER_REG, #3, mul vl] // 6 // Total: 90 - ld4b { DEST_REG1.b, DEST_REG2.b, DEST_REG3.b, DEST_REG4.b }, B_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 64 - ld4h { DEST_REG1.h, DEST_REG2.h, DEST_REG3.h, DEST_REG4.h }, H_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 32 - ld4w { DEST_REG1.s, DEST_REG2.s, DEST_REG3.s, DEST_REG4.s }, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 16 - ld4d { DEST_REG1.d, DEST_REG2.d, DEST_REG3.d, DEST_REG4.d }, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 8 + ld4b { DEST_REG1.b, DEST_REG2.b, DEST_REG3.b, DEST_REG4.b }, B_MASK_REG/z, [BUFFER_REG, #4, mul vl] // 64 + ld4h { DEST_REG1.h, DEST_REG2.h, DEST_REG3.h, DEST_REG4.h }, H_MASK_REG/z, [BUFFER_REG, #4, 
mul vl] // 32 + ld4w { DEST_REG1.s, DEST_REG2.s, DEST_REG3.s, DEST_REG4.s }, S_MASK_REG/z, [BUFFER_REG, #4, mul vl] // 16 + ld4d { DEST_REG1.d, DEST_REG2.d, DEST_REG3.d, DEST_REG4.d }, D_MASK_REG/z, [BUFFER_REG, #4, mul vl] // 8 // Total: 120 // Total loads: 104 + 60 + 90 + 120 = 374 - st1b SRC_REG1.b, B_MASK_REG, [BUFFER_REG, #0, mul vl] // 16 - st1b SRC_REG1.h, H_MASK_REG, [BUFFER_REG, #0, mul vl] // 8 - st1b SRC_REG1.s, S_MASK_REG, [BUFFER_REG, #0, mul vl] // 4 - st1b SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #0, mul vl] // 2 - st1h SRC_REG1.h, H_MASK_REG, [BUFFER_REG, #0, mul vl] // 8 - st1h SRC_REG1.s, S_MASK_REG, [BUFFER_REG, #0, mul vl] // 4 - st1h SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #0, mul vl] // 2 - st1w SRC_REG1.s, S_MASK_REG, [BUFFER_REG, #0, mul vl] // 4 - st1w SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #0, mul vl] // 2 - st1d SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #0, mul vl] // 2 + st1b SRC_REG1.b, B_MASK_REG, [BUFFER_REG, #1, mul vl] // 16 + st1b SRC_REG1.h, H_MASK_REG, [BUFFER_REG, #1, mul vl] // 8 + st1b SRC_REG1.s, S_MASK_REG, [BUFFER_REG, #1, mul vl] // 4 + st1b SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #1, mul vl] // 2 + st1h SRC_REG1.h, H_MASK_REG, [BUFFER_REG, #1, mul vl] // 8 + st1h SRC_REG1.s, S_MASK_REG, [BUFFER_REG, #1, mul vl] // 4 + st1h SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #1, mul vl] // 2 + st1w SRC_REG1.s, S_MASK_REG, [BUFFER_REG, #1, mul vl] // 4 + st1w SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #1, mul vl] // 2 + st1d SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #1, mul vl] // 2 // Total: 52 - st2b { SRC_REG1.b, SRC_REG2.b }, B_MASK_REG, [BUFFER_REG, #0, mul vl] // 32 - st2h { SRC_REG1.h, SRC_REG2.h }, H_MASK_REG, [BUFFER_REG, #0, mul vl] // 16 - st2w { SRC_REG1.s, SRC_REG2.s }, S_MASK_REG, [BUFFER_REG, #0, mul vl] // 8 - st2d { SRC_REG1.d, SRC_REG2.d }, D_MASK_REG, [BUFFER_REG, #0, mul vl] // 4 + st2b { SRC_REG1.b, SRC_REG2.b }, B_MASK_REG, [BUFFER_REG, #2, mul vl] // 32 + st2h { SRC_REG1.h, SRC_REG2.h }, H_MASK_REG, [BUFFER_REG, #2, mul vl] // 16 + st2w { 
SRC_REG1.s, SRC_REG2.s }, S_MASK_REG, [BUFFER_REG, #2, mul vl] // 8 + st2d { SRC_REG1.d, SRC_REG2.d }, D_MASK_REG, [BUFFER_REG, #2, mul vl] // 4 // Total: 60 - st3b { SRC_REG1.b, SRC_REG2.b, SRC_REG3.b }, B_MASK_REG, [BUFFER_REG, #0, mul vl] // 48 - st3h { SRC_REG1.h, SRC_REG2.h, SRC_REG3.h }, H_MASK_REG, [BUFFER_REG, #0, mul vl] // 24 - st3w { SRC_REG1.s, SRC_REG2.s, SRC_REG3.s }, S_MASK_REG, [BUFFER_REG, #0, mul vl] // 12 - st3d { SRC_REG1.d, SRC_REG2.d, SRC_REG3.d }, D_MASK_REG, [BUFFER_REG, #0, mul vl] // 6 + st3b { SRC_REG1.b, SRC_REG2.b, SRC_REG3.b }, B_MASK_REG, [BUFFER_REG, #3, mul vl] // 48 + st3h { SRC_REG1.h, SRC_REG2.h, SRC_REG3.h }, H_MASK_REG, [BUFFER_REG, #3, mul vl] // 24 + st3w { SRC_REG1.s, SRC_REG2.s, SRC_REG3.s }, S_MASK_REG, [BUFFER_REG, #3, mul vl] // 12 + st3d { SRC_REG1.d, SRC_REG2.d, SRC_REG3.d }, D_MASK_REG, [BUFFER_REG, #3, mul vl] // 6 // Total: 90 - st4b { SRC_REG1.b, SRC_REG2.b, SRC_REG3.b, SRC_REG4.b }, B_MASK_REG, [BUFFER_REG, #0, mul vl] // 64 - st4h { SRC_REG1.h, SRC_REG2.h, SRC_REG3.h, SRC_REG4.h }, H_MASK_REG, [BUFFER_REG, #0, mul vl] // 32 - st4w { SRC_REG1.s, SRC_REG2.s, SRC_REG3.s, SRC_REG4.s }, S_MASK_REG, [BUFFER_REG, #0, mul vl] // 16 - st4d { SRC_REG1.d, SRC_REG2.d, SRC_REG3.d, SRC_REG4.d }, D_MASK_REG, [BUFFER_REG, #0, mul vl] // 8 + st4b { SRC_REG1.b, SRC_REG2.b, SRC_REG3.b, SRC_REG4.b }, B_MASK_REG, [BUFFER_REG, #4, mul vl] // 64 + st4h { SRC_REG1.h, SRC_REG2.h, SRC_REG3.h, SRC_REG4.h }, H_MASK_REG, [BUFFER_REG, #4, mul vl] // 32 + st4w { SRC_REG1.s, SRC_REG2.s, SRC_REG3.s, SRC_REG4.s }, S_MASK_REG, [BUFFER_REG, #4, mul vl] // 16 + st4d { SRC_REG1.d, SRC_REG2.d, SRC_REG3.d, SRC_REG4.d }, D_MASK_REG, [BUFFER_REG, #4, mul vl] // 8 // Total: 120 // Total stores: 52 + 60 + 90 + 120 = 322 @@ -557,4 +557,9 @@ helloworld: .ascii "Hello, world!\n" buffer: - .zero 1024 // Maximum size of an SVE Z register * 4. + .zero 2048 // Maximum size of an SVE Z register * 8. 
+ // This gives us enough space to use an offset of + // #1, mul vl for scalar+immediate/vector+immediate + // instructions which lets us check the VL scaling of + // offsets in the IR in + // tool.drcacheoff.allasm-scattergather-vl-view diff --git a/clients/drcachesim/tests/offline-allasm-scattergather-vl-view-aarch64.templatex b/clients/drcachesim/tests/offline-allasm-scattergather-vl-view-aarch64.templatex new file mode 100644 index 00000000000..7e7d070e6f4 --- /dev/null +++ b/clients/drcachesim/tests/offline-allasm-scattergather-vl-view-aarch64.templatex @@ -0,0 +1,15 @@ +.* +#if __ARM_FEATURE_SVE_BITS == 128 +.* +.*a401a03c ld1b \+0x10\(%x1\)\[1byte\] %p0/z -> %z28\.b +.*e5d1ec3c st3d %z28\.d %z29\.d %z30\.d %p3 -> \+0x30\(%x1\)\[8byte\] +#elif __ARM_FEATURE_SVE_BITS == 256 +.* +.*a401a03c ld1b \+0x20\(%x1\)\[1byte\] %p0/z -> %z28.b +.*e5d1ec3c st3d %z28\.d %z29\.d %z30\.d %p3 -> \+0x60\(%x1\)\[8byte\] +#elif __ARM_FEATURE_SVE_BITS == 512 +.* +.*a401a03c ld1b \+0x40\(%x1\)\[1byte\] %p0/z -> %z28.b +.*e5d1ec3c st3d %z28\.d %z29\.d %z30\.d %p3 -> \+0xc0\(%x1\)\[8byte\] +#endif +.* diff --git a/clients/drcachesim/tests/offline-view.templatex b/clients/drcachesim/tests/offline-view.templatex index c1f4943ae9d..9c0f108026c 100644 --- a/clients/drcachesim/tests/offline-view.templatex +++ b/clients/drcachesim/tests/offline-view.templatex @@ -5,11 +5,20 @@ Output format: 1 0: +[0-9]+ 2 0: +[0-9]+ 3 0: +[0-9]+ +#ifdef __ARM_FEATURE_SVE + 4 0: +[0-9]+ + 5 0: +[0-9]+ + 6 0: +[0-9]+ + 7 0: +[0-9]+ + 8 0: +[0-9]+ + 9 1: +[0-9]+ ifetch .* +#else 4 0: +[0-9]+ 5 0: +[0-9]+ 6 0: +[0-9]+ 7 0: +[0-9]+ 8 1: +[0-9]+ ifetch .* +#endif .* View tool results: *[0-9]* : total instructions diff --git a/clients/drcachesim/tests/view_test.cpp b/clients/drcachesim/tests/view_test.cpp index 5aa2ba3363b..a501d123c4a 100644 --- a/clients/drcachesim/tests/view_test.cpp +++ b/clients/drcachesim/tests/view_test.cpp @@ -305,6 +305,7 @@ run_limit_tests(void *drcontext) gen_marker(t1, 
TRACE_MARKER_TYPE_VERSION, 3), gen_marker(t1, TRACE_MARKER_TYPE_FILETYPE, 0), gen_marker(t1, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, 64), + gen_marker(t1, TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH, 32), gen_marker(t1, TRACE_MARKER_TYPE_TIMESTAMP, 1001), gen_marker(t1, TRACE_MARKER_TYPE_CPU_ID, 2), gen_instr(t1, offs_nop1), @@ -463,6 +464,7 @@ run_single_thread_chunk_test(void *drcontext) { TRACE_TYPE_THREAD, 0, { t1 } }, { TRACE_TYPE_PID, 0, { t1 } }, { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, { 64 } }, + { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH, { 16 } }, { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT, { 2 } }, { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 1002 } }, { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 2 } }, @@ -478,13 +480,14 @@ run_single_thread_chunk_test(void *drcontext) const char *expect = R"DELIM( 1 0: 3 2 0: 3 3 0: 3 - 4 0: 3 - 5 0: 3 - 6 0: 3 - 7 1: 3 ifetch 4 byte(s) @ 0x0000002a non-branch - 8 2: 3 ifetch 4 byte(s) @ 0x0000002a non-branch - 9 2: 3 - 10 3: 3 ifetch 4 byte(s) @ 0x0000002a non-branch + 4 0: 3 + 5 0: 3 + 6 0: 3 + 7 0: 3 + 8 1: 3 ifetch 4 byte(s) @ 0x0000002a non-branch + 9 2: 3 ifetch 4 byte(s) @ 0x0000002a non-branch + 10 2: 3 + 11 3: 3 ifetch 4 byte(s) @ 0x0000002a non-branch )DELIM"; instrlist_t *ilist_unused = nullptr; view_nomod_test_t view(drcontext, *ilist_unused, 0, 0); @@ -515,6 +518,7 @@ run_serial_chunk_test(void *drcontext) { TRACE_TYPE_THREAD, 0, { t1 } }, { TRACE_TYPE_PID, 0, { t1 } }, { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, { 64 } }, + { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH, { 16 } }, { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT, { 20 } }, { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 1001 } }, { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 2 } }, @@ -531,6 +535,7 @@ run_serial_chunk_test(void *drcontext) { TRACE_TYPE_THREAD, 0, { t2 } }, { TRACE_TYPE_PID, 0, { t2 } }, { TRACE_TYPE_MARKER, 
TRACE_MARKER_TYPE_CACHE_LINE_SIZE, { 64 } }, + { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH, { 16 } }, { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT, { 2 } }, { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 1002 } }, { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 2 } }, @@ -545,28 +550,30 @@ run_serial_chunk_test(void *drcontext) R"DELIM( 1 0: 3 2 0: 3 3 0: 3 - 4 0: 3 - 5 0: 3 - 6 0: 3 - 7 1: 3 ifetch 4 byte(s) @ 0x0000002a non-branch - 8 2: 3 ifetch 4 byte(s) @ 0x0000002a non-branch + 4 0: 3 + 5 0: 3 + 6 0: 3 + 7 0: 3 + 8 1: 3 ifetch 4 byte(s) @ 0x0000002a non-branch + 9 2: 3 ifetch 4 byte(s) @ 0x0000002a non-branch ------------------------------------------------------------ - 9 2: 7 - 10 2: 7 - 11 2: 7 - 12 2: 7 - 13 2: 7 - 14 2: 7 - 15 3: 7 ifetch 4 byte(s) @ 0x0000002a non-branch - 16 4: 7 ifetch 4 byte(s) @ 0x0000002a non-branch + 10 2: 7 + 11 2: 7 + 12 2: 7 + 13 2: 7 + 14 2: 7 + 15 2: 7 + 16 2: 7 + 17 3: 7 ifetch 4 byte(s) @ 0x0000002a non-branch + 18 4: 7 ifetch 4 byte(s) @ 0x0000002a non-branch ------------------------------------------------------------ - 17 4: 3 - 18 4: 3 - 19 5: 3 ifetch 4 byte(s) @ 0x0000002a non-branch + 19 4: 3 + 20 4: 3 + 21 5: 3 ifetch 4 byte(s) @ 0x0000002a non-branch ------------------------------------------------------------ - 20 5: 7 - 21 5: 7 - 22 6: 7 ifetch 4 byte(s) @ 0x0000002a non-branch + 22 5: 7 + 23 5: 7 + 24 6: 7 ifetch 4 byte(s) @ 0x0000002a non-branch )DELIM"; instrlist_t *ilist_unused = nullptr; view_nomod_test_t view(drcontext, *ilist_unused, 0, 0); diff --git a/clients/drcachesim/tools/view.cpp b/clients/drcachesim/tools/view.cpp index b98e4d6a165..d228312d968 100644 --- a/clients/drcachesim/tools/view.cpp +++ b/clients/drcachesim/tools/view.cpp @@ -443,6 +443,10 @@ view_t::parallel_shard_memref(void *shard_data, const memref_t &memref) std::cerr << "\n"; break; case TRACE_MARKER_TYPE_CORE_IDLE: std::cerr << "\n"; break; + case TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH: + std::cerr 
<< "\n"; + break; default: std::cerr << "\n"; diff --git a/clients/drcachesim/tracer/instru_offline.cpp b/clients/drcachesim/tracer/instru_offline.cpp index 96ff48a21f2..0526f08d383 100644 --- a/clients/drcachesim/tracer/instru_offline.cpp +++ b/clients/drcachesim/tracer/instru_offline.cpp @@ -396,6 +396,12 @@ offline_instru_t::append_thread_header(byte *buf_ptr, thread_id_t tid, new_buf += append_pid(new_buf, dr_get_process_id()); new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, proc_get_cache_line_size()); +#if defined(AARCH64) + if (proc_has_feature(FEATURE_SVE)) { + new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH, + proc_get_vector_length_bytes()); + } +#endif new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_PAGE_SIZE, dr_page_size()); return (int)(new_buf - buf_ptr); } diff --git a/clients/drcachesim/tracer/instru_online.cpp b/clients/drcachesim/tracer/instru_online.cpp index c2d78050f8d..03a10cc1c4f 100644 --- a/clients/drcachesim/tracer/instru_online.cpp +++ b/clients/drcachesim/tracer/instru_online.cpp @@ -178,6 +178,12 @@ online_instru_t::append_thread_header(byte *buf_ptr, thread_id_t tid, new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_FILETYPE, file_type); new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, proc_get_cache_line_size()); +#if defined(AARCH64) + if (proc_has_feature(FEATURE_SVE)) { + new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH, + proc_get_vector_length_bytes()); + } +#endif new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_PAGE_SIZE, dr_page_size()); return (int)(new_buf - buf_ptr); } diff --git a/clients/drcachesim/tracer/raw2trace.cpp b/clients/drcachesim/tracer/raw2trace.cpp index 736f7c8f3b9..02b6c851482 100644 --- a/clients/drcachesim/tracer/raw2trace.cpp +++ b/clients/drcachesim/tracer/raw2trace.cpp @@ -932,6 +932,23 @@ raw2trace_t::read_header(raw2trace_thread_data_t *tdata, header->cache_line_size = proc_get_cache_line_size(); 
unread_last_entry(tdata); } + + in_entry = get_next_entry(tdata); + if (in_entry == nullptr) { + tdata->error = "Failed to read header from input file"; + return false; + } + if (in_entry->extended.type == OFFLINE_TYPE_EXTENDED && + in_entry->extended.ext == OFFLINE_EXT_TYPE_MARKER && + in_entry->extended.valueB == TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH) { + header->vector_length_bytes = in_entry->extended.valueA; + } else { + // process_header() interprets a value of 0 to mean there was no dynamic vector + // length marker. + header->vector_length_bytes = 0; + unread_last_entry(tdata); + } + return true; } @@ -1002,6 +1019,13 @@ raw2trace_t::process_header(raw2trace_thread_data_t *tdata) create_essential_header_entries(buf, version, tdata->file_type, tid, pid); buf += trace_metadata_writer_t::write_marker(buf, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, header.cache_line_size); + if (header.vector_length_bytes > 0) { +#ifdef AARCH64 + dr_set_sve_vector_length(header.vector_length_bytes * 8); +#endif + buf += trace_metadata_writer_t::write_marker( + buf, TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH, header.vector_length_bytes); + } // Write out further markers. // Even if tdata->out_archive == nullptr we write out a (0-valued) marker, // partly to simplify our test output. @@ -3783,17 +3807,6 @@ raw2trace_t::raw2trace_t( decode_cache_.reserve(cache_count); for (int i = 0; i < cache_count; ++i) decode_cache_.emplace_back(cache_count); - -#if defined(AARCH64) - // TODO i#6556, i#1684: The decoder uses a global sve_veclen variable to store the - // vector length value it uses when decoding. drdecodelib ends up being linked into - // drcachesim twice: once into the drcachesim executable, and one into libdynamorio. - // When we call dr_standalone_init() above it will initialize the version of - // sve_veclen in libdynamorio, but not the one in drcachesim. 
- // Unfortunately it is the version of sve_veclen in drcachesim that gets used when - // decoding in raw2trace so we need to explicitly initialize its sve_veclen here. - dr_set_sve_vector_length(proc_get_vector_length_bytes() * 8); -#endif } raw2trace_t::~raw2trace_t() diff --git a/clients/drcachesim/tracer/raw2trace.h b/clients/drcachesim/tracer/raw2trace.h index 3e674d1a60b..f010ac53711 100644 --- a/clients/drcachesim/tracer/raw2trace.h +++ b/clients/drcachesim/tracer/raw2trace.h @@ -695,6 +695,7 @@ struct trace_header_t { thread_id_t tid; uint64 timestamp; size_t cache_line_size; + size_t vector_length_bytes; }; /** diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index c741d29838a..40103373d1d 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -4237,6 +4237,10 @@ if (BUILD_CLIENTS) torunonly_drcacheoff(view ${ci_shared_app} "" "@-simulator_type@view@-sim_refs@16384" "") + unset(tool.drcacheoff.view_rawtemp) # Use preprocessor + if (AARCH64 AND proc_supports_sve) + set(tool.drcacheoff.view_runsve 1) + endif () set(tool.drcacheoff.func_view_full_run ON) # Fails on Windows if truncated. 
torunonly_drcacheoff(func_view common.fib "-record_function fib|1" @@ -4499,6 +4503,22 @@ if (BUILD_CLIENTS) "allasm-scattergather-basic-counts-${ARCH_NAME}") endif () + if (UNIX AND (AARCH64 AND proc_supports_sve)) + torunonly_drcacheoff(allasm-scattergather-vl-view allasm_scattergather + "" "@-simulator_type@view" "") + unset(tool.drcacheoff.allasm-scattergather-vl-view_rawtemp) # use preprocessor + set(tool.drcacheoff.allasm-scattergather-vl-view_runsve 1) + set(tool.drcacheoff.allasm-scattergather-vl-view_expectbase + "offline-allasm-scattergather-vl-view-${ARCH_NAME}") + + torunonly_drcachesim(allasm-scattergather-vl-view allasm_scattergather + "-simulator_type view" "") + unset(tool.drcachesim.allasm-scattergather-vl-view_rawtemp) # use preprocessor + set(tool.drcachesim.allasm-scattergather-vl-view_runsve 1) + set(tool.drcachesim.allasm-scattergather-vl-view_expectbase + "allasm-scattergather-vl-view-${ARCH_NAME}") + endif () + if (UNIX AND X86 AND X64) torunonly_drcacheoff(allasm-repstr-basic-counts allasm_repstr "" "@-simulator_type@basic_counts" "") From 225ead8ff5910c7904901f886cc6ac5703f5035a Mon Sep 17 00:00:00 2001 From: Jack Gallagher Date: Thu, 1 Feb 2024 13:27:42 +0000 Subject: [PATCH 02/15] Improve comment --- clients/drcachesim/common/trace_entry.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h index e45018f4a2c..8e7686a0194 100644 --- a/clients/drcachesim/common/trace_entry.h +++ b/clients/drcachesim/common/trace_entry.h @@ -613,9 +613,9 @@ typedef enum { TRACE_MARKER_TYPE_CONTEXT_SWITCH_END, /** - * The marker is used to indicate the current vector length in bytes for - * architectures with a dynamic vector length, for example the AArch64 SVE vector - * length. This does not apply to fixed length vector architectures. 
+ * This marker's value is the current vector length in bytes for architectures with a + * dynamic vector length, for example the AArch64 SVE vector length. This does not + * apply to fixed length vector architectures. */ TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH, From a2cc97aa288022033538873fce77697ac95e04cd Mon Sep 17 00:00:00 2001 From: Jack Gallagher Date: Fri, 2 Feb 2024 10:00:36 +0000 Subject: [PATCH 03/15] Move vl handling to mid-trace processing --- .../drcachesim/tests/offline-view.templatex | 8 +-- clients/drcachesim/tests/view_test.cpp | 59 ++++++++----------- clients/drcachesim/tracer/instru_offline.cpp | 2 +- clients/drcachesim/tracer/instru_online.cpp | 2 +- clients/drcachesim/tracer/raw2trace.cpp | 38 +++++------- clients/drcachesim/tracer/raw2trace.h | 1 - 6 files changed, 46 insertions(+), 64 deletions(-) diff --git a/clients/drcachesim/tests/offline-view.templatex b/clients/drcachesim/tests/offline-view.templatex index 9c0f108026c..c2a3ddecf7c 100644 --- a/clients/drcachesim/tests/offline-view.templatex +++ b/clients/drcachesim/tests/offline-view.templatex @@ -5,16 +5,14 @@ Output format: 1 0: +[0-9]+ 2 0: +[0-9]+ 3 0: +[0-9]+ + 4 0: +[0-9]+ + 5 0: +[0-9]+ #ifdef __ARM_FEATURE_SVE - 4 0: +[0-9]+ - 5 0: +[0-9]+ - 6 0: +[0-9]+ + 6 0: +[0-9]+ 7 0: +[0-9]+ 8 0: +[0-9]+ 9 1: +[0-9]+ ifetch .* #else - 4 0: +[0-9]+ - 5 0: +[0-9]+ 6 0: +[0-9]+ 7 0: +[0-9]+ 8 1: +[0-9]+ ifetch .* diff --git a/clients/drcachesim/tests/view_test.cpp b/clients/drcachesim/tests/view_test.cpp index a501d123c4a..5aa2ba3363b 100644 --- a/clients/drcachesim/tests/view_test.cpp +++ b/clients/drcachesim/tests/view_test.cpp @@ -305,7 +305,6 @@ run_limit_tests(void *drcontext) gen_marker(t1, TRACE_MARKER_TYPE_VERSION, 3), gen_marker(t1, TRACE_MARKER_TYPE_FILETYPE, 0), gen_marker(t1, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, 64), - gen_marker(t1, TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH, 32), gen_marker(t1, TRACE_MARKER_TYPE_TIMESTAMP, 1001), gen_marker(t1, TRACE_MARKER_TYPE_CPU_ID, 2), 
gen_instr(t1, offs_nop1), @@ -464,7 +463,6 @@ run_single_thread_chunk_test(void *drcontext) { TRACE_TYPE_THREAD, 0, { t1 } }, { TRACE_TYPE_PID, 0, { t1 } }, { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, { 64 } }, - { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH, { 16 } }, { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT, { 2 } }, { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 1002 } }, { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 2 } }, @@ -480,14 +478,13 @@ run_single_thread_chunk_test(void *drcontext) const char *expect = R"DELIM( 1 0: 3 2 0: 3 3 0: 3 - 4 0: 3 - 5 0: 3 - 6 0: 3 - 7 0: 3 - 8 1: 3 ifetch 4 byte(s) @ 0x0000002a non-branch - 9 2: 3 ifetch 4 byte(s) @ 0x0000002a non-branch - 10 2: 3 - 11 3: 3 ifetch 4 byte(s) @ 0x0000002a non-branch + 4 0: 3 + 5 0: 3 + 6 0: 3 + 7 1: 3 ifetch 4 byte(s) @ 0x0000002a non-branch + 8 2: 3 ifetch 4 byte(s) @ 0x0000002a non-branch + 9 2: 3 + 10 3: 3 ifetch 4 byte(s) @ 0x0000002a non-branch )DELIM"; instrlist_t *ilist_unused = nullptr; view_nomod_test_t view(drcontext, *ilist_unused, 0, 0); @@ -518,7 +515,6 @@ run_serial_chunk_test(void *drcontext) { TRACE_TYPE_THREAD, 0, { t1 } }, { TRACE_TYPE_PID, 0, { t1 } }, { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, { 64 } }, - { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH, { 16 } }, { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT, { 20 } }, { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 1001 } }, { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 2 } }, @@ -535,7 +531,6 @@ run_serial_chunk_test(void *drcontext) { TRACE_TYPE_THREAD, 0, { t2 } }, { TRACE_TYPE_PID, 0, { t2 } }, { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, { 64 } }, - { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH, { 16 } }, { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT, { 2 } }, { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 1002 } }, { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 2 } }, @@ -550,30 
+545,28 @@ run_serial_chunk_test(void *drcontext) R"DELIM( 1 0: 3 2 0: 3 3 0: 3 - 4 0: 3 - 5 0: 3 - 6 0: 3 - 7 0: 3 - 8 1: 3 ifetch 4 byte(s) @ 0x0000002a non-branch - 9 2: 3 ifetch 4 byte(s) @ 0x0000002a non-branch + 4 0: 3 + 5 0: 3 + 6 0: 3 + 7 1: 3 ifetch 4 byte(s) @ 0x0000002a non-branch + 8 2: 3 ifetch 4 byte(s) @ 0x0000002a non-branch ------------------------------------------------------------ - 10 2: 7 - 11 2: 7 - 12 2: 7 - 13 2: 7 - 14 2: 7 - 15 2: 7 - 16 2: 7 - 17 3: 7 ifetch 4 byte(s) @ 0x0000002a non-branch - 18 4: 7 ifetch 4 byte(s) @ 0x0000002a non-branch + 9 2: 7 + 10 2: 7 + 11 2: 7 + 12 2: 7 + 13 2: 7 + 14 2: 7 + 15 3: 7 ifetch 4 byte(s) @ 0x0000002a non-branch + 16 4: 7 ifetch 4 byte(s) @ 0x0000002a non-branch ------------------------------------------------------------ - 19 4: 3 - 20 4: 3 - 21 5: 3 ifetch 4 byte(s) @ 0x0000002a non-branch + 17 4: 3 + 18 4: 3 + 19 5: 3 ifetch 4 byte(s) @ 0x0000002a non-branch ------------------------------------------------------------ - 22 5: 7 - 23 5: 7 - 24 6: 7 ifetch 4 byte(s) @ 0x0000002a non-branch + 20 5: 7 + 21 5: 7 + 22 6: 7 ifetch 4 byte(s) @ 0x0000002a non-branch )DELIM"; instrlist_t *ilist_unused = nullptr; view_nomod_test_t view(drcontext, *ilist_unused, 0, 0); diff --git a/clients/drcachesim/tracer/instru_offline.cpp b/clients/drcachesim/tracer/instru_offline.cpp index 0526f08d383..80379f3c043 100644 --- a/clients/drcachesim/tracer/instru_offline.cpp +++ b/clients/drcachesim/tracer/instru_offline.cpp @@ -396,13 +396,13 @@ offline_instru_t::append_thread_header(byte *buf_ptr, thread_id_t tid, new_buf += append_pid(new_buf, dr_get_process_id()); new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, proc_get_cache_line_size()); + new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_PAGE_SIZE, dr_page_size()); #if defined(AARCH64) if (proc_has_feature(FEATURE_SVE)) { new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH, proc_get_vector_length_bytes()); } #endif - 
new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_PAGE_SIZE, dr_page_size()); return (int)(new_buf - buf_ptr); } diff --git a/clients/drcachesim/tracer/instru_online.cpp b/clients/drcachesim/tracer/instru_online.cpp index 03a10cc1c4f..536c667096f 100644 --- a/clients/drcachesim/tracer/instru_online.cpp +++ b/clients/drcachesim/tracer/instru_online.cpp @@ -178,13 +178,13 @@ online_instru_t::append_thread_header(byte *buf_ptr, thread_id_t tid, new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_FILETYPE, file_type); new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, proc_get_cache_line_size()); + new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_PAGE_SIZE, dr_page_size()); #if defined(AARCH64) if (proc_has_feature(FEATURE_SVE)) { new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH, proc_get_vector_length_bytes()); } #endif - new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_PAGE_SIZE, dr_page_size()); return (int)(new_buf - buf_ptr); } diff --git a/clients/drcachesim/tracer/raw2trace.cpp b/clients/drcachesim/tracer/raw2trace.cpp index 02b6c851482..a70b1256509 100644 --- a/clients/drcachesim/tracer/raw2trace.cpp +++ b/clients/drcachesim/tracer/raw2trace.cpp @@ -883,6 +883,21 @@ raw2trace_t::process_marker_additionally(raw2trace_thread_data_t *tdata, log(2, "Maybe-blocking syscall %zu\n", marker_val); buf += trace_metadata_writer_t::write_marker( buf, TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL, 0); + } else if (marker_type == TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH) { +#ifdef AARCH64 + log(4, "Setting SVE vector length to %zu bytes\n", marker_val); + + const int new_vl_bits = marker_val * 8; + if (dr_get_sve_vector_length() != new_vl_bits) { + dr_set_sve_vector_length(new_vl_bits); + // Some SVE load/store instructions have an offset which is scaled by a value + // that depends on the vector length. These instructions will need to be + // re-decoded after the vector length changes. 
+ *flush_decode_cache = true; + } +#else + log(2, "Ignoring unexpected dynamic vector length marker\n"); +#endif } return true; } @@ -933,22 +948,6 @@ raw2trace_t::read_header(raw2trace_thread_data_t *tdata, unread_last_entry(tdata); } - in_entry = get_next_entry(tdata); - if (in_entry == nullptr) { - tdata->error = "Failed to read header from input file"; - return false; - } - if (in_entry->extended.type == OFFLINE_TYPE_EXTENDED && - in_entry->extended.ext == OFFLINE_EXT_TYPE_MARKER && - in_entry->extended.valueB == TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH) { - header->vector_length_bytes = in_entry->extended.valueA; - } else { - // process_header() interprets a value of 0 to mean there was no dynamic vector - // length marker. - header->vector_length_bytes = 0; - unread_last_entry(tdata); - } - return true; } @@ -1019,13 +1018,6 @@ raw2trace_t::process_header(raw2trace_thread_data_t *tdata) create_essential_header_entries(buf, version, tdata->file_type, tid, pid); buf += trace_metadata_writer_t::write_marker(buf, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, header.cache_line_size); - if (header.vector_length_bytes > 0) { -#ifdef AARCH64 - dr_set_sve_vector_length(header.vector_length_bytes * 8); -#endif - buf += trace_metadata_writer_t::write_marker( - buf, TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH, header.vector_length_bytes); - } // Write out further markers. // Even if tdata->out_archive == nullptr we write out a (0-valued) marker, // partly to simplify our test output. 
diff --git a/clients/drcachesim/tracer/raw2trace.h b/clients/drcachesim/tracer/raw2trace.h index f010ac53711..3e674d1a60b 100644 --- a/clients/drcachesim/tracer/raw2trace.h +++ b/clients/drcachesim/tracer/raw2trace.h @@ -695,7 +695,6 @@ struct trace_header_t { thread_id_t tid; uint64 timestamp; size_t cache_line_size; - size_t vector_length_bytes; }; /** From 99be95a8b9945f730e2467f6ba677240c93a4e7a Mon Sep 17 00:00:00 2001 From: Jack Gallagher Date: Fri, 2 Feb 2024 10:27:00 +0000 Subject: [PATCH 04/15] Set vl in opcode_mix --- clients/drcachesim/tools/opcode_mix.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/clients/drcachesim/tools/opcode_mix.cpp b/clients/drcachesim/tools/opcode_mix.cpp index 5389829c5ac..8d334ed91a4 100644 --- a/clients/drcachesim/tools/opcode_mix.cpp +++ b/clients/drcachesim/tools/opcode_mix.cpp @@ -164,6 +164,17 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) " but tool built for " + trace_arch_string(build_target_arch_type()); return false; } + } else if (memref.marker.type == TRACE_TYPE_MARKER && + memref.marker.marker_type == TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH) { +#ifdef AARCH64 + const int new_vl_bits = memref.marker.marker_value * 8; + if (dr_get_sve_vector_length() != new_vl_bits) { + dr_set_sve_vector_length(new_vl_bits); + // Changing the vector length can change the IR representation of some SVE + // instructions but it will never change the opcode so we don't need to + // flush the opcode cache. 
+ } +#endif } if (!type_is_instr(memref.instr.type) && memref.data.type != TRACE_TYPE_INSTR_NO_FETCH) { From 1c6d73d855cb4f676895dbece279f021123adef1 Mon Sep 17 00:00:00 2001 From: Jack Gallagher Date: Fri, 2 Feb 2024 13:32:19 +0000 Subject: [PATCH 05/15] Check vl in invariant_checker --- .../drcachesim/tools/invariant_checker.cpp | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/clients/drcachesim/tools/invariant_checker.cpp b/clients/drcachesim/tools/invariant_checker.cpp index 14fd1f119d4..0a269f1bfa0 100644 --- a/clients/drcachesim/tools/invariant_checker.cpp +++ b/clients/drcachesim/tools/invariant_checker.cpp @@ -366,6 +366,28 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem memref.marker.marker_value == shard->stream->get_page_size(), "Stream interface page size != trace marker"); } + if (memref.marker.type == TRACE_TYPE_MARKER && + memref.marker.marker_type == TRACE_MARKER_TYPE_DYNAMIC_VECTOR_SIZE) { +#ifdef AARCH64 + static const int MAX_VL_BYTES = 256; // SVE's maximum vector length is 2048-bit + // Vector length must be a multiple of 16 bytes between 16 and 256. + report_if_false(shard, + (memref.marker.marker_value > 0) && + (memref.marker.marker_value <= MAX_VL_BYTES) && + (memref.marker.marker_value % 16 == 0), + "Dynamic vector length marker has invalid size"); + + const int new_vl_bits = memref.marker.marker_value * 8; + if (dr_get_sve_vector_length() != new_vl_bits) { + dr_set_sve_vector_length(new_vl_bits); + // Changing the vector length can change the IR representation of some SVE + // instructions but it doesn't effect any off the metadata that is stored + // in decode_cache_ so we don't need to flush the cache. 
+ } +#else + report_if_false(shard, false, "Unexpected dynamic vector length marker"); +#endif + } if (memref.marker.type == TRACE_TYPE_MARKER && memref.marker.marker_type == TRACE_MARKER_TYPE_VERSION) { shard->trace_version_ = memref.marker.marker_value; From 8e7eb14e1c521b9fe64eda3f02cd6a5922d4e49b Mon Sep 17 00:00:00 2001 From: Jack Gallagher Date: Fri, 2 Feb 2024 13:45:40 +0000 Subject: [PATCH 06/15] Remove redundant () --- suite/tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index 40103373d1d..623a16d7a22 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -4503,7 +4503,7 @@ if (BUILD_CLIENTS) "allasm-scattergather-basic-counts-${ARCH_NAME}") endif () - if (UNIX AND (AARCH64 AND proc_supports_sve)) + if (UNIX AND AARCH64 AND proc_supports_sve) torunonly_drcacheoff(allasm-scattergather-vl-view allasm_scattergather "" "@-simulator_type@view" "") unset(tool.drcacheoff.allasm-scattergather-vl-view_rawtemp) # use preprocessor From a6951b0d81a0c85cc3c02338a45f438f5bfeadd7 Mon Sep 17 00:00:00 2001 From: Jack Gallagher Date: Mon, 5 Feb 2024 13:56:47 +0000 Subject: [PATCH 07/15] Update documentation --- api/docs/release.dox | 3 +++ clients/drcachesim/docs/drcachesim.dox.in | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/api/docs/release.dox b/api/docs/release.dox index d74f8f58c76..9aa9f83a1f9 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -193,6 +193,9 @@ Further non-compatibility-affecting changes include: - Added instr_is_opnd_store_source(). - Added kernel context switch sequence injection support to the drmemtrace scheduler. - Added dr_running_under_dynamorio(). + - Added #dynamorio::drmemtrace::TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH marker to + indicate the current vector length for architectures with a hardware defined or + runtime changeable vector length (such as AArch64's SVE scalable vectors). 
**************************************************
diff --git a/clients/drcachesim/docs/drcachesim.dox.in b/clients/drcachesim/docs/drcachesim.dox.in index 447fd74764f..7258db5e95c 100644 --- a/clients/drcachesim/docs/drcachesim.dox.in +++ b/clients/drcachesim/docs/drcachesim.dox.in @@ -125,7 +125,11 @@ using the drdecode decoder or any other decoder. An additional field information should be invalidated due to possibly changed application code. (For online traces, encodings are not provided unless the option `-instr_encodings` is passed, as encodings add overhead and -are not needed for many tools.) +are not needed for many tools.) Cached decoding information might also +need to be discarded if there is a +#dynamorio::drmemtrace::TRACE_MARKER_DYNAMIC_VECTOR_LENGTH marker entry +indicating a change of vector length on architectures such as AArch64 +which have a dynamic vector length. Older legacy traces may not contain instruction encodings. For those traces, encodings for static code can be obtained by From 8b53d900407e0df564248ea4ced7a50b9ed9722c Mon Sep 17 00:00:00 2001 From: Jack Gallagher Date: Mon, 5 Feb 2024 14:05:44 +0000 Subject: [PATCH 08/15] Fix typo --- clients/drcachesim/tools/invariant_checker.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clients/drcachesim/tools/invariant_checker.cpp b/clients/drcachesim/tools/invariant_checker.cpp index 829d974991d..5c0a7e71511 100644 --- a/clients/drcachesim/tools/invariant_checker.cpp +++ b/clients/drcachesim/tools/invariant_checker.cpp @@ -367,7 +367,7 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem "Stream interface page size != trace marker"); } if (memref.marker.type == TRACE_TYPE_MARKER && - memref.marker.marker_type == TRACE_MARKER_TYPE_DYNAMIC_VECTOR_SIZE) { + memref.marker.marker_type == TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH) { #ifdef AARCH64 static const int MAX_VL_BYTES = 256; // SVE's maximum vector length is 2048-bit // Vector length must be a multiple of 16 bytes between 16 and 256. 
@@ -381,7 +381,7 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem if (dr_get_sve_vector_length() != new_vl_bits) { dr_set_sve_vector_length(new_vl_bits); // Changing the vector length can change the IR representation of some SVE - // instructions but it doesn't effect any off the metadata that is stored + // instructions but it doesn't affect any of the metadata that is stored // in decode_cache_ so we don't need to flush the cache. } #else From 33449f972e17e2eb4bf56376d0e4d38a639ba168 Mon Sep 17 00:00:00 2001 From: Jack Gallagher Date: Mon, 5 Feb 2024 14:19:52 +0000 Subject: [PATCH 09/15] Fix name in docs --- clients/drcachesim/docs/drcachesim.dox.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clients/drcachesim/docs/drcachesim.dox.in b/clients/drcachesim/docs/drcachesim.dox.in index 7258db5e95c..3d796539193 100644 --- a/clients/drcachesim/docs/drcachesim.dox.in +++ b/clients/drcachesim/docs/drcachesim.dox.in @@ -127,8 +127,8 @@ code. (For online traces, encodings are not provided unless the option `-instr_encodings` is passed, as encodings add overhead and are not needed for many tools.) Cached decoding information might also need to be discarded if there is a -#dynamorio::drmemtrace::TRACE_MARKER_DYNAMIC_VECTOR_LENGTH marker entry -indicating a change of vector length on architectures such as AArch64 +#dynamorio::drmemtrace::TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH marker +entry indicating a change of vector length on architectures such as AArch64 which have a dynamic vector length. Older legacy traces may not contain instruction encodings.
For those From d021d3f988d5d381c7a30dee5c966c652e37d851 Mon Sep 17 00:00:00 2001 From: Jack Gallagher Date: Wed, 7 Feb 2024 09:22:40 +0000 Subject: [PATCH 10/15] Remove DYNAMIC from marker name --- api/docs/release.dox | 6 +++--- clients/drcachesim/common/trace_entry.h | 2 +- clients/drcachesim/docs/drcachesim.dox.in | 4 ++-- clients/drcachesim/tools/invariant_checker.cpp | 6 +++--- clients/drcachesim/tools/opcode_mix.cpp | 2 +- clients/drcachesim/tools/view.cpp | 2 +- clients/drcachesim/tracer/instru_offline.cpp | 2 +- clients/drcachesim/tracer/instru_online.cpp | 2 +- clients/drcachesim/tracer/raw2trace.cpp | 2 +- 9 files changed, 14 insertions(+), 14 deletions(-) diff --git a/api/docs/release.dox b/api/docs/release.dox index 9aa9f83a1f9..3d864c96cd6 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -193,9 +193,9 @@ Further non-compatibility-affecting changes include: - Added instr_is_opnd_store_source(). - Added kernel context switch sequence injection support to the drmemtrace scheduler. - Added dr_running_under_dynamorio(). - - Added #dynamorio::drmemtrace::TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH marker to - indicate the current vector length for architectures with a hardware defined or - runtime changeable vector length (such as AArch64's SVE scalable vectors). + - Added #dynamorio::drmemtrace::TRACE_MARKER_TYPE_VECTOR_LENGTH marker to indicate the + current vector length for architectures with a hardware defined or runtime changeable + vector length (such as AArch64's SVE scalable vectors). **************************************************
diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h index 8e7686a0194..623b261e38a 100644 --- a/clients/drcachesim/common/trace_entry.h +++ b/clients/drcachesim/common/trace_entry.h @@ -617,7 +617,7 @@ typedef enum { * dynamic vector length, for example the AArch64 SVE vector length. This does not * apply to fixed length vector architectures. */ - TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH, + TRACE_MARKER_TYPE_VECTOR_LENGTH, // ... // These values are reserved for future built-in marker types. diff --git a/clients/drcachesim/docs/drcachesim.dox.in b/clients/drcachesim/docs/drcachesim.dox.in index 3d796539193..13f6baba7d4 100644 --- a/clients/drcachesim/docs/drcachesim.dox.in +++ b/clients/drcachesim/docs/drcachesim.dox.in @@ -127,8 +127,8 @@ code. (For online traces, encodings are not provided unless the option `-instr_encodings` is passed, as encodings add overhead and are not needed for many tools.) Cached decoding information might also need to be discarded if there is a -#dynamorio::drmemtrace::TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH marker -entry indicating a change of vector length on architectures such as AArch64 +#dynamorio::drmemtrace::TRACE_MARKER_TYPE_VECTOR_LENGTH marker entry +indicating a change of vector length on architectures such as AArch64 which have a dynamic vector length. Older legacy traces may not contain instruction encodings. 
For those diff --git a/clients/drcachesim/tools/invariant_checker.cpp b/clients/drcachesim/tools/invariant_checker.cpp index 5c0a7e71511..c1966053f70 100644 --- a/clients/drcachesim/tools/invariant_checker.cpp +++ b/clients/drcachesim/tools/invariant_checker.cpp @@ -367,7 +367,7 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem "Stream interface page size != trace marker"); } if (memref.marker.type == TRACE_TYPE_MARKER && - memref.marker.marker_type == TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH) { + memref.marker.marker_type == TRACE_MARKER_TYPE_VECTOR_LENGTH) { #ifdef AARCH64 static const int MAX_VL_BYTES = 256; // SVE's maximum vector length is 2048-bit // Vector length must be a multiple of 16 bytes between 16 and 256. @@ -375,7 +375,7 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem (memref.marker.marker_value > 0) && (memref.marker.marker_value <= MAX_VL_BYTES) && (memref.marker.marker_value % 16 == 0), - "Dynamic vector length marker has invalid size"); + "Vector length marker has invalid size"); const int new_vl_bits = memref.marker.marker_value * 8; if (dr_get_sve_vector_length() != new_vl_bits) { @@ -385,7 +385,7 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem // in decode_cache_ so we don't need to flush the cache. 
} #else - report_if_false(shard, false, "Unexpected dynamic vector length marker"); + report_if_false(shard, false, "Unexpected vector length marker"); #endif } if (memref.marker.type == TRACE_TYPE_MARKER && diff --git a/clients/drcachesim/tools/opcode_mix.cpp b/clients/drcachesim/tools/opcode_mix.cpp index 8d334ed91a4..18743ff8a35 100644 --- a/clients/drcachesim/tools/opcode_mix.cpp +++ b/clients/drcachesim/tools/opcode_mix.cpp @@ -165,7 +165,7 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) return false; } } else if (memref.marker.type == TRACE_TYPE_MARKER && - memref.marker.marker_type == TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH) { + memref.marker.marker_type == TRACE_MARKER_TYPE_VECTOR_LENGTH) { #ifdef AARCH64 const int new_vl_bits = memref.marker.marker_value * 8; if (dr_get_sve_vector_length() != new_vl_bits) { diff --git a/clients/drcachesim/tools/view.cpp b/clients/drcachesim/tools/view.cpp index d228312d968..2d7391dcee6 100644 --- a/clients/drcachesim/tools/view.cpp +++ b/clients/drcachesim/tools/view.cpp @@ -443,7 +443,7 @@ view_t::parallel_shard_memref(void *shard_data, const memref_t &memref) std::cerr << "\n"; break; case TRACE_MARKER_TYPE_CORE_IDLE: std::cerr << "\n"; break; - case TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH: + case TRACE_MARKER_TYPE_VECTOR_LENGTH: std::cerr << "\n"; break; diff --git a/clients/drcachesim/tracer/instru_offline.cpp b/clients/drcachesim/tracer/instru_offline.cpp index 80379f3c043..e69596ad7e8 100644 --- a/clients/drcachesim/tracer/instru_offline.cpp +++ b/clients/drcachesim/tracer/instru_offline.cpp @@ -399,7 +399,7 @@ offline_instru_t::append_thread_header(byte *buf_ptr, thread_id_t tid, new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_PAGE_SIZE, dr_page_size()); #if defined(AARCH64) if (proc_has_feature(FEATURE_SVE)) { - new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH, + new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_VECTOR_LENGTH, 
proc_get_vector_length_bytes()); } #endif diff --git a/clients/drcachesim/tracer/instru_online.cpp b/clients/drcachesim/tracer/instru_online.cpp index 536c667096f..afbf74e71e0 100644 --- a/clients/drcachesim/tracer/instru_online.cpp +++ b/clients/drcachesim/tracer/instru_online.cpp @@ -181,7 +181,7 @@ online_instru_t::append_thread_header(byte *buf_ptr, thread_id_t tid, new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_PAGE_SIZE, dr_page_size()); #if defined(AARCH64) if (proc_has_feature(FEATURE_SVE)) { - new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH, + new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_VECTOR_LENGTH, proc_get_vector_length_bytes()); } #endif diff --git a/clients/drcachesim/tracer/raw2trace.cpp b/clients/drcachesim/tracer/raw2trace.cpp index a70b1256509..e809f245765 100644 --- a/clients/drcachesim/tracer/raw2trace.cpp +++ b/clients/drcachesim/tracer/raw2trace.cpp @@ -883,7 +883,7 @@ raw2trace_t::process_marker_additionally(raw2trace_thread_data_t *tdata, log(2, "Maybe-blocking syscall %zu\n", marker_val); buf += trace_metadata_writer_t::write_marker( buf, TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL, 0); - } else if (marker_type == TRACE_MARKER_TYPE_DYNAMIC_VECTOR_LENGTH) { + } else if (marker_type == TRACE_MARKER_TYPE_VECTOR_LENGTH) { #ifdef AARCH64 log(4, "Setting SVE vector length to %zu bytes\n", marker_val); From 85a1a55ba8e7498f5e920ef5aa0a871d55f017ee Mon Sep 17 00:00:00 2001 From: Jack Gallagher Date: Wed, 7 Feb 2024 09:41:37 +0000 Subject: [PATCH 11/15] Update comment --- clients/drcachesim/common/trace_entry.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h index 623b261e38a..e678575e7ac 100644 --- a/clients/drcachesim/common/trace_entry.h +++ b/clients/drcachesim/common/trace_entry.h @@ -613,9 +613,9 @@ typedef enum { TRACE_MARKER_TYPE_CONTEXT_SWITCH_END, /** - * This marker's value is the current vector 
length in bytes for architectures with a - * dynamic vector length, for example the AArch64 SVE vector length. This does not - * apply to fixed length vector architectures. + * This marker's value is the current thread's vector length in bytes, for + * architectures with a dynamic vector length. For example the AArch64 SVE vector + * length. This does not apply to fixed length vector architectures. */ TRACE_MARKER_TYPE_VECTOR_LENGTH, From 34f027d2d895ab360d207976492a6c86a65f34bf Mon Sep 17 00:00:00 2001 From: Jack Gallagher Date: Wed, 7 Feb 2024 10:07:25 +0000 Subject: [PATCH 12/15] Log thread id --- clients/drcachesim/tracer/raw2trace.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clients/drcachesim/tracer/raw2trace.cpp b/clients/drcachesim/tracer/raw2trace.cpp index e809f245765..dafcc254780 100644 --- a/clients/drcachesim/tracer/raw2trace.cpp +++ b/clients/drcachesim/tracer/raw2trace.cpp @@ -885,7 +885,9 @@ raw2trace_t::process_marker_additionally(raw2trace_thread_data_t *tdata, buf, TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL, 0); } else if (marker_type == TRACE_MARKER_TYPE_VECTOR_LENGTH) { #ifdef AARCH64 - log(4, "Setting SVE vector length to %zu bytes\n", marker_val); + log(4, + "Setting SVE vector length for thread " INT64_FORMAT_STRING " to %zu bytes\n", + tdata->tid, marker_val); const int new_vl_bits = marker_val * 8; if (dr_get_sve_vector_length() != new_vl_bits) { From 639641918f052acc7b552a3e2f885380f818f022 Mon Sep 17 00:00:00 2001 From: Jack Gallagher Date: Wed, 7 Feb 2024 10:23:50 +0000 Subject: [PATCH 13/15] Remove () --- clients/drcachesim/tools/invariant_checker.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clients/drcachesim/tools/invariant_checker.cpp b/clients/drcachesim/tools/invariant_checker.cpp index c1966053f70..73fbd2ce4b5 100644 --- a/clients/drcachesim/tools/invariant_checker.cpp +++ b/clients/drcachesim/tools/invariant_checker.cpp @@ -372,9 +372,9 @@ 
invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem static const int MAX_VL_BYTES = 256; // SVE's maximum vector length is 2048-bit // Vector length must be a multiple of 16 bytes between 16 and 256. report_if_false(shard, - (memref.marker.marker_value > 0) && - (memref.marker.marker_value <= MAX_VL_BYTES) && - (memref.marker.marker_value % 16 == 0), + memref.marker.marker_value > 0 && + memref.marker.marker_value <= MAX_VL_BYTES && + memref.marker.marker_value % 16 == 0, "Vector length marker has invalid size"); const int new_vl_bits = memref.marker.marker_value * 8; From 93c0869065381fdca4240b20e82ccb7c18529fb4 Mon Sep 17 00:00:00 2001 From: Jack Gallagher Date: Wed, 7 Feb 2024 11:16:20 +0000 Subject: [PATCH 14/15] Add comments --- clients/drcachesim/tracer/instru_offline.cpp | 3 +++ clients/drcachesim/tracer/instru_online.cpp | 3 +++ 2 files changed, 6 insertions(+) diff --git a/clients/drcachesim/tracer/instru_offline.cpp b/clients/drcachesim/tracer/instru_offline.cpp index e69596ad7e8..bf3e63564e5 100644 --- a/clients/drcachesim/tracer/instru_offline.cpp +++ b/clients/drcachesim/tracer/instru_offline.cpp @@ -398,6 +398,9 @@ offline_instru_t::append_thread_header(byte *buf_ptr, thread_id_t tid, proc_get_cache_line_size()); new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_PAGE_SIZE, dr_page_size()); #if defined(AARCH64) + // TRACE_MARKER_TYPE_VECTOR_LENGTH is emitted in the thread header to establish the + // initial vector length for the thread, but the marker can also be emitted again + // later if the app changes the vector length. 
if (proc_has_feature(FEATURE_SVE)) { new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_VECTOR_LENGTH, proc_get_vector_length_bytes()); diff --git a/clients/drcachesim/tracer/instru_online.cpp b/clients/drcachesim/tracer/instru_online.cpp index afbf74e71e0..58b234f915f 100644 --- a/clients/drcachesim/tracer/instru_online.cpp +++ b/clients/drcachesim/tracer/instru_online.cpp @@ -180,6 +180,9 @@ online_instru_t::append_thread_header(byte *buf_ptr, thread_id_t tid, proc_get_cache_line_size()); new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_PAGE_SIZE, dr_page_size()); #if defined(AARCH64) + // TRACE_MARKER_TYPE_VECTOR_LENGTH is emitted in the thread header to establish the + // initial vector length for the thread, but the marker can also be emitted again + // later if the app changes the vector length. if (proc_has_feature(FEATURE_SVE)) { new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_VECTOR_LENGTH, proc_get_vector_length_bytes()); From 3e2bb1ad28fbe6c69ec1e7f5958a0a256c93c2db Mon Sep 17 00:00:00 2001 From: Jack Gallagher Date: Wed, 7 Feb 2024 12:15:19 +0000 Subject: [PATCH 15/15] Update comment --- clients/drcachesim/common/trace_entry.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h index e678575e7ac..454deddb2b2 100644 --- a/clients/drcachesim/common/trace_entry.h +++ b/clients/drcachesim/common/trace_entry.h @@ -614,8 +614,16 @@ typedef enum { /** * This marker's value is the current thread's vector length in bytes, for - * architectures with a dynamic vector length. For example the AArch64 SVE vector - * length. This does not apply to fixed length vector architectures. + * architectures with a dynamic vector length. It is currently only used on AArch64. + * + * On AArch64 the marker's value contains the SVE vector length. The marker is + * emitted with the thread header to establish the initial vector length for that + * thread. 
In the future it will also be emitted later in the trace if the app + * changes the vector length at runtime (TODO i#6625). In all cases the vector + * length value is specific to the current thread. + * The vector length affects how some SVE instructions are decoded so any tools which + * decode instructions should clear any cached data and set the vector length used by + * the decoder using dr_set_sve_vector_length(). */ TRACE_MARKER_TYPE_VECTOR_LENGTH,