diff --git a/api/docs/release.dox b/api/docs/release.dox
index d74f8f58c76..3d864c96cd6 100644
--- a/api/docs/release.dox
+++ b/api/docs/release.dox
@@ -193,6 +193,9 @@ Further non-compatibility-affecting changes include:
- Added instr_is_opnd_store_source().
- Added kernel context switch sequence injection support to the drmemtrace scheduler.
- Added dr_running_under_dynamorio().
+ - Added #dynamorio::drmemtrace::TRACE_MARKER_TYPE_VECTOR_LENGTH marker to indicate the
+ current vector length for architectures with a hardware-defined or runtime-changeable
+ vector length (such as AArch64's SVE scalable vectors).
**************************************************
diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h
index a34a2963225..454deddb2b2 100644
--- a/clients/drcachesim/common/trace_entry.h
+++ b/clients/drcachesim/common/trace_entry.h
@@ -612,6 +612,21 @@ typedef enum {
*/
TRACE_MARKER_TYPE_CONTEXT_SWITCH_END,
+ /**
+ * This marker's value is the current thread's vector length in bytes, for
+ * architectures with a dynamic vector length. It is currently only used on AArch64.
+ *
+ * On AArch64 the marker's value contains the SVE vector length. The marker is
+ * emitted with the thread header to establish the initial vector length for that
+ * thread. In the future it will also be emitted later in the trace if the app
+ * changes the vector length at runtime (TODO i#6625). In all cases the vector
+ * length value is specific to the current thread.
+ * The vector length affects how some SVE instructions are decoded so any tools which
+ * decode instructions should clear any cached data and set the vector length used by
+ * the decoder using dr_set_sve_vector_length().
+ */
+ TRACE_MARKER_TYPE_VECTOR_LENGTH,
+
// ...
// These values are reserved for future built-in marker types.
// ...
diff --git a/clients/drcachesim/docs/drcachesim.dox.in b/clients/drcachesim/docs/drcachesim.dox.in
index 447fd74764f..13f6baba7d4 100644
--- a/clients/drcachesim/docs/drcachesim.dox.in
+++ b/clients/drcachesim/docs/drcachesim.dox.in
@@ -125,7 +125,11 @@ using the drdecode decoder or any other decoder. An additional field
information should be invalidated due to possibly changed application
code. (For online traces, encodings are not provided unless the
option `-instr_encodings` is passed, as encodings add overhead and
-are not needed for many tools.)
+are not needed for many tools.) Cached decoding information might also
+need to be discarded if there is a
+#dynamorio::drmemtrace::TRACE_MARKER_TYPE_VECTOR_LENGTH marker entry
+indicating a change of vector length on architectures such as AArch64
+which have a dynamic vector length.
Older legacy traces may not contain instruction encodings. For those
traces, encodings for static code can be obtained by
diff --git a/clients/drcachesim/tests/allasm-scattergather-vl-view-aarch64.templatex b/clients/drcachesim/tests/allasm-scattergather-vl-view-aarch64.templatex
new file mode 100644
index 00000000000..e609dea3c8f
--- /dev/null
+++ b/clients/drcachesim/tests/allasm-scattergather-vl-view-aarch64.templatex
@@ -0,0 +1,9 @@
+.*
+#if __ARM_FEATURE_SVE_BITS == 128
+.*
+#elif __ARM_FEATURE_SVE_BITS == 256
+.*
+#elif __ARM_FEATURE_SVE_BITS == 512
+.*
+#endif
+.*
diff --git a/clients/drcachesim/tests/allasm_scattergather_aarch64.asm b/clients/drcachesim/tests/allasm_scattergather_aarch64.asm
index 658e12a40a0..8e08ceee19b 100644
--- a/clients/drcachesim/tests/allasm_scattergather_aarch64.asm
+++ b/clients/drcachesim/tests/allasm_scattergather_aarch64.asm
@@ -292,75 +292,75 @@ test_scalar_plus_scalar:
test_scalar_plus_immediate:
- ld1b DEST_REG1.b, B_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 16
- ld1b DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 8
- ld1b DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 4
- ld1b DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 2
- ldnt1b DEST_REG1.b, B_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 16
- ld1sb DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 8
- ld1sb DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 4
- ld1sb DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 2
- ld1h DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 8
- ld1h DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 4
- ld1h DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 2
- ldnt1h DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 8
- ld1sh DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 4
- ld1sh DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 2
- ld1w DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 4
- ld1w DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 2
- ldnt1w DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 4
- ld1sw DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 2
- ld1d DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 2
- ldnt1d DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 2
+ ld1b DEST_REG1.b, B_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 16
+ ld1b DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 8
+ ld1b DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 4
+ ld1b DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2
+ ldnt1b DEST_REG1.b, B_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 16
+ ld1sb DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 8
+ ld1sb DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 4
+ ld1sb DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2
+ ld1h DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 8
+ ld1h DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 4
+ ld1h DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2
+ ldnt1h DEST_REG1.h, H_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 8
+ ld1sh DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 4
+ ld1sh DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2
+ ld1w DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 4
+ ld1w DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2
+ ldnt1w DEST_REG1.s, S_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 4
+ ld1sw DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2
+ ld1d DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2
+ ldnt1d DEST_REG1.d, D_MASK_REG/z, [BUFFER_REG, #1, mul vl] // 2
// Total: 104
- ld2b { DEST_REG1.b, DEST_REG2.b }, B_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 32
- ld2h { DEST_REG1.h, DEST_REG2.h }, H_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 16
- ld2w { DEST_REG1.s, DEST_REG2.s }, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 8
- ld2d { DEST_REG1.d, DEST_REG2.d }, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 4
+ ld2b { DEST_REG1.b, DEST_REG2.b }, B_MASK_REG/z, [BUFFER_REG, #2, mul vl] // 32
+ ld2h { DEST_REG1.h, DEST_REG2.h }, H_MASK_REG/z, [BUFFER_REG, #2, mul vl] // 16
+ ld2w { DEST_REG1.s, DEST_REG2.s }, S_MASK_REG/z, [BUFFER_REG, #2, mul vl] // 8
+ ld2d { DEST_REG1.d, DEST_REG2.d }, D_MASK_REG/z, [BUFFER_REG, #2, mul vl] // 4
// Total: 60
- ld3b { DEST_REG1.b, DEST_REG2.b, DEST_REG3.b }, B_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 48
- ld3h { DEST_REG1.h, DEST_REG2.h, DEST_REG3.h }, H_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 24
- ld3w { DEST_REG1.s, DEST_REG2.s, DEST_REG3.s }, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 12
- ld3d { DEST_REG1.d, DEST_REG2.d, DEST_REG3.d }, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 6
+ ld3b { DEST_REG1.b, DEST_REG2.b, DEST_REG3.b }, B_MASK_REG/z, [BUFFER_REG, #3, mul vl] // 48
+ ld3h { DEST_REG1.h, DEST_REG2.h, DEST_REG3.h }, H_MASK_REG/z, [BUFFER_REG, #3, mul vl] // 24
+ ld3w { DEST_REG1.s, DEST_REG2.s, DEST_REG3.s }, S_MASK_REG/z, [BUFFER_REG, #3, mul vl] // 12
+ ld3d { DEST_REG1.d, DEST_REG2.d, DEST_REG3.d }, D_MASK_REG/z, [BUFFER_REG, #3, mul vl] // 6
// Total: 90
- ld4b { DEST_REG1.b, DEST_REG2.b, DEST_REG3.b, DEST_REG4.b }, B_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 64
- ld4h { DEST_REG1.h, DEST_REG2.h, DEST_REG3.h, DEST_REG4.h }, H_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 32
- ld4w { DEST_REG1.s, DEST_REG2.s, DEST_REG3.s, DEST_REG4.s }, S_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 16
- ld4d { DEST_REG1.d, DEST_REG2.d, DEST_REG3.d, DEST_REG4.d }, D_MASK_REG/z, [BUFFER_REG, #0, mul vl] // 8
+ ld4b { DEST_REG1.b, DEST_REG2.b, DEST_REG3.b, DEST_REG4.b }, B_MASK_REG/z, [BUFFER_REG, #4, mul vl] // 64
+ ld4h { DEST_REG1.h, DEST_REG2.h, DEST_REG3.h, DEST_REG4.h }, H_MASK_REG/z, [BUFFER_REG, #4, mul vl] // 32
+ ld4w { DEST_REG1.s, DEST_REG2.s, DEST_REG3.s, DEST_REG4.s }, S_MASK_REG/z, [BUFFER_REG, #4, mul vl] // 16
+ ld4d { DEST_REG1.d, DEST_REG2.d, DEST_REG3.d, DEST_REG4.d }, D_MASK_REG/z, [BUFFER_REG, #4, mul vl] // 8
// Total: 120
// Total loads: 104 + 60 + 90 + 120 = 374
- st1b SRC_REG1.b, B_MASK_REG, [BUFFER_REG, #0, mul vl] // 16
- st1b SRC_REG1.h, H_MASK_REG, [BUFFER_REG, #0, mul vl] // 8
- st1b SRC_REG1.s, S_MASK_REG, [BUFFER_REG, #0, mul vl] // 4
- st1b SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #0, mul vl] // 2
- st1h SRC_REG1.h, H_MASK_REG, [BUFFER_REG, #0, mul vl] // 8
- st1h SRC_REG1.s, S_MASK_REG, [BUFFER_REG, #0, mul vl] // 4
- st1h SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #0, mul vl] // 2
- st1w SRC_REG1.s, S_MASK_REG, [BUFFER_REG, #0, mul vl] // 4
- st1w SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #0, mul vl] // 2
- st1d SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #0, mul vl] // 2
+ st1b SRC_REG1.b, B_MASK_REG, [BUFFER_REG, #1, mul vl] // 16
+ st1b SRC_REG1.h, H_MASK_REG, [BUFFER_REG, #1, mul vl] // 8
+ st1b SRC_REG1.s, S_MASK_REG, [BUFFER_REG, #1, mul vl] // 4
+ st1b SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #1, mul vl] // 2
+ st1h SRC_REG1.h, H_MASK_REG, [BUFFER_REG, #1, mul vl] // 8
+ st1h SRC_REG1.s, S_MASK_REG, [BUFFER_REG, #1, mul vl] // 4
+ st1h SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #1, mul vl] // 2
+ st1w SRC_REG1.s, S_MASK_REG, [BUFFER_REG, #1, mul vl] // 4
+ st1w SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #1, mul vl] // 2
+ st1d SRC_REG1.d, D_MASK_REG, [BUFFER_REG, #1, mul vl] // 2
// Total: 52
- st2b { SRC_REG1.b, SRC_REG2.b }, B_MASK_REG, [BUFFER_REG, #0, mul vl] // 32
- st2h { SRC_REG1.h, SRC_REG2.h }, H_MASK_REG, [BUFFER_REG, #0, mul vl] // 16
- st2w { SRC_REG1.s, SRC_REG2.s }, S_MASK_REG, [BUFFER_REG, #0, mul vl] // 8
- st2d { SRC_REG1.d, SRC_REG2.d }, D_MASK_REG, [BUFFER_REG, #0, mul vl] // 4
+ st2b { SRC_REG1.b, SRC_REG2.b }, B_MASK_REG, [BUFFER_REG, #2, mul vl] // 32
+ st2h { SRC_REG1.h, SRC_REG2.h }, H_MASK_REG, [BUFFER_REG, #2, mul vl] // 16
+ st2w { SRC_REG1.s, SRC_REG2.s }, S_MASK_REG, [BUFFER_REG, #2, mul vl] // 8
+ st2d { SRC_REG1.d, SRC_REG2.d }, D_MASK_REG, [BUFFER_REG, #2, mul vl] // 4
// Total: 60
- st3b { SRC_REG1.b, SRC_REG2.b, SRC_REG3.b }, B_MASK_REG, [BUFFER_REG, #0, mul vl] // 48
- st3h { SRC_REG1.h, SRC_REG2.h, SRC_REG3.h }, H_MASK_REG, [BUFFER_REG, #0, mul vl] // 24
- st3w { SRC_REG1.s, SRC_REG2.s, SRC_REG3.s }, S_MASK_REG, [BUFFER_REG, #0, mul vl] // 12
- st3d { SRC_REG1.d, SRC_REG2.d, SRC_REG3.d }, D_MASK_REG, [BUFFER_REG, #0, mul vl] // 6
+ st3b { SRC_REG1.b, SRC_REG2.b, SRC_REG3.b }, B_MASK_REG, [BUFFER_REG, #3, mul vl] // 48
+ st3h { SRC_REG1.h, SRC_REG2.h, SRC_REG3.h }, H_MASK_REG, [BUFFER_REG, #3, mul vl] // 24
+ st3w { SRC_REG1.s, SRC_REG2.s, SRC_REG3.s }, S_MASK_REG, [BUFFER_REG, #3, mul vl] // 12
+ st3d { SRC_REG1.d, SRC_REG2.d, SRC_REG3.d }, D_MASK_REG, [BUFFER_REG, #3, mul vl] // 6
// Total: 90
- st4b { SRC_REG1.b, SRC_REG2.b, SRC_REG3.b, SRC_REG4.b }, B_MASK_REG, [BUFFER_REG, #0, mul vl] // 64
- st4h { SRC_REG1.h, SRC_REG2.h, SRC_REG3.h, SRC_REG4.h }, H_MASK_REG, [BUFFER_REG, #0, mul vl] // 32
- st4w { SRC_REG1.s, SRC_REG2.s, SRC_REG3.s, SRC_REG4.s }, S_MASK_REG, [BUFFER_REG, #0, mul vl] // 16
- st4d { SRC_REG1.d, SRC_REG2.d, SRC_REG3.d, SRC_REG4.d }, D_MASK_REG, [BUFFER_REG, #0, mul vl] // 8
+ st4b { SRC_REG1.b, SRC_REG2.b, SRC_REG3.b, SRC_REG4.b }, B_MASK_REG, [BUFFER_REG, #4, mul vl] // 64
+ st4h { SRC_REG1.h, SRC_REG2.h, SRC_REG3.h, SRC_REG4.h }, H_MASK_REG, [BUFFER_REG, #4, mul vl] // 32
+ st4w { SRC_REG1.s, SRC_REG2.s, SRC_REG3.s, SRC_REG4.s }, S_MASK_REG, [BUFFER_REG, #4, mul vl] // 16
+ st4d { SRC_REG1.d, SRC_REG2.d, SRC_REG3.d, SRC_REG4.d }, D_MASK_REG, [BUFFER_REG, #4, mul vl] // 8
// Total: 120
// Total stores: 52 + 60 + 90 + 120 = 322
@@ -557,4 +557,9 @@ helloworld:
.ascii "Hello, world!\n"
buffer:
- .zero 1024 // Maximum size of an SVE Z register * 4.
+ .zero 2048 // Maximum size of an SVE Z register * 8.
+ // This gives us enough space to use non-zero offsets (up to
+ // #4, mul vl for ld4*/st4*) in scalar+immediate/vector+immediate
+ // instructions, which lets us check the VL scaling of
+ // offsets in the IR in
+ // tool.drcacheoff.allasm-scattergather-vl-view.
diff --git a/clients/drcachesim/tests/offline-allasm-scattergather-vl-view-aarch64.templatex b/clients/drcachesim/tests/offline-allasm-scattergather-vl-view-aarch64.templatex
new file mode 100644
index 00000000000..7e7d070e6f4
--- /dev/null
+++ b/clients/drcachesim/tests/offline-allasm-scattergather-vl-view-aarch64.templatex
@@ -0,0 +1,15 @@
+.*
+#if __ARM_FEATURE_SVE_BITS == 128
+.*
+.*a401a03c ld1b \+0x10\(%x1\)\[1byte\] %p0/z -> %z28\.b
+.*e5d1ec3c st3d %z28\.d %z29\.d %z30\.d %p3 -> \+0x30\(%x1\)\[8byte\]
+#elif __ARM_FEATURE_SVE_BITS == 256
+.*
+.*a401a03c ld1b \+0x20\(%x1\)\[1byte\] %p0/z -> %z28\.b
+.*e5d1ec3c st3d %z28\.d %z29\.d %z30\.d %p3 -> \+0x60\(%x1\)\[8byte\]
+#elif __ARM_FEATURE_SVE_BITS == 512
+.*
+.*a401a03c ld1b \+0x40\(%x1\)\[1byte\] %p0/z -> %z28\.b
+.*e5d1ec3c st3d %z28\.d %z29\.d %z30\.d %p3 -> \+0xc0\(%x1\)\[8byte\]
+#endif
+.*
diff --git a/clients/drcachesim/tests/offline-view.templatex b/clients/drcachesim/tests/offline-view.templatex
index c1f4943ae9d..c2a3ddecf7c 100644
--- a/clients/drcachesim/tests/offline-view.templatex
+++ b/clients/drcachesim/tests/offline-view.templatex
@@ -7,9 +7,16 @@ Output format:
3 0: +[0-9]+
4 0: +[0-9]+
5 0: +[0-9]+
+#ifdef __ARM_FEATURE_SVE
+ 6 0: +[0-9]+
+ 7 0: +[0-9]+
+ 8 0: +[0-9]+
+ 9 1: +[0-9]+ ifetch .*
+#else
6 0: +[0-9]+
7 0: +[0-9]+
8 1: +[0-9]+ ifetch .*
+#endif
.*
View tool results:
*[0-9]* : total instructions
diff --git a/clients/drcachesim/tools/invariant_checker.cpp b/clients/drcachesim/tools/invariant_checker.cpp
index 66cac4b11c6..73fbd2ce4b5 100644
--- a/clients/drcachesim/tools/invariant_checker.cpp
+++ b/clients/drcachesim/tools/invariant_checker.cpp
@@ -366,6 +366,28 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem
memref.marker.marker_value == shard->stream->get_page_size(),
"Stream interface page size != trace marker");
}
+ if (memref.marker.type == TRACE_TYPE_MARKER &&
+ memref.marker.marker_type == TRACE_MARKER_TYPE_VECTOR_LENGTH) {
+#ifdef AARCH64
+ static const int MAX_VL_BYTES = 256; // SVE's maximum vector length is 2048-bit
+ // Vector length must be a multiple of 16 bytes between 16 and 256.
+ report_if_false(shard,
+ memref.marker.marker_value > 0 &&
+ memref.marker.marker_value <= MAX_VL_BYTES &&
+ memref.marker.marker_value % 16 == 0,
+ "Vector length marker has invalid size");
+
+ const int new_vl_bits = memref.marker.marker_value * 8;
+ if (dr_get_sve_vector_length() != new_vl_bits) {
+ dr_set_sve_vector_length(new_vl_bits);
+ // Changing the vector length can change the IR representation of some SVE
+ // instructions but it doesn't affect any of the metadata that is stored
+ // in decode_cache_ so we don't need to flush the cache.
+ }
+#else
+ report_if_false(shard, false, "Unexpected vector length marker");
+#endif
+ }
if (memref.marker.type == TRACE_TYPE_MARKER &&
memref.marker.marker_type == TRACE_MARKER_TYPE_VERSION) {
shard->trace_version_ = memref.marker.marker_value;
diff --git a/clients/drcachesim/tools/opcode_mix.cpp b/clients/drcachesim/tools/opcode_mix.cpp
index 5389829c5ac..18743ff8a35 100644
--- a/clients/drcachesim/tools/opcode_mix.cpp
+++ b/clients/drcachesim/tools/opcode_mix.cpp
@@ -164,6 +164,17 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
" but tool built for " + trace_arch_string(build_target_arch_type());
return false;
}
+ } else if (memref.marker.type == TRACE_TYPE_MARKER &&
+ memref.marker.marker_type == TRACE_MARKER_TYPE_VECTOR_LENGTH) {
+#ifdef AARCH64
+ const int new_vl_bits = memref.marker.marker_value * 8;
+ if (dr_get_sve_vector_length() != new_vl_bits) {
+ dr_set_sve_vector_length(new_vl_bits);
+ // Changing the vector length can change the IR representation of some SVE
+ // instructions but it will never change the opcode so we don't need to
+ // flush the opcode cache.
+ }
+#endif
}
if (!type_is_instr(memref.instr.type) &&
memref.data.type != TRACE_TYPE_INSTR_NO_FETCH) {
diff --git a/clients/drcachesim/tools/view.cpp b/clients/drcachesim/tools/view.cpp
index b98e4d6a165..2d7391dcee6 100644
--- a/clients/drcachesim/tools/view.cpp
+++ b/clients/drcachesim/tools/view.cpp
@@ -443,6 +443,10 @@ view_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
std::cerr << "\n";
break;
case TRACE_MARKER_TYPE_CORE_IDLE: std::cerr << "\n"; break;
+ case TRACE_MARKER_TYPE_VECTOR_LENGTH:
+ std::cerr << "\n";
+ break;
default:
std::cerr << "\n";
diff --git a/clients/drcachesim/tracer/instru_offline.cpp b/clients/drcachesim/tracer/instru_offline.cpp
index 96ff48a21f2..bf3e63564e5 100644
--- a/clients/drcachesim/tracer/instru_offline.cpp
+++ b/clients/drcachesim/tracer/instru_offline.cpp
@@ -397,6 +397,15 @@ offline_instru_t::append_thread_header(byte *buf_ptr, thread_id_t tid,
new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_CACHE_LINE_SIZE,
proc_get_cache_line_size());
new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_PAGE_SIZE, dr_page_size());
+#if defined(AARCH64)
+ // TRACE_MARKER_TYPE_VECTOR_LENGTH is emitted in the thread header to establish the
+ // initial vector length for the thread. In the future the marker will also be
+ // emitted again later if the app changes the vector length (TODO i#6625).
+ if (proc_has_feature(FEATURE_SVE)) {
+ new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_VECTOR_LENGTH,
+ proc_get_vector_length_bytes());
+ }
+#endif
return (int)(new_buf - buf_ptr);
}
diff --git a/clients/drcachesim/tracer/instru_online.cpp b/clients/drcachesim/tracer/instru_online.cpp
index c2d78050f8d..58b234f915f 100644
--- a/clients/drcachesim/tracer/instru_online.cpp
+++ b/clients/drcachesim/tracer/instru_online.cpp
@@ -179,6 +179,15 @@ online_instru_t::append_thread_header(byte *buf_ptr, thread_id_t tid,
new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_CACHE_LINE_SIZE,
proc_get_cache_line_size());
new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_PAGE_SIZE, dr_page_size());
+#if defined(AARCH64)
+ // TRACE_MARKER_TYPE_VECTOR_LENGTH is emitted in the thread header to establish the
+ // initial vector length for the thread. In the future the marker will also be
+ // emitted again later if the app changes the vector length (TODO i#6625).
+ if (proc_has_feature(FEATURE_SVE)) {
+ new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_VECTOR_LENGTH,
+ proc_get_vector_length_bytes());
+ }
+#endif
return (int)(new_buf - buf_ptr);
}
diff --git a/clients/drcachesim/tracer/raw2trace.cpp b/clients/drcachesim/tracer/raw2trace.cpp
index 736f7c8f3b9..dafcc254780 100644
--- a/clients/drcachesim/tracer/raw2trace.cpp
+++ b/clients/drcachesim/tracer/raw2trace.cpp
@@ -883,6 +883,23 @@ raw2trace_t::process_marker_additionally(raw2trace_thread_data_t *tdata,
log(2, "Maybe-blocking syscall %zu\n", marker_val);
buf += trace_metadata_writer_t::write_marker(
buf, TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL, 0);
+ } else if (marker_type == TRACE_MARKER_TYPE_VECTOR_LENGTH) {
+#ifdef AARCH64
+ log(4,
+ "Setting SVE vector length for thread " INT64_FORMAT_STRING " to %zu bytes\n",
+ tdata->tid, marker_val);
+
+ const int new_vl_bits = marker_val * 8;
+ if (dr_get_sve_vector_length() != new_vl_bits) {
+ dr_set_sve_vector_length(new_vl_bits);
+ // Some SVE load/store instructions have an offset which is scaled by a value
+ // that depends on the vector length. These instructions will need to be
+ // re-decoded after the vector length changes.
+ *flush_decode_cache = true;
+ }
+#else
+ log(2, "Ignoring unexpected dynamic vector length marker\n");
+#endif
}
return true;
}
@@ -932,6 +949,7 @@ raw2trace_t::read_header(raw2trace_thread_data_t *tdata,
header->cache_line_size = proc_get_cache_line_size();
unread_last_entry(tdata);
}
+
return true;
}
@@ -3783,17 +3801,6 @@ raw2trace_t::raw2trace_t(
decode_cache_.reserve(cache_count);
for (int i = 0; i < cache_count; ++i)
decode_cache_.emplace_back(cache_count);
-
-#if defined(AARCH64)
- // TODO i#6556, i#1684: The decoder uses a global sve_veclen variable to store the
- // vector length value it uses when decoding. drdecodelib ends up being linked into
- // drcachesim twice: once into the drcachesim executable, and one into libdynamorio.
- // When we call dr_standalone_init() above it will initialize the version of
- // sve_veclen in libdynamorio, but not the one in drcachesim.
- // Unfortunately it is the version of sve_veclen in drcachesim that gets used when
- // decoding in raw2trace so we need to explicitly initialize its sve_veclen here.
- dr_set_sve_vector_length(proc_get_vector_length_bytes() * 8);
-#endif
}
raw2trace_t::~raw2trace_t()
diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt
index 102d14794a7..bb16d436851 100644
--- a/suite/tests/CMakeLists.txt
+++ b/suite/tests/CMakeLists.txt
@@ -4241,6 +4241,10 @@ if (BUILD_CLIENTS)
torunonly_drcacheoff(view ${ci_shared_app} ""
"@-simulator_type@view@-sim_refs@16384" "")
+ unset(tool.drcacheoff.view_rawtemp) # Use preprocessor
+ if (AARCH64 AND proc_supports_sve)
+ set(tool.drcacheoff.view_runsve 1)
+ endif ()
set(tool.drcacheoff.func_view_full_run ON) # Fails on Windows if truncated.
torunonly_drcacheoff(func_view common.fib "-record_function fib|1"
@@ -4503,6 +4507,22 @@ if (BUILD_CLIENTS)
"allasm-scattergather-basic-counts-${ARCH_NAME}")
endif ()
+ if (UNIX AND AARCH64 AND proc_supports_sve)
+ torunonly_drcacheoff(allasm-scattergather-vl-view allasm_scattergather
+ "" "@-simulator_type@view" "")
+ unset(tool.drcacheoff.allasm-scattergather-vl-view_rawtemp) # use preprocessor
+ set(tool.drcacheoff.allasm-scattergather-vl-view_runsve 1)
+ set(tool.drcacheoff.allasm-scattergather-vl-view_expectbase
+ "offline-allasm-scattergather-vl-view-${ARCH_NAME}")
+
+ torunonly_drcachesim(allasm-scattergather-vl-view allasm_scattergather
+ "-simulator_type view" "")
+ unset(tool.drcachesim.allasm-scattergather-vl-view_rawtemp) # use preprocessor
+ set(tool.drcachesim.allasm-scattergather-vl-view_runsve 1)
+ set(tool.drcachesim.allasm-scattergather-vl-view_expectbase
+ "allasm-scattergather-vl-view-${ARCH_NAME}")
+ endif ()
+
if (UNIX AND X86 AND X64)
torunonly_drcacheoff(allasm-repstr-basic-counts allasm_repstr
"" "@-simulator_type@basic_counts" "")