Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

i#5365 AArch64: Fix 0 size read/write records in drmemtrace #6544

Merged
merged 5 commits into from
Jan 18, 2024
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 121 additions & 2 deletions clients/drcachesim/tracer/raw2trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2882,6 +2882,82 @@ raw2trace_t::set_instr_summary_flags(raw2trace_thread_data_t *tdata, uint64 modi
return true;
jackgallagher-arm marked this conversation as resolved.
Show resolved Hide resolved
}

#if defined(AARCH64)
/* TODO i#5365: append_bb_entries() takes the size of the scatter/gather memory operand
 * to be the per-element value and uses that to create the read/write memref entries.
 * The AArch64 IR currently uses the maximum amount of data transferred by the instruction
 * (number of elements * per-element size) instead, so until we change the codec to use
 * the per-element size we need to use this function to set the per-element size based
 * on the instruction opcode.
 * When we have made the codec/IR changes this function can be removed.
 */
/* Maps one of the AArch64 load/store opcodes listed below to an operand size,
 * selected by the element-width suffix of the opcode name:
 *   b -> OPSZ_1, h -> OPSZ_2, w -> OPSZ_4, d -> OPSZ_8.
 * An opcode that is not listed triggers an assert and OPSZ_0 is returned.
 */
opnd_size_t
get_aarch64_scatter_gather_value_size(int opcode)
{
    /* Stays OPSZ_0 unless one of the known opcode groups matches. */
    opnd_size_t element_size = OPSZ_0;

    switch (opcode) {
    /* "b" suffix. */
    case OP_ld1b:
    case OP_ld1sb:
    case OP_ldff1b:
    case OP_ldnf1b:
    case OP_ldnt1b:
    case OP_ld1rqb:
    case OP_ld2b:
    case OP_ld3b:
    case OP_ld4b:
    case OP_st1b:
    case OP_stnt1b:
    case OP_st2b:
    case OP_st3b:
    case OP_st4b:
        element_size = OPSZ_1;
        break;

    /* "h" suffix. */
    case OP_ld1h:
    case OP_ld1sh:
    case OP_ldff1h:
    case OP_ldnf1h:
    case OP_ldnt1h:
    case OP_ld1rqh:
    case OP_ld2h:
    case OP_ld3h:
    case OP_ld4h:
    case OP_st1h:
    case OP_stnt1h:
    case OP_st2h:
    case OP_st3h:
    case OP_st4h:
        element_size = OPSZ_2;
        break;

    /* "w" suffix. */
    case OP_ld1w:
    case OP_ld1sw:
    case OP_ldff1w:
    case OP_ldnf1w:
    case OP_ldnt1w:
    case OP_ld1rqw:
    case OP_ld2w:
    case OP_ld3w:
    case OP_ld4w:
    case OP_st1w:
    case OP_stnt1w:
    case OP_st2w:
    case OP_st3w:
    case OP_st4w:
        element_size = OPSZ_4;
        break;

    /* "d" suffix. */
    case OP_ld1d:
    case OP_ldff1d:
    case OP_ldnf1d:
    case OP_ldnt1d:
    case OP_ld1rqd:
    case OP_ld2d:
    case OP_ld3d:
    case OP_ld4d:
    case OP_st1d:
    case OP_stnt1d:
    case OP_st2d:
    case OP_st3d:
    case OP_st4d:
        element_size = OPSZ_8;
        break;

    default:
        /* Unknown opcode: flag it and fall through to return OPSZ_0. */
        DR_ASSERT_MSG(
            false,
            "Instruction is not a scatter/gather/predicated contiguous load/store operation");
        break;
    }

    return element_size;
}
#endif // defined(AARCH64)

bool
instr_summary_t::construct(void *dcontext, app_pc block_start, DR_PARAM_INOUT app_pc *pc,
app_pc orig_pc, DR_PARAM_OUT instr_summary_t *desc,
Expand Down Expand Up @@ -2962,15 +3038,47 @@ instr_summary_t::construct(void *dcontext, app_pc block_start, DR_PARAM_INOUT ap
if (reads_memory || writes_memory) {
for (int i = 0, e = instr_num_srcs(instr); i < e; ++i) {
opnd_t op = instr_get_src(instr, i);
if (opnd_is_memory_reference(op))
if (opnd_is_memory_reference(op)) {
#if defined(AARCH64)
/* TODO i#5365: append_bb_entries() takes the size of the scatter/gather
* memory operand to be the per-element value and uses that to create the
* read/write memref entries.
* The AArch64 IR currently uses the maximum amount of data transferred
* by the instruction (number of elements * per element size) instead so
* until we change the codec to use the per-element size we need to fix
* it up here.
*/
if (desc->is_scatter_or_gather()) {
opnd_set_size(
&op,
get_aarch64_scatter_gather_value_size(instr_get_opcode(instr)));
}
#endif
desc->mem_srcs_and_dests_.push_back(memref_summary_t(op));
}
}
desc->num_mem_srcs_ = static_cast<uint8_t>(desc->mem_srcs_and_dests_.size());

for (int i = 0, e = instr_num_dsts(instr); i < e; ++i) {
opnd_t op = instr_get_dst(instr, i);
if (opnd_is_memory_reference(op))
if (opnd_is_memory_reference(op)) {
#if defined(AARCH64)
/* TODO i#5365: append_bb_entries() takes the size of the scatter/gather
* memory operand to be the per-element value and uses that to create the
* read/write memref entries.
* The AArch64 IR currently uses the maximum amount of data transferred
* by the instruction (number of elements * per element size) instead so
* until we change the codec to use the per-element size we need to fix
* it up here.
*/
if (desc->is_scatter_or_gather()) {
opnd_set_size(
&op,
get_aarch64_scatter_gather_value_size(instr_get_opcode(instr)));
}
#endif
desc->mem_srcs_and_dests_.push_back(memref_summary_t(op));
}
}
}
return true;
Expand Down Expand Up @@ -3738,6 +3846,17 @@ raw2trace_t::raw2trace_t(
decode_cache_.reserve(cache_count);
for (int i = 0; i < cache_count; ++i)
decode_cache_.emplace_back(cache_count);

#if defined(AARCH64)
// TODO i#5365: The decoder uses a global sve_veclen variable to store the vector
// length value it uses when decoding. drdecodelib ends up being linked into
// drcachesim twice: once into the drcachesim executable, and once into libdynamorio.
// When we call dr_standalone_init() above it will initialize the version of
// sve_veclen in libdynamorio, but not the one in drcachesim.
// Unfortunately it is the version of sve_veclen in drcachesim that gets used when
// decoding in raw2trace, so we need to explicitly initialize its sve_veclen here.
dr_set_sve_vector_length(proc_get_vector_length_bytes() * 8);
#endif
}

raw2trace_t::~raw2trace_t()
Expand Down
Loading