Skip to content

Commit

Permalink
i#5994 opcode_mix, part1: add categories to opcode_mix output (#6512)
Browse files Browse the repository at this point in the history
Groups instructions that belong to the same categories together, and
prints (to std::cerr) a count of them at the end of opcode counts.

Issue: #5994
  • Loading branch information
edeiana authored Feb 14, 2024
1 parent 1f306f7 commit 4627576
Show file tree
Hide file tree
Showing 8 changed files with 193 additions and 21 deletions.
2 changes: 2 additions & 0 deletions api/docs/release.dox
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,8 @@ Further non-compatibility-affecting changes include:
- Added instr_is_opnd_store_source().
- Added kernel context switch sequence injection support to the drmemtrace scheduler.
- Added dr_running_under_dynamorio().
- Added the instr_get_category_name() API, which returns the name of an instruction
category as a string (const char *).
- Added #dynamorio::drmemtrace::TRACE_MARKER_TYPE_VECTOR_LENGTH marker to indicate the
current vector length for architectures with a hardware defined or runtime changeable
vector length (such as AArch64's SVE scalable vectors).
Expand Down
32 changes: 32 additions & 0 deletions clients/drcachesim/tests/offline-opcode_categories.templatex
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
Hello world!
Hello world!
Hello world!
Hello world!
Hello world!
Hello world!
Hello world!
Hello world!
Hello world!
Hello world!
Hello world!
Hello world!
Hello world!
Hello world!
Hello world!
Hello world!
Opcode mix tool results:
133 : total executed instructions
34 : mov
17 : mov
17 : syscall
16 : sub
16 : cmp
16 : jnz
16 : lea
1 : and

4 : sets of categories
51 : move
33 : branch
33 : math
16 : load
58 changes: 52 additions & 6 deletions clients/drcachesim/tools/opcode_mix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
decode_pc = const_cast<app_pc>(memref.instr.encoding);
if (memref.instr.encoding_is_new) {
// The code may have changed: invalidate the cache.
shard->worker->opcode_cache.erase(trace_pc);
shard->worker->opcode_data_cache.erase(trace_pc);
}
} else {
// Legacy trace support where we need the binaries.
Expand Down Expand Up @@ -221,9 +221,11 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
}
}
int opcode;
auto cached_opcode = shard->worker->opcode_cache.find(trace_pc);
if (cached_opcode != shard->worker->opcode_cache.end()) {
opcode = cached_opcode->second;
uint category;
auto cached_opcode_category = shard->worker->opcode_data_cache.find(trace_pc);
if (cached_opcode_category != shard->worker->opcode_data_cache.end()) {
opcode = cached_opcode_category->second.opcode;
category = cached_opcode_category->second.category;
} else {
instr_t instr;
instr_init(dcontext_.dcontext, &instr);
Expand All @@ -236,10 +238,12 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
return false;
}
opcode = instr_get_opcode(&instr);
shard->worker->opcode_cache[trace_pc] = opcode;
category = instr_get_category(&instr);
shard->worker->opcode_data_cache[trace_pc] = opcode_data_t(opcode, category);
instr_free(dcontext_.dcontext, &instr);
}
++shard->opcode_counts[opcode];
++shard->category_counts[category];
return true;
}

Expand All @@ -263,7 +267,35 @@ opcode_mix_t::process_memref(const memref_t &memref)
/* Ordering predicate for (key, count) pairs used when sorting results for
 * printing.  Pairs with a larger count sort first; pairs with equal counts
 * are ordered by ascending key so the printed order is deterministic rather
 * than depending on the iteration order of the source container.
 */
static bool
cmp_val(const std::pair<int, int64_t> &l, const std::pair<int, int64_t> &r)
{
    if (l.second != r.second) {
        return l.second > r.second;
    }
    // Equal counts: fall back to the key to get a strict, stable ordering.
    return l.first < r.first;
}

/* Builds a printable label for the category bit set \p category.
 *
 * If \p category equals DR_INSTR_CATEGORY_UNCATEGORIZED, the name of that
 * constant is returned.  Otherwise every bit of \p category is tested, from
 * the lowest to the highest of the 32 bits, and the name reported by
 * instr_get_category_name() for each set bit is appended.  Names are
 * separated by a single space, with no leading or trailing separator.
 */
std::string
opcode_mix_t::get_category_names(uint category)
{
    if (category == DR_INSTR_CATEGORY_UNCATEGORIZED) {
        return instr_get_category_name(DR_INSTR_CATEGORY_UNCATEGORIZED);
    }

    std::string category_name;
    const uint max_mask = 0x80000000;
    for (uint mask = 0x1; mask <= max_mask; mask <<= 1) {
        if (TESTANY(mask, category)) {
            // Only put a separator between names, never before the first one.
            if (!category_name.empty()) {
                category_name += " ";
            }
            category_name +=
                instr_get_category_name(static_cast<dr_instr_category_t>(mask));
        }

        /*
         * Guard against 32 bit overflow: shifting the top bit out would wrap
         * the mask to 0, which still satisfies the loop condition.
         */
        if (mask == max_mask) {
            break;
        }
    }

    return category_name;
}

bool
Expand All @@ -278,6 +310,9 @@ opcode_mix_t::print_results()
for (const auto &keyvals : shard.second->opcode_counts) {
total.opcode_counts[keyvals.first] += keyvals.second;
}
for (const auto &keyvals : shard.second->category_counts) {
total.category_counts[keyvals.first] += keyvals.second;
}
}
}
std::cerr << TOOL_NAME << " results:\n";
Expand All @@ -289,6 +324,17 @@ opcode_mix_t::print_results()
std::cerr << std::setw(15) << keyvals.second << " : " << std::setw(9)
<< decode_opcode_name(keyvals.first) << "\n";
}
std::cerr << "\n";
std::cerr << std::setw(15) << total.category_counts.size()
<< " : sets of categories\n";
std::vector<std::pair<uint, int64_t>> sorted_category_counts(
total.category_counts.begin(), total.category_counts.end());
std::sort(sorted_category_counts.begin(), sorted_category_counts.end(), cmp_val);
for (const auto &keyvals : sorted_category_counts) {
std::cerr << std::setw(15) << keyvals.second << " : " << std::setw(9)
<< get_category_names(keyvals.first) << "\n";
}

return true;
}

Expand Down
28 changes: 27 additions & 1 deletion clients/drcachesim/tools/opcode_mix.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include <memory>
#include <mutex>
#include <string>
#include <utility>
#include <unordered_map>

#include "dr_api.h" // Must be before trace_entry.h from analysis_tool.h.
Expand Down Expand Up @@ -82,8 +83,32 @@ class opcode_mix_t : public analysis_tool_t {
parallel_shard_error(void *shard_data) override;

protected:
// Returns a printable string for the category bits in 'category': the
// "uncategorized" name when it equals DR_INSTR_CATEGORY_UNCATEGORIZED,
// otherwise the space-separated names of every category bit that is set.
std::string
get_category_names(uint category);

// Decoded per-instruction data stored in the cache: the opcode together
// with the category bits of the instruction.
struct opcode_data_t {
    // Opcode of the instruction; OP_INVALID for a default-constructed entry.
    int opcode = OP_INVALID;
    /*
     * Stored as a uint rather than a dr_instr_category_t: an instruction
     * may belong to several categories at once, in which case more than one
     * category bit is set.  32 bits (i.e., 32 categories) is assumed to be
     * enough to be future-proof.
     */
    uint category = DR_INSTR_CATEGORY_UNCATEGORIZED;

    opcode_data_t() = default;
    opcode_data_t(int opcode, uint category)
        : opcode(opcode)
        , category(category)
    {
    }
};

// Per-worker state, reached through shard->worker: caches of decode
// results keyed by the trace PC of an instruction, so a PC that was already
// decoded is looked up instead of being decoded again.
struct worker_data_t {
// Maps a trace PC to its opcode only.
// NOTE(review): appears to be superseded by opcode_data_cache below -- confirm.
std::unordered_map<app_pc, int> opcode_cache;
// Maps a trace PC to its opcode and category bits.
std::unordered_map<app_pc, opcode_data_t> opcode_data_cache;
};

struct shard_data_t {
Expand All @@ -103,6 +128,7 @@ class opcode_mix_t : public analysis_tool_t {
worker_data_t *worker;
int64_t instr_count;
std::unordered_map<int, int64_t> opcode_counts;
std::unordered_map<uint, int64_t> category_counts;
std::string error;
app_pc last_trace_module_start;
size_t last_trace_module_size;
Expand Down
11 changes: 11 additions & 0 deletions core/ir/instr_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -1911,6 +1911,8 @@ instr_is_rep_string_op(instr_t *instr);

/**
* Indicates which category the instruction corresponds to.
* Update instr_get_category_name() in core/ir/instr_shared.c
* when adding new categories in this enum.
*/
typedef enum {
DR_INSTR_CATEGORY_UNCATEGORIZED = 0x0, /**< Uncategorized. */
Expand All @@ -1937,6 +1939,15 @@ typedef enum {
DR_FP_MATH, /**< Performs arithmetic or conditional operations. */
} dr_fp_type_t;

DR_API
/**
 * Returns the name of \p category as a string.
 * Assumes \p category is a DR_INSTR_CATEGORY_ constant
 * (see #dr_instr_category_t); a value that is not one of those
 * constants yields an empty string.
 */
const char *
instr_get_category_name(dr_instr_category_t category);

DR_API
/**
* Returns true iff \p instr is a floating point instruction.
Expand Down
19 changes: 19 additions & 0 deletions core/ir/instr_shared.c
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,25 @@ instr_get_category(instr_t *instr)
/* in rest of file, directly de-reference for performance (PR 622253) */
#define instr_get_category inlined_instr_get_category

/* Maps a DR_INSTR_CATEGORY_ constant to its lower-case name.
 * A value that matches none of the constants below yields "".
 */
const char *
instr_get_category_name(dr_instr_category_t category)
{
    const char *name = "";
    switch (category) {
    case DR_INSTR_CATEGORY_UNCATEGORIZED: name = "uncategorized"; break;
    case DR_INSTR_CATEGORY_FP: name = "fp"; break;
    case DR_INSTR_CATEGORY_LOAD: name = "load"; break;
    case DR_INSTR_CATEGORY_STORE: name = "store"; break;
    case DR_INSTR_CATEGORY_BRANCH: name = "branch"; break;
    case DR_INSTR_CATEGORY_SIMD: name = "simd"; break;
    case DR_INSTR_CATEGORY_STATE: name = "state"; break;
    case DR_INSTR_CATEGORY_MOVE: name = "move"; break;
    case DR_INSTR_CATEGORY_CONVERT: name = "convert"; break;
    case DR_INSTR_CATEGORY_MATH: name = "math"; break;
    case DR_INSTR_CATEGORY_OTHER: name = "other"; break;
    default: break;
    }
    return name;
}

static inline void
instr_being_modified(instr_t *instr, bool raw_bits_valid)
{
Expand Down
3 changes: 3 additions & 0 deletions suite/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4254,6 +4254,9 @@ if (BUILD_CLIENTS)
"@-simulator_type@func_view" "only_5")
endif (NOT RISCV64)
if (DR_HOST_X86 AND DR_HOST_X64 AND LINUX)
torunonly_drcacheoff(opcode_categories allasm_x86_64 ""
"@-simulator_type@opcode_mix" "")

# Requires sudo to access pagemap.
# XXX: Should we not enable this outside of the Github suite where we know
# we have passwordless sudo? The pause for a password may cause problems
Expand Down
61 changes: 47 additions & 14 deletions suite/tests/api/drdecode_x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,16 @@

#include "configure.h"
#include "dr_api.h"
#include "tools.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define GD GLOBAL_DCONTEXT

#define ASSERT(x) \
((void)((!(x)) ? (printf("ASSERT FAILURE: %s:%d: %s\n", __FILE__, __LINE__, #x), \
abort(), 0) \
#define ASSERT(x) \
((void)((!(x)) ? (print("ASSERT FAILURE: %s:%d: %s\n", __FILE__, __LINE__, #x), \
abort(), 0) \
: 0))

#define BUFFER_SIZE_BYTES(buf) sizeof(buf)
Expand Down Expand Up @@ -155,13 +157,25 @@ test_noalloc(void)
*/
}

#define CHECK_CATEGORY(dcontext, instr, pc, category) \
ASSERT(instr_encode(dcontext, instr, pc) - pc < BUFFER_SIZE_ELEMENTS(pc)); \
instr_reset(dcontext, instr); \
instr_set_operands_valid(instr, true); \
ASSERT(decode(dcontext, pc, instr) != NULL); \
ASSERT(instr_get_category(instr) == category); \
instr_destroy(dcontext, instr);
/* Encodes \p instr into the buffer \p pc, decodes it back, and checks the
 * decoded instruction against the parallel arrays \p categories and
 * \p category_names:
 * - a DR_INSTR_CATEGORY_UNCATEGORIZED entry requires the instruction's
 *   category to be exactly that value; any other entry requires its bit to
 *   be set in the instruction's category;
 * - instr_get_category_name() of each entry must equal the expected name
 *   exactly (a prefix match is not enough).
 * \p instr is destroyed at the end.  The macro expands to a single statement
 * without a trailing semicolon, so it must be followed by ';' at the call
 * site and is safe inside an unbraced if/else.
 */
#define CHECK_CATEGORY(dcontext, instr, pc, categories, category_names)           \
    do {                                                                          \
        byte *instr_encoded_pc = instr_encode(dcontext, instr, pc);               \
        ASSERT(instr_encoded_pc != NULL);                                         \
        ASSERT(instr_encoded_pc - pc < BUFFER_SIZE_ELEMENTS(pc));                 \
        instr_reset(dcontext, instr);                                             \
        instr_set_operands_valid(instr, true);                                    \
        byte *instr_decoded_pc = decode(dcontext, pc, instr);                     \
        ASSERT(instr_decoded_pc != NULL);                                         \
        uint instr_categories = instr_get_category(instr);                        \
        for (size_t i = 0; i < BUFFER_SIZE_ELEMENTS(categories); ++i) {           \
            if ((categories)[i] == DR_INSTR_CATEGORY_UNCATEGORIZED) {             \
                ASSERT(instr_categories == (categories)[i]);                      \
            } else {                                                              \
                ASSERT(TESTANY((categories)[i], instr_categories));               \
            }                                                                     \
            ASSERT(strcmp(instr_get_category_name((categories)[i]),               \
                          (category_names)[i]) == 0);                             \
        }                                                                         \
        instr_destroy(dcontext, instr);                                           \
    } while (0)

static void
test_categories(void)
Expand All @@ -172,17 +186,36 @@ test_categories(void)
/* 55 OP_mov_ld */
instr = XINST_CREATE_load(GD, opnd_create_reg(DR_REG_XAX),
OPND_CREATE_MEMPTR(DR_REG_XAX, 42));
CHECK_CATEGORY(GD, instr, buf, DR_INSTR_CATEGORY_LOAD);
const dr_instr_category_t categories_load[] = { DR_INSTR_CATEGORY_LOAD };
const char *category_names_load[] = { "load" };
CHECK_CATEGORY(GD, instr, buf, categories_load, category_names_load);

/* 14 OP_cmp */
instr =
XINST_CREATE_cmp(GD, opnd_create_reg(DR_REG_EAX), opnd_create_reg(DR_REG_EAX));
CHECK_CATEGORY(GD, instr, buf, DR_INSTR_CATEGORY_MATH);
const dr_instr_category_t categories_cmp[] = { DR_INSTR_CATEGORY_MATH };
const char *category_names_cmp[] = { "math" };
CHECK_CATEGORY(GD, instr, buf, categories_cmp, category_names_cmp);

/* 46 OP_jmp */
instr_t *after_callee = INSTR_CREATE_label(GD);
instr = XINST_CREATE_jump(GD, opnd_create_instr(after_callee));
CHECK_CATEGORY(GD, instr, buf, DR_INSTR_CATEGORY_BRANCH);
const dr_instr_category_t categories_jmp[] = { DR_INSTR_CATEGORY_BRANCH };
const char *category_names_jmp[] = { "branch" };
CHECK_CATEGORY(GD, instr, buf, categories_jmp, category_names_jmp);

/* OP_fwait */
instr = INSTR_CREATE_fwait(GD);
const dr_instr_category_t categories_fwait[] = { DR_INSTR_CATEGORY_FP,
DR_INSTR_CATEGORY_STATE };
const char *category_names_fwait[] = { "fp", "state" };
CHECK_CATEGORY(GD, instr, buf, categories_fwait, category_names_fwait);

/* OP_in */
instr = INSTR_CREATE_in_1(GD);
const dr_instr_category_t categories_in[] = { DR_INSTR_CATEGORY_UNCATEGORIZED };
const char *category_names_in[] = { "uncategorized" };
CHECK_CATEGORY(GD, instr, buf, categories_in, category_names_in);
}

static void
Expand Down Expand Up @@ -241,7 +274,7 @@ main()

test_store_source();

printf("done\n");
print("done\n");

return 0;
}

0 comments on commit 4627576

Please sign in to comment.