-
Notifications
You must be signed in to change notification settings - Fork 561
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
i#2440: Macros for creating conditional instructions #4500
Changes from all commits
14bfb4b
08ce2c4
3c13a13
16f30ba
4d8f379
8a9a574
feecbf1
e4cf5af
8195d88
f985439
c84871d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -743,7 +743,85 @@ opnd_create_base_disp_aarch64(reg_id_t base_reg, reg_id_t index_reg, | |
CLIENT_ASSERT(false, "opnd_create_base_disp_aarch64: invalid extend type"); | ||
return opnd; | ||
} | ||
#endif | ||
|
||
opnd_t | ||
opnd_create_cond(int cond) | ||
{ | ||
/* FIXME i#1569: Move definition of dr_pred_type_t to opnd.h for AArch64 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please create an issue separate from the AArch64 master issue #1569 to track this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Created: #4502. |
||
* to avoid using int instead of dr_pred_type_t */ | ||
switch (cond) { | ||
case DR_PRED_EQ: return opnd_create_immed_uint(0b0000, OPSZ_4b); | ||
case DR_PRED_NE: return opnd_create_immed_uint(0b0001, OPSZ_4b); | ||
case DR_PRED_CS: return opnd_create_immed_uint(0b0010, OPSZ_4b); | ||
case DR_PRED_CC: return opnd_create_immed_uint(0b0011, OPSZ_4b); | ||
case DR_PRED_MI: return opnd_create_immed_uint(0b0100, OPSZ_4b); | ||
case DR_PRED_PL: return opnd_create_immed_uint(0b0101, OPSZ_4b); | ||
case DR_PRED_VS: return opnd_create_immed_uint(0b0110, OPSZ_4b); | ||
case DR_PRED_VC: return opnd_create_immed_uint(0b0111, OPSZ_4b); | ||
case DR_PRED_HI: return opnd_create_immed_uint(0b1000, OPSZ_4b); | ||
case DR_PRED_LS: return opnd_create_immed_uint(0b1001, OPSZ_4b); | ||
case DR_PRED_GE: return opnd_create_immed_uint(0b1010, OPSZ_4b); | ||
case DR_PRED_LT: return opnd_create_immed_uint(0b1011, OPSZ_4b); | ||
case DR_PRED_GT: return opnd_create_immed_uint(0b1100, OPSZ_4b); | ||
case DR_PRED_LE: return opnd_create_immed_uint(0b1101, OPSZ_4b); | ||
case DR_PRED_AL: return opnd_create_immed_uint(0b1110, OPSZ_4b); | ||
case DR_PRED_NV: return opnd_create_immed_uint(0b1111, OPSZ_4b); | ||
default: | ||
CLIENT_ASSERT(false, "invalid condition constant"); | ||
return opnd_create_null(); | ||
} | ||
} | ||
|
||
int | ||
opnd_get_cond(opnd_t opnd) | ||
{ | ||
/* FIXME i#1569: Move definition of dr_pred_type_t to opnd.h for AArch64 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As above. |
||
* to avoid using int instead of dr_pred_type_t */ | ||
switch (((uint64)opnd_get_immed_int(opnd) & 0xfu)) { | ||
case 0b0000: return DR_PRED_EQ; | ||
case 0b0001: return DR_PRED_NE; | ||
case 0b0010: return DR_PRED_CS; | ||
case 0b0011: return DR_PRED_CC; | ||
case 0b0100: return DR_PRED_MI; | ||
case 0b0101: return DR_PRED_PL; | ||
case 0b0110: return DR_PRED_VS; | ||
case 0b0111: return DR_PRED_VC; | ||
case 0b1000: return DR_PRED_HI; | ||
case 0b1001: return DR_PRED_LS; | ||
case 0b1010: return DR_PRED_GE; | ||
case 0b1011: return DR_PRED_LT; | ||
case 0b1100: return DR_PRED_GT; | ||
case 0b1101: return DR_PRED_LE; | ||
case 0b1110: return DR_PRED_AL; | ||
case 0b1111: return DR_PRED_NV; | ||
} | ||
return DR_PRED_NONE; | ||
} | ||
|
||
opnd_t | ||
opnd_invert_cond(opnd_t opnd) | ||
{ | ||
switch (opnd_get_cond(opnd)) { | ||
case DR_PRED_EQ: return opnd_create_cond(DR_PRED_NE); | ||
case DR_PRED_NE: return opnd_create_cond(DR_PRED_EQ); | ||
case DR_PRED_CS: return opnd_create_cond(DR_PRED_CC); | ||
case DR_PRED_CC: return opnd_create_cond(DR_PRED_CS); | ||
case DR_PRED_MI: return opnd_create_cond(DR_PRED_PL); | ||
case DR_PRED_PL: return opnd_create_cond(DR_PRED_MI); | ||
case DR_PRED_VS: return opnd_create_cond(DR_PRED_VC); | ||
case DR_PRED_VC: return opnd_create_cond(DR_PRED_VS); | ||
case DR_PRED_HI: return opnd_create_cond(DR_PRED_LS); | ||
case DR_PRED_LS: return opnd_create_cond(DR_PRED_HI); | ||
case DR_PRED_GE: return opnd_create_cond(DR_PRED_LT); | ||
case DR_PRED_LT: return opnd_create_cond(DR_PRED_GE); | ||
case DR_PRED_GT: return opnd_create_cond(DR_PRED_LE); | ||
case DR_PRED_LE: return opnd_create_cond(DR_PRED_GT); | ||
default: | ||
CLIENT_ASSERT(false, "invalid condition constant"); | ||
return opnd_create_null(); | ||
} | ||
} | ||
#endif /* AARCH64 */ | ||
|
||
#undef opnd_get_base | ||
#undef opnd_get_disp | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this needs some thought as to how to integrate with existing interfaces and code, especially with ARM code.
The API already has
instr_invert_predicate
. I would agree it seems less of an instr-level operation, but it would be confusing to have two different sets of interfaces.The API also already has predicates passed as dr_pred_type_t to things like
XINST_CREATE_jump_cond
in arm/instr_create.h. If a new type of operand, even if it's an immediate underneath is created, it would be best to use it everywhere. Whatever is decided, it seems best to have the same approach for all platforms, rather than special AArch64-only functions that don't match ARM functions yet do the same thing. That makes for confusing interfaces and complex cross-platform clients.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be a very big change to move all the condition-related interfaces that currently exist on the level of instructions to the level of operands. I don't think I know enough about other platforms to come up with a uniform solution, and I don't know if such change actually makes sense for x86 (after all
instr_invert_predicate
was create for some reason).Anyway, I'd like to do it in steps rather than in a single big pull request which is very difficult to merge. At least some guidance, suggestions or ideas would be helpful.
I would start with #4502, but again as a separate patch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right now I could remove
opnd_invert_cond
and useinstr_invert_predicate
aroundinstr_create_...
to flip the operand, however I'm not sure ifinstr_invert_predicate
is actually implemented for AArch64.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See https://dynamorio.org/dynamorio_docs/API_BT.html#sec_predication which lists one key current use of predicates/conditions:
instr_{get,set}_predicate
;INSTR_PRED
short form. While perhaps this could have been an operand (along with other things like condition code effects: tradeoffs with matching ISA conventions), it is not. Yet we want to be able to invert these predicates too. The currentinstr_invert_predicate
does not actually operate oninstr_t
: it operates ondr_pred_type_t
and thus operates on the instr-level predicates as well as operand-level.Another current use, similar to the macros added here:
XINST_CREATE_jump_cond()
. This, on x86, arm, and aarch64, takes in adr_pred_type_t
-typed value. I would think that the signature/operation of this instruction should match things like theINSTR_CREATE_ccmn
added here. One shouldn't take adr_pred_type_t
while the other takes anopnd_t
. Maybe the argument could be made that we should change theXINST_CREATE_jump_cond
to take anopnd_t
of this new type: though note that the condition turns into different opcodes on x86, the instruction-level predicate field on arm, and an operand on aarch64, so they are not all treating it as an operand. See also other high-level discussions on what should constitute an operand versus an opcode or sub-opcode: i#4329: Implement effective address of DC cache operations #4386 (comment). Perhaps these aarch64 comparison should be using separate opcodes, rather than operands that change behavior which goes against what operands are supposed to do. Having a single opcode doing all these different comparisons is sort of like having one opcode "arithmetic" with an operand that says whether it's an add or a subtract or whatnot, instead of separate add and subtract opcodes.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Continuing in the thread of the last few sentences: xref other issues where we want to split up the current aarch64 opcodes which have behavior-controlling integer operands which are perhaps better as separate opcodes rather than operands: #4388, #4393.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why only these instructions? All conditional instructions in AArch64 (all mentioned in this PR) have
cond
operand.Also, in my opinion, inventing non-documented opcodes may create extra confusion. If some notion is not common to the rest of the architectures it's not the reason to disallow low-level access as documented. I think in DynamoRIO, when we work at the level of instructions and operands, we don't really have the benefit of "write once run everywhere". In my understanding, the point of the tool is to allow working on the lowest possible level.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about compare and swap instructions (e.g. CAS)? The behaviour depends on the value of the register being equal to the value loaded from memory. It is similar to the conditional instructions, although in the latter case the comparison has happened before.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It might be useful to think of the pseudocode that specifies instruction behaviors. CCMP is specified by one pseudocode function, and the operands are inputs to that function. That single behavior function acts uniformly on all values of the operands. So it makes sense to treat it as one instruction, rather than single out one operand and split it into one instruction for each value of that operand.
The situation with SYS instructions is very different (similar to CDP instructions in 32-bit Arm) - they really are separate functions, selected by a function code, and it does make much more sense to split them out, at least when their specific functions are being differently handled by DynamoRIO.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If I understand correctly we have 3 choices:
1 - Create a new operand for
cond
.2 - Split out the opcodes into specific
cond
types, e.g.ccmp_eq
,ccmp_ne
etc.3 - Use existing
dr_pred_type_t
as inXINST_CREATE_jump_cond
(alias forINSTR_CREATE_bcond
).1 requires a non-trivial amount of work for a type which has essentially the same intent as
dr_pred_type_t
.2 is not consistent with AArch64's existing codec. We may run into problems as I did in #4386
3 would be my choice. It makes use of an existing type with the same intent which has already been implemented and does not involve a lot of codec reworking.
e.g.
Which users can call with:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@AssadHashmi highlights what is perhaps the main issue here: using a simple and consistent set of abstractions and types in the IR. If we take each encoded behavior immediate integer that AArch64 presents as an operand (but I would call a sub-opcode and another ISA might well make part of the opcode, such as x86 conditional moves) and place it as a separate operand in DR's IR, we then need to make a new operand type with a new set of rules and enums and handling code and introduce a new concept to tool writers for what could logically fit into existing concepts. We could end up with a dozen new operand types without much benefit but incurring non-trivial costs. Using an existing IR concept keeps the IR simpler and cleaner and more consistent.
We can make it as easy as possible, though. It does make a difference: each IR choice that lets a tool or analysis library be more platform-agnostic makes tool writing easier.
None of any of the discussion here has any bearing on whether a tool writer can generate any precise machine code desired: we're only talking about how to represent ISA concepts in the DR IR. None of the choices affect whether the ISA concept can be represented or whether the tool writer can generate it. And while the IR should be more abstracted and cross-platform, we can and do have layers that are closer to the assembly conventions of that ISA: the INSTR_CREATE_ macros try to look more like assembly, disassembly has both an abstracted DR format and an ISA format, and we'd love to have an even closer layer where assembler strings are converted into IR (can't seem to find it in the tracker...#4510) when writing code for a specific machine. But code analyzing and manipulating at the IR level should have as few types and special operand concepts as possible.