Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[JIT] X64 - Centralize peephole optimization for removing redundant mov instructions #85780

Merged
merged 10 commits into from
May 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7373,13 +7373,11 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast)
case GenIntCastDesc::LOAD_ZERO_EXTEND_INT:
ins = INS_mov;
insSize = 4;
canSkip = compiler->opts.OptimizationEnabled() && emit->AreUpper32BitsZero(srcReg);
break;
case GenIntCastDesc::SIGN_EXTEND_INT:
case GenIntCastDesc::LOAD_SIGN_EXTEND_INT:
ins = INS_movsxd;
insSize = 4;
canSkip = compiler->opts.OptimizationEnabled() && emit->AreUpper32BitsSignExtended(srcReg);
break;
#endif
case GenIntCastDesc::COPY:
Expand Down
129 changes: 100 additions & 29 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -506,17 +506,19 @@ bool emitter::IsRexW1EvexInstruction(instruction ins)

#ifdef TARGET_64BIT
//------------------------------------------------------------------------
// AreUpper32BitsZero: check if some previously emitted
// instruction set the upper 32 bits of reg to zero.
// AreUpperBitsZero: check if some previously emitted
// instruction set the upper bits of reg to zero.
//
// Arguments:
// reg - register of interest
// size - the size of data that the given register of interest is working with;
// remaining upper bits of the register that represent a larger size are the bits that are checked for zero
//
// Return Value:
// true if previous instruction zeroed reg's upper 32 bits.
// true if previous instruction zeroed reg's upper bits.
// false if it did not, or if we can't safely determine.
//
bool emitter::AreUpper32BitsZero(regNumber reg)
bool emitter::AreUpperBitsZero(regNumber reg, emitAttr size)
TIHan marked this conversation as resolved.
Show resolved Hide resolved
{
// Only allow GPRs.
// If not a valid register, then return false.
Expand Down Expand Up @@ -548,17 +550,27 @@ bool emitter::AreUpper32BitsZero(regNumber reg)
case INS_movsxd:
return PEEPHOLE_ABORT;

// movzx always zeroes the upper 32 bits.
case INS_movzx:
result = true;
if ((size == EA_1BYTE) || (size == EA_2BYTE))
{
result = (id->idOpSize() <= size);
}
// movzx always zeroes the upper 32 bits.
else if (size == EA_4BYTE)
{
result = true;
}
return PEEPHOLE_ABORT;

default:
break;
}

// otherwise rely on operation size.
result = (id->idOpSize() == EA_4BYTE);
if (size == EA_4BYTE)
{
result = (id->idOpSize() == EA_4BYTE);
}
return PEEPHOLE_ABORT;
}
else
Expand All @@ -572,15 +584,18 @@ bool emitter::AreUpper32BitsZero(regNumber reg)

//------------------------------------------------------------------------
// AreUpper32BitsSignExtended: check if some previously emitted
// instruction sign-extended the upper 32 bits.
// instruction sign-extended the upper bits.
//
// Arguments:
// reg - register of interest
// size - the size of data that the given register of interest is working with;
// remaining upper bits of the register that represent a larger size are the bits that are checked for
// sign-extended
//
// Return Value:
// true if previous instruction upper 32 bits are sign-extended.
// true if previous instruction upper bits are sign-extended.
// false if it did not, or if we can't safely determine.
bool emitter::AreUpper32BitsSignExtended(regNumber reg)
bool emitter::AreUpperBitsSignExtended(regNumber reg, emitAttr size)
TIHan marked this conversation as resolved.
Show resolved Hide resolved
{
// Only allow GPRs.
// If not a valid register, then return false.
Expand All @@ -596,24 +611,43 @@ bool emitter::AreUpper32BitsSignExtended(regNumber reg)

instrDesc* id = emitLastIns;

if (id->idReg1() != reg)
{
return false;
}
bool result = false;

// movsx always sign extends to 8 bytes. W-bit is set.
if (id->idIns() == INS_movsx)
{
return true;
}
emitPeepholeIterateLastInstrs([&](instrDesc* id) {
if (emitIsInstrWritingToReg(id, reg))
{
switch (id->idIns())
{
// Conservative.
case INS_call:
return PEEPHOLE_ABORT;

// movsxd is always an 8 byte operation. W-bit is set.
if (id->idIns() == INS_movsxd)
{
return true;
}
case INS_movsx:
case INS_movsxd:
if ((size == EA_1BYTE) || (size == EA_2BYTE))
{
result = (id->idOpSize() <= size);
}
// movsx/movsxd always sign extends to 8 bytes. W-bit is set.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does the W-bit matter here?

Copy link
Contributor Author

@TIHan TIHan May 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

W-bit on the encoding is what makes movsx and movsxd sign-extend to 8 bytes. We always default to using the W-bit. Technically, we could have a movsx or movsxd instruction not set the W-bit which would not sign-extend to 8 bytes (that would be a problem and make this optimization not safe), but we never actually emit those instructions that way.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function cares about instructions and sizes, but not encoding, though.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment W-bit is set itself is just meant to inform the reason why this optimization is safe to do. It was already there before.

else if (size == EA_4BYTE)
{
result = true;
}
break;

return false;
default:
break;
}

return PEEPHOLE_ABORT;
}
else
{
return PEEPHOLE_CONTINUE;
}
});

return result;
}
#endif // TARGET_64BIT

Expand Down Expand Up @@ -6223,11 +6257,48 @@ bool emitter::IsRedundantMov(

bool hasSideEffect = HasSideEffect(ins, size);

// Check if we are already in the correct register and don't have a side effect
if ((dst == src) && !hasSideEffect)
// Peephole optimization to eliminate redundant 'mov' instructions.
if (dst == src)
{
JITDUMP("\n -- suppressing mov because src and dst is same register and the mov has no side-effects.\n");
return true;
// Check if we are already in the correct register and don't have a side effect
if (!hasSideEffect)
{
JITDUMP("\n -- suppressing mov because src and dst is same register and the mov has no side-effects.\n");
return true;
}

#ifdef TARGET_64BIT
switch (ins)
{
case INS_movzx:
if (AreUpperBitsZero(src, size))
{
JITDUMP("\n -- suppressing movzx because upper bits are zero.\n");
return true;
}
break;

case INS_movsx:
case INS_movsxd:
if (AreUpperBitsSignExtended(src, size))
{
JITDUMP("\n -- suppressing movsx or movsxd because upper bits are sign-extended.\n");
return true;
}
break;

case INS_mov:
if ((size == EA_4BYTE) && AreUpperBitsZero(src, size))
{
JITDUMP("\n -- suppressing mov because upper bits are zero.\n");
return true;
}
break;

default:
break;
}
#endif // TARGET_64BIT
}

// TODO-XArch-CQ: Certain instructions, such as movaps vs movups, are equivalent in
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/emitxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,8 @@ static bool IsJccInstruction(instruction ins);
static bool IsJmpInstruction(instruction ins);

#ifdef TARGET_64BIT
TIHan marked this conversation as resolved.
Show resolved Hide resolved
bool AreUpper32BitsZero(regNumber reg);
bool AreUpper32BitsSignExtended(regNumber reg);
bool AreUpperBitsZero(regNumber reg, emitAttr size);
bool AreUpperBitsSignExtended(regNumber reg, emitAttr size);
#endif // TARGET_64BIT

bool IsRedundantCmp(emitAttr size, regNumber reg1, regNumber reg2);
Expand Down