Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: "Scaled" addressing mode on ARM #60808

Merged
merged 6 commits into from
Oct 29, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 2 additions & 9 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,8 @@ class CodeGen final : public CodeGenInterface

// TODO-Cleanup: Abstract out the part of this that finds the addressing mode, and
// move it to Lower
virtual bool genCreateAddrMode(GenTree* addr,
bool fold,
bool* revPtr,
GenTree** rv1Ptr,
GenTree** rv2Ptr,
#if SCALED_ADDR_MODES
unsigned* mulPtr,
#endif // SCALED_ADDR_MODES
ssize_t* cnsPtr);
virtual bool genCreateAddrMode(
GenTree* addr, bool fold, bool* revPtr, GenTree** rv1Ptr, GenTree** rv2Ptr, unsigned* mulPtr, ssize_t* cnsPtr);

private:
#if defined(TARGET_XARCH)
Expand Down
44 changes: 7 additions & 37 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1299,26 +1299,17 @@ unsigned CodeGenInterface::InferStructOpSizeAlign(GenTree* op, unsigned* alignme
* *rv1Ptr ... base operand
* *rv2Ptr ... optional operand
* *revPtr ... true if rv2 is before rv1 in the evaluation order
* #if SCALED_ADDR_MODES
* *mulPtr ... optional multiplier (2/4/8) for rv2
* Note that for [reg1 + reg2] and [reg1 + reg2 + icon], *mulPtr == 0.
* #endif
* *cnsPtr ... integer constant [optional]
*
* IMPORTANT NOTE: This routine doesn't generate any code, it merely
* identifies the components that might be used to
* form an address mode later on.
*/

bool CodeGen::genCreateAddrMode(GenTree* addr,
bool fold,
bool* revPtr,
GenTree** rv1Ptr,
GenTree** rv2Ptr,
#if SCALED_ADDR_MODES
unsigned* mulPtr,
#endif // SCALED_ADDR_MODES
ssize_t* cnsPtr)
bool CodeGen::genCreateAddrMode(
GenTree* addr, bool fold, bool* revPtr, GenTree** rv1Ptr, GenTree** rv2Ptr, unsigned* mulPtr, ssize_t* cnsPtr)
{
/*
The following indirections are valid address modes on x86/x64:
Expand Down Expand Up @@ -1368,10 +1359,8 @@ bool CodeGen::genCreateAddrMode(GenTree* addr,
GenTree* op1;
GenTree* op2;

ssize_t cns;
#if SCALED_ADDR_MODES
ssize_t cns;
unsigned mul;
#endif // SCALED_ADDR_MODES

GenTree* tmp;

Expand All @@ -1395,19 +1384,15 @@ bool CodeGen::genCreateAddrMode(GenTree* addr,

op1 ... base address
op2 ... optional scaled index
#if SCALED_ADDR_MODES
mul ... optional multiplier (2/4/8) for op2
#endif
cns ... optional displacement

Here we try to find such a set of operands and arrange for these
to sit in registers.
*/

cns = 0;
#if SCALED_ADDR_MODES
mul = 0;
#endif // SCALED_ADDR_MODES

AGAIN:
/* We come back to 'AGAIN' if we have an add of a constant, and we are folding that
Expand All @@ -1416,9 +1401,7 @@ bool CodeGen::genCreateAddrMode(GenTree* addr,
*/
CLANG_FORMAT_COMMENT_ANCHOR;

#if SCALED_ADDR_MODES
assert(mul == 0);
#endif // SCALED_ADDR_MODES

/* Special case: keep constants as 'op2' */

Expand Down Expand Up @@ -1461,7 +1444,7 @@ bool CodeGen::genCreateAddrMode(GenTree* addr,

goto AGAIN;

#if SCALED_ADDR_MODES && !defined(TARGET_ARMARCH)
#if !defined(TARGET_ARMARCH)
// TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
case GT_MUL:
if (op1->gtOverflow())
Expand All @@ -1484,7 +1467,7 @@ bool CodeGen::genCreateAddrMode(GenTree* addr,
goto FOUND_AM;
}
break;
#endif // SCALED_ADDR_MODES && !defined(TARGET_ARMARCH)
#endif // !defined(TARGET_ARMARCH)

default:
break;
Expand Down Expand Up @@ -1525,8 +1508,6 @@ bool CodeGen::genCreateAddrMode(GenTree* addr,

break;

#if SCALED_ADDR_MODES

case GT_MUL:

if (op1->gtOverflow())
Expand Down Expand Up @@ -1566,8 +1547,6 @@ bool CodeGen::genCreateAddrMode(GenTree* addr,
goto FOUND_AM;
}
break;

#endif // SCALED_ADDR_MODES
#endif // !TARGET_ARMARCH

case GT_NOP:
Expand Down Expand Up @@ -1607,8 +1586,7 @@ bool CodeGen::genCreateAddrMode(GenTree* addr,

break;

#if SCALED_ADDR_MODES

#else // !TARGET_ARMARCH
EgorBo marked this conversation as resolved.
Show resolved Hide resolved
case GT_MUL:

if (op2->gtOverflow())
Expand Down Expand Up @@ -1644,9 +1622,7 @@ bool CodeGen::genCreateAddrMode(GenTree* addr,
goto FOUND_AM;
}
break;

#endif // SCALED_ADDR_MODES
#endif // !TARGET_ARMARCH
#endif // TARGET_ARMARCH

case GT_NOP:

Expand Down Expand Up @@ -1720,24 +1696,20 @@ bool CodeGen::genCreateAddrMode(GenTree* addr,
/* Get hold of the index value */
ssize_t ixv = index->AsIntConCommon()->IconValue();

#if SCALED_ADDR_MODES
/* Scale the index if necessary */
if (tmpMul)
{
ixv *= tmpMul;
}
#endif

if (FitsIn<INT32>(cns + ixv))
{
/* Add the scaled index to the offset value */

cns += ixv;

#if SCALED_ADDR_MODES
/* There is no scaled operand any more */
mul = 0;
#endif
rv2 = nullptr;
}
}
Expand All @@ -1759,9 +1731,7 @@ bool CodeGen::genCreateAddrMode(GenTree* addr,
*revPtr = rev;
*rv1Ptr = rv1;
*rv2Ptr = rv2;
#if SCALED_ADDR_MODES
*mulPtr = mul;
#endif
*cnsPtr = cns;

return true;
Expand Down
4 changes: 1 addition & 3 deletions src/coreclr/jit/codegeninterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,8 @@ class CodeGenInterface
bool* revPtr,
GenTree** rv1Ptr,
GenTree** rv2Ptr,
#if SCALED_ADDR_MODES
unsigned* mulPtr,
#endif // SCALED_ADDR_MODES
ssize_t* cnsPtr) = 0;
ssize_t* cnsPtr) = 0;

GCInfo gcInfo;

Expand Down
9 changes: 7 additions & 2 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6991,7 +6991,12 @@ void emitter::emitIns_R_R_R_Ext(instruction ins,
id->idReg1(reg1);
id->idReg2(reg2);
id->idReg3(reg3);
id->idReg3Scaled(shiftAmount == scale);
if (shiftAmount == scale)
{
id->idReg3Scaled(true);
// Scale is calculated from opsize
id->idOpSize((emitAttr)(1 << shiftAmount));
EgorBo marked this conversation as resolved.
Show resolved Hide resolved
}

dispIns(id);
appendToCurIG(id);
Expand Down Expand Up @@ -13435,7 +13440,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR
if (lsl > 0)
{
// Then load/store dataReg from/to [memBase + index*scale]
emitIns_R_R_R_I(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum(), lsl, INS_OPTS_LSL);
emitIns_R_R_R_Ext(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum(), INS_OPTS_LSL, lsl);
jakobbotsch marked this conversation as resolved.
Show resolved Hide resolved
}
else // no scale
{
Expand Down
28 changes: 13 additions & 15 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2968,21 +2968,23 @@ bool Compiler::gtCanSwapOrder(GenTree* firstNode, GenTree* secondNode)
bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_types type)
{
// These are "out" parameters on the call to genCreateAddrMode():
bool rev; // This will be true if the operands will need to be reversed. At this point we
// don't care about this because we're not yet instantiating this addressing mode.
#if SCALED_ADDR_MODES
unsigned mul; // This is the index (scale) value for the addressing mode
#endif
bool rev; // This will be true if the operands will need to be reversed. At this point we
// don't care about this because we're not yet instantiating this addressing mode.
unsigned mul; // This is the index (scale) value for the addressing mode
ssize_t cns; // This is the constant offset
GenTree* base; // This is the base of the address.
GenTree* idx; // This is the index.

if (codeGen->genCreateAddrMode(addr, false /*fold*/, &rev, &base, &idx,
#if SCALED_ADDR_MODES
&mul,
#endif // SCALED_ADDR_MODES
&cns))
if (codeGen->genCreateAddrMode(addr, false /*fold*/, &rev, &base, &idx, &mul, &cns))
{

#ifdef TARGET_ARMARCH
if ((mul > 0) && (genTypeSize(type) != mul))
EgorBo marked this conversation as resolved.
Show resolved Hide resolved
{
return false;
}
#endif

// We can form a complex addressing mode, so mark each of the interior
// nodes with GTF_ADDRMODE_NO_CSE and calculate a more accurate cost.

Expand Down Expand Up @@ -3157,11 +3159,7 @@ bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_typ
// Note that cns can be zero.
CLANG_FORMAT_COMMENT_ANCHOR;

#if SCALED_ADDR_MODES
assert((base != nullptr) || (idx != nullptr && mul >= 2));
#else
assert(base != NULL);
#endif

INDEBUG(GenTree* op1Save = addr);

Expand All @@ -3176,12 +3174,12 @@ bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_typ
assert(op1 != op1Save);
assert(op2 != nullptr);

#if defined(TARGET_XARCH)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a comment here explaining why arm64 doesn't/shouldn't do this walk?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added

// Walk the operands again (the third operand is unused in this case).
// This time we will only consider adds with constant op2's, since
// we have already found either a non-ADD op1 or a non-constant op2.
gtWalkOp(&op1, &op2, nullptr, true);

#if defined(TARGET_XARCH)
// For XARCH we will fold GT_ADDs in the op2 position into the addressing mode, so we call
// gtWalkOp on both operands of the original GT_ADD.
// This is not done for ARMARCH. Though the stated reason is that we don't try to create a
Expand Down
28 changes: 17 additions & 11 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4897,11 +4897,12 @@ bool Lowering::AreSourcesPossiblyModifiedLocals(GenTree* addr, GenTree* base, Ge
// Arguments:
// use - the use of the address we want to transform
// isContainable - true if this addressing mode can be contained
// targetType - on arm we can use "scale" only for appropriate target type
//
// Returns:
// true if the address node was changed to a LEA, false otherwise.
//
bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable)
bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, var_types targetType)
{
if (!addr->OperIs(GT_ADD) || addr->gtOverflow())
{
Expand All @@ -4915,16 +4916,21 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable)
bool rev = false;

// Find out if an addressing mode can be constructed
bool doAddrMode = comp->codeGen->genCreateAddrMode(addr, // address
true, // fold
&rev, // reverse ops
&base, // base addr
&index, // index val
#if SCALED_ADDR_MODES
bool doAddrMode = comp->codeGen->genCreateAddrMode(addr, // address
true, // fold
&rev, // reverse ops
&base, // base addr
&index, // index val
&scale, // scaling
#endif // SCALED_ADDR_MODES
&offset); // displacement

#ifdef TARGET_ARMARCH
if ((scale > 0) && (genTypeSize(targetType) != scale))
{
return false;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there cases now where we look for and find a scale, but it is not the appropriate number, and so we bail out on creating an addressing mode that we would previously find?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Example: https://godbolt.org/z/xG4TsYYTG - in case of Test2 we bail out

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, but in that case we would also bail out in the baseline as well as with this change, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From my understanding genCreateAddrMode is called from two places (gtSetEvalOrder and in Lower) and these checks are needed in both otherwise it leads to asserts.

}
#endif

if (scale == 0)
{
scale = 1;
Expand Down Expand Up @@ -6678,7 +6684,7 @@ void Lowering::ContainCheckBitCast(GenTree* node)
void Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind)
{
assert(ind->TypeGet() != TYP_STRUCT);
TryCreateAddrMode(ind->Addr(), true);
TryCreateAddrMode(ind->Addr(), true, ind->TypeGet());
if (!comp->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(ind))
{
LowerStoreIndir(ind);
Expand All @@ -6701,7 +6707,7 @@ void Lowering::LowerIndir(GenTreeIndir* ind)
// TODO-Cleanup: We're passing isContainable = true but ContainCheckIndir rejects
// address containment in some cases so we end up creating trivial (reg + offfset)
// or (reg + reg) LEAs that are not necessary.
TryCreateAddrMode(ind->Addr(), true);
TryCreateAddrMode(ind->Addr(), true, ind->TypeGet());
ContainCheckIndir(ind);

if (ind->OperIs(GT_NULLCHECK) || ind->IsUnusedValue())
Expand All @@ -6714,7 +6720,7 @@ void Lowering::LowerIndir(GenTreeIndir* ind)
// If the `ADDR` node under `STORE_OBJ(dstAddr, IND(struct(ADDR))`
// is a complex one it could benefit from an `LEA` that is not contained.
const bool isContainable = false;
TryCreateAddrMode(ind->Addr(), isContainable);
TryCreateAddrMode(ind->Addr(), isContainable, ind->TypeGet());
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/lower.h
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ class Lowering final : public Phase
void ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenTree* addr);
void LowerPutArgStk(GenTreePutArgStk* tree);

bool TryCreateAddrMode(GenTree* addr, bool isContainable);
bool TryCreateAddrMode(GenTree* addr, bool isContainable, var_types targetType = TYP_UNDEF);

bool TryTransformStoreObjAsStoreInd(GenTreeBlk* blkNode);

Expand Down
2 changes: 0 additions & 2 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12507,7 +12507,6 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
op2->AsIntConCommon()->SetIconValue(genLog2(abs_mult));
changeToShift = true;
}
#if LEA_AVAILABLE
else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult())
{
int shift = genLog2(lowestBit);
Expand Down Expand Up @@ -12537,7 +12536,6 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
changeToShift = true;
}
}
#endif // LEA_AVAILABLE
if (changeToShift)
{
// vnStore is null before the ValueNumber phase has run
Expand Down
5 changes: 0 additions & 5 deletions src/coreclr/jit/target.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,11 +214,6 @@ typedef unsigned char regNumberSmall;

/*****************************************************************************/

#define LEA_AVAILABLE 1
#define SCALED_ADDR_MODES 1

/*****************************************************************************/

#ifdef DEBUG
#define DSP_SRC_OPER_LEFT 0
#define DSP_SRC_OPER_RIGHT 1
Expand Down