diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 07f372b94936e..8473b573dc765 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -31,15 +31,8 @@ class CodeGen final : public CodeGenInterface // TODO-Cleanup: Abstract out the part of this that finds the addressing mode, and // move it to Lower - virtual bool genCreateAddrMode(GenTree* addr, - bool fold, - bool* revPtr, - GenTree** rv1Ptr, - GenTree** rv2Ptr, -#if SCALED_ADDR_MODES - unsigned* mulPtr, -#endif // SCALED_ADDR_MODES - ssize_t* cnsPtr); + virtual bool genCreateAddrMode( + GenTree* addr, bool fold, bool* revPtr, GenTree** rv1Ptr, GenTree** rv2Ptr, unsigned* mulPtr, ssize_t* cnsPtr); private: #if defined(TARGET_XARCH) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 38a134cbe67a4..e121d8033413f 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -1299,10 +1299,8 @@ unsigned CodeGenInterface::InferStructOpSizeAlign(GenTree* op, unsigned* alignme * *rv1Ptr ... base operand * *rv2Ptr ... optional operand * *revPtr ... true if rv2 is before rv1 in the evaluation order - * #if SCALED_ADDR_MODES * *mulPtr ... optional multiplier (2/4/8) for rv2 * Note that for [reg1 + reg2] and [reg1 + reg2 + icon], *mulPtr == 0. - * #endif * *cnsPtr ... integer constant [optional] * * IMPORTANT NOTE: This routine doesn't generate any code, it merely @@ -1310,15 +1308,8 @@ unsigned CodeGenInterface::InferStructOpSizeAlign(GenTree* op, unsigned* alignme * form an address mode later on. 
*/ -bool CodeGen::genCreateAddrMode(GenTree* addr, - bool fold, - bool* revPtr, - GenTree** rv1Ptr, - GenTree** rv2Ptr, -#if SCALED_ADDR_MODES - unsigned* mulPtr, -#endif // SCALED_ADDR_MODES - ssize_t* cnsPtr) +bool CodeGen::genCreateAddrMode( + GenTree* addr, bool fold, bool* revPtr, GenTree** rv1Ptr, GenTree** rv2Ptr, unsigned* mulPtr, ssize_t* cnsPtr) { /* The following indirections are valid address modes on x86/x64: @@ -1368,10 +1359,8 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, GenTree* op1; GenTree* op2; - ssize_t cns; -#if SCALED_ADDR_MODES + ssize_t cns; unsigned mul; -#endif // SCALED_ADDR_MODES GenTree* tmp; @@ -1395,9 +1384,7 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, op1 ... base address op2 ... optional scaled index -#if SCALED_ADDR_MODES mul ... optional multiplier (2/4/8) for op2 -#endif cns ... optional displacement Here we try to find such a set of operands and arrange for these @@ -1405,9 +1392,7 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, */ cns = 0; -#if SCALED_ADDR_MODES mul = 0; -#endif // SCALED_ADDR_MODES AGAIN: /* We come back to 'AGAIN' if we have an add of a constant, and we are folding that @@ -1416,9 +1401,7 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, */ CLANG_FORMAT_COMMENT_ANCHOR; -#if SCALED_ADDR_MODES assert(mul == 0); -#endif // SCALED_ADDR_MODES /* Special case: keep constants as 'op2' */ @@ -1461,7 +1444,7 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, goto AGAIN; -#if SCALED_ADDR_MODES && !defined(TARGET_ARMARCH) +#if !defined(TARGET_ARMARCH) // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index. 
case GT_MUL: if (op1->gtOverflow()) return false; // Need overflow check break; case GT_LSH: if (!op1->IsCnsIntOrI()) { break; } FALLTHROUGH; case GT_NOP: case GT_COMMA: op1 = op1->AsOp()->gtOp1; goto AGAIN; @@ -1484,7 +1467,7 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, goto FOUND_AM; } break; -#endif // SCALED_ADDR_MODES && !defined(TARGET_ARMARCH) +#endif // !defined(TARGET_ARMARCH) default: break; @@ -1525,8 +1508,6 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, break; -#if SCALED_ADDR_MODES - case GT_MUL: if (op1->gtOverflow()) @@ -1566,8 +1547,6 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, goto FOUND_AM; } break; - -#endif // SCALED_ADDR_MODES #endif // !TARGET_ARMARCH case GT_NOP: @@ -1588,7 +1567,9 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, switch (op2->gtOper) { #if !defined(TARGET_ARMARCH) - // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index. + // TODO-ARM64-CQ, TODO-ARM-CQ: For now we only handle MUL and LSH because + // arm doesn't support both scale and offset at the same time. Offset is handled + // at the emitter as a peephole optimization. case GT_ADD: if (op2->gtOverflow()) @@ -1607,8 +1588,7 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, break; -#if SCALED_ADDR_MODES - +#endif // TARGET_ARMARCH case GT_MUL: if (op2->gtOverflow()) @@ -1645,9 +1625,6 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, } break; -#endif // SCALED_ADDR_MODES -#endif // !TARGET_ARMARCH - case GT_NOP: op2 = op2->AsOp()->gtOp1; @@ -1720,13 +1697,11 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, /* Get hold of the index value */ ssize_t ixv = index->AsIntConCommon()->IconValue(); -#if SCALED_ADDR_MODES /* Scale the index if necessary */ if (tmpMul) { ixv *= tmpMul; } -#endif if (FitsIn(cns + ixv)) { @@ -1734,10 +1709,8 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, cns += ixv; -#if SCALED_ADDR_MODES /* There is no scaled operand any more */ mul = 0; -#endif rv2 = nullptr; } } @@ -1759,9 +1732,7 @@ bool CodeGen::genCreateAddrMode(GenTree* addr, *revPtr = rev; *rv1Ptr = rv1; *rv2Ptr = rv2; -#if SCALED_ADDR_MODES *mulPtr = mul; -#endif *cnsPtr = cns; return true; diff --git 
a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index 8755ee1d0fe73..82519973d1ea8 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -96,10 +96,8 @@ class CodeGenInterface bool* revPtr, GenTree** rv1Ptr, GenTree** rv2Ptr, -#if SCALED_ADDR_MODES unsigned* mulPtr, -#endif // SCALED_ADDR_MODES - ssize_t* cnsPtr) = 0; + ssize_t* cnsPtr) = 0; GCInfo gcInfo; diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index d1703a927f487..414d1eaffa0d1 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -13477,7 +13477,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR if (lsl > 0) { // Then load/store dataReg from/to [memBase + index*scale] - emitIns_R_R_R_I(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum(), lsl, INS_OPTS_LSL); + emitIns_R_R_R_Ext(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum(), INS_OPTS_LSL, lsl); } else // no scale { diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 4b7e0b557efe8..90093f9e7bba6 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -2968,21 +2968,29 @@ bool Compiler::gtCanSwapOrder(GenTree* firstNode, GenTree* secondNode) bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_types type) { // These are "out" parameters on the call to genCreateAddrMode(): - bool rev; // This will be true if the operands will need to be reversed. At this point we - // don't care about this because we're not yet instantiating this addressing mode. -#if SCALED_ADDR_MODES - unsigned mul; // This is the index (scale) value for the addressing mode -#endif + bool rev; // This will be true if the operands will need to be reversed. At this point we + // don't care about this because we're not yet instantiating this addressing mode. 
+ unsigned mul; // This is the index (scale) value for the addressing mode ssize_t cns; // This is the constant offset GenTree* base; // This is the base of the address. GenTree* idx; // This is the index. - if (codeGen->genCreateAddrMode(addr, false /*fold*/, &rev, &base, &idx, -#if SCALED_ADDR_MODES - &mul, -#endif // SCALED_ADDR_MODES - &cns)) + if (codeGen->genCreateAddrMode(addr, false /*fold*/, &rev, &base, &idx, &mul, &cns)) { + +#ifdef TARGET_ARMARCH + // Multiplier should be a "natural-scale" power of two number which is equal to target's width. + // + // *(ulong*)(data + index * 8); - can be optimized + // *(ulong*)(data + index * 7); - can not be optimized + // *(int*)(data + index * 2); - can not be optimized + // + if ((mul > 0) && (genTypeSize(type) != mul)) + { + return false; + } +#endif + // We can form a complex addressing mode, so mark each of the interior // nodes with GTF_ADDRMODE_NO_CSE and calculate a more accurate cost. @@ -3157,11 +3165,7 @@ bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_typ // Note that cns can be zero. CLANG_FORMAT_COMMENT_ANCHOR; -#if SCALED_ADDR_MODES assert((base != nullptr) || (idx != nullptr && mul >= 2)); -#else - assert(base != NULL); -#endif INDEBUG(GenTree* op1Save = addr); @@ -3176,12 +3180,14 @@ bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_typ assert(op1 != op1Save); assert(op2 != nullptr); +#if defined(TARGET_XARCH) // Walk the operands again (the third operand is unused in this case). // This time we will only consider adds with constant op2's, since // we have already found either a non-ADD op1 or a non-constant op2. + // NOTE: we don't support ADD(op1, cns) addressing for ARM/ARM64 yet so + // this walk makes no sense there. gtWalkOp(&op1, &op2, nullptr, true); -#if defined(TARGET_XARCH) // For XARCH we will fold GT_ADDs in the op2 position into the addressing mode, so we call // gtWalkOp on both operands of the original GT_ADD. 
// This is not done for ARMARCH. Though the stated reason is that we don't try to create a diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 289fe5e0e5d18..2c9ed01a1ebeb 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -4897,11 +4897,12 @@ bool Lowering::AreSourcesPossiblyModifiedLocals(GenTree* addr, GenTree* base, Ge // Arguments: // use - the use of the address we want to transform // isContainable - true if this addressing mode can be contained +// targetType - on arm we can use "scale" only for appropriate target type // // Returns: // true if the address node was changed to a LEA, false otherwise. // -bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable) +bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, var_types targetType) { if (!addr->OperIs(GT_ADD) || addr->gtOverflow()) { @@ -4915,16 +4916,27 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable) bool rev = false; // Find out if an addressing mode can be constructed - bool doAddrMode = comp->codeGen->genCreateAddrMode(addr, // address - true, // fold - &rev, // reverse ops - &base, // base addr - &index, // index val -#if SCALED_ADDR_MODES + bool doAddrMode = comp->codeGen->genCreateAddrMode(addr, // address + true, // fold + &rev, // reverse ops + &base, // base addr + &index, // index val &scale, // scaling -#endif // SCALED_ADDR_MODES &offset); // displacement +#ifdef TARGET_ARMARCH + // Multiplier should be a "natural-scale" power of two number which is equal to target's width. 
+ // + // *(ulong*)(data + index * 8); - can be optimized + // *(ulong*)(data + index * 7); - can not be optimized + // *(int*)(data + index * 2); - can not be optimized + // + if ((scale > 0) && (genTypeSize(targetType) != scale)) + { + return false; + } +#endif + if (scale == 0) { scale = 1; @@ -6678,7 +6690,7 @@ void Lowering::ContainCheckBitCast(GenTree* node) void Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind) { assert(ind->TypeGet() != TYP_STRUCT); - TryCreateAddrMode(ind->Addr(), true); + TryCreateAddrMode(ind->Addr(), true, ind->TypeGet()); if (!comp->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(ind)) { LowerStoreIndir(ind); @@ -6701,7 +6713,7 @@ void Lowering::LowerIndir(GenTreeIndir* ind) // TODO-Cleanup: We're passing isContainable = true but ContainCheckIndir rejects // address containment in some cases so we end up creating trivial (reg + offfset) // or (reg + reg) LEAs that are not necessary. - TryCreateAddrMode(ind->Addr(), true); + TryCreateAddrMode(ind->Addr(), true, ind->TypeGet()); ContainCheckIndir(ind); if (ind->OperIs(GT_NULLCHECK) || ind->IsUnusedValue()) @@ -6714,7 +6726,7 @@ void Lowering::LowerIndir(GenTreeIndir* ind) // If the `ADDR` node under `STORE_OBJ(dstAddr, IND(struct(ADDR))` // is a complex one it could benefit from an `LEA` that is not contained. 
const bool isContainable = false; - TryCreateAddrMode(ind->Addr(), isContainable); + TryCreateAddrMode(ind->Addr(), isContainable, ind->TypeGet()); } } diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 65e936b8b2167..b64f4a6410944 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -305,7 +305,7 @@ class Lowering final : public Phase void ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenTree* addr); void LowerPutArgStk(GenTreePutArgStk* tree); - bool TryCreateAddrMode(GenTree* addr, bool isContainable); + bool TryCreateAddrMode(GenTree* addr, bool isContainable, var_types targetType = TYP_UNDEF); bool TryTransformStoreObjAsStoreInd(GenTreeBlk* blkNode); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 92fa7ce548a07..b5fb3d2075976 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -12507,7 +12507,6 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) op2->AsIntConCommon()->SetIconValue(genLog2(abs_mult)); changeToShift = true; } -#if LEA_AVAILABLE else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult()) { int shift = genLog2(lowestBit); @@ -12537,7 +12536,6 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) changeToShift = true; } } -#endif // LEA_AVAILABLE if (changeToShift) { // vnStore is null before the ValueNumber phase has run diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 4c339fb3b80f3..b6ab3166e10f8 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -214,11 +214,6 @@ typedef unsigned char regNumberSmall; /*****************************************************************************/ -#define LEA_AVAILABLE 1 -#define SCALED_ADDR_MODES 1 - -/*****************************************************************************/ - #ifdef DEBUG #define DSP_SRC_OPER_LEFT 0 #define DSP_SRC_OPER_RIGHT 1