From 86992b6aefd2181d97c3cf64f5cd7f137a9a78ab Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 7 Jan 2022 14:12:56 -0800 Subject: [PATCH 01/27] JIT: simple forward substitution pass Extend ref counting done by local morph so that we can determine single-def single-use locals. Add a phase that runs just after local morph that will attempt to forward single-def single-use local defs to uses when they are in adjacent statements. Fix or work around issues uncovered elsewhere: * `gtFoldExprCompare` might fold "identical" volatile subtrees * `fgGetStubAddrArg` cannot handle complex trees * some simd/hw operations can lose struct handles * some calls cannot handle struct local args Addresses #6973 and related issues. Still sorting through exactly which ones are fixed, so list below may need revising. Fixes #48605. Fixes #51599. Fixes #55472. Improves some but not all cases in #12280 and #62064. Does not fix #33002, #47082, or #63116; these require handling multiple uses or bypassing statements. --- src/coreclr/jit/CMakeLists.txt | 1 + src/coreclr/jit/compiler.cpp | 7 + src/coreclr/jit/compiler.h | 6 +- src/coreclr/jit/compphases.h | 1 + src/coreclr/jit/forwardsub.cpp | 392 +++++++++++++++++++++++++++++++++ src/coreclr/jit/gentree.cpp | 8 +- src/coreclr/jit/lclmorph.cpp | 36 +-- src/coreclr/jit/morph.cpp | 11 +- src/coreclr/jit/phase.cpp | 3 +- 9 files changed, 441 insertions(+), 24 deletions(-) create mode 100644 src/coreclr/jit/forwardsub.cpp diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index c46cdd18a164e..7659fca21bb1e 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -101,6 +101,7 @@ set( JIT_SOURCES fgprofile.cpp fgstmt.cpp flowgraph.cpp + forwardsub.cpp gcdecode.cpp gcencode.cpp gcinfo.cpp diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 394aca1719aeb..68d53d7ec3b94 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4796,6 +4796,13 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_STR_ADRLCL, &Compiler::fgMarkAddressExposedLocals); + // Run a simple forward sub pass. + // + if (opts.OptimizationEnabled()) + { + DoPhase(this, PHASE_FWD_SUB, &Compiler::fgForwardSub); + } + // Apply the type update to implicit byref parameters; also choose (based on address-exposed // analysis) which implicit byref promotions to keep (requires copy to initialize) or discard. 
// diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index e195ba991d563..b2d3b015c1119 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6411,7 +6411,7 @@ class Compiler bool fgMorphCanUseLclFldForCopy(unsigned lclNum1, unsigned lclNum2); - GenTreeLclVar* fgMorphTryFoldObjAsLclVar(GenTreeObj* obj); + GenTreeLclVar* fgMorphTryFoldObjAsLclVar(GenTreeObj* obj, bool destroyNodes = true); GenTree* fgMorphCommutative(GenTreeOp* tree); GenTree* fgMorphCastedBitwiseOp(GenTreeOp* tree); @@ -6550,6 +6550,10 @@ class Compiler void fgMarkAddressExposedLocals(); void fgMarkAddressExposedLocals(Statement* stmt); + PhaseStatus fgForwardSub(); + bool fgForwardSub(BasicBlock* block); + bool fgForwardSub(Statement* statement); + static fgWalkPreFn fgUpdateSideEffectsPre; static fgWalkPostFn fgUpdateSideEffectsPost; diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index e5c15c1d438a6..9b03ec7984cce 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -43,6 +43,7 @@ CompPhaseNameMacro(PHASE_UPDATE_FINALLY_FLAGS, "Update finally target flags", CompPhaseNameMacro(PHASE_COMPUTE_PREDS, "Compute preds", "PREDS", false, -1, false) CompPhaseNameMacro(PHASE_EARLY_UPDATE_FLOW_GRAPH,"Update flow graph early pass", "UPD-FG-E", false, -1, false) CompPhaseNameMacro(PHASE_STR_ADRLCL, "Morph - Structs/AddrExp", "MOR-STRAL",false, -1, false) +CompPhaseNameMacro(PHASE_FWD_SUB, "Forward Sub", "FWD-SUB", false, -1, false) CompPhaseNameMacro(PHASE_MORPH_IMPBYREF, "Morph - ByRefs", "MOR-BYREF",false, -1, false) CompPhaseNameMacro(PHASE_PROMOTE_STRUCTS, "Morph - Promote Structs", "PROMOTER" ,false, -1, false) CompPhaseNameMacro(PHASE_MORPH_GLOBAL, "Morph - Global", "MOR-GLOB", false, -1, false) diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp new file mode 100644 index 0000000000000..5aadf5aa3feae --- /dev/null +++ b/src/coreclr/jit/forwardsub.cpp @@ -0,0 +1,392 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "jitpch.h" + +#ifdef _MSC_VER +#pragma hdrstop +#endif + +// Simple Forward Substitution + +//------------------------------------------------------------------------ +// fgForwardSub: try forward substitution in this method +// +// Returns: +// suitable phase status +// +PhaseStatus Compiler::fgForwardSub() +{ + bool changed = false; + for (BasicBlock* const block : Blocks()) + { + JITDUMP("\n\n===> " FMT_BB "\n", block->bbNum); + changed |= fgForwardSub(block); + } + return changed ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; +} + +//------------------------------------------------------------------------ +// fgForwardSub(BasicBlock) -- try forward substitution in this block +// +// Arguments: +// block -- block to process +// +// Returns: +// true if any forward substitution took place +// +bool Compiler::fgForwardSub(BasicBlock* block) +{ + // Look for top-level ASG where the value defined is + // a local that's not address-exposed, and the local has + // exactly one use. 
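As an aside, here is a minimal standalone sketch of the def/use shape this pass rewrites, using invented ToyNode types rather than GenTree: statement N defines a single-use, unaliased local, statement N+1 holds its only use, so the def's tree can replace the use and statement N becomes dead. This is illustration only, not JIT code.

    // Toy model of forward substitution between two adjacent statements.
    #include <cstdio>
    #include <memory>
    #include <string>
    #include <utility>
    #include <vector>

    struct ToyNode
    {
        std::string                           label; // e.g. "ADD", "CNS 1", "LCL V01"
        std::vector<std::unique_ptr<ToyNode>> operands;
    };

    static std::unique_ptr<ToyNode> Node(std::string label)
    {
        auto n   = std::make_unique<ToyNode>();
        n->label = std::move(label);
        return n;
    }

    // Replace the unique "LCL V01" node in 'tree' with 'def' (the forwarded tree).
    static bool Substitute(std::unique_ptr<ToyNode>& tree, std::unique_ptr<ToyNode>& def)
    {
        if (tree->label == "LCL V01")
        {
            tree = std::move(def);
            return true;
        }
        for (auto& op : tree->operands)
        {
            if (Substitute(op, def))
            {
                return true;
            }
        }
        return false;
    }

    static void Dump(const ToyNode& n)
    {
        printf("(%s", n.label.c_str());
        for (const auto& op : n.operands)
        {
            printf(" ");
            Dump(*op);
        }
        printf(")");
    }

    int main()
    {
        // Stmt N:   V01 = ADD(CNS 1, CNS 2)
        auto def = Node("ADD");
        def->operands.push_back(Node("CNS 1"));
        def->operands.push_back(Node("CNS 2"));

        // Stmt N+1: RETURN(MUL(LCL V01, CNS 3))
        auto use = Node("RETURN");
        auto mul = Node("MUL");
        mul->operands.push_back(Node("LCL V01"));
        mul->operands.push_back(Node("CNS 3"));
        use->operands.push_back(std::move(mul));

        // Forward sub: the use becomes the def's tree; Stmt N can then be removed.
        Substitute(use, def);
        Dump(*use);
        printf("\n"); // (RETURN (MUL (ADD (CNS 1) (CNS 2)) (CNS 3)))
        return 0;
    }
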
+ + Statement* stmt = block->firstStmt(); + Statement* lastStmt = block->lastStmt(); + bool changed = false; + + while (stmt != lastStmt) + { + Statement* const prevStmt = stmt->GetPrevStmt(); + Statement* const nextStmt = stmt->GetNextStmt(); + bool const substituted = fgForwardSub(stmt); + + if (substituted) + { + fgRemoveStmt(block, stmt); + changed = true; + } + + if (substituted && (prevStmt != lastStmt) && prevStmt->GetRootNode()->OperIs(GT_ASG)) + { + stmt = prevStmt; + } + else + { + stmt = nextStmt; + } + } + + return changed; +} + +//------------------------------------------------------------------------ +// ForwardSubVisitor: tree visitor to locate uses of a local in a tree +// +// Also computes the set of side effects that happen "before" the use. +// +class ForwardSubVisitor final : public GenTreeVisitor +{ +public: + enum + { + ComputeStack = true, + DoPostOrder = true, + UseExecutionOrder = true + }; + + ForwardSubVisitor(Compiler* compiler, unsigned lclNum) + : GenTreeVisitor(compiler) + , m_lclNum(lclNum) + , m_use(nullptr) + , m_node(nullptr) + , m_count(0) + , m_useFlags(0) + , m_accumulatedFlags(0) + , m_isCallArg(false) + { + } + + Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) + { + GenTree* const node = *use; + + if (node->OperIs(GT_LCL_VAR)) + { + unsigned const lclNum = node->AsLclVarCommon()->GetLclNum(); + + if (lclNum == m_lclNum) + { + m_count++; + + // Screen out contextual "uses" + // + GenTree* const parent = m_ancestors.Top(1); + bool const isDef = parent->OperIs(GT_ASG) && (parent->gtGetOp1() == node); + bool const isAddr = parent->OperIs(GT_ADDR); + + bool isCallTarget = false; + + // Quirk: + // + // fgGetStubAddrArg cannot handle complex trees (it calls gtClone) + // + if (parent->IsCall()) + { + GenTreeCall* const parentCall = parent->AsCall(); + isCallTarget = (parentCall->gtCallType == CT_INDIRECT) && (parentCall->gtCallAddr == node); + } + + if (!isDef && !isAddr && !isCallTarget) + { + m_node = node; + m_use = use; + m_useFlags = m_accumulatedFlags; + m_isCallArg = parent->IsCall(); + } + } + } + + m_accumulatedFlags |= (node->gtFlags & GTF_GLOB_EFFECT); + + return fgWalkResult::WALK_CONTINUE; + } + + int GetCount() + { + return m_count; + } + + GenTree* GetNode() + { + return m_node; + } + + GenTree** GetUse() + { + return m_use; + } + + unsigned GetFlags() + { + return m_useFlags; + } + + bool IsCallArg() + { + return m_isCallArg; + } + +private: + unsigned m_lclNum; + GenTree** m_use; + GenTree* m_node; + int m_count; + unsigned m_useFlags; + unsigned m_accumulatedFlags; + bool m_isCallArg; +}; + +//------------------------------------------------------------------------ +// fgForwardSub(Statement) -- forward sub this statement's computation +// to the next statement, if legal and profitable +// +// arguments: +// stmt - statement in question +// +// Returns: +// true if statement computation was forwarded. +// caller is responsible for removing the now-dead statement. +// +bool Compiler::fgForwardSub(Statement* stmt) +{ + // Is this tree a def of a single use, unaliased local? + // + GenTree* const rootNode = stmt->GetRootNode(); + + if (!rootNode->OperIs(GT_ASG)) + { + return false; + } + + GenTree* const lhsNode = rootNode->gtGetOp1(); + + if (!lhsNode->OperIs(GT_LCL_VAR)) + { + return false; + } + + JITDUMP(" [%06u]: ", dspTreeID(rootNode)) + + unsigned const lclNum = lhsNode->AsLclVarCommon()->GetLclNum(); + LclVarDsc* const varDsc = lvaGetDesc(lclNum); + + // Leave pinned locals alone. 
+ // This is just a perf opt -- we shouldn't find any uses. + // + if (varDsc->lvPinned) + { + JITDUMP(" pinned local\n"); + return false; + } + + // Only fwd sub if we expect no code duplication + // We expect one def and one use. + // + if (varDsc->lvRefCnt(RCS_EARLY) != 2) + { + JITDUMP(" not asg (single-use lcl)\n"); + return false; + } + + // And local is unalised + // + if (varDsc->IsAddressExposed()) + { + JITDUMP(" not asg (unaliased single-use lcl)\n"); + return false; + } + + // Could handle this case --perhaps-- but we'd want to update ref counts. + // + if (lvaIsImplicitByRefLocal(lclNum)) + { + JITDUMP(" implicit by-ref local\n"); + return false; + } + + // Local seems suitable. See if the next statement + // contains the one and only use. + // + Statement* const nextStmt = stmt->GetNextStmt(); + + // We often see stale flags, eg call flags after inlining. + // Try and clean these up. + // + gtUpdateStmtSideEffects(nextStmt); + gtUpdateStmtSideEffects(stmt); + + // Scan for the (single) use. + // + ForwardSubVisitor fsv(this, lclNum); + fsv.WalkTree(nextStmt->GetRootNodePointer(), nullptr); + + assert(fsv.GetCount() <= 1); + + if ((fsv.GetCount() == 0) || (fsv.GetNode() == nullptr)) + { + JITDUMP(" no next stmt use\n"); + return false; + } + + JITDUMP(" [%06u] is only use of [%06u] (V%02u) ", dspTreeID(fsv.GetNode()), dspTreeID(lhsNode), lclNum); + + // Next statement seems suitable. + // See if we can forward sub without changing semantics. + // + // We could just extract the value portion and forward sub that, + // but cleanup would be more complicated. + // + GenTree* const rhsNode = rootNode->gtGetOp2(); + GenTree* const nextRootNode = nextStmt->GetRootNode(); + GenTree* fwdSubNode = rhsNode; + + // Can't substitute a qmark (unless the use is RHS of an assign... could check for this) + // Can't substitute GT_CATCH_ARG. + // + if (fwdSubNode->OperIs(GT_QMARK, GT_CATCH_ARG)) + { + JITDUMP(" node to sub is qmark or catch arg\n"); + return false; + } + + // Bail if sub node has embedded assignment. + // + if ((fwdSubNode->gtFlags & GTF_ASG) != 0) + { + JITDUMP(" node to sub has effects\n"); + return false; + } + + // Bail if types disagree. + // + if (fsv.GetNode()->TypeGet() != fwdSubNode->TypeGet()) + { + JITDUMP(" mismatched types\n"); + return false; + } + + // We can forward sub if + // + // the value of the fwdSubNode can't change and its evaluation won't cause side effects, + // + // or, + // + // if the next tree can't change the value of fwdSubNode or be adversly impacted by fwdSubNode effects + // + const bool fwdSubNodeInvariant = ((fwdSubNode->gtFlags & GTF_ALL_EFFECT) == 0); + const bool nextTreeIsPureUpToUse = ((fsv.GetFlags() & (GTF_EXCEPT | GTF_GLOB_REF | GTF_CALL)) == 0); + if (!fwdSubNodeInvariant && !nextTreeIsPureUpToUse) + { + // Fwd sub may impact global values and or reorder exceptions... + // + JITDUMP(" potentially interacting effects\n"); + return false; + } + + // Finally, profitability checks. + // + // These conditions can be checked earlier in the final version to save some throughput. + // Perhaps allowing for bypass with jit stress. + // + // If fwdSubNode is an address-exposed local, forwarding it may lose optimizations. + // (maybe similar for dner?) 
+ // + if (fwdSubNode->OperIs(GT_LCL_VAR)) + { + unsigned const fwdLclNum = fwdSubNode->AsLclVarCommon()->GetLclNum(); + LclVarDsc* const fwdVarDsc = lvaGetDesc(fwdLclNum); + + if (fwdVarDsc->IsAddressExposed()) + { + JITDUMP(" V%02u is address exposed\n", fwdLclNum); + return false; + } + } + + // Quirks: + // + // We may sometimes lose a simd type handle. Avoid substituting if so. + // + if (varTypeIsSIMD(fwdSubNode)) + { + if (gtGetStructHandleIfPresent(fwdSubNode) != gtGetStructHandleIfPresent(fsv.GetNode())) + { + JITDUMP(" would lose or gain struct handle\n"); + return false; + } + } + + // Optimization: + // + // If we are about to substitute GT_OBJ, see if we can simplify it first. + // Not doing so can lead to regressions... + // + // Hold off on doing this for call args for now (per issue #51569). + // + if (fwdSubNode->OperIs(GT_OBJ) && !fsv.IsCallArg()) + { + const bool destroyNodes = false; + GenTree* const optTree = fgMorphTryFoldObjAsLclVar(fwdSubNode->AsObj(), destroyNodes); + if (optTree != nullptr) + { + JITDUMP(" -- folding OBJ(ADDR(LCL...)) -- "); + fwdSubNode = optTree; + } + } + + // Looks good, forward sub! + // + GenTree** use = fsv.GetUse(); + *use = fwdSubNode; + + if (!fwdSubNodeInvariant) + { + gtUpdateStmtSideEffects(nextStmt); + } + + JITDUMP(" -- fwd subbing [%06u]; new next stmt is\n", dspTreeID(fwdSubNode)); + DISPSTMT(nextStmt); + + return true; +} diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 5769db69e06e3..85e4938fa468f 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -2419,8 +2419,9 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp) costEx = IND_COST_EX; costSz = 2; - GenTree* addr = hwTree->Op(1)->gtEffectiveVal(); - level = gtSetEvalOrder(addr); + GenTree* const addrNode = hwTree->Op(1); + level = gtSetEvalOrder(addrNode); + GenTree* const addr = addrNode->gtEffectiveVal(); // See if we can form a complex addressing mode. if (addr->OperIs(GT_ADD) && gtMarkAddrMode(addr, &costEx, &costSz, hwTree->TypeGet())) @@ -11764,8 +11765,9 @@ GenTree* Compiler::gtFoldExprCompare(GenTree* tree) } /* Currently we can only fold when the two subtrees exactly match */ + /* ORDER_SIDEFF here covers volatile subtrees */ - if ((tree->gtFlags & GTF_SIDE_EFFECT) || GenTree::Compare(op1, op2, true) == false) + if ((tree->gtFlags & (GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF)) || GenTree::Compare(op1, op2, true) == false) { return tree; /* return unfolded tree */ } diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 84d7965d395bf..ec662935d5e79 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -442,14 +442,15 @@ class LocalAddressVisitor final : public GenTreeVisitor #endif // DEBUG } - // Morph promoted struct fields and count implicit byref argument occurrences. + // Morph promoted struct fields and count local occurrences. + // // Also create and push the value produced by the visited node. This is done here // rather than in PostOrderVisit because it makes it easy to handle nodes with an // arbitrary number of operands - just pop values until the value corresponding // to the visited node is encountered. 
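The gtFoldExprCompare change in the gentree.cpp hunk above adds GTF_ORDER_SIDEEFF to the bail-out mask so that structurally identical subtrees with ordering side effects (for example volatile accesses) are not folded. A small standalone C++ illustration of why that fold would be wrong; this is motivation only, not JIT code, and the variable name is invented.

    #include <cstdio>

    // Stand-in for memory another agent (hardware, another thread) may update
    // between the two reads; each read is an observable, ordered side effect.
    static volatile int deviceRegister = 0;

    static bool CompareTwoReads()
    {
        // A folder that ignored ordering side effects could rewrite this to 'true'
        // and drop both loads. With 'volatile' both reads must be performed, and
        // they are allowed to observe different values.
        return deviceRegister == deviceRegister;
    }

    int main()
    {
        printf("%d\n", CompareTwoReads() ? 1 : 0);
        return 0;
    }
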
fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) { - GenTree* node = *use; + GenTree* const node = *use; if (node->OperIs(GT_FIELD)) { @@ -462,19 +463,17 @@ class LocalAddressVisitor final : public GenTreeVisitor if (node->OperIsLocal()) { - unsigned lclNum = node->AsLclVarCommon()->GetLclNum(); + unsigned const lclNum = node->AsLclVarCommon()->GetLclNum(); + LclVarDsc* const varDsc = m_compiler->lvaGetDesc(lclNum); + + UpdateEarlyRefCount(lclNum); - LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); if (varDsc->lvIsStructField) { // Promoted field, increase counter for the parent lclVar. assert(!m_compiler->lvaIsImplicitByRefLocal(lclNum)); unsigned parentLclNum = varDsc->lvParentLcl; - UpdateEarlyRefCountForImplicitByRef(parentLclNum); - } - else - { - UpdateEarlyRefCountForImplicitByRef(lclNum); + UpdateEarlyRefCount(parentLclNum); } } @@ -1183,7 +1182,7 @@ class LocalAddressVisitor final : public GenTreeVisitor } //------------------------------------------------------------------------ - // UpdateEarlyRefCountForImplicitByRef: updates the ref count for implicit byref params. + // UpdateEarlyRefCount: updates the ref count for locals // // Arguments: // lclNum - the local number to update the count for. @@ -1192,19 +1191,24 @@ class LocalAddressVisitor final : public GenTreeVisitor // fgMakeOutgoingStructArgCopy checks the ref counts for implicit byref params when it decides // if it's legal to elide certain copies of them; // fgRetypeImplicitByRefArgs checks the ref counts when it decides to undo promotions. + // fgForwardSub uses ref counts to decide when to forward sub. // - void UpdateEarlyRefCountForImplicitByRef(unsigned lclNum) + void UpdateEarlyRefCount(unsigned lclNum) { + LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); + + // Note we don't need accurate counts when the values are large. + // + if (varDsc->lvRefCnt(RCS_EARLY) < USHRT_MAX) + { + varDsc->incLvRefCnt(1, RCS_EARLY); + } + if (!m_compiler->lvaIsImplicitByRefLocal(lclNum)) { return; } - LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); - JITDUMP("LocalAddressVisitor incrementing ref count from %d to %d for implicit by-ref V%02d\n", - varDsc->lvRefCnt(RCS_EARLY), varDsc->lvRefCnt(RCS_EARLY) + 1, lclNum); - varDsc->incLvRefCnt(1, RCS_EARLY); - // See if this struct is an argument to a call. This information is recorded // via the weighted early ref count for the local, and feeds the undo promotion // heuristic. diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 1ee03c84a398f..449e0d595088e 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -9485,6 +9485,7 @@ GenTree* Compiler::fgMorphConst(GenTree* tree) // // Arguments: // obj - the obj node. +// destroyNodes -- destroy nodes that are optimized away // // Return value: // GenTreeLclVar if the obj can be replaced by it, null otherwise. @@ -9495,7 +9496,7 @@ GenTree* Compiler::fgMorphConst(GenTree* tree) // for some platforms does not expect struct `LCL_VAR` as a source, so // it needs more work. 
// -GenTreeLclVar* Compiler::fgMorphTryFoldObjAsLclVar(GenTreeObj* obj) +GenTreeLclVar* Compiler::fgMorphTryFoldObjAsLclVar(GenTreeObj* obj, bool destroyNodes) { if (opts.OptimizationEnabled()) { @@ -9528,8 +9529,12 @@ GenTreeLclVar* Compiler::fgMorphTryFoldObjAsLclVar(GenTreeObj* obj) lclVar->gtFlags &= ~GTF_DONT_CSE; lclVar->gtFlags |= (obj->gtFlags & GTF_DONT_CSE); - DEBUG_DESTROY_NODE(obj); - DEBUG_DESTROY_NODE(addr); + if (destroyNodes) + { + DEBUG_DESTROY_NODE(obj); + DEBUG_DESTROY_NODE(addr); + } + return lclVar; } } diff --git a/src/coreclr/jit/phase.cpp b/src/coreclr/jit/phase.cpp index f08134bf11532..6c93c2b042f17 100644 --- a/src/coreclr/jit/phase.cpp +++ b/src/coreclr/jit/phase.cpp @@ -171,7 +171,8 @@ void Phase::PostPhase(PhaseStatus status) PHASE_MORPH_GLOBAL, PHASE_INVERT_LOOPS, PHASE_OPTIMIZE_LAYOUT, PHASE_FIND_LOOPS, PHASE_BUILD_SSA, PHASE_RATIONALIZE, - PHASE_LOWERING, PHASE_STACK_LEVEL_SETTER}; + PHASE_LOWERING, PHASE_STACK_LEVEL_SETTER, + PHASE_FWD_SUB}; if (madeChanges) { From 28cdc5eeeaf23e45010594f2ff087ba2b0e1fa1e Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 13 Jan 2022 14:28:05 -0800 Subject: [PATCH 02/27] Fix windows x86 and arm64 issues: * For x86 we can't forward sub `GT_LCLHEAP`. * For arm64 we need to take more care with multi-reg struct types. --- src/coreclr/jit/forwardsub.cpp | 90 +++++++++++++++++++++++++++++----- 1 file changed, 78 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index 5aadf5aa3feae..5d0fd905b77a5 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -87,13 +87,13 @@ class ForwardSubVisitor final : public GenTreeVisitor ForwardSubVisitor(Compiler* compiler, unsigned lclNum) : GenTreeVisitor(compiler) - , m_lclNum(lclNum) , m_use(nullptr) , m_node(nullptr) + , m_parentNode(nullptr) + , m_lclNum(lclNum) , m_count(0) , m_useFlags(0) , m_accumulatedFlags(0) - , m_isCallArg(false) { } @@ -129,10 +129,10 @@ class ForwardSubVisitor final : public GenTreeVisitor if (!isDef && !isAddr && !isCallTarget) { - m_node = node; - m_use = use; - m_useFlags = m_accumulatedFlags; - m_isCallArg = parent->IsCall(); + m_node = node; + m_use = use; + m_useFlags = m_accumulatedFlags; + m_parentNode = parent; } } } @@ -157,6 +157,11 @@ class ForwardSubVisitor final : public GenTreeVisitor return m_use; } + GenTree* GetParentNode() + { + return m_parentNode; + } + unsigned GetFlags() { return m_useFlags; @@ -164,17 +169,17 @@ class ForwardSubVisitor final : public GenTreeVisitor bool IsCallArg() { - return m_isCallArg; + return m_parentNode->IsCall(); } private: - unsigned m_lclNum; GenTree** m_use; GenTree* m_node; + GenTree* m_parentNode; + unsigned m_lclNum; int m_count; unsigned m_useFlags; unsigned m_accumulatedFlags; - bool m_isCallArg; }; //------------------------------------------------------------------------ @@ -283,10 +288,11 @@ bool Compiler::fgForwardSub(Statement* stmt) // Can't substitute a qmark (unless the use is RHS of an assign... could check for this) // Can't substitute GT_CATCH_ARG. + // Can't substitute GT_LCLHEAP. // - if (fwdSubNode->OperIs(GT_QMARK, GT_CATCH_ARG)) + if (fwdSubNode->OperIs(GT_QMARK, GT_CATCH_ARG, GT_LCLHEAP)) { - JITDUMP(" node to sub is qmark or catch arg\n"); + JITDUMP(" node to sub is qmark, catch arg, or lcl heap\n"); return false; } @@ -357,6 +363,66 @@ bool Compiler::fgForwardSub(Statement* stmt) } } + // There are implicit assumptions downstream on where/how multi-reg ops + // can appear. 
+ // + // Eg if this is a multi-reg call, parent node must be GT_ASG and the + // local must be specially marked. + // + // Defer forwarding these for now. + // + if (fwdSubNode->IsMultiRegCall()) + { + GenTree* const parentNode = fsv.GetParentNode(); + + if (!parentNode->OperIs(GT_ASG)) + { + JITDUMP(" multi-reg call, parent not asg\n"); + return false; + } + + GenTree* const parentNodeLHS = parentNode->gtGetOp1(); + + if (!parentNodeLHS->OperIs(GT_LCL_VAR)) + { + JITDUMP(" multi-reg call, parent not asg(lcl, ...)\n"); + return false; + } + + GenTreeLclVar* const parentNodeLHSLocal = parentNodeLHS->AsLclVar(); + + unsigned const lhsLclNum = parentNodeLHSLocal->GetLclNum(); + LclVarDsc* const lhsVarDsc = lvaGetDesc(lhsLclNum); + + JITDUMP(" [marking V%02u as multi-reg-ret]", lhsLclNum); + lhsVarDsc->lvIsMultiRegRet = true; + parentNodeLHSLocal->SetMultiReg(); + } + + // If a method returns a multi-reg type, only forward sub locals, + // and ensure the local and operand have the required markup. + // + // (see eg impFixupStructReturnType) + // + if (compMethodReturnsMultiRegRetType() && fsv.GetParentNode()->OperIs(GT_RETURN)) + { + if (!fwdSubNode->OperIs(GT_LCL_VAR)) + { + JITDUMP(" parent is return, fwd sub node is not lcl var\n"); + return false; + } + + GenTreeLclVar* const fwdSubNodeLocal = fwdSubNode->AsLclVar(); + + unsigned const fwdLclNum = fwdSubNodeLocal->GetLclNum(); + LclVarDsc* const fwdVarDsc = lvaGetDesc(fwdLclNum); + + JITDUMP(" [marking V%02u as multi-reg-ret]", fwdLclNum); + fwdVarDsc->lvIsMultiRegRet = true; + fwdSubNodeLocal->SetMultiReg(); + fwdSubNode->gtFlags |= GTF_DONT_CSE; + } + // Optimization: // // If we are about to substitute GT_OBJ, see if we can simplify it first. @@ -370,7 +436,7 @@ bool Compiler::fgForwardSub(Statement* stmt) GenTree* const optTree = fgMorphTryFoldObjAsLclVar(fwdSubNode->AsObj(), destroyNodes); if (optTree != nullptr) { - JITDUMP(" -- folding OBJ(ADDR(LCL...)) -- "); + JITDUMP(" [folding OBJ(ADDR(LCL...))]"); fwdSubNode = optTree; } } From 721938e7dd769d18a5ace659e1d0809a43175588 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 13 Jan 2022 14:35:02 -0800 Subject: [PATCH 03/27] reorder checks --- src/coreclr/jit/forwardsub.cpp | 42 ++++++++++++++++------------------ 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index 5d0fd905b77a5..ceeba5aae76ad 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -350,6 +350,24 @@ bool Compiler::fgForwardSub(Statement* stmt) } } + // Optimization: + // + // If we are about to substitute GT_OBJ, see if we can simplify it first. + // Not doing so can lead to regressions... + // + // Hold off on doing this for call args for now (per issue #51569). + // + if (fwdSubNode->OperIs(GT_OBJ) && !fsv.IsCallArg()) + { + const bool destroyNodes = false; + GenTree* const optTree = fgMorphTryFoldObjAsLclVar(fwdSubNode->AsObj(), destroyNodes); + if (optTree != nullptr) + { + JITDUMP(" [folding OBJ(ADDR(LCL...))]"); + fwdSubNode = optTree; + } + } + // Quirks: // // We may sometimes lose a simd type handle. Avoid substituting if so. @@ -366,10 +384,8 @@ bool Compiler::fgForwardSub(Statement* stmt) // There are implicit assumptions downstream on where/how multi-reg ops // can appear. // - // Eg if this is a multi-reg call, parent node must be GT_ASG and the - // local must be specially marked. - // - // Defer forwarding these for now. 
+ // Eg if fwdSubNode is a multi-reg call, parent node must be GT_ASG and the + // local being defined must be specially marked up. // if (fwdSubNode->IsMultiRegCall()) { @@ -423,24 +439,6 @@ bool Compiler::fgForwardSub(Statement* stmt) fwdSubNode->gtFlags |= GTF_DONT_CSE; } - // Optimization: - // - // If we are about to substitute GT_OBJ, see if we can simplify it first. - // Not doing so can lead to regressions... - // - // Hold off on doing this for call args for now (per issue #51569). - // - if (fwdSubNode->OperIs(GT_OBJ) && !fsv.IsCallArg()) - { - const bool destroyNodes = false; - GenTree* const optTree = fgMorphTryFoldObjAsLclVar(fwdSubNode->AsObj(), destroyNodes); - if (optTree != nullptr) - { - JITDUMP(" [folding OBJ(ADDR(LCL...))]"); - fwdSubNode = optTree; - } - } - // Looks good, forward sub! // GenTree** use = fsv.GetUse(); From 6e4a5ad2ce10d1ad259acc32205397d3634506cb Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 13 Jan 2022 15:07:10 -0800 Subject: [PATCH 04/27] longs on x86 are special --- src/coreclr/jit/forwardsub.cpp | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index ceeba5aae76ad..8d73394d85994 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -405,6 +405,14 @@ bool Compiler::fgForwardSub(Statement* stmt) return false; } +#if defined(TARGET_X86) + if (fwdSubNode->TypeGet() == TYP_LONG) + { + JITDUMP(" TYP_LONG fwd sub node, x86\n"); + return false; + } +#endif // defined(TARGET_X86) + GenTreeLclVar* const parentNodeLHSLocal = parentNodeLHS->AsLclVar(); unsigned const lhsLclNum = parentNodeLHSLocal->GetLclNum(); @@ -428,6 +436,14 @@ bool Compiler::fgForwardSub(Statement* stmt) return false; } +#if defined(TARGET_X86) + if (fwdSubNode->TypeGet() == TYP_LONG) + { + JITDUMP(" TYP_LONG fwd sub node, x86\n"); + return false; + } +#endif // defined(TARGET_X86) + GenTreeLclVar* const fwdSubNodeLocal = fwdSubNode->AsLclVar(); unsigned const fwdLclNum = fwdSubNodeLocal->GetLclNum(); @@ -436,7 +452,7 @@ bool Compiler::fgForwardSub(Statement* stmt) JITDUMP(" [marking V%02u as multi-reg-ret]", fwdLclNum); fwdVarDsc->lvIsMultiRegRet = true; fwdSubNodeLocal->SetMultiReg(); - fwdSubNode->gtFlags |= GTF_DONT_CSE; + fwdSubNodeLocal->gtFlags |= GTF_DONT_CSE; } // Looks good, forward sub! From f4599a95f242b3c7042f59afdc1aad2327727f67 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 13 Jan 2022 17:30:33 -0800 Subject: [PATCH 05/27] fix arm; don't forward sub no return calls --- src/coreclr/jit/forwardsub.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index 8d73394d85994..51140322f9105 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -290,12 +290,20 @@ bool Compiler::fgForwardSub(Statement* stmt) // Can't substitute GT_CATCH_ARG. // Can't substitute GT_LCLHEAP. // + // Don't substitute a no return call (trips up morph in some cases). + // if (fwdSubNode->OperIs(GT_QMARK, GT_CATCH_ARG, GT_LCLHEAP)) { JITDUMP(" node to sub is qmark, catch arg, or lcl heap\n"); return false; } + if (fwdSubNode->IsCall() && fwdSubNode->AsCall()->IsNoReturn()) + { + JITDUMP(" node to sub is no return call\n"); + return false; + } + // Bail if sub node has embedded assignment. 
// if ((fwdSubNode->gtFlags & GTF_ASG) != 0) @@ -405,13 +413,13 @@ bool Compiler::fgForwardSub(Statement* stmt) return false; } -#if defined(TARGET_X86) +#if defined(TARGET_X86) || defined(TARGET_ARM) if (fwdSubNode->TypeGet() == TYP_LONG) { - JITDUMP(" TYP_LONG fwd sub node, x86\n"); + JITDUMP(" TYP_LONG fwd sub node, target is x86/arm\n"); return false; } -#endif // defined(TARGET_X86) +#endif // defined(TARGET_X86) || defined(TARGET_ARM) GenTreeLclVar* const parentNodeLHSLocal = parentNodeLHS->AsLclVar(); @@ -436,13 +444,13 @@ bool Compiler::fgForwardSub(Statement* stmt) return false; } -#if defined(TARGET_X86) +#if defined(TARGET_X86) || defined(TARGET_ARM) if (fwdSubNode->TypeGet() == TYP_LONG) { - JITDUMP(" TYP_LONG fwd sub node, x86\n"); + JITDUMP(" TYP_LONG fwd sub node, target is x86/arm\n"); return false; } -#endif // defined(TARGET_X86) +#endif // defined(TARGET_X86) || defined(TARGET_ARM) GenTreeLclVar* const fwdSubNodeLocal = fwdSubNode->AsLclVar(); From 81e9ec2c78c4f87319340c5c932d506138e6c85c Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 13 Jan 2022 17:55:29 -0800 Subject: [PATCH 06/27] add extra check for address exposed uses --- src/coreclr/jit/forwardsub.cpp | 59 ++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index 51140322f9105..05e0d3e41236f 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -182,6 +182,50 @@ class ForwardSubVisitor final : public GenTreeVisitor unsigned m_accumulatedFlags; }; +//------------------------------------------------------------------------ +// EffectsVisitor: tree visitor to compute missing effects of a tree. +// +class EffectsVisitor final : public GenTreeVisitor +{ +public: + enum + { + DoPostOrder = true, + UseExecutionOrder = true + }; + + EffectsVisitor(Compiler* compiler) : GenTreeVisitor(compiler), m_flags(0) + { + } + + Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) + { + GenTree* const node = *use; + m_flags |= node->gtFlags & GTF_ALL_EFFECT; + + if (node->OperIs(GT_LCL_VAR)) + { + unsigned const lclNum = node->AsLclVarCommon()->GetLclNum(); + LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); + + if (varDsc->IsAddressExposed()) + { + m_flags |= GTF_GLOB_REF; + } + } + + return fgWalkResult::WALK_CONTINUE; + } + + unsigned GetFlags() + { + return m_flags; + } + +private: + unsigned m_flags; +}; + //------------------------------------------------------------------------ // fgForwardSub(Statement) -- forward sub this statement's computation // to the next statement, if legal and profitable @@ -338,6 +382,21 @@ bool Compiler::fgForwardSub(Statement* stmt) return false; } + // If we're relying on purity of fwdSubNode for legality of forward sub, + // do some extra checks for global uses that might not be reflected in the flags. + // + if (fwdSubNodeInvariant && ((fsv.GetFlags() & (GTF_CALL | GTF_ASG)) != 0)) + { + EffectsVisitor ev(this); + ev.WalkTree(&fwdSubNode, nullptr); + + if ((ev.GetFlags() & GTF_GLOB_REF) != 0) + { + JITDUMP(" potentially interacting effects from address-exposed locals in node to sub\n"); + return false; + } + } + // Finally, profitability checks. // // These conditions can be checked earlier in the final version to save some throughput. 
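The checks above amount to a two-part legality test: either the tree being forwarded is free of effects, or nothing that executes before the use can interact with it, with uses of address-exposed locals treated as global references (that is what the extra EffectsVisitor scan in this patch adds). A minimal sketch of that predicate over invented flag values follows; the real GTF_* encodings differ.

    #include <cstdint>
    #include <cstdio>

    enum EffectFlags : uint32_t
    {
        FLG_NONE     = 0,
        FLG_ASG      = 1 << 0, // tree contains an assignment
        FLG_CALL     = 1 << 1, // tree contains a call
        FLG_EXCEPT   = 1 << 2, // tree can throw
        FLG_GLOB_REF = 1 << 3, // tree reads/writes global (or aliased) state
    };

    // fwdSubFlags: effects of the tree we want to forward.
    // prefixFlags: effects of everything in the next tree that runs before the use.
    static bool CanForwardSub(uint32_t fwdSubFlags, uint32_t prefixFlags)
    {
        const bool fwdSubNodeInvariant = (fwdSubFlags == FLG_NONE);
        const bool nextTreePureUpToUse = ((prefixFlags & (FLG_EXCEPT | FLG_GLOB_REF | FLG_CALL)) == 0);
        return fwdSubNodeInvariant || nextTreePureUpToUse;
    }

    int main()
    {
        // A global-reading tree can't move past a call that might write that global.
        printf("%d\n", CanForwardSub(FLG_GLOB_REF, FLG_CALL));       // 0
        // An effect-free tree can be forwarded regardless of what runs before the use.
        printf("%d\n", CanForwardSub(FLG_NONE, FLG_CALL | FLG_ASG)); // 1
        return 0;
    }
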
From 70195af33ae901d2f6b6281748956dfcf717f31b Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 13 Jan 2022 18:30:20 -0800 Subject: [PATCH 07/27] add extra globals check, add disable switch --- src/coreclr/jit/forwardsub.cpp | 20 +++++++++++++++----- src/coreclr/jit/jitconfigvalues.h | 1 + 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index 05e0d3e41236f..daa4b5f475701 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -18,11 +18,21 @@ PhaseStatus Compiler::fgForwardSub() { bool changed = false; - for (BasicBlock* const block : Blocks()) + bool enabled = true; + +#if defined(DEBUG) + enabled = JitConfig.JitNoForwardSub() == 0; +#endif + + if (enabled) { - JITDUMP("\n\n===> " FMT_BB "\n", block->bbNum); - changed |= fgForwardSub(block); + for (BasicBlock* const block : Blocks()) + { + JITDUMP("\n\n===> " FMT_BB "\n", block->bbNum); + changed |= fgForwardSub(block); + } } + return changed ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } @@ -370,7 +380,7 @@ bool Compiler::fgForwardSub(Statement* stmt) // // or, // - // if the next tree can't change the value of fwdSubNode or be adversly impacted by fwdSubNode effects + // if the next tree can't change the value of fwdSubNode or be impacted by fwdSubNode effects // const bool fwdSubNodeInvariant = ((fwdSubNode->gtFlags & GTF_ALL_EFFECT) == 0); const bool nextTreeIsPureUpToUse = ((fsv.GetFlags() & (GTF_EXCEPT | GTF_GLOB_REF | GTF_CALL)) == 0); @@ -392,7 +402,7 @@ bool Compiler::fgForwardSub(Statement* stmt) if ((ev.GetFlags() & GTF_GLOB_REF) != 0) { - JITDUMP(" potentially interacting effects from address-exposed locals in node to sub\n"); + JITDUMP(" potentially interacting effects (AX locals)\n"); return false; } } diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 73b71a07a8c49..af2a89d7fceb6 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -118,6 +118,7 @@ CONFIG_INTEGER(JitNoCSE2, W("JitNoCSE2"), 0) CONFIG_INTEGER(JitNoForceFallback, W("JitNoForceFallback"), 0) // Set to non-zero to prevent NOWAY assert testing. // Overrides COMPlus_JitForceFallback and JIT stress // flags. +CONFIG_INTEGER(JitNoForwardSub, W("JitNoForwardSub"), 0) // Disables forward sub CONFIG_INTEGER(JitNoHoist, W("JitNoHoist"), 0) CONFIG_INTEGER(JitNoInline, W("JitNoInline"), 0) // Disables inlining of all methods CONFIG_INTEGER(JitNoMemoryBarriers, W("JitNoMemoryBarriers"), 0) // If 1, don't generate memory barriers From 0c4ec67b90bf66ad376fb147d3e9aa38c0d00969 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 19 Jan 2022 17:11:27 -0800 Subject: [PATCH 08/27] properly count implicit promoted field uses --- src/coreclr/jit/lclmorph.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index ec662935d5e79..d898a2b025c68 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -470,11 +470,23 @@ class LocalAddressVisitor final : public GenTreeVisitor if (varDsc->lvIsStructField) { - // Promoted field, increase counter for the parent lclVar. + // Promoted field, increase count for the parent lclVar. + // assert(!m_compiler->lvaIsImplicitByRefLocal(lclNum)); unsigned parentLclNum = varDsc->lvParentLcl; UpdateEarlyRefCount(parentLclNum); } + + if (varDsc->lvPromoted) + { + // Promoted struct, increase count for each promoted field. 
+ // + for (unsigned childLclNum = varDsc->lvFieldLclStart; + childLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++childLclNum) + { + UpdateEarlyRefCount(childLclNum); + } + } } PushValue(node); From 2e6bd0b6d62eae3748bb71b2fb4178c12631a35b Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 21 Jan 2022 13:06:38 -0800 Subject: [PATCH 09/27] look for exposed uses in target statement too --- src/coreclr/jit/forwardsub.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index daa4b5f475701..fdeaa041f19e6 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -145,6 +145,15 @@ class ForwardSubVisitor final : public GenTreeVisitor m_parentNode = parent; } } + + // Uses of address-exposed locals are modelled as global refs. + // + LclVarDsc* const varDsc = m_compiler->lvaGetDesc(lclNum); + + if (varDsc->IsAddressExposed()) + { + m_accumulatedFlags |= GTF_GLOB_REF; + } } m_accumulatedFlags |= (node->gtFlags & GTF_GLOB_EFFECT); @@ -215,8 +224,8 @@ class EffectsVisitor final : public GenTreeVisitor if (node->OperIs(GT_LCL_VAR)) { - unsigned const lclNum = node->AsLclVarCommon()->GetLclNum(); - LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); + unsigned const lclNum = node->AsLclVarCommon()->GetLclNum(); + LclVarDsc* const varDsc = m_compiler->lvaGetDesc(lclNum); if (varDsc->IsAddressExposed()) { From ae37a9696c4e109973edbbb4442f94b013be0383 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 21 Jan 2022 13:30:56 -0800 Subject: [PATCH 10/27] fix bad merge --- src/coreclr/jit/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index e64d27f3de4ee..1c44d8072c828 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6423,7 +6423,7 @@ class Compiler bool fgMorphCanUseLclFldForCopy(unsigned lclNum1, unsigned lclNum2); GenTreeLclVar* fgMorphTryFoldObjAsLclVar(GenTreeObj* obj, bool destroyNodes = true); - GenTree* fgMorphCommutative(GenTreeOp* tree); + GenTreeOp* fgMorphCommutative(GenTreeOp* tree); GenTree* fgMorphCastedBitwiseOp(GenTreeOp* tree); GenTree* fgMorphReduceAddOps(GenTree* tree); From cd92c727d2ed0daab4f0284a8565a54052ac4633 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 21 Jan 2022 13:45:19 -0800 Subject: [PATCH 11/27] update debuginfo test --- src/tests/JIT/Directed/debugging/debuginfo/tests.il | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/tests/JIT/Directed/debugging/debuginfo/tests.il b/src/tests/JIT/Directed/debugging/debuginfo/tests.il index 9f6dbfa8a5821..7175b596692d2 100644 --- a/src/tests/JIT/Directed/debugging/debuginfo/tests.il +++ b/src/tests/JIT/Directed/debugging/debuginfo/tests.il @@ -53,11 +53,13 @@ IL_000c: ret } + // fwdSub moves second call under the return tree + // .method public hidebysig static int32 TestLateFailingInlineCandidate() cil managed { .custom instance void [attribute]ExpectedILMappings::.ctor() = { property int32[] Debug = int32[2]( 0 6 ) - property int32[] Opts = int32[2]( 0 6 ) + property int32[] Opts = int32[2]( 0 0xE ) } .maxstack 2 .locals init (int32 V_0) From 7b0688759daaf7e9116f496d7fe6fd77e316f750 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 22 Jan 2022 09:42:16 -0800 Subject: [PATCH 12/27] be more cautious about struct fwd sub --- src/coreclr/jit/forwardsub.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git 
a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index fdeaa041f19e6..109f5358d72a0 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -456,15 +456,18 @@ bool Compiler::fgForwardSub(Statement* stmt) // Quirks: // - // We may sometimes lose a simd type handle. Avoid substituting if so. + // We may sometimes lose or change a type handle. Avoid substituting if so. // - if (varTypeIsSIMD(fwdSubNode)) + if (gtGetStructHandleIfPresent(fwdSubNode) != gtGetStructHandleIfPresent(fsv.GetNode())) { - if (gtGetStructHandleIfPresent(fwdSubNode) != gtGetStructHandleIfPresent(fsv.GetNode())) - { - JITDUMP(" would lose or gain struct handle\n"); - return false; - } + JITDUMP(" would change struct handle (substitution)\n"); + return false; + } + + if (gtGetStructHandleIfPresent(fwdSubNode) != gtGetStructHandleIfPresent(lhsNode)) + { + JITDUMP(" would change struct handle (assignment)\n"); + return false; } // There are implicit assumptions downstream on where/how multi-reg ops From f415fe5e05165b388940bacba89493cff98b3c99 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sun, 23 Jan 2022 09:02:14 -0800 Subject: [PATCH 13/27] avoid if initial assign has mismatched type --- src/coreclr/jit/forwardsub.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index 109f5358d72a0..d0d03830b4b5e 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -376,10 +376,17 @@ bool Compiler::fgForwardSub(Statement* stmt) } // Bail if types disagree. + // Might be able to tolerate these by retyping. // if (fsv.GetNode()->TypeGet() != fwdSubNode->TypeGet()) { - JITDUMP(" mismatched types\n"); + JITDUMP(" mismatched types (substitution)\n"); + return false; + } + + if (lhsNode->TypeGet() != fwdSubNode->TypeGet()) + { + JITDUMP(" mismatched types (assignment)\n"); return false; } From 727037e3b0e520e79c23ef1a0927021a534520b3 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sun, 23 Jan 2022 18:29:23 -0800 Subject: [PATCH 14/27] complexity limit; move tree to sub checks earlier --- src/coreclr/jit/forwardsub.cpp | 107 +++++++++++++++++++-------------- 1 file changed, 62 insertions(+), 45 deletions(-) diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index d0d03830b4b5e..d079ccac8c78e 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -313,41 +313,13 @@ bool Compiler::fgForwardSub(Statement* stmt) return false; } - // Local seems suitable. See if the next statement - // contains the one and only use. - // - Statement* const nextStmt = stmt->GetNextStmt(); - - // We often see stale flags, eg call flags after inlining. - // Try and clean these up. - // - gtUpdateStmtSideEffects(nextStmt); - gtUpdateStmtSideEffects(stmt); - - // Scan for the (single) use. - // - ForwardSubVisitor fsv(this, lclNum); - fsv.WalkTree(nextStmt->GetRootNodePointer(), nullptr); - - assert(fsv.GetCount() <= 1); - - if ((fsv.GetCount() == 0) || (fsv.GetNode() == nullptr)) - { - JITDUMP(" no next stmt use\n"); - return false; - } - - JITDUMP(" [%06u] is only use of [%06u] (V%02u) ", dspTreeID(fsv.GetNode()), dspTreeID(lhsNode), lclNum); - - // Next statement seems suitable. - // See if we can forward sub without changing semantics. + // Check the tree to substitute. // // We could just extract the value portion and forward sub that, // but cleanup would be more complicated. 
// - GenTree* const rhsNode = rootNode->gtGetOp2(); - GenTree* const nextRootNode = nextStmt->GetRootNode(); - GenTree* fwdSubNode = rhsNode; + GenTree* const rhsNode = rootNode->gtGetOp2(); + GenTree* fwdSubNode = rhsNode; // Can't substitute a qmark (unless the use is RHS of an assign... could check for this) // Can't substitute GT_CATCH_ARG. @@ -357,13 +329,13 @@ bool Compiler::fgForwardSub(Statement* stmt) // if (fwdSubNode->OperIs(GT_QMARK, GT_CATCH_ARG, GT_LCLHEAP)) { - JITDUMP(" node to sub is qmark, catch arg, or lcl heap\n"); + JITDUMP(" tree to sub is qmark, catch arg, or lcl heap\n"); return false; } if (fwdSubNode->IsCall() && fwdSubNode->AsCall()->IsNoReturn()) { - JITDUMP(" node to sub is no return call\n"); + JITDUMP(" tree to sub is no return call\n"); return false; } @@ -371,22 +343,73 @@ bool Compiler::fgForwardSub(Statement* stmt) // if ((fwdSubNode->gtFlags & GTF_ASG) != 0) { - JITDUMP(" node to sub has effects\n"); + JITDUMP(" tree to sub has effects\n"); return false; } - // Bail if types disagree. + // Bail if sub node has mismatched types. // Might be able to tolerate these by retyping. // - if (fsv.GetNode()->TypeGet() != fwdSubNode->TypeGet()) + if (lhsNode->TypeGet() != fwdSubNode->TypeGet()) { - JITDUMP(" mismatched types (substitution)\n"); + JITDUMP(" mismatched types (assignment)\n"); return false; } - if (lhsNode->TypeGet() != fwdSubNode->TypeGet()) + if (gtGetStructHandleIfPresent(fwdSubNode) != gtGetStructHandleIfPresent(lhsNode)) { - JITDUMP(" mismatched types (assignment)\n"); + JITDUMP(" would change struct handle (assignment)\n"); + return false; + } + + // Don't fwd sub overly large trees. + // Size limit here is ad-hoc. Need to tune. + // + unsigned const nodeLimit = 16; + + if (gtComplexityExceeds(&fwdSubNode, nodeLimit)) + { + JITDUMP(" tree to sub has more than %u nodes\n", nodeLimit); + return false; + } + + // Local and tree to substitute seem suitable. + // See if the next statement contains the one and only use. + // + Statement* const nextStmt = stmt->GetNextStmt(); + + // We often see stale flags, eg call flags after inlining. + // Try and clean these up. + // + gtUpdateStmtSideEffects(nextStmt); + gtUpdateStmtSideEffects(stmt); + + // Scan for the (single) use. + // + ForwardSubVisitor fsv(this, lclNum); + fsv.WalkTree(nextStmt->GetRootNodePointer(), nullptr); + + assert(fsv.GetCount() <= 1); + + if ((fsv.GetCount() == 0) || (fsv.GetNode() == nullptr)) + { + JITDUMP(" no next stmt use\n"); + return false; + } + + JITDUMP(" [%06u] is only use of [%06u] (V%02u) ", dspTreeID(fsv.GetNode()), dspTreeID(lhsNode), lclNum); + + // Next statement seems suitable. + // See if we can forward sub without changing semantics. + // + GenTree* const nextRootNode = nextStmt->GetRootNode(); + + // Bail if types disagree. + // Might be able to tolerate these by retyping. + // + if (fsv.GetNode()->TypeGet() != fwdSubNode->TypeGet()) + { + JITDUMP(" mismatched types (substitution)\n"); return false; } @@ -471,12 +494,6 @@ bool Compiler::fgForwardSub(Statement* stmt) return false; } - if (gtGetStructHandleIfPresent(fwdSubNode) != gtGetStructHandleIfPresent(lhsNode)) - { - JITDUMP(" would change struct handle (assignment)\n"); - return false; - } - // There are implicit assumptions downstream on where/how multi-reg ops // can appear. 
// From 6003ea9bae7fca8cfe88a47bf50b9c4c1f73ff9a Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Mon, 24 Jan 2022 09:19:47 -0800 Subject: [PATCH 15/27] fix crossgen2 simd issue --- src/coreclr/jit/forwardsub.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index d079ccac8c78e..13883832fe39d 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -494,6 +494,17 @@ bool Compiler::fgForwardSub(Statement* stmt) return false; } +#ifdef FEATURE_SIMD + // Don't forward sub a SIMD call under a HW intrinsic node. + // LowerCallStruct is not prepared for this. + // + if (fwdSubNode->IsCall() && varTypeIsSIMD(fwdSubNode->TypeGet()) && fsv.GetParentNode()->OperIs(GT_HWINTRINSIC)) + { + JITDUMP(" simd returning call; hw intrinsic\n"); + return false; + } +#endif // FEATURE_SIMD + // There are implicit assumptions downstream on where/how multi-reg ops // can appear. // From 52ca482be2ad893d9079d48513ad402b94a1750b Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Mon, 24 Jan 2022 11:43:30 -0800 Subject: [PATCH 16/27] make sure we don't lose multireg uses --- src/coreclr/jit/forwardsub.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index 13883832fe39d..46c3ae3f0f32a 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -362,6 +362,14 @@ bool Compiler::fgForwardSub(Statement* stmt) return false; } + // If lhs is mulit-reg, rhs must be too. + // + if (lhsNode->IsMultiRegNode() && !fwdSubNode->IsMultiRegNode()) + { + JITDUMP(" would change multi-reg (assignment)\n"); + return false; + } + // Don't fwd sub overly large trees. // Size limit here is ad-hoc. Need to tune. // @@ -579,6 +587,14 @@ bool Compiler::fgForwardSub(Statement* stmt) fwdSubNodeLocal->gtFlags |= GTF_DONT_CSE; } + // If the use is a multi-reg arg, don't forward sub non-locals. + // + if (fsv.GetNode()->IsMultiRegNode() && !fwdSubNode->IsMultiRegNode()) + { + JITDUMP(" would change multi-reg (substitution)\n"); + return false; + } + // Looks good, forward sub! // GenTree** use = fsv.GetUse(); From 44ac7d17be1ec4df81b0ca539e7f3840e7927ea0 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 25 Jan 2022 11:15:22 -0800 Subject: [PATCH 17/27] avoid call under call sub for now --- src/coreclr/jit/forwardsub.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index 46c3ae3f0f32a..8894657272264 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -595,6 +595,16 @@ bool Compiler::fgForwardSub(Statement* stmt) return false; } + // If we're substituting a call into a call arg use, we may run afoul of + // call operand reordering in morph, where a call can move past an + // (unmarked) aliased local ref. + // + if (fwdSubNode->IsCall() && fsv.IsCallArg()) + { + JITDUMP(" tree to sub is call, use is call arg\n"); + return false; + } + // Looks good, forward sub! 
// GenTree** use = fsv.GetUse(); From 7abf2fb595dbcf1a5dc979388e25c3eb22023286 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 26 Jan 2022 14:30:02 -0800 Subject: [PATCH 18/27] size limit for next stmt too --- src/coreclr/jit/forwardsub.cpp | 53 +++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index 8894657272264..e67271b9cc36c 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -83,7 +83,8 @@ bool Compiler::fgForwardSub(BasicBlock* block) //------------------------------------------------------------------------ // ForwardSubVisitor: tree visitor to locate uses of a local in a tree // -// Also computes the set of side effects that happen "before" the use. +// Also computes the set of side effects that happen "before" the use, +// and counts the size of the tree. // class ForwardSubVisitor final : public GenTreeVisitor { @@ -101,14 +102,16 @@ class ForwardSubVisitor final : public GenTreeVisitor , m_node(nullptr) , m_parentNode(nullptr) , m_lclNum(lclNum) - , m_count(0) + , m_useCount(0) , m_useFlags(0) , m_accumulatedFlags(0) + , m_treeSize(0) { } Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) { + m_treeSize++; GenTree* const node = *use; if (node->OperIs(GT_LCL_VAR)) @@ -117,7 +120,7 @@ class ForwardSubVisitor final : public GenTreeVisitor if (lclNum == m_lclNum) { - m_count++; + m_useCount++; // Screen out contextual "uses" // @@ -161,44 +164,50 @@ class ForwardSubVisitor final : public GenTreeVisitor return fgWalkResult::WALK_CONTINUE; } - int GetCount() + unsigned GetUseCount() const { - return m_count; + return m_useCount; } - GenTree* GetNode() + GenTree* GetNode() const { return m_node; } - GenTree** GetUse() + GenTree** GetUse() const { return m_use; } - GenTree* GetParentNode() + GenTree* GetParentNode() const { return m_parentNode; } - unsigned GetFlags() + unsigned GetFlags() const { return m_useFlags; } - bool IsCallArg() + bool IsCallArg() const { return m_parentNode->IsCall(); } + unsigned GetComplexity() const + { + return m_treeSize; + } + private: GenTree** m_use; GenTree* m_node; GenTree* m_parentNode; unsigned m_lclNum; - int m_count; + unsigned m_useCount; unsigned m_useFlags; unsigned m_accumulatedFlags; + unsigned m_treeSize; }; //------------------------------------------------------------------------ @@ -373,6 +382,8 @@ bool Compiler::fgForwardSub(Statement* stmt) // Don't fwd sub overly large trees. // Size limit here is ad-hoc. Need to tune. // + // Consider instead using the height of the fwdSubNode. + // unsigned const nodeLimit = 16; if (gtComplexityExceeds(&fwdSubNode, nodeLimit)) @@ -397,9 +408,12 @@ bool Compiler::fgForwardSub(Statement* stmt) ForwardSubVisitor fsv(this, lclNum); fsv.WalkTree(nextStmt->GetRootNodePointer(), nullptr); - assert(fsv.GetCount() <= 1); + // LclMorph (via RCS_Early) said there was just one use. + // It had better have gotten this right. + // + assert(fsv.GetUseCount() <= 1); - if ((fsv.GetCount() == 0) || (fsv.GetNode() == nullptr)) + if ((fsv.GetUseCount() == 0) || (fsv.GetNode() == nullptr)) { JITDUMP(" no next stmt use\n"); return false; @@ -407,6 +421,19 @@ bool Compiler::fgForwardSub(Statement* stmt) JITDUMP(" [%06u] is only use of [%06u] (V%02u) ", dspTreeID(fsv.GetNode()), dspTreeID(lhsNode), lclNum); + // If next statement already has a large tree, hold off + // on making it even larger. + // + // We use total node count. 
Consider instead using the depth of the use and the + // height of the fwdSubNode. + // + unsigned const nextTreeLimit = 200; + if ((fsv.GetComplexity() > nextTreeLimit) && gtComplexityExceeds(&fwdSubNode, 1)) + { + JITDUMP(" next stmt tree is too large (%u)\n", fsv.GetComplexity()); + return false; + } + // Next statement seems suitable. // See if we can forward sub without changing semantics. // From 985bb7bc3487c1b1f43592f67f613a9d2dbaf638 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 28 Jan 2022 14:27:42 -0800 Subject: [PATCH 19/27] JIT: fix x86 codegen issue We were not calling `genProduceReg` in one case, leading to a missing def and liveness assert. Normally this would have been a temp assign followed by a copy to a local; evidently we don't track liveness for temps and so don't notice the missing def, and the subsequent copy defs the local and all seems well. Forward sub gets rid of the copy and exposes the issue under jit stress. --- src/coreclr/jit/codegenxarch.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 7da0bd7285625..cfc97dc6b1370 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -1972,6 +1972,7 @@ void CodeGen::genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode) inst_Mov(TYP_FLOAT, tempXmm, reg1, /* canSkip */ false); GetEmitter()->emitIns_SIMD_R_R_R(INS_punpckldq, size, targetReg, targetReg, tempXmm); } + genProduceReg(lclNode); } #elif defined(TARGET_AMD64) assert(!TargetOS::IsWindows || !"Multireg store to SIMD reg not supported on Windows x64"); From d3dfc249515489b790b3b28096099029fcc43dc9 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 28 Jan 2022 16:20:09 -0800 Subject: [PATCH 20/27] review feedback --- src/coreclr/jit/compiler.cpp | 8 +- src/coreclr/jit/compiler.h | 4 +- src/coreclr/jit/compphases.h | 2 +- src/coreclr/jit/forwardsub.cpp | 160 +++++++++++++++++++++++++-------- src/coreclr/jit/gentree.cpp | 2 +- 5 files changed, 130 insertions(+), 46 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 44a622c191279..b0d9cfc0e508a 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4657,13 +4657,9 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // Figure out what locals are address-taken. // DoPhase(this, PHASE_STR_ADRLCL, &Compiler::fgMarkAddressExposedLocals); - - // Run a simple forward sub pass. + // Run a simple forward substitution pass. // - if (opts.OptimizationEnabled()) - { - DoPhase(this, PHASE_FWD_SUB, &Compiler::fgForwardSub); - } + DoPhase(this, PHASE_FWD_SUB, &Compiler::fgForwardSub); // Apply the type update to implicit byref parameters; also choose (based on address-exposed // analysis) which implicit byref promotions to keep (requires copy to initialize) or discard. 
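Note that in the compiler.cpp hunk just above, the opts.OptimizationEnabled() guard moves out of compCompile and into the phase itself, which then simply reports that it changed nothing. A toy sketch of that gating pattern, with invented types rather than the JIT's Phase machinery:

    #include <cstdio>

    enum class PhaseStatus
    {
        MODIFIED_NOTHING,
        MODIFIED_EVERYTHING,
    };

    struct ToyCompiler
    {
        bool optimizationsEnabled = true;

        PhaseStatus RunForwardSub()
        {
            if (!optimizationsEnabled)
            {
                // The phase gates itself; the caller invokes it unconditionally.
                return PhaseStatus::MODIFIED_NOTHING;
            }

            bool changed = false;
            // ... per-block work would go here, setting 'changed' ...
            return changed ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
        }
    };

    int main()
    {
        ToyCompiler comp;
        comp.optimizationsEnabled = false;
        printf("%d\n", comp.RunForwardSub() == PhaseStatus::MODIFIED_NOTHING); // 1
        return 0;
    }
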
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 1c44d8072c828..e76c7a05a236a 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6562,8 +6562,8 @@ class Compiler void fgMarkAddressExposedLocals(Statement* stmt); PhaseStatus fgForwardSub(); - bool fgForwardSub(BasicBlock* block); - bool fgForwardSub(Statement* statement); + bool fgForwardSubBlock(BasicBlock* block); + bool fgForwardSubStatement(Statement* statement); static fgWalkPreFn fgUpdateSideEffectsPre; static fgWalkPostFn fgUpdateSideEffectsPost; diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index 9b03ec7984cce..5e87aedc7cf88 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -43,7 +43,7 @@ CompPhaseNameMacro(PHASE_UPDATE_FINALLY_FLAGS, "Update finally target flags", CompPhaseNameMacro(PHASE_COMPUTE_PREDS, "Compute preds", "PREDS", false, -1, false) CompPhaseNameMacro(PHASE_EARLY_UPDATE_FLOW_GRAPH,"Update flow graph early pass", "UPD-FG-E", false, -1, false) CompPhaseNameMacro(PHASE_STR_ADRLCL, "Morph - Structs/AddrExp", "MOR-STRAL",false, -1, false) -CompPhaseNameMacro(PHASE_FWD_SUB, "Forward Sub", "FWD-SUB", false, -1, false) +CompPhaseNameMacro(PHASE_FWD_SUB, "Forward Substitution", "FWD-SUB", false, -1, false) CompPhaseNameMacro(PHASE_MORPH_IMPBYREF, "Morph - ByRefs", "MOR-BYREF",false, -1, false) CompPhaseNameMacro(PHASE_PROMOTE_STRUCTS, "Morph - Promote Structs", "PROMOTER" ,false, -1, false) CompPhaseNameMacro(PHASE_MORPH_GLOBAL, "Morph - Global", "MOR-GLOB", false, -1, false) diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index e67271b9cc36c..056d319a79d0f 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -7,50 +7,132 @@ #pragma hdrstop #endif +//------------------------------------------------------------------------ // Simple Forward Substitution +// +// This phase tries to reconnect trees that were split early on by +// phases like the importer and inlining. We run it before morph +// to provide more context for morph's tree based optimizations, and +// we run it after the local address visitor because that phase sets +// address exposure for locals and computes (early) ref counts. +// +// The general pattern we look for is +// +// Statement(n): +// GT_ASG(lcl, tree) +// Statement(n+1): +// ... use of lcl ... +// +// where those are the only appearances of lcl and lcl is not address +// exposed. +// +// The "optimization" here transforms this to +// +// ~~Statement(n)~~ (removed) +// Statement(n+1): +// ... use of tree ... +// +// As always our main concerns are throughput, legality, profitability, +// and ensuring downstream phases do not get confused. +// +// For throughput, we try and early out on illegal or unprofitable cases +// before doing the more costly bits of analysis. We only scan a limited +// amount of IR and just give up if we can't find what we are looking for. +// +// If we're successful we will backtrack a bit, to try and catch cases like +// +// Statement(n): +// lcl1 = tree1 +// Statement(n+1): +// lcl2 = tree2 +// Statement(n+2): +// use ... lcl1 ... use ... lcl2 ... +// +// If we can forward sub tree2, then the def aind use of lcl1 become +// adjacent. +// +// For legality we must show that evaluating "tree" at its new position +// can't change any observable behavior. This largely means running an +// interference analysis between tree and the portion of Statement(n+1) +// that will evaluate before "tree". 
This analysis is complicated by some +// missing flags on trees, in particular modelling the potential uses +// of exposed locals. We run supplementary scans looking for those. +// +// Ideally we'd update the tree with our findings, or better yet ensure +// that upstream phases didn't leave the wrong flags. +// +// For profitability we first try and avoid code growth. We do this +// by only substituting in cases where lcl has exactly one def and one use. +// This info is computed for us but the RCS_Early ref counting done during +// the immediately preceeding fgMarkAddressExposedLocals phase. +// +// Because of this, once we've substituted "tree" we know that lcl is dead +// and we can remove the assignment statement. +// +// Even with ref count screening, we don't know for sure where the +// single use of local might be, so we have to seach for it. +// +// We also take pains not to create overly large trees as the recursion +// done by morph incorporates a lot of state; deep trees may lead to +// stack overflows. +// +// There are a fair number of ad-hoc restrictions on what can be +// substituted where; these reflect various blemishes or implicit +// contracts in our IR shapes that we should either remove or mandate. +// +// Possible enhancements: +// * Allow fwd sub of "simple, cheap" trees when there's more than one use. +// * Search more widely for the use. +// * Use height/depth to avoid blowing morph's recursion, rather than tree size. +// * Sub accross a block boundary if successor block is unique, join-free, +// and in the same EH region. +// * Retrun this later, after we have built SSA, and handle single-def single-use +// from SSA perspective. +// +//------------------------------------------------------------------------ //------------------------------------------------------------------------ -// fgForwardSub: try forward substitution in this method +// fgForwardSub: run forward substitution in this method // // Returns: // suitable phase status // PhaseStatus Compiler::fgForwardSub() { - bool changed = false; - bool enabled = true; + if (!opts.OptimizationEnabled()) + { + return PhaseStatus::MODIFIED_NOTHING; + } #if defined(DEBUG) - enabled = JitConfig.JitNoForwardSub() == 0; + if (JitConfig.JitNoForwardSub() > 0) + { + return PhaseStatus::MODIFIED_NOTHING; + } #endif - if (enabled) + bool changed = false; + + for (BasicBlock* const block : Blocks()) { - for (BasicBlock* const block : Blocks()) - { - JITDUMP("\n\n===> " FMT_BB "\n", block->bbNum); - changed |= fgForwardSub(block); - } + JITDUMP("\n\n===> " FMT_BB "\n", block->bbNum); + changed |= fgForwardSubBlock(block); } return changed ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } //------------------------------------------------------------------------ -// fgForwardSub(BasicBlock) -- try forward substitution in this block +// fgForwardSubBlock: run forward substitution in this block // // Arguments: // block -- block to process // // Returns: -// true if any forward substitution took place +// true if any IR was modified // -bool Compiler::fgForwardSub(BasicBlock* block) +bool Compiler::fgForwardSubBlock(BasicBlock* block) { - // Look for top-level ASG where the value defined is - // a local that's not address-exposed, and the local has - // exactly one use. 
- Statement* stmt = block->firstStmt(); Statement* lastStmt = block->lastStmt(); bool changed = false; @@ -59,7 +141,7 @@ bool Compiler::fgForwardSub(BasicBlock* block) { Statement* const prevStmt = stmt->GetPrevStmt(); Statement* const nextStmt = stmt->GetNextStmt(); - bool const substituted = fgForwardSub(stmt); + bool const substituted = fgForwardSubStatement(stmt); if (substituted) { @@ -67,12 +149,18 @@ bool Compiler::fgForwardSub(BasicBlock* block) changed = true; } + // Try backtracking if we substituted. + // if (substituted && (prevStmt != lastStmt) && prevStmt->GetRootNode()->OperIs(GT_ASG)) { + // Yep, bactrack. + // stmt = prevStmt; } else { + // Move on to the next. + // stmt = nextStmt; } } @@ -103,8 +191,8 @@ class ForwardSubVisitor final : public GenTreeVisitor , m_parentNode(nullptr) , m_lclNum(lclNum) , m_useCount(0) - , m_useFlags(0) - , m_accumulatedFlags(0) + , m_useFlags(GTF_EMPTY) + , m_accumulatedFlags(GTF_EMPTY) , m_treeSize(0) { } @@ -184,7 +272,7 @@ class ForwardSubVisitor final : public GenTreeVisitor return m_parentNode; } - unsigned GetFlags() const + GenTreeFlags GetFlags() const { return m_useFlags; } @@ -200,14 +288,14 @@ class ForwardSubVisitor final : public GenTreeVisitor } private: - GenTree** m_use; - GenTree* m_node; - GenTree* m_parentNode; - unsigned m_lclNum; - unsigned m_useCount; - unsigned m_useFlags; - unsigned m_accumulatedFlags; - unsigned m_treeSize; + GenTree** m_use; + GenTree* m_node; + GenTree* m_parentNode; + unsigned m_lclNum; + unsigned m_useCount; + GenTreeFlags m_useFlags; + GenTreeFlags m_accumulatedFlags; + unsigned m_treeSize; }; //------------------------------------------------------------------------ @@ -222,7 +310,7 @@ class EffectsVisitor final : public GenTreeVisitor UseExecutionOrder = true }; - EffectsVisitor(Compiler* compiler) : GenTreeVisitor(compiler), m_flags(0) + EffectsVisitor(Compiler* compiler) : GenTreeVisitor(compiler), m_flags(GTF_EMPTY) { } @@ -245,18 +333,18 @@ class EffectsVisitor final : public GenTreeVisitor return fgWalkResult::WALK_CONTINUE; } - unsigned GetFlags() + GenTreeFlags GetFlags() { return m_flags; } private: - unsigned m_flags; + GenTreeFlags m_flags; }; //------------------------------------------------------------------------ -// fgForwardSub(Statement) -- forward sub this statement's computation -// to the next statement, if legal and profitable +// fgForwardSubStatement: forward substitute this statement's +// computation to the next statement, if legal and profitable // // arguments: // stmt - statement in question @@ -265,7 +353,7 @@ class EffectsVisitor final : public GenTreeVisitor // true if statement computation was forwarded. // caller is responsible for removing the now-dead statement. // -bool Compiler::fgForwardSub(Statement* stmt) +bool Compiler::fgForwardSubStatement(Statement* stmt) { // Is this tree a def of a single use, unaliased local? 
// @@ -344,7 +432,7 @@ bool Compiler::fgForwardSub(Statement* stmt) if (fwdSubNode->IsCall() && fwdSubNode->AsCall()->IsNoReturn()) { - JITDUMP(" tree to sub is no return call\n"); + JITDUMP(" tree to sub is a 'no return' call\n"); return false; } diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 21d11db2cb26e..2d2af1f902a54 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -11759,7 +11759,7 @@ GenTree* Compiler::gtFoldExprCompare(GenTree* tree) } /* Currently we can only fold when the two subtrees exactly match */ - /* ORDER_SIDEFF here covers volatile subtrees */ + /* GTF_ORDER_SIDEFF here covers volatile subtrees */ if ((tree->gtFlags & (GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF)) || GenTree::Compare(op1, op2, true) == false) { From 79d2255aae883b1804de84ba6203290f0469a9e5 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 28 Jan 2022 19:39:51 -0800 Subject: [PATCH 21/27] Apply suggestions from code review Thanks for spotting those typos. Co-authored-by: Bruce Forstall --- src/coreclr/jit/forwardsub.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index 056d319a79d0f..d42db86a5919f 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -48,7 +48,7 @@ // Statement(n+2): // use ... lcl1 ... use ... lcl2 ... // -// If we can forward sub tree2, then the def aind use of lcl1 become +// If we can forward sub tree2, then the def and use of lcl1 become // adjacent. // // For legality we must show that evaluating "tree" at its new position @@ -84,9 +84,9 @@ // * Allow fwd sub of "simple, cheap" trees when there's more than one use. // * Search more widely for the use. // * Use height/depth to avoid blowing morph's recursion, rather than tree size. -// * Sub accross a block boundary if successor block is unique, join-free, +// * Sub across a block boundary if successor block is unique, join-free, // and in the same EH region. -// * Retrun this later, after we have built SSA, and handle single-def single-use +// * Rerun this later, after we have built SSA, and handle single-def single-use // from SSA perspective. // //------------------------------------------------------------------------ From b3d52fe65014542333cd5235a4dd7a8396970307 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 29 Jan 2022 16:12:31 -0800 Subject: [PATCH 22/27] fix issue with subbing past normalize on store assignment --- src/coreclr/jit/forwardsub.cpp | 12 +++ .../JIT/opt/ForwardSub/normalizeOnStore.cs | 30 ++++++ .../JIT/opt/ForwardSub/normalizeOnStore.il | 95 +++++++++++++++++++ .../opt/ForwardSub/normalizeOnStore.ilproj | 13 +++ 4 files changed, 150 insertions(+) create mode 100644 src/tests/JIT/opt/ForwardSub/normalizeOnStore.cs create mode 100644 src/tests/JIT/opt/ForwardSub/normalizeOnStore.il create mode 100644 src/tests/JIT/opt/ForwardSub/normalizeOnStore.ilproj diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index d42db86a5919f..7d795cdf47d6a 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -557,6 +557,9 @@ bool Compiler::fgForwardSubStatement(Statement* stmt) // If we're relying on purity of fwdSubNode for legality of forward sub, // do some extra checks for global uses that might not be reflected in the flags. // + // TODO: remove this once we can trust upstream phases and/or gtUpdateStmtSideEffects + // to set GTF_GLOB_REF properly. 
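The supplementary check gated below recomputes effects from the nodes themselves rather than trusting cached flags: in particular, an appearance of an address-exposed local is treated as a global reference, since a call or store evaluated before the use could read or write that local through a pointer. A sketch of the per-node logic this relies on (assumed shape, abbreviated from EffectsVisitor):

// Assumed shape of the visitor's post-order step; details abbreviated.
fgWalkResult PostOrderVisit(GenTree** use, GenTree* user)
{
    GenTree* const node = *use;

    // Accumulate the effect flags the node already carries.
    m_flags |= (node->gtFlags & GTF_ALL_EFFECT);

    // Conservatively treat address-exposed locals as global references,
    // since upstream phases may not have set GTF_GLOB_REF for them.
    if (node->OperIs(GT_LCL_VAR, GT_LCL_FLD))
    {
        LclVarDsc* const varDsc = m_compiler->lvaGetDesc(node->AsLclVarCommon()->GetLclNum());

        if (varDsc->IsAddressExposed())
        {
            m_flags |= GTF_GLOB_REF;
        }
    }

    return fgWalkResult::WALK_CONTINUE;
}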
+ // if (fwdSubNodeInvariant && ((fsv.GetFlags() & (GTF_CALL | GTF_ASG)) != 0)) { EffectsVisitor ev(this); @@ -720,6 +723,15 @@ bool Compiler::fgForwardSubStatement(Statement* stmt) return false; } + // If the intial has truncate on store semantics, we need to replicate + // that here with a cast. + // + if (varDsc->lvNormalizeOnStore() && fgCastNeeded(fwdSubNode, varDsc->TypeGet())) + { + JITDUMP(" [adding cast for normalize on store]"); + fwdSubNode = gtNewCastNode(TYP_INT, fwdSubNode, false, varDsc->TypeGet()); + } + // Looks good, forward sub! // GenTree** use = fsv.GetUse(); diff --git a/src/tests/JIT/opt/ForwardSub/normalizeOnStore.cs b/src/tests/JIT/opt/ForwardSub/normalizeOnStore.cs new file mode 100644 index 0000000000000..193fc3bdddbba --- /dev/null +++ b/src/tests/JIT/opt/ForwardSub/normalizeOnStore.cs @@ -0,0 +1,30 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// Note: This test file is the source of the normalizeOnStore.il file. It requires +// InlineIL.Fody to compile. It is not used as anything but a reference of that +// IL file. + +using System; +using System.Runtime.CompilerServices; +using InlineIL; + +class ForwardSubTests +{ + [MethodImpl(MethodImplOptions.NoInlining)] + private static int Problem(int a) + { + IL.DeclareLocals(new LocalVar(typeof(short))); + + IL.Emit.Ldarg(0); + IL.Emit.Stloc(0); + + IL.Emit.Ldloc(0); + return IL.Return(); + } + + public static int Main() + { + return Problem(0xF0000 + 100); + } +} diff --git a/src/tests/JIT/opt/ForwardSub/normalizeOnStore.il b/src/tests/JIT/opt/ForwardSub/normalizeOnStore.il new file mode 100644 index 0000000000000..f3dccc43bcca7 --- /dev/null +++ b/src/tests/JIT/opt/ForwardSub/normalizeOnStore.il @@ -0,0 +1,95 @@ + +// Microsoft (R) .NET Framework IL Disassembler. Version 4.8.3928.0 +// Copyright (c) Microsoft Corporation. All rights reserved. + + + +// Metadata version: v4.0.30319 +.assembly extern System.Runtime +{ + .publickeytoken = (B0 3F 5F 7F 11 D5 0A 3A ) // .?_....: + .ver 6:0:0:0 +} +.assembly store +{ + .custom instance void [System.Runtime]System.Runtime.CompilerServices.CompilationRelaxationsAttribute::.ctor(int32) = ( 01 00 08 00 00 00 00 00 ) + .custom instance void [System.Runtime]System.Runtime.CompilerServices.RuntimeCompatibilityAttribute::.ctor() = ( 01 00 01 00 54 02 16 57 72 61 70 4E 6F 6E 45 78 // ....T..WrapNonEx + 63 65 70 74 69 6F 6E 54 68 72 6F 77 73 01 ) // ceptionThrows. + + // --- The following custom attribute is added automatically, do not uncomment ------- + // .custom instance void [System.Runtime]System.Diagnostics.DebuggableAttribute::.ctor(valuetype [System.Runtime]System.Diagnostics.DebuggableAttribute/DebuggingModes) = ( 01 00 02 00 00 00 00 00 ) + + .custom instance void [System.Runtime]System.Runtime.Versioning.TargetFrameworkAttribute::.ctor(string) = ( 01 00 18 2E 4E 45 54 43 6F 72 65 41 70 70 2C 56 // ....NETCoreApp,V + 65 72 73 69 6F 6E 3D 76 36 2E 30 01 00 54 0E 14 // ersion=v6.0..T.. + 46 72 61 6D 65 77 6F 72 6B 44 69 73 70 6C 61 79 // FrameworkDisplay + 4E 61 6D 65 00 ) // Name. + .custom instance void [System.Runtime]System.Reflection.AssemblyCompanyAttribute::.ctor(string) = ( 01 00 05 73 74 6F 72 65 00 00 ) // ...store.. + .custom instance void [System.Runtime]System.Reflection.AssemblyConfigurationAttribute::.ctor(string) = ( 01 00 07 52 65 6C 65 61 73 65 00 00 ) // ...Release.. 
+ .custom instance void [System.Runtime]System.Reflection.AssemblyFileVersionAttribute::.ctor(string) = ( 01 00 07 31 2E 30 2E 30 2E 30 00 00 ) // ...1.0.0.0.. + .custom instance void [System.Runtime]System.Reflection.AssemblyInformationalVersionAttribute::.ctor(string) = ( 01 00 05 31 2E 30 2E 30 00 00 ) // ...1.0.0.. + .custom instance void [System.Runtime]System.Reflection.AssemblyProductAttribute::.ctor(string) = ( 01 00 05 73 74 6F 72 65 00 00 ) // ...store.. + .custom instance void [System.Runtime]System.Reflection.AssemblyTitleAttribute::.ctor(string) = ( 01 00 05 73 74 6F 72 65 00 00 ) // ...store.. + .hash algorithm 0x00008004 + .ver 1:0:0:0 +} +.module store.dll +// MVID: {A3A2457C-4D44-404C-9480-A353C3DB86A4} +.imagebase 0x00400000 +.file alignment 0x00000200 +.stackreserve 0x00100000 +.subsystem 0x0003 // WINDOWS_CUI +.corflags 0x00000001 // ILONLY +// Image base: 0x06D00000 + + +// =============== CLASS MEMBERS DECLARATION =================== + +.class private auto ansi beforefieldinit ForwardSubTests + extends [System.Runtime]System.Object +{ + .method private hidebysig static int32 + Problem(int32 a) cil managed noinlining + { + // Code size 4 (0x4) + .maxstack 1 + .locals init (int16 V_0) + IL_0000: ldarg.0 + IL_0001: stloc.0 + IL_0002: ldloc.0 + IL_0003: ret + } // end of method ForwardSubTests::Problem + + .method public hidebysig static int32 Main() cil managed + { + .entrypoint + // Code size 11 (0xb) + .maxstack 8 + IL_0000: ldc.i4 0xf0064 + IL_0005: call int32 ForwardSubTests::Problem(int32) + IL_000a: ret + } // end of method ForwardSubTests::Main + + .method public hidebysig specialname rtspecialname + instance void .ctor() cil managed + { + // Code size 7 (0x7) + .maxstack 8 + IL_0000: ldarg.0 + IL_0001: call instance void [System.Runtime]System.Object::.ctor() + IL_0006: ret + } // end of method ForwardSubTests::.ctor + +} // end of class ForwardSubTests + +.class private auto ansi store_ProcessedByFody + extends [System.Runtime]System.Object +{ + .field static assembly literal string FodyVersion = "6.6.0.0" + .field static assembly literal string InlineIL = "1.7.1.0" +} // end of class store_ProcessedByFody + + +// ============================================================= + +// *********** DISASSEMBLY COMPLETE *********************** +// WARNING: Created Win32 resource file normalizeOnStore.res diff --git a/src/tests/JIT/opt/ForwardSub/normalizeOnStore.ilproj b/src/tests/JIT/opt/ForwardSub/normalizeOnStore.ilproj new file mode 100644 index 0000000000000..477cccfe1def0 --- /dev/null +++ b/src/tests/JIT/opt/ForwardSub/normalizeOnStore.ilproj @@ -0,0 +1,13 @@ + + + Exe + 1 + + + None + True + + + + + From f1509ceb8634c9aa234bbe82737dd921dd431a99 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Mon, 31 Jan 2022 18:06:00 -0800 Subject: [PATCH 23/27] clean up nullcheck of new helper --- src/coreclr/jit/morph.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index b882b03375c55..2a8134e2f37c8 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -11540,6 +11540,23 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) case GT_PUTARG_TYPE: return fgMorphTree(tree->AsUnOp()->gtGetOp1()); + case GT_NULLCHECK: + { + op1 = tree->AsUnOp()->gtGetOp1(); + if (op1->IsCall()) + { + GenTreeCall* const call = op1->AsCall(); + if (call->IsHelperCall() && s_helperCallProperties.NonNullReturn(eeGetHelperNum(call->gtCallMethHnd))) + { + JITDUMP("\nNULLCHECK on [%06u] 
will always succeed\n", dspTreeID(call)); + + // TODO: Can we also remove the call? + // + return fgMorphTree(call); + } + } + } + default: break; } From 6cb1c9cf23c213ab0a1761503cd56eaf6db72187 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Mon, 31 Jan 2022 18:33:14 -0800 Subject: [PATCH 24/27] fix build break --- src/coreclr/jit/morph.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 2a8134e2f37c8..ded011d2043ca 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -11556,6 +11556,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) } } } + break; default: break; From a9599d3b80747ebd85cda65b7ee7d74e912afdf7 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Mon, 31 Jan 2022 19:38:07 -0800 Subject: [PATCH 25/27] revise check in gtFoldExprCompare to avoid a few regressions --- src/coreclr/jit/gentree.cpp | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 63ad12ba553b2..9d6b36a154f75 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -11686,12 +11686,38 @@ GenTree* Compiler::gtFoldExprCompare(GenTree* tree) return tree; } - /* Currently we can only fold when the two subtrees exactly match */ - /* GTF_ORDER_SIDEFF here covers volatile subtrees */ + // Currently we can only fold when the two subtrees exactly match + // and everything is side effect free. + // + if (((tree->gtFlags & GTF_SIDE_EFFECT) != 0) || !GenTree::Compare(op1, op2, true)) + { + // No folding. + // + return tree; + } - if ((tree->gtFlags & (GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF)) || GenTree::Compare(op1, op2, true) == false) + // GTF_ORDER_SIDEEFF here may indicate volatile subtrees. + // Or it may indicate a non-null assertion prop into an indir subtree. + // + // Check the operands. + // + if ((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0) { - return tree; /* return unfolded tree */ + if ((op1->OperIs(GT_IND, GT_BLK, GT_OBJ) && ((op1->gtFlags & GTF_IND_VOLATILE) != 0)) || + (op1->OperIs(GT_FIELD, GT_CLS_VAR) && ((op1->gtFlags & GTF_FLD_VOLATILE) != 0))) + { + // No folding. + // + return tree; + } + + if ((op2->OperIs(GT_IND, GT_BLK, GT_OBJ) && ((op2->gtFlags & GTF_IND_VOLATILE) != 0)) || + (op2->OperIs(GT_FIELD, GT_CLS_VAR) && ((op2->gtFlags & GTF_FLD_VOLATILE) != 0))) + { + // No folding. + // + return tree; + } } GenTree* cons; From ba9a4443343bf423d5f1b386d1b618be4f000193 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 1 Feb 2022 09:08:44 -0800 Subject: [PATCH 26/27] refine gtFoldExprCompare --- src/coreclr/jit/gentree.cpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 9d6b36a154f75..449e00f046bb0 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -11703,16 +11703,12 @@ GenTree* Compiler::gtFoldExprCompare(GenTree* tree) // if ((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0) { - if ((op1->OperIs(GT_IND, GT_BLK, GT_OBJ) && ((op1->gtFlags & GTF_IND_VOLATILE) != 0)) || - (op1->OperIs(GT_FIELD, GT_CLS_VAR) && ((op1->gtFlags & GTF_FLD_VOLATILE) != 0))) - { - // No folding. - // - return tree; - } + // If op1 is "volatle" and op2 is not, we can still fold. 
+ // + const bool op1MayBeVolatile = (op1->gtFlags & GTF_ORDER_SIDEEFF) != 0; + const bool op2MayBeVolatile = (op2->gtFlags & GTF_ORDER_SIDEEFF) != 0; - if ((op2->OperIs(GT_IND, GT_BLK, GT_OBJ) && ((op2->gtFlags & GTF_IND_VOLATILE) != 0)) || - (op2->OperIs(GT_FIELD, GT_CLS_VAR) && ((op2->gtFlags & GTF_FLD_VOLATILE) != 0))) + if (!op1MayBeVolatile || op2MayBeVolatile) { // No folding. // From 44aa5802870859c162a24c25b1e994840af2b245 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 2 Feb 2022 19:56:36 -0800 Subject: [PATCH 27/27] add more thorough checking for call arg interference --- src/coreclr/jit/forwardsub.cpp | 61 +++++++++++++++--- .../JIT/opt/ForwardSub/callArgInterference.cs | 63 +++++++++++++++++++ .../opt/ForwardSub/callArgInterference.csproj | 10 +++ .../JIT/opt/ForwardSub/normalizeOnStore.cs | 2 +- .../JIT/opt/ForwardSub/normalizeOnStore.il | 2 +- 5 files changed, 126 insertions(+), 12 deletions(-) create mode 100644 src/tests/JIT/opt/ForwardSub/callArgInterference.cs create mode 100644 src/tests/JIT/opt/ForwardSub/callArgInterference.csproj diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index 7d795cdf47d6a..77f0c6d99f436 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -88,6 +88,11 @@ // and in the same EH region. // * Rerun this later, after we have built SSA, and handle single-def single-use // from SSA perspective. +// * Fix issue in morph that can unsoundly reorder call args, and remove +// extra effects computation from ForwardSubVisitor. +// * We can be more aggressive with GTF_IND_INVARIANT / GTF_IND_NONFAULTING +// nodes--even though they may be marked GTF_GLOB_REF, they can be freely +// reordered. See if this offers any benefit. // //------------------------------------------------------------------------ @@ -174,6 +179,9 @@ bool Compiler::fgForwardSubBlock(BasicBlock* block) // Also computes the set of side effects that happen "before" the use, // and counts the size of the tree. // +// Effects accounting is complicated by missing flags and by the need +// to avoid introducing interfering call args. +// class ForwardSubVisitor final : public GenTreeVisitor { public: @@ -189,6 +197,7 @@ class ForwardSubVisitor final : public GenTreeVisitor , m_use(nullptr) , m_node(nullptr) , m_parentNode(nullptr) + , m_callAncestor(nullptr) , m_lclNum(lclNum) , m_useCount(0) , m_useFlags(GTF_EMPTY) @@ -234,6 +243,19 @@ class ForwardSubVisitor final : public GenTreeVisitor m_use = use; m_useFlags = m_accumulatedFlags; m_parentNode = parent; + + // If this use contributes to a call arg we need to + // remember the call and handle it specially when we + // see it later in the postorder walk. + // + for (int i = 1; i < m_ancestors.Height(); i++) + { + if (m_ancestors.Top(i)->IsCall()) + { + m_callAncestor = m_ancestors.Top(i)->AsCall(); + break; + } + } } } @@ -247,6 +269,34 @@ class ForwardSubVisitor final : public GenTreeVisitor } } + // Is this the use's call ancestor? + // + if ((m_callAncestor != nullptr) && (node == m_callAncestor)) + { + // To be conservative and avoid issues with morph + // reordering call args, we merge in effects of all args + // to this call. + // + // Remove this if/when morph's arg sorting is fixed. 
+ // + GenTreeFlags oldUseFlags = m_useFlags; + + if (m_callAncestor->gtCallThisArg != nullptr) + { + m_useFlags |= (m_callAncestor->gtCallThisArg->GetNode()->gtFlags & GTF_GLOB_EFFECT); + } + + for (GenTreeCall::Use& use : m_callAncestor->Args()) + { + m_useFlags |= (use.GetNode()->gtFlags & GTF_GLOB_EFFECT); + } + + if (oldUseFlags != m_useFlags) + { + JITDUMP(" [added other call arg use flags: 0x%x]", m_useFlags & ~oldUseFlags); + } + } + m_accumulatedFlags |= (node->gtFlags & GTF_GLOB_EFFECT); return fgWalkResult::WALK_CONTINUE; @@ -291,6 +341,7 @@ class ForwardSubVisitor final : public GenTreeVisitor GenTree** m_use; GenTree* m_node; GenTree* m_parentNode; + GenTreeCall* m_callAncestor; unsigned m_lclNum; unsigned m_useCount; GenTreeFlags m_useFlags; @@ -713,16 +764,6 @@ bool Compiler::fgForwardSubStatement(Statement* stmt) return false; } - // If we're substituting a call into a call arg use, we may run afoul of - // call operand reordering in morph, where a call can move past an - // (unmarked) aliased local ref. - // - if (fwdSubNode->IsCall() && fsv.IsCallArg()) - { - JITDUMP(" tree to sub is call, use is call arg\n"); - return false; - } - // If the intial has truncate on store semantics, we need to replicate // that here with a cast. // diff --git a/src/tests/JIT/opt/ForwardSub/callArgInterference.cs b/src/tests/JIT/opt/ForwardSub/callArgInterference.cs new file mode 100644 index 0000000000000..60c8a908e85bc --- /dev/null +++ b/src/tests/JIT/opt/ForwardSub/callArgInterference.cs @@ -0,0 +1,63 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// Generated by Fuzzlyn v1.5 on 2022-02-02 18:45:57 +// Run on X86 Windows +// Seed: 2552302029114048182 +// Reduced from 26.6 KiB to 0.5 KiB in 00:00:36 +// Debug: Outputs -1 +// Release: Outputs -2 +using System; +using System.Runtime.CompilerServices; + +public interface I1 +{ +} + +public struct S0 : I1 +{ + public static bool result; + public short F5; + public S0(short f5): this() + { + F5 = f5; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public void M4(short arg0, I1 arg1) + { + result = arg0 == -1; + } + + public I1 M7() + { + ref S0 var0 = ref this; + var0 = new S0(1); + return this; + } +} + +public class ForwardSubCallArgInterference +{ + public static IRT s_rt; + public static int Main() + { + s_rt = new C(); + S0 vr0 = default(S0); + new S0(0).M4((short)~vr0.F5, vr0.M7()); + return S0.result ? 
100 : -1; + } +} + +public interface IRT +{ + void WriteLine(T val); +} + +public class C : IRT +{ + public void WriteLine(T val) + { + Console.WriteLine(val); + } +} diff --git a/src/tests/JIT/opt/ForwardSub/callArgInterference.csproj b/src/tests/JIT/opt/ForwardSub/callArgInterference.csproj new file mode 100644 index 0000000000000..19781e26c20d8 --- /dev/null +++ b/src/tests/JIT/opt/ForwardSub/callArgInterference.csproj @@ -0,0 +1,10 @@ + + + Exe + + True + + + + + diff --git a/src/tests/JIT/opt/ForwardSub/normalizeOnStore.cs b/src/tests/JIT/opt/ForwardSub/normalizeOnStore.cs index 193fc3bdddbba..f952758e42e08 100644 --- a/src/tests/JIT/opt/ForwardSub/normalizeOnStore.cs +++ b/src/tests/JIT/opt/ForwardSub/normalizeOnStore.cs @@ -9,7 +9,7 @@ using System.Runtime.CompilerServices; using InlineIL; -class ForwardSubTests +class ForwardSubNormalizeOnStore { [MethodImpl(MethodImplOptions.NoInlining)] private static int Problem(int a) diff --git a/src/tests/JIT/opt/ForwardSub/normalizeOnStore.il b/src/tests/JIT/opt/ForwardSub/normalizeOnStore.il index f3dccc43bcca7..6416e7492eaea 100644 --- a/src/tests/JIT/opt/ForwardSub/normalizeOnStore.il +++ b/src/tests/JIT/opt/ForwardSub/normalizeOnStore.il @@ -44,7 +44,7 @@ // =============== CLASS MEMBERS DECLARATION =================== -.class private auto ansi beforefieldinit ForwardSubTests +.class private auto ansi beforefieldinit ForwardSubNormalizeOnStore extends [System.Runtime]System.Object { .method private hidebysig static int32