diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt
index c46cdd18a164e..7659fca21bb1e 100644
--- a/src/coreclr/jit/CMakeLists.txt
+++ b/src/coreclr/jit/CMakeLists.txt
@@ -101,6 +101,7 @@ set( JIT_SOURCES
   fgprofile.cpp
   fgstmt.cpp
   flowgraph.cpp
+  forwardsub.cpp
   gcdecode.cpp
   gcencode.cpp
   gcinfo.cpp
diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index 394aca1719aeb..68d53d7ec3b94 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -4796,6 +4796,13 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
     //
     DoPhase(this, PHASE_STR_ADRLCL, &Compiler::fgMarkAddressExposedLocals);
 
+    // Run a simple forward sub pass.
+    //
+    if (opts.OptimizationEnabled())
+    {
+        DoPhase(this, PHASE_FWD_SUB, &Compiler::fgForwardSub);
+    }
+
     // Apply the type update to implicit byref parameters; also choose (based on address-exposed
     // analysis) which implicit byref promotions to keep (requires copy to initialize) or discard.
     //
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index e195ba991d563..b2d3b015c1119 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -6411,7 +6411,7 @@ class Compiler
 
     bool fgMorphCanUseLclFldForCopy(unsigned lclNum1, unsigned lclNum2);
 
-    GenTreeLclVar* fgMorphTryFoldObjAsLclVar(GenTreeObj* obj);
+    GenTreeLclVar* fgMorphTryFoldObjAsLclVar(GenTreeObj* obj, bool destroyNodes = true);
 
     GenTree* fgMorphCommutative(GenTreeOp* tree);
     GenTree* fgMorphCastedBitwiseOp(GenTreeOp* tree);
@@ -6550,6 +6550,10 @@ class Compiler
     void fgMarkAddressExposedLocals();
     void fgMarkAddressExposedLocals(Statement* stmt);
 
+    PhaseStatus fgForwardSub();
+    bool fgForwardSub(BasicBlock* block);
+    bool fgForwardSub(Statement* statement);
+
     static fgWalkPreFn fgUpdateSideEffectsPre;
     static fgWalkPostFn fgUpdateSideEffectsPost;
 
diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h
index e5c15c1d438a6..9b03ec7984cce 100644
--- a/src/coreclr/jit/compphases.h
+++ b/src/coreclr/jit/compphases.h
@@ -43,6 +43,7 @@ CompPhaseNameMacro(PHASE_UPDATE_FINALLY_FLAGS, "Update finally target flags",
 CompPhaseNameMacro(PHASE_COMPUTE_PREDS, "Compute preds", "PREDS", false, -1, false)
 CompPhaseNameMacro(PHASE_EARLY_UPDATE_FLOW_GRAPH,"Update flow graph early pass", "UPD-FG-E", false, -1, false)
 CompPhaseNameMacro(PHASE_STR_ADRLCL, "Morph - Structs/AddrExp", "MOR-STRAL",false, -1, false)
+CompPhaseNameMacro(PHASE_FWD_SUB, "Forward Sub", "FWD-SUB", false, -1, false)
 CompPhaseNameMacro(PHASE_MORPH_IMPBYREF, "Morph - ByRefs", "MOR-BYREF",false, -1, false)
 CompPhaseNameMacro(PHASE_PROMOTE_STRUCTS, "Morph - Promote Structs", "PROMOTER" ,false, -1, false)
 CompPhaseNameMacro(PHASE_MORPH_GLOBAL, "Morph - Global", "MOR-GLOB", false, -1, false)
diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp
new file mode 100644
index 0000000000000..5aadf5aa3feae
--- /dev/null
+++ b/src/coreclr/jit/forwardsub.cpp
@@ -0,0 +1,392 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
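+//
+// Forward substitution: given a statement that assigns a tree to a
+// single-use, unaliased local, where the sole use appears in the very
+// next statement, substitute the tree for the use. An informal sketch
+// (IR shapes abbreviated):
+//
+//    STMT 0: ASG(LCL_VAR V05, ADD(LCL_VAR V01, LCL_VAR V02))  ; V05 has one use
+//    STMT 1: RETURN(LCL_VAR V05)
+//
+// becomes
+//
+//    STMT 1: RETURN(ADD(LCL_VAR V01, LCL_VAR V02))
+//
+// undoing some of the temporaries introduced by importation and inlining.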
+ +#include "jitpch.h" + +#ifdef _MSC_VER +#pragma hdrstop +#endif + +// Simple Forward Substitution + +//------------------------------------------------------------------------ +// fgForwardSub: try forward substitution in this method +// +// Returns: +// suitable phase status +// +PhaseStatus Compiler::fgForwardSub() +{ + bool changed = false; + for (BasicBlock* const block : Blocks()) + { + JITDUMP("\n\n===> " FMT_BB "\n", block->bbNum); + changed |= fgForwardSub(block); + } + return changed ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; +} + +//------------------------------------------------------------------------ +// fgForwardSub(BasicBlock) -- try forward substitution in this block +// +// Arguments: +// block -- block to process +// +// Returns: +// true if any forward substitution took place +// +bool Compiler::fgForwardSub(BasicBlock* block) +{ + // Look for top-level ASG where the value defined is + // a local that's not address-exposed, and the local has + // exactly one use. + + Statement* stmt = block->firstStmt(); + Statement* lastStmt = block->lastStmt(); + bool changed = false; + + while (stmt != lastStmt) + { + Statement* const prevStmt = stmt->GetPrevStmt(); + Statement* const nextStmt = stmt->GetNextStmt(); + bool const substituted = fgForwardSub(stmt); + + if (substituted) + { + fgRemoveStmt(block, stmt); + changed = true; + } + + if (substituted && (prevStmt != lastStmt) && prevStmt->GetRootNode()->OperIs(GT_ASG)) + { + stmt = prevStmt; + } + else + { + stmt = nextStmt; + } + } + + return changed; +} + +//------------------------------------------------------------------------ +// ForwardSubVisitor: tree visitor to locate uses of a local in a tree +// +// Also computes the set of side effects that happen "before" the use. 
+//
+class ForwardSubVisitor final : public GenTreeVisitor<ForwardSubVisitor>
+{
+public:
+    enum
+    {
+        ComputeStack      = true,
+        DoPostOrder       = true,
+        UseExecutionOrder = true
+    };
+
+    ForwardSubVisitor(Compiler* compiler, unsigned lclNum)
+        : GenTreeVisitor<ForwardSubVisitor>(compiler)
+        , m_lclNum(lclNum)
+        , m_use(nullptr)
+        , m_node(nullptr)
+        , m_count(0)
+        , m_useFlags(0)
+        , m_accumulatedFlags(0)
+        , m_isCallArg(false)
+    {
+    }
+
+    Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user)
+    {
+        GenTree* const node = *use;
+
+        if (node->OperIs(GT_LCL_VAR))
+        {
+            unsigned const lclNum = node->AsLclVarCommon()->GetLclNum();
+
+            if (lclNum == m_lclNum)
+            {
+                m_count++;
+
+                // Screen out contextual "uses"
+                //
+                GenTree* const parent = m_ancestors.Top(1);
+                bool const     isDef  = parent->OperIs(GT_ASG) && (parent->gtGetOp1() == node);
+                bool const     isAddr = parent->OperIs(GT_ADDR);
+
+                bool isCallTarget = false;
+
+                // Quirk:
+                //
+                // fgGetStubAddrArg cannot handle complex trees (it calls gtClone)
+                //
+                if (parent->IsCall())
+                {
+                    GenTreeCall* const parentCall = parent->AsCall();
+                    isCallTarget = (parentCall->gtCallType == CT_INDIRECT) && (parentCall->gtCallAddr == node);
+                }
+
+                if (!isDef && !isAddr && !isCallTarget)
+                {
+                    m_node      = node;
+                    m_use       = use;
+                    m_useFlags  = m_accumulatedFlags;
+                    m_isCallArg = parent->IsCall();
+                }
+            }
+        }
+
+        m_accumulatedFlags |= (node->gtFlags & GTF_GLOB_EFFECT);
+
+        return fgWalkResult::WALK_CONTINUE;
+    }
+
+    int GetCount()
+    {
+        return m_count;
+    }
+
+    GenTree* GetNode()
+    {
+        return m_node;
+    }
+
+    GenTree** GetUse()
+    {
+        return m_use;
+    }
+
+    unsigned GetFlags()
+    {
+        return m_useFlags;
+    }
+
+    bool IsCallArg()
+    {
+        return m_isCallArg;
+    }
+
+private:
+    unsigned  m_lclNum;
+    GenTree** m_use;
+    GenTree*  m_node;
+    int       m_count;
+    unsigned  m_useFlags;
+    unsigned  m_accumulatedFlags;
+    bool      m_isCallArg;
+};
+
+//------------------------------------------------------------------------
+// fgForwardSub(Statement) -- forward sub this statement's computation
+//    to the next statement, if legal and profitable
+//
+// Arguments:
+//    stmt - statement in question
+//
+// Returns:
+//    true if the statement's computation was forwarded.
+//    Caller is responsible for removing the now-dead statement.
+//
+bool Compiler::fgForwardSub(Statement* stmt)
+{
+    // Is this tree a def of a single-use, unaliased local?
+    //
+    GenTree* const rootNode = stmt->GetRootNode();
+
+    if (!rootNode->OperIs(GT_ASG))
+    {
+        return false;
+    }
+
+    GenTree* const lhsNode = rootNode->gtGetOp1();
+
+    if (!lhsNode->OperIs(GT_LCL_VAR))
+    {
+        return false;
+    }
+
+    JITDUMP(" [%06u]: ", dspTreeID(rootNode));
+
+    unsigned const   lclNum = lhsNode->AsLclVarCommon()->GetLclNum();
+    LclVarDsc* const varDsc = lvaGetDesc(lclNum);
+
+    // Leave pinned locals alone.
+    // This is just a perf opt -- we shouldn't find any uses.
+    //
+    if (varDsc->lvPinned)
+    {
+        JITDUMP(" pinned local\n");
+        return false;
+    }
+
+    // Only fwd sub if we expect no code duplication.
+    // We expect one def and one use.
+    //
+    if (varDsc->lvRefCnt(RCS_EARLY) != 2)
+    {
+        JITDUMP(" not a single-use local\n");
+        return false;
+    }
+
+    // And the local must be unaliased.
+    //
+    if (varDsc->IsAddressExposed())
+    {
+        JITDUMP(" local is address exposed\n");
+        return false;
+    }
+
+    // We could perhaps handle this case, but we'd want to update ref counts.
+    //
+    if (lvaIsImplicitByRefLocal(lclNum))
+    {
+        JITDUMP(" implicit by-ref local\n");
+        return false;
+    }
+
+    // Local seems suitable. See if the next statement
+    // contains the one and only use.
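+    // (Limiting the search to the immediately following statement keeps the
+    // legality checks local; a use further away would require reasoning about
+    // the side effects of every intervening statement.)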
+    //
+    Statement* const nextStmt = stmt->GetNextStmt();
+
+    // We often see stale flags, e.g. call flags after inlining.
+    // Try to clean these up.
+    //
+    gtUpdateStmtSideEffects(nextStmt);
+    gtUpdateStmtSideEffects(stmt);
+
+    // Scan for the (single) use.
+    //
+    ForwardSubVisitor fsv(this, lclNum);
+    fsv.WalkTree(nextStmt->GetRootNodePointer(), nullptr);
+
+    assert(fsv.GetCount() <= 1);
+
+    if ((fsv.GetCount() == 0) || (fsv.GetNode() == nullptr))
+    {
+        JITDUMP(" no next stmt use\n");
+        return false;
+    }
+
+    JITDUMP(" [%06u] is only use of [%06u] (V%02u) ", dspTreeID(fsv.GetNode()), dspTreeID(lhsNode), lclNum);
+
+    // Next statement seems suitable.
+    // See if we can forward sub without changing semantics.
+    //
+    // We could just extract the value portion and forward sub that,
+    // but cleanup would be more complicated.
+    //
+    GenTree* const rhsNode      = rootNode->gtGetOp2();
+    GenTree* const nextRootNode = nextStmt->GetRootNode();
+    GenTree*       fwdSubNode   = rhsNode;
+
+    // Can't substitute a qmark (unless the use is the RHS of an assign... could check for this).
+    // Can't substitute GT_CATCH_ARG.
+    //
+    if (fwdSubNode->OperIs(GT_QMARK, GT_CATCH_ARG))
+    {
+        JITDUMP(" node to sub is qmark or catch arg\n");
+        return false;
+    }
+
+    // Bail if the sub node has an embedded assignment.
+    //
+    if ((fwdSubNode->gtFlags & GTF_ASG) != 0)
+    {
+        JITDUMP(" node to sub has effects\n");
+        return false;
+    }
+
+    // Bail if types disagree.
+    //
+    if (fsv.GetNode()->TypeGet() != fwdSubNode->TypeGet())
+    {
+        JITDUMP(" mismatched types\n");
+        return false;
+    }
+
+    // We can forward sub if
+    //
+    // the value of the fwdSubNode can't change and its evaluation won't cause side effects,
+    //
+    // or,
+    //
+    // if the next tree can't change the value of fwdSubNode or be adversely impacted by fwdSubNode effects.
+    //
+    const bool fwdSubNodeInvariant   = ((fwdSubNode->gtFlags & GTF_ALL_EFFECT) == 0);
+    const bool nextTreeIsPureUpToUse = ((fsv.GetFlags() & (GTF_EXCEPT | GTF_GLOB_REF | GTF_CALL)) == 0);
+    if (!fwdSubNodeInvariant && !nextTreeIsPureUpToUse)
+    {
+        // Fwd sub may impact global values and/or reorder exceptions...
+        //
+        JITDUMP(" potentially interacting effects\n");
+        return false;
+    }
+
+    // Finally, profitability checks.
+    //
+    // These conditions can be checked earlier in the final version to save some throughput.
+    // Perhaps allowing for bypass with jit stress.
+    //
+    // If fwdSubNode is an address-exposed local, forwarding it may lose optimizations.
+    // (Maybe similar for DNER locals?)
+    //
+    if (fwdSubNode->OperIs(GT_LCL_VAR))
+    {
+        unsigned const   fwdLclNum = fwdSubNode->AsLclVarCommon()->GetLclNum();
+        LclVarDsc* const fwdVarDsc = lvaGetDesc(fwdLclNum);
+
+        if (fwdVarDsc->IsAddressExposed())
+        {
+            JITDUMP(" V%02u is address exposed\n", fwdLclNum);
+            return false;
+        }
+    }
+
+    // Quirks:
+    //
+    // We may sometimes lose a SIMD type handle. Avoid substituting if so.
+    //
+    if (varTypeIsSIMD(fwdSubNode))
+    {
+        if (gtGetStructHandleIfPresent(fwdSubNode) != gtGetStructHandleIfPresent(fsv.GetNode()))
+        {
+            JITDUMP(" would lose or gain struct handle\n");
+            return false;
+        }
+    }
+
+    // Optimization:
+    //
+    // If we are about to substitute GT_OBJ, see if we can simplify it first.
+    // Not doing so can lead to regressions...
+    //
+    // Hold off on doing this for call args for now (per issue #51569).
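+    //
+    // (The folding in question rewrites OBJ(ADDR(LCL_VAR V03)) into a
+    // suitably typed LCL_VAR V03; see fgMorphTryFoldObjAsLclVar below.)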
+    //
+    if (fwdSubNode->OperIs(GT_OBJ) && !fsv.IsCallArg())
+    {
+        const bool     destroyNodes = false;
+        GenTree* const optTree      = fgMorphTryFoldObjAsLclVar(fwdSubNode->AsObj(), destroyNodes);
+        if (optTree != nullptr)
+        {
+            JITDUMP(" -- folding OBJ(ADDR(LCL...)) -- ");
+            fwdSubNode = optTree;
+        }
+    }
+
+    // Looks good, forward sub!
+    //
+    GenTree** use = fsv.GetUse();
+    *use          = fwdSubNode;
+
+    if (!fwdSubNodeInvariant)
+    {
+        gtUpdateStmtSideEffects(nextStmt);
+    }
+
+    JITDUMP(" -- fwd subbing [%06u]; new next stmt is\n", dspTreeID(fwdSubNode));
+    DISPSTMT(nextStmt);
+
+    return true;
+}
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 5769db69e06e3..85e4938fa468f 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -2419,8 +2419,9 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp)
             costEx = IND_COST_EX;
             costSz = 2;
 
-            GenTree* addr = hwTree->Op(1)->gtEffectiveVal();
-            level         = gtSetEvalOrder(addr);
+            GenTree* const addrNode = hwTree->Op(1);
+            level                   = gtSetEvalOrder(addrNode);
+            GenTree* const addr     = addrNode->gtEffectiveVal();
 
             // See if we can form a complex addressing mode.
             if (addr->OperIs(GT_ADD) && gtMarkAddrMode(addr, &costEx, &costSz, hwTree->TypeGet()))
@@ -11764,8 +11765,9 @@ GenTree* Compiler::gtFoldExprCompare(GenTree* tree)
     }
 
     /* Currently we can only fold when the two subtrees exactly match */
+    /* ORDER_SIDEEFF here covers volatile subtrees */
 
-    if ((tree->gtFlags & GTF_SIDE_EFFECT) || GenTree::Compare(op1, op2, true) == false)
+    if ((tree->gtFlags & (GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF)) || GenTree::Compare(op1, op2, true) == false)
     {
         return tree; /* return unfolded tree */
     }
diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp
index 84d7965d395bf..ec662935d5e79 100644
--- a/src/coreclr/jit/lclmorph.cpp
+++ b/src/coreclr/jit/lclmorph.cpp
@@ -442,14 +442,15 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
 #endif // DEBUG
     }
 
-    // Morph promoted struct fields and count implicit byref argument occurrences.
+    // Morph promoted struct fields and count local occurrences.
+    //
     // Also create and push the value produced by the visited node. This is done here
     // rather than in PostOrderVisit because it makes it easy to handle nodes with an
     // arbitrary number of operands - just pop values until the value corresponding
     // to the visited node is encountered.
     fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
     {
-        GenTree* node = *use;
+        GenTree* const node = *use;
 
         if (node->OperIs(GT_FIELD))
         {
@@ -462,19 +463,17 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
 
         if (node->OperIsLocal())
         {
-            unsigned lclNum = node->AsLclVarCommon()->GetLclNum();
+            unsigned const   lclNum = node->AsLclVarCommon()->GetLclNum();
+            LclVarDsc* const varDsc = m_compiler->lvaGetDesc(lclNum);
+
+            UpdateEarlyRefCount(lclNum);
 
-            LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum);
             if (varDsc->lvIsStructField)
             {
                 // Promoted field, increase counter for the parent lclVar.
                 assert(!m_compiler->lvaIsImplicitByRefLocal(lclNum));
                 unsigned parentLclNum = varDsc->lvParentLcl;
-                UpdateEarlyRefCountForImplicitByRef(parentLclNum);
-            }
-            else
-            {
-                UpdateEarlyRefCountForImplicitByRef(lclNum);
+                UpdateEarlyRefCount(parentLclNum);
             }
         }
 
@@ -1183,7 +1182,7 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
     }
 
     //------------------------------------------------------------------------
-    // UpdateEarlyRefCountForImplicitByRef: updates the ref count for implicit byref params.
+    // UpdateEarlyRefCount: updates the ref count for locals.
     //
     // Arguments:
     //    lclNum - the local number to update the count for.
     //
@@ -1192,19 +1191,24 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
     //    fgMakeOutgoingStructArgCopy checks the ref counts for implicit byref params when it decides
     //    if it's legal to elide certain copies of them;
     //    fgRetypeImplicitByRefArgs checks the ref counts when it decides to undo promotions.
+    //    fgForwardSub uses ref counts to decide when to forward sub.
     //
-    void UpdateEarlyRefCountForImplicitByRef(unsigned lclNum)
+    void UpdateEarlyRefCount(unsigned lclNum)
     {
+        LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum);
+
+        // Note we don't need accurate counts when the values are large.
+        //
+        if (varDsc->lvRefCnt(RCS_EARLY) < USHRT_MAX)
+        {
+            varDsc->incLvRefCnt(1, RCS_EARLY);
+        }
+
         if (!m_compiler->lvaIsImplicitByRefLocal(lclNum))
        {
             return;
         }
 
-        LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum);
-        JITDUMP("LocalAddressVisitor incrementing ref count from %d to %d for implicit by-ref V%02d\n",
-                varDsc->lvRefCnt(RCS_EARLY), varDsc->lvRefCnt(RCS_EARLY) + 1, lclNum);
-        varDsc->incLvRefCnt(1, RCS_EARLY);
-
         // See if this struct is an argument to a call. This information is recorded
         // via the weighted early ref count for the local, and feeds the undo promotion
         // heuristic.
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 1ee03c84a398f..449e0d595088e 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -9485,6 +9485,7 @@ GenTree* Compiler::fgMorphConst(GenTree* tree)
 //
 // Arguments:
 //    obj - the obj node.
+//    destroyNodes - destroy nodes that are optimized away
 //
 // Return value:
 //    GenTreeLclVar if the obj can be replaced by it, null otherwise.
@@ -9495,7 +9496,7 @@ GenTree* Compiler::fgMorphConst(GenTree* tree)
 //    for some platforms does not expect struct `LCL_VAR` as a source, so
 //    it needs more work.
 //
-GenTreeLclVar* Compiler::fgMorphTryFoldObjAsLclVar(GenTreeObj* obj)
+GenTreeLclVar* Compiler::fgMorphTryFoldObjAsLclVar(GenTreeObj* obj, bool destroyNodes)
 {
     if (opts.OptimizationEnabled())
     {
@@ -9528,8 +9529,12 @@ GenTree* Compiler::fgMorphTryFoldObjAsLclVar(GenTreeObj* obj)
             lclVar->gtFlags &= ~GTF_DONT_CSE;
             lclVar->gtFlags |= (obj->gtFlags & GTF_DONT_CSE);
 
-            DEBUG_DESTROY_NODE(obj);
-            DEBUG_DESTROY_NODE(addr);
+            if (destroyNodes)
+            {
+                DEBUG_DESTROY_NODE(obj);
+                DEBUG_DESTROY_NODE(addr);
+            }
+
             return lclVar;
         }
     }
diff --git a/src/coreclr/jit/phase.cpp b/src/coreclr/jit/phase.cpp
index f08134bf11532..6c93c2b042f17 100644
--- a/src/coreclr/jit/phase.cpp
+++ b/src/coreclr/jit/phase.cpp
@@ -171,7 +171,8 @@ void Phase::PostPhase(PhaseStatus status)
                                             PHASE_MORPH_GLOBAL,   PHASE_INVERT_LOOPS,
                                             PHASE_OPTIMIZE_LAYOUT, PHASE_FIND_LOOPS,
                                             PHASE_BUILD_SSA,      PHASE_RATIONALIZE,
-                                            PHASE_LOWERING,       PHASE_STACK_LEVEL_SETTER};
+                                            PHASE_LOWERING,       PHASE_STACK_LEVEL_SETTER,
+                                            PHASE_FWD_SUB};
 
     if (madeChanges)
     {