From d78d2e7045ba9a38f9305ea6b3f1b32a1d592fa6 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 24 Feb 2021 16:44:40 -0800 Subject: [PATCH 1/7] JIT: profile updates for return merges and tail calls Stop trying to update the common return block profile data during return merging, as it is not yet known which return blocks will become tail calls. Start updating constant return block profile data during return merging as this is when we transform the flow. Update the common return block profile data during return merging in morph (adding more countss) and when creating tail calls (removing counts). Update profile consistency checker to handle switches properly and to use tolerant compares. Add extra dumping when solving for edge weights or adjusting flow edge weights to help track down where errors are coming from. Add new FMT_WT formatting string for profile weights, and start using it in fgprofile. handle constant return merges too --- src/coreclr/jit/block.h | 13 ++- src/coreclr/jit/fgprofile.cpp | 173 ++++++++++++++++++++++++---------- src/coreclr/jit/flowgraph.cpp | 42 ++++++--- src/coreclr/jit/jit.h | 4 + src/coreclr/jit/morph.cpp | 84 +++++++++++++++-- 5 files changed, 241 insertions(+), 75 deletions(-) diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h index c21131e8772ca..938b7394af14b 100644 --- a/src/coreclr/jit/block.h +++ b/src/coreclr/jit/block.h @@ -44,6 +44,9 @@ typedef BitVec_ValRet_T ASSERT_VALRET_TP; // This define is used with string concatenation to put this in printf format strings (Note that %u means unsigned int) #define FMT_BB "BB%02u" +// And this format for profile weights +#define FMT_WT "%.7f" + /***************************************************************************** * * Each basic block ends with a jump which is described as a value @@ -1291,8 +1294,14 @@ struct flowList // They return false if the newWeight is not between the current [min..max] // when slop is non-zero we allow for the case where our weights might be off by 'slop' // - bool setEdgeWeightMinChecked(BasicBlock::weight_t newWeight, BasicBlock::weight_t slop, bool* wbUsedSlop); - bool setEdgeWeightMaxChecked(BasicBlock::weight_t newWeight, BasicBlock::weight_t slop, bool* wbUsedSlop); + bool setEdgeWeightMinChecked(BasicBlock::weight_t newWeight, + BasicBlock* bDst, + BasicBlock::weight_t slop, + bool* wbUsedSlop); + bool setEdgeWeightMaxChecked(BasicBlock::weight_t newWeight, + BasicBlock* bDst, + BasicBlock::weight_t slop, + bool* wbUsedSlop); void setEdgeWeights(BasicBlock::weight_t newMinWeight, BasicBlock::weight_t newMaxWeight); flowList(BasicBlock* block, flowList* rest) diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 6481095e31f17..926ac88082d05 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -144,7 +144,8 @@ void Compiler::fgComputeProfileScale() impInlineInfo->profileScaleFactor = scale; impInlineInfo->profileScaleState = InlineInfo::ProfileScaleState::KNOWN; - JITDUMP(" call site count %f callee entry count %f scale %f\n", callSiteWeight, calleeWeight, scale); + JITDUMP(" call site count " FMT_WT " callee entry count " FMT_WT " scale " FMT_WT "\n", callSiteWeight, + calleeWeight, scale); } //------------------------------------------------------------------------ @@ -2164,8 +2165,8 @@ void EfficientEdgeCountReconstructor::Prepare() Edge* const edge = new (m_allocator) Edge(sourceBlock, targetBlock); - JITDUMP("... adding known edge " FMT_BB " -> " FMT_BB ": weight %0f\n", edge->m_sourceBlock->bbNum, - edge->m_targetBlock->bbNum, weight); + JITDUMP("... adding known edge " FMT_BB " -> " FMT_BB ": weight " FMT_WT "\n", + edge->m_sourceBlock->bbNum, edge->m_targetBlock->bbNum, weight); edge->m_weightKnown = true; edge->m_weight = weight; @@ -2238,11 +2239,11 @@ void EfficientEdgeCountReconstructor::Solve() edge->m_sourceBlock->bbNum, edge->m_targetBlock->bbNum); } assert(edge->m_weightKnown); - JITDUMP(" " FMT_BB " -> " FMT_BB " has weight %0f\n", edge->m_sourceBlock->bbNum, + JITDUMP(" " FMT_BB " -> " FMT_BB " has weight " FMT_WT "\n", edge->m_sourceBlock->bbNum, edge->m_targetBlock->bbNum, edge->m_weight); weight += edge->m_weight; } - JITDUMP(FMT_BB ": all incoming edge weights known, sum is %0f\n", block->bbNum, weight); + JITDUMP(FMT_BB ": all incoming edge weights known, sum is " FMT_WT "\n", block->bbNum, weight); weightKnown = true; } else if (info->m_outgoingUnknown == 0) @@ -2256,11 +2257,11 @@ void EfficientEdgeCountReconstructor::Solve() edge->m_sourceBlock->bbNum, edge->m_targetBlock->bbNum); } assert(edge->m_weightKnown); - JITDUMP(" " FMT_BB " -> " FMT_BB " has weight %0f\n", edge->m_sourceBlock->bbNum, + JITDUMP(" " FMT_BB " -> " FMT_BB " has weight " FMT_WT "\n", edge->m_sourceBlock->bbNum, edge->m_targetBlock->bbNum, edge->m_weight); weight += edge->m_weight; } - JITDUMP(FMT_BB ": all outgoing edge weights known, sum is %0f\n", block->bbNum, weight); + JITDUMP(FMT_BB ": all outgoing edge weights known, sum is " FMT_WT "\n", block->bbNum, weight); weightKnown = true; } @@ -2304,7 +2305,8 @@ void EfficientEdgeCountReconstructor::Solve() weight = info->m_weight - weight; JITDUMP(FMT_BB " -> " FMT_BB - ": target block weight and all other incoming edge weights known, so weight is %0f\n", + ": target block weight and all other incoming edge weights known, so weight is " FMT_WT + "\n", resolvedEdge->m_sourceBlock->bbNum, resolvedEdge->m_targetBlock->bbNum, weight); // If we arrive at a negative count for this edge, set it to zero. @@ -2350,7 +2352,8 @@ void EfficientEdgeCountReconstructor::Solve() weight = info->m_weight - weight; JITDUMP(FMT_BB " -> " FMT_BB - ": source block weight and all other outgoing edge weights known, so weight is %0f\n", + ": source block weight and all other outgoing edge weights known, so weight is " FMT_WT + "\n", resolvedEdge->m_sourceBlock->bbNum, resolvedEdge->m_targetBlock->bbNum, weight); // If we arrive at a negative count for this edge, set it to zero. @@ -2462,7 +2465,19 @@ void Compiler::fgIncorporateEdgeCounts() e.Propagate(); } -bool flowList::setEdgeWeightMinChecked(BasicBlock::weight_t newWeight, BasicBlock::weight_t slop, bool* wbUsedSlop) +//------------------------------------------------------------------------ +// setEdgeWeightMinChecked: possibly update minimum edge weight +// +// Arguments: +// newWeight - proposed new weight +// bDst - destintion block for edge +// slop - profile slush fund +// wbUsedSlop [out] - true if we tapped into the slush fund +// +bool flowList::setEdgeWeightMinChecked(BasicBlock::weight_t newWeight, + BasicBlock* bDst, + BasicBlock::weight_t slop, + bool* wbUsedSlop) { bool result = false; if ((newWeight <= flEdgeWeightMax) && (newWeight >= flEdgeWeightMin)) @@ -2533,14 +2548,34 @@ bool flowList::setEdgeWeightMinChecked(BasicBlock::weight_t newWeight, BasicBloc #if DEBUG if (result == false) { + JITDUMP("Not adjusting min weight of " FMT_BB " -> " FMT_BB "; new value " FMT_WT " not in range [" FMT_WT + ".." FMT_WT "] (+/- " FMT_WT ")\n", + getBlock()->bbNum, bDst->bbNum, newWeight, flEdgeWeightMin, flEdgeWeightMax, slop); result = false; // break here } + else + { + JITDUMP("Updated min weight of " FMT_BB " -> " FMT_BB " to [" FMT_WT ".." FMT_WT "]\n", getBlock()->bbNum, + bDst->bbNum, flEdgeWeightMin, flEdgeWeightMax); + } #endif // DEBUG return result; } -bool flowList::setEdgeWeightMaxChecked(BasicBlock::weight_t newWeight, BasicBlock::weight_t slop, bool* wbUsedSlop) +//------------------------------------------------------------------------ +// setEdgeWeightMaxChecked: possibly update maximum edge weight +// +// Arguments: +// newWeight - proposed new weight +// bDst - destination block for edge +// slop - profile slush fund +// wbUsedSlop [out] - true if we tapped into the slush fund +// +bool flowList::setEdgeWeightMaxChecked(BasicBlock::weight_t newWeight, + BasicBlock* bDst, + BasicBlock::weight_t slop, + bool* wbUsedSlop) { bool result = false; if ((newWeight >= flEdgeWeightMin) && (newWeight <= flEdgeWeightMax)) @@ -2608,8 +2643,16 @@ bool flowList::setEdgeWeightMaxChecked(BasicBlock::weight_t newWeight, BasicBloc #if DEBUG if (result == false) { + JITDUMP("Not adjusting max weight of " FMT_BB " -> " FMT_BB "; new value " FMT_WT " not in range [" FMT_WT + ".." FMT_WT "] (+/- " FMT_WT ")\n", + getBlock()->bbNum, bDst->bbNum, newWeight, flEdgeWeightMin, flEdgeWeightMax, slop); result = false; // break here } + else + { + JITDUMP("Updated max weight of " FMT_BB " -> " FMT_BB " to [" FMT_WT ".." FMT_WT "]\n", getBlock()->bbNum, + bDst->bbNum, flEdgeWeightMin, flEdgeWeightMax); + } #endif // DEBUG return result; @@ -2628,6 +2671,9 @@ void flowList::setEdgeWeights(BasicBlock::weight_t theMinWeight, BasicBlock::wei { assert(theMinWeight <= theMaxWeight); + JITDUMP("\nSetting edge weights for BB?? -> " FMT_BB " to [" FMT_WT " .. " FMT_WT "]\n", getBlock()->bbNum, + theMinWeight, theMaxWeight); + flEdgeWeightMin = theMinWeight; flEdgeWeightMax = theMaxWeight; } @@ -2862,7 +2908,7 @@ void Compiler::fgComputeCalledCount(BasicBlock::weight_t returnWeight) #if DEBUG if (verbose) { - printf("We are using the Profile Weights and fgCalledCount is %.0f.\n", fgCalledCount); + printf("We are using the Profile Weights and fgCalledCount is " FMT_WT "\n", fgCalledCount); } #endif } @@ -2893,6 +2939,8 @@ void Compiler::fgComputeEdgeWeights() unsigned numEdges = 0; unsigned iterations = 0; + JITDUMP("Initial weight assignments\n\n"); + // Now we will compute the initial flEdgeWeightMin and flEdgeWeightMax values for (bDst = fgFirstBB; bDst != nullptr; bDst = bDst->bbNext) { @@ -2933,8 +2981,8 @@ void Compiler::fgComputeEdgeWeights() case BBJ_NONE: case BBJ_CALLFINALLY: // We know the exact edge weight - assignOK &= edge->setEdgeWeightMinChecked(bSrc->bbWeight, slop, &usedSlop); - assignOK &= edge->setEdgeWeightMaxChecked(bSrc->bbWeight, slop, &usedSlop); + assignOK &= edge->setEdgeWeightMinChecked(bSrc->bbWeight, bDst, slop, &usedSlop); + assignOK &= edge->setEdgeWeightMaxChecked(bSrc->bbWeight, bDst, slop, &usedSlop); break; case BBJ_COND: @@ -2944,7 +2992,7 @@ void Compiler::fgComputeEdgeWeights() if (edge->edgeWeightMax() > bSrc->bbWeight) { // The maximum edge weight to block can't be greater than the weight of bSrc - assignOK &= edge->setEdgeWeightMaxChecked(bSrc->bbWeight, slop, &usedSlop); + assignOK &= edge->setEdgeWeightMaxChecked(bSrc->bbWeight, bDst, slop, &usedSlop); } break; @@ -2957,7 +3005,7 @@ void Compiler::fgComputeEdgeWeights() // The maximum edge weight to block can't be greater than the weight of bDst if (edge->edgeWeightMax() > bDstWeight) { - assignOK &= edge->setEdgeWeightMaxChecked(bDstWeight, slop, &usedSlop); + assignOK &= edge->setEdgeWeightMaxChecked(bDstWeight, bDst, slop, &usedSlop); } if (!assignOK) @@ -2976,11 +3024,14 @@ void Compiler::fgComputeEdgeWeights() do { + JITDUMP("\nSolver pass %u\n", iterations); + iterations++; goodEdgeCountPrevious = goodEdgeCountCurrent; goodEdgeCountCurrent = 0; hasIncompleteEdgeWeights = false; + JITDUMP("\n -- step 1 --\n"); for (bDst = fgFirstBB; bDst != nullptr; bDst = bDst->bbNext) { for (edge = bDst->bbPreds; edge != nullptr; edge = edge->flNext) @@ -2995,14 +3046,17 @@ void Compiler::fgComputeEdgeWeights() { BasicBlock::weight_t diff; flowList* otherEdge; + BasicBlock* otherDst; if (bSrc->bbNext == bDst) { - otherEdge = fgGetPredForBlock(bSrc->bbJumpDest, bSrc); + otherDst = bSrc->bbJumpDest; } else { - otherEdge = fgGetPredForBlock(bSrc->bbNext, bSrc); + otherDst = bSrc->bbNext; } + otherEdge = fgGetPredForBlock(otherDst, bSrc); + noway_assert(edge->edgeWeightMin() <= edge->edgeWeightMax()); noway_assert(otherEdge->edgeWeightMin() <= otherEdge->edgeWeightMax()); @@ -3010,24 +3064,24 @@ void Compiler::fgComputeEdgeWeights() diff = bSrc->bbWeight - (edge->edgeWeightMin() + otherEdge->edgeWeightMax()); if (diff > 0) { - assignOK &= edge->setEdgeWeightMinChecked(edge->edgeWeightMin() + diff, slop, &usedSlop); + assignOK &= edge->setEdgeWeightMinChecked(edge->edgeWeightMin() + diff, bDst, slop, &usedSlop); } else if (diff < 0) { - assignOK &= - otherEdge->setEdgeWeightMaxChecked(otherEdge->edgeWeightMax() + diff, slop, &usedSlop); + assignOK &= otherEdge->setEdgeWeightMaxChecked(otherEdge->edgeWeightMax() + diff, otherDst, + slop, &usedSlop); } // Adjust otherEdge->flEdgeWeightMin up or adjust edge->flEdgeWeightMax down diff = bSrc->bbWeight - (otherEdge->edgeWeightMin() + edge->edgeWeightMax()); if (diff > 0) { - assignOK &= - otherEdge->setEdgeWeightMinChecked(otherEdge->edgeWeightMin() + diff, slop, &usedSlop); + assignOK &= otherEdge->setEdgeWeightMinChecked(otherEdge->edgeWeightMin() + diff, otherDst, + slop, &usedSlop); } else if (diff < 0) { - assignOK &= edge->setEdgeWeightMaxChecked(edge->edgeWeightMax() + diff, slop, &usedSlop); + assignOK &= edge->setEdgeWeightMaxChecked(edge->edgeWeightMax() + diff, bDst, slop, &usedSlop); } if (!assignOK) @@ -3050,6 +3104,8 @@ void Compiler::fgComputeEdgeWeights() } } + JITDUMP("\n -- step 2 --\n"); + for (bDst = fgFirstBB; bDst != nullptr; bDst = bDst->bbNext) { BasicBlock::weight_t bDstWeight = bDst->bbWeight; @@ -3111,7 +3167,7 @@ void Compiler::fgComputeEdgeWeights() (BasicBlock::weight_t)(bDstWeight - otherMaxEdgesWeightSum); if (minWeightCalc > edge->edgeWeightMin()) { - assignOK &= edge->setEdgeWeightMinChecked(minWeightCalc, slop, &usedSlop); + assignOK &= edge->setEdgeWeightMinChecked(minWeightCalc, bDst, slop, &usedSlop); } } @@ -3122,15 +3178,15 @@ void Compiler::fgComputeEdgeWeights() (BasicBlock::weight_t)(bDstWeight - otherMinEdgesWeightSum); if (maxWeightCalc < edge->edgeWeightMax()) { - assignOK &= edge->setEdgeWeightMaxChecked(maxWeightCalc, slop, &usedSlop); + assignOK &= edge->setEdgeWeightMaxChecked(maxWeightCalc, bDst, slop, &usedSlop); } } if (!assignOK) { // Here we have inconsistent profile data - JITDUMP("Inconsistent profile data at " FMT_BB " -> " FMT_BB - ": dest weight %f, min/max into dest is %f/%f, edge %f/%f\n", + JITDUMP("Inconsistent profile data at " FMT_BB " -> " FMT_BB ": dest weight " FMT_WT + ", min/max into dest is " FMT_WT "/" FMT_WT ", edge " FMT_WT "/" FMT_WT "\n", bSrc->bbNum, bDst->bbNum, bDstWeight, minEdgeWeightSum, maxEdgeWeightSum, edge->edgeWeightMin(), edge->edgeWeightMax()); @@ -3280,10 +3336,15 @@ bool Compiler::fgProfileWeightsConsistent(BasicBlock::weight_t weight1, BasicBlo // void Compiler::fgDebugCheckProfileData() { - // We can't check before we have pred lists built. - // assert(fgComputePredsDone); + // We can't check before we have computed edge weights. + // + if (!fgEdgeWeightsComputed) + { + return; + } + JITDUMP("Checking Profile Data\n"); unsigned problemBlocks = 0; unsigned unprofiledBlocks = 0; @@ -3377,10 +3438,10 @@ void Compiler::fgDebugCheckProfileData() // if (entryProfiled && exitProfiled) { - if (entryWeight != exitWeight) + if (!fgProfileWeightsConsistent(entryWeight, exitWeight)) { problemBlocks++; - JITDUMP(" Entry %f exit %f weight mismatch\n", entryWeight, exitWeight); + JITDUMP(" Entry " FMT_WT " exit " FMT_WT " weight mismatch\n", entryWeight, exitWeight); } } @@ -3444,22 +3505,24 @@ bool Compiler::fgDebugCheckIncomingProfileData(BasicBlock* block) return true; } - if (incomingWeightMin > incomingWeightMax) + if (!fgProfileWeightsConsistent(incomingWeightMin, incomingWeightMax)) { - JITDUMP(" " FMT_BB " - incoming min %f > incoming max %f\n", block->bbNum, incomingWeightMin, - incomingWeightMax); + JITDUMP(" " FMT_BB " - incoming min " FMT_WT " inconsistent with incoming max " FMT_WT "\n", block->bbNum, + incomingWeightMin, incomingWeightMax); return false; } - if (blockWeight < incomingWeightMin) + if (!fgProfileWeightsConsistent(blockWeight, incomingWeightMin)) { - JITDUMP(" " FMT_BB " - block weight %f < incoming min %f\n", block->bbNum, blockWeight, incomingWeightMin); + JITDUMP(" " FMT_BB " - block weight " FMT_WT " inconsistent with incoming min " FMT_WT "\n", block->bbNum, + blockWeight, incomingWeightMin); return false; } - if (blockWeight > incomingWeightMax) + if (!fgProfileWeightsConsistent(blockWeight, incomingWeightMax)) { - JITDUMP(" " FMT_BB " - block weight %f > incoming max %f\n", block->bbNum, blockWeight, incomingWeightMax); + JITDUMP(" " FMT_BB " - block weight " FMT_WT " inconsistent with incoming max " FMT_WT "\n", block->bbNum, + blockWeight, incomingWeightMax); return false; } @@ -3481,7 +3544,9 @@ bool Compiler::fgDebugCheckIncomingProfileData(BasicBlock* block) // bool Compiler::fgDebugCheckOutgoingProfileData(BasicBlock* block) { - const unsigned numSuccs = block->NumSucc(); + // We want switch targets unified, but not EH edges. + // + const unsigned numSuccs = block->NumSucc(this); if (numSuccs == 0) { @@ -3490,18 +3555,24 @@ bool Compiler::fgDebugCheckOutgoingProfileData(BasicBlock* block) return true; } + // We won't check finally or filter returns (for now). + // + if ((block->bbJumpKind == BBJ_EHFINALLYRET) || (block->bbJumpKind == BBJ_EHFILTERRET)) + { + return true; + } + BasicBlock::weight_t const blockWeight = block->bbWeight; BasicBlock::weight_t outgoingWeightMin = 0; BasicBlock::weight_t outgoingWeightMax = 0; - // Walking successor edges is a bit wonky. Seems like it should be easier. - // Note this can also fail to enumerate all the edges, if we have a multigraph + // Walk successor edges and add up flow counts. // int missingEdges = 0; for (unsigned i = 0; i < numSuccs; i++) { - BasicBlock* succBlock = block->GetSucc(i); + BasicBlock* succBlock = block->GetSucc(i, this); flowList* succEdge = nullptr; for (flowList* edge = succBlock->bbPreds; edge != nullptr; edge = edge->flNext) @@ -3529,22 +3600,24 @@ bool Compiler::fgDebugCheckOutgoingProfileData(BasicBlock* block) JITDUMP(" " FMT_BB " - missing %d successor edges\n", block->bbNum, missingEdges); } - if (outgoingWeightMin > outgoingWeightMax) + if (!fgProfileWeightsConsistent(outgoingWeightMin, outgoingWeightMax)) { - JITDUMP(" " FMT_BB " - outgoing min %f > outgoing max %f\n", block->bbNum, outgoingWeightMin, - outgoingWeightMax); + JITDUMP(" " FMT_BB " - outgoing min " FMT_WT " inconsistent with outgoing max " FMT_WT "\n", block->bbNum, + outgoingWeightMin, outgoingWeightMax); return false; } - if (blockWeight < outgoingWeightMin) + if (!fgProfileWeightsConsistent(blockWeight, outgoingWeightMin)) { - JITDUMP(" " FMT_BB " - block weight %f < outgoing min %f\n", block->bbNum, blockWeight, outgoingWeightMin); + JITDUMP(" " FMT_BB " - block weight " FMT_WT " inconsistent with outgoing min " FMT_WT "\n", block->bbNum, + blockWeight, outgoingWeightMin); return false; } - if (blockWeight > outgoingWeightMax) + if (!fgProfileWeightsConsistent(blockWeight, outgoingWeightMax)) { - JITDUMP(" " FMT_BB " - block weight %f > outgoing max %f\n", block->bbNum, blockWeight, outgoingWeightMax); + JITDUMP(" " FMT_BB " - block weight " FMT_WT " inconsistent with outgoing max " FMT_WT "\n", block->bbNum, + blockWeight, outgoingWeightMax); return false; } diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index d6a9ed7642b15..d3e7edf69d7f9 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -2505,6 +2505,32 @@ class MergedReturns // merged return block are lexically forward. insertionPoints[index] = returnBlock; + + // Update profile information in the mergedReturnBlock to + // reflect the additional flow. + // + if (returnBlock->hasProfileWeight()) + { + BasicBlock::weight_t const oldWeight = + mergedReturnBlock->hasProfileWeight() ? mergedReturnBlock->bbWeight : 0.0f; + BasicBlock::weight_t const newWeight = oldWeight + returnBlock->bbWeight; + + JITDUMP("merging profile weight %.6f from " FMT_BB " to const return " FMT_BB "\n", + returnBlock->bbWeight, returnBlock->bbNum, mergedReturnBlock->bbNum); + + mergedReturnBlock->setBBProfileWeight(newWeight); + + if (newWeight > 0.0f) + { + mergedReturnBlock->bbFlags &= ~BBF_RUN_RARELY; + } + else + { + mergedReturnBlock->bbFlags |= BBF_RUN_RARELY; + } + + DISPBLOCK(mergedReturnBlock); + } } } } @@ -2512,6 +2538,8 @@ class MergedReturns if (mergedReturnBlock == nullptr) { // No constant return block for this return; use the general one. + // We defer flow update and profile update to morph. + // mergedReturnBlock = comp->genReturnBB; if (mergedReturnBlock == nullptr) { @@ -2528,20 +2556,6 @@ class MergedReturns if (returnBlock != nullptr) { - // Propagate profile weight and related annotations to the merged block. - // Return weight should never exceed entry weight, so cap it to avoid nonsensical - // hot returns in synthetic profile settings. - mergedReturnBlock->bbWeight = - min(mergedReturnBlock->bbWeight + returnBlock->bbWeight, comp->fgFirstBB->bbWeight); - if (!returnBlock->hasProfileWeight()) - { - mergedReturnBlock->bbFlags &= ~BBF_PROF_WEIGHT; - } - if (mergedReturnBlock->bbWeight > 0) - { - mergedReturnBlock->bbFlags &= ~BBF_RUN_RARELY; - } - // Update fgReturnCount to reflect or anticipate that `returnBlock` will no longer // be a return point. comp->fgReturnCount--; diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index c5d9774dfa639..4c0a5bf63487d 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -548,6 +548,9 @@ const bool dspGCtbls = true; #define DISPTREERANGE(range, t) \ if (JitTls::GetCompiler()->verbose) \ JitTls::GetCompiler()->gtDispTreeRange(range, t); +#define DISPBLOCK(b) \ + if (JitTls::GetCompiler()->verbose) \ + JitTls::GetCompiler()->fgTableDispBasicBlock(b); #define VERBOSE JitTls::GetCompiler()->verbose #else // !DEBUG #define JITDUMP(...) @@ -559,6 +562,7 @@ const bool dspGCtbls = true; #define DISPSTMT(t) #define DISPRANGE(range) #define DISPTREERANGE(range, t) +#define DISPBLOCK(b) #define VERBOSE 0 #endif // !DEBUG diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 0c07628678cdd..7da1ec015cd72 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -7637,10 +7637,58 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) } #endif - // This block is not longer any block's predecessor. If we end up - // converting this tail call to a branch, we'll add appropriate - // successor information then. - fgRemoveBlockAsPred(compCurBB); + // If this block has a flow successor, make suitable updates. + // + BasicBlock* const nextBlock = compCurBB->GetUniqueSucc(); + + if (nextBlock != nullptr) + { + // Flow no longer reaches nextBlock from here. + // + fgRemoveRefPred(nextBlock, compCurBB); + + if (compCurBB->hasProfileWeight() && nextBlock->hasProfileWeight()) + { + // Since we had linear flow we can update the next block weight. + // + // Note if this is a tail call to loop, further updates + // are needed once we install the loop edge. + // + // We should not need to handle the case where this block has already + // been "merged" to a common return. + // + assert(nextBlock->bbJumpKind == BBJ_RETURN); + + BasicBlock::weight_t const blockWeight = compCurBB->bbWeight; + BasicBlock::weight_t const nextWeight = nextBlock->bbWeight; + BasicBlock::weight_t const newWeight = nextWeight - blockWeight; + + // If the math would result in an negative weight then there's + // no local repair we can do; just leave things inconsistent. + // + if (newWeight >= 0) + { + // Note if we'd already morphed the IR in nextblock we might + // have done something profile sensitive that we should arguably reconsider. + // + JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", nextBlock->bbNum, + nextWeight, newWeight); + + nextBlock->setBBProfileWeight(newWeight); + + if (newWeight == 0.0f) + { + nextBlock->bbFlags |= BBF_RUN_RARELY; + } + } + else + { + JITDUMP("Not reducing profile weight of " FMT_BB " as its weight " FMT_WT + " is less than direct flow pred " FMT_BB " weight " FMT_WT "\n", + nextBlock->bbNum, nextWeight, compCurBB->bbNum, blockWeight); + } + } + } #if !FEATURE_TAILCALL_OPT_SHARED_RETURN // We enable shared-ret tail call optimization for recursive calls even if @@ -17074,13 +17122,31 @@ void Compiler::fgMergeBlockReturn(BasicBlock* block) fgRemoveStmt(block, lastStmt); } -#ifdef DEBUG - if (verbose) + + JITDUMP("\nUpdate " FMT_BB " to jump to common return block.\n", block->bbNum); + DISPBLOCK(block); + + if (block->hasProfileWeight()) { - printf("morph " FMT_BB " to point at onereturn. New block is\n", block->bbNum); - fgTableDispBasicBlock(block); + BasicBlock::weight_t const oldWeight = genReturnBB->hasProfileWeight() ? genReturnBB->bbWeight : 0.0f; + BasicBlock::weight_t const newWeight = oldWeight + block->bbWeight; + + JITDUMP("merging profile weight %.6f from " FMT_BB " to common return " FMT_BB "\n", block->bbWeight, + block->bbNum, genReturnBB->bbNum); + + genReturnBB->setBBProfileWeight(newWeight); + + if (newWeight > 0.0f) + { + genReturnBB->bbFlags &= ~BBF_RUN_RARELY; + } + else + { + genReturnBB->bbFlags |= BBF_RUN_RARELY; + } + + DISPBLOCK(genReturnBB); } -#endif } } From ccac6e496cb32011d5f942d5a0a911264ccd8135 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 25 Feb 2021 13:39:40 -0800 Subject: [PATCH 2/7] Fix non-PGO return merge weights (basically, don't set them). Update FMT_WT to %g so we don't see huge digit strings. --- src/coreclr/jit/block.h | 2 +- src/coreclr/jit/fgprofile.cpp | 2 +- src/coreclr/jit/flowgraph.cpp | 12 +----------- 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h index 938b7394af14b..6832715137fff 100644 --- a/src/coreclr/jit/block.h +++ b/src/coreclr/jit/block.h @@ -45,7 +45,7 @@ typedef BitVec_ValRet_T ASSERT_VALRET_TP; #define FMT_BB "BB%02u" // And this format for profile weights -#define FMT_WT "%.7f" +#define FMT_WT "%.7g" /***************************************************************************** * diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 926ac88082d05..68ed800db7fdb 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -2671,7 +2671,7 @@ void flowList::setEdgeWeights(BasicBlock::weight_t theMinWeight, BasicBlock::wei { assert(theMinWeight <= theMaxWeight); - JITDUMP("\nSetting edge weights for BB?? -> " FMT_BB " to [" FMT_WT " .. " FMT_WT "]\n", getBlock()->bbNum, + JITDUMP("Setting edge weights for BB?? -> " FMT_BB " to [" FMT_WT " .. " FMT_WT "]\n", getBlock()->bbNum, theMinWeight, theMaxWeight); flEdgeWeightMin = theMinWeight; diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index d3e7edf69d7f9..80c42556da95c 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -2313,17 +2313,7 @@ class MergedReturns noway_assert(newReturnBB->bbNext == nullptr); -#ifdef DEBUG - if (comp->verbose) - { - printf("\n newReturnBB [" FMT_BB "] created\n", newReturnBB->bbNum); - } -#endif - - // We have profile weight, the weight is zero, and the block is run rarely, - // until we prove otherwise by merging other returns into this one. - newReturnBB->bbFlags |= (BBF_PROF_WEIGHT | BBF_RUN_RARELY); - newReturnBB->bbWeight = 0; + JITDUMP("\n newReturnBB [" FMT_BB "] created\n", newReturnBB->bbNum); GenTree* returnExpr; From 899dda15f66e2d0df52915d436fc67de48ec2037 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 25 Feb 2021 15:32:43 -0800 Subject: [PATCH 3/7] one more fix for common return block --- src/coreclr/jit/flowgraph.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 80c42556da95c..e9252630227a7 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -2750,13 +2750,6 @@ void Compiler::fgAddInternal() // will expect to find it. BasicBlock* mergedReturn = merger.EagerCreate(); assert(mergedReturn == genReturnBB); - // Assume weight equal to entry weight for this BB. - mergedReturn->bbFlags &= ~BBF_PROF_WEIGHT; - mergedReturn->bbWeight = fgFirstBB->bbWeight; - if (mergedReturn->bbWeight > 0) - { - mergedReturn->bbFlags &= ~BBF_RUN_RARELY; - } } else { From 33870e6c05e1d496f4f0ed2c42bf48c4a408188c Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 25 Feb 2021 19:46:38 -0800 Subject: [PATCH 4/7] Properly update edge weights when creating a preheader. Also fix dump output from `setEdgeWeights` and pass in the destination of the edge. --- src/coreclr/jit/block.h | 2 +- src/coreclr/jit/fgflow.cpp | 8 ++++---- src/coreclr/jit/fgopt.cpp | 2 +- src/coreclr/jit/fgprofile.cpp | 9 +++++---- src/coreclr/jit/morph.cpp | 10 +++++----- src/coreclr/jit/optimizer.cpp | 34 ++++++++++++++++++++++------------ 6 files changed, 38 insertions(+), 27 deletions(-) diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h index 6832715137fff..cab33119ca69a 100644 --- a/src/coreclr/jit/block.h +++ b/src/coreclr/jit/block.h @@ -1302,7 +1302,7 @@ struct flowList BasicBlock* bDst, BasicBlock::weight_t slop, bool* wbUsedSlop); - void setEdgeWeights(BasicBlock::weight_t newMinWeight, BasicBlock::weight_t newMaxWeight); + void setEdgeWeights(BasicBlock::weight_t newMinWeight, BasicBlock::weight_t newMaxWeight, BasicBlock* bDst); flowList(BasicBlock* block, flowList* rest) : flNext(rest), m_block(block), flEdgeWeightMin(0), flEdgeWeightMax(0), flDupCount(0) diff --git a/src/coreclr/jit/fgflow.cpp b/src/coreclr/jit/fgflow.cpp index 30b3cbf1a6699..8c5cd174ca153 100644 --- a/src/coreclr/jit/fgflow.cpp +++ b/src/coreclr/jit/fgflow.cpp @@ -272,7 +272,7 @@ flowList* Compiler::fgAddRefPred(BasicBlock* block, // If our caller has given us the old edge weights // then we will use them. // - flow->setEdgeWeights(oldEdge->edgeWeightMin(), oldEdge->edgeWeightMax()); + flow->setEdgeWeights(oldEdge->edgeWeightMin(), oldEdge->edgeWeightMax(), block); } else { @@ -284,17 +284,17 @@ flowList* Compiler::fgAddRefPred(BasicBlock* block, // otherwise it is the same as the edge's max weight. if (blockPred->NumSucc() > 1) { - flow->setEdgeWeights(BB_ZERO_WEIGHT, newWeightMax); + flow->setEdgeWeights(BB_ZERO_WEIGHT, newWeightMax, block); } else { - flow->setEdgeWeights(flow->edgeWeightMax(), newWeightMax); + flow->setEdgeWeights(flow->edgeWeightMax(), newWeightMax, block); } } } else { - flow->setEdgeWeights(BB_ZERO_WEIGHT, BB_MAX_WEIGHT); + flow->setEdgeWeights(BB_ZERO_WEIGHT, BB_MAX_WEIGHT, block); } } diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 5fb1255252299..bd455e19f3d57 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -2323,7 +2323,7 @@ bool Compiler::fgOptimizeBranchToEmptyUnconditional(BasicBlock* block, BasicBloc { newEdge2Max = BB_ZERO_WEIGHT; } - edge2->setEdgeWeights(newEdge2Min, newEdge2Max); + edge2->setEdgeWeights(newEdge2Min, newEdge2Max, bDest); } } diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 68ed800db7fdb..92ae6d1780d19 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -2666,13 +2666,14 @@ bool flowList::setEdgeWeightMaxChecked(BasicBlock::weight_t newWeight, // Arguments: // theMinWeight - the new minimum lower (flEdgeWeightMin) // theMaxWeight - the new maximum upper (flEdgeWeightMin) +// bDst - the destination block for the edge // -void flowList::setEdgeWeights(BasicBlock::weight_t theMinWeight, BasicBlock::weight_t theMaxWeight) +void flowList::setEdgeWeights(BasicBlock::weight_t theMinWeight, BasicBlock::weight_t theMaxWeight, BasicBlock* bDst) { assert(theMinWeight <= theMaxWeight); - JITDUMP("Setting edge weights for BB?? -> " FMT_BB " to [" FMT_WT " .. " FMT_WT "]\n", getBlock()->bbNum, - theMinWeight, theMaxWeight); + JITDUMP("Setting edge weights for " FMT_BB " -> " FMT_BB " to [" FMT_WT " .. " FMT_WT "]\n", getBlock()->bbNum, + bDst->bbNum, theMinWeight, theMaxWeight); flEdgeWeightMin = theMinWeight; flEdgeWeightMax = theMaxWeight; @@ -2970,7 +2971,7 @@ void Compiler::fgComputeEdgeWeights() if (!bSrc->hasProfileWeight() || !bDst->hasProfileWeight()) { - edge->setEdgeWeights(BB_ZERO_WEIGHT, BB_MAX_WEIGHT); + edge->setEdgeWeights(BB_ZERO_WEIGHT, BB_MAX_WEIGHT, bDst); } slop = BasicBlock::GetSlopFraction(bSrc, bDst) + 1; diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 7da1ec015cd72..a98e1ae2ad1ce 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -16321,7 +16321,7 @@ bool Compiler::fgFoldConditional(BasicBlock* block) { // The edge weights for (block -> bTaken) are 100% of block's weight - edgeTaken->setEdgeWeights(block->bbWeight, block->bbWeight); + edgeTaken->setEdgeWeights(block->bbWeight, block->bbWeight, bTaken); if (!bTaken->hasProfileWeight()) { @@ -16338,7 +16338,7 @@ bool Compiler::fgFoldConditional(BasicBlock* block) if (bTaken->countOfInEdges() == 1) { // There is only one in edge to bTaken - edgeTaken->setEdgeWeights(bTaken->bbWeight, bTaken->bbWeight); + edgeTaken->setEdgeWeights(bTaken->bbWeight, bTaken->bbWeight, bTaken); // Update the weight of block block->inheritWeight(bTaken); @@ -16359,21 +16359,21 @@ bool Compiler::fgFoldConditional(BasicBlock* block) edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated); newMaxWeight = bUpdated->bbWeight; newMinWeight = min(edge->edgeWeightMin(), newMaxWeight); - edge->setEdgeWeights(newMinWeight, newMaxWeight); + edge->setEdgeWeights(newMinWeight, newMaxWeight, bUpdated->bbNext); break; case BBJ_COND: edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated); newMaxWeight = bUpdated->bbWeight; newMinWeight = min(edge->edgeWeightMin(), newMaxWeight); - edge->setEdgeWeights(newMinWeight, newMaxWeight); + edge->setEdgeWeights(newMinWeight, newMaxWeight, bUpdated->bbNext); FALLTHROUGH; case BBJ_ALWAYS: edge = fgGetPredForBlock(bUpdated->bbJumpDest, bUpdated); newMaxWeight = bUpdated->bbWeight; newMinWeight = min(edge->edgeWeightMin(), newMaxWeight); - edge->setEdgeWeights(newMinWeight, newMaxWeight); + edge->setEdgeWeights(newMinWeight, newMaxWeight, bUpdated->bbNext); break; default: diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 79b32874f58fd..1244ec3454912 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -4423,8 +4423,8 @@ void Compiler::optInvertWhileLoop(BasicBlock* block) JITDUMP("Setting weight of " FMT_BB " -> " FMT_BB " to %f (exit loop)\n", bTest->bbNum, bTest->bbNext->bbNum, testToAfterWeight); - edgeTestToNext->setEdgeWeights(testToNextWeight, testToNextWeight); - edgeTestToAfter->setEdgeWeights(testToAfterWeight, testToAfterWeight); + edgeTestToNext->setEdgeWeights(testToNextWeight, testToNextWeight, bTest->bbJumpDest); + edgeTestToAfter->setEdgeWeights(testToAfterWeight, testToAfterWeight, bTest->bbNext); // Adjust edges out of block, using the same distribution. // @@ -4444,8 +4444,8 @@ void Compiler::optInvertWhileLoop(BasicBlock* block) JITDUMP("Setting weight of " FMT_BB " -> " FMT_BB " to %f (avoid loop)\n", block->bbNum, block->bbJumpDest->bbNum, blockToAfterWeight); - edgeBlockToNext->setEdgeWeights(blockToNextWeight, blockToNextWeight); - edgeBlockToAfter->setEdgeWeights(blockToAfterWeight, blockToAfterWeight); + edgeBlockToNext->setEdgeWeights(blockToNextWeight, blockToNextWeight, block->bbNext); + edgeBlockToAfter->setEdgeWeights(blockToAfterWeight, blockToAfterWeight, block->bbJumpDest); #ifdef DEBUG // Verify profile for the two target blocks is consistent. @@ -7578,17 +7578,25 @@ void Compiler::fgCreateLoopPreHeader(unsigned lnum) loopSkippedCount = head->bbJumpDest->bbWeight; } + JITDUMP("%s; loopEnterCount " FMT_WT " loopSkipCount " FMT_WT "\n", + fgHaveValidEdgeWeights ? "valid edge weights" : "no edge weights", loopEnteredCount, + loopSkippedCount); + BasicBlock::weight_t loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount); + JITDUMP("%s; loopEnterCount " FMT_WT " loopSkipCount " FMT_WT " taken ratio " FMT_WT "\n", + fgHaveValidEdgeWeights ? "valid edge weights" : "no edge weights", loopEnteredCount, + loopSkippedCount, loopTakenRatio); + // Calculate a good approximation of the preHead's block weight - BasicBlock::weight_t preHeadWeight = (head->bbWeight * loopTakenRatio) + 0.5f; - preHead->setBBWeight(max(preHeadWeight, 1)); + BasicBlock::weight_t preHeadWeight = (head->bbWeight * loopTakenRatio); + preHead->setBBProfileWeight(preHeadWeight); noway_assert(!preHead->isRunRarely()); } } } - // Link in the preHead block. + // Link in the preHead block fgInsertBBbefore(top, preHead); // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting. @@ -7639,8 +7647,9 @@ void Compiler::fgCreateLoopPreHeader(unsigned lnum) All predecessors of 'beg', (which is the entry in the loop) now have to jump to 'preHead', unless they are dominated by 'head' */ - preHead->bbRefs = 0; - fgAddRefPred(preHead, head); + preHead->bbRefs = 0; + flowList* const edgeToPreHeader = fgAddRefPred(preHead, head); + edgeToPreHeader->setEdgeWeights(preHead->bbWeight, preHead->bbWeight, preHead); bool checkNestedLoops = false; for (flowList* pred = top->bbPreds; pred; pred = pred->flNext) @@ -7722,7 +7731,8 @@ void Compiler::fgCreateLoopPreHeader(unsigned lnum) noway_assert(!fgGetPredForBlock(top, preHead)); fgRemoveRefPred(top, head); - fgAddRefPred(top, preHead); + flowList* const edgeFromPreHeader = fgAddRefPred(top, preHead); + edgeFromPreHeader->setEdgeWeights(preHead->bbWeight, preHead->bbWeight, top); /* If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop @@ -9308,11 +9318,11 @@ void Compiler::optOptimizeBools() BasicBlock::weight_t edgeSumMax = edge1->edgeWeightMax() + edge2->edgeWeightMax(); if ((edgeSumMax >= edge1->edgeWeightMax()) && (edgeSumMax >= edge2->edgeWeightMax())) { - edge1->setEdgeWeights(edgeSumMin, edgeSumMax); + edge1->setEdgeWeights(edgeSumMin, edgeSumMax, b1->bbJumpDest); } else { - edge1->setEdgeWeights(BB_ZERO_WEIGHT, BB_MAX_WEIGHT); + edge1->setEdgeWeights(BB_ZERO_WEIGHT, BB_MAX_WEIGHT, b1->bbJumpDest); } /* Get rid of the second block (which is a BBJ_COND) */ From 581f923340ca3e98f464f2825c1949676d784ab3 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 26 Feb 2021 11:45:41 -0800 Subject: [PATCH 5/7] Handle the more complex tail call profile fixup where two blocks must have their counts updated. --- src/coreclr/jit/morph.cpp | 76 ++++++++++++++++++++++++++++++--------- 1 file changed, 59 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index a98e1ae2ad1ce..e56cc07a287fd 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -7641,42 +7641,45 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) // BasicBlock* const nextBlock = compCurBB->GetUniqueSucc(); - if (nextBlock != nullptr) + if (nextBlock == nullptr) + { + // No unique successor. compCurBB should be a return. + // + assert(compCurBB->bbJumpKind == BBJ_RETURN); + } + else { // Flow no longer reaches nextBlock from here. // fgRemoveRefPred(nextBlock, compCurBB); + // Adjust profile weights. + // + // Note if this is a tail call to loop, further updates + // are needed once we install the loop edge. + // if (compCurBB->hasProfileWeight() && nextBlock->hasProfileWeight()) { - // Since we had linear flow we can update the next block weight. - // - // Note if this is a tail call to loop, further updates - // are needed once we install the loop edge. + // Since we have linear flow we can update the next block weight. // - // We should not need to handle the case where this block has already - // been "merged" to a common return. - // - assert(nextBlock->bbJumpKind == BBJ_RETURN); - - BasicBlock::weight_t const blockWeight = compCurBB->bbWeight; - BasicBlock::weight_t const nextWeight = nextBlock->bbWeight; - BasicBlock::weight_t const newWeight = nextWeight - blockWeight; + BasicBlock::weight_t const blockWeight = compCurBB->bbWeight; + BasicBlock::weight_t const nextWeight = nextBlock->bbWeight; + BasicBlock::weight_t const newNextWeight = nextWeight - blockWeight; // If the math would result in an negative weight then there's // no local repair we can do; just leave things inconsistent. // - if (newWeight >= 0) + if (newNextWeight >= 0) { // Note if we'd already morphed the IR in nextblock we might // have done something profile sensitive that we should arguably reconsider. // JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", nextBlock->bbNum, - nextWeight, newWeight); + nextWeight, newNextWeight); - nextBlock->setBBProfileWeight(newWeight); + nextBlock->setBBProfileWeight(newNextWeight); - if (newWeight == 0.0f) + if (newNextWeight == 0.0f) { nextBlock->bbFlags |= BBF_RUN_RARELY; } @@ -7687,6 +7690,45 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) " is less than direct flow pred " FMT_BB " weight " FMT_WT "\n", nextBlock->bbNum, nextWeight, compCurBB->bbNum, blockWeight); } + + // If nextBlock is not a BBJ_RETURN, it should have a unique successor that + // is a BBJ_RETURN, as we allow a little bit of flow after a tail call. + // + if (nextBlock->bbJumpKind != BBJ_RETURN) + { + BasicBlock* const nextNextBlock = nextBlock->GetUniqueSucc(); + assert(nextNextBlock->bbJumpKind == BBJ_RETURN); + + if (nextNextBlock->hasProfileWeight()) + { + // Do similar updates here. + // + BasicBlock::weight_t const nextNextWeight = nextNextBlock->bbWeight; + BasicBlock::weight_t const newNextNextWeight = nextNextWeight - blockWeight; + + // If the math would result in an negative weight then there's + // no local repair we can do; just leave things inconsistent. + // + if (newNextNextWeight >= 0) + { + JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", + nextNextBlock->bbNum, nextNextWeight, newNextNextWeight); + + nextNextBlock->setBBProfileWeight(newNextNextWeight); + + if (newNextNextWeight == 0.0f) + { + nextNextBlock->bbFlags |= BBF_RUN_RARELY; + } + } + else + { + JITDUMP("Not reducing profile weight of " FMT_BB " as its weight " FMT_WT + " is less than direct flow pred " FMT_BB " weight " FMT_WT "\n", + nextNextBlock->bbNum, nextNextWeight, compCurBB->bbNum, blockWeight); + } + } + } } } From a8bda5fc9f709edf517189152e60e37d0a82cbfd Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 2 Mar 2021 10:58:57 -0800 Subject: [PATCH 6/7] review feedback --- src/coreclr/jit/fgprofile.cpp | 26 ++++++++++++++------------ src/coreclr/jit/flowgraph.cpp | 6 +++--- src/coreclr/jit/morph.cpp | 13 +++++++------ 3 files changed, 24 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 92ae6d1780d19..99564db473547 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -176,7 +176,7 @@ bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::wei if (hash % 3 == 0) { - weight = 0; + weight = BB_ZERO_WEIGHT; } else if (hash % 11 == 0) { @@ -188,7 +188,7 @@ bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::wei } // The first block is never given a weight of zero - if ((offset == 0) && (weight == 0)) + if ((offset == 0) && (weight == BB_ZERO_WEIGHT)) { weight = (BasicBlock::weight_t)1 + (hash % 5); } @@ -2470,7 +2470,7 @@ void Compiler::fgIncorporateEdgeCounts() // // Arguments: // newWeight - proposed new weight -// bDst - destintion block for edge +// bDst - destination block for edge // slop - profile slush fund // wbUsedSlop [out] - true if we tapped into the slush fund // @@ -2532,11 +2532,12 @@ bool flowList::setEdgeWeightMinChecked(BasicBlock::weight_t newWeight, } // If we are returning true then we should have adjusted the range so that - // the newWeight is in new range [Min..Max] or fgEdjeWeightMax is zero. + // the newWeight is in new range [Min..Max] or fgEdgeWeightMax is zero. // Also we should have set wbUsedSlop to true. if (result == true) { - assert((flEdgeWeightMax == 0) || ((newWeight <= flEdgeWeightMax) && (newWeight >= flEdgeWeightMin))); + assert((flEdgeWeightMax == BB_ZERO_WEIGHT) || + ((newWeight <= flEdgeWeightMax) && (newWeight >= flEdgeWeightMin))); if (wbUsedSlop != nullptr) { @@ -2630,11 +2631,12 @@ bool flowList::setEdgeWeightMaxChecked(BasicBlock::weight_t newWeight, } // If we are returning true then we should have adjusted the range so that - // the newWeight is in new range [Min..Max] or fgEdjeWeightMax is zero + // the newWeight is in new range [Min..Max] or fgEdgeWeightMax is zero // Also we should have set wbUsedSlop to true, unless it is NULL if (result == true) { - assert((flEdgeWeightMax == 0) || ((newWeight <= flEdgeWeightMax) && (newWeight >= flEdgeWeightMin))); + assert((flEdgeWeightMax == BB_ZERO_WEIGHT) || + ((newWeight <= flEdgeWeightMax) && (newWeight >= flEdgeWeightMin))); assert((wbUsedSlop == nullptr) || (*wbUsedSlop == true)); } @@ -2808,7 +2810,7 @@ BasicBlock::weight_t Compiler::fgComputeMissingBlockWeights() changed = true; modified = true; bDst->bbWeight = newWeight; - if (newWeight == 0) + if (newWeight == BB_ZERO_WEIGHT) { bDst->bbFlags |= BBF_RUN_RARELY; } @@ -2881,7 +2883,7 @@ void Compiler::fgComputeCalledCount(BasicBlock::weight_t returnWeight) // (i.e. the function never returns because it always throws) // then just use the first block weight rather than 0. // - if ((firstILBlock->countOfInEdges() == 1) || (returnWeight == 0)) + if ((firstILBlock->countOfInEdges() == 1) || (returnWeight == BB_ZERO_WEIGHT)) { assert(firstILBlock->hasProfileWeight()); // This should always be a profile-derived weight fgCalledCount = firstILBlock->bbWeight; @@ -2896,7 +2898,7 @@ void Compiler::fgComputeCalledCount(BasicBlock::weight_t returnWeight) if (fgFirstBBisScratch()) { fgFirstBB->setBBProfileWeight(fgCalledCount); - if (fgFirstBB->bbWeight == 0) + if (fgFirstBB->bbWeight == BB_ZERO_WEIGHT) { fgFirstBB->bbFlags |= BBF_RUN_RARELY; } @@ -3311,14 +3313,14 @@ bool Compiler::fgProfileWeightsEqual(BasicBlock::weight_t weight1, BasicBlock::w // bool Compiler::fgProfileWeightsConsistent(BasicBlock::weight_t weight1, BasicBlock::weight_t weight2) { - if (weight2 == 0) + if (weight2 == BB_ZERO_WEIGHT) { return fgProfileWeightsEqual(weight1, weight2); } BasicBlock::weight_t const relativeDiff = (weight2 - weight1) / weight2; - return fgProfileWeightsEqual(relativeDiff, 0.0f); + return fgProfileWeightsEqual(relativeDiff, BB_ZERO_WEIGHT); } #ifdef DEBUG diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index e9252630227a7..98b8bd734b5f2 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -2502,15 +2502,15 @@ class MergedReturns if (returnBlock->hasProfileWeight()) { BasicBlock::weight_t const oldWeight = - mergedReturnBlock->hasProfileWeight() ? mergedReturnBlock->bbWeight : 0.0f; + mergedReturnBlock->hasProfileWeight() ? mergedReturnBlock->bbWeight : BB_ZERO_WEIGHT; BasicBlock::weight_t const newWeight = oldWeight + returnBlock->bbWeight; - JITDUMP("merging profile weight %.6f from " FMT_BB " to const return " FMT_BB "\n", + JITDUMP("merging profile weight " FMT_WT " from " FMT_BB " to const return " FMT_BB "\n", returnBlock->bbWeight, returnBlock->bbNum, mergedReturnBlock->bbNum); mergedReturnBlock->setBBProfileWeight(newWeight); - if (newWeight > 0.0f) + if (newWeight > BB_ZERO_WEIGHT) { mergedReturnBlock->bbFlags &= ~BBF_RUN_RARELY; } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index e56cc07a287fd..5a11bc2cf32e5 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -7666,7 +7666,7 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) BasicBlock::weight_t const nextWeight = nextBlock->bbWeight; BasicBlock::weight_t const newNextWeight = nextWeight - blockWeight; - // If the math would result in an negative weight then there's + // If the math would result in a negative weight then there's // no local repair we can do; just leave things inconsistent. // if (newNextWeight >= 0) @@ -7679,7 +7679,7 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) nextBlock->setBBProfileWeight(newNextWeight); - if (newNextWeight == 0.0f) + if (newNextWeight == BB_ZERO_WEIGHT) { nextBlock->bbFlags |= BBF_RUN_RARELY; } @@ -7716,7 +7716,7 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) nextNextBlock->setBBProfileWeight(newNextNextWeight); - if (newNextNextWeight == 0.0f) + if (newNextNextWeight == BB_ZERO_WEIGHT) { nextNextBlock->bbFlags |= BBF_RUN_RARELY; } @@ -17170,15 +17170,16 @@ void Compiler::fgMergeBlockReturn(BasicBlock* block) if (block->hasProfileWeight()) { - BasicBlock::weight_t const oldWeight = genReturnBB->hasProfileWeight() ? genReturnBB->bbWeight : 0.0f; + BasicBlock::weight_t const oldWeight = + genReturnBB->hasProfileWeight() ? genReturnBB->bbWeight : BB_ZERO_WEIGHT; BasicBlock::weight_t const newWeight = oldWeight + block->bbWeight; - JITDUMP("merging profile weight %.6f from " FMT_BB " to common return " FMT_BB "\n", block->bbWeight, + JITDUMP("merging profile weight " FMT_WT " from " FMT_BB " to common return " FMT_BB "\n", block->bbWeight, block->bbNum, genReturnBB->bbNum); genReturnBB->setBBProfileWeight(newWeight); - if (newWeight > 0.0f) + if (newWeight > BB_ZERO_WEIGHT) { genReturnBB->bbFlags &= ~BBF_RUN_RARELY; } From 75697159eedfa2936fe03ffeb965ddfa184138d1 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 2 Mar 2021 11:07:24 -0800 Subject: [PATCH 7/7] refactor setBBProfileWeight --- src/coreclr/jit/block.h | 10 ++++++++++ src/coreclr/jit/fgprofile.cpp | 17 ----------------- src/coreclr/jit/flowgraph.cpp | 10 ---------- src/coreclr/jit/morph.cpp | 20 -------------------- 4 files changed, 10 insertions(+), 47 deletions(-) diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h index cab33119ca69a..863b253528f47 100644 --- a/src/coreclr/jit/block.h +++ b/src/coreclr/jit/block.h @@ -557,10 +557,20 @@ struct BasicBlock : private LIR::Range } // setBBProfileWeight -- Set the profile-derived weight for a basic block + // and update the run rarely flag as appropriate. void setBBProfileWeight(weight_t weight) { this->bbFlags |= BBF_PROF_WEIGHT; this->bbWeight = weight; + + if (weight == BB_ZERO_WEIGHT) + { + this->bbFlags |= BBF_RUN_RARELY; + } + else + { + this->bbFlags &= ~BBF_RUN_RARELY; + } } // modifyBBWeight -- same as setBBWeight, but also make sure that if the block is rarely run, it stays that diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 99564db473547..479ea37de5dcb 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -1752,15 +1752,6 @@ void Compiler::fgSetProfileWeight(BasicBlock* block, BasicBlock::weight_t profil block->setBBProfileWeight(profileWeight); - if (profileWeight == BB_ZERO_WEIGHT) - { - block->bbSetRunRarely(); - } - else - { - block->bbFlags &= ~BBF_RUN_RARELY; - } - #if HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION // Handle a special case -- some handler entries can't have zero profile count. // @@ -2898,14 +2889,6 @@ void Compiler::fgComputeCalledCount(BasicBlock::weight_t returnWeight) if (fgFirstBBisScratch()) { fgFirstBB->setBBProfileWeight(fgCalledCount); - if (fgFirstBB->bbWeight == BB_ZERO_WEIGHT) - { - fgFirstBB->bbFlags |= BBF_RUN_RARELY; - } - else - { - fgFirstBB->bbFlags &= ~BBF_RUN_RARELY; - } } #if DEBUG diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 98b8bd734b5f2..8b89c2e5945a5 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -2509,16 +2509,6 @@ class MergedReturns returnBlock->bbWeight, returnBlock->bbNum, mergedReturnBlock->bbNum); mergedReturnBlock->setBBProfileWeight(newWeight); - - if (newWeight > BB_ZERO_WEIGHT) - { - mergedReturnBlock->bbFlags &= ~BBF_RUN_RARELY; - } - else - { - mergedReturnBlock->bbFlags |= BBF_RUN_RARELY; - } - DISPBLOCK(mergedReturnBlock); } } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 5a11bc2cf32e5..d939f9aad4990 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -7678,11 +7678,6 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) nextWeight, newNextWeight); nextBlock->setBBProfileWeight(newNextWeight); - - if (newNextWeight == BB_ZERO_WEIGHT) - { - nextBlock->bbFlags |= BBF_RUN_RARELY; - } } else { @@ -7715,11 +7710,6 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) nextNextBlock->bbNum, nextNextWeight, newNextNextWeight); nextNextBlock->setBBProfileWeight(newNextNextWeight); - - if (newNextNextWeight == BB_ZERO_WEIGHT) - { - nextNextBlock->bbFlags |= BBF_RUN_RARELY; - } } else { @@ -17178,16 +17168,6 @@ void Compiler::fgMergeBlockReturn(BasicBlock* block) block->bbNum, genReturnBB->bbNum); genReturnBB->setBBProfileWeight(newWeight); - - if (newWeight > BB_ZERO_WEIGHT) - { - genReturnBB->bbFlags &= ~BBF_RUN_RARELY; - } - else - { - genReturnBB->bbFlags |= BBF_RUN_RARELY; - } - DISPBLOCK(genReturnBB); } }