diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h index c21131e8772ca..863b253528f47 100644 --- a/src/coreclr/jit/block.h +++ b/src/coreclr/jit/block.h @@ -44,6 +44,9 @@ typedef BitVec_ValRet_T ASSERT_VALRET_TP; // This define is used with string concatenation to put this in printf format strings (Note that %u means unsigned int) #define FMT_BB "BB%02u" +// And this format for profile weights +#define FMT_WT "%.7g" + /***************************************************************************** * * Each basic block ends with a jump which is described as a value @@ -554,10 +557,20 @@ struct BasicBlock : private LIR::Range } // setBBProfileWeight -- Set the profile-derived weight for a basic block + // and update the run rarely flag as appropriate. void setBBProfileWeight(weight_t weight) { this->bbFlags |= BBF_PROF_WEIGHT; this->bbWeight = weight; + + if (weight == BB_ZERO_WEIGHT) + { + this->bbFlags |= BBF_RUN_RARELY; + } + else + { + this->bbFlags &= ~BBF_RUN_RARELY; + } } // modifyBBWeight -- same as setBBWeight, but also make sure that if the block is rarely run, it stays that @@ -1291,9 +1304,15 @@ struct flowList // They return false if the newWeight is not between the current [min..max] // when slop is non-zero we allow for the case where our weights might be off by 'slop' // - bool setEdgeWeightMinChecked(BasicBlock::weight_t newWeight, BasicBlock::weight_t slop, bool* wbUsedSlop); - bool setEdgeWeightMaxChecked(BasicBlock::weight_t newWeight, BasicBlock::weight_t slop, bool* wbUsedSlop); - void setEdgeWeights(BasicBlock::weight_t newMinWeight, BasicBlock::weight_t newMaxWeight); + bool setEdgeWeightMinChecked(BasicBlock::weight_t newWeight, + BasicBlock* bDst, + BasicBlock::weight_t slop, + bool* wbUsedSlop); + bool setEdgeWeightMaxChecked(BasicBlock::weight_t newWeight, + BasicBlock* bDst, + BasicBlock::weight_t slop, + bool* wbUsedSlop); + void setEdgeWeights(BasicBlock::weight_t newMinWeight, BasicBlock::weight_t newMaxWeight, BasicBlock* bDst); flowList(BasicBlock* block, flowList* rest) : flNext(rest), m_block(block), flEdgeWeightMin(0), flEdgeWeightMax(0), flDupCount(0) diff --git a/src/coreclr/jit/fgflow.cpp b/src/coreclr/jit/fgflow.cpp index 30b3cbf1a6699..8c5cd174ca153 100644 --- a/src/coreclr/jit/fgflow.cpp +++ b/src/coreclr/jit/fgflow.cpp @@ -272,7 +272,7 @@ flowList* Compiler::fgAddRefPred(BasicBlock* block, // If our caller has given us the old edge weights // then we will use them. // - flow->setEdgeWeights(oldEdge->edgeWeightMin(), oldEdge->edgeWeightMax()); + flow->setEdgeWeights(oldEdge->edgeWeightMin(), oldEdge->edgeWeightMax(), block); } else { @@ -284,17 +284,17 @@ flowList* Compiler::fgAddRefPred(BasicBlock* block, // otherwise it is the same as the edge's max weight. if (blockPred->NumSucc() > 1) { - flow->setEdgeWeights(BB_ZERO_WEIGHT, newWeightMax); + flow->setEdgeWeights(BB_ZERO_WEIGHT, newWeightMax, block); } else { - flow->setEdgeWeights(flow->edgeWeightMax(), newWeightMax); + flow->setEdgeWeights(flow->edgeWeightMax(), newWeightMax, block); } } } else { - flow->setEdgeWeights(BB_ZERO_WEIGHT, BB_MAX_WEIGHT); + flow->setEdgeWeights(BB_ZERO_WEIGHT, BB_MAX_WEIGHT, block); } } diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 5fb1255252299..bd455e19f3d57 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -2323,7 +2323,7 @@ bool Compiler::fgOptimizeBranchToEmptyUnconditional(BasicBlock* block, BasicBloc { newEdge2Max = BB_ZERO_WEIGHT; } - edge2->setEdgeWeights(newEdge2Min, newEdge2Max); + edge2->setEdgeWeights(newEdge2Min, newEdge2Max, bDest); } } diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 6481095e31f17..479ea37de5dcb 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -144,7 +144,8 @@ void Compiler::fgComputeProfileScale() impInlineInfo->profileScaleFactor = scale; impInlineInfo->profileScaleState = InlineInfo::ProfileScaleState::KNOWN; - JITDUMP(" call site count %f callee entry count %f scale %f\n", callSiteWeight, calleeWeight, scale); + JITDUMP(" call site count " FMT_WT " callee entry count " FMT_WT " scale " FMT_WT "\n", callSiteWeight, + calleeWeight, scale); } //------------------------------------------------------------------------ @@ -175,7 +176,7 @@ bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::wei if (hash % 3 == 0) { - weight = 0; + weight = BB_ZERO_WEIGHT; } else if (hash % 11 == 0) { @@ -187,7 +188,7 @@ bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::wei } // The first block is never given a weight of zero - if ((offset == 0) && (weight == 0)) + if ((offset == 0) && (weight == BB_ZERO_WEIGHT)) { weight = (BasicBlock::weight_t)1 + (hash % 5); } @@ -1751,15 +1752,6 @@ void Compiler::fgSetProfileWeight(BasicBlock* block, BasicBlock::weight_t profil block->setBBProfileWeight(profileWeight); - if (profileWeight == BB_ZERO_WEIGHT) - { - block->bbSetRunRarely(); - } - else - { - block->bbFlags &= ~BBF_RUN_RARELY; - } - #if HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION // Handle a special case -- some handler entries can't have zero profile count. // @@ -2164,8 +2156,8 @@ void EfficientEdgeCountReconstructor::Prepare() Edge* const edge = new (m_allocator) Edge(sourceBlock, targetBlock); - JITDUMP("... adding known edge " FMT_BB " -> " FMT_BB ": weight %0f\n", edge->m_sourceBlock->bbNum, - edge->m_targetBlock->bbNum, weight); + JITDUMP("... adding known edge " FMT_BB " -> " FMT_BB ": weight " FMT_WT "\n", + edge->m_sourceBlock->bbNum, edge->m_targetBlock->bbNum, weight); edge->m_weightKnown = true; edge->m_weight = weight; @@ -2238,11 +2230,11 @@ void EfficientEdgeCountReconstructor::Solve() edge->m_sourceBlock->bbNum, edge->m_targetBlock->bbNum); } assert(edge->m_weightKnown); - JITDUMP(" " FMT_BB " -> " FMT_BB " has weight %0f\n", edge->m_sourceBlock->bbNum, + JITDUMP(" " FMT_BB " -> " FMT_BB " has weight " FMT_WT "\n", edge->m_sourceBlock->bbNum, edge->m_targetBlock->bbNum, edge->m_weight); weight += edge->m_weight; } - JITDUMP(FMT_BB ": all incoming edge weights known, sum is %0f\n", block->bbNum, weight); + JITDUMP(FMT_BB ": all incoming edge weights known, sum is " FMT_WT "\n", block->bbNum, weight); weightKnown = true; } else if (info->m_outgoingUnknown == 0) @@ -2256,11 +2248,11 @@ void EfficientEdgeCountReconstructor::Solve() edge->m_sourceBlock->bbNum, edge->m_targetBlock->bbNum); } assert(edge->m_weightKnown); - JITDUMP(" " FMT_BB " -> " FMT_BB " has weight %0f\n", edge->m_sourceBlock->bbNum, + JITDUMP(" " FMT_BB " -> " FMT_BB " has weight " FMT_WT "\n", edge->m_sourceBlock->bbNum, edge->m_targetBlock->bbNum, edge->m_weight); weight += edge->m_weight; } - JITDUMP(FMT_BB ": all outgoing edge weights known, sum is %0f\n", block->bbNum, weight); + JITDUMP(FMT_BB ": all outgoing edge weights known, sum is " FMT_WT "\n", block->bbNum, weight); weightKnown = true; } @@ -2304,7 +2296,8 @@ void EfficientEdgeCountReconstructor::Solve() weight = info->m_weight - weight; JITDUMP(FMT_BB " -> " FMT_BB - ": target block weight and all other incoming edge weights known, so weight is %0f\n", + ": target block weight and all other incoming edge weights known, so weight is " FMT_WT + "\n", resolvedEdge->m_sourceBlock->bbNum, resolvedEdge->m_targetBlock->bbNum, weight); // If we arrive at a negative count for this edge, set it to zero. @@ -2350,7 +2343,8 @@ void EfficientEdgeCountReconstructor::Solve() weight = info->m_weight - weight; JITDUMP(FMT_BB " -> " FMT_BB - ": source block weight and all other outgoing edge weights known, so weight is %0f\n", + ": source block weight and all other outgoing edge weights known, so weight is " FMT_WT + "\n", resolvedEdge->m_sourceBlock->bbNum, resolvedEdge->m_targetBlock->bbNum, weight); // If we arrive at a negative count for this edge, set it to zero. @@ -2462,7 +2456,19 @@ void Compiler::fgIncorporateEdgeCounts() e.Propagate(); } -bool flowList::setEdgeWeightMinChecked(BasicBlock::weight_t newWeight, BasicBlock::weight_t slop, bool* wbUsedSlop) +//------------------------------------------------------------------------ +// setEdgeWeightMinChecked: possibly update minimum edge weight +// +// Arguments: +// newWeight - proposed new weight +// bDst - destination block for edge +// slop - profile slush fund +// wbUsedSlop [out] - true if we tapped into the slush fund +// +bool flowList::setEdgeWeightMinChecked(BasicBlock::weight_t newWeight, + BasicBlock* bDst, + BasicBlock::weight_t slop, + bool* wbUsedSlop) { bool result = false; if ((newWeight <= flEdgeWeightMax) && (newWeight >= flEdgeWeightMin)) @@ -2517,11 +2523,12 @@ bool flowList::setEdgeWeightMinChecked(BasicBlock::weight_t newWeight, BasicBloc } // If we are returning true then we should have adjusted the range so that - // the newWeight is in new range [Min..Max] or fgEdjeWeightMax is zero. + // the newWeight is in new range [Min..Max] or fgEdgeWeightMax is zero. // Also we should have set wbUsedSlop to true. if (result == true) { - assert((flEdgeWeightMax == 0) || ((newWeight <= flEdgeWeightMax) && (newWeight >= flEdgeWeightMin))); + assert((flEdgeWeightMax == BB_ZERO_WEIGHT) || + ((newWeight <= flEdgeWeightMax) && (newWeight >= flEdgeWeightMin))); if (wbUsedSlop != nullptr) { @@ -2533,14 +2540,34 @@ bool flowList::setEdgeWeightMinChecked(BasicBlock::weight_t newWeight, BasicBloc #if DEBUG if (result == false) { + JITDUMP("Not adjusting min weight of " FMT_BB " -> " FMT_BB "; new value " FMT_WT " not in range [" FMT_WT + ".." FMT_WT "] (+/- " FMT_WT ")\n", + getBlock()->bbNum, bDst->bbNum, newWeight, flEdgeWeightMin, flEdgeWeightMax, slop); result = false; // break here } + else + { + JITDUMP("Updated min weight of " FMT_BB " -> " FMT_BB " to [" FMT_WT ".." FMT_WT "]\n", getBlock()->bbNum, + bDst->bbNum, flEdgeWeightMin, flEdgeWeightMax); + } #endif // DEBUG return result; } -bool flowList::setEdgeWeightMaxChecked(BasicBlock::weight_t newWeight, BasicBlock::weight_t slop, bool* wbUsedSlop) +//------------------------------------------------------------------------ +// setEdgeWeightMaxChecked: possibly update maximum edge weight +// +// Arguments: +// newWeight - proposed new weight +// bDst - destination block for edge +// slop - profile slush fund +// wbUsedSlop [out] - true if we tapped into the slush fund +// +bool flowList::setEdgeWeightMaxChecked(BasicBlock::weight_t newWeight, + BasicBlock* bDst, + BasicBlock::weight_t slop, + bool* wbUsedSlop) { bool result = false; if ((newWeight >= flEdgeWeightMin) && (newWeight <= flEdgeWeightMax)) @@ -2595,11 +2622,12 @@ bool flowList::setEdgeWeightMaxChecked(BasicBlock::weight_t newWeight, BasicBloc } // If we are returning true then we should have adjusted the range so that - // the newWeight is in new range [Min..Max] or fgEdjeWeightMax is zero + // the newWeight is in new range [Min..Max] or fgEdgeWeightMax is zero // Also we should have set wbUsedSlop to true, unless it is NULL if (result == true) { - assert((flEdgeWeightMax == 0) || ((newWeight <= flEdgeWeightMax) && (newWeight >= flEdgeWeightMin))); + assert((flEdgeWeightMax == BB_ZERO_WEIGHT) || + ((newWeight <= flEdgeWeightMax) && (newWeight >= flEdgeWeightMin))); assert((wbUsedSlop == nullptr) || (*wbUsedSlop == true)); } @@ -2608,8 +2636,16 @@ bool flowList::setEdgeWeightMaxChecked(BasicBlock::weight_t newWeight, BasicBloc #if DEBUG if (result == false) { + JITDUMP("Not adjusting max weight of " FMT_BB " -> " FMT_BB "; new value " FMT_WT " not in range [" FMT_WT + ".." FMT_WT "] (+/- " FMT_WT ")\n", + getBlock()->bbNum, bDst->bbNum, newWeight, flEdgeWeightMin, flEdgeWeightMax, slop); result = false; // break here } + else + { + JITDUMP("Updated max weight of " FMT_BB " -> " FMT_BB " to [" FMT_WT ".." FMT_WT "]\n", getBlock()->bbNum, + bDst->bbNum, flEdgeWeightMin, flEdgeWeightMax); + } #endif // DEBUG return result; @@ -2623,11 +2659,15 @@ bool flowList::setEdgeWeightMaxChecked(BasicBlock::weight_t newWeight, BasicBloc // Arguments: // theMinWeight - the new minimum lower (flEdgeWeightMin) // theMaxWeight - the new maximum upper (flEdgeWeightMin) +// bDst - the destination block for the edge // -void flowList::setEdgeWeights(BasicBlock::weight_t theMinWeight, BasicBlock::weight_t theMaxWeight) +void flowList::setEdgeWeights(BasicBlock::weight_t theMinWeight, BasicBlock::weight_t theMaxWeight, BasicBlock* bDst) { assert(theMinWeight <= theMaxWeight); + JITDUMP("Setting edge weights for " FMT_BB " -> " FMT_BB " to [" FMT_WT " .. " FMT_WT "]\n", getBlock()->bbNum, + bDst->bbNum, theMinWeight, theMaxWeight); + flEdgeWeightMin = theMinWeight; flEdgeWeightMax = theMaxWeight; } @@ -2761,7 +2801,7 @@ BasicBlock::weight_t Compiler::fgComputeMissingBlockWeights() changed = true; modified = true; bDst->bbWeight = newWeight; - if (newWeight == 0) + if (newWeight == BB_ZERO_WEIGHT) { bDst->bbFlags |= BBF_RUN_RARELY; } @@ -2834,7 +2874,7 @@ void Compiler::fgComputeCalledCount(BasicBlock::weight_t returnWeight) // (i.e. the function never returns because it always throws) // then just use the first block weight rather than 0. // - if ((firstILBlock->countOfInEdges() == 1) || (returnWeight == 0)) + if ((firstILBlock->countOfInEdges() == 1) || (returnWeight == BB_ZERO_WEIGHT)) { assert(firstILBlock->hasProfileWeight()); // This should always be a profile-derived weight fgCalledCount = firstILBlock->bbWeight; @@ -2849,20 +2889,12 @@ void Compiler::fgComputeCalledCount(BasicBlock::weight_t returnWeight) if (fgFirstBBisScratch()) { fgFirstBB->setBBProfileWeight(fgCalledCount); - if (fgFirstBB->bbWeight == 0) - { - fgFirstBB->bbFlags |= BBF_RUN_RARELY; - } - else - { - fgFirstBB->bbFlags &= ~BBF_RUN_RARELY; - } } #if DEBUG if (verbose) { - printf("We are using the Profile Weights and fgCalledCount is %.0f.\n", fgCalledCount); + printf("We are using the Profile Weights and fgCalledCount is " FMT_WT "\n", fgCalledCount); } #endif } @@ -2893,6 +2925,8 @@ void Compiler::fgComputeEdgeWeights() unsigned numEdges = 0; unsigned iterations = 0; + JITDUMP("Initial weight assignments\n\n"); + // Now we will compute the initial flEdgeWeightMin and flEdgeWeightMax values for (bDst = fgFirstBB; bDst != nullptr; bDst = bDst->bbNext) { @@ -2922,7 +2956,7 @@ void Compiler::fgComputeEdgeWeights() if (!bSrc->hasProfileWeight() || !bDst->hasProfileWeight()) { - edge->setEdgeWeights(BB_ZERO_WEIGHT, BB_MAX_WEIGHT); + edge->setEdgeWeights(BB_ZERO_WEIGHT, BB_MAX_WEIGHT, bDst); } slop = BasicBlock::GetSlopFraction(bSrc, bDst) + 1; @@ -2933,8 +2967,8 @@ void Compiler::fgComputeEdgeWeights() case BBJ_NONE: case BBJ_CALLFINALLY: // We know the exact edge weight - assignOK &= edge->setEdgeWeightMinChecked(bSrc->bbWeight, slop, &usedSlop); - assignOK &= edge->setEdgeWeightMaxChecked(bSrc->bbWeight, slop, &usedSlop); + assignOK &= edge->setEdgeWeightMinChecked(bSrc->bbWeight, bDst, slop, &usedSlop); + assignOK &= edge->setEdgeWeightMaxChecked(bSrc->bbWeight, bDst, slop, &usedSlop); break; case BBJ_COND: @@ -2944,7 +2978,7 @@ void Compiler::fgComputeEdgeWeights() if (edge->edgeWeightMax() > bSrc->bbWeight) { // The maximum edge weight to block can't be greater than the weight of bSrc - assignOK &= edge->setEdgeWeightMaxChecked(bSrc->bbWeight, slop, &usedSlop); + assignOK &= edge->setEdgeWeightMaxChecked(bSrc->bbWeight, bDst, slop, &usedSlop); } break; @@ -2957,7 +2991,7 @@ void Compiler::fgComputeEdgeWeights() // The maximum edge weight to block can't be greater than the weight of bDst if (edge->edgeWeightMax() > bDstWeight) { - assignOK &= edge->setEdgeWeightMaxChecked(bDstWeight, slop, &usedSlop); + assignOK &= edge->setEdgeWeightMaxChecked(bDstWeight, bDst, slop, &usedSlop); } if (!assignOK) @@ -2976,11 +3010,14 @@ void Compiler::fgComputeEdgeWeights() do { + JITDUMP("\nSolver pass %u\n", iterations); + iterations++; goodEdgeCountPrevious = goodEdgeCountCurrent; goodEdgeCountCurrent = 0; hasIncompleteEdgeWeights = false; + JITDUMP("\n -- step 1 --\n"); for (bDst = fgFirstBB; bDst != nullptr; bDst = bDst->bbNext) { for (edge = bDst->bbPreds; edge != nullptr; edge = edge->flNext) @@ -2995,14 +3032,17 @@ void Compiler::fgComputeEdgeWeights() { BasicBlock::weight_t diff; flowList* otherEdge; + BasicBlock* otherDst; if (bSrc->bbNext == bDst) { - otherEdge = fgGetPredForBlock(bSrc->bbJumpDest, bSrc); + otherDst = bSrc->bbJumpDest; } else { - otherEdge = fgGetPredForBlock(bSrc->bbNext, bSrc); + otherDst = bSrc->bbNext; } + otherEdge = fgGetPredForBlock(otherDst, bSrc); + noway_assert(edge->edgeWeightMin() <= edge->edgeWeightMax()); noway_assert(otherEdge->edgeWeightMin() <= otherEdge->edgeWeightMax()); @@ -3010,24 +3050,24 @@ void Compiler::fgComputeEdgeWeights() diff = bSrc->bbWeight - (edge->edgeWeightMin() + otherEdge->edgeWeightMax()); if (diff > 0) { - assignOK &= edge->setEdgeWeightMinChecked(edge->edgeWeightMin() + diff, slop, &usedSlop); + assignOK &= edge->setEdgeWeightMinChecked(edge->edgeWeightMin() + diff, bDst, slop, &usedSlop); } else if (diff < 0) { - assignOK &= - otherEdge->setEdgeWeightMaxChecked(otherEdge->edgeWeightMax() + diff, slop, &usedSlop); + assignOK &= otherEdge->setEdgeWeightMaxChecked(otherEdge->edgeWeightMax() + diff, otherDst, + slop, &usedSlop); } // Adjust otherEdge->flEdgeWeightMin up or adjust edge->flEdgeWeightMax down diff = bSrc->bbWeight - (otherEdge->edgeWeightMin() + edge->edgeWeightMax()); if (diff > 0) { - assignOK &= - otherEdge->setEdgeWeightMinChecked(otherEdge->edgeWeightMin() + diff, slop, &usedSlop); + assignOK &= otherEdge->setEdgeWeightMinChecked(otherEdge->edgeWeightMin() + diff, otherDst, + slop, &usedSlop); } else if (diff < 0) { - assignOK &= edge->setEdgeWeightMaxChecked(edge->edgeWeightMax() + diff, slop, &usedSlop); + assignOK &= edge->setEdgeWeightMaxChecked(edge->edgeWeightMax() + diff, bDst, slop, &usedSlop); } if (!assignOK) @@ -3050,6 +3090,8 @@ void Compiler::fgComputeEdgeWeights() } } + JITDUMP("\n -- step 2 --\n"); + for (bDst = fgFirstBB; bDst != nullptr; bDst = bDst->bbNext) { BasicBlock::weight_t bDstWeight = bDst->bbWeight; @@ -3111,7 +3153,7 @@ void Compiler::fgComputeEdgeWeights() (BasicBlock::weight_t)(bDstWeight - otherMaxEdgesWeightSum); if (minWeightCalc > edge->edgeWeightMin()) { - assignOK &= edge->setEdgeWeightMinChecked(minWeightCalc, slop, &usedSlop); + assignOK &= edge->setEdgeWeightMinChecked(minWeightCalc, bDst, slop, &usedSlop); } } @@ -3122,15 +3164,15 @@ void Compiler::fgComputeEdgeWeights() (BasicBlock::weight_t)(bDstWeight - otherMinEdgesWeightSum); if (maxWeightCalc < edge->edgeWeightMax()) { - assignOK &= edge->setEdgeWeightMaxChecked(maxWeightCalc, slop, &usedSlop); + assignOK &= edge->setEdgeWeightMaxChecked(maxWeightCalc, bDst, slop, &usedSlop); } } if (!assignOK) { // Here we have inconsistent profile data - JITDUMP("Inconsistent profile data at " FMT_BB " -> " FMT_BB - ": dest weight %f, min/max into dest is %f/%f, edge %f/%f\n", + JITDUMP("Inconsistent profile data at " FMT_BB " -> " FMT_BB ": dest weight " FMT_WT + ", min/max into dest is " FMT_WT "/" FMT_WT ", edge " FMT_WT "/" FMT_WT "\n", bSrc->bbNum, bDst->bbNum, bDstWeight, minEdgeWeightSum, maxEdgeWeightSum, edge->edgeWeightMin(), edge->edgeWeightMax()); @@ -3254,14 +3296,14 @@ bool Compiler::fgProfileWeightsEqual(BasicBlock::weight_t weight1, BasicBlock::w // bool Compiler::fgProfileWeightsConsistent(BasicBlock::weight_t weight1, BasicBlock::weight_t weight2) { - if (weight2 == 0) + if (weight2 == BB_ZERO_WEIGHT) { return fgProfileWeightsEqual(weight1, weight2); } BasicBlock::weight_t const relativeDiff = (weight2 - weight1) / weight2; - return fgProfileWeightsEqual(relativeDiff, 0.0f); + return fgProfileWeightsEqual(relativeDiff, BB_ZERO_WEIGHT); } #ifdef DEBUG @@ -3280,10 +3322,15 @@ bool Compiler::fgProfileWeightsConsistent(BasicBlock::weight_t weight1, BasicBlo // void Compiler::fgDebugCheckProfileData() { - // We can't check before we have pred lists built. - // assert(fgComputePredsDone); + // We can't check before we have computed edge weights. + // + if (!fgEdgeWeightsComputed) + { + return; + } + JITDUMP("Checking Profile Data\n"); unsigned problemBlocks = 0; unsigned unprofiledBlocks = 0; @@ -3377,10 +3424,10 @@ void Compiler::fgDebugCheckProfileData() // if (entryProfiled && exitProfiled) { - if (entryWeight != exitWeight) + if (!fgProfileWeightsConsistent(entryWeight, exitWeight)) { problemBlocks++; - JITDUMP(" Entry %f exit %f weight mismatch\n", entryWeight, exitWeight); + JITDUMP(" Entry " FMT_WT " exit " FMT_WT " weight mismatch\n", entryWeight, exitWeight); } } @@ -3444,22 +3491,24 @@ bool Compiler::fgDebugCheckIncomingProfileData(BasicBlock* block) return true; } - if (incomingWeightMin > incomingWeightMax) + if (!fgProfileWeightsConsistent(incomingWeightMin, incomingWeightMax)) { - JITDUMP(" " FMT_BB " - incoming min %f > incoming max %f\n", block->bbNum, incomingWeightMin, - incomingWeightMax); + JITDUMP(" " FMT_BB " - incoming min " FMT_WT " inconsistent with incoming max " FMT_WT "\n", block->bbNum, + incomingWeightMin, incomingWeightMax); return false; } - if (blockWeight < incomingWeightMin) + if (!fgProfileWeightsConsistent(blockWeight, incomingWeightMin)) { - JITDUMP(" " FMT_BB " - block weight %f < incoming min %f\n", block->bbNum, blockWeight, incomingWeightMin); + JITDUMP(" " FMT_BB " - block weight " FMT_WT " inconsistent with incoming min " FMT_WT "\n", block->bbNum, + blockWeight, incomingWeightMin); return false; } - if (blockWeight > incomingWeightMax) + if (!fgProfileWeightsConsistent(blockWeight, incomingWeightMax)) { - JITDUMP(" " FMT_BB " - block weight %f > incoming max %f\n", block->bbNum, blockWeight, incomingWeightMax); + JITDUMP(" " FMT_BB " - block weight " FMT_WT " inconsistent with incoming max " FMT_WT "\n", block->bbNum, + blockWeight, incomingWeightMax); return false; } @@ -3481,7 +3530,9 @@ bool Compiler::fgDebugCheckIncomingProfileData(BasicBlock* block) // bool Compiler::fgDebugCheckOutgoingProfileData(BasicBlock* block) { - const unsigned numSuccs = block->NumSucc(); + // We want switch targets unified, but not EH edges. + // + const unsigned numSuccs = block->NumSucc(this); if (numSuccs == 0) { @@ -3490,18 +3541,24 @@ bool Compiler::fgDebugCheckOutgoingProfileData(BasicBlock* block) return true; } + // We won't check finally or filter returns (for now). + // + if ((block->bbJumpKind == BBJ_EHFINALLYRET) || (block->bbJumpKind == BBJ_EHFILTERRET)) + { + return true; + } + BasicBlock::weight_t const blockWeight = block->bbWeight; BasicBlock::weight_t outgoingWeightMin = 0; BasicBlock::weight_t outgoingWeightMax = 0; - // Walking successor edges is a bit wonky. Seems like it should be easier. - // Note this can also fail to enumerate all the edges, if we have a multigraph + // Walk successor edges and add up flow counts. // int missingEdges = 0; for (unsigned i = 0; i < numSuccs; i++) { - BasicBlock* succBlock = block->GetSucc(i); + BasicBlock* succBlock = block->GetSucc(i, this); flowList* succEdge = nullptr; for (flowList* edge = succBlock->bbPreds; edge != nullptr; edge = edge->flNext) @@ -3529,22 +3586,24 @@ bool Compiler::fgDebugCheckOutgoingProfileData(BasicBlock* block) JITDUMP(" " FMT_BB " - missing %d successor edges\n", block->bbNum, missingEdges); } - if (outgoingWeightMin > outgoingWeightMax) + if (!fgProfileWeightsConsistent(outgoingWeightMin, outgoingWeightMax)) { - JITDUMP(" " FMT_BB " - outgoing min %f > outgoing max %f\n", block->bbNum, outgoingWeightMin, - outgoingWeightMax); + JITDUMP(" " FMT_BB " - outgoing min " FMT_WT " inconsistent with outgoing max " FMT_WT "\n", block->bbNum, + outgoingWeightMin, outgoingWeightMax); return false; } - if (blockWeight < outgoingWeightMin) + if (!fgProfileWeightsConsistent(blockWeight, outgoingWeightMin)) { - JITDUMP(" " FMT_BB " - block weight %f < outgoing min %f\n", block->bbNum, blockWeight, outgoingWeightMin); + JITDUMP(" " FMT_BB " - block weight " FMT_WT " inconsistent with outgoing min " FMT_WT "\n", block->bbNum, + blockWeight, outgoingWeightMin); return false; } - if (blockWeight > outgoingWeightMax) + if (!fgProfileWeightsConsistent(blockWeight, outgoingWeightMax)) { - JITDUMP(" " FMT_BB " - block weight %f > outgoing max %f\n", block->bbNum, blockWeight, outgoingWeightMax); + JITDUMP(" " FMT_BB " - block weight " FMT_WT " inconsistent with outgoing max " FMT_WT "\n", block->bbNum, + blockWeight, outgoingWeightMax); return false; } diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index d6a9ed7642b15..8b89c2e5945a5 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -2313,17 +2313,7 @@ class MergedReturns noway_assert(newReturnBB->bbNext == nullptr); -#ifdef DEBUG - if (comp->verbose) - { - printf("\n newReturnBB [" FMT_BB "] created\n", newReturnBB->bbNum); - } -#endif - - // We have profile weight, the weight is zero, and the block is run rarely, - // until we prove otherwise by merging other returns into this one. - newReturnBB->bbFlags |= (BBF_PROF_WEIGHT | BBF_RUN_RARELY); - newReturnBB->bbWeight = 0; + JITDUMP("\n newReturnBB [" FMT_BB "] created\n", newReturnBB->bbNum); GenTree* returnExpr; @@ -2505,6 +2495,22 @@ class MergedReturns // merged return block are lexically forward. insertionPoints[index] = returnBlock; + + // Update profile information in the mergedReturnBlock to + // reflect the additional flow. + // + if (returnBlock->hasProfileWeight()) + { + BasicBlock::weight_t const oldWeight = + mergedReturnBlock->hasProfileWeight() ? mergedReturnBlock->bbWeight : BB_ZERO_WEIGHT; + BasicBlock::weight_t const newWeight = oldWeight + returnBlock->bbWeight; + + JITDUMP("merging profile weight " FMT_WT " from " FMT_BB " to const return " FMT_BB "\n", + returnBlock->bbWeight, returnBlock->bbNum, mergedReturnBlock->bbNum); + + mergedReturnBlock->setBBProfileWeight(newWeight); + DISPBLOCK(mergedReturnBlock); + } } } } @@ -2512,6 +2518,8 @@ class MergedReturns if (mergedReturnBlock == nullptr) { // No constant return block for this return; use the general one. + // We defer flow update and profile update to morph. + // mergedReturnBlock = comp->genReturnBB; if (mergedReturnBlock == nullptr) { @@ -2528,20 +2536,6 @@ class MergedReturns if (returnBlock != nullptr) { - // Propagate profile weight and related annotations to the merged block. - // Return weight should never exceed entry weight, so cap it to avoid nonsensical - // hot returns in synthetic profile settings. - mergedReturnBlock->bbWeight = - min(mergedReturnBlock->bbWeight + returnBlock->bbWeight, comp->fgFirstBB->bbWeight); - if (!returnBlock->hasProfileWeight()) - { - mergedReturnBlock->bbFlags &= ~BBF_PROF_WEIGHT; - } - if (mergedReturnBlock->bbWeight > 0) - { - mergedReturnBlock->bbFlags &= ~BBF_RUN_RARELY; - } - // Update fgReturnCount to reflect or anticipate that `returnBlock` will no longer // be a return point. comp->fgReturnCount--; @@ -2746,13 +2740,6 @@ void Compiler::fgAddInternal() // will expect to find it. BasicBlock* mergedReturn = merger.EagerCreate(); assert(mergedReturn == genReturnBB); - // Assume weight equal to entry weight for this BB. - mergedReturn->bbFlags &= ~BBF_PROF_WEIGHT; - mergedReturn->bbWeight = fgFirstBB->bbWeight; - if (mergedReturn->bbWeight > 0) - { - mergedReturn->bbFlags &= ~BBF_RUN_RARELY; - } } else { diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index c5d9774dfa639..4c0a5bf63487d 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -548,6 +548,9 @@ const bool dspGCtbls = true; #define DISPTREERANGE(range, t) \ if (JitTls::GetCompiler()->verbose) \ JitTls::GetCompiler()->gtDispTreeRange(range, t); +#define DISPBLOCK(b) \ + if (JitTls::GetCompiler()->verbose) \ + JitTls::GetCompiler()->fgTableDispBasicBlock(b); #define VERBOSE JitTls::GetCompiler()->verbose #else // !DEBUG #define JITDUMP(...) @@ -559,6 +562,7 @@ const bool dspGCtbls = true; #define DISPSTMT(t) #define DISPRANGE(range) #define DISPTREERANGE(range, t) +#define DISPBLOCK(b) #define VERBOSE 0 #endif // !DEBUG diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 0c07628678cdd..d939f9aad4990 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -7637,10 +7637,90 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) } #endif - // This block is not longer any block's predecessor. If we end up - // converting this tail call to a branch, we'll add appropriate - // successor information then. - fgRemoveBlockAsPred(compCurBB); + // If this block has a flow successor, make suitable updates. + // + BasicBlock* const nextBlock = compCurBB->GetUniqueSucc(); + + if (nextBlock == nullptr) + { + // No unique successor. compCurBB should be a return. + // + assert(compCurBB->bbJumpKind == BBJ_RETURN); + } + else + { + // Flow no longer reaches nextBlock from here. + // + fgRemoveRefPred(nextBlock, compCurBB); + + // Adjust profile weights. + // + // Note if this is a tail call to loop, further updates + // are needed once we install the loop edge. + // + if (compCurBB->hasProfileWeight() && nextBlock->hasProfileWeight()) + { + // Since we have linear flow we can update the next block weight. + // + BasicBlock::weight_t const blockWeight = compCurBB->bbWeight; + BasicBlock::weight_t const nextWeight = nextBlock->bbWeight; + BasicBlock::weight_t const newNextWeight = nextWeight - blockWeight; + + // If the math would result in a negative weight then there's + // no local repair we can do; just leave things inconsistent. + // + if (newNextWeight >= 0) + { + // Note if we'd already morphed the IR in nextblock we might + // have done something profile sensitive that we should arguably reconsider. + // + JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", nextBlock->bbNum, + nextWeight, newNextWeight); + + nextBlock->setBBProfileWeight(newNextWeight); + } + else + { + JITDUMP("Not reducing profile weight of " FMT_BB " as its weight " FMT_WT + " is less than direct flow pred " FMT_BB " weight " FMT_WT "\n", + nextBlock->bbNum, nextWeight, compCurBB->bbNum, blockWeight); + } + + // If nextBlock is not a BBJ_RETURN, it should have a unique successor that + // is a BBJ_RETURN, as we allow a little bit of flow after a tail call. + // + if (nextBlock->bbJumpKind != BBJ_RETURN) + { + BasicBlock* const nextNextBlock = nextBlock->GetUniqueSucc(); + assert(nextNextBlock->bbJumpKind == BBJ_RETURN); + + if (nextNextBlock->hasProfileWeight()) + { + // Do similar updates here. + // + BasicBlock::weight_t const nextNextWeight = nextNextBlock->bbWeight; + BasicBlock::weight_t const newNextNextWeight = nextNextWeight - blockWeight; + + // If the math would result in an negative weight then there's + // no local repair we can do; just leave things inconsistent. + // + if (newNextNextWeight >= 0) + { + JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", + nextNextBlock->bbNum, nextNextWeight, newNextNextWeight); + + nextNextBlock->setBBProfileWeight(newNextNextWeight); + } + else + { + JITDUMP("Not reducing profile weight of " FMT_BB " as its weight " FMT_WT + " is less than direct flow pred " FMT_BB " weight " FMT_WT "\n", + nextNextBlock->bbNum, nextNextWeight, compCurBB->bbNum, blockWeight); + } + } + } + } + } #if !FEATURE_TAILCALL_OPT_SHARED_RETURN // We enable shared-ret tail call optimization for recursive calls even if @@ -16273,7 +16353,7 @@ bool Compiler::fgFoldConditional(BasicBlock* block) { // The edge weights for (block -> bTaken) are 100% of block's weight - edgeTaken->setEdgeWeights(block->bbWeight, block->bbWeight); + edgeTaken->setEdgeWeights(block->bbWeight, block->bbWeight, bTaken); if (!bTaken->hasProfileWeight()) { @@ -16290,7 +16370,7 @@ bool Compiler::fgFoldConditional(BasicBlock* block) if (bTaken->countOfInEdges() == 1) { // There is only one in edge to bTaken - edgeTaken->setEdgeWeights(bTaken->bbWeight, bTaken->bbWeight); + edgeTaken->setEdgeWeights(bTaken->bbWeight, bTaken->bbWeight, bTaken); // Update the weight of block block->inheritWeight(bTaken); @@ -16311,21 +16391,21 @@ bool Compiler::fgFoldConditional(BasicBlock* block) edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated); newMaxWeight = bUpdated->bbWeight; newMinWeight = min(edge->edgeWeightMin(), newMaxWeight); - edge->setEdgeWeights(newMinWeight, newMaxWeight); + edge->setEdgeWeights(newMinWeight, newMaxWeight, bUpdated->bbNext); break; case BBJ_COND: edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated); newMaxWeight = bUpdated->bbWeight; newMinWeight = min(edge->edgeWeightMin(), newMaxWeight); - edge->setEdgeWeights(newMinWeight, newMaxWeight); + edge->setEdgeWeights(newMinWeight, newMaxWeight, bUpdated->bbNext); FALLTHROUGH; case BBJ_ALWAYS: edge = fgGetPredForBlock(bUpdated->bbJumpDest, bUpdated); newMaxWeight = bUpdated->bbWeight; newMinWeight = min(edge->edgeWeightMin(), newMaxWeight); - edge->setEdgeWeights(newMinWeight, newMaxWeight); + edge->setEdgeWeights(newMinWeight, newMaxWeight, bUpdated->bbNext); break; default: @@ -17074,13 +17154,22 @@ void Compiler::fgMergeBlockReturn(BasicBlock* block) fgRemoveStmt(block, lastStmt); } -#ifdef DEBUG - if (verbose) + + JITDUMP("\nUpdate " FMT_BB " to jump to common return block.\n", block->bbNum); + DISPBLOCK(block); + + if (block->hasProfileWeight()) { - printf("morph " FMT_BB " to point at onereturn. New block is\n", block->bbNum); - fgTableDispBasicBlock(block); + BasicBlock::weight_t const oldWeight = + genReturnBB->hasProfileWeight() ? genReturnBB->bbWeight : BB_ZERO_WEIGHT; + BasicBlock::weight_t const newWeight = oldWeight + block->bbWeight; + + JITDUMP("merging profile weight " FMT_WT " from " FMT_BB " to common return " FMT_BB "\n", block->bbWeight, + block->bbNum, genReturnBB->bbNum); + + genReturnBB->setBBProfileWeight(newWeight); + DISPBLOCK(genReturnBB); } -#endif } } diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 79b32874f58fd..1244ec3454912 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -4423,8 +4423,8 @@ void Compiler::optInvertWhileLoop(BasicBlock* block) JITDUMP("Setting weight of " FMT_BB " -> " FMT_BB " to %f (exit loop)\n", bTest->bbNum, bTest->bbNext->bbNum, testToAfterWeight); - edgeTestToNext->setEdgeWeights(testToNextWeight, testToNextWeight); - edgeTestToAfter->setEdgeWeights(testToAfterWeight, testToAfterWeight); + edgeTestToNext->setEdgeWeights(testToNextWeight, testToNextWeight, bTest->bbJumpDest); + edgeTestToAfter->setEdgeWeights(testToAfterWeight, testToAfterWeight, bTest->bbNext); // Adjust edges out of block, using the same distribution. // @@ -4444,8 +4444,8 @@ void Compiler::optInvertWhileLoop(BasicBlock* block) JITDUMP("Setting weight of " FMT_BB " -> " FMT_BB " to %f (avoid loop)\n", block->bbNum, block->bbJumpDest->bbNum, blockToAfterWeight); - edgeBlockToNext->setEdgeWeights(blockToNextWeight, blockToNextWeight); - edgeBlockToAfter->setEdgeWeights(blockToAfterWeight, blockToAfterWeight); + edgeBlockToNext->setEdgeWeights(blockToNextWeight, blockToNextWeight, block->bbNext); + edgeBlockToAfter->setEdgeWeights(blockToAfterWeight, blockToAfterWeight, block->bbJumpDest); #ifdef DEBUG // Verify profile for the two target blocks is consistent. @@ -7578,17 +7578,25 @@ void Compiler::fgCreateLoopPreHeader(unsigned lnum) loopSkippedCount = head->bbJumpDest->bbWeight; } + JITDUMP("%s; loopEnterCount " FMT_WT " loopSkipCount " FMT_WT "\n", + fgHaveValidEdgeWeights ? "valid edge weights" : "no edge weights", loopEnteredCount, + loopSkippedCount); + BasicBlock::weight_t loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount); + JITDUMP("%s; loopEnterCount " FMT_WT " loopSkipCount " FMT_WT " taken ratio " FMT_WT "\n", + fgHaveValidEdgeWeights ? "valid edge weights" : "no edge weights", loopEnteredCount, + loopSkippedCount, loopTakenRatio); + // Calculate a good approximation of the preHead's block weight - BasicBlock::weight_t preHeadWeight = (head->bbWeight * loopTakenRatio) + 0.5f; - preHead->setBBWeight(max(preHeadWeight, 1)); + BasicBlock::weight_t preHeadWeight = (head->bbWeight * loopTakenRatio); + preHead->setBBProfileWeight(preHeadWeight); noway_assert(!preHead->isRunRarely()); } } } - // Link in the preHead block. + // Link in the preHead block fgInsertBBbefore(top, preHead); // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting. @@ -7639,8 +7647,9 @@ void Compiler::fgCreateLoopPreHeader(unsigned lnum) All predecessors of 'beg', (which is the entry in the loop) now have to jump to 'preHead', unless they are dominated by 'head' */ - preHead->bbRefs = 0; - fgAddRefPred(preHead, head); + preHead->bbRefs = 0; + flowList* const edgeToPreHeader = fgAddRefPred(preHead, head); + edgeToPreHeader->setEdgeWeights(preHead->bbWeight, preHead->bbWeight, preHead); bool checkNestedLoops = false; for (flowList* pred = top->bbPreds; pred; pred = pred->flNext) @@ -7722,7 +7731,8 @@ void Compiler::fgCreateLoopPreHeader(unsigned lnum) noway_assert(!fgGetPredForBlock(top, preHead)); fgRemoveRefPred(top, head); - fgAddRefPred(top, preHead); + flowList* const edgeFromPreHeader = fgAddRefPred(top, preHead); + edgeFromPreHeader->setEdgeWeights(preHead->bbWeight, preHead->bbWeight, top); /* If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop @@ -9308,11 +9318,11 @@ void Compiler::optOptimizeBools() BasicBlock::weight_t edgeSumMax = edge1->edgeWeightMax() + edge2->edgeWeightMax(); if ((edgeSumMax >= edge1->edgeWeightMax()) && (edgeSumMax >= edge2->edgeWeightMax())) { - edge1->setEdgeWeights(edgeSumMin, edgeSumMax); + edge1->setEdgeWeights(edgeSumMin, edgeSumMax, b1->bbJumpDest); } else { - edge1->setEdgeWeights(BB_ZERO_WEIGHT, BB_MAX_WEIGHT); + edge1->setEdgeWeights(BB_ZERO_WEIGHT, BB_MAX_WEIGHT, b1->bbJumpDest); } /* Get rid of the second block (which is a BBJ_COND) */