-
Notifications
You must be signed in to change notification settings - Fork 4.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Optimization to remove redundant zero initializations. #36918
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13883,7 +13883,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) | |
bool bbIsReturn = (block->bbJumpKind == BBJ_RETURN) && | ||
(!compIsForInlining() || (impInlineInfo->iciBlock->bbJumpKind == BBJ_RETURN)); | ||
LclVarDsc* const lclDsc = lvaGetDesc(lclNum); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unrelated to this PR, but this code inexplicably uses both |
||
if (fgVarNeedsExplicitZeroInit(lclDsc, bbInALoop, bbIsReturn)) | ||
if (fgVarNeedsExplicitZeroInit(lclNum, bbInALoop, bbIsReturn)) | ||
{ | ||
// Append a tree to zero-out the temp | ||
newObjThisPtr = gtNewLclvNode(lclNum, lvaTable[lclNum].TypeGet()); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9189,3 +9189,147 @@ void Compiler::optOptimizeBools() | |
fgDebugCheckBBlist(); | ||
#endif | ||
} | ||
|
||
typedef JitHashTable<unsigned, JitSmallPrimitiveKeyFuncs<unsigned>, unsigned> LclVarRefCounts; | ||
|
||
//------------------------------------------------------------------------------------------ | ||
// optRemoveRedundantZeroInits: Remove redundant zero initializations. | ||
// | ||
// Notes: | ||
// This phase iterates over basic blocks starting with the first basic block until there is no unique | ||
// basic block successor or until it detects a loop. It keeps track of local nodes it encounters. | ||
// When it gets to an assignment to a local variable or a local field, it checks whether the assignment | ||
// is the first reference to the local (or to the parent of the local field), and, if so, | ||
// it may do one of two optimizations: | ||
// 1. If the following conditions are true: | ||
// the local is untracked, | ||
// the rhs of the assignment is 0, | ||
// the local is guaranteed to be fully initialized in the prolog, | ||
// then the explicit zero initialization is removed. | ||
// 2. If the following conditions are true: | ||
// the assignment is to a local (and not a field), | ||
// the local is not lvLiveInOutOfHndlr or no exceptions can be thrown between the prolog and the assignment, | ||
// either the local has no gc pointers or there are no gc-safe points between the prolog and the assignment, | ||
// then the local is marked with lvHasExplicitInit, which tells the codegen not to insert zero initialization | ||
// for this local in the prolog. | ||
|
||
void Compiler::optRemoveRedundantZeroInits() | ||
{ | ||
#ifdef DEBUG | ||
if (verbose) | ||
{ | ||
printf("*************** In optRemoveRedundantZeroInits()\n"); | ||
} | ||
#endif // DEBUG | ||
|
||
CompAllocator allocator(getAllocator(CMK_ZeroInit)); | ||
LclVarRefCounts refCounts(allocator); | ||
bool hasGCSafePoint = false; | ||
bool canThrow = false; | ||
|
||
assert(fgStmtListThreaded); | ||
|
||
for (BasicBlock* block = fgFirstBB; (block != nullptr) && ((block->bbFlags & BBF_MARKED) == 0); | ||
block = block->GetUniqueSucc()) | ||
{ | ||
block->bbFlags |= BBF_MARKED; | ||
for (Statement* stmt = block->FirstNonPhiDef(); stmt != nullptr;) | ||
{ | ||
Statement* next = stmt->GetNextStmt(); | ||
for (GenTree* tree = stmt->GetTreeList(); tree != nullptr; tree = tree->gtNext) | ||
{ | ||
if (((tree->gtFlags & GTF_CALL) != 0) && (!tree->IsCall() || !tree->AsCall()->IsSuppressGCTransition())) | ||
{ | ||
hasGCSafePoint = true; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't we have some calls ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure, I added a check for calls with |
||
} | ||
|
||
if ((tree->gtFlags & GTF_EXCEPT) != 0) | ||
{ | ||
canThrow = true; | ||
} | ||
|
||
switch (tree->gtOper) | ||
{ | ||
case GT_LCL_VAR: | ||
case GT_LCL_FLD: | ||
case GT_LCL_VAR_ADDR: | ||
case GT_LCL_FLD_ADDR: | ||
{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For non-address exposed locals we only care about seeing defs, not uses? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We care about seeing defs when removing explicit zero initializations and we care about seeing uses when marking locals with |
||
unsigned lclNum = tree->AsLclVarCommon()->GetLclNum(); | ||
unsigned* pRefCount = refCounts.LookupPointer(lclNum); | ||
if (pRefCount != nullptr) | ||
{ | ||
*pRefCount = (*pRefCount) + 1; | ||
} | ||
else | ||
{ | ||
refCounts.Set(lclNum, 1); | ||
} | ||
|
||
break; | ||
} | ||
case GT_ASG: | ||
{ | ||
GenTreeOp* treeOp = tree->AsOp(); | ||
if (treeOp->gtOp1->OperIs(GT_LCL_VAR, GT_LCL_FLD)) | ||
{ | ||
unsigned lclNum = treeOp->gtOp1->AsLclVarCommon()->GetLclNum(); | ||
LclVarDsc* const lclDsc = lvaGetDesc(lclNum); | ||
unsigned* pRefCount = refCounts.LookupPointer(lclNum); | ||
assert(pRefCount != nullptr); | ||
if (*pRefCount == 1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Minor suggestion - it might be worth a comment (or perhaps add to the comment below) that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will add a comment with my next PR. |
||
{ | ||
// The local hasn't been referenced before this assignment. | ||
bool removedExplicitZeroInit = false; | ||
if (!lclDsc->lvTracked && treeOp->gtOp2->IsIntegralConst(0)) | ||
{ | ||
bool bbInALoop = (block->bbFlags & BBF_BACKWARD_JUMP) != 0; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. BBF_BACKWARD_JUMP may be too conservative? Seems like we should have enough graph analysis state lying around to detect loops more accurately. Might not be worth the trouble. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I looked around and found that we call |
||
bool bbIsReturn = block->bbJumpKind == BBJ_RETURN; | ||
|
||
if (!fgVarNeedsExplicitZeroInit(lclNum, bbInALoop, bbIsReturn)) | ||
{ | ||
// We are guaranteed to have a zero initialization in the prolog and | ||
// the local hasn't been redefined between the prolog and this explicit | ||
// zero initialization so the assignment can be safely removed. | ||
if (tree == stmt->GetRootNode()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For non-root assignments can't we just bash the asg tree to a nop? Or if bashing during walking is too painful, keep track of these trees and bash them later? Or are these rare enough that it's not worth trying to handle them? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tried that and saw no new diffs in framework and benchmarks. |
||
{ | ||
fgRemoveStmt(block, stmt); | ||
removedExplicitZeroInit = true; | ||
*pRefCount = 0; | ||
lclDsc->lvSuppressedZeroInit = 1; | ||
} | ||
} | ||
} | ||
|
||
if (!removedExplicitZeroInit && treeOp->gtOp1->OperIs(GT_LCL_VAR) && | ||
(!canThrow || !lclDsc->lvLiveInOutOfHndlr)) | ||
{ | ||
// If compMethodRequiresPInvokeFrame() returns true, lower may later | ||
// insert a call to CORINFO_HELP_INIT_PINVOKE_FRAME which is a gc-safe point. | ||
if (!lclDsc->HasGCPtr() || | ||
(!GetInterruptible() && !hasGCSafePoint && !compMethodRequiresPInvokeFrame())) | ||
{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add to your comment above or below and explain why we're checking There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
// The local hasn't been used and won't be reported to the gc between | ||
// the prolog and this explicit initialization. Therefore, it doesn't | ||
// require zero initialization in the prolog. | ||
lclDsc->lvHasExplicitInit = 1; | ||
} | ||
} | ||
} | ||
} | ||
break; | ||
} | ||
default: | ||
break; | ||
} | ||
} | ||
stmt = next; | ||
} | ||
} | ||
|
||
for (BasicBlock* block = fgFirstBB; (block != nullptr) && ((block->bbFlags & BBF_MARKED) != 0); | ||
block = block->GetUniqueSucc()) | ||
{ | ||
block->bbFlags &= ~BBF_MARKED; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems like it would be less confusing to run this before we build SSA, since it doesn't really leverage or involve SSA (other than skipping phi defs).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This optimization needs to run after liveness so that it can use lvTracked and lvLiveInOutOfHndlr. We run liveness when we build SSA.