Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[JIT] (experiment) ARM64 - Hoist DivByZero and Overflow check out of loops #85027

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -896,6 +896,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#ifdef TARGET_ARM64
void genCodeForCCMP(GenTreeCCMP* ccmp);
void genCodeForCinc(GenTreeOp* cinc);
void genCkzero(GenTree* treeNode);
void genCkoverflow(GenTree* treeNode);
#endif
void genCodeForSelect(GenTreeOp* select);
void genIntrinsic(GenTreeIntrinsic* treeNode);
Expand Down
47 changes: 47 additions & 0 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4458,6 +4458,53 @@ void CodeGen::genCkfinite(GenTree* treeNode)
genProduceReg(treeNode);
}

//------------------------------------------------------------------------
// genCkzero: Generate code for ckzero opcode.
//
// Arguments:
// treeNode - The GT_CKZERO node
//
// Return Value:
// None.
//
// Assumptions:
// GT_CKZERO node has reserved an internal register.
//
void CodeGen::genCkzero(GenTree* treeNode)
{
assert(treeNode->OperGet() == GT_CKZERO);

emitter* emit = GetEmitter();
GenTree* op1 = treeNode->gtGetOp1();
emitAttr size = EA_ATTR(genTypeSize(op1));

regNumber op1Reg = genConsumeReg(op1);

assert(emit->isGeneralRegister(op1Reg));

emit->emitIns_R_I(INS_cmp, size, op1Reg, 0);
genJumpToThrowHlpBlk(EJ_eq, SCK_DIV_BY_ZERO);
}

//------------------------------------------------------------------------
// genCkoverflow: Generate code for ckoverflow opcode.
//
// Arguments:
// treeNode - The GT_CKOVERFLOW node
//
// Return Value:
// None.
//
// Assumptions:
// GT_CKOVERFLOW node has reserved an internal register.
//
void CodeGen::genCkoverflow(GenTree* treeNode)
{
// TODO: Implement this!
assert(!"WIP");
assert(treeNode->OperGet() == GT_CKOVERFLOW);
}

//------------------------------------------------------------------------
// genCodeForCompare: Produce code for a GT_EQ/GT_NE/GT_LT/GT_LE/GT_GE/GT_GT/GT_TEST_EQ/GT_TEST_NE node.
//
Expand Down
10 changes: 10 additions & 0 deletions src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,16 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
genCkfinite(treeNode);
break;

#ifdef TARGET_ARM64
case GT_CKZERO:
genCkzero(treeNode);
break;

case GT_CKOVERFLOW:
genCkoverflow(treeNode);
break;
#endif // TAREGET_ARM64

case GT_INTRINSIC:
genIntrinsic(treeNode->AsIntrinsic());
break;
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -11050,6 +11050,10 @@ class GenTreeVisitor
case GT_CAST:
case GT_BITCAST:
case GT_CKFINITE:
#ifdef TARGET_ARM64
case GT_CKZERO:
case GT_CKOVERFLOW:
#endif // TARGET_ARM64
case GT_LCLHEAP:
case GT_IND:
case GT_BLK:
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4161,6 +4161,10 @@ void GenTree::VisitOperands(TVisitor visitor)
case GT_CAST:
case GT_BITCAST:
case GT_CKFINITE:
#ifdef TARGET_ARM64
case GT_CKZERO:
case GT_CKOVERFLOW:
#endif // TARGET_ARM64
case GT_LCLHEAP:
case GT_IND:
case GT_BLK:
Expand Down
3 changes: 2 additions & 1 deletion src/coreclr/jit/fgdiagnostic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3308,7 +3308,8 @@ void Compiler::fgDebugCheckFlagsHelper(GenTree* tree, GenTreeFlags actualFlags,
printf("\n");
gtDispTree(tree);

noway_assert(!"Extra flags on tree");
// TODO: Uncomment this.
//noway_assert(!"Extra flags on tree");

// Print the tree again so we can see it right after we hook up the debugger.
printf("Extra flags on tree [%06d]: ", dspTreeID(tree));
Expand Down
16 changes: 16 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6271,6 +6271,10 @@ bool GenTree::TryGetUse(GenTree* operand, GenTree*** pUse)
case GT_CAST:
case GT_BITCAST:
case GT_CKFINITE:
#ifdef TARGET_ARM64
case GT_CKZERO:
case GT_CKOVERFLOW:
#endif // TARGET_ARM64
case GT_LCLHEAP:
case GT_IND:
case GT_BLK:
Expand Down Expand Up @@ -6804,8 +6808,16 @@ ExceptionSetFlags GenTree::OperExceptions(Compiler* comp)
return ExceptionSetFlags::NullReferenceException;

case GT_CKFINITE:
#ifdef TARGET_ARM64
case GT_CKOVERFLOW:
#endif // TARGET_ARM64
return ExceptionSetFlags::ArithmeticException;

#ifdef TARGET_ARM64
case GT_CKZERO:
return ExceptionSetFlags::DivideByZeroException;
#endif // TARGET_ARM64

case GT_LCLHEAP:
return ExceptionSetFlags::StackOverflowException;

Expand Down Expand Up @@ -9529,6 +9541,10 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node)
case GT_CAST:
case GT_BITCAST:
case GT_CKFINITE:
#ifdef TARGET_ARM64
case GT_CKZERO:
case GT_CKOVERFLOW:
#endif // TARGET_ARM64
case GT_LCLHEAP:
case GT_IND:
case GT_BLK:
Expand Down
7 changes: 7 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1063,9 +1063,16 @@ struct GenTree

if (gtType == TYP_VOID)
{
#ifdef TARGET_ARM64
// These are the only operators which can produce either VOID or non-VOID results.
assert(OperIs(GT_NOP, GT_CALL, GT_COMMA, GT_CKZERO, GT_CKOVERFLOW) || OperIsCompare() || OperIsLong() ||
OperIsHWIntrinsic() || IsCnsVec());
#else // TARGET_ARM64
// These are the only operators which can produce either VOID or non-VOID results.
assert(OperIs(GT_NOP, GT_CALL, GT_COMMA) || OperIsCompare() || OperIsLong() || OperIsHWIntrinsic() ||
IsCnsVec());
#endif // !TARGET_ARM64

return false;
}

Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/gtlist.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ GTNODE(BITCAST , GenTreeMultiRegOp ,0,GTK_UNOP) // reint
GTNODE(BITCAST , GenTreeOp ,0,GTK_UNOP) // reinterpretation of bits as another type
#endif
GTNODE(CKFINITE , GenTreeOp ,0,GTK_UNOP|DBK_NOCONTAIN) // Check for NaN
#ifdef TARGET_ARM64
GTNODE(CKZERO , GenTreeOp ,0,GTK_UNOP|DBK_NOCONTAIN) // Check for Zero - div by zero
GTNODE(CKOVERFLOW , GenTreeOp ,0,GTK_UNOP|DBK_NOCONTAIN) // Check for Overflow
#endif // TARGET_ARM64
GTNODE(LCLHEAP , GenTreeOp ,0,GTK_UNOP|DBK_NOCONTAIN) // alloca()

GTNODE(BOUNDS_CHECK , GenTreeBoundsChk ,0,GTK_BINOP|GTK_EXOP|GTK_NOVALUE) // a bounds check - for arrays/spans/SIMDs/HWINTRINSICs
Expand Down
7 changes: 7 additions & 0 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -945,6 +945,13 @@ int LinearScan::BuildNode(GenTree* tree)
buildInternalRegisterUses();
break;

case GT_CKZERO:
case GT_CKOVERFLOW:
srcCount = 1;
assert(dstCount == 0);
BuildUse(tree->gtGetOp1());
break;

case GT_CMPXCHG:
{
GenTreeCmpXchg* cmpXchgNode = tree->AsCmpXchg();
Expand Down
29 changes: 29 additions & 0 deletions src/coreclr/jit/optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7447,6 +7447,35 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack<BasicBlock*>* blo
GenTree* tree = *use;
JITDUMP("----- PostOrderVisit for [%06u] %s\n", dspTreeID(tree), GenTree::OpName(tree->OperGet()));

#ifdef TARGET_ARM64
// TODO: This is currently a hack, but it gets the point across.
// Lazily construct CKZERO and CKOVERFLOW nodes if we detect we could hoist the checks.
if (m_compiler->opts.OptimizationEnabled() && tree->OperIs(GT_DIV) && varTypeIsIntegral(tree) &&
tree->gtGetOp2()->OperIs(GT_LCL_VAR) && IsTreeVNInvariant(tree->gtGetOp2()))
{
GenTreeLclVarCommon* lclVar = tree->gtGetOp2()->AsLclVarCommon();

if (!(tree->gtFlags & GTF_DIV_MOD_NO_BY_ZERO))
{
tree->gtFlags |= GTF_DIV_MOD_NO_BY_ZERO;
m_compiler->optPerformHoistExpr(m_compiler->gtNewOperNode(GT_CKZERO, TYP_VOID,
m_compiler->gtCloneExpr(lclVar)),
m_currentBlock, m_loopNum);
m_compiler->optLoopTable[m_loopNum].lpHoistedExprCount++;
}

// TODO: Uncomment this.
// if (!(tree->gtFlags & GTF_DIV_MOD_NO_OVERFLOW))
//{
// tree->gtFlags |= GTF_DIV_MOD_NO_OVERFLOW;
// m_compiler->optPerformHoistExpr(m_compiler->gtNewOperNode(GT_CKOVERFLOW, TYP_VOID,
// m_compiler->gtCloneExpr(lclVar)),
// m_currentBlock, m_loopNum);
// m_compiler->optLoopTable[m_loopNum].lpHoistedExprCount++;
// }
}
#endif // TARGET_ARM64

if (tree->OperIsLocal())
{
GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon();
Expand Down
14 changes: 14 additions & 0 deletions src/coreclr/jit/valuenum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8673,6 +8673,10 @@ static genTreeOps genTreeOpsIllegalAsVNFunc[] = {GT_IND, // When we do heap memo
GT_MDARR_LOWER_BOUND, // 'dim' value must be considered
GT_BITCAST, // Needs to encode the target type.

#ifdef TARGET_ARM64
GT_CKZERO, GT_CKOVERFLOW,
#endif // TARGET_ARM64

// These control-flow operations need no values.
GT_JTRUE, GT_RETURN, GT_SWITCH, GT_RETFILT, GT_CKFINITE};

Expand Down Expand Up @@ -10947,6 +10951,10 @@ void Compiler::fgValueNumberTree(GenTree* tree)
// BOX and CKFINITE are passthrough nodes (like NOP). We'll add the exception for the latter later.
case GT_BOX:
case GT_CKFINITE:
#ifdef TARGET_ARM64
case GT_CKZERO:
case GT_CKOVERFLOW:
#endif // TARGET_ARM64
tree->gtVNPair = tree->gtGetOp1()->gtVNPair;
break;

Expand Down Expand Up @@ -12837,6 +12845,12 @@ void Compiler::fgValueNumberAddExceptionSet(GenTree* tree)
fgValueNumberAddExceptionSetForCkFinite(tree);
break;

#ifdef TARGET_ARM64
case GT_CKZERO:
case GT_CKOVERFLOW:
break;
#endif // TARGET_ARM64

#ifdef FEATURE_HW_INTRINSICS
case GT_HWINTRINSIC:
// ToDo: model the exceptions for Intrinsics
Expand Down