Skip to content

Commit

Permalink
Optimize "X / C" via cmovns
Browse files Browse the repository at this point in the history
  • Loading branch information
EgorBo committed Aug 29, 2020
1 parent 436f155 commit 9b1d149
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 6 deletions.
52 changes: 51 additions & 1 deletion src/coreclr/src/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -773,6 +773,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
GenTree* divisor = treeNode->gtOp2;
genTreeOps oper = treeNode->OperGet();
emitAttr size = emitTypeSize(treeNode);
regNumber operandReg = dividend->GetRegNum();
regNumber targetReg = treeNode->GetRegNum();
var_types targetType = treeNode->TypeGet();
emitter* emit = GetEmitter();
Expand All @@ -783,7 +784,56 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
// dividend is in a register.
assert(dividend->isUsedFromReg());

genConsumeOperands(treeNode->AsOp());
genConsumeReg(dividend);

if (treeNode->OperIs(GT_DIV) && treeNode->TypeIs(TYP_INT, TYP_LONG) &&
divisor->IsIntegralConst())
{
if (operandReg == targetReg)
{
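// The sequence below writes targetReg (lea) before it re-reads the dividend
// (test / cmovns), so if the dividend already lives in targetReg it would be
// clobbered; copy it to a temp register first.
// NOTE(review): REG_RDX is hard-coded as the temp - confirm it is reserved for this node.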
inst_RV_RV(INS_mov, REG_RDX, operandReg, targetType);
operandReg = REG_RDX;
}

const ssize_t cnsDivisor = divisor->AsIntConCommon()->IconValue();
const size_t absCnsDivisor = abs(cnsDivisor);
if (absCnsDivisor >= 4 && isPow2(absCnsDivisor))
{
if (absCnsDivisor <= (1UL << 30))
{
// lea rax, [rdx + (absCnsDivisor - 1)]
emit->emitIns_R_AR(INS_lea, size, targetReg, operandReg, static_cast<int>(absCnsDivisor - 1));
}
else
{
// mov + add
assert(false); // TODO
}

// test rdx, rdx
emit->emitIns_R_R(INS_test, size, operandReg, operandReg);

// cmovns rax, rdx
emit->emitIns_R_R(INS_cmovns, size, targetReg, operandReg);

// sar rax, log2(absCnsDivisor)
emit->emitIns_R_I(INS_sar_N, size, targetReg, genLog2(static_cast<size_t>(absCnsDivisor)));

if (cnsDivisor < 0)
{
// neg rax
emit->emitIns_R(INS_neg, size, targetReg);
}

genProduceReg(treeNode);
return;
}
}

genConsumeRegs(divisor);

// dividend must be in RAX
genCopyRegIfNeeded(dividend, REG_RAX);

Expand Down
13 changes: 8 additions & 5 deletions src/coreclr/src/jit/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1138,11 +1138,14 @@ float emitter::insEvaluateExecutionCost(instrDesc* id)
//
void emitter::perfScoreUnhandledInstruction(instrDesc* id, insExecutionCharacteristics* pResult)
{
    // The DEBUG-only diagnostic that used to live here is currently switched off,
    // so an unhandled instruction no longer prints a message or asserts:
    //
    //   #ifdef DEBUG
    //       printf("PerfScore: unhandled instruction: %s, format %s", codeGen->genInsName(id->idIns()),
    //              emitIfName(id->idInsFmt()));
    //       assert(!"PerfScore: unhandled instruction");
    //   #endif
    //
    // TODO: update perfscore for CMOV* instructions, then re-enable the diagnostic above.

    // Fall back to the 1C latency / 1C throughput constants for anything unhandled.
    pResult->insLatency    = PERFSCORE_LATENCY_1C;
    pResult->insThroughput = PERFSCORE_THROUGHPUT_1C;
}
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/src/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5404,6 +5404,14 @@ GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node)
#endif
}

if (isDiv && absDivisorValue >= 4 && isPow2(absDivisorValue) && comp->opts.compUseCMOV)
{
divisor->SetContained();
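// Don't expand signed "X / C" into RSH+AND+ADD when |C| is a power of two (>= 4)
// and the CMOV instruction is available; the node is left as a division with a
// contained constant divisor so that codegen can handle it instead.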
return nullptr;
}

// We're committed to the conversion now. Go find the use if any.
LIR::Use use;
if (!BlockRange().TryGetUse(node, &use))
Expand Down

0 comments on commit 9b1d149

Please sign in to comment.