Skip to content

Commit

Permalink
JIT: Support containing compares in GT_SELECT for xarch
Browse files Browse the repository at this point in the history
This adds support for contained compares in GT_SELECT nodes for xarch.
As part of this, also enables if-conversion on xarch.

Fix dotnet#6749
  • Loading branch information
jakobbotsch committed Jan 27, 2023
1 parent 9b76c28 commit 2963b54
Show file tree
Hide file tree
Showing 7 changed files with 225 additions and 37 deletions.
4 changes: 3 additions & 1 deletion src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -1532,9 +1532,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
instruction genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue);
#endif // TARGET_XARCH

#ifdef TARGET_ARM64
#if defined(TARGET_ARM64)
static insCflags InsCflagsForCcmp(GenCondition cond);
static insCond JumpKindToInsCond(emitJumpKind condition);
#elif defined(TARGET_XARCH)
static instruction JumpKindToCmov(emitJumpKind condition);
#endif

#ifndef TARGET_LOONGARCH64
Expand Down
13 changes: 10 additions & 3 deletions src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1595,6 +1595,14 @@ void CodeGen::genConsumeRegs(GenTree* tree)
{
genConsumeAddress(tree);
}
#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
else if (tree->OperIsCompare())
{
// Compares can be contained by SELECT/compare chains.
genConsumeRegs(tree->gtGetOp1());
genConsumeRegs(tree->gtGetOp2());
}
#endif
#ifdef TARGET_ARM64
else if (tree->OperIs(GT_BFIZ))
{
Expand All @@ -1610,10 +1618,9 @@ void CodeGen::genConsumeRegs(GenTree* tree)
assert(cast->isContained());
genConsumeAddress(cast->CastOp());
}
else if (tree->OperIsCompare() || tree->OperIs(GT_AND))
else if (tree->OperIs(GT_AND))
{
// Compares can be contained by a SELECT.
// ANDs and Cmp Compares may be contained in a chain.
// ANDs may be contained in a chain.
genConsumeRegs(tree->gtGetOp1());
genConsumeRegs(tree->gtGetOp2());
}
Expand Down
161 changes: 142 additions & 19 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1303,6 +1303,46 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree)
}
}

//------------------------------------------------------------------------
// JumpKindToCmov:
// Convert an emitJumpKind to the corresponding cmov instruction.
//
// Arguments:
// condition - the condition
//
// Returns:
// A cmov instruction.
//
instruction CodeGen::JumpKindToCmov(emitJumpKind condition)
{
static constexpr instruction s_table[EJ_COUNT] = {
INS_none, INS_none, INS_cmovo, INS_cmovno, INS_cmovb, INS_cmovae, INS_cmove, INS_cmovne, INS_cmovbe,
INS_cmova, INS_cmovs, INS_cmovns, INS_cmovp, INS_cmovnp, INS_cmovl, INS_cmovge, INS_cmovle, INS_cmovg,
};

static_assert_no_msg(s_table[EJ_NONE] == INS_none);
static_assert_no_msg(s_table[EJ_jmp] == INS_none);
static_assert_no_msg(s_table[EJ_jo] == INS_cmovo);
static_assert_no_msg(s_table[EJ_jno] == INS_cmovno);
static_assert_no_msg(s_table[EJ_jb] == INS_cmovb);
static_assert_no_msg(s_table[EJ_jae] == INS_cmovae);
static_assert_no_msg(s_table[EJ_je] == INS_cmove);
static_assert_no_msg(s_table[EJ_jne] == INS_cmovne);
static_assert_no_msg(s_table[EJ_jbe] == INS_cmovbe);
static_assert_no_msg(s_table[EJ_ja] == INS_cmova);
static_assert_no_msg(s_table[EJ_js] == INS_cmovs);
static_assert_no_msg(s_table[EJ_jns] == INS_cmovns);
static_assert_no_msg(s_table[EJ_jp] == INS_cmovp);
static_assert_no_msg(s_table[EJ_jnp] == INS_cmovnp);
static_assert_no_msg(s_table[EJ_jl] == INS_cmovl);
static_assert_no_msg(s_table[EJ_jge] == INS_cmovge);
static_assert_no_msg(s_table[EJ_jle] == INS_cmovle);
static_assert_no_msg(s_table[EJ_jg] == INS_cmovg);

assert((condition >= EJ_NONE) && (condition < EJ_COUNT));
return s_table[condition];
}

//------------------------------------------------------------------------
// genCodeForCompare: Produce code for a GT_SELECT/GT_SELECT_HI node.
//
Expand All @@ -1317,37 +1357,123 @@ void CodeGen::genCodeForSelect(GenTreeOp* select)
assert(select->OperIs(GT_SELECT));
#endif

regNumber dstReg = select->GetRegNum();
if (select->OperIs(GT_SELECT))
{
genConsumeReg(select->AsConditional()->gtCond);
genConsumeRegs(select->AsConditional()->gtCond);
}

genConsumeOperands(select);

instruction cmovKind = INS_cmovne;
GenTree* trueVal = select->gtOp1;
GenTree* falseVal = select->gtOp2;
regNumber dstReg = select->GetRegNum();

GenTree* trueVal = select->gtOp1;
GenTree* falseVal = select->gtOp2;

GenCondition cc = GenCondition::NE;

if (select->OperIs(GT_SELECT))
{
GenTree* cond = select->AsConditional()->gtCond;
if (cond->isContained())
{
cc = GenCondition::FromRelop(cond);
}
}

// The usual codegen will be
// mov targetReg, falseValue
// cmovne targetReg, trueValue
//
// However, if the 'true' operand was allocated the same register as the
// target register then prefer to generate
//
// mov targetReg, trueValue
// cmove targetReg, falseValue
//
// so the first mov is elided.
//
if (falseVal->isUsedFromReg() && (falseVal->GetRegNum() == dstReg))
{
std::swap(trueVal, falseVal);
cc = GenCondition::Reverse(cc);
}

// The next conditions are constraints from the instruction's side. Lowering ensures that the constraints are satisfies
// - For contained constants, they do not end up in the cmov
// - For contained indirs, they do not end up in the cmov if we need multiple cmovs for the comparison
if (trueVal->isContained() && trueVal->IsCnsIntOrI())
{
// cmov instruction cannot handle contained constants, so swap it to the mov
std::swap(trueVal, falseVal);
cc = GenCondition::Reverse(cc);
}

GenConditionDesc desc = GenConditionDesc::Get(cc);

// OTOH, we cannot do anything if we need to check two conditions for the relop,
// so reverse it into an 'or' where we can emit two cmovs.
if (desc.oper == GT_AND)
{
std::swap(trueVal, falseVal);
cc = GenCondition::Reverse(cc);
desc = GenConditionDesc::Get(cc);
}

// If the 'true' operand was allocated the same register as the target
// register then flip it to the false value so we can skip a reg-reg mov.
if (trueVal->isUsedFromReg() && (trueVal->GetRegNum() == dstReg))
// Finally, with multiple cmovs lowering should have ensured that the
// 'true' value is not contained when reversed into the GT_OR case. We may
// need to reverse it again due to the above logic to make it the case.
if ((desc.oper != GT_NONE) && !trueVal->isUsedFromReg())
{
std::swap(trueVal, falseVal);
cmovKind = INS_cmove;
cc = GenCondition::Reverse(cc);
desc = GenConditionDesc::Get(cc);
}

// If the falseVal is a constant 0 and we do not interfere with any of the
// registers used by the comparre then zero it out with xor before the
// test.
bool genFalse = true;
if (select->OperIs(GT_SELECT) &&
falseVal->isContained() &&
falseVal->IsIntegralConst(0) &&
((select->AsConditional()->gtCond->gtGetContainedRegMask() & (genRegMask(dstReg))) == 0))
{
instGen_Set_Reg_To_Zero(emitTypeSize(select), dstReg);
genFalse = false;
}

if (select->OperIs(GT_SELECT))
{
// TODO-CQ: Support contained relops here.
assert(select->AsConditional()->gtCond->isUsedFromReg());
GenTree* cond = select->AsConditional()->gtCond;
if (cond->isContained())
{
assert(cond->OperIsCompare());
genCodeForCompare(cond->AsOp());
}
else
{
regNumber condReg = cond->GetRegNum();
GetEmitter()->emitIns_R_R(INS_test, EA_4BYTE, condReg, condReg);
}
}

regNumber condReg = select->AsConditional()->gtCond->GetRegNum();
GetEmitter()->emitIns_R_R(INS_test, EA_4BYTE, condReg, condReg);
if (genFalse)
{
// Note that this relies on inst_RV_TT not generating xor dstReg,
// dstReg for contained 0, which would kill the flags.
// When the compare does not interfere we handle it above.
inst_RV_TT(INS_mov, emitTypeSize(select), dstReg, falseVal);
}

inst_RV_TT(INS_mov, emitTypeSize(select), dstReg, falseVal);
inst_RV_TT(cmovKind, emitTypeSize(select), dstReg, trueVal);
assert(!trueVal->isContained() || trueVal->isUsedFromMemory());
inst_RV_TT(JumpKindToCmov(desc.jumpKind1), emitTypeSize(select), dstReg, trueVal);

if (desc.oper != GT_NONE)
{
assert(desc.oper == GT_OR);
assert(trueVal->isUsedFromReg());
inst_RV_TT(JumpKindToCmov(desc.jumpKind2), emitTypeSize(select), dstReg, trueVal);
}

genProduceReg(select);
}
Expand Down Expand Up @@ -1765,6 +1891,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
case GT_TEST_EQ:
case GT_TEST_NE:
case GT_CMP:
genConsumeOperands(treeNode->AsOp());
genCodeForCompare(treeNode->AsOp());
break;

Expand Down Expand Up @@ -6479,8 +6606,6 @@ void CodeGen::genCompareFloat(GenTree* treeNode)
var_types op1Type = op1->TypeGet();
var_types op2Type = op2->TypeGet();

genConsumeOperands(tree);

assert(varTypeIsFloating(op1Type));
assert(op1Type == op2Type);

Expand Down Expand Up @@ -6554,8 +6679,6 @@ void CodeGen::genCompareInt(GenTree* treeNode)
emitter* emit = GetEmitter();
bool canReuseFlags = false;

genConsumeOperands(tree);

assert(!op1->isContainedIntOrIImmed());
assert(!varTypeIsFloating(op2Type));

Expand Down
18 changes: 10 additions & 8 deletions src/coreclr/jit/ifconversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,15 @@ bool OptIfConversionDsc::IfConvertCheckStmts(BasicBlock* fromBlock, IfConvertOpe
return false;
}

#ifdef TARGET_64BIT
// Disallow 64-bit operands on 32-bit targets as the
// backend currently cannot handle contained compares efficiently.
if (varTypeIsLong(tree))
{
return false;
}
#endif

// Ensure it won't cause any additional side effects.
if ((op1->gtFlags & (GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF)) != 0)
{
Expand Down Expand Up @@ -621,12 +630,6 @@ bool OptIfConversionDsc::optIfConvert()
// Put a limit on the original source and destinations.
if (!m_comp->compStressCompile(Compiler::STRESS_IF_CONVERSION_COST, 25))
{
#ifdef TARGET_XARCH
// xarch does not support containing relops in GT_SELECT nodes
// currently so only introduce GT_SELECT in stress.
JITDUMP("Skipping if-conversion on xarch\n");
return false;
#else
int thenCost = 0;
int elseCost = 0;

Expand Down Expand Up @@ -657,7 +660,6 @@ bool OptIfConversionDsc::optIfConvert()
elseCost);
return false;
}
#endif
}

// Get the select node inputs.
Expand Down Expand Up @@ -777,7 +779,7 @@ PhaseStatus Compiler::optIfConversion()
// Currently only enabled on arm64 and under debug on xarch, since we only
// do it under stress.
CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(TARGET_ARM64) || (defined(TARGET_XARCH) && defined(DEBUG))
#if defined(TARGET_ARM64) || defined(TARGET_XARCH)
// Reverse iterate through the blocks.
BasicBlock* block = fgLastBB;
while (block != nullptr)
Expand Down
50 changes: 47 additions & 3 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5874,7 +5874,42 @@ void Lowering::ContainCheckSelect(GenTreeOp* select)
assert(select->OperIs(GT_SELECT, GT_SELECT_HI));
#endif

// TODO-CQ: Support containing relops here for the GT_SELECT case.
bool canContainTrueVal = true;
bool canContainFalseVal = true;
// Disallow containing compares if the flags may be used by follow-up
// nodes, in which case those nodes expect zero/non-zero in the flags.
if (select->OperIs(GT_SELECT) && ((select->gtFlags & GTF_SET_FLAGS) == 0))
{
GenTree* cond = select->AsConditional()->gtCond;

if (cond->OperIsCompare())
{
MakeSrcContained(select, cond);

// Some compares require multiple cmovs to implement, see the
// comment in Codegen::GebConditionDesc::map. For those cases avoid
// containing the operand that ends up in the cmov so that we don't
// incur the memory access/address calculation twice.
GenCondition cc = GenCondition::FromRelop(cond);
switch (cc.GetCode())
{
case GenCondition::FEQ:
case GenCondition::FLT:
case GenCondition::FLE:
// AND comparison requires reversing into an OR comparison, so
// 'false' value ends up in the cmov.
canContainFalseVal = false;
break;
case GenCondition::FNEU:
case GenCondition::FGEU:
case GenCondition::FGTU:
canContainTrueVal = false;
break;
default:
break;
}
}
}

GenTree* op1 = select->gtOp1;
GenTree* op2 = select->gtOp2;
Expand All @@ -5885,7 +5920,7 @@ void Lowering::ContainCheckSelect(GenTreeOp* select)
unsigned operSize = genTypeSize(select);
assert((operSize == 4) || (operSize == TARGET_POINTER_SIZE));

if (genTypeSize(op1) == operSize)
if (canContainTrueVal && (genTypeSize(op1) == operSize))
{
if (IsContainableMemoryOp(op1))
{
Expand All @@ -5900,7 +5935,7 @@ void Lowering::ContainCheckSelect(GenTreeOp* select)
}
}

if (genTypeSize(op2) == operSize)
if (canContainFalseVal && (genTypeSize(op2) == operSize))
{
if (IsContainableMemoryOp(op2))
{
Expand All @@ -5914,6 +5949,15 @@ void Lowering::ContainCheckSelect(GenTreeOp* select)
MakeSrcRegOptional(select, op2);
}
}

if (canContainTrueVal && op1->IsCnsIntOrI() && !op2->isContained())
{
MakeSrcContained(select, op1);
}
else if (canContainFalseVal && op2->IsCnsIntOrI() && !op1->isContained())
{
MakeSrcContained(select, op2);
}
}

//------------------------------------------------------------------------
Expand Down
12 changes: 9 additions & 3 deletions src/coreclr/jit/lsrabuild.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3224,12 +3224,18 @@ int LinearScan::BuildOperandUses(GenTree* node, regMaskTP candidates)
return BuildOperandUses(hwintrinsic->Op(1), candidates);
}
#endif // FEATURE_HW_INTRINSICS
#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
if (node->OperIsCompare())
{
// Compares can be contained by a SELECT/compare chains.
return BuildBinaryUses(node->AsOp(), candidates);
}
#endif
#ifdef TARGET_ARM64
if (node->OperIs(GT_MUL) || node->OperIsCompare() || node->OperIs(GT_AND))
if (node->OperIs(GT_MUL) || node->OperIs(GT_AND))
{
// MUL can be contained for madd or msub on arm64.
// Compares can be contained by a SELECT.
// ANDs and Cmp Compares may be contained in a chain.
// ANDs may be contained in a chain.
return BuildBinaryUses(node->AsOp(), candidates);
}
if (node->OperIs(GT_NEG, GT_CAST, GT_LSH, GT_RSH, GT_RSZ))
Expand Down
Loading

0 comments on commit 2963b54

Please sign in to comment.