From 2963b5495e29631067a0f2c930b447800d3ba1bb Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 27 Jan 2023 15:41:38 +0100 Subject: [PATCH] JIT: Support containing compares in GT_SELECT for xarch This adds support for contained compares in GT_SELECT nodes for xarch. As part of this, also enables if-conversion on xarch. Fix #6749 --- src/coreclr/jit/codegen.h | 4 +- src/coreclr/jit/codegenlinear.cpp | 13 ++- src/coreclr/jit/codegenxarch.cpp | 161 ++++++++++++++++++++++++++---- src/coreclr/jit/ifconversion.cpp | 18 ++-- src/coreclr/jit/lowerxarch.cpp | 50 +++++++++- src/coreclr/jit/lsrabuild.cpp | 12 ++- src/coreclr/jit/lsraxarch.cpp | 4 + 7 files changed, 225 insertions(+), 37 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index aa3fbefad7003..32305a70ab89b 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1532,9 +1532,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX instruction genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue); #endif // TARGET_XARCH -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) static insCflags InsCflagsForCcmp(GenCondition cond); static insCond JumpKindToInsCond(emitJumpKind condition); +#elif defined(TARGET_XARCH) + static instruction JumpKindToCmov(emitJumpKind condition); #endif #ifndef TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index b510ce4a558dc..30a2c89f399ad 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1595,6 +1595,14 @@ void CodeGen::genConsumeRegs(GenTree* tree) { genConsumeAddress(tree); } +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) + else if (tree->OperIsCompare()) + { + // Compares can be contained by SELECT/compare chains. 
+ genConsumeRegs(tree->gtGetOp1()); + genConsumeRegs(tree->gtGetOp2()); + } +#endif #ifdef TARGET_ARM64 else if (tree->OperIs(GT_BFIZ)) { @@ -1610,10 +1618,9 @@ void CodeGen::genConsumeRegs(GenTree* tree) assert(cast->isContained()); genConsumeAddress(cast->CastOp()); } - else if (tree->OperIsCompare() || tree->OperIs(GT_AND)) + else if (tree->OperIs(GT_AND)) { - // Compares can be contained by a SELECT. - // ANDs and Cmp Compares may be contained in a chain. + // ANDs may be contained in a chain. genConsumeRegs(tree->gtGetOp1()); genConsumeRegs(tree->gtGetOp2()); } diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 5c1c26150eae1..187e1f1ee55c1 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -1303,6 +1303,46 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree) } } +//------------------------------------------------------------------------ +// JumpKindToCmov: +// Convert an emitJumpKind to the corresponding cmov instruction. +// +// Arguments: +// condition - the condition +// +// Returns: +// A cmov instruction. 
+// +instruction CodeGen::JumpKindToCmov(emitJumpKind condition) +{ + static constexpr instruction s_table[EJ_COUNT] = { + INS_none, INS_none, INS_cmovo, INS_cmovno, INS_cmovb, INS_cmovae, INS_cmove, INS_cmovne, INS_cmovbe, + INS_cmova, INS_cmovs, INS_cmovns, INS_cmovp, INS_cmovnp, INS_cmovl, INS_cmovge, INS_cmovle, INS_cmovg, + }; + + static_assert_no_msg(s_table[EJ_NONE] == INS_none); + static_assert_no_msg(s_table[EJ_jmp] == INS_none); + static_assert_no_msg(s_table[EJ_jo] == INS_cmovo); + static_assert_no_msg(s_table[EJ_jno] == INS_cmovno); + static_assert_no_msg(s_table[EJ_jb] == INS_cmovb); + static_assert_no_msg(s_table[EJ_jae] == INS_cmovae); + static_assert_no_msg(s_table[EJ_je] == INS_cmove); + static_assert_no_msg(s_table[EJ_jne] == INS_cmovne); + static_assert_no_msg(s_table[EJ_jbe] == INS_cmovbe); + static_assert_no_msg(s_table[EJ_ja] == INS_cmova); + static_assert_no_msg(s_table[EJ_js] == INS_cmovs); + static_assert_no_msg(s_table[EJ_jns] == INS_cmovns); + static_assert_no_msg(s_table[EJ_jp] == INS_cmovp); + static_assert_no_msg(s_table[EJ_jnp] == INS_cmovnp); + static_assert_no_msg(s_table[EJ_jl] == INS_cmovl); + static_assert_no_msg(s_table[EJ_jge] == INS_cmovge); + static_assert_no_msg(s_table[EJ_jle] == INS_cmovle); + static_assert_no_msg(s_table[EJ_jg] == INS_cmovg); + + assert((condition >= EJ_NONE) && (condition < EJ_COUNT)); + return s_table[condition]; +} + //------------------------------------------------------------------------ // genCodeForCompare: Produce code for a GT_SELECT/GT_SELECT_HI node. 
 //
@@ -1317,37 +1357,123 @@ void CodeGen::genCodeForSelect(GenTreeOp* select)
     assert(select->OperIs(GT_SELECT));
 #endif
 
-    regNumber dstReg = select->GetRegNum();
     if (select->OperIs(GT_SELECT))
     {
-        genConsumeReg(select->AsConditional()->gtCond);
+        genConsumeRegs(select->AsConditional()->gtCond);
     }
 
     genConsumeOperands(select);
 
-    instruction cmovKind = INS_cmovne;
-    GenTree* trueVal = select->gtOp1;
-    GenTree* falseVal = select->gtOp2;
+    regNumber dstReg = select->GetRegNum();
+
+    GenTree* trueVal = select->gtOp1;
+    GenTree* falseVal = select->gtOp2;
+
+    GenCondition cc = GenCondition::NE;
+
+    if (select->OperIs(GT_SELECT))
+    {
+        GenTree* cond = select->AsConditional()->gtCond;
+        if (cond->isContained())
+        {
+            cc = GenCondition::FromRelop(cond);
+        }
+    }
+
+    // The usual codegen will be
+    // mov targetReg, falseValue
+    // cmovne targetReg, trueValue
+    //
+    // However, if the 'true' operand was allocated the same register as the
+    // target register then prefer to generate
+    //
+    // mov targetReg, trueValue
+    // cmove targetReg, falseValue
+    //
+    // so the first mov is elided.
+    //
+    if (trueVal->isUsedFromReg() && (trueVal->GetRegNum() == dstReg))
+    {
+        std::swap(trueVal, falseVal);
+        cc = GenCondition::Reverse(cc);
+    }
+
+    // The next conditions are constraints from the instruction's side. Lowering ensures that the constraints are satisfied
+    // - For contained constants, they do not end up in the cmov
+    // - For contained indirs, they do not end up in the cmov if we need multiple cmovs for the comparison
+    if (trueVal->isContained() && trueVal->IsCnsIntOrI())
+    {
+        // cmov instruction cannot handle contained constants, so swap it to the mov
+        std::swap(trueVal, falseVal);
+        cc = GenCondition::Reverse(cc);
+    }
+
+    GenConditionDesc desc = GenConditionDesc::Get(cc);
+
+    // OTOH, we cannot do anything if we need to check two conditions for the relop,
+    // so reverse it into an 'or' where we can emit two cmovs.
+    if (desc.oper == GT_AND)
+    {
+        std::swap(trueVal, falseVal);
+        cc = GenCondition::Reverse(cc);
+        desc = GenConditionDesc::Get(cc);
+    }
 
-    // If the 'true' operand was allocated the same register as the target
-    // register then flip it to the false value so we can skip a reg-reg mov.
-    if (trueVal->isUsedFromReg() && (trueVal->GetRegNum() == dstReg))
+    // Finally, with multiple cmovs lowering should have ensured that the
+    // 'true' value is not contained when reversed into the GT_OR case. We may
+    // need to reverse it again due to the above logic to make it the case.
+    if ((desc.oper != GT_NONE) && !trueVal->isUsedFromReg())
     {
         std::swap(trueVal, falseVal);
-        cmovKind = INS_cmove;
+        cc = GenCondition::Reverse(cc);
+        desc = GenConditionDesc::Get(cc);
+    }
+
+    // If the falseVal is a constant 0 and we do not interfere with any of the
+    // registers used by the compare then zero it out with xor before the
+    // test.
+    bool genFalse = true;
+    if (select->OperIs(GT_SELECT) &&
+        falseVal->isContained() &&
+        falseVal->IsIntegralConst(0) &&
+        ((select->AsConditional()->gtCond->gtGetContainedRegMask() & (genRegMask(dstReg))) == 0))
+    {
+        instGen_Set_Reg_To_Zero(emitTypeSize(select), dstReg);
+        genFalse = false;
     }
 
     if (select->OperIs(GT_SELECT))
     {
-        // TODO-CQ: Support contained relops here.
-        assert(select->AsConditional()->gtCond->isUsedFromReg());
+        GenTree* cond = select->AsConditional()->gtCond;
+        if (cond->isContained())
+        {
+            assert(cond->OperIsCompare());
+            genCodeForCompare(cond->AsOp());
+        }
+        else
+        {
+            regNumber condReg = cond->GetRegNum();
+            GetEmitter()->emitIns_R_R(INS_test, EA_4BYTE, condReg, condReg);
+        }
+    }
 
-        regNumber condReg = select->AsConditional()->gtCond->GetRegNum();
-        GetEmitter()->emitIns_R_R(INS_test, EA_4BYTE, condReg, condReg);
+    if (genFalse)
+    {
+        // Note that this relies on inst_RV_TT not generating xor dstReg,
+        // dstReg for contained 0, which would kill the flags.
+        // When the compare does not interfere we handle it above.
+        inst_RV_TT(INS_mov, emitTypeSize(select), dstReg, falseVal);
     }
 
-    inst_RV_TT(INS_mov, emitTypeSize(select), dstReg, falseVal);
-    inst_RV_TT(cmovKind, emitTypeSize(select), dstReg, trueVal);
+    assert(!trueVal->isContained() || trueVal->isUsedFromMemory());
+    inst_RV_TT(JumpKindToCmov(desc.jumpKind1), emitTypeSize(select), dstReg, trueVal);
+
+    if (desc.oper != GT_NONE)
+    {
+        assert(desc.oper == GT_OR);
+        assert(trueVal->isUsedFromReg());
+        inst_RV_TT(JumpKindToCmov(desc.jumpKind2), emitTypeSize(select), dstReg, trueVal);
+    }
 
     genProduceReg(select);
 }
@@ -1765,6 +1891,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
         case GT_TEST_EQ:
         case GT_TEST_NE:
         case GT_CMP:
+            genConsumeOperands(treeNode->AsOp());
             genCodeForCompare(treeNode->AsOp());
             break;
 
@@ -6479,8 +6606,6 @@ void CodeGen::genCompareFloat(GenTree* treeNode)
     var_types op1Type = op1->TypeGet();
     var_types op2Type = op2->TypeGet();
 
-    genConsumeOperands(tree);
-
     assert(varTypeIsFloating(op1Type));
     assert(op1Type == op2Type);
 
@@ -6554,8 +6679,6 @@ void CodeGen::genCompareInt(GenTree* treeNode)
     emitter* emit = GetEmitter();
     bool canReuseFlags = false;
 
-    genConsumeOperands(tree);
-
     assert(!op1->isContainedIntOrIImmed());
     assert(!varTypeIsFloating(op2Type));
 
diff --git a/src/coreclr/jit/ifconversion.cpp b/src/coreclr/jit/ifconversion.cpp
index 292d915384ae4..153840134c691 100644
--- a/src/coreclr/jit/ifconversion.cpp
+++ b/src/coreclr/jit/ifconversion.cpp
@@ -301,6 +301,15 @@ bool OptIfConversionDsc::IfConvertCheckStmts(BasicBlock* fromBlock, IfConvertOpe
         return false;
     }
 
+#ifndef TARGET_64BIT
+    // Disallow 64-bit operands on 32-bit targets as the
+    // backend currently cannot handle contained compares efficiently.
+    if (varTypeIsLong(tree))
+    {
+        return false;
+    }
+#endif
+
     // Ensure it won't cause any additional side effects.
     if ((op1->gtFlags & (GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF)) != 0)
     {
@@ -621,12 +630,6 @@ bool OptIfConversionDsc::optIfConvert()
     // Put a limit on the original source and destinations.
     if (!m_comp->compStressCompile(Compiler::STRESS_IF_CONVERSION_COST, 25))
     {
-#ifdef TARGET_XARCH
-        // xarch does not support containing relops in GT_SELECT nodes
-        // currently so only introduce GT_SELECT in stress.
-        JITDUMP("Skipping if-conversion on xarch\n");
-        return false;
-#else
         int thenCost = 0;
         int elseCost = 0;
 
@@ -657,7 +660,6 @@ bool OptIfConversionDsc::optIfConvert()
                 elseCost);
         return false;
     }
-#endif
     }
 
     // Get the select node inputs.
@@ -777,7 +779,7 @@ PhaseStatus Compiler::optIfConversion()
     // Currently only enabled on arm64 and under debug on xarch, since we only
     // do it under stress.
     CLANG_FORMAT_COMMENT_ANCHOR;
-#if defined(TARGET_ARM64) || (defined(TARGET_XARCH) && defined(DEBUG))
+#if defined(TARGET_ARM64) || defined(TARGET_XARCH)
     // Reverse iterate through the blocks.
     BasicBlock* block = fgLastBB;
     while (block != nullptr)
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index 81940f9a90ad9..1a59dc216e7dc 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -5874,7 +5874,42 @@ void Lowering::ContainCheckSelect(GenTreeOp* select)
     assert(select->OperIs(GT_SELECT, GT_SELECT_HI));
 #endif
-    // TODO-CQ: Support containing relops here for the GT_SELECT case.
+    bool canContainTrueVal = true;
+    bool canContainFalseVal = true;
+    // Disallow containing compares if the flags may be used by follow-up
+    // nodes, in which case those nodes expect zero/non-zero in the flags.
+    if (select->OperIs(GT_SELECT) && ((select->gtFlags & GTF_SET_FLAGS) == 0))
+    {
+        GenTree* cond = select->AsConditional()->gtCond;
+
+        if (cond->OperIsCompare())
+        {
+            MakeSrcContained(select, cond);
+
+            // Some compares require multiple cmovs to implement, see the
+            // comment in Codegen::GenConditionDesc::map. For those cases avoid
+            // containing the operand that ends up in the cmov so that we don't
+            // incur the memory access/address calculation twice.
+ GenCondition cc = GenCondition::FromRelop(cond); + switch (cc.GetCode()) + { + case GenCondition::FEQ: + case GenCondition::FLT: + case GenCondition::FLE: + // AND comparison requires reversing into an OR comparison, so + // 'false' value ends up in the cmov. + canContainFalseVal = false; + break; + case GenCondition::FNEU: + case GenCondition::FGEU: + case GenCondition::FGTU: + canContainTrueVal = false; + break; + default: + break; + } + } + } GenTree* op1 = select->gtOp1; GenTree* op2 = select->gtOp2; @@ -5885,7 +5920,7 @@ void Lowering::ContainCheckSelect(GenTreeOp* select) unsigned operSize = genTypeSize(select); assert((operSize == 4) || (operSize == TARGET_POINTER_SIZE)); - if (genTypeSize(op1) == operSize) + if (canContainTrueVal && (genTypeSize(op1) == operSize)) { if (IsContainableMemoryOp(op1)) { @@ -5900,7 +5935,7 @@ void Lowering::ContainCheckSelect(GenTreeOp* select) } } - if (genTypeSize(op2) == operSize) + if (canContainFalseVal && (genTypeSize(op2) == operSize)) { if (IsContainableMemoryOp(op2)) { @@ -5914,6 +5949,15 @@ void Lowering::ContainCheckSelect(GenTreeOp* select) MakeSrcRegOptional(select, op2); } } + + if (canContainTrueVal && op1->IsCnsIntOrI() && !op2->isContained()) + { + MakeSrcContained(select, op1); + } + else if (canContainFalseVal && op2->IsCnsIntOrI() && !op1->isContained()) + { + MakeSrcContained(select, op2); + } } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 3908f1998792a..14029a8ccc286 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -3224,12 +3224,18 @@ int LinearScan::BuildOperandUses(GenTree* node, regMaskTP candidates) return BuildOperandUses(hwintrinsic->Op(1), candidates); } #endif // FEATURE_HW_INTRINSICS +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) + if (node->OperIsCompare()) + { + // Compares can be contained by a SELECT/compare chains. 
+ return BuildBinaryUses(node->AsOp(), candidates); + } +#endif #ifdef TARGET_ARM64 - if (node->OperIs(GT_MUL) || node->OperIsCompare() || node->OperIs(GT_AND)) + if (node->OperIs(GT_MUL) || node->OperIs(GT_AND)) { // MUL can be contained for madd or msub on arm64. - // Compares can be contained by a SELECT. - // ANDs and Cmp Compares may be contained in a chain. + // ANDs may be contained in a chain. return BuildBinaryUses(node->AsOp(), candidates); } if (node->OperIs(GT_NEG, GT_CAST, GT_LSH, GT_RSH, GT_RSZ)) diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index c0fd6030c2880..e3ec931f5cac4 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -932,6 +932,8 @@ int LinearScan::BuildSelect(GenTreeOp* select) else { tgtPrefUse = BuildUse(select->gtOp1); + setDelayFree(tgtPrefUse); + srcCount++; } @@ -942,6 +944,8 @@ int LinearScan::BuildSelect(GenTreeOp* select) else { tgtPrefUse2 = BuildUse(select->gtOp2); + setDelayFree(tgtPrefUse2); + srcCount++; }