Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change fgMorphMultiOp to fgMorphHWIntrinsic #103732

Merged
merged 5 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 4 additions & 86 deletions src/coreclr/jit/assertionprop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3117,101 +3117,19 @@ bool Compiler::optIsProfitableToSubstitute(GenTree* dest, BasicBlock* destBlock,

if (inspectIntrinsic)
{
GenTreeHWIntrinsic* parent = destParent->AsHWIntrinsic();
GenTreeVecCon* vecCon = value->AsVecCon();

NamedIntrinsic intrinsicId = parent->GetHWIntrinsicId();
var_types simdBaseType = parent->GetSimdBaseType();
GenTreeHWIntrinsic* parent = destParent->AsHWIntrinsic();
NamedIntrinsic intrinsicId = parent->GetHWIntrinsicId();

if (!HWIntrinsicInfo::CanBenefitFromConstantProp(intrinsicId))
{
return false;
}

// For several of the scenarios below we may skip the costing logic
// For several of the scenarios we may skip the costing logic
// since we know that the operand is always containable and therefore
// is always cost effective to propagate.

switch (intrinsicId)
{
#if defined(TARGET_ARM64)
case NI_Vector64_op_Equality:
case NI_Vector64_op_Inequality:
#endif // TARGET_ARM64
case NI_Vector128_op_Equality:
case NI_Vector128_op_Inequality:
#if defined(TARGET_XARCH)
case NI_Vector256_op_Equality:
case NI_Vector256_op_Inequality:
case NI_Vector512_op_Equality:
case NI_Vector512_op_Inequality:
#endif // TARGET_XARCH
{
// We can optimize when the constant is zero, but only
// for non floating-point since +0.0 == -0.0.
return vecCon->IsZero() && !varTypeIsFloating(simdBaseType);
}

#if defined(TARGET_ARM64)
case NI_AdvSimd_CompareEqual:
case NI_AdvSimd_Arm64_CompareEqual:
case NI_AdvSimd_Arm64_CompareEqualScalar:
{
// We can optimize when the constant is zero due to a
// specialized encoding for the instruction
return vecCon->IsZero();
}

case NI_AdvSimd_CompareGreaterThan:
case NI_AdvSimd_CompareGreaterThanOrEqual:
case NI_AdvSimd_Arm64_CompareGreaterThan:
case NI_AdvSimd_Arm64_CompareGreaterThanOrEqual:
case NI_AdvSimd_Arm64_CompareGreaterThanScalar:
case NI_AdvSimd_Arm64_CompareGreaterThanOrEqualScalar:
{
// We can optimize when the constant is zero, but only
// for signed types, due to a specialized encoding for
// the instruction
return vecCon->IsZero() && !varTypeIsUnsigned(simdBaseType);
}
#endif // TARGET_ARM64

#if defined(TARGET_XARCH)
case NI_SSE41_Insert:
{
// We can optimize for float when the constant is zero
// due to a specialized encoding for the instruction
return (simdBaseType == TYP_FLOAT) && vecCon->IsZero();
}

case NI_EVEX_CompareEqualMask:
case NI_EVEX_CompareNotEqualMask:
{
// We can optimize when the constant is zero, but only
// for non floating-point since +0.0 == -0.0
return vecCon->IsZero() && !varTypeIsFloating(simdBaseType);
}
#endif // TARGET_XARCH

case NI_Vector128_Shuffle:
#if defined(TARGET_XARCH)
case NI_Vector256_Shuffle:
case NI_Vector512_Shuffle:
#elif defined(TARGET_ARM64)
case NI_Vector64_Shuffle:
#endif
{
// The shuffle indices need to be constant so we can preserve
// the node as a hwintrinsic instead of rewriting as a user call.
assert(parent->GetOperandCount() == 2);
return parent->IsUserCall() && (dest == parent->Op(2));
}

default:
{
break;
}
}
return parent->ShouldConstantProp(dest, value->AsVecCon());
}
#endif // FEATURE_HW_INTRINSICS
}
Expand Down
6 changes: 3 additions & 3 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -6623,9 +6623,10 @@ class Compiler
GenTree* fgOptimizeEqualityComparisonWithConst(GenTreeOp* cmp);
GenTree* fgOptimizeRelationalComparisonWithConst(GenTreeOp* cmp);
GenTree* fgOptimizeRelationalComparisonWithFullRangeConst(GenTreeOp* cmp);
#ifdef FEATURE_HW_INTRINSICS
#if defined(FEATURE_HW_INTRINSICS)
GenTree* fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree);
GenTree* fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node);
#endif
#endif // FEATURE_HW_INTRINSICS
GenTree* fgOptimizeCommutativeArithmetic(GenTreeOp* tree);
GenTree* fgOptimizeRelationalComparisonWithCasts(GenTreeOp* cmp);
GenTree* fgOptimizeAddition(GenTreeOp* add);
Expand All @@ -6638,7 +6639,6 @@ class Compiler
GenTree* fgMorphModToSubMulDiv(GenTreeOp* tree);
GenTree* fgMorphUModToAndSub(GenTreeOp* tree);
GenTree* fgMorphSmpOpOptional(GenTreeOp* tree, bool* optAssertionPropDone);
GenTree* fgMorphMultiOp(GenTreeMultiOp* multiOp);
GenTree* fgMorphConst(GenTree* tree);

GenTreeOp* fgMorphCommutative(GenTreeOp* tree);
Expand Down
228 changes: 222 additions & 6 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20350,6 +20350,12 @@ bool GenTree::isCommutativeHWIntrinsic() const
{
return !varTypeIsFloating(node->GetSimdBaseType());
}

case NI_AVX512F_Add:
case NI_AVX512F_Multiply:
{
return node->GetOperandCount() == 2;
}
#endif // TARGET_XARCH

default:
Expand Down Expand Up @@ -28536,6 +28542,110 @@ genTreeOps GenTreeHWIntrinsic::HWOperGet(bool* isScalar) const
}
}

//---------------------------------------------------------------------------------------
// GenTreeHWIntrinsic::ShouldConstantProp:
//   Determines if a given operand should be constant propagated
//
// Arguments
//    operand - The operand to check
//    vecCon  - The vector constant to check
//
// Return Value
//    true if operand should be constant propagated, otherwise false
//
// Remarks
//    This method takes the operand and vector constant given that assertion prop
//    may be checking if the underlying constant for a lcl_var should be propagated.
//    That is, 'operand' may be a lcl_var node while 'vecCon' is the constant that
//    assertion prop knows the lcl_var to hold; the two are therefore passed
//    separately rather than derived from one another.
//
bool GenTreeHWIntrinsic::ShouldConstantProp(GenTree* operand, GenTreeVecCon* vecCon)
{
    // Callers are expected to have pre-filtered on this, so the switch below only
    // needs to distinguish the individual profitable scenarios.
    assert(HWIntrinsicInfo::CanBenefitFromConstantProp(gtHWIntrinsicId));

    var_types simdBaseType = GetSimdBaseType();

    switch (gtHWIntrinsicId)
    {
#if defined(TARGET_ARM64)
        case NI_Vector64_op_Equality:
        case NI_Vector64_op_Inequality:
#endif // TARGET_ARM64
        case NI_Vector128_op_Equality:
        case NI_Vector128_op_Inequality:
#if defined(TARGET_XARCH)
        case NI_Vector256_op_Equality:
        case NI_Vector256_op_Inequality:
        case NI_Vector512_op_Equality:
        case NI_Vector512_op_Inequality:
#endif // TARGET_XARCH
        {
            // We can optimize when the constant is zero, but only
            // for non floating-point since +0.0 == -0.0.
            return vecCon->IsZero() && !varTypeIsFloating(simdBaseType);
        }

#if defined(TARGET_ARM64)
        case NI_AdvSimd_CompareEqual:
        case NI_AdvSimd_Arm64_CompareEqual:
        case NI_AdvSimd_Arm64_CompareEqualScalar:
        {
            // We can optimize when the constant is zero due to a
            // specialized encoding for the instruction
            return vecCon->IsZero();
        }

        case NI_AdvSimd_CompareGreaterThan:
        case NI_AdvSimd_CompareGreaterThanOrEqual:
        case NI_AdvSimd_Arm64_CompareGreaterThan:
        case NI_AdvSimd_Arm64_CompareGreaterThanOrEqual:
        case NI_AdvSimd_Arm64_CompareGreaterThanScalar:
        case NI_AdvSimd_Arm64_CompareGreaterThanOrEqualScalar:
        {
            // We can optimize when the constant is zero, but only
            // for signed types, due to a specialized encoding for
            // the instruction
            return vecCon->IsZero() && !varTypeIsUnsigned(simdBaseType);
        }
#endif // TARGET_ARM64

#if defined(TARGET_XARCH)
        case NI_SSE41_Insert:
        {
            // We can optimize for float when the constant is zero
            // due to a specialized encoding for the instruction
            return (simdBaseType == TYP_FLOAT) && vecCon->IsZero();
        }

        case NI_EVEX_CompareEqualMask:
        case NI_EVEX_CompareNotEqualMask:
        {
            // We can optimize when the constant is zero, but only
            // for non floating-point since +0.0 == -0.0
            return vecCon->IsZero() && !varTypeIsFloating(simdBaseType);
        }
#endif // TARGET_XARCH

        case NI_Vector128_Shuffle:
#if defined(TARGET_XARCH)
        case NI_Vector256_Shuffle:
        case NI_Vector512_Shuffle:
#elif defined(TARGET_ARM64)
        case NI_Vector64_Shuffle:
#endif
        {
            // The shuffle indices need to be constant so we can preserve
            // the node as a hwintrinsic instead of rewriting as a user call.
            // Only the indices operand (Op(2)) qualifies, and only while the
            // node would otherwise become a user call.
            assert(GetOperandCount() == 2);
            return IsUserCall() && (operand == Op(2));
        }

        default:
        {
            // Intrinsic has no scenario where propagating this constant is
            // known to be profitable.
            break;
        }
    }

    return false;
}
#endif // FEATURE_HW_INTRINSICS

//---------------------------------------------------------------------------------------
Expand Down Expand Up @@ -29567,6 +29677,38 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
return tree;
}

NamedIntrinsic ni = tree->GetHWIntrinsicId();
var_types retType = tree->TypeGet();
var_types simdBaseType = tree->GetSimdBaseType();
CorInfoType simdBaseJitType = tree->GetSimdBaseJitType();
unsigned int simdSize = tree->GetSimdSize();

simd_t simdVal = {};

if (GenTreeVecCon::IsHWIntrinsicCreateConstant<simd_t>(tree, simdVal))
{
GenTreeVecCon* vecCon = gtNewVconNode(retType);

for (GenTree* arg : tree->Operands())
{
DEBUG_DESTROY_NODE(arg);
}

vecCon->gtSimdVal = simdVal;

if (fgGlobalMorph)
{
INDEBUG(vecCon->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
;
tannergooding marked this conversation as resolved.
Show resolved Hide resolved
}

if (vnStore != nullptr)
{
fgValueNumberTreeConst(vecCon);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: fgUpdateConstTreeValueNumber since it does vnStore != nullptr under the hood

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a few places doing this that can switch, I'll get them updated in a follow up PR.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Opened #103771

}
return vecCon;
}

GenTree* op1 = nullptr;
GenTree* op2 = nullptr;
GenTree* op3 = nullptr;
Expand Down Expand Up @@ -29598,6 +29740,86 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
}
}

if (tree->OperIsConvertMaskToVector())
{
GenTree* op = op1;

if (!op->OperIsHWIntrinsic())
{
return tree;
}

unsigned simdBaseTypeSize = genTypeSize(simdBaseType);
GenTreeHWIntrinsic* cvtOp = op->AsHWIntrinsic();

if (!cvtOp->OperIsConvertVectorToMask())
{
return tree;
}

if ((genTypeSize(cvtOp->GetSimdBaseType()) != simdBaseTypeSize))
{
// We need the operand to be the same kind of mask; otherwise
// the bitwise operation can differ in how it performs
return tree;
}

#if defined(TARGET_XARCH)
GenTree* vectorNode = cvtOp->Op(1);
#elif defined(TARGET_ARM64)
GenTree* vectorNode = cvtOp->Op(2);
#else
#error Unsupported platform
#endif // !TARGET_XARCH && !TARGET_ARM64

DEBUG_DESTROY_NODE(op, tree);
INDEBUG(vectorNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);

return vectorNode;
}

if (tree->OperIsConvertVectorToMask())
{
GenTree* op = op1;

#if defined(TARGET_ARM64)
if (!op->OperIsHWIntrinsic(NI_Sve_CreateTrueMaskAll))
{
return tree;
}
op = op2;
#endif // TARGET_ARM64

if (!op->OperIsHWIntrinsic())
{
return tree;
}

unsigned simdBaseTypeSize = genTypeSize(simdBaseType);
GenTreeHWIntrinsic* cvtOp = op->AsHWIntrinsic();

if (!cvtOp->OperIsConvertMaskToVector())
{
return tree;
}

if ((genTypeSize(cvtOp->GetSimdBaseType()) != simdBaseTypeSize))
{
// We need the operand to be the same kind of mask; otherwise
// the bitwise operation can differ in how it performs
return tree;
}

GenTree* maskNode = cvtOp->Op(1);

#if defined(TARGET_ARM64)
DEBUG_DESTROY_NODE(op1);
#endif // TARGET_ARM64

DEBUG_DESTROY_NODE(op, tree);
return maskNode;
}

bool isScalar = false;
genTreeOps oper = tree->HWOperGet(&isScalar);

Expand Down Expand Up @@ -29630,12 +29852,6 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)

GenTree* resultNode = tree;

NamedIntrinsic ni = tree->GetHWIntrinsicId();
var_types retType = tree->TypeGet();
var_types simdBaseType = tree->GetSimdBaseType();
CorInfoType simdBaseJitType = tree->GetSimdBaseJitType();
unsigned int simdSize = tree->GetSimdSize();

if (otherNode == nullptr)
{
assert(op2 == nullptr);
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -6639,6 +6639,8 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic

genTreeOps HWOperGet(bool* isScalar) const;

bool ShouldConstantProp(GenTree* operand, GenTreeVecCon* vecCon);

private:
void SetHWIntrinsicId(NamedIntrinsic intrinsicId);

Expand Down
Loading
Loading