Arm64/Sve: Predicated Abs, Predicated/UnPredicated Add, Conditional Select #100743

Merged on Apr 24, 2024
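
For context on the title's terms: a predicated SVE operation takes a governing predicate (mask) and only produces results in the active lanes, an unpredicated operation writes every lane, and ConditionalSelect chooses per lane between two vectors. Below is a rough scalar model of those semantics in C++; it is purely illustrative and is neither the managed API surface nor any code from this PR.

#include <cstdlib>
#include <cstddef>
#include <cstdint>

// Per-lane model of the operations named in the PR title (illustrative only).
// 'mask[i]' stands in for an SVE predicate lane.

// Predicated Abs: only active lanes receive a result; inactive lanes either keep
// their old value (merging) or become zero, depending on the predication form.
void AbsPredicated(const bool* mask, const int32_t* src, int32_t* dst, size_t n)
{
    for (size_t i = 0; i < n; i++)
    {
        if (mask[i])
        {
            dst[i] = std::abs(src[i]);
        }
    }
}

// Unpredicated Add: every lane is written, no mask involved.
void AddUnpredicated(const int32_t* a, const int32_t* b, int32_t* dst, size_t n)
{
    for (size_t i = 0; i < n; i++)
    {
        dst[i] = a[i] + b[i];
    }
}

// ConditionalSelect (SVE SEL): per-lane choice between two vectors.
void ConditionalSelect(const bool* mask, const int32_t* t, const int32_t* f, int32_t* dst, size_t n)
{
    for (size_t i = 0; i < n; i++)
    {
        dst[i] = mask[i] ? t[i] : f[i];
    }
}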
Changes from all commits · 52 commits
0d437e3
JIT ARM64-SVE: Add Sve.Abs() and Sve.Add()
a74nh Mar 11, 2024
e9fa735
Fix sve scaling in enitIns_R_S/S_R
a74nh Mar 22, 2024
5acc122
Revert "Fix sve scaling in enitIns_R_S/S_R"
a74nh Mar 23, 2024
15e893d
Fix sve scaling in enitIns_R_S/S_R
a74nh Mar 23, 2024
c22f458
Restore testing
a74nh Mar 24, 2024
508a52a
Use NaturalScale_helper for vector load/stores
a74nh Mar 25, 2024
5825c20
Merge remote-tracking branch 'origin/main' into api_abs_github
kunalspathak Mar 28, 2024
9081fa3
Merge remote-tracking branch 'origin/main' into api_abs_github
kunalspathak Apr 3, 2024
d4f3c86
wip
kunalspathak Apr 3, 2024
20defbd
Add ConditionalSelect() APIs
kunalspathak Apr 4, 2024
c70367f
Handle ConditionalSelect in JIT
kunalspathak Apr 4, 2024
1896b21
Add test coverage
kunalspathak Apr 4, 2024
2e0d022
Update the test cases
kunalspathak Apr 6, 2024
185fcfa
jit format
kunalspathak Apr 6, 2024
88e8f87
Merge branch 'sve-conditional-select' into abi_abs_conditional
kunalspathak Apr 6, 2024
ebcb174
fix merge conflicts
kunalspathak Apr 6, 2024
28aa7b7
Make predicated/unpredicated work with ConditionalSelect
kunalspathak Apr 7, 2024
1c7b4b2
Misc. changes
kunalspathak Apr 7, 2024
6adf6c6
jit format
kunalspathak Apr 7, 2024
ecf4e01
jit format
kunalspathak Apr 7, 2024
c6fef82
Merge remote-tracking branch 'origin/main' into abi_abs_conditional
kunalspathak Apr 7, 2024
d780156
Handle all the conditions correctly
kunalspathak Apr 8, 2024
eb4d1dc
jit format
kunalspathak Apr 9, 2024
00aacab
fix some spacing
kunalspathak Apr 9, 2024
f716958
Removed the assert
kunalspathak Apr 9, 2024
5eadcf9
fix the largest vector size to 64 to fix #100366
kunalspathak Apr 9, 2024
bc6e58d
review feedback
kunalspathak Apr 11, 2024
19ec4b6
wip
kunalspathak Apr 11, 2024
ed7c781
Add SVE feature detection for Windows
kunalspathak Apr 11, 2024
f966b5e
fix the check for invalid alignment
kunalspathak Apr 11, 2024
f0c81f1
Revert "Add SVE feature detection for Windows"
kunalspathak Apr 11, 2024
89ede7d
Handle case where Abs() is wrapped in another conditionalSelect
kunalspathak Apr 11, 2024
1f44b2f
jit format
kunalspathak Apr 12, 2024
9712a16
fix the size comparison
kunalspathak Apr 12, 2024
0ad2e64
HW_Flag_MaskedPredicatedOnlyOperation
kunalspathak Apr 12, 2024
f76e324
Revert the change in emitarm64.cpp around INS_sve_ldr_mask/INS_sve_st…
kunalspathak Apr 12, 2024
8af7108
Fix the condition for lowering
kunalspathak Apr 14, 2024
e934e26
address review feedback for movprfx
kunalspathak Apr 22, 2024
0daeac7
Move the special handling of Vector<>.Zero from lowerer to importer
kunalspathak Apr 22, 2024
635a7d7
Rename IsEmbeddedMaskedOperation/IsOptionalEmbeddedMaskedOperation
kunalspathak Apr 22, 2024
19da982
Add more test coverage for conditionalSelect
kunalspathak Apr 22, 2024
f1b8b17
Rename test method name
kunalspathak Apr 23, 2024
409a039
Add more test coverage for conditionalSelect:Abs
kunalspathak Apr 23, 2024
0c8a14d
jit format
kunalspathak Apr 23, 2024
53c5eb3
Add logging on test methods
kunalspathak Apr 23, 2024
868229f
Merge remote-tracking branch 'origin/main' into abi_abs_conditional
kunalspathak Apr 23, 2024
9ae3c78
Add the missing movprfx for abs
kunalspathak Apr 23, 2024
c8244eb
Add few more scenarios where falseVal is zero
kunalspathak Apr 23, 2024
f6eb1fe
Make sure LoadVector is marked as explicit needing mask
kunalspathak Apr 23, 2024
83e1b1b
revisit the codegen logic
kunalspathak Apr 24, 2024
c78e0c7
Remove commented code and add some other comments
kunalspathak Apr 24, 2024
6aa2386
jit format
kunalspathak Apr 24, 2024
2 changes: 0 additions & 2 deletions src/coreclr/jit/codegenlinear.cpp
@@ -1648,15 +1648,13 @@ void CodeGen::genConsumeRegs(GenTree* tree)
// Update the life of the lcl var.
genUpdateLife(tree);
}
#ifdef TARGET_XARCH
#ifdef FEATURE_HW_INTRINSICS
else if (tree->OperIs(GT_HWINTRINSIC))
{
GenTreeHWIntrinsic* hwintrinsic = tree->AsHWIntrinsic();
genConsumeMultiOpOperands(hwintrinsic);
}
#endif // FEATURE_HW_INTRINSICS
#endif // TARGET_XARCH
else if (tree->OperIs(GT_BITCAST, GT_NEG, GT_CAST, GT_LSH, GT_RSH, GT_RSZ, GT_ROR, GT_BSWAP, GT_BSWAP16))
{
genConsumeRegs(tree->gtGetOp1());
1 change: 1 addition & 0 deletions src/coreclr/jit/compiler.h
@@ -3473,6 +3473,7 @@ class Compiler
#if defined(TARGET_ARM64)
GenTree* gtNewSimdConvertVectorToMaskNode(var_types type, GenTree* node, CorInfoType simdBaseJitType, unsigned simdSize);
GenTree* gtNewSimdConvertMaskToVectorNode(GenTreeHWIntrinsic* node, var_types type);
GenTree* gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigned simdSize);
#endif

//------------------------------------------------------------------------
2 changes: 1 addition & 1 deletion src/coreclr/jit/emitloongarch64.h
@@ -333,7 +333,7 @@ enum EmitCallType

EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method
// EC_FUNC_TOKEN_INDIR, // Indirect call to a helper/static/nonvirtual/global method
// EC_FUNC_ADDR, // Direct call to an absolute address
// EC_FUNC_ADDR, // Direct call to an absolute address

EC_INDIR_R, // Indirect call via register

2 changes: 1 addition & 1 deletion src/coreclr/jit/emitriscv64.h
@@ -310,7 +310,7 @@ enum EmitCallType

EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method
// EC_FUNC_TOKEN_INDIR, // Indirect call to a helper/static/nonvirtual/global method
// EC_FUNC_ADDR, // Direct call to an absolute address
// EC_FUNC_ADDR, // Direct call to an absolute address

// EC_FUNC_VIRTUAL, // Call to a virtual method (using the vtable)
EC_INDIR_R, // Indirect call via register
14 changes: 8 additions & 6 deletions src/coreclr/jit/gentree.cpp
@@ -18012,7 +18012,7 @@ bool GenTree::canBeContained() const
}
else if (OperIsHWIntrinsic() && !isContainableHWIntrinsic())
{
return isEvexEmbeddedMaskingCompatibleHWIntrinsic();
return isEmbeddedMaskingCompatibleHWIntrinsic();
}

return true;
@@ -19909,24 +19909,26 @@ bool GenTree::isEvexCompatibleHWIntrinsic() const
}

//------------------------------------------------------------------------
// isEvexEmbeddedMaskingCompatibleHWIntrinsic: Checks if the intrinsic is compatible
// isEmbeddedMaskingCompatibleHWIntrinsic : Checks if the intrinsic is compatible
// with the EVEX embedded masking form for its intended lowering instruction.
//
// Return Value:
// true if the intrinsic node lowering instruction has an EVEX embedded masking
//
bool GenTree::isEvexEmbeddedMaskingCompatibleHWIntrinsic() const
bool GenTree::isEmbeddedMaskingCompatibleHWIntrinsic() const
{
#if defined(TARGET_XARCH)
if (OperIsHWIntrinsic())
{
#if defined(TARGET_XARCH)
// TODO-AVX512F-CQ: Expand this to the full set of APIs and make it table driven
// using IsEmbMaskingCompatible. For now, however, limit it to some explicit ids
// for prototyping purposes.
return (AsHWIntrinsic()->GetHWIntrinsicId() == NI_AVX512F_Add);
#elif defined(TARGET_ARM64)
return HWIntrinsicInfo::IsEmbeddedMaskedOperation(AsHWIntrinsic()->GetHWIntrinsicId()) ||
HWIntrinsicInfo::IsOptionalEmbeddedMaskedOperation(AsHWIntrinsic()->GetHWIntrinsicId());
#endif
}
#endif // TARGET_XARCH

return false;
}

12 changes: 6 additions & 6 deletions src/coreclr/jit/gentree.h
@@ -557,9 +557,9 @@ enum GenTreeFlags : unsigned int

GTF_MDARRLOWERBOUND_NONFAULTING = 0x20000000, // GT_MDARR_LOWER_BOUND -- An MD array lower bound operation that cannot fault. Same as GT_IND_NONFAULTING.

#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS)
#ifdef FEATURE_HW_INTRINSICS
GTF_HW_EM_OP = 0x10000000, // GT_HWINTRINSIC -- node is used as an operand to an embedded mask
#endif // TARGET_XARCH && FEATURE_HW_INTRINSICS
#endif // FEATURE_HW_INTRINSICS
};

inline constexpr GenTreeFlags operator ~(GenTreeFlags a)
@@ -1465,7 +1465,7 @@ struct GenTree
bool isContainableHWIntrinsic() const;
bool isRMWHWIntrinsic(Compiler* comp);
bool isEvexCompatibleHWIntrinsic() const;
bool isEvexEmbeddedMaskingCompatibleHWIntrinsic() const;
bool isEmbeddedMaskingCompatibleHWIntrinsic() const;
#else
bool isCommutativeHWIntrinsic() const
{
@@ -1487,7 +1487,7 @@
return false;
}

bool isEvexEmbeddedMaskingCompatibleHWIntrinsic() const
bool isEmbeddedMaskingCompatibleHWIntrinsic() const
{
return false;
}
@@ -2226,7 +2226,7 @@ struct GenTree
gtFlags &= ~GTF_ICON_HDL_MASK;
}

#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS)
#ifdef FEATURE_HW_INTRINSICS

bool IsEmbMaskOp()
{
@@ -2240,7 +2240,7 @@
gtFlags |= GTF_HW_EM_OP;
}

#endif // TARGET_XARCH && FEATURE_HW_INTRINSICS
#endif // FEATURE_HW_INTRINSICS

static bool HandleKindDataIsInvariant(GenTreeFlags flags);

60 changes: 43 additions & 17 deletions src/coreclr/jit/hwintrinsic.cpp
@@ -1396,6 +1396,36 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
GenTree* op3 = nullptr;
GenTree* op4 = nullptr;

switch (numArgs)
{
case 4:
op4 = getArgForHWIntrinsic(sigReader.GetOp4Type(), sigReader.op4ClsHnd);
op4 = addRangeCheckIfNeeded(intrinsic, op4, mustExpand, immLowerBound, immUpperBound);
op3 = getArgForHWIntrinsic(sigReader.GetOp3Type(), sigReader.op3ClsHnd);
op2 = getArgForHWIntrinsic(sigReader.GetOp2Type(), sigReader.op2ClsHnd);
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd);
break;

case 3:
op3 = getArgForHWIntrinsic(sigReader.GetOp3Type(), sigReader.op3ClsHnd);
op2 = getArgForHWIntrinsic(sigReader.GetOp2Type(), sigReader.op2ClsHnd);
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd);
break;

case 2:
op2 = getArgForHWIntrinsic(sigReader.GetOp2Type(), sigReader.op2ClsHnd);
op2 = addRangeCheckIfNeeded(intrinsic, op2, mustExpand, immLowerBound, immUpperBound);
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd);
break;

case 1:
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd);
break;

default:
break;
}

switch (numArgs)
{
case 0:
@@ -1407,8 +1437,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,

case 1:
{
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd);

if ((category == HW_Category_MemoryLoad) && op1->OperIs(GT_CAST))
{
// Although the API specifies a pointer, if what we have is a BYREF, that's what
@@ -1467,10 +1495,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,

case 2:
{
op2 = getArgForHWIntrinsic(sigReader.GetOp2Type(), sigReader.op2ClsHnd);
op2 = addRangeCheckIfNeeded(intrinsic, op2, mustExpand, immLowerBound, immUpperBound);
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd);

retNode = isScalar
? gtNewScalarHWIntrinsicNode(nodeRetType, op1, op2, intrinsic)
: gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, intrinsic, simdBaseJitType, simdSize);
@@ -1524,10 +1548,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,

case 3:
{
op3 = getArgForHWIntrinsic(sigReader.GetOp3Type(), sigReader.op3ClsHnd);
op2 = getArgForHWIntrinsic(sigReader.GetOp2Type(), sigReader.op2ClsHnd);
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd);

#ifdef TARGET_ARM64
if (intrinsic == NI_AdvSimd_LoadAndInsertScalar)
{
@@ -1569,12 +1589,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,

case 4:
{
op4 = getArgForHWIntrinsic(sigReader.GetOp4Type(), sigReader.op4ClsHnd);
op4 = addRangeCheckIfNeeded(intrinsic, op4, mustExpand, immLowerBound, immUpperBound);
op3 = getArgForHWIntrinsic(sigReader.GetOp3Type(), sigReader.op3ClsHnd);
op2 = getArgForHWIntrinsic(sigReader.GetOp2Type(), sigReader.op2ClsHnd);
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd);

assert(!isScalar);
retNode =
gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, op4, intrinsic, simdBaseJitType, simdSize);
@@ -1591,10 +1605,22 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
}

#if defined(TARGET_ARM64)
if (HWIntrinsicInfo::IsMaskedOperation(intrinsic))
if (HWIntrinsicInfo::IsExplicitMaskedOperation(intrinsic))
{
assert(numArgs > 0);
GenTree* op1 = retNode->AsHWIntrinsic()->Op(1);
if (intrinsic == NI_Sve_ConditionalSelect)
[Review thread on this line]
Member: Not important for this PR, but this is potentially something that should be handled in gtNewSimdCndSelNode instead and then more generally as part of morph to capture values that don't materialize as constants until later.
Member Author: Will do it in a follow-up PR.
(A hedged sketch of the suggested gtNewSimdCndSelNode fold follows this file's diff.)
{
if (op1->IsVectorAllBitsSet())
{
return retNode->AsHWIntrinsic()->Op(2);
}
else if (op1->IsVectorZero())
{
return retNode->AsHWIntrinsic()->Op(3);
}
}

if (!varTypeIsMask(op1))
{
// Op1 input is a vector. HWInstrinsic requires a mask.
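Following up on the review comment above: a hedged sketch of what the constant-mask fold could look like if it lived in a node-construction helper rather than the importer. The helper signature and the use of NI_Sve_ConditionalSelect here are assumptions for illustration only; the PR itself performs the equivalent fold in impHWIntrinsic, as shown in the hunk above.

// Hedged sketch, not code from this PR: fold a conditional select whose mask is a
// compile-time constant at node-creation time. Signature and intrinsic id assumed.
GenTree* Compiler::gtNewSimdCndSelNode(
    var_types type, GenTree* mask, GenTree* trueVal, GenTree* falseVal,
    CorInfoType simdBaseJitType, unsigned simdSize)
{
    if (mask->IsVectorAllBitsSet())
    {
        // Every lane comes from trueVal; a real fold must still preserve any
        // side effects of the discarded falseVal operand.
        return trueVal;
    }

    if (mask->IsVectorZero())
    {
        // Every lane comes from falseVal; same side-effect caveat for trueVal.
        return falseVal;
    }

    return gtNewSimdHWIntrinsicNode(type, mask, trueVal, falseVal, NI_Sve_ConditionalSelect,
                                    simdBaseJitType, simdSize);
}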
29 changes: 27 additions & 2 deletions src/coreclr/jit/hwintrinsic.h
@@ -186,11 +186,18 @@ enum HWIntrinsicFlag : unsigned int
HW_Flag_ReturnsPerElementMask = 0x10000,

// The intrinsic uses a mask in arg1 to select elements present in the result
HW_Flag_MaskedOperation = 0x20000,
HW_Flag_ExplicitMaskedOperation = 0x20000,

// The intrinsic uses a mask in arg1 to select elements present in the result, and must use a low register.
HW_Flag_LowMaskedOperation = 0x40000,

// The intrinsic can optionally use a mask in arg1 to select elements present in the result, which is not present in
// the API call
HW_Flag_OptionalEmbeddedMaskedOperation = 0x80000,

// The intrinsic uses a mask in arg1 to select elements present in the result, which is not present in the API call
HW_Flag_EmbeddedMaskedOperation = 0x100000,

#else
#error Unsupported platform
#endif
@@ -872,7 +879,7 @@ struct HWIntrinsicInfo
static bool IsMaskedOperation(NamedIntrinsic id)
{
const HWIntrinsicFlag flags = lookupFlags(id);
return ((flags & HW_Flag_MaskedOperation) != 0) || IsLowMaskedOperation(id);
return IsLowMaskedOperation(id) || IsOptionalEmbeddedMaskedOperation(id) || IsExplicitMaskedOperation(id);
}

static bool IsLowMaskedOperation(NamedIntrinsic id)
@@ -881,6 +888,24 @@
return (flags & HW_Flag_LowMaskedOperation) != 0;
}

static bool IsOptionalEmbeddedMaskedOperation(NamedIntrinsic id)
{
const HWIntrinsicFlag flags = lookupFlags(id);
return (flags & HW_Flag_OptionalEmbeddedMaskedOperation) != 0;
}

static bool IsEmbeddedMaskedOperation(NamedIntrinsic id)
{
const HWIntrinsicFlag flags = lookupFlags(id);
return (flags & HW_Flag_EmbeddedMaskedOperation) != 0;
}

static bool IsExplicitMaskedOperation(NamedIntrinsic id)
{
const HWIntrinsicFlag flags = lookupFlags(id);
return (flags & HW_Flag_ExplicitMaskedOperation) != 0;
}

#endif // TARGET_ARM64

static bool HasSpecialSideEffect(NamedIntrinsic id)
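The flag split above replaces the single HW_Flag_MaskedOperation with explicit, embedded, and optional-embedded variants. A small hedged sketch of how a caller might branch on the new queries; only the three HWIntrinsicInfo helpers come from this PR, and the branch comments are illustrative rather than this PR's lowering logic.

// Hedged sketch: distinguishing the masking forms introduced above.
void DescribeMasking(NamedIntrinsic id)
{
    if (HWIntrinsicInfo::IsExplicitMaskedOperation(id))
    {
        // The mask is a real first argument of the API (e.g. Sve.ConditionalSelect);
        // the importer converts a vector op1 into a mask when needed.
    }
    else if (HWIntrinsicInfo::IsEmbeddedMaskedOperation(id))
    {
        // The API exposes no mask parameter, but the instruction is predicated-only;
        // the governing predicate comes from an enclosing ConditionalSelect or an
        // all-true mask.
    }
    else if (HWIntrinsicInfo::IsOptionalEmbeddedMaskedOperation(id))
    {
        // Both predicated and unpredicated encodings exist (e.g. SVE ADD); the
        // predicated form is only used when an enclosing select supplies a mask.
    }
}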
20 changes: 17 additions & 3 deletions src/coreclr/jit/hwintrinsicarm64.cpp
@@ -2222,9 +2222,8 @@ GenTree* Compiler::gtNewSimdConvertVectorToMaskNode(var_types type,
assert(varTypeIsSIMD(node));

// ConvertVectorToMask uses cmpne which requires an embedded mask.
GenTree* embeddedMask = gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateTrueMaskAll, simdBaseJitType, simdSize);
return gtNewSimdHWIntrinsicNode(TYP_MASK, embeddedMask, node, NI_Sve_ConvertVectorToMask, simdBaseJitType,
simdSize);
GenTree* trueMask = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize);
return gtNewSimdHWIntrinsicNode(TYP_MASK, trueMask, node, NI_Sve_ConvertVectorToMask, simdBaseJitType, simdSize);
}

//------------------------------------------------------------------------
@@ -2246,4 +2245,19 @@ GenTree* Compiler::gtNewSimdConvertMaskToVectorNode(GenTreeHWIntrinsic* node, va
node->GetSimdSize());
}

//------------------------------------------------------------------------
// gtNewSimdAllTrueMaskNode: Create an all-true mask
//
// Arguments:
// simdBaseJitType -- the base jit type of the nodes being masked
// simdSize -- the simd size of the nodes being masked
//
// Return Value:
// The mask
//
GenTree* Compiler::gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigned simdSize)
{
return gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateTrueMaskAll, simdBaseJitType, simdSize);
}

#endif // FEATURE_HW_INTRINSICS