Skip to content

Commit

Permalink
Revert "JIT: Added SVE GetFfr, SetFfr, LoadVectorFirstFaulting,…
Browse files Browse the repository at this point in the history
… `Gather…" (#105591)

This reverts commit 991ae97.
  • Loading branch information
jkotas committed Jul 27, 2024
1 parent 991ae97 commit dc7d7bc
Show file tree
Hide file tree
Showing 22 changed files with 164 additions and 3,217 deletions.
4 changes: 0 additions & 4 deletions src/coreclr/jit/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4245,10 +4245,6 @@ bool Compiler::fgVarIsNeverZeroInitializedInProlog(unsigned varNum)
bool result = varDsc->lvIsParam || lvaIsOSRLocal(varNum) || (varNum == lvaGSSecurityCookie) ||
(varNum == lvaInlinedPInvokeFrameVar) || (varNum == lvaStubArgumentVar) || (varNum == lvaRetAddrVar);

#ifdef TARGET_ARM64
result = result || (varNum == lvaFfrRegister);
#endif

#if FEATURE_FIXED_OUT_ARGS
result = result || (varNum == lvaOutgoingArgSpaceVar);
#endif
Expand Down
9 changes: 4 additions & 5 deletions src/coreclr/jit/fgdiagnostic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3428,15 +3428,14 @@ void Compiler::fgDebugCheckFlags(GenTree* tree, BasicBlock* block)

#if defined(TARGET_ARM64)
case NI_ArmBase_Yield:
case NI_Sve_GatherPrefetch16Bit:
case NI_Sve_GatherPrefetch32Bit:
case NI_Sve_GatherPrefetch64Bit:
case NI_Sve_GatherPrefetch8Bit:
case NI_Sve_PrefetchBytes:
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
case NI_Sve_SetFfr:
case NI_Sve_GatherPrefetch16Bit:
case NI_Sve_GatherPrefetch32Bit:
case NI_Sve_GatherPrefetch64Bit:
case NI_Sve_GatherPrefetch8Bit:
{
assert(tree->OperRequiresCallFlag(this));
expectedFlags |= GTF_GLOB_REF;
Expand Down
48 changes: 23 additions & 25 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26732,18 +26732,6 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
addr = Op(3);
break;

case NI_Sve_GatherVector:
case NI_Sve_GatherVectorByteZeroExtend:
case NI_Sve_GatherVectorFirstFaulting:
case NI_Sve_GatherVectorInt16SignExtend:
case NI_Sve_GatherVectorInt16WithByteOffsetsSignExtend:
case NI_Sve_GatherVectorInt32SignExtend:
case NI_Sve_GatherVectorInt32WithByteOffsetsSignExtend:
case NI_Sve_GatherVectorSByteSignExtend:
case NI_Sve_GatherVectorUInt16WithByteOffsetsZeroExtend:
case NI_Sve_GatherVectorUInt16ZeroExtend:
case NI_Sve_GatherVectorUInt32WithByteOffsetsZeroExtend:
case NI_Sve_GatherVectorUInt32ZeroExtend:
case NI_Sve_GatherVectorWithByteOffsets:
case NI_Sve_LoadVector:
case NI_Sve_LoadVectorNonTemporal:
Expand All @@ -26754,7 +26742,6 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
case NI_Sve_LoadVectorByteZeroExtendToUInt16:
case NI_Sve_LoadVectorByteZeroExtendToUInt32:
case NI_Sve_LoadVectorByteZeroExtendToUInt64:
case NI_Sve_LoadVectorFirstFaulting:
case NI_Sve_LoadVectorInt16SignExtendToInt32:
case NI_Sve_LoadVectorInt16SignExtendToInt64:
case NI_Sve_LoadVectorInt16SignExtendToUInt32:
Expand All @@ -26779,6 +26766,20 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
addr = Op(2);
break;

case NI_Sve_GatherVector:
case NI_Sve_GatherVectorByteZeroExtend:
case NI_Sve_GatherVectorInt16SignExtend:
case NI_Sve_GatherVectorInt16WithByteOffsetsSignExtend:
case NI_Sve_GatherVectorInt32SignExtend:
case NI_Sve_GatherVectorInt32WithByteOffsetsSignExtend:
case NI_Sve_GatherVectorSByteSignExtend:
case NI_Sve_GatherVectorUInt16WithByteOffsetsZeroExtend:
case NI_Sve_GatherVectorUInt16ZeroExtend:
case NI_Sve_GatherVectorUInt32WithByteOffsetsZeroExtend:
case NI_Sve_GatherVectorUInt32ZeroExtend:
addr = Op(2);
break;

#endif // TARGET_ARM64

default:
Expand Down Expand Up @@ -26858,12 +26859,11 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
{
#ifdef TARGET_ARM64
static_assert_no_msg(
AreContiguous(NI_Sve_GatherVector, NI_Sve_GatherVectorByteZeroExtend, NI_Sve_GatherVectorFirstFaulting,
NI_Sve_GatherVectorInt16SignExtend, NI_Sve_GatherVectorInt16WithByteOffsetsSignExtend,
NI_Sve_GatherVectorInt32SignExtend, NI_Sve_GatherVectorInt32WithByteOffsetsSignExtend,
NI_Sve_GatherVectorSByteSignExtend, NI_Sve_GatherVectorUInt16WithByteOffsetsZeroExtend,
NI_Sve_GatherVectorUInt16ZeroExtend, NI_Sve_GatherVectorUInt32WithByteOffsetsZeroExtend,
NI_Sve_GatherVectorUInt32ZeroExtend));
AreContiguous(NI_Sve_GatherVector, NI_Sve_GatherVectorByteZeroExtend, NI_Sve_GatherVectorInt16SignExtend,
NI_Sve_GatherVectorInt16WithByteOffsetsSignExtend, NI_Sve_GatherVectorInt32SignExtend,
NI_Sve_GatherVectorInt32WithByteOffsetsSignExtend, NI_Sve_GatherVectorSByteSignExtend,
NI_Sve_GatherVectorUInt16WithByteOffsetsZeroExtend, NI_Sve_GatherVectorUInt16ZeroExtend,
NI_Sve_GatherVectorUInt32WithByteOffsetsZeroExtend, NI_Sve_GatherVectorUInt32ZeroExtend));
assert(varTypeIsI(addr) || (varTypeIsSIMD(addr) && ((intrinsicId >= NI_Sve_GatherVector) &&
(intrinsicId <= NI_Sve_GatherVectorUInt32ZeroExtend))));
#else
Expand Down Expand Up @@ -27281,7 +27281,6 @@ bool GenTreeHWIntrinsic::OperRequiresCallFlag() const
case NI_Sve_GatherPrefetch32Bit:
case NI_Sve_GatherPrefetch64Bit:
case NI_Sve_GatherPrefetch8Bit:
case NI_Sve_SetFfr:
{
return true;
}
Expand Down Expand Up @@ -27464,15 +27463,14 @@ void GenTreeHWIntrinsic::Initialize(NamedIntrinsic intrinsicId)

#if defined(TARGET_ARM64)
case NI_ArmBase_Yield:
case NI_Sve_GatherPrefetch16Bit:
case NI_Sve_GatherPrefetch32Bit:
case NI_Sve_GatherPrefetch64Bit:
case NI_Sve_GatherPrefetch8Bit:
case NI_Sve_PrefetchBytes:
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
case NI_Sve_SetFfr:
case NI_Sve_GatherPrefetch16Bit:
case NI_Sve_GatherPrefetch32Bit:
case NI_Sve_GatherPrefetch64Bit:
case NI_Sve_GatherPrefetch8Bit:
{
// Mark as a call and global reference, much as is done for GT_KEEPALIVE
gtFlags |= (GTF_CALL | GTF_GLOB_REF);
Expand Down
1 change: 0 additions & 1 deletion src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2217,7 +2217,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
#elif defined(TARGET_ARM64)
case NI_Sve_GatherVector:
case NI_Sve_GatherVectorByteZeroExtend:
case NI_Sve_GatherVectorFirstFaulting:
case NI_Sve_GatherVectorInt16SignExtend:
case NI_Sve_GatherVectorInt16WithByteOffsetsSignExtend:
case NI_Sve_GatherVectorInt32SignExtend:
Expand Down
54 changes: 14 additions & 40 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2049,34 +2049,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}

case NI_Sve_GatherVectorFirstFaulting:
{
if (node->GetAuxiliaryType() == TYP_UNKNOWN)
{
if (intrin.numOperands == 3)
{
// We have extra argument which means there is a "use" of FFR here. Restore it back in FFR
// register.
assert(op3Reg != REG_NA);
GetEmitter()->emitIns_R(INS_sve_wrffr, emitSize, op3Reg, opt);
}
}
else
{
// AuxilaryType is added only for numOperands == 3. If there is an extra argument, we need to
// "use" FFR here. Restore it back in FFR register.

if (intrin.numOperands == 4)
{
// We have extra argument which means there is a "use" of FFR here. Restore it back in FFR
// register.
assert(op4Reg != REG_NA);
GetEmitter()->emitIns_R(INS_sve_wrffr, emitSize, op4Reg, opt);
}
}

FALLTHROUGH;
}
case NI_Sve_GatherVector:
case NI_Sve_GatherVectorByteZeroExtend:
case NI_Sve_GatherVectorInt16SignExtend:
Expand All @@ -2093,24 +2065,25 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
{
// GatherVector...(Vector<T> mask, T* address, Vector<T2> indices)

emitAttr baseSize = emitActualTypeSize(intrin.baseType);
bool isLoadingBytes = ((ins == INS_sve_ld1b) || (ins == INS_sve_ld1sb) || (ins == INS_sve_ldff1b) ||
(ins == INS_sve_ldff1sb));
insScalableOpts sopt = INS_SCALABLE_OPTS_NONE;
assert(intrin.numOperands == 3);
emitAttr baseSize = emitActualTypeSize(intrin.baseType);
insScalableOpts sopt = INS_SCALABLE_OPTS_NONE;

if (baseSize == EA_4BYTE)
if (baseSize == EA_8BYTE)
{
// Index is multiplied.
sopt = (ins == INS_sve_ld1b || ins == INS_sve_ld1sb) ? INS_SCALABLE_OPTS_NONE
: INS_SCALABLE_OPTS_LSL_N;
}
else
{
// Index is sign or zero extended to 64bits, then multiplied.
assert(baseSize == EA_4BYTE);
opt = varTypeIsUnsigned(node->GetAuxiliaryType()) ? INS_OPTS_SCALABLE_S_UXTW
: INS_OPTS_SCALABLE_S_SXTW;

sopt = isLoadingBytes ? INS_SCALABLE_OPTS_NONE : INS_SCALABLE_OPTS_MOD_N;
}
else
{
// Index is multiplied.
assert(baseSize == EA_8BYTE);
sopt = isLoadingBytes ? INS_SCALABLE_OPTS_NONE : INS_SCALABLE_OPTS_LSL_N;
sopt = (ins == INS_sve_ld1b || ins == INS_sve_ld1sb) ? INS_SCALABLE_OPTS_NONE
: INS_SCALABLE_OPTS_MOD_N;
}

GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg, opt, sopt);
Expand All @@ -2119,6 +2092,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
{
// GatherVector...(Vector<T> mask, Vector<T2> addresses)

assert(intrin.numOperands == 2);
GetEmitter()->emitIns_R_R_R_I(ins, emitSize, targetReg, op1Reg, op2Reg, 0, opt);
}

Expand Down
1 change: 0 additions & 1 deletion src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,6 @@ HARDWARE_INTRINSIC(Sve, GatherPrefetch64Bit,
HARDWARE_INTRINSIC(Sve, GatherPrefetch8Bit, -1, -1, false, {INS_sve_prfb, INS_sve_prfb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand|HW_Flag_SpecialSideEffect_Other)
HARDWARE_INTRINSIC(Sve, GatherVector, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, GatherVectorByteZeroExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1b, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, GatherVectorFirstFaulting, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1w, INS_sve_ldff1w, INS_sve_ldff1d, INS_sve_ldff1d, INS_sve_ldff1w, INS_sve_ldff1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialSideEffectMask)
HARDWARE_INTRINSIC(Sve, GatherVectorInt16SignExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_sve_ld1sh, INS_sve_ld1sh, INS_sve_ld1sh, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, GatherVectorInt16WithByteOffsetsSignExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_sve_ld1sh, INS_sve_ld1sh, INS_sve_ld1sh, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, GatherVectorInt32SignExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sw, INS_sve_ld1sw, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
Expand Down
51 changes: 2 additions & 49 deletions src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1775,50 +1775,6 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)

break;
}
case NI_Sve_GatherVectorFirstFaulting:
{
LIR::Use use;
bool foundUse = BlockRange().TryGetUse(node, &use);

if (m_ffrTrashed)
{
// Consume the FFR register value from local variable to simulate "use" of FFR,
// only if it was trashed. If it was not trashed, we do not have to reload the
// contents of the FFR register.

unsigned lclNum = comp->getFFRegisterVarNum();
GenTree* lclVar = comp->gtNewLclvNode(lclNum, TYP_MASK);
BlockRange().InsertBefore(node, lclVar);
LowerNode(lclVar);

if (node->GetOperandCount() == 3)
{
assert(node->GetAuxiliaryType() != TYP_UNKNOWN);
node->ResetHWIntrinsicId(intrinsicId, comp, node->Op(1), node->Op(2), node->Op(3), lclVar);
}
else
{
assert(node->GetOperandCount() == 2);
node->ResetHWIntrinsicId(intrinsicId, comp, node->Op(1), node->Op(2), lclVar);
}
}

if (foundUse)
{
unsigned tmpNum = comp->lvaGrabTemp(true DEBUGARG("Return value result/FFR"));
LclVarDsc* tmpVarDsc = comp->lvaGetDesc(tmpNum);
tmpVarDsc->lvType = node->TypeGet();
GenTree* storeLclVar;
use.ReplaceWithLclVar(comp, tmpNum, &storeLclVar);
}
else
{
node->SetUnusedValue();
}

StoreFFRValue(node);
break;
}
case NI_Sve_LoadVectorFirstFaulting:
{
LIR::Use use;
Expand All @@ -1830,8 +1786,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
// only if it was trashed. If it was not trashed, we do not have to reload the
// contents of the FFR register.

unsigned lclNum = comp->getFFRegisterVarNum();
GenTree* lclVar = comp->gtNewLclvNode(lclNum, TYP_MASK);
GenTree* lclVar = comp->gtNewLclvNode(comp->lvaFfrRegister, TYP_MASK);
BlockRange().InsertBefore(node, lclVar);
LowerNode(lclVar);

Expand Down Expand Up @@ -4127,10 +4082,8 @@ void Lowering::StoreFFRValue(GenTreeHWIntrinsic* node)
#ifdef DEBUG
switch (node->GetHWIntrinsicId())
{
case NI_Sve_GatherVectorFirstFaulting:
case NI_Sve_LoadVectorFirstFaulting:
case NI_Sve_SetFfr:

case NI_Sve_LoadVectorFirstFaulting:
break;
default:
assert(!"Unexpected HWIntrinsicId");
Expand Down
Loading

0 comments on commit dc7d7bc

Please sign in to comment.