Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RyuJIT] Implement Interlocked.And and Interlocked.Or for arm64-v8.1 #46253

Merged
merged 14 commits into from
Feb 10, 2021
24 changes: 22 additions & 2 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2786,10 +2786,10 @@ void CodeGen::genJumpTable(GenTree* treeNode)
}

//------------------------------------------------------------------------
// genLockedInstructions: Generate code for a GT_XADD or GT_XCHG node.
// genLockedInstructions: Generate code for a GT_XADD, GT_XAND, GT_XXOR or GT_XCHG node.
//
// Arguments:
// treeNode - the GT_XADD/XCHG node
// treeNode - the GT_XADD/XAND/XXOR/XCHG node
//
void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
{
Expand All @@ -2810,6 +2810,19 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)

switch (treeNode->gtOper)
{
case GT_XXOR:
GetEmitter()->emitIns_R_R_R(INS_ldsetal, dataSize, dataReg, (targetReg == REG_NA) ? REG_ZR : targetReg,
addrReg);
break;
case GT_XAND:
{
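// ldclral atomically clears the bits that are set in its operand (mem &= ~operand),
// so invert dataReg with `MVN` into a temp reg first to get AND semantics.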
regNumber tempReg = treeNode->GetSingleTempReg();
GetEmitter()->emitIns_R_R(INS_mvn, dataSize, tempReg, dataReg);
GetEmitter()->emitIns_R_R_R(INS_ldclral, dataSize, tempReg, (targetReg == REG_NA) ? REG_ZR : targetReg,
addrReg);
break;
}
case GT_XCHG:
GetEmitter()->emitIns_R_R_R(INS_swpal, dataSize, dataReg, targetReg, addrReg);
break;
Expand All @@ -2831,6 +2844,9 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
}
else
{
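// GT_XXOR/GT_XAND nodes are only created by the importer when Atomics are supported;
// otherwise Interlocked.Or/And are imported as ordinary calls and never reach this path.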
assert(!treeNode->OperIs(GT_XXOR, GT_XAND));

regNumber exResultReg = treeNode->ExtractTempReg(RBM_ALLINT);
regNumber storeDataReg = (treeNode->OperGet() == GT_XCHG) ? dataReg : treeNode->ExtractTempReg(RBM_ALLINT);
regNumber loadReg = (targetReg != REG_NA) ? targetReg : storeDataReg;
Expand Down Expand Up @@ -6221,6 +6237,10 @@ void CodeGen::genArm64EmitterUnitTests()
theEmitter->emitIns_R_R_R(INS_ldadd, EA_8BYTE, REG_R8, REG_R9, REG_R10);
theEmitter->emitIns_R_R_R(INS_ldadda, EA_8BYTE, REG_R8, REG_R9, REG_R10);
theEmitter->emitIns_R_R_R(INS_ldaddal, EA_8BYTE, REG_R8, REG_R9, REG_R10);
theEmitter->emitIns_R_R_R(INS_ldclral, EA_4BYTE, REG_R8, REG_R9, REG_R10);
theEmitter->emitIns_R_R_R(INS_ldclral, EA_8BYTE, REG_R8, REG_R9, REG_R10);
theEmitter->emitIns_R_R_R(INS_ldsetal, EA_4BYTE, REG_R8, REG_R9, REG_R10);
theEmitter->emitIns_R_R_R(INS_ldsetal, EA_8BYTE, REG_R8, REG_R9, REG_R10);
theEmitter->emitIns_R_R_R(INS_ldaddl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
theEmitter->emitIns_R_R_R(INS_swpb, EA_4BYTE, REG_R8, REG_R9, REG_R10);
theEmitter->emitIns_R_R_R(INS_swpab, EA_4BYTE, REG_R8, REG_R9, REG_R10);
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,8 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)

#ifdef TARGET_ARM64
case GT_XCHG:
case GT_XXOR:
case GT_XAND:
case GT_XADD:
genLockedInstructions(treeNode->AsOp());
break;
Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1706,6 +1706,11 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
genLockedInstructions(treeNode->AsOp());
break;

case GT_XXOR:
case GT_XAND:
NYI("Interlocked.Or and Interlocked.And aren't implemented for x86 yet.");
break;

case GT_MEMORYBARRIER:
{
CodeGen::BarrierKind barrierKind =
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/decomposelongs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,8 @@ GenTree* DecomposeLongs::DecomposeNode(GenTree* tree)
#endif // FEATURE_SIMD

case GT_LOCKADD:
case GT_XXOR:
case GT_XAND:
case GT_XADD:
case GT_XCHG:
case GT_CMPXCHG:
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6091,6 +6091,8 @@ void emitter::emitIns_R_R_R(
case INS_ldadda:
case INS_ldaddal:
case INS_ldaddl:
case INS_ldclral:
case INS_ldsetal:
case INS_swpb:
case INS_swpab:
case INS_swpalb:
Expand Down
4 changes: 3 additions & 1 deletion src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5474,7 +5474,7 @@ GenTree* GenTree::gtGetParent(GenTree*** parentChildPtrPtr) const

bool GenTree::OperRequiresAsgFlag()
{
if (OperIs(GT_ASG) || OperIs(GT_XADD, GT_XCHG, GT_LOCKADD, GT_CMPXCHG, GT_MEMORYBARRIER))
if (OperIs(GT_ASG) || OperIs(GT_XADD, GT_XXOR, GT_XAND, GT_XCHG, GT_LOCKADD, GT_CMPXCHG, GT_MEMORYBARRIER))
{
return true;
}
Expand Down Expand Up @@ -5549,6 +5549,8 @@ bool GenTree::OperIsImplicitIndir() const
switch (gtOper)
{
case GT_LOCKADD:
case GT_XXOR:
case GT_XAND:
case GT_XADD:
case GT_XCHG:
case GT_CMPXCHG:
Expand Down
13 changes: 12 additions & 1 deletion src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1519,7 +1519,18 @@ struct GenTree

static bool OperIsAtomicOp(genTreeOps gtOper)
{
return (gtOper == GT_XADD || gtOper == GT_XCHG || gtOper == GT_LOCKADD || gtOper == GT_CMPXCHG);
switch (gtOper)
{
case GT_XADD:
case GT_XXOR:
case GT_XAND:
case GT_XCHG:
case GT_LOCKADD:
case GT_CMPXCHG:
return true;
default:
return false;
}
}

bool OperIsAtomicOp() const
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/gtlist.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ GTNODE(ARR_LENGTH , GenTreeArrLen ,0,(GTK_UNOP|GTK_EXOP)) // arr
GTNODE(INTRINSIC , GenTreeIntrinsic ,0,(GTK_BINOP|GTK_EXOP)) // intrinsics

GTNODE(LOCKADD , GenTreeOp ,0,(GTK_BINOP|GTK_NOVALUE))
GTNODE(XAND , GenTreeOp ,0,GTK_BINOP)
GTNODE(XXOR , GenTreeOp ,0,GTK_BINOP)
EgorBo marked this conversation as resolved.
Show resolved Hide resolved
GTNODE(XADD , GenTreeOp ,0,GTK_BINOP)
GTNODE(XCHG , GenTreeOp ,0,GTK_BINOP)
GTNODE(CMPXCHG , GenTreeCmpXchg ,0,GTK_SPECIAL)
Expand Down
30 changes: 30 additions & 0 deletions src/coreclr/jit/importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4337,6 +4337,25 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
break;
}

#ifdef TARGET_ARM64
// Intrinsify Interlocked.Or and Interlocked.And only for arm64-v8.1 (and newer)
// TODO-CQ: Implement for XArch (https://github.com/dotnet/runtime/issues/32239).
case NI_System_Threading_Interlocked_Or:
case NI_System_Threading_Interlocked_And:
{
if (opts.OptimizationEnabled() && compOpportunisticallyDependsOn(InstructionSet_Atomics))
{
assert(sig->numArgs == 2);
GenTree* op2 = impPopStack().val;
GenTree* op1 = impPopStack().val;
genTreeOps op = (ni == NI_System_Threading_Interlocked_Or) ? GT_XXOR : GT_XAND;
retNode = gtNewOperNode(op, genActualType(callType), op1, op2);
retNode->gtFlags |= GTF_GLOB_REF | GTF_ASG;
}
break;
}
#endif // TARGET_ARM64

#ifdef FEATURE_HW_INTRINSICS
case NI_System_Math_FusedMultiplyAdd:
{
Expand Down Expand Up @@ -4845,6 +4864,17 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
result = NI_System_Threading_Thread_get_ManagedThreadId;
}
}
else if (strcmp(className, "Interlocked") == 0)
{
if (strcmp(methodName, "And") == 0)
{
result = NI_System_Threading_Interlocked_And;
}
else if (strcmp(methodName, "Or") == 0)
{
result = NI_System_Threading_Interlocked_Or;
}
}
}
#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
else if (strcmp(namespaceName, "System.Buffers.Binary") == 0)
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/instrsarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -1194,6 +1194,12 @@ INST1(ldadda, "ldadda", LD|ST, IF_LS_3E, 0xB8A00000)
INST1(ldaddal, "ldaddal", LD|ST, IF_LS_3E, 0xB8E00000)
// ldaddal Rm, Rt, [Xn] LS_3E 1X111000111mmmmm 000000nnnnnttttt B8E0 0000 Rm Rt Rn ARMv8.1 LSE Atomics

INST1(ldclral, "ldclral", LD|ST, IF_LS_3E, 0xB8E01000)
// ldclral Rm, Rt, [Xn] LS_3E 1X111000111mmmmm 000100nnnnnttttt B8E0 1000 Rm Rt Rn ARMv8.1 LSE Atomics

INST1(ldsetal, "ldsetal", LD|ST, IF_LS_3E, 0xB8E03000)
// ldsetal Rm, Rt, [Xn] LS_3E 1X111000111mmmmm 001100nnnnnttttt B8E0 3000 Rm Rt Rn ARMv8.1 LSE Atomics

INST1(ldaddl, "ldaddl", LD|ST, IF_LS_3E, 0xB8600000)
// ldaddl Rm, Rt, [Xn] LS_3E 1X111000011mmmmm 000000nnnnnttttt B860 0000 Rm Rt Rn ARMv8.1 LSE Atomics

Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/liveness.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,8 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree)

// We'll assume these are use-then-defs of memory.
case GT_LOCKADD:
case GT_XXOR:
case GT_XAND:
case GT_XADD:
case GT_XCHG:
case GT_CMPXCHG:
Expand Down Expand Up @@ -2052,6 +2054,8 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR
break;

case GT_LOCKADD:
case GT_XXOR:
case GT_XAND:
case GT_XADD:
case GT_XCHG:
case GT_CMPXCHG:
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,10 +318,14 @@ GenTree* Lowering::LowerNode(GenTree* node)
CheckImmedAndMakeContained(node, node->AsCmpXchg()->gtOpComparand);
break;

case GT_XXOR:
case GT_XAND:
case GT_XADD:
CheckImmedAndMakeContained(node, node->AsOp()->gtOp2);
break;
#elif defined(TARGET_XARCH)
case GT_XXOR:
case GT_XAND:
case GT_XADD:
if (node->IsUnusedValue())
{
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const
#ifdef TARGET_ARM64
case GT_CMPXCHG:
case GT_LOCKADD:
case GT_XXOR:
case GT_XAND:
case GT_XADD:
return comp->compOpportunisticallyDependsOn(InstructionSet_Atomics)
? false
Expand Down
7 changes: 7 additions & 0 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,8 @@ int LinearScan::BuildNode(GenTree* tree)
break;

case GT_LOCKADD:
case GT_XXOR:
case GT_XAND:
case GT_XADD:
case GT_XCHG:
{
Expand All @@ -440,6 +442,11 @@ int LinearScan::BuildNode(GenTree* tree)
buildInternalIntRegisterDefForNode(tree);
}
}
else if (tree->OperIs(GT_XAND))
{
// GT_XAND is emitted as MVN + ldclral, so it needs an internal register to hold the inverted operand.
buildInternalIntRegisterDefForNode(tree);
}

assert(!tree->gtGetOp1()->isContained());
RefPosition* op1Use = BuildUse(tree->gtGetOp1());
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,8 @@ int LinearScan::BuildNode(GenTree* tree)
}
break;

case GT_XXOR:
case GT_XAND:
case GT_XADD:
case GT_XCHG:
{
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/namedintrinsiclist.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ enum NamedIntrinsic : unsigned short
NI_IsSupported_Dynamic,
NI_Throw_PlatformNotSupportedException,

NI_System_Threading_Interlocked_And,
NI_System_Threading_Interlocked_Or,

#ifdef FEATURE_HW_INTRINSICS
NI_HW_INTRINSIC_START,
#if defined(TARGET_XARCH)
Expand Down
4 changes: 3 additions & 1 deletion src/coreclr/jit/optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7101,7 +7101,7 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack<BasicBlock*>* blo
m_beforeSideEffect = false;
}
}
else if (tree->OperIs(GT_XADD, GT_XCHG, GT_LOCKADD, GT_CMPXCHG, GT_MEMORYBARRIER))
else if (tree->OperIs(GT_XADD, GT_XXOR, GT_XAND, GT_XCHG, GT_LOCKADD, GT_CMPXCHG, GT_MEMORYBARRIER))
{
// If this node is a MEMORYBARRIER or an Atomic operation
// then don't hoist and stop any further hoisting after this node
Expand Down Expand Up @@ -7915,6 +7915,8 @@ bool Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
break;

case GT_LOCKADD:
case GT_XXOR:
case GT_XAND:
case GT_XADD:
case GT_XCHG:
case GT_CMPXCHG:
Expand Down
4 changes: 3 additions & 1 deletion src/coreclr/jit/valuenum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5251,7 +5251,7 @@ void ValueNumStore::vnDumpSimdType(Compiler* comp, VNFuncApp* simdType)
static UINT8 vnfOpAttribs[VNF_COUNT];
static genTreeOps genTreeOpsIllegalAsVNFunc[] = {GT_IND, // When we do heap memory.
GT_NULLCHECK, GT_QMARK, GT_COLON, GT_LOCKADD, GT_XADD, GT_XCHG,
GT_CMPXCHG, GT_LCLHEAP, GT_BOX,
GT_CMPXCHG, GT_LCLHEAP, GT_BOX, GT_XXOR, GT_XAND,

// These need special semantics:
GT_COMMA, // == second argument (but with exception(s) from first).
Expand Down Expand Up @@ -8232,6 +8232,8 @@ void Compiler::fgValueNumberTree(GenTree* tree)
noway_assert("LOCKADD should not appear before lowering");
break;

case GT_XXOR: // Binop
case GT_XAND: // Binop
case GT_XADD: // Binop
case GT_XCHG: // Binop
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ public static ulong Read(ref ulong location) =>
/// <param name="value">The value to be combined with the integer at <paramref name="location1"/>.</param>
/// <returns>The original value in <paramref name="location1"/>.</returns>
/// <exception cref="NullReferenceException">The address of <paramref name="location1"/> is a null pointer.</exception>
[Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int And(ref int location1, int value)
{
Expand Down Expand Up @@ -164,6 +165,7 @@ public static uint And(ref uint location1, uint value) =>
/// <param name="value">The value to be combined with the integer at <paramref name="location1"/>.</param>
/// <returns>The original value in <paramref name="location1"/>.</returns>
/// <exception cref="NullReferenceException">The address of <paramref name="location1"/> is a null pointer.</exception>
[Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long And(ref long location1, long value)
{
Expand Down Expand Up @@ -197,6 +199,7 @@ public static ulong And(ref ulong location1, ulong value) =>
/// <param name="value">The value to be combined with the integer at <paramref name="location1"/>.</param>
/// <returns>The original value in <paramref name="location1"/>.</returns>
/// <exception cref="NullReferenceException">The address of <paramref name="location1"/> is a null pointer.</exception>
[Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int Or(ref int location1, int value)
{
Expand Down Expand Up @@ -228,6 +231,7 @@ public static uint Or(ref uint location1, uint value) =>
/// <param name="value">The value to be combined with the integer at <paramref name="location1"/>.</param>
/// <returns>The original value in <paramref name="location1"/>.</returns>
/// <exception cref="NullReferenceException">The address of <paramref name="location1"/> is a null pointer.</exception>
[Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long Or(ref long location1, long value)
{
Expand Down
Loading