Skip to content

Commit

Permalink
[RyuJIT] Implement Interlocked.And and Interlocked.Or for arm64-v8.1 (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
EgorBo authored Feb 10, 2021
1 parent 69425a7 commit af25fe6
Show file tree
Hide file tree
Showing 23 changed files with 429 additions and 6 deletions.
24 changes: 22 additions & 2 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2789,10 +2789,10 @@ void CodeGen::genJumpTable(GenTree* treeNode)
}

//------------------------------------------------------------------------
// genLockedInstructions: Generate code for a GT_XADD or GT_XCHG node.
// genLockedInstructions: Generate code for a GT_XADD, GT_XAND, GT_XORR or GT_XCHG node.
//
// Arguments:
// treeNode - the GT_XADD/XCHG node
// treeNode - the GT_XADD/XAND/XORR/XCHG node
//
void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
{
Expand All @@ -2813,6 +2813,19 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)

switch (treeNode->gtOper)
{
case GT_XORR:
GetEmitter()->emitIns_R_R_R(INS_ldsetal, dataSize, dataReg, (targetReg == REG_NA) ? REG_ZR : targetReg,
addrReg);
break;
case GT_XAND:
{
// Grab a temp reg to perform `MVN` for dataReg first.
regNumber tempReg = treeNode->GetSingleTempReg();
GetEmitter()->emitIns_R_R(INS_mvn, dataSize, tempReg, dataReg);
GetEmitter()->emitIns_R_R_R(INS_ldclral, dataSize, tempReg, (targetReg == REG_NA) ? REG_ZR : targetReg,
addrReg);
break;
}
case GT_XCHG:
GetEmitter()->emitIns_R_R_R(INS_swpal, dataSize, dataReg, targetReg, addrReg);
break;
Expand All @@ -2826,6 +2839,9 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
}
else
{
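// GT_XORR/GT_XAND are only created by the importer when Atomics are supported;
// otherwise Interlocked.Or/And are imported normally and never reach this path.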
assert(!treeNode->OperIs(GT_XORR, GT_XAND));

regNumber exResultReg = treeNode->ExtractTempReg(RBM_ALLINT);
regNumber storeDataReg = (treeNode->OperGet() == GT_XCHG) ? dataReg : treeNode->ExtractTempReg(RBM_ALLINT);
regNumber loadReg = (targetReg != REG_NA) ? targetReg : storeDataReg;
Expand Down Expand Up @@ -6217,6 +6233,10 @@ void CodeGen::genArm64EmitterUnitTests()
theEmitter->emitIns_R_R_R(INS_ldadd, EA_8BYTE, REG_R8, REG_R9, REG_R10);
theEmitter->emitIns_R_R_R(INS_ldadda, EA_8BYTE, REG_R8, REG_R9, REG_R10);
theEmitter->emitIns_R_R_R(INS_ldaddal, EA_8BYTE, REG_R8, REG_R9, REG_R10);
theEmitter->emitIns_R_R_R(INS_ldclral, EA_4BYTE, REG_R8, REG_R9, REG_R10);
theEmitter->emitIns_R_R_R(INS_ldclral, EA_8BYTE, REG_R8, REG_R9, REG_R10);
theEmitter->emitIns_R_R_R(INS_ldsetal, EA_4BYTE, REG_R8, REG_R9, REG_R10);
theEmitter->emitIns_R_R_R(INS_ldsetal, EA_8BYTE, REG_R8, REG_R9, REG_R10);
theEmitter->emitIns_R_R_R(INS_ldaddl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
theEmitter->emitIns_R_R_R(INS_swpb, EA_4BYTE, REG_R8, REG_R9, REG_R10);
theEmitter->emitIns_R_R_R(INS_swpab, EA_4BYTE, REG_R8, REG_R9, REG_R10);
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,8 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)

#ifdef TARGET_ARM64
case GT_XCHG:
case GT_XORR:
case GT_XAND:
case GT_XADD:
genLockedInstructions(treeNode->AsOp());
break;
Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1735,6 +1735,11 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
genLockedInstructions(treeNode->AsOp());
break;

case GT_XORR:
case GT_XAND:
NYI("Interlocked.Or and Interlocked.And aren't implemented for x86 yet.");
break;

case GT_MEMORYBARRIER:
{
CodeGen::BarrierKind barrierKind =
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/decomposelongs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,8 @@ GenTree* DecomposeLongs::DecomposeNode(GenTree* tree)
#endif // FEATURE_SIMD

case GT_LOCKADD:
case GT_XORR:
case GT_XAND:
case GT_XADD:
case GT_XCHG:
case GT_CMPXCHG:
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6105,6 +6105,8 @@ void emitter::emitIns_R_R_R(
case INS_ldadda:
case INS_ldaddal:
case INS_ldaddl:
case INS_ldclral:
case INS_ldsetal:
case INS_swpb:
case INS_swpab:
case INS_swpalb:
Expand Down
4 changes: 3 additions & 1 deletion src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5483,7 +5483,7 @@ GenTree* GenTree::gtGetParent(GenTree*** parentChildPtrPtr) const

bool GenTree::OperRequiresAsgFlag()
{
if (OperIs(GT_ASG) || OperIs(GT_XADD, GT_XCHG, GT_LOCKADD, GT_CMPXCHG, GT_MEMORYBARRIER))
if (OperIs(GT_ASG) || OperIs(GT_XADD, GT_XORR, GT_XAND, GT_XCHG, GT_LOCKADD, GT_CMPXCHG, GT_MEMORYBARRIER))
{
return true;
}
Expand Down Expand Up @@ -5558,6 +5558,8 @@ bool GenTree::OperIsImplicitIndir() const
switch (gtOper)
{
case GT_LOCKADD:
case GT_XORR:
case GT_XAND:
case GT_XADD:
case GT_XCHG:
case GT_CMPXCHG:
Expand Down
13 changes: 12 additions & 1 deletion src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1519,7 +1519,18 @@ struct GenTree

static bool OperIsAtomicOp(genTreeOps gtOper)
{
return (gtOper == GT_XADD || gtOper == GT_XCHG || gtOper == GT_LOCKADD || gtOper == GT_CMPXCHG);
switch (gtOper)
{
case GT_XADD:
case GT_XORR:
case GT_XAND:
case GT_XCHG:
case GT_LOCKADD:
case GT_CMPXCHG:
return true;
default:
return false;
}
}

bool OperIsAtomicOp() const
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/gtlist.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ GTNODE(ARR_LENGTH , GenTreeArrLen ,0,(GTK_UNOP|GTK_EXOP)) // arr
GTNODE(INTRINSIC , GenTreeIntrinsic ,0,(GTK_BINOP|GTK_EXOP)) // intrinsics

GTNODE(LOCKADD , GenTreeOp ,0,(GTK_BINOP|GTK_NOVALUE))
GTNODE(XAND , GenTreeOp ,0,GTK_BINOP)
GTNODE(XORR , GenTreeOp ,0,GTK_BINOP)
GTNODE(XADD , GenTreeOp ,0,GTK_BINOP)
GTNODE(XCHG , GenTreeOp ,0,GTK_BINOP)
GTNODE(CMPXCHG , GenTreeCmpXchg ,0,GTK_SPECIAL)
Expand Down
33 changes: 33 additions & 0 deletions src/coreclr/jit/importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4337,6 +4337,25 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
break;
}

#ifdef TARGET_ARM64
// Intrinsify Interlocked.Or and Interlocked.And only for arm64-v8.1 (and newer)
// TODO-CQ: Implement for XArch (https://github.com/dotnet/runtime/issues/32239).
case NI_System_Threading_Interlocked_Or:
case NI_System_Threading_Interlocked_And:
{
if (opts.OptimizationEnabled() && compOpportunisticallyDependsOn(InstructionSet_Atomics))
{
assert(sig->numArgs == 2);
GenTree* op2 = impPopStack().val;
GenTree* op1 = impPopStack().val;
genTreeOps op = (ni == NI_System_Threading_Interlocked_Or) ? GT_XORR : GT_XAND;
retNode = gtNewOperNode(op, genActualType(callType), op1, op2);
retNode->gtFlags |= GTF_GLOB_REF | GTF_ASG;
}
break;
}
#endif // TARGET_ARM64

#ifdef FEATURE_HW_INTRINSICS
case NI_System_Math_FusedMultiplyAdd:
{
Expand Down Expand Up @@ -4894,6 +4913,20 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
result = NI_System_Threading_Thread_get_ManagedThreadId;
}
}
#ifdef TARGET_ARM64
// Recognize Interlocked.And/Or as named intrinsics on ARM64 only. This guard must match the
// `#ifdef TARGET_ARM64` around the NI_System_Threading_Interlocked_Or/And cases in impIntrinsic,
// which is the only place these ids are expanded (into GT_XORR/GT_XAND). With `#ifndef` the ids
// were never produced on ARM64, so that expansion could not be reached.
// TODO-CQ: Implement for XArch (https://github.com/dotnet/runtime/issues/32239).
else if (strcmp(className, "Interlocked") == 0)
{
if (strcmp(methodName, "And") == 0)
{
result = NI_System_Threading_Interlocked_And;
}
else if (strcmp(methodName, "Or") == 0)
{
result = NI_System_Threading_Interlocked_Or;
}
}
#endif // TARGET_ARM64
}
#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
else if (strcmp(namespaceName, "System.Buffers.Binary") == 0)
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/instrsarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -1194,6 +1194,12 @@ INST1(ldadda, "ldadda", LD|ST, IF_LS_3E, 0xB8A00000)
INST1(ldaddal, "ldaddal", LD|ST, IF_LS_3E, 0xB8E00000)
// ldaddal Rm, Rt, [Xn] LS_3E 1X111000111mmmmm 000000nnnnnttttt B8E0 0000 Rm Rt Rn ARMv8.1 LSE Atomics

INST1(ldclral, "ldclral", LD|ST, IF_LS_3E, 0xB8E01000)
// ldclral Rm, Rt, [Xn] LS_3E 1X111000111mmmmm 000100nnnnnttttt B8E0 1000 Rm Rt Rn ARMv8.1 LSE Atomics

INST1(ldsetal, "ldsetal", LD|ST, IF_LS_3E, 0xB8E03000)
// ldsetal Rm, Rt, [Xn] LS_3E 1X111000111mmmmm 001100nnnnnttttt B8E0 3000 Rm Rt Rn ARMv8.1 LSE Atomics

INST1(ldaddl, "ldaddl", LD|ST, IF_LS_3E, 0xB8600000)
// ldaddl Rm, Rt, [Xn] LS_3E 1X111000011mmmmm 000000nnnnnttttt B860 0000 Rm Rt Rn ARMv8.1 LSE Atomics

Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/liveness.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,8 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree)

// We'll assume these are use-then-defs of memory.
case GT_LOCKADD:
case GT_XORR:
case GT_XAND:
case GT_XADD:
case GT_XCHG:
case GT_CMPXCHG:
Expand Down Expand Up @@ -2052,6 +2054,8 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR
break;

case GT_LOCKADD:
case GT_XORR:
case GT_XAND:
case GT_XADD:
case GT_XCHG:
case GT_CMPXCHG:
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,10 +318,14 @@ GenTree* Lowering::LowerNode(GenTree* node)
CheckImmedAndMakeContained(node, node->AsCmpXchg()->gtOpComparand);
break;

case GT_XORR:
case GT_XAND:
case GT_XADD:
CheckImmedAndMakeContained(node, node->AsOp()->gtOp2);
break;
#elif defined(TARGET_XARCH)
case GT_XORR:
case GT_XAND:
case GT_XADD:
if (node->IsUnusedValue())
{
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const
#ifdef TARGET_ARM64
case GT_CMPXCHG:
case GT_LOCKADD:
case GT_XORR:
case GT_XAND:
case GT_XADD:
return comp->compOpportunisticallyDependsOn(InstructionSet_Atomics)
? false
Expand Down
7 changes: 7 additions & 0 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,8 @@ int LinearScan::BuildNode(GenTree* tree)
break;

case GT_LOCKADD:
case GT_XORR:
case GT_XAND:
case GT_XADD:
case GT_XCHG:
{
Expand All @@ -440,6 +442,11 @@ int LinearScan::BuildNode(GenTree* tree)
buildInternalIntRegisterDefForNode(tree);
}
}
else if (tree->OperIs(GT_XAND))
{
// GT_XAND is emitted as MVN + LDCLRAL, so it needs an internal register for the inverted operand.
buildInternalIntRegisterDefForNode(tree);
}

assert(!tree->gtGetOp1()->isContained());
RefPosition* op1Use = BuildUse(tree->gtGetOp1());
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,8 @@ int LinearScan::BuildNode(GenTree* tree)
}
break;

case GT_XORR:
case GT_XAND:
case GT_XADD:
case GT_XCHG:
{
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/namedintrinsiclist.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ enum NamedIntrinsic : unsigned short
NI_IsSupported_Dynamic,
NI_Throw_PlatformNotSupportedException,

NI_System_Threading_Interlocked_And,
NI_System_Threading_Interlocked_Or,

#ifdef FEATURE_HW_INTRINSICS
NI_HW_INTRINSIC_START,
#if defined(TARGET_XARCH)
Expand Down
4 changes: 3 additions & 1 deletion src/coreclr/jit/optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7155,7 +7155,7 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack<BasicBlock*>* blo
m_beforeSideEffect = false;
}
}
else if (tree->OperIs(GT_XADD, GT_XCHG, GT_LOCKADD, GT_CMPXCHG, GT_MEMORYBARRIER))
else if (tree->OperIs(GT_XADD, GT_XORR, GT_XAND, GT_XCHG, GT_LOCKADD, GT_CMPXCHG, GT_MEMORYBARRIER))
{
// If this node is a MEMORYBARRIER or an Atomic operation
// then don't hoist and stop any further hoisting after this node
Expand Down Expand Up @@ -7969,6 +7969,8 @@ bool Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
break;

case GT_LOCKADD:
case GT_XORR:
case GT_XAND:
case GT_XADD:
case GT_XCHG:
case GT_CMPXCHG:
Expand Down
4 changes: 3 additions & 1 deletion src/coreclr/jit/valuenum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5557,7 +5557,7 @@ void ValueNumStore::vnDumpSimdType(Compiler* comp, VNFuncApp* simdType)
static UINT8 vnfOpAttribs[VNF_COUNT];
static genTreeOps genTreeOpsIllegalAsVNFunc[] = {GT_IND, // When we do heap memory.
GT_NULLCHECK, GT_QMARK, GT_COLON, GT_LOCKADD, GT_XADD, GT_XCHG,
GT_CMPXCHG, GT_LCLHEAP, GT_BOX,
GT_CMPXCHG, GT_LCLHEAP, GT_BOX, GT_XORR, GT_XAND,

// These need special semantics:
GT_COMMA, // == second argument (but with exception(s) from first).
Expand Down Expand Up @@ -8538,6 +8538,8 @@ void Compiler::fgValueNumberTree(GenTree* tree)
noway_assert("LOCKADD should not appear before lowering");
break;

case GT_XORR: // Binop
case GT_XAND: // Binop
case GT_XADD: // Binop
case GT_XCHG: // Binop
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ public static ulong Read(ref ulong location) =>
/// <param name="value">The value to be combined with the integer at <paramref name="location1"/>.</param>
/// <returns>The original value in <paramref name="location1"/>.</returns>
/// <exception cref="NullReferenceException">The address of <paramref name="location1"/> is a null pointer.</exception>
[Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int And(ref int location1, int value)
{
Expand Down Expand Up @@ -195,6 +196,7 @@ public static uint And(ref uint location1, uint value) =>
/// <param name="value">The value to be combined with the integer at <paramref name="location1"/>.</param>
/// <returns>The original value in <paramref name="location1"/>.</returns>
/// <exception cref="NullReferenceException">The address of <paramref name="location1"/> is a null pointer.</exception>
[Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long And(ref long location1, long value)
{
Expand Down Expand Up @@ -228,6 +230,7 @@ public static ulong And(ref ulong location1, ulong value) =>
/// <param name="value">The value to be combined with the integer at <paramref name="location1"/>.</param>
/// <returns>The original value in <paramref name="location1"/>.</returns>
/// <exception cref="NullReferenceException">The address of <paramref name="location1"/> is a null pointer.</exception>
[Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int Or(ref int location1, int value)
{
Expand Down Expand Up @@ -259,6 +262,7 @@ public static uint Or(ref uint location1, uint value) =>
/// <param name="value">The value to be combined with the integer at <paramref name="location1"/>.</param>
/// <returns>The original value in <paramref name="location1"/>.</returns>
/// <exception cref="NullReferenceException">The address of <paramref name="location1"/> is a null pointer.</exception>
[Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long Or(ref long location1, long value)
{
Expand Down
Loading

0 comments on commit af25fe6

Please sign in to comment.