Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: Avoid calling GC write barrier for byrefs #105934

Closed
wants to merge 15 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions src/coreclr/jit/codegenarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -857,12 +857,10 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
}
else
{
unsigned gcPtrCount = layout->GetGCPtrCount();

unsigned i = 0;
while (i < slots)
{
if (!layout->IsGCPtr(i))
if (!layout->IsGCRef(i))
{
emit->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
INS_FLAGS_DONT_CARE, INS_OPTS_LDST_POST_INC);
Expand All @@ -872,11 +870,10 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
else
{
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
gcPtrCount--;
}

++i;
}
assert(gcPtrCount == 0);
}

if (cpObjNode->IsVolatile())
Expand Down
10 changes: 3 additions & 7 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3740,8 +3740,6 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
}
else
{
unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount();

// We might also need SIMD regs if we have 4 or more contiguous non-GC-ref slots.
// On ARM64, SIMD loads/stores provide 8-byte atomicity guarantees when aligned to 8 bytes.
regNumber tmpSimdReg1 = REG_NA;
Expand All @@ -3755,15 +3753,15 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
unsigned i = 0;
while (i < slots)
{
if (!layout->IsGCPtr(i))
if (!layout->IsGCRef(i))
{
// How many continuous non-gc slots do we have?
// How many contiguous non-GC-ref slots do we have?
unsigned nonGcSlots = 0;
do
{
nonGcSlots++;
i++;
} while ((i < slots) && !layout->IsGCPtr(i));
} while ((i < slots) && !layout->IsGCRef(i));

const regNumber srcReg = REG_WRITE_BARRIER_SRC_BYREF;
const regNumber dstReg = REG_WRITE_BARRIER_DST_BYREF;
Expand Down Expand Up @@ -3802,11 +3800,9 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
{
// In the case of a GC ref slot (IsGCRef) we'll call the ByRef write barrier helper
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
gcPtrCount--;
i++;
}
}
assert(gcPtrCount == 0);
}

if (cpObjNode->IsVolatile())
Expand Down
10 changes: 3 additions & 7 deletions src/coreclr/jit/codegenloongarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2325,15 +2325,13 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
}
else
{
unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount();

unsigned i = 0;
while (i < slots)
{
if (!layout->IsGCPtr(i))
if (!layout->IsGCRef(i))
{
// Check if the next slot's type is also TYP_GC_NONE and use two load/store
if ((i + 1 < slots) && !layout->IsGCPtr(i + 1))
// Check if the next slot's type is also non-ref and use two load/store
if ((i + 1 < slots) && !layout->IsGCRef(i + 1))
{
if ((i + 2) == slots)
{
Expand Down Expand Up @@ -2369,11 +2367,9 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
{
// In the case of a GC ref slot (IsGCRef) we'll call the ByRef write barrier helper
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
gcPtrCount--;
}
++i;
}
assert(gcPtrCount == 0);
}

if (cpObjNode->IsVolatile())
Expand Down
10 changes: 3 additions & 7 deletions src/coreclr/jit/codegenriscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2247,15 +2247,13 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
}
else
{
unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount();

unsigned i = 0;
while (i < slots)
{
if (!layout->IsGCPtr(i))
if (!layout->IsGCRef(i))
{
// Check if the next slot's type is also TYP_GC_NONE and use two ld/sd
if ((i + 1 < slots) && !layout->IsGCPtr(i + 1))
// Check if the next slot's type is also non-ref and use two ld/sd
if ((i + 1 < slots) && !layout->IsGCRef(i + 1))
{
if ((i + 2) == slots)
{
Expand Down Expand Up @@ -2291,11 +2289,9 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
{
// In the case of a GC ref slot (IsGCRef) we'll call the ByRef write barrier helper
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
gcPtrCount--;
}
++i;
}
assert(gcPtrCount == 0);
}

if (cpObjNode->IsVolatile())
Expand Down
22 changes: 9 additions & 13 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4321,32 +4321,31 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
}
else
{
ClassLayout* layout = cpObjNode->GetLayout();
unsigned gcPtrCount = layout->GetGCPtrCount();
ClassLayout* layout = cpObjNode->GetLayout();

unsigned i = 0;
while (i < slots)
{
if (!layout->IsGCPtr(i))
if (!layout->IsGCRef(i))
{
// Let's see if we can use rep movsp instead of a sequence of movsp instructions
// to save cycles and code size.
unsigned nonGcSlotCount = 0;
unsigned nonRefSlotCount = 0;

do
{
nonGcSlotCount++;
nonRefSlotCount++;
i++;
} while ((i < slots) && !layout->IsGCPtr(i));
} while ((i < slots) && !layout->IsGCRef(i));

// If we have a very small contiguous non-GC-ref region, it's better just to
// emit a sequence of movsp instructions
if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
if (nonRefSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
{
while (nonGcSlotCount > 0)
while (nonRefSlotCount > 0)
{
instGen(INS_movsp);
nonGcSlotCount--;
nonRefSlotCount--;
}
}
else
Expand All @@ -4355,19 +4354,16 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
// rep movsp (alias for movsd/movsq for x86/x64)
assert((internalRegisters.GetAll(cpObjNode) & RBM_RCX) != 0);

GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonRefSlotCount);
instGen(INS_r_movsp);
}
}
else
{
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
gcPtrCount--;
i++;
}
}

assert(gcPtrCount == 0);
}

// Clear the gcInfo for RSI and RDI.
Expand Down
18 changes: 9 additions & 9 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -475,37 +475,37 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
// we can use REP MOVSD/Q instead of a sequence of MOVSD/Q instructions. According to the
// Intel Manual, the sweet spot for small structs is between 4 to 12 slots of size where
// the entire operation takes 20 cycles and encodes in 5 bytes (loading RCX and REP MOVSD/Q).
unsigned nonGCSlots = 0;
unsigned nonRefSlots = 0;

if (dstAddr->OperIs(GT_LCL_ADDR) || layout->IsStackOnly(comp))
{
// If the destination is on the stack then no write barriers are needed.
nonGCSlots = layout->GetSlotCount();
nonRefSlots = layout->GetSlotCount();
}
else
{
// Otherwise a write barrier is needed for every GC pointer in the layout
// so we need to check if there's a long enough sequence of non-GC slots.
// Otherwise a write barrier is needed for every TYP_REF pointer in the layout
// so we need to check if there's a long enough sequence of non-TYP_REF slots.
unsigned slots = layout->GetSlotCount();
for (unsigned i = 0; i < slots; i++)
{
if (layout->IsGCPtr(i))
if (layout->IsGCRef(i))
{
nonGCSlots = 0;
nonRefSlots = 0;
}
else
{
nonGCSlots++;
nonRefSlots++;

if (nonGCSlots >= CPOBJ_NONGC_SLOTS_LIMIT)
if (nonRefSlots >= CPOBJ_NONGC_SLOTS_LIMIT)
{
break;
}
}
}
}

if (nonGCSlots >= CPOBJ_NONGC_SLOTS_LIMIT)
if (nonRefSlots >= CPOBJ_NONGC_SLOTS_LIMIT)
{
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindCpObjRepInstr;
}
Expand Down
Loading