Skip to content

Commit

Permalink
JIT: Optimize redundant memory barriers on arm/arm64 (#60219)
Browse files Browse the repository at this point in the history
  • Loading branch information
EgorBo committed Oct 13, 2021
1 parent 09c1a1f commit 845e66c
Show file tree
Hide file tree
Showing 7 changed files with 136 additions and 35 deletions.
32 changes: 32 additions & 0 deletions src/coreclr/jit/codegenarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1883,4 +1883,36 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni
#endif // USING_SCOPE_INFO
}

//-----------------------------------------------------------------------------------
// instGen_MemoryBarrier: Emit a MemoryBarrier instruction
//
// Arguments:
// barrierKind - kind of barrier to emit (ignored on arm32)
//
// Notes:
// All MemoryBarriers instructions can be removed by DOTNET_JitNoMemoryBarriers=1
// barrierKind argument is ignored on arm32 and a full memory barrier is emitted
//
void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind)
{
#ifdef DEBUG
if (JitConfig.JitNoMemoryBarriers() == 1)
{
return;
}
#endif // DEBUG

// Avoid emitting redundant memory barriers on arm32 if they belong to the same IG
// and there were no memory accesses in-between them
if ((GetEmitter()->emitLastMemBarrier != nullptr) && compiler->opts.OptimizationEnabled())
{
assert(GetEmitter()->emitLastMemBarrier->idSmallCns() == INS_BARRIER_SY);
}
else
{
// ARM has only full barriers, so all barriers need to be emitted as full.
GetEmitter()->emitIns_I(INS_dmb, EA_4BYTE, INS_BARRIER_SY);
}
}

#endif // TARGET_ARM
50 changes: 50 additions & 0 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9510,4 +9510,54 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni
}
}

//-----------------------------------------------------------------------------------
// instGen_MemoryBarrier: Emit a MemoryBarrier instruction
//
// Arguments:
// barrierKind - kind of barrier to emit (Full or Load-Only).
//
// Notes:
// All MemoryBarriers instructions can be removed by DOTNET_JitNoMemoryBarriers=1
//
void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind)
{
#ifdef DEBUG
if (JitConfig.JitNoMemoryBarriers() == 1)
{
return;
}
#endif // DEBUG

// Avoid emitting redundant memory barriers on arm64 if they belong to the same IG
// and there were no memory accesses in-between them
emitter::instrDesc* lastMemBarrier = GetEmitter()->emitLastMemBarrier;
if ((lastMemBarrier != nullptr) && compiler->opts.OptimizationEnabled())
{
BarrierKind prevBarrierKind = BARRIER_FULL;
if (lastMemBarrier->idSmallCns() == INS_BARRIER_ISHLD)
{
prevBarrierKind = BARRIER_LOAD_ONLY;
}
else
{
// Currently we only emit two kinds of barriers on arm64:
// ISH - Full (inner shareable domain)
// ISHLD - LoadOnly (inner shareable domain)
assert(lastMemBarrier->idSmallCns() == INS_BARRIER_ISH);
}

if ((prevBarrierKind == BARRIER_LOAD_ONLY) && (barrierKind == BARRIER_FULL))
{
// Previous memory barrier: load-only, current: full
// Upgrade the previous one to full
assert((prevBarrierKind == BARRIER_LOAD_ONLY) && (barrierKind == BARRIER_FULL));
lastMemBarrier->idSmallCns(INS_BARRIER_ISH);
}
}
else
{
GetEmitter()->emitIns_BARR(INS_dmb, barrierKind == BARRIER_LOAD_ONLY ? INS_BARRIER_ISHLD : INS_BARRIER_ISH);
}
}

#endif // TARGET_ARM64
26 changes: 26 additions & 0 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9135,4 +9135,30 @@ void CodeGen::genPushCalleeSavedRegisters()
}
}

//-----------------------------------------------------------------------------------
// instGen_MemoryBarrier: Emit a MemoryBarrier instruction
//
// Arguments:
// barrierKind - kind of barrier to emit (Load-only is no-op on xarch)
//
// Notes:
// All MemoryBarriers instructions can be removed by DOTNET_JitNoMemoryBarriers=1
//
void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind)
{
#ifdef DEBUG
if (JitConfig.JitNoMemoryBarriers() == 1)
{
return;
}
#endif // DEBUG

// only full barrier needs to be emitted on Xarch
if (barrierKind == BARRIER_FULL)
{
instGen(INS_lock);
GetEmitter()->emitIns_I_AR(INS_or, EA_4BYTE, 0, REG_SPBASE, 0);
}
}

#endif // TARGET_XARCH
20 changes: 20 additions & 0 deletions src/coreclr/jit/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -703,6 +703,11 @@ insGroup* emitter::emitSavIG(bool emitAdd)
ig = emitCurIG;
assert(ig);

#ifdef TARGET_ARMARCH
// Reset emitLastMemBarrier for new IG
emitLastMemBarrier = nullptr;
#endif

// Compute how much code we've generated

sz = emitCurIGfreeNext - emitCurIGfreeBase;
Expand Down Expand Up @@ -1140,6 +1145,10 @@ void emitter::emitBegFN(bool hasFramePtr

emitLastIns = nullptr;

#ifdef TARGET_ARMARCH
emitLastMemBarrier = nullptr;
#endif

ig->igNext = nullptr;

#ifdef DEBUG
Expand Down Expand Up @@ -1303,6 +1312,17 @@ void emitter::dispIns(instrDesc* id)

void emitter::appendToCurIG(instrDesc* id)
{
#ifdef TARGET_ARMARCH
if (id->idIns() == INS_dmb)
{
emitLastMemBarrier = id;
}
else if (emitInsIsLoadOrStore(id->idIns()))
{
// A memory access - reset saved memory barrier
emitLastMemBarrier = nullptr;
}
#endif
emitCurIGsize += id->idCodeSize();
}

Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -1884,6 +1884,10 @@ class emitter

instrDesc* emitLastIns;

#ifdef TARGET_ARMARCH
instrDesc* emitLastMemBarrier;
#endif

#ifdef DEBUG
void emitCheckIGoffsets();
#endif
Expand Down
35 changes: 0 additions & 35 deletions src/coreclr/jit/instr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2386,41 +2386,6 @@ void CodeGen::instGen_Return(unsigned stkArgSize)
#endif
}

/*****************************************************************************
*
* Emit a MemoryBarrier instruction
*
* Note: all MemoryBarriers instructions can be removed by
* SET COMPlus_JitNoMemoryBarriers=1
*/
void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind)
{
#ifdef DEBUG
if (JitConfig.JitNoMemoryBarriers() == 1)
{
return;
}
#endif // DEBUG

#if defined(TARGET_XARCH)
// only full barrier needs to be emitted on Xarch
if (barrierKind != BARRIER_FULL)
{
return;
}

instGen(INS_lock);
GetEmitter()->emitIns_I_AR(INS_or, EA_4BYTE, 0, REG_SPBASE, 0);
#elif defined(TARGET_ARM)
// ARM has only full barriers, so all barriers need to be emitted as full.
GetEmitter()->emitIns_I(INS_dmb, EA_4BYTE, 0xf);
#elif defined(TARGET_ARM64)
GetEmitter()->emitIns_BARR(INS_dmb, barrierKind == BARRIER_LOAD_ONLY ? INS_BARRIER_ISHLD : INS_BARRIER_ISH);
#else
#error "Unknown TARGET"
#endif
}

/*****************************************************************************
*
* Machine independent way to move a Zero value into a register
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/instr.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,10 @@ enum insOpts: unsigned
INS_OPTS_ASR,
INS_OPTS_ROR
};
enum insBarrier : unsigned
{
INS_BARRIER_SY = 15
};
#elif defined(TARGET_ARM64)
enum insOpts : unsigned
{
Expand Down

0 comments on commit 845e66c

Please sign in to comment.