Skip to content

Commit

Permalink
[NativeAOT] Linux/ARM bring-up (4/n) (#97269)
Browse files Browse the repository at this point in the history
* Fix recursive generics for ARM

* Fix compilation on Debian Bookworm

* Implement thread return address hijacking for ARM

* Implement TrailingEpilogueInstructionsCount for ARM

* Fix comment

* Fix bugs in RhpGcProbeHijack logic

* Fix register trashing by INLINE_GETTHREAD in FixupHijackedCallstack on ARM

* Mask the Thumb bit when loading IP from probe frame

* Disable DwarfDump on linux-arm

* Cleanup

* Emit DWARF info with instruction addresses without Thumb bit (matches clang)

* Report R2/R3 registers in ForEachPossibleObjectRef

* Ensure that PInvokeTransitionFrame(s) on the stack are 8-byte aligned. Save FP return values on hijack.

* Tame the Thumb bit

* Fix GC hole when a thread hijack happens while the r0 register holds a reference (e.g. a boxed int)

* Update src/coreclr/nativeaot/Runtime/arm/GcProbe.S

---------

Co-authored-by: Vladimir Sadov <[email protected]>
  • Loading branch information
filipnavara and VSadov committed Jan 25, 2024
1 parent 758f34d commit 7e2dd59
Show file tree
Hide file tree
Showing 17 changed files with 417 additions and 36 deletions.
7 changes: 6 additions & 1 deletion src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,12 @@ COOP_PINVOKE_HELPER(uint8_t *, RhGetRuntimeVersion, (int32_t* pcbLength))

// Looks up the start address of the method that contains codeAddr by delegating to
// RuntimeInstance::FindMethodStartAddress.
//
// On ARM the returned address has the Thumb bit (bit 0) set. A null lookup result is
// returned unchanged so that callers can still tell that the lookup produced no address.
COOP_PINVOKE_HELPER(uint8_t *, RhFindMethodStartAddress, (void * codeAddr))
{
    uint8_t *startAddress = dac_cast<uint8_t *>(GetRuntimeInstance()->FindMethodStartAddress(dac_cast<PTR_VOID>(codeAddr)));
#if TARGET_ARM
    // Do not turn a null result into the bogus address 1.
    if (startAddress == NULL)
        return NULL;

    return startAddress + 1; // Set the Thumb bit
#else
    return startAddress;
#endif
}

PTR_UInt8 RuntimeInstance::FindMethodStartAddress(PTR_VOID ControlPC)
Expand Down
26 changes: 17 additions & 9 deletions src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "threadstore.inl"
#include "thread.inl"
#include "stressLog.h"
#include "CommonMacros.inl"

#include "shash.h"
#include "RuntimeInstance.h"
Expand Down Expand Up @@ -75,7 +76,7 @@ GVAL_IMPL_INIT(PTR_VOID, g_RhpRethrow2Addr, PointerToRhpRethrow2);
// EQUALS_RETURN_ADDRESS(x, func_name): compares the control PC x against the return-address
// label exported for func_name.
#ifdef DACCESS_COMPILE
#define EQUALS_RETURN_ADDRESS(x, func_name) ((x) == g_ ## func_name ## Addr)
#else
// The label address is passed through PCODEToPINSTR before the comparison so that it is in
// the same form as the control PC (presumably this strips the Thumb bit on ARM - the
// helper is defined outside this file).
#define EQUALS_RETURN_ADDRESS(x, func_name) (((x)) == (PTR_VOID)PCODEToPINSTR((PCODE)PointerTo ## func_name))
#endif

#ifdef DACCESS_COMPILE
Expand Down Expand Up @@ -178,7 +179,7 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PInvokeTransitionF

#if !defined(USE_PORTABLE_HELPERS) // @TODO: no portable version of regdisplay
memset(&m_RegDisplay, 0, sizeof(m_RegDisplay));
m_RegDisplay.SetIP((PCODE)pFrame->m_RIP);
m_RegDisplay.SetIP((PCODE)PCODEToPINSTR((PCODE)pFrame->m_RIP));
SetControlPC(dac_cast<PTR_VOID>(m_RegDisplay.GetIP()));

PTR_UIntNative pPreservedRegsCursor = (PTR_UIntNative)PTR_HOST_MEMBER(PInvokeTransitionFrame, pFrame, m_PreservedRegs);
Expand Down Expand Up @@ -384,9 +385,9 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CO
//
// control state
//
SetControlPC(dac_cast<PTR_VOID>(pCtx->GetIp()));
m_RegDisplay.SP = pCtx->GetSp();
m_RegDisplay.IP = pCtx->GetIp();
m_RegDisplay.IP = PCODEToPINSTR(pCtx->GetIp());
SetControlPC(dac_cast<PTR_VOID>(m_RegDisplay.GetIP()));

#ifdef TARGET_ARM
//
Expand Down Expand Up @@ -609,6 +610,8 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pC

m_RegDisplay.pR0 = (PTR_UIntNative)PTR_TO_REG(pCtx, R0);
m_RegDisplay.pR1 = (PTR_UIntNative)PTR_TO_REG(pCtx, R1);
m_RegDisplay.pR2 = (PTR_UIntNative)PTR_TO_REG(pCtx, R2);
m_RegDisplay.pR3 = (PTR_UIntNative)PTR_TO_REG(pCtx, R3);
m_RegDisplay.pR4 = (PTR_UIntNative)PTR_TO_REG(pCtx, R4);
m_RegDisplay.pR5 = (PTR_UIntNative)PTR_TO_REG(pCtx, R5);
m_RegDisplay.pR6 = (PTR_UIntNative)PTR_TO_REG(pCtx, R6);
Expand Down Expand Up @@ -991,7 +994,7 @@ void StackFrameIterator::UnwindFuncletInvokeThunk()
#endif

#if !defined(TARGET_ARM64)
m_RegDisplay.SetIP(*SP++);
m_RegDisplay.SetIP(PCODEToPINSTR(*SP++));
#endif

m_RegDisplay.SetSP((uintptr_t)dac_cast<TADDR>(SP));
Expand Down Expand Up @@ -1174,7 +1177,7 @@ void StackFrameIterator::UnwindUniversalTransitionThunk()
stackFrame->UnwindNonVolatileRegisters(&m_RegDisplay);

PTR_UIntNative addressOfPushedCallerIP = stackFrame->get_AddressOfPushedCallerIP();
m_RegDisplay.SetIP(*addressOfPushedCallerIP);
m_RegDisplay.SetIP(PCODEToPINSTR(*addressOfPushedCallerIP));
m_RegDisplay.SetSP((uintptr_t)dac_cast<TADDR>(stackFrame->get_CallerSP()));
SetControlPC(dac_cast<PTR_VOID>(m_RegDisplay.GetIP()));

Expand Down Expand Up @@ -1265,9 +1268,9 @@ void StackFrameIterator::UnwindThrowSiteThunk()
ASSERT_UNCONDITIONALLY("NYI for this arch");
#endif

m_RegDisplay.SetIP(pContext->IP);
m_RegDisplay.SetIP(PCODEToPINSTR(pContext->IP));
m_RegDisplay.SetSP(pContext->GetSp());
SetControlPC(dac_cast<PTR_VOID>(pContext->IP));
SetControlPC(dac_cast<PTR_VOID>(m_RegDisplay.GetIP()));

// We expect the throw site to be in managed code, and since this function's notion of how to unwind
// through the stub is brittle relative to the stub itself, we want to check as soon as we can.
Expand Down Expand Up @@ -1357,7 +1360,7 @@ void StackFrameIterator::NextInternal()
// if the thread is safe to walk, it better not have a hijack in place.
ASSERT(!m_pThread->IsHijacked());

SetControlPC(dac_cast<PTR_VOID>(m_RegDisplay.GetIP()));
SetControlPC(dac_cast<PTR_VOID>(PCODEToPINSTR(m_RegDisplay.GetIP())));

PTR_VOID collapsingTargetFrame = NULL;

Expand Down Expand Up @@ -1717,6 +1720,11 @@ bool StackFrameIterator::GetHijackedReturnValueLocation(PTR_OBJECTREF * pLocatio

// Records controlPC as both the current and the original control PC of the iterator.
void StackFrameIterator::SetControlPC(PTR_VOID controlPC)
{
#if TARGET_ARM
    // Callers must hand in an address with bit 0 (the Thumb bit) already cleared;
    // EQUALS_RETURN_ADDRESS relies on control PCs being stored consistently in that form.
    ASSERT(0 == ((uintptr_t)controlPC & 1));
#endif
    m_ControlPC = controlPC;
    m_OriginalControlPC = m_ControlPC;
}

Expand Down
111 changes: 109 additions & 2 deletions src/coreclr/nativeaot/Runtime/arm/GcProbe.S
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,105 @@
#include "AsmOffsets.inc"

.global RhpGcPoll2
.global RhpThrowHwEx

// See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves return registers
// (r0, r1 and d0-d3) and accepts the register bitmask.
// Call this macro first in the method (no further prolog instructions can be added after this).
//
// threadReg : register containing the Thread* (this will be preserved).
// trashReg : register that can be trashed by this macro
// BITMASK : value to initialize the m_Flags field with (register or #constant)
//
// Resulting layout, as offsets from the final sp (96 bytes in total):
//    0      : r11 (frame-chain pointer)
//    4      : lr (return address)
//    8      : r11 (caller's frame pointer)
//   12..19  : Thread* and flags (written through the OFFSETOF__PInvokeTransitionFrame__* offsets)
//   20..47  : r4-r10
//   48      : caller's SP
//   52..59  : r0, r1
//   60      : alignment padding
//   64..95  : d0-d3
.macro PUSH_PROBE_FRAME threadReg, trashReg, BITMASK
// Define the method prolog, allocating enough stack space for the PInvokeTransitionFrame and saving
// incoming register values into it.
PROLOG_VPUSH "{d0-d3}" // Save d0-d3 which can have the floating point return value
PROLOG_STACK_ALLOC 4 // Padding for 8-byte alignment
PROLOG_PUSH "{r0,r1}" // Save return registers
PROLOG_STACK_ALLOC 4 // Space for caller's SP
PROLOG_PUSH "{r4-r10}" // Save non-volatile registers
PROLOG_STACK_ALLOC 8 // Space for flags and Thread*
PROLOG_PUSH "{r11}" // Save caller's frame pointer
PROLOG_PUSH "{r11,lr}" // Save frame-chain pointer and return address

// Fill in the two slots reserved above: the owning thread and the flags bitmask.
str \threadReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_pThread]
mov \trashReg, \BITMASK
str \trashReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags]

// Compute SP value at entry to this method and save it in slot of the frame.
// 16 * 4 covers the sixteen 4-byte slots pushed/allocated above, 4 * 8 covers d0-d3.
add \trashReg, sp, #(16 * 4 + 4 * 8)
// The caller's SP slot sits above the twelve 4-byte slots at the bottom of the frame.
str \trashReg, [sp, #(12 * 4)]

// Link the frame into the Thread
str sp, [\threadReg, #OFFSETOF__Thread__m_pDeferredTransitionFrame]
.endm

//
// Remove the frame from a previous call to PUSH_PROBE_FRAME from the top of the stack and restore preserved
// registers and return value to their values from before the probe was called (while also updating any
// object refs or byrefs).
//
// The steps below undo PUSH_PROBE_FRAME in exact reverse order, so sp must point at the base of the
// probe frame when this macro is used. The macro only unwinds the frame; it does not return - the
// user is expected to branch (e.g. via the restored lr) afterwards.
//
.macro POP_PROBE_FRAME
EPILOG_POP "{r11,lr}" // Restore frame-chain pointer and return address
EPILOG_POP "{r11}" // Restore caller's frame pointer
EPILOG_STACK_FREE 8 // Discard flags and Thread*
EPILOG_POP "{r4-r10}" // Restore non-volatile registers
EPILOG_STACK_FREE 4 // Discard caller's SP
EPILOG_POP "{r0,r1}" // Restore return registers
EPILOG_STACK_FREE 4 // Discard padding for 8-byte alignment
EPILOG_VPOP "{d0-d3}" // Restore d0-d3 which can have the floating point return value
.endm

//
// The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and
// clears the hijack state.
//
// Register state on entry:
// All registers correct for return to the original return address.
//
// Register state on exit:
// r0, r1: preserved (saved and restored around INLINE_GETTHREAD)
// r2: thread pointer
// r3: trashed
// r12: hijacked return value flags (Thread::m_uHijackedReturnValueFlags before it is cleared)
// lr: original (pre-hijack) return address
//
.macro FixupHijackedCallstack
// r0/r1 may hold the return value of the hijacked method; keep them safe while
// INLINE_GETTHREAD produces its result in r0.
push {r0, r1}

// r0 <- GetThread()
INLINE_GETTHREAD

// Move the Thread* out of r0 before the saved return registers are restored.
mov r2, r0
pop {r0, r1}

// Fix the stack by restoring the original return address
ldr lr, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress]
// Fetch the return value flags before the field is zeroed below.
ldr r12, [r2, #OFFSETOF__Thread__m_uHijackedReturnValueFlags]

// Clear hijack state
mov r3, #0
str r3, [r2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation]
str r3, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress]
str r3, [r2, #OFFSETOF__Thread__m_uHijackedReturnValueFlags]
.endm

//
// Builds a probe frame, calls RhpWaitForGC2 with it, and then either returns to the
// address in lr or raises a thread abort.
//
// Register state on entry:
// r2: thread pointer
// r12: flags bitmask to store into the frame's m_Flags field
// r3: scratch (trashed by PUSH_PROBE_FRAME)
//
NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler
PUSH_PROBE_FRAME r2, r3, r12

// r0 <- the frame that PUSH_PROBE_FRAME just linked into the thread; it is the argument
// passed to RhpWaitForGC2.
ldr r0, [r2, #OFFSETOF__Thread__m_pDeferredTransitionFrame]
bl RhpWaitForGC2

// Re-read the flags from the frame and check whether a thread abort was requested.
ldr r2, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags]
tst r2, #PTFF_THREAD_ABORT
bne LOCAL_LABEL(ThrowThreadAbort)

// Normal path: restore the saved registers (including the return registers) and return.
POP_PROBE_FRAME
bx lr

LOCAL_LABEL(ThrowThreadAbort):
// Abort path: unwind the probe frame first, then branch (without linking) to RhpThrowHwEx
// with r0 = exception code and r1 = the restored return address.
POP_PROBE_FRAME
mov r0, #STATUS_REDHAWK_THREAD_ABORT
mov r1, lr // return address as exception PC
b C_FUNC(RhpThrowHwEx)
NESTED_END RhpWaitForGC

LEAF_ENTRY RhpGcPoll
PREPARE_EXTERNAL_VAR_INDIRECT RhpTrapThreads, r0
Expand All @@ -24,8 +123,16 @@ NESTED_ENTRY RhpGcPollRare, _TEXT, NoHandler
NESTED_END RhpGcPollRare

//
// Target of a hijacked return address. Restores the original return address and, if thread
// trapping is still requested, parks the thread in RhpWaitForGC; otherwise it returns straight
// to the original return address.
//
NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler
    // On exit: lr = original return address, r2 = thread pointer,
    // r12 = hijacked return value flags, r3 = trashed.
    FixupHijackedCallstack

    // Is a trap still being requested?
    PREPARE_EXTERNAL_VAR_INDIRECT RhpTrapThreads, r3
    tst         r3, #TrapThreadsFlags_TrapThreads
    bne         LOCAL_LABEL(WaitForGC)
    // No trap pending any more: resume at the original return address.
    bx          lr
LOCAL_LABEL(WaitForGC):
    // Combine the default frame flags and PTFF_SAVE_R0 with the return value flags already
    // in r12; RhpWaitForGC takes the thread in r2 and the flags in r12.
    mov         r3, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0)
    orr         r12, r3
    b           RhpWaitForGC
NESTED_END RhpGcProbeHijack

#ifdef FEATURE_GC_STRESS
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,8 @@
#define MCREG_Lr(mc) ((mc).arm_lr)
#define MCREG_R0(mc) ((mc).arm_r0)
#define MCREG_R1(mc) ((mc).arm_r1)
#define MCREG_R2(mc) ((mc).arm_r2)
#define MCREG_R3(mc) ((mc).arm_r3)
#define MCREG_R4(mc) ((mc).arm_r4)
#define MCREG_R5(mc) ((mc).arm_r5)
#define MCREG_R6(mc) ((mc).arm_r6)
Expand Down Expand Up @@ -514,6 +516,8 @@ uint64_t GetPC(void* context)
uint64_t& UNIX_CONTEXT::Lr(){ return (uint64_t&)MCREG_Lr(ctx.uc_mcontext); }
uint64_t& UNIX_CONTEXT::R0(){ return (uint64_t&)MCREG_R0(ctx.uc_mcontext); }
uint64_t& UNIX_CONTEXT::R1(){ return (uint64_t&)MCREG_R1(ctx.uc_mcontext); }
uint64_t& UNIX_CONTEXT::R2(){ return (uint64_t&)MCREG_R2(ctx.uc_mcontext); }
uint64_t& UNIX_CONTEXT::R3(){ return (uint64_t&)MCREG_R3(ctx.uc_mcontext); }
uint64_t& UNIX_CONTEXT::R4(){ return (uint64_t&)MCREG_R4(ctx.uc_mcontext); }
uint64_t& UNIX_CONTEXT::R5(){ return (uint64_t&)MCREG_R5(ctx.uc_mcontext); }
uint64_t& UNIX_CONTEXT::R6(){ return (uint64_t&)MCREG_R6(ctx.uc_mcontext); }
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/nativeaot/Runtime/unix/UnixContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ struct UNIX_CONTEXT
uint64_t& Lr();
uint64_t& R0();
uint64_t& R1();
uint64_t& R2();
uint64_t& R3();
uint64_t& R4();
uint64_t& R5();
uint64_t& R6();
Expand All @@ -143,6 +145,8 @@ struct UNIX_CONTEXT
{
lambda((size_t*)&R0());
lambda((size_t*)&R1());
lambda((size_t*)&R2());
lambda((size_t*)&R3());
lambda((size_t*)&R4());
lambda((size_t*)&R5());
lambda((size_t*)&R6());
Expand Down
Loading

0 comments on commit 7e2dd59

Please sign in to comment.