Skip to content

Commit

Permalink
[NativeAOT/ARM] Save R9 (REG_SAVED_LOCALLOC_SP) in PInvoke frames (#9…
Browse files Browse the repository at this point in the history
…7919)

* Save R9 (REG_SAVED_LOCALLOC_SP) in PInvoke frames

* Handle 'mov r9, sp' as part of prolog

* Remove m_ChainPointer from PInvokeTransitionFrame and update comments
  • Loading branch information
filipnavara authored Feb 4, 2024
1 parent e1be9a7 commit 7b3e409
Show file tree
Hide file tree
Showing 7 changed files with 37 additions and 37 deletions.
8 changes: 4 additions & 4 deletions src/coreclr/nativeaot/Runtime/arm/AsmOffsetsCpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ PLAT_ASM_OFFSET(10, ExInfo, m_idxCurClause)
PLAT_ASM_OFFSET(18, ExInfo, m_frameIter)
PLAT_ASM_OFFSET(130, ExInfo, m_notifyDebuggerSP)

PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_FramePointer)
PLAT_ASM_OFFSET(4, PInvokeTransitionFrame, m_RIP)
PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_FramePointer)
PLAT_ASM_OFFSET(0c, PInvokeTransitionFrame, m_pThread)
PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_Flags)
PLAT_ASM_OFFSET(14, PInvokeTransitionFrame, m_PreservedRegs)
PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_pThread)
PLAT_ASM_OFFSET(c, PInvokeTransitionFrame, m_Flags)
PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_PreservedRegs)

PLAT_ASM_SIZEOF(118, StackFrameIterator)
PLAT_ASM_OFFSET(08, StackFrameIterator, m_FramePointer)
Expand Down
12 changes: 4 additions & 8 deletions src/coreclr/nativeaot/Runtime/arm/GcProbe.S
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,19 @@
// Define the method prolog, allocating enough stack space for the PInvokeTransitionFrame and saving
// incoming register values into it.
PROLOG_VPUSH "{d0-d3}" // Save d0-d3 which can have the floating point return value
PROLOG_STACK_ALLOC 4 // Padding for 8-byte alignment
PROLOG_PUSH "{r0,r1}" // Save return registers
PROLOG_STACK_ALLOC 4 // Space for caller's SP
PROLOG_PUSH "{r4-r10}" // Save non-volatile registers
PROLOG_STACK_ALLOC 8 // Space for flags and Thread*
PROLOG_PUSH "{r11}" // Save caller's frame pointer
PROLOG_PUSH "{r11,lr}" // Save frame-chain pointer and return address
PROLOG_PUSH "{r11,lr}" // Save caller's frame pointer and return address

str \threadReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_pThread]
mov \trashReg, \BITMASK
str \trashReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags]

// Compute SP value at entry to this method and save it in the caller's SP slot of the frame.
add \trashReg, sp, #(16 * 4 + 4 * 8)
str \trashReg, [sp, #(12 * 4)]
add \trashReg, sp, #(14 * 4 + 4 * 8)
str \trashReg, [sp, #(11 * 4)]

// Link the frame into the Thread
str sp, [\threadReg, #OFFSETOF__Thread__m_pDeferredTransitionFrame]
Expand All @@ -47,13 +45,11 @@
// object refs or byrefs).
//
.macro POP_PROBE_FRAME
EPILOG_POP "{r11,lr}" // Restore frame-chain pointer and return address
EPILOG_POP "{r11}" // Restore caller's frame pointer
EPILOG_POP "{r11,lr}" // Restore caller's frame pointer and return address
EPILOG_STACK_FREE 8 // Discard flags and Thread*
EPILOG_POP "{r4-r10}" // Restore non-volatile registers
EPILOG_STACK_FREE 4 // Discard caller's SP
EPILOG_POP "{r0,r1}" // Restore return registers
EPILOG_STACK_FREE 4 // Discard padding for 8-byte alignment
EPILOG_VPOP "{d0-d3}" // Restore d0-d3 which can have the floating point return value
.endm

Expand Down
6 changes: 4 additions & 2 deletions src/coreclr/nativeaot/Runtime/arm/PInvoke.S
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@
NESTED_ENTRY RhpPInvoke, _TEXT, NoHandler
str lr, [r0, #OFFSETOF__PInvokeTransitionFrame__m_RIP]
str r11, [r0, #OFFSETOF__PInvokeTransitionFrame__m_FramePointer]
str sp, [r0, #OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs]
mov r3, #PTFF_SAVE_SP
// We need to save R9, which could be the frame pointer if the caller method uses stackalloc (REG_SAVED_LOCALLOC_SP)
str r9, [r0, #OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs]
str sp, [r0, #OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs + 4]
mov r3, #(PTFF_SAVE_R9 + PTFF_SAVE_SP)
str r3, [r0, #OFFSETOF__PInvokeTransitionFrame__m_Flags]

PROLOG_PUSH "{r5,lr}"
Expand Down
7 changes: 2 additions & 5 deletions src/coreclr/nativeaot/Runtime/inc/rhbinder.h
Original file line number Diff line number Diff line change
Expand Up @@ -400,11 +400,8 @@ struct PInvokeTransitionFrame
#else // USE_PORTABLE_HELPERS
struct PInvokeTransitionFrame
{
#ifdef TARGET_ARM
TgtPTR_Void m_ChainPointer; // R11, used by OS to walk stack quickly
#endif
#ifdef TARGET_ARM64
// On arm64, the FP and LR registers are pushed in that order when setting up frames
#if defined(TARGET_ARM64) || defined(TARGET_ARM)
// On arm32/arm64, the FP and LR registers are pushed in that order when setting up frames
TgtPTR_Void m_FramePointer;
TgtPTR_Void m_RIP;
#else
Expand Down
6 changes: 5 additions & 1 deletion src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -532,11 +532,15 @@ int UnixNativeCodeManager::IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddre
// MOV SP, R4
#define MOV_SP_R4 0x46A5

// MOV R9, SP
#define MOV_R9_SP 0x46E9

uint16_t* pInstr = (uint16_t*)pvAddress;
uint32_t instr = *pInstr;

if ((instr & SUB_SP_IMM_MASK) == SUB_SP_IMM_BITS ||
(instr & PUSH_MASK) == PUSH_BITS)
(instr & PUSH_MASK) == PUSH_BITS ||
instr == MOV_R9_SP)
{
return 1;
}
Expand Down
19 changes: 9 additions & 10 deletions src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#define TSF_DoNotTriggerGc 0x10

#define PTFF_SAVE_ALL_PRESERVED 0x0000007F // NOTE: R11 is not included in this set!
#define PTFF_SAVE_R9 0x00000020
#define PTFF_SAVE_SP 0x00000100
#define PTFF_SAVE_R0 0x00000200
#define PTFF_THREAD_ABORT 0x00100000
Expand Down Expand Up @@ -244,30 +245,28 @@ C_FUNC(\Name):
//
.macro PUSH_COOP_PINVOKE_FRAME trashReg

PROLOG_STACK_ALLOC 8 // Save space for caller's SP and 8-byte alignment padding
PROLOG_STACK_ALLOC 4 // Save space for caller's SP
PROLOG_PUSH "{r4-r10}" // Save preserved registers
PROLOG_STACK_ALLOC 8 // Save space for flags and Thread*
PROLOG_PUSH "{r11}" // Save caller's FP
PROLOG_PUSH "{r11,lr}" // Save caller's frame-chain pointer and PC
PROLOG_PUSH "{r11,lr}" // Save caller's frame pointer and PC

// Compute SP value at entry to this method and save it in the last slot of the frame (slot #11).
add \trashReg, sp, #(14 * 4)
str \trashReg, [sp, #(12 * 4)]
add \trashReg, sp, #(12 * 4)
str \trashReg, [sp, #(11 * 4)]

// Record the bitmask of saved registers in the frame (slot #4).
// Record the bitmask of saved registers in the frame (slot #3).
mov \trashReg, #DEFAULT_FRAME_SAVE_FLAGS
str \trashReg, [sp, #(4 * 4)]
str \trashReg, [sp, #(3 * 4)]

mov \trashReg, sp
.endm

// Pop the frame and restore register state preserved by PUSH_COOP_PINVOKE_FRAME
.macro POP_COOP_PINVOKE_FRAME
EPILOG_POP "{r11,lr}" // Restore caller's frame-chain pointer and PC (return address)
EPILOG_POP "{r11}" // Restore caller's FP
EPILOG_POP "{r11,lr}" // Restore caller's frame pointer and PC (return address)
EPILOG_STACK_FREE 8 // Discard flags and Thread*
EPILOG_POP "{r4-r10}" // Restore preserved registers
EPILOG_STACK_FREE 8 // Discard caller's SP and 8-byte alignment padding
EPILOG_STACK_FREE 4 // Discard caller's SP
.endm

// thumb with PIC version
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2006,20 +2006,22 @@ private int SizeOfPInvokeTransitionFrame
get
{
// struct PInvokeTransitionFrame:
// #ifdef _TARGET_ARM_
// m_ChainPointer
// #endif
// m_RIP
// m_FramePointer
// m_RIP (1)
// m_FramePointer (1)
// m_pThread
// m_Flags + align (no align for ARM64 that has 64 bit m_Flags)
// m_PreserverRegs - RSP
// m_PreservedRegs - RSP / R9 (2)
// No need to save other preserved regs because the JIT ensures that there are
// no live GC references in callee saved registers around the PInvoke callsite.
//
// (1) On ARM32/ARM64 the order of m_RIP and m_FramePointer is reversed
// (2) R9 is saved for ARM32 because it needs to be preserved for methods with stackalloc
int size = 5 * this.PointerSize;

if (_compilation.TypeSystemContext.Target.Architecture == TargetArchitecture.ARM)
size += this.PointerSize; // m_ChainPointer
{
size += this.PointerSize; // R9 (REG_SAVED_LOCALLOC_SP)
}

return size;
}
Expand Down

0 comments on commit 7b3e409

Please sign in to comment.