From 7e2dd594adb25f176a6529c40fea155570f7fba7 Mon Sep 17 00:00:00 2001
From: Filip Navara
Date: Thu, 25 Jan 2024 17:32:30 +0100
Subject: [PATCH] [NativeAOT] Linux/ARM bring-up (4/n) (#97269)

* Fix recursive generics for ARM

* Fix compilation on Debian Bookworm

* Implement thread return address hijacking for ARM

* Implement TrailingEpilogueInstructionsCount for ARM

* Fix comment

* Fix bugs in RhpGcProbeHijack logic

* Fix register trashing by INLINE_GETTHREAD in FixupHijackedCallstack on ARM

* Mask the Thumb bit when loading IP from probe frame

* Disable DwarfDump on linux-arm

* Cleanup

* Emit DWARF info with instruction addresses without Thumb bit (matches clang)

* Report R2/R3 registers in ForEachPossibleObjectRef

* Ensure that PInvokeTransitionFrame(s) on the stack are 8-byte aligned. Save FP return values on hijack.

* Tame the Thumb bit

* Fix GC hole when thread hijack happens with r0 register holding a reference (eg. boxed int)

* Update src/coreclr/nativeaot/Runtime/arm/GcProbe.S

---------

Co-authored-by: Vladimir Sadov
---
 .../nativeaot/Runtime/RuntimeInstance.cpp     |   7 +-
 .../nativeaot/Runtime/StackFrameIterator.cpp  |  26 +-
 src/coreclr/nativeaot/Runtime/arm/GcProbe.S   | 111 +++++++-
 .../nativeaot/Runtime/unix/UnixContext.cpp    |   4 +
 .../nativeaot/Runtime/unix/UnixContext.h      |   4 +
 .../Runtime/unix/UnixNativeCodeManager.cpp    | 240 +++++++++++++++++-
 .../nativeaot/Runtime/unix/unixasmmacros.inc  |   3 +
 .../Runtime/unix/unixasmmacrosamd64.inc       |   3 -
 .../Runtime/unix/unixasmmacrosarm.inc         |   8 +-
 .../Runtime/unix/unixasmmacrosarm64.inc       |   3 -
 .../Target_ARM/ARMEmitter.cs                  |   9 +
 .../ARMReadyToRunGenericHelperNode.cs         |  12 +
 .../ARM64ReadyToRunGenericHelperNode.cs       |   2 +-
 .../X64ReadyToRunGenericHelperNode.cs         |   2 +-
 .../Compiler/ObjectWriter/UnixObjectWriter.cs |  11 +-
 .../llvm-libunwind/src/AddressSpace.hpp       |   6 +-
 .../SmokeTests/DwarfDump/DwarfDump.csproj     |   2 +
 17 files changed, 417 insertions(+), 36 deletions(-)

diff --git a/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp b/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp
index c3ebf0a78077f..b02e64580a517 100644
--- a/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp
+++ b/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp
@@ -65,7 +65,12 @@ COOP_PINVOKE_HELPER(uint8_t *, RhGetRuntimeVersion, (int32_t* pcbLength))
 
 COOP_PINVOKE_HELPER(uint8_t *, RhFindMethodStartAddress, (void * codeAddr))
 {
-    return dac_cast<uint8_t*>(GetRuntimeInstance()->FindMethodStartAddress(dac_cast<PTR_VOID>(codeAddr)));
+    uint8_t *startAddress = dac_cast<uint8_t*>(GetRuntimeInstance()->FindMethodStartAddress(dac_cast<PTR_VOID>(codeAddr)));
+#if TARGET_ARM
+    return startAddress + 1; // Set the Thumb bit
+#else
+    return startAddress;
+#endif
 }
 
 PTR_UInt8 RuntimeInstance::FindMethodStartAddress(PTR_VOID ControlPC)

diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
index 1943aa2bcb6d7..06cbcd9901f9a 100644
--- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
+++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
@@ -21,6 +21,7 @@
 #include "threadstore.inl"
 #include "thread.inl"
 #include "stressLog.h"
+#include "CommonMacros.inl"
 #include "shash.h"
 #include "RuntimeInstance.h"
 
@@ -75,7 +76,7 @@ GVAL_IMPL_INIT(PTR_VOID, g_RhpRethrow2Addr, PointerToRhpRethrow2);
 
 #ifdef DACCESS_COMPILE
 #define EQUALS_RETURN_ADDRESS(x, func_name) ((x) == g_ ## func_name ## Addr)
 #else
-#define EQUALS_RETURN_ADDRESS(x, func_name) (((x)) == (PointerTo ## func_name))
+#define EQUALS_RETURN_ADDRESS(x, func_name) (((x)) == (PTR_VOID)PCODEToPINSTR((PCODE)PointerTo ## func_name))
 #endif
 
 #ifdef DACCESS_COMPILE
@@ -178,7 +179,7 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PInvokeTransitionF
 
 #if !defined(USE_PORTABLE_HELPERS) // @TODO: no portable version of regdisplay
     memset(&m_RegDisplay, 0, sizeof(m_RegDisplay));
-    m_RegDisplay.SetIP((PCODE)pFrame->m_RIP);
+    m_RegDisplay.SetIP((PCODE)PCODEToPINSTR((PCODE)pFrame->m_RIP));
     SetControlPC(dac_cast<PTR_VOID>(m_RegDisplay.GetIP()));
 
     PTR_UIntNative pPreservedRegsCursor = (PTR_UIntNative)PTR_HOST_MEMBER(PInvokeTransitionFrame, pFrame, m_PreservedRegs);
 
@@ -384,9 +385,9 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CO
     //
     // control state
     //
-    SetControlPC(dac_cast<PTR_VOID>(pCtx->GetIp()));
     m_RegDisplay.SP = pCtx->GetSp();
-    m_RegDisplay.IP = pCtx->GetIp();
+    m_RegDisplay.IP = PCODEToPINSTR(pCtx->GetIp());
+    SetControlPC(dac_cast<PTR_VOID>(m_RegDisplay.GetIP()));
 
 #ifdef TARGET_ARM
     //
@@ -609,6 +610,8 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pC
 
     m_RegDisplay.pR0 = (PTR_UIntNative)PTR_TO_REG(pCtx, R0);
     m_RegDisplay.pR1 = (PTR_UIntNative)PTR_TO_REG(pCtx, R1);
+    m_RegDisplay.pR2 = (PTR_UIntNative)PTR_TO_REG(pCtx, R2);
+    m_RegDisplay.pR3 = (PTR_UIntNative)PTR_TO_REG(pCtx, R3);
     m_RegDisplay.pR4 = (PTR_UIntNative)PTR_TO_REG(pCtx, R4);
     m_RegDisplay.pR5 = (PTR_UIntNative)PTR_TO_REG(pCtx, R5);
     m_RegDisplay.pR6 = (PTR_UIntNative)PTR_TO_REG(pCtx, R6);
@@ -991,7 +994,7 @@ void StackFrameIterator::UnwindFuncletInvokeThunk()
 #endif
 
 #if !defined(TARGET_ARM64)
-    m_RegDisplay.SetIP(*SP++);
+    m_RegDisplay.SetIP(PCODEToPINSTR(*SP++));
 #endif
 
     m_RegDisplay.SetSP((uintptr_t)dac_cast<TADDR>(SP));
@@ -1174,7 +1177,7 @@ void StackFrameIterator::UnwindUniversalTransitionThunk()
     stackFrame->UnwindNonVolatileRegisters(&m_RegDisplay);
 
     PTR_UIntNative addressOfPushedCallerIP = stackFrame->get_AddressOfPushedCallerIP();
-    m_RegDisplay.SetIP(*addressOfPushedCallerIP);
+    m_RegDisplay.SetIP(PCODEToPINSTR(*addressOfPushedCallerIP));
     m_RegDisplay.SetSP((uintptr_t)dac_cast<TADDR>(stackFrame->get_CallerSP()));
     SetControlPC(dac_cast<PTR_VOID>(m_RegDisplay.GetIP()));
 
@@ -1265,9 +1268,9 @@ void StackFrameIterator::UnwindThrowSiteThunk()
     ASSERT_UNCONDITIONALLY("NYI for this arch");
 #endif
 
-    m_RegDisplay.SetIP(pContext->IP);
+    m_RegDisplay.SetIP(PCODEToPINSTR(pContext->IP));
     m_RegDisplay.SetSP(pContext->GetSp());
-    SetControlPC(dac_cast<PTR_VOID>(pContext->IP));
+    SetControlPC(dac_cast<PTR_VOID>(m_RegDisplay.GetIP()));
 
     // We expect the throw site to be in managed code, and since this function's notion of how to unwind
     // through the stub is brittle relative to the stub itself, we want to check as soon as we can.
@@ -1357,7 +1360,7 @@ void StackFrameIterator::NextInternal()
             // if the thread is safe to walk, it better not have a hijack in place.
             ASSERT(!m_pThread->IsHijacked());
 
-            SetControlPC(dac_cast<PTR_VOID>(m_RegDisplay.GetIP()));
+            SetControlPC(dac_cast<PTR_VOID>(PCODEToPINSTR(m_RegDisplay.GetIP())));
 
             PTR_VOID collapsingTargetFrame = NULL;
 
@@ -1717,6 +1720,11 @@ bool StackFrameIterator::GetHijackedReturnValueLocation(PTR_OBJECTREF * pLocatio
 
 void StackFrameIterator::SetControlPC(PTR_VOID controlPC)
 {
+#if TARGET_ARM
+    // Ensure that PC doesn't have the Thumb bit set. This needs to be
+    // consistent for EQUALS_RETURN_ADDRESS to work.
+    ASSERT(((uintptr_t)controlPC & 1) == 0);
+#endif
    m_OriginalControlPC = m_ControlPC = controlPC;
 }
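Almost every hunk above enforces the same invariant: on ARM a code pointer used as a call target carries the Thumb bit in bit 0, while the addresses the stack walker compares and unwinds through must have it cleared, which is what the new PCODEToPINSTR calls and the SetControlPC assert guarantee. A minimal stand-alone sketch of the two conversions, using illustrative helper names rather than the runtime's actual macros:

    #include <cstdint>
    #include <cstdio>

    using PCODE = uintptr_t;   // call-target form: bit 0 set on Thumb targets
    using TADDR = uintptr_t;   // raw instruction address: bit 0 clear

    // Strip the Thumb bit to get the address of the first instruction byte.
    static TADDR CodePointerToInstrAddress(PCODE pc) { return pc & ~(TADDR)1; }

    // Set the Thumb bit so the address can be used as a branch/call target.
    static PCODE InstrAddressToCodePointer(TADDR addr) { return addr | 1; }

    int main()
    {
        PCODE target = 0x0001A2C5;                        // as returned by a symbol lookup
        TADDR instr  = CodePointerToInstrAddress(target); // 0x0001A2C4: what the unwinder sees
        printf("target=%#zx instr=%#zx round-trip=%#zx\n",
               (size_t)target, (size_t)instr, (size_t)InstrAddressToCodePointer(instr));
        // Comparisons such as EQUALS_RETURN_ADDRESS only work if both sides
        // are normalized to the same form first.
        return 0;
    }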
diff --git a/src/coreclr/nativeaot/Runtime/arm/GcProbe.S b/src/coreclr/nativeaot/Runtime/arm/GcProbe.S
index 52fee44368288..c202591d7fcd1 100644
--- a/src/coreclr/nativeaot/Runtime/arm/GcProbe.S
+++ b/src/coreclr/nativeaot/Runtime/arm/GcProbe.S
@@ -8,6 +8,105 @@
 #include "AsmOffsets.inc"
 
 .global RhpGcPoll2
+.global RhpThrowHwEx
+
+// See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves return registers
+// and accepts the register bitmask
+// Call this macro first in the method (no further prolog instructions can be added after this).
+//
+//  threadReg : register containing the Thread* (this will be preserved).
+//  trashReg  : register that can be trashed by this macro
+//  BITMASK   : value to initialize m_dwFlags field with (register or #constant)
+.macro PUSH_PROBE_FRAME threadReg, trashReg, BITMASK
+    // Define the method prolog, allocating enough stack space for the PInvokeTransitionFrame and saving
+    // incoming register values into it.
+    PROLOG_VPUSH "{d0-d3}"      // Save d0-d3 which can have the floating point return value
+    PROLOG_STACK_ALLOC 4        // Padding for 8-byte alignment
+    PROLOG_PUSH "{r0,r1}"       // Save return registers
+    PROLOG_STACK_ALLOC 4        // Space for caller's SP
+    PROLOG_PUSH "{r4-r10}"      // Save non-volatile registers
+    PROLOG_STACK_ALLOC 8        // Space for flags and Thread*
+    PROLOG_PUSH "{r11}"         // Save caller's frame pointer
+    PROLOG_PUSH "{r11,lr}"      // Save frame-chain pointer and return address
+
+    str     \threadReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_pThread]
+    mov     \trashReg, \BITMASK
+    str     \trashReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags]
+
+    // Compute SP value at entry to this method and save it in slot of the frame.
+    add     \trashReg, sp, #(16 * 4 + 4 * 8)
+    str     \trashReg, [sp, #(12 * 4)]
+
+    // Link the frame into the Thread
+    str     sp, [\threadReg, #OFFSETOF__Thread__m_pDeferredTransitionFrame]
+.endm
+
+//
+// Remove the frame from a previous call to PUSH_PROBE_FRAME from the top of the stack and restore preserved
+// registers and return value to their values from before the probe was called (while also updating any
+// object refs or byrefs).
+//
+.macro POP_PROBE_FRAME
+    EPILOG_POP "{r11,lr}"       // Restore frame-chain pointer and return address
+    EPILOG_POP "{r11}"          // Restore caller's frame pointer
+    EPILOG_STACK_FREE 8         // Discard flags and Thread*
+    EPILOG_POP "{r4-r10}"       // Restore non-volatile registers
+    EPILOG_STACK_FREE 4         // Discard caller's SP
+    EPILOG_POP "{r0,r1}"        // Restore return registers
+    EPILOG_STACK_FREE 4         // Discard padding for 8-byte alignment
+    EPILOG_VPOP "{d0-d3}"       // Restore d0-d3 which can have the floating point return value
+.endm
+
+//
+// The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and
+// clears the hijack state.
+//
+// Register state on entry:
+//  All registers correct for return to the original return address.
+//
+// Register state on exit:
+//  r2: thread pointer
+//  r3: trashed
+//
+.macro FixupHijackedCallstack
+    push    {r0, r1}
+
+    // r0 <- GetThread()
+    INLINE_GETTHREAD
+
+    mov     r2, r0
+    pop     {r0, r1}
+
+    // Fix the stack by restoring the original return address
+    ldr     lr, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress]
+    ldr     r12, [r2, #OFFSETOF__Thread__m_uHijackedReturnValueFlags]
+
+    // Clear hijack state
+    mov     r3, #0
+    str     r3, [r2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation]
+    str     r3, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress]
+    str     r3, [r2, #OFFSETOF__Thread__m_uHijackedReturnValueFlags]
+.endm
+
+NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler
+    PUSH_PROBE_FRAME r2, r3, r12
+
+    ldr     r0, [r2, #OFFSETOF__Thread__m_pDeferredTransitionFrame]
+    bl      RhpWaitForGC2
+
+    ldr     r2, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags]
+    tst     r2, #PTFF_THREAD_ABORT
+    bne     LOCAL_LABEL(ThrowThreadAbort)
+
+    POP_PROBE_FRAME
+    bx      lr
+
+LOCAL_LABEL(ThrowThreadAbort):
+    POP_PROBE_FRAME
+    mov     r0, #STATUS_REDHAWK_THREAD_ABORT
+    mov     r1, lr // return address as exception PC
+    b       C_FUNC(RhpThrowHwEx)
+NESTED_END RhpWaitForGC
 
 LEAF_ENTRY RhpGcPoll
     PREPARE_EXTERNAL_VAR_INDIRECT RhpTrapThreads, r0
@@ -24,8 +123,16 @@ NESTED_ENTRY RhpGcPollRare, _TEXT, NoHandler
 NESTED_END RhpGcPollRare
 
 NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler
-    // Not implemented
-    EMIT_BREAKPOINT
+    FixupHijackedCallstack
+
+    PREPARE_EXTERNAL_VAR_INDIRECT RhpTrapThreads, r3
+    tst     r3, #TrapThreadsFlags_TrapThreads
+    bne     LOCAL_LABEL(WaitForGC)
+    bx      lr
+
+LOCAL_LABEL(WaitForGC):
+    mov     r3, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0)
+    orr     r12, r3
+    b       RhpWaitForGC
 NESTED_END RhpGcProbeHijack
 
 #ifdef FEATURE_GC_STRESS
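The probe frame pushed above is 24 words: the frame chain and caller's FP, flags plus Thread*, r4-r10, the caller's SP slot, r0/r1, one padding word, and d0-d3 for a possible floating point return value. The `#(16 * 4 + 4 * 8)` constant and the slot-12 store can be cross-checked with a small host-side sketch (illustrative only; the real field offsets come from AsmOffsets.inc):

    #include <cassert>
    #include <cstdio>

    int main()
    {
        // Words pushed by PUSH_PROBE_FRAME, counted from the final SP upwards.
        const int frameChain   = 2;  // {r11, lr}
        const int callerFp     = 1;  // {r11}
        const int flagsThread  = 2;  // flags + Thread*
        const int preserved    = 7;  // {r4-r10}
        const int callerSpSlot = 1;  // spilled caller SP
        const int returnRegs   = 2;  // {r0, r1}
        const int padding      = 1;  // keeps SP 8-byte aligned
        const int fpReturn     = 8;  // {d0-d3} = 4 doubles = 8 words

        const int words = frameChain + callerFp + flagsThread + preserved
                        + callerSpSlot + returnRegs + padding + fpReturn;
        const int bytes = words * 4;

        // "add trashReg, sp, #(16 * 4 + 4 * 8)" recovers the caller's SP.
        assert(bytes == 16 * 4 + 4 * 8);   // 96 bytes
        assert(bytes % 8 == 0);            // AAPCS: SP stays 8-byte aligned

        // The caller's SP is stored at word slot #12, right above {r4-r10}.
        const int callerSpSlotIndex = frameChain + callerFp + flagsThread + preserved;
        assert(callerSpSlotIndex == 12);
        printf("probe frame: %d bytes, caller SP slot at offset %d\n",
               bytes, callerSpSlotIndex * 4);
        return 0;
    }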
diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp
index 515b6002cf059..4087e2ae6ac4f 100644
--- a/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp
+++ b/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp
@@ -256,6 +256,8 @@
 #define MCREG_Lr(mc)      ((mc).arm_lr)
 #define MCREG_R0(mc)      ((mc).arm_r0)
 #define MCREG_R1(mc)      ((mc).arm_r1)
+#define MCREG_R2(mc)      ((mc).arm_r2)
+#define MCREG_R3(mc)      ((mc).arm_r3)
 #define MCREG_R4(mc)      ((mc).arm_r4)
 #define MCREG_R5(mc)      ((mc).arm_r5)
 #define MCREG_R6(mc)      ((mc).arm_r6)
@@ -514,6 +516,8 @@ uint64_t GetPC(void* context)
     uint64_t& UNIX_CONTEXT::Lr(){ return (uint64_t&)MCREG_Lr(ctx.uc_mcontext); }
     uint64_t& UNIX_CONTEXT::R0(){ return (uint64_t&)MCREG_R0(ctx.uc_mcontext); }
     uint64_t& UNIX_CONTEXT::R1(){ return (uint64_t&)MCREG_R1(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::R2(){ return (uint64_t&)MCREG_R2(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::R3(){ return (uint64_t&)MCREG_R3(ctx.uc_mcontext); }
     uint64_t& UNIX_CONTEXT::R4(){ return (uint64_t&)MCREG_R4(ctx.uc_mcontext); }
     uint64_t& UNIX_CONTEXT::R5(){ return (uint64_t&)MCREG_R5(ctx.uc_mcontext); }
     uint64_t& UNIX_CONTEXT::R6(){ return (uint64_t&)MCREG_R6(ctx.uc_mcontext); }

diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixContext.h b/src/coreclr/nativeaot/Runtime/unix/UnixContext.h
index bfa99e04c60a1..471f395bef1f5 100644
--- a/src/coreclr/nativeaot/Runtime/unix/UnixContext.h
+++ b/src/coreclr/nativeaot/Runtime/unix/UnixContext.h
@@ -126,6 +126,8 @@ struct UNIX_CONTEXT
     uint64_t& Lr();
     uint64_t& R0();
     uint64_t& R1();
+    uint64_t& R2();
+    uint64_t& R3();
     uint64_t& R4();
     uint64_t& R5();
     uint64_t& R6();
@@ -143,6 +145,8 @@ struct UNIX_CONTEXT
     {
         lambda((size_t*)&R0());
         lambda((size_t*)&R1());
+        lambda((size_t*)&R2());
+        lambda((size_t*)&R3());
         lambda((size_t*)&R4());
         lambda((size_t*)&R5());
         lambda((size_t*)&R6());
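When a thread is stopped at an arbitrary instruction, any argument or scratch register may hold the only live reference to an object, so the context walker hands every candidate register slot to the GC callback; the ARM list previously skipped r2 and r3. The same shape, reduced to a stand-alone toy (a made-up register container, not the runtime's UNIX_CONTEXT):

    #include <cstddef>
    #include <cstdio>
    #include <functional>

    // Toy register file standing in for the signal-time machine context.
    struct ToyArmContext
    {
        size_t r[13]; // r0-r12

        // Hand the address of every register that may contain a GC reference
        // to the callback, so the GC can inspect (and update) it in place.
        void ForEachPossibleObjectRef(const std::function<void(size_t*)>& lambda)
        {
            // r0-r3 are argument/return registers, r4-r11 are callee-saved,
            // r12 is a scratch register - all of them can hold references.
            for (int i = 0; i <= 12; i++)
                lambda(&r[i]);
        }
    };

    int main()
    {
        ToyArmContext ctx = {};
        ctx.r[2] = 0x1234; // pretend r2 holds an object pointer
        int candidates = 0;
        ctx.ForEachPossibleObjectRef([&](size_t* slot) { candidates += (*slot != 0); });
        printf("%d register(s) currently hold a non-null candidate ref\n", candidates);
        return 0;
    }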
diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp
index 2a75e83bbfdfb..ee0097b7c0dd6 100644
--- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp
+++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp
@@ -87,12 +87,12 @@ bool UnixNativeCodeManager::FindMethodInfo(PTR_VOID ControlPC,
 
     unw_proc_info_t procInfo;
 
-    if (!UnwindHelpers::GetUnwindProcInfo(PINSTRToPCODE((TADDR)ControlPC), m_UnwindInfoSections, &procInfo))
+    if (!UnwindHelpers::GetUnwindProcInfo((TADDR)ControlPC, m_UnwindInfoSections, &procInfo))
     {
         return false;
     }
 
-    assert((procInfo.start_ip <= PINSTRToPCODE((TADDR)ControlPC)) && (PINSTRToPCODE((TADDR)ControlPC) < procInfo.end_ip));
+    assert((procInfo.start_ip <= (TADDR)ControlPC) && ((TADDR)ControlPC < procInfo.end_ip));
 
     pMethodInfo->start_ip = procInfo.start_ip;
     pMethodInfo->format = procInfo.format;
@@ -205,6 +205,13 @@ void UnixNativeCodeManager::EnumGcRefs(MethodInfo * pMethodInfo,
     PTR_UInt8 gcInfo;
     uint32_t codeOffset = GetCodeOffset(pMethodInfo, safePointAddress, &gcInfo);
 
+#ifdef TARGET_ARM
+    // Ensure that code offset doesn't have the Thumb bit set. We need
+    // it to be aligned to instruction start to make the !isActiveStackFrame
+    // branch below work.
+    ASSERT(((uintptr_t)codeOffset & 1) == 0);
+#endif
+
     if (!isActiveStackFrame)
     {
         // If we are not in the active method, we are currently pointing
@@ -367,13 +374,17 @@ bool UnixNativeCodeManager::IsUnwindable(PTR_VOID pvAddress)
 {
     MethodInfo * pMethodInfo = NULL;
 
+#if defined(TARGET_ARM)
+    ASSERT(((uintptr_t)pvAddress & 1) == 0);
+#endif
+
 #if defined(TARGET_ARM64)
     MethodInfo methodInfo;
     FindMethodInfo(pvAddress, &methodInfo);
     pMethodInfo = &methodInfo;
 #endif
 
-#if defined(TARGET_APPLE) && defined(TARGET_ARM64)
+#if (defined(TARGET_APPLE) && defined(TARGET_ARM64)) || defined(TARGET_ARM)
     // VirtualUnwind can't unwind epilogues and some prologues.
     return TrailingEpilogueInstructionsCount(pMethodInfo, pvAddress) == 0 && IsInProlog(pMethodInfo, pvAddress) != 1;
 #else
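The prolog and epilog scanners added in the hunks below walk raw Thumb-2 code, where an instruction is one or two 16-bit halfwords; the patch decides the width from the first halfword with `(instr & 0xE000) == 0xE000 && (instr & 0xF800) != 0xE000`. A self-contained restatement of that test with a few spot checks (opcode samples chosen for illustration):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // A Thumb-2 instruction is 32 bits wide iff the first halfword's top bits
    // are 0b11101, 0b11110 or 0b11111; 0b11100 (unconditional B) stays 16-bit.
    static bool IsThumb32(uint16_t firstHalfword)
    {
        return (firstHalfword & 0xE000) == 0xE000 &&
               (firstHalfword & 0xF800) != 0xE000;
    }

    int main()
    {
        assert(!IsThumb32(0xB480)); // push {r7}         - 16-bit
        assert(!IsThumb32(0xE7FE)); // b .               - 16-bit unconditional branch
        assert(IsThumb32(0xE92D));  // push.w {...}      - first halfword of a 32-bit encoding
        assert(IsThumb32(0xF85D));  // pop.w/ldr.w ...   - first halfword of a 32-bit encoding
        printf("Thumb-2 width checks passed\n");
        return 0;
    }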
@@ -449,6 +460,148 @@ int UnixNativeCodeManager::IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddre
 
     return savedFpLr && establishedFp ? 0 : 1;
 
+#elif defined(TARGET_ARM)
+
+// SUB SP, SP, #<imm>
+// 1011 0000 1xxx xxxx
+#define SUB_SP_IMM_BITS 0xB080
+#define SUB_SP_IMM_MASK 0xFF80
+
+// SUB{S}.W SP, SP, #<imm>
+// 1111 0x01 101x 1101 0xxx 1101 xxxx xxxx
+#define SUB_W_SP_IMM_BITS 0xF1AD0D00
+#define SUB_W_SP_IMM_MASK 0xFBEF8F00
+
+// SUBW SP, SP, #<imm>
+// 1111 0x10 1010 1101 0xxx 1101 xxxx xxxx
+#define SUBW_SP_IMM_BITS 0xF2AD0D00
+#define SUBW_SP_IMM_MASK 0xFBFF8F00
+
+// SUB SP, <Rm>
+// 0100 0100 1xxx x101
+#define SUB_SP_REG_BITS 0x4485
+#define SUB_SP_REG_MASK 0xFF87
+
+// SUB{S}.W SP, SP, <Rm> {, <shift>}
+// 1110 1011 101x 1101 0xxx 1101 xxxx xxxx
+#define SUB_W_SP_REG_BITS 0xEBAD0D00
+#define SUB_W_SP_REG_MASK 0xFFEF8F00
+
+// PUSH <registers>
+// 1011 010x xxxx xxxx
+#define PUSH_BITS 0xB400
+#define PUSH_MASK 0xFE00
+
+// PUSH.W <registers>
+// 1110 1001 0010 1101 0x0x xxxx xxxx xxxx
+#define PUSH_W_BITS_T2 0xE92D0000
+#define PUSH_W_MASK_T2 0xFFFFA000
+
+// PUSH.W <register>
+// 1111 1000 0100 1101 xxxx 1101 0000 0100
+#define PUSH_W_BITS_T3 0xF84D0D04
+#define PUSH_W_MASK_T3 0xFFFF0FFF
+
+// VPUSH <list>
+// 1110 1101 0x10 1101 xxxx 1011 xxxx xxxx
+#define VPUSH_BITS_T1 0xED2D0B00
+#define VPUSH_MASK_T1 0xFFBF0F00
+
+// VPUSH <list>
+// 1110 1101 0x10 1101 xxxx 1010 xxxx xxxx
+#define VPUSH_BITS_T2 0xED2D0A00
+#define VPUSH_MASK_T2 0xFFBF0F00
+
+// POP <registers>
+// 1011 110x xxxx xxxx
+#define POP_BITS 0xBC00
+#define POP_MASK 0xFE00
+
+// POP.W <registers>
+// 1110 1000 1011 1101
+#define POP_W_T2 0xE8BD
+
+// POP.W <register>
+// 1111 1000 0101 1101
+#define POP_W_T3 0xF85D
+
+// BX LR
+#define BX_LR_BITS 0x4770
+#define BX_LR_MASK 0xFFFF
+
+// MOV SP, R4
+#define MOV_SP_R4 0x46A5
+
+    uint16_t* pInstr = (uint16_t*)pvAddress;
+    uint32_t instr = *pInstr;
+
+    if ((instr & SUB_SP_IMM_MASK) == SUB_SP_IMM_BITS ||
+        (instr & PUSH_MASK) == PUSH_BITS)
+    {
+        return 1;
+    }
+
+    instr <<= 16;
+    instr |= *(pInstr + 1);
+
+    if ((instr & SUB_W_SP_IMM_MASK) == SUB_W_SP_IMM_BITS ||
+        (instr & SUBW_SP_IMM_MASK) == SUBW_SP_IMM_BITS ||
+        (instr & SUB_W_SP_REG_MASK) == SUB_W_SP_REG_BITS ||
+        (instr & PUSH_W_MASK_T2) == PUSH_W_BITS_T2 ||
+        (instr & PUSH_W_MASK_T3) == PUSH_W_BITS_T3 ||
+        (instr & VPUSH_MASK_T1) == VPUSH_BITS_T1 ||
+        (instr & VPUSH_MASK_T2) == VPUSH_BITS_T2)
+    {
+        return 1;
+    }
+
+    // The localloc pattern generated by JIT looks like:
+    //
+    //   movw r4, #frameSize
+    //   sub r4, sp, r4
+    //   bl CORINFO_HELP_STACK_PROBE
+    //   mov sp, r4
+    //
+    // or
+    //
+    //   movw r4, #frameSizeLo16
+    //   movt r4, #frameSizeHi16
+    //   sub r4, sp, r4
+    //   bl CORINFO_HELP_STACK_PROBE
+    //   mov sp, r4
+    //
+    // We can look ahead by a couple of instructions and look for "mov sp, rXX".
+    for (int c = 5; c >= 0; --c)
+    {
+        instr = *pInstr;
+        if (instr == MOV_SP_R4)
+        {
+            return 1;
+        }
+
+        // Bail out on any instruction that's clearly an epilog and can be
+        // the end of the method.
+        if ((instr & POP_MASK) == POP_BITS ||
+            (instr & BX_LR_MASK) == BX_LR_BITS ||
+            instr == POP_W_T2 || instr == POP_W_T3)
+        {
+            return 0;
+        }
+
+        // Skip over to next instruction
+        if ((instr & 0xE000) == 0xE000 && (instr & 0xF800) != 0xE000)
+        {
+            // 32-bit Thumb instruction
+            pInstr += 2;
+        }
+        else
+        {
+            pInstr++;
+        }
+    }
+
+    return 0;
+
 #else
 
     return -1;
@@ -733,6 +886,77 @@
         }
     }
 
+#elif defined(TARGET_ARM)
+
+// ADD SP, SP, #<imm>
+// 1011 0000 0xxx xxxx
+#define ADD_SP_IMM_BITS 0xB000
+#define ADD_SP_IMM_MASK 0xFF80
+
+// ADD{S}.W SP, SP, #<imm>
+// 1111 0x01 000x 1101 0xxx 1101 xxxx xxxx
+#define ADD_W_SP_IMM_BITS 0xF10D0D00
+#define ADD_W_SP_IMM_MASK 0xFBEF8F00
+
+// ADDW SP, SP, #<imm>
+// 1111 0x10 0000 1101 0xxx 1101 xxxx xxxx
+#define ADDW_SP_IMM_BITS 0xF20D0D00
+#define ADDW_SP_IMM_MASK 0xFBFF8F00
+
+// ADD SP, <Rm>
+// 0100 0100 1xxx x101
+#define ADD_SP_REG_BITS 0x4485
+#define ADD_SP_REG_MASK 0xFF87
+
+// ADD{S}.W SP, SP, <Rm> {, <shift>}
+// 1110 1011 000x 1101 0xxx 1101 xxxx xxxx
+#define ADD_W_SP_REG_BITS 0xEB0D0D00
+#define ADD_W_SP_REG_MASK 0xFFEF8F00
+
+// POP.W <registers>
+// 1110 1000 1011 1101 xx0x xxxx xxxx xxxx
+#define POP_W_BITS_T2 0xE8BD0000
+#define POP_W_MASK_T2 0xFFFF2000
+
+// POP.W <register>
+// 1111 1000 0101 1101 xxxx 1011 0000 0100
+#define POP_W_BITS_T3 0xF85D0B04
+#define POP_W_MASK_T3 0xFFFF0FFF
+
+// VPOP <list>
+// 1110 1100 1x11 1101 xxxx 1011 xxxx xxxx
+#define VPOP_BITS_T1 0xECBD0B00
+#define VPOP_MASK_T1 0xFFBF0F00
+
+// VPOP <list>
+// 1110 1100 1x11 1101 xxxx 1010 xxxx xxxx
+#define VPOP_BITS_T2 0xECBD0A00
+#define VPOP_MASK_T2 0xFFBF0F00
+
+    uint32_t instr = *(uint16_t*)pvAddress;
+
+    if ((instr & ADD_SP_IMM_MASK) == ADD_SP_IMM_BITS ||
+        (instr & ADD_SP_REG_MASK) == ADD_SP_REG_BITS ||
+        (instr & POP_MASK) == POP_BITS ||
+        (instr & BX_LR_MASK) == BX_LR_BITS)
+    {
+        return -1;
+    }
+
+    instr <<= 16;
+    instr |= *((uint16_t*)pvAddress + 1);
+
+    if ((instr & ADD_W_SP_IMM_MASK) == ADD_W_SP_IMM_BITS ||
+        (instr & ADDW_SP_IMM_MASK) == ADDW_SP_IMM_BITS ||
+        (instr & ADD_W_SP_REG_MASK) == ADD_W_SP_REG_BITS ||
+        (instr & POP_W_MASK_T2) == POP_W_BITS_T2 ||
+        (instr & POP_W_MASK_T3) == POP_W_BITS_T3 ||
+        (instr & VPOP_MASK_T1) == VPOP_BITS_T1 ||
+        (instr & VPOP_MASK_T2) == VPOP_BITS_T2)
+    {
+        return -1;
+    }
+
 #endif
 
     return 0;
@@ -782,6 +1006,12 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn
     GcInfoDecoder decoder(GCInfoToken(p), flags);
     *pRetValueKind = GetGcRefKind(decoder.GetReturnKind());
 
+#if defined(TARGET_ARM)
+    // Ensure that PC doesn't have the Thumb bit set. Prolog and epilog
+    // checks depend on it.
+    ASSERT(((uintptr_t)pRegisterSet->IP & 1) == 0);
+#endif
+
     int epilogueInstructions = TrailingEpilogueInstructionsCount(pMethodInfo, (PTR_VOID)pRegisterSet->IP);
     if (epilogueInstructions < 0)
     {
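Each of the prolog and epilog shapes above is a (bits, mask) pair; an instruction matches when `(instr & mask) == bits`. The same idea packaged as a tiny table-driven matcher over two of the 16-bit epilog patterns (values copied from the defines; the table and names are only illustrative):

    #include <cstdint>
    #include <cstdio>

    struct ThumbPattern
    {
        uint16_t bits;
        uint16_t mask;
        const char* name;
    };

    // Two of the 16-bit epilog shapes recognized by the patch.
    static const ThumbPattern kEpilogPatterns[] = {
        { 0xBC00, 0xFE00, "POP <registers>" },
        { 0x4770, 0xFFFF, "BX LR" },
    };

    static const char* MatchEpilog(uint16_t instr)
    {
        for (const ThumbPattern& p : kEpilogPatterns)
            if ((instr & p.mask) == p.bits)
                return p.name;
        return nullptr;
    }

    int main()
    {
        const uint16_t samples[] = { 0xBD10 /* pop {r4, pc} */,
                                     0x4770 /* bx lr */,
                                     0xB510 /* push {r4, lr} */ };
        for (uint16_t s : samples)
        {
            const char* hit = MatchEpilog(s);
            printf("%04X -> %s\n", (unsigned)s, hit ? hit : "not an epilog pattern");
        }
        return 0;
    }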
@@ -794,7 +1024,7 @@
         return true;
     }
 
-#if defined(TARGET_APPLE) && defined(TARGET_ARM64)
+#if (defined(TARGET_APPLE) && defined(TARGET_ARM64)) || defined(TARGET_ARM)
     // If we are inside a prolog without a saved frame then we cannot safely unwind.
     //
     // Some known frame layouts use compact unwind encoding which cannot handle unwinding
@@ -822,7 +1052,7 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn
         *ppvRetAddrLocation = (PTR_PTR_VOID)(pRegisterSet->GetSP() - sizeof(TADDR));
         return true;
 
-#elif defined(TARGET_ARM64)
+#elif defined(TARGET_ARM64) || defined(TARGET_ARM)
 
     if (decoder.HasTailCalls())
     {
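GetReturnAddressHijackInfo reports the slot that holds the hijacked method's return address; the suspension machinery then temporarily overwrites that slot with the address of RhpGcProbeHijack, and FixupHijackedCallstack restores the original value before the method resumes. A toy model of that swap on a simulated stack slot (a sketch of the idea only, not the runtime's actual bookkeeping):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    using ReturnAddress = uintptr_t;

    struct ToyThread
    {
        ReturnAddress* hijackedSlot = nullptr;  // where the real return address lives
        ReturnAddress  originalAddress = 0;     // saved so it can be restored
    };

    // Redirect the victim's return to the probe stub.
    static void Hijack(ToyThread& t, ReturnAddress* slot, ReturnAddress probeStub)
    {
        t.hijackedSlot = slot;
        t.originalAddress = *slot;
        *slot = probeStub;
    }

    // What the probe stub does first: restore the caller's real return address.
    static ReturnAddress Unhijack(ToyThread& t)
    {
        *t.hijackedSlot = t.originalAddress;
        t.hijackedSlot = nullptr;
        return t.originalAddress;
    }

    int main()
    {
        ReturnAddress stackSlot = 0x0001A2C5;  // pretend return address (Thumb bit set)
        ToyThread thread;

        Hijack(thread, &stackSlot, /*probeStub*/ 0x00010001);
        assert(stackSlot == 0x00010001);       // victim would now "return" into the stub

        ReturnAddress resumeAt = Unhijack(thread);
        assert(stackSlot == 0x0001A2C5 && resumeAt == 0x0001A2C5);
        printf("hijack round-trip ok, resume at %#zx\n", (size_t)resumeAt);
        return 0;
    }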
diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc
index bde1d517b7e82..80f633327c830 100644
--- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc
+++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc
@@ -3,6 +3,9 @@
 
 #define INVALIDGCVALUE 0xCCCCCCCD
 
+// This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT
+#define STATUS_REDHAWK_THREAD_ABORT 0x43
+
 // Enforce subsections via symbols to workaround bugs in Xcode 15 linker.
 #if defined(__APPLE__)
 .subsections_via_symbols

diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc
index 11225c0ad5ed2..05786d0fe470e 100644
--- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc
+++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc
@@ -252,9 +252,6 @@ C_FUNC(\Name):
 #define TSF_SuppressGcStress    0x08
 #define TSF_DoNotTriggerGc      0x10
 
-// This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT
-#define STATUS_REDHAWK_THREAD_ABORT 0x43
-
 //
 // Rename fields of nested structs
 //

diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc
index 7961fdb1dbcf4..eaf96c70609ee 100644
--- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc
+++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc
@@ -17,6 +17,8 @@
 #define PTFF_SAVE_ALL_PRESERVED 0x0000007F  // NOTE: R11 is not included in this set!
 #define PTFF_SAVE_SP            0x00000100
 #define PTFF_SAVE_R0            0x00000200
+#define PTFF_THREAD_ABORT       0x00100000
+
 #define DEFAULT_FRAME_SAVE_FLAGS (PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP)
 
 // These must match the TrapThreadsFlags enum
@@ -237,14 +239,14 @@ C_FUNC(\Name):
 //
 .macro PUSH_COOP_PINVOKE_FRAME trashReg
 
-    PROLOG_STACK_ALLOC 4        // Save space for caller's SP
+    PROLOG_STACK_ALLOC 8        // Save space for caller's SP and 8-byte alignment padding
    PROLOG_PUSH "{r4-r10}"      // Save preserved registers
    PROLOG_STACK_ALLOC 8        // Save space for flags and Thread*
    PROLOG_PUSH "{r11}"         // Save caller's FP
    PROLOG_PUSH "{r11,lr}"      // Save caller's frame-chain pointer and PC
 
    // Compute SP value at entry to this method and save it in the last slot of the frame (slot #12).
-    add         \trashReg, sp, #(13 * 4)
+    add         \trashReg, sp, #(14 * 4)
    str         \trashReg, [sp, #(12 * 4)]
 
    // Record the bitmask of saved registers in the frame (slot #4).
@@ -260,7 +262,7 @@ C_FUNC(\Name):
    EPILOG_POP  "{r11}"         // Restore caller's FP
    EPILOG_STACK_FREE 8         // Discard flags and Thread*
    EPILOG_POP  "{r4-r10}"      // Restore preserved registers
-    EPILOG_STACK_FREE 4         // Discard caller's SP
+    EPILOG_STACK_FREE 8         // Discard caller's SP and 8-byte alignment padding
 .endm
 
 // thumb with PIC version

diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm64.inc
index 5bd5db64e983a..8361b06fe6975 100644
--- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm64.inc
+++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm64.inc
@@ -372,9 +372,6 @@ PTFF_THREAD_ABORT_BIT = 36
 TrapThreadsFlags_AbortInProgress_Bit = 0
 TrapThreadsFlags_TrapThreads_Bit = 1
 
-// This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT
-#define STATUS_REDHAWK_THREAD_ABORT 0x43
-
 // These must match the TrapThreadsFlags enum
 #define TrapThreadsFlags_None            0
 #define TrapThreadsFlags_AbortInProgress 1
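The PUSH_COOP_PINVOKE_FRAME change is purely an alignment fix: the old layout reserved 13 words (52 bytes), so SP lost its AAPCS 8-byte alignment, while the new layout reserves 14 words (56 bytes). A quick check of that arithmetic, with the word counts taken from the macro above:

    #include <cstdio>

    int main()
    {
        const int words[] = {
            2,  // {r11, lr}  - frame-chain pointer and PC
            1,  // {r11}      - caller's FP
            2,  // flags + Thread*
            7,  // {r4-r10}   - preserved registers
        };

        int pushed = 0;
        for (int w : words)
            pushed += w;

        const int oldFrameBytes = (pushed + 1) * 4; // old: 1 word for caller's SP
        const int newFrameBytes = (pushed + 2) * 4; // new: caller's SP + padding word

        // AAPCS requires SP to be 8-byte aligned at public interfaces, so the
        // frame the macro carves out has to be a multiple of 8 bytes itself.
        printf("old frame: %d bytes (8-byte aligned: %s)\n",
               oldFrameBytes, oldFrameBytes % 8 == 0 ? "yes" : "no");   // 52 -> no
        printf("new frame: %d bytes (8-byte aligned: %s)\n",
               newFrameBytes, newFrameBytes % 8 == 0 ? "yes" : "no");   // 56 -> yes
        return 0;
    }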
diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_ARM/ARMEmitter.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_ARM/ARMEmitter.cs
index 822a351610b9b..8562927f83878 100644
--- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_ARM/ARMEmitter.cs
+++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_ARM/ARMEmitter.cs
@@ -216,5 +216,14 @@ public void EmitRETIfEqual()
             EmitBNE(4);
             EmitRET();
         }
+
+        // beq label(+4): ret(2) + next(2)
+        // bx lr
+        // label: ...
+        public void EmitRETIfNotEqual()
+        {
+            EmitBEQ(4);
+            EmitRET();
+        }
     }
 }

diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM/ARMReadyToRunGenericHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM/ARMReadyToRunGenericHelperNode.cs
index ab985e89a9b90..a1950d4937350 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM/ARMReadyToRunGenericHelperNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM/ARMReadyToRunGenericHelperNode.cs
@@ -39,6 +39,18 @@ protected void EmitDictionaryLookup(NodeFactory factory, ref ARMEmitter encoder,
 
             // Load the generic dictionary cell
             encoder.EmitLDR(result, context, dictionarySlot * factory.Target.PointerSize);
+
+            // If there's any invalid entries, we need to test for them
+            //
+            // Skip this in relocsOnly to make it easier to weed out bugs - the _hasInvalidEntries
+            // flag can change over the course of compilation and the bad slot helper dependency
+            // should be reported by someone else - the system should not rely on it coming from here.
+            if (!relocsOnly && _hasInvalidEntries)
+            {
+                encoder.EmitCMP(result, 0);
+                encoder.EmitRETIfNotEqual();
+                encoder.EmitJMP(GetBadSlotHelper(factory));
+            }
         }
 
         protected sealed override void EmitCode(NodeFactory factory, ref ARMEmitter encoder, bool relocsOnly)

diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs
index 01f55b542daf1..767475ca336cc 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs
@@ -42,7 +42,7 @@ protected void EmitDictionaryLookup(NodeFactory factory, ref ARM64Emitter encode
 
             // If there's any invalid entries, we need to test for them
             //
-            // Only do this in relocsOnly to make it easier to weed out bugs - the _hasInvalidEntries
+            // Skip this in relocsOnly to make it easier to weed out bugs - the _hasInvalidEntries
             // flag can change over the course of compilation and the bad slot helper dependency
             // should be reported by someone else - the system should not rely on it coming from here.
             if (!relocsOnly && _hasInvalidEntries)

diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunGenericHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunGenericHelperNode.cs
index 1981ed6e1c503..89e4247a44bff 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunGenericHelperNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunGenericHelperNode.cs
@@ -44,7 +44,7 @@ protected void EmitDictionaryLookup(NodeFactory factory, ref X64Emitter encoder,
 
             // If there's any invalid entries, we need to test for them
             //
-            // Only do this in relocsOnly to make it easier to weed out bugs - the _hasInvalidEntries
+            // Skip this in relocsOnly to make it easier to weed out bugs - the _hasInvalidEntries
             // flag can change over the course of compilation and the bad slot helper dependency
             // should be reported by someone else - the system should not rely on it coming from here.
             if (!relocsOnly && _hasInvalidEntries)
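On ARM the generic dictionary lookup now gets the same guard the other targets already have: after loading the slot, a null entry means the value has not been computed into the dictionary, and control transfers to a bad-slot helper instead of returning garbage. The emitted sequence behaves roughly like this C++ sketch (all names here are invented for illustration):

    #include <cstdio>

    using DictionaryEntry = void*;

    // Stand-in for the compiler-generated lazy resolution helper.
    static DictionaryEntry ResolveBadSlot(DictionaryEntry* slot)
    {
        *slot = (DictionaryEntry)0x1000;  // pretend we computed and published the value
        printf("slow path taken\n");
        return *slot;
    }

    // Rough shape of the code EmitDictionaryLookup produces when the layout
    // may contain invalid (null) entries: compare against 0, return if non-null,
    // otherwise tail-call the bad-slot helper.
    static DictionaryEntry LookupSlot(DictionaryEntry* dictionary, int slotIndex)
    {
        DictionaryEntry value = dictionary[slotIndex];
        if (value != nullptr)
            return value;
        return ResolveBadSlot(&dictionary[slotIndex]);
    }

    int main()
    {
        DictionaryEntry dict[2] = { (DictionaryEntry)0x2000, nullptr };
        LookupSlot(dict, 0);   // fast path
        LookupSlot(dict, 1);   // slow path, fills the slot
        LookupSlot(dict, 1);   // fast path now
        return 0;
    }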
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/UnixObjectWriter.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/UnixObjectWriter.cs
index 37d274b794649..78fe7dc30dbaa 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/UnixObjectWriter.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/UnixObjectWriter.cs
@@ -75,7 +75,12 @@ private protected override void EmitUnwindInfo(
 
             if (nodeWithCodeInfo.FrameInfos is FrameInfo[] frameInfos &&
                 nodeWithCodeInfo is ISymbolDefinitionNode)
             {
-                bool useFrameNames = UseFrameNames;
+                // On ARM we always use frame names, even for the first frame, to ensure that
+                // DWARF data uses PC references without the Thumb bit set. Using a function
+                // symbol would propagate the bit. Unfortunately, the R_ARM_REL32_NOI
+                // relocation is not widely implemented, so using that is not an option.
+                bool isArm = _nodeFactory.Target.Architecture == TargetArchitecture.ARM;
+                bool useFrameNames = isArm || UseFrameNames;
                 SectionWriter lsdaSectionWriter;
 
                 if (ShouldShareSymbol((ObjectNode)nodeWithCodeInfo))
@@ -100,7 +105,7 @@ private protected override void EmitUnwindInfo(
                     string framSymbolName = $"_fram{i}{currentSymbolName}";
 
                     lsdaSectionWriter.EmitSymbolDefinition(lsdaSymbolName);
-                    if (start != 0 && useFrameNames)
+                    if (useFrameNames && (isArm || start != 0))
                     {
                         sectionWriter.EmitSymbolDefinition(framSymbolName, start);
                     }
@@ -142,7 +147,7 @@ private protected override void EmitUnwindInfo(
                         }
                     }
 
-                    string startSymbolName = useFrameNames && start != 0 ? framSymbolName : currentSymbolName;
+                    string startSymbolName = useFrameNames && (isArm || start != 0) ? framSymbolName : currentSymbolName;
                     ulong length = (ulong)(end - start);
                     if (!EmitCompactUnwinding(startSymbolName, length, lsdaSymbolName, blob))
                     {

diff --git a/src/native/external/llvm-libunwind/src/AddressSpace.hpp b/src/native/external/llvm-libunwind/src/AddressSpace.hpp
index 26d289068b38c..dff3249bf9bfc 100644
--- a/src/native/external/llvm-libunwind/src/AddressSpace.hpp
+++ b/src/native/external/llvm-libunwind/src/AddressSpace.hpp
@@ -583,11 +583,7 @@ inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr,
 // `_dl_find_object`. Use _LIBUNWIND_SUPPORT_DWARF_INDEX, because libunwind
 // support for _dl_find_object on other unwind formats is not implemented,
 // yet.
-#if defined(DLFO_STRUCT_HAS_EH_DBASE) & defined(_LIBUNWIND_SUPPORT_DWARF_INDEX)
-  // We expect `_dl_find_object` to return PT_GNU_EH_FRAME.
-#if DLFO_EH_SEGMENT_TYPE != PT_GNU_EH_FRAME
-#error _dl_find_object retrieves an unexpected section type
-#endif
+#if defined(DLFO_STRUCT_HAS_EH_DBASE) && defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) && DLFO_EH_SEGMENT_TYPE == PT_GNU_EH_FRAME
   // We look-up `dl_find_object` dynamically at runtime to ensure backwards
   // compatibility with earlier version of glibc not yet providing it. On older
   // systems, we gracefully fallback to `dl_iterate_phdr`. Cache the pointer

diff --git a/src/tests/nativeaot/SmokeTests/DwarfDump/DwarfDump.csproj b/src/tests/nativeaot/SmokeTests/DwarfDump/DwarfDump.csproj
index 8166b640a16dd..9b234b923cd8a 100644
--- a/src/tests/nativeaot/SmokeTests/DwarfDump/DwarfDump.csproj
+++ b/src/tests/nativeaot/SmokeTests/DwarfDump/DwarfDump.csproj
@@ -4,6 +4,8 @@
     0
     true
+
+    true
     true
     false
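The libunwind tweak only changes when the `_dl_find_object` fast path is compiled in; at runtime the symbol is still resolved dynamically so that older glibc versions fall back to `dl_iterate_phdr`. The detection step on its own looks roughly like this sketch of the general pattern (not the vendored libunwind code; link with -ldl on older toolchains):

    #define _GNU_SOURCE 1
    #include <dlfcn.h>
    #include <cstdio>

    // _dl_find_object (glibc 2.35+) takes an address and fills in a result
    // struct; we only probe for its presence here, so an opaque signature is enough.
    using dl_find_object_fn = int (*)(void*, void*);

    int main()
    {
        // RTLD_DEFAULT searches the images already loaded into the process, i.e. libc itself.
        void* sym = dlsym(RTLD_DEFAULT, "_dl_find_object");
        if (sym != nullptr)
        {
            auto find_object = reinterpret_cast<dl_find_object_fn>(sym);
            (void)find_object; // a real unwinder would cache and call this fast path
            printf("using _dl_find_object fast path\n");
        }
        else
        {
            printf("falling back to dl_iterate_phdr\n");
        }
        return 0;
    }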