[NativeAOT] Adding CET support (#102680)
* Add support for STATUS_RETURN_ADDRESS_HIJACK_ATTEMPT

* fix build with clang

* Allow hijacked returns that land in assembly thunks

* fix x86 build

* fail fast if hijack is hit on an unhijacked thread.

* comment

* assert that OS unhijacked the thread to the same target as stashed by us.

* opt into CETCOMPAT by default

* unify adjustment for thunks

* Use CETCompat as condition. Narrow to x64.

* Enable EHCONT, if CET and CFG are enabled

* tweak comments

* Reconcile shadow stack with SP changes in RhpCallCatchFunclet

* Use RhFailFast for failfast

* drop __fastcall in HijackFunc

* Apply suggestions from code review

Co-authored-by: Jan Kotas <[email protected]>

* remove fastcall from RhpHijackForGcStress

* Apply suggestions from code review

Co-authored-by: Jan Kotas <[email protected]>

---------

Co-authored-by: Jan Kotas <[email protected]>
VSadov and jkotas authored Jul 2, 2024
1 parent c5f2d4f commit c8e4f2c
Showing 14 changed files with 254 additions and 71 deletions.
@@ -99,6 +99,12 @@ The .NET Foundation licenses this file to you under the MIT license.
<LinkerArg Condition="'$(OutputType)' == 'WinExe' or '$(OutputType)' == 'Exe'" Include="/STACK:$(IlcDefaultStackSize)" />
<!-- Do not warn if someone declares UnmanagedCallersOnly with an entrypoint of 'DllGetClassObject' and similar -->
<LinkerArg Include="/IGNORE:4104" />
<!-- Opt into CETCOMPAT by default. -->
<LinkerArg Condition="'$(CETCompat)' != 'false' and '$(_targetArchitecture)' == 'x64'" Include="/CETCOMPAT" />
<!-- Allow user to opt out. -->
<LinkerArg Condition="'$(CETCompat)' == 'false' and '$(_targetArchitecture)' == 'x64'" Include="/CETCOMPAT:NO" />
<!-- Enable EHCONT if CET is not disabled and CFG is enabled. -->
<LinkerArg Condition="'$(CETCompat)' != 'false' and '$(_targetArchitecture)' == 'x64' and '$(ControlFlowGuard)' == 'Guard'" Include="/guard:ehcont"/>
</ItemGroup>

<ItemGroup Condition="!Exists('$(IlcSdkPath)debugucrt.txt')">
55 changes: 55 additions & 0 deletions src/coreclr/nativeaot/Runtime/EHHelpers.cpp
@@ -536,6 +536,61 @@ int32_t __stdcall RhpVectoredExceptionHandler(PEXCEPTION_POINTERS pExPtrs)
return EXCEPTION_CONTINUE_SEARCH;
}

// the following would work on ARM64 as well, but there is no way to test right now.
#ifdef TARGET_AMD64

#ifndef STATUS_RETURN_ADDRESS_HIJACK_ATTEMPT
#define STATUS_RETURN_ADDRESS_HIJACK_ATTEMPT ((uintptr_t)0x80000033L)
#endif

if (faultCode == STATUS_RETURN_ADDRESS_HIJACK_ATTEMPT)
{
Thread * pThread = ThreadStore::GetCurrentThreadIfAvailable();
if (pThread == NULL || !pThread->IsCurrentThreadInCooperativeMode())
{
// if we are not in coop mode, this cannot be our hijack
// Perhaps some other runtime is responsible.
return EXCEPTION_CONTINUE_SEARCH;
}

// Sanity check.
if (!pThread->IsHijacked())
{
_ASSERTE(!"The thread should be hijacked by us.");
RhFailFast();
}

PCONTEXT interruptedContext = pExPtrs->ContextRecord;
bool areShadowStacksEnabled = PalAreShadowStacksEnabled();
if (areShadowStacksEnabled)
{
// The OS should have fixed the return address at the top of the stack to the same value we've stashed for the hijacked thread
_ASSERTE(*(size_t *)interruptedContext->GetSp() == (uintptr_t)pThread->GetHijackedReturnAddress());

// When CET is enabled, the interruption happens on the ret instruction in the callee.
// We need to "pop" rsp to the caller, as if the ret has consumed it.
interruptedContext->SetSp(interruptedContext->GetSp() + 8);
}

// Change the IP to be at the original return site, as if we have returned to the caller.
// That IP is an interruptible safe point, so we can suspend right there.
uintptr_t origIp = interruptedContext->GetIp();
interruptedContext->SetIp((uintptr_t)pThread->GetHijackedReturnAddress());

pThread->InlineSuspend(interruptedContext);

if (areShadowStacksEnabled)
{
// Undo the "pop", so that the ret could now succeed.
interruptedContext->SetSp(interruptedContext->GetSp() - 8);
interruptedContext->SetIp(origIp);
}

ASSERT(!pThread->IsHijacked());
return EXCEPTION_CONTINUE_EXECUTION;
}
#endif // TARGET_AMD64 (support for STATUS_RETURN_ADDRESS_HIJACK_ATTEMPT)

uintptr_t faultingIP = pExPtrs->ContextRecord->GetIp();

ICodeManager * pCodeManager = GetRuntimeInstance()->GetCodeManagerForAddress((PTR_VOID)faultingIP);
3 changes: 3 additions & 0 deletions src/coreclr/nativeaot/Runtime/PalRedhawk.h
@@ -436,6 +436,7 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT {
uintptr_t GetIp() { return Pc; }
uintptr_t GetLr() { return Lr; }
uintptr_t GetSp() { return Sp; }
void SetSp(uintptr_t sp) { Sp = sp; }

template <typename F>
void ForEachPossibleObjectRef(F lambda)
@@ -665,6 +666,7 @@ REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalVirtualProtect(_In_ void* pAddre
REDHAWK_PALIMPORT void PalFlushInstructionCache(_In_ void* pAddress, size_t size);
REDHAWK_PALIMPORT void REDHAWK_PALAPI PalSleep(uint32_t milliseconds);
REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalSwitchToThread();
REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalAreShadowStacksEnabled();
REDHAWK_PALIMPORT HANDLE REDHAWK_PALAPI PalCreateEventW(_In_opt_ LPSECURITY_ATTRIBUTES pEventAttributes, UInt32_BOOL manualReset, UInt32_BOOL initialState, _In_opt_z_ LPCWSTR pName);
REDHAWK_PALIMPORT uint64_t REDHAWK_PALAPI PalGetTickCount64();
REDHAWK_PALIMPORT HANDLE REDHAWK_PALAPI PalGetModuleHandleFromPointer(_In_ void* pointer);
@@ -692,6 +694,7 @@ REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalStartEventPipeHelperThread(_In_ Backgro
typedef void (*PalHijackCallback)(_In_ NATIVE_CONTEXT* pThreadContext, _In_opt_ void* pThreadToHijack);
REDHAWK_PALIMPORT void REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_opt_ void* pThreadToHijack);
REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalRegisterHijackCallback(_In_ PalHijackCallback callback);
REDHAWK_PALIMPORT HijackFunc* REDHAWK_PALAPI PalGetHijackTarget(_In_ HijackFunc* defaultHijackTarget);
#endif

REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalAllocateThunksFromTemplate(_In_ HANDLE hTemplateModule, uint32_t templateRva, size_t templateSize, _Outptr_result_bytebuffer_(templateSize) void** newThunksOut);
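The two new PAL exports above are the hooks the CET work hangs off: PalAreShadowStacksEnabled reports whether the process runs with user-mode shadow stacks, and PalGetHijackTarget lets the OS substitute its own return-address hijack target so that a hijacked `ret` raises STATUS_RETURN_ADDRESS_HIJACK_ATTEMPT (handled in EHHelpers.cpp above) instead of tripping a shadow-stack violation. A minimal Windows-only sketch of how such helpers could look is below; it assumes GetProcessMitigationPolicy/ProcessUserShadowStackPolicy expose the shadow-stack state and that ntdll exports RtlGetReturnAddressHijackTarget — the commit's actual PAL implementation may differ.

```cpp
// Illustrative sketch only -- not the PAL code from this commit.
#include <windows.h>

typedef void HijackFunc();   // same shape as the typedef added to CommonTypes.h

bool PalAreShadowStacksEnabled()
{
    // Assumption: user shadow-stack state is visible via the process mitigation policy.
    PROCESS_MITIGATION_USER_SHADOW_STACK_POLICY policy = {};
    if (!GetProcessMitigationPolicy(GetCurrentProcess(), ProcessUserShadowStackPolicy,
                                    &policy, sizeof(policy)))
    {
        return false;
    }
    return policy.EnableUserShadowStack != 0;
}

HijackFunc* PalGetHijackTarget(HijackFunc* defaultHijackTarget)
{
    if (!PalAreShadowStacksEnabled())
        return defaultHijackTarget;

    // Assumption: ntdll exposes an OS-sanctioned hijack target. Returning into it makes
    // the hijacked `ret` fault with STATUS_RETURN_ADDRESS_HIJACK_ATTEMPT, which the
    // vectored handler turns into a safe suspension point. Resolved dynamically so older
    // OS versions simply keep the default target.
    typedef ULONG_PTR (NTAPI* GetHijackTargetFn)();
    GetHijackTargetFn pfn = (GetHijackTargetFn)GetProcAddress(
        GetModuleHandleW(L"ntdll.dll"), "RtlGetReturnAddressHijackTarget");
    return (pfn != NULL) ? (HijackFunc*)pfn() : defaultHijackTarget;
}
```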
66 changes: 41 additions & 25 deletions src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
@@ -277,28 +277,8 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PInvokeTransitionF

#endif // defined(USE_PORTABLE_HELPERS)

// This function guarantees that the final initialized context will refer to a managed
// frame. In the rare case where the PC does not refer to managed code (and refers to an
// assembly thunk instead), unwind through the thunk sequence to find the nearest managed
// frame.
// NOTE: When thunks are present, the thunk sequence may report a conservative GC reporting
// lower bound that must be applied when processing the managed frame.

ReturnAddressCategory category = CategorizeUnadjustedReturnAddress(m_ControlPC);

if (category == InManagedCode)
{
ASSERT(m_pInstance->IsManaged(m_ControlPC));
}
else if (IsNonEHThunk(category))
{
UnwindNonEHThunkSequence();
ASSERT(m_pInstance->IsManaged(m_ControlPC));
}
else
{
FAILFAST_OR_DAC_FAIL_UNCONDITIONALLY("PInvokeTransitionFrame PC points to an unexpected assembly thunk kind.");
}
// adjust for thunks, if needed
EnsureInitializedToManagedFrame();

STRESS_LOG1(LF_STACKWALK, LL_INFO10000, " %p\n", m_ControlPC);
}
@@ -484,7 +464,13 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CO
}

// Prepare to start a stack walk from the context listed in the supplied NATIVE_CONTEXT.
// The supplied context can describe a location in managed code.
// NOTE: When a return address hijack is executed, the PC in the NATIVE_CONTEXT
// matches the hijacked return address. This PC is not guaranteed to be in managed code
// since the hijacked return address may refer to a location where an assembly thunk called
// into managed code.
// NOTE: When the PC is in an assembly thunk, this function will unwind to the next managed
// frame and may publish a conservative stack range (if and only if any of the unwound
// thunks report a conservative range).
void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pCtx, uint32_t dwFlags)
{
ASSERT((dwFlags & MethodStateCalculated) == 0);
@@ -498,8 +484,9 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pC
// properly walk it in parallel.
ResetNextExInfoForSP(pCtx->GetSp());

// This codepath is used by the hijack stackwalk. The IP must be in managed code.
ASSERT(m_pInstance->IsManaged(dac_cast<PTR_VOID>(pCtx->GetIp())));
// This codepath is used by the hijack stackwalk. The IP must be in managed code
// or in a conservatively reported assembly thunk.
ASSERT(IsValidReturnAddress((void*)pCtx->GetIp()));

//
// control state
@@ -616,6 +603,35 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pC
#endif // TARGET_ARM

#undef PTR_TO_REG

// adjust for thunks, if needed
EnsureInitializedToManagedFrame();
}

void StackFrameIterator::EnsureInitializedToManagedFrame()
{
// This function guarantees that the final initialized context will refer to a managed
// frame. In the rare case where the PC does not refer to managed code (and refers to an
// assembly thunk instead), unwind through the thunk sequence to find the nearest managed
// frame.
// NOTE: When thunks are present, the thunk sequence may report a conservative GC reporting
// lower bound that must be applied when processing the managed frame.

ReturnAddressCategory category = CategorizeUnadjustedReturnAddress(m_ControlPC);

if (category == InManagedCode)
{
ASSERT(m_pInstance->IsManaged(m_ControlPC));
}
else if (IsNonEHThunk(category))
{
UnwindNonEHThunkSequence();
ASSERT(m_pInstance->IsManaged(m_ControlPC));
}
else
{
FAILFAST_OR_DAC_FAIL_UNCONDITIONALLY("Unadjusted initial PC points to an unexpected assembly thunk kind.");
}
}

PTR_VOID StackFrameIterator::HandleExCollide(PTR_ExInfo pExInfo)
1 change: 1 addition & 0 deletions src/coreclr/nativeaot/Runtime/StackFrameIterator.h
@@ -86,6 +86,7 @@ class StackFrameIterator
void InternalInit(Thread * pThreadToWalk, PTR_PInvokeTransitionFrame pFrame, uint32_t dwFlags); // GC stackwalk
void InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CONTEXT pCtx, uint32_t dwFlags); // EH and hijack stackwalk, and collided unwind
void InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pCtx, uint32_t dwFlags); // GC stackwalk of redirected thread
void EnsureInitializedToManagedFrame();

void InternalInitForEH(Thread * pThreadToWalk, PAL_LIMITED_CONTEXT * pCtx, bool instructionFault); // EH stackwalk
void InternalInitForStackTrace(); // Environment.StackTrace
1 change: 1 addition & 0 deletions src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h
@@ -58,6 +58,7 @@ PLAT_ASM_OFFSET(0f0, PAL_LIMITED_CONTEXT, Xmm15)

PLAT_ASM_SIZEOF(130, REGDISPLAY)
PLAT_ASM_OFFSET(78, REGDISPLAY, SP)
PLAT_ASM_OFFSET(80, REGDISPLAY, IP)

PLAT_ASM_OFFSET(18, REGDISPLAY, pRbx)
PLAT_ASM_OFFSET(20, REGDISPLAY, pRbp)
45 changes: 40 additions & 5 deletions src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm
@@ -15,6 +15,10 @@ include asmmacros.inc
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
NESTED_ENTRY RhpThrowHwEx, _TEXT

ALTERNATE_ENTRY RhpThrowHwExGEHCONT ; this needs to be an EHCONT target since we'll be context-jumping here.

.GEHCONT RhpThrowHwExGEHCONT

SIZEOF_XmmSaves equ SIZEOF__PAL_LIMITED_CONTEXT - OFFSETOF__PAL_LIMITED_CONTEXT__Xmm6
STACKSIZEOF_ExInfo equ ((SIZEOF__ExInfo + 15) AND (NOT 15))

@@ -486,8 +490,9 @@
INLINE_THREAD_UNHIJACK rdx, rcx, r9 ;; Thread in rdx, trashes rcx and r9

mov rcx, [rsp + rsp_offsetof_arguments + 18h] ;; rcx <- current ExInfo *
mov r10, [r8 + OFFSETOF__REGDISPLAY__IP] ;; r10 <- original IP value
mov r8, [r8 + OFFSETOF__REGDISPLAY__SP] ;; r8 <- resume SP value
xor r9d, r9d ;; r9 <- 0
xor r9, r9 ;; r9 <- 0

@@: mov rcx, [rcx + OFFSETOF__ExInfo__m_pPrevExInfo] ;; rcx <- next ExInfo
cmp rcx, r9
@@ -497,6 +502,20 @@

@@: mov [rdx + OFFSETOF__Thread__m_pExInfoStackHead], rcx ;; store the new head on the Thread

;; Sanity check: if we have shadow stack, it should agree with what we have in rsp
LOCAL_STACK_USE equ 118h
ifdef _DEBUG
rdsspq r9
test r9, r9
jz @f
mov r9, [r9]
cmp [rsp + LOCAL_STACK_USE], r9
je @f
int 3
@@:
xor r9, r9 ;; r9 <- 0
endif

test [RhpTrapThreads], TrapThreadsFlags_AbortInProgress
jz @f

@@ -507,12 +526,28 @@
;; It was the ThreadAbortException, so rethrow it
mov rcx, STATUS_REDHAWK_THREAD_ABORT
mov rdx, rax ;; rdx <- continuation address as exception RIP
mov rsp, r8 ;; reset the SP to resume SP value
jmp RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception
mov rax, RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception

;; reset RSP and jump to the continuation address
;; reset RSP and jump to RAX
@@: mov rsp, r8 ;; reset the SP to resume SP value
jmp rax

;; if we have a shadow stack, then we need to reconcile it with the rsp change we have just made
rdsspq r9
test r9, r9
jz NoSSP

;; Find the shadow stack pointer for the frame we are going to restore to.
;; The SSP we search is pointing to the return address of the frame represented
;; by the passed in context. So we search for the instruction pointer from
;; the context and return one slot up from there.
;; (Same logic as in GetSSPForFrameOnCurrentStack)
xor r11, r11
@@: inc r11
cmp [r9 + r11 * 8 - 8], r10
jne @b

incsspq r11
NoSSP: jmp rax


NESTED_END RhpCallCatchFunclet, _TEXT
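The shadow-stack reconciliation added to RhpCallCatchFunclet mirrors the search the comment attributes to GetSSPForFrameOnCurrentStack: starting from the current shadow stack pointer (read with rdsspq), scan toward older frames until the slot holding the resume frame's return address is found, then advance the SSP one slot past it with incsspq so the shadow stack agrees with the freshly reset RSP. A hedged C++ rendering of that counting logic (the function name is illustrative, not the runtime's helper) follows:

```cpp
#include <stddef.h>
#include <stdint.h>

// Sketch of the slot count computed by the asm loop above: how many shadow-stack entries
// must be popped (incsspq) so the shadow stack matches the frame being resumed to.
// `currentSsp` is the value rdsspq produced (r9); `resumeIp` is the return address of the
// resume frame (r10, loaded from REGDISPLAY::IP).
static size_t ShadowStackSlotsToPop(uintptr_t currentSsp, uintptr_t resumeIp)
{
    const uintptr_t* slot = (const uintptr_t*)currentSsp;

    // The shadow stack holds only return addresses, youngest first; walk toward older
    // frames until the entry for the resume frame is found.
    size_t index = 0;
    while (slot[index] != resumeIp)
        index++;

    // Pop everything up to and including the matching entry, leaving the SSP pointing one
    // slot past the resume frame's return address -- the same count `incsspq r11` consumes.
    return index + 1;
}
```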
12 changes: 6 additions & 6 deletions src/coreclr/nativeaot/Runtime/i386/GcProbe.asm
@@ -251,7 +251,7 @@ RhpGcStressProbe endp

endif ;; FEATURE_GC_STRESS

FASTCALL_FUNC RhpGcProbeHijack, 0
_RhpGcProbeHijack@0 proc public
HijackFixupProlog
test [RhpTrapThreads], TrapThreadsFlags_TrapThreads
jnz WaitForGC
@@ -261,18 +261,18 @@ WaitForGC:
or ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX
jmp RhpWaitForGC

FASTCALL_ENDFUNC
_RhpGcProbeHijack@0 endp

ifdef FEATURE_GC_STRESS
FASTCALL_FUNC RhpGcStressHijack, 0
_RhpGcStressHijack@0 proc public

HijackFixupProlog
or ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX
jmp RhpGcStressProbe

FASTCALL_ENDFUNC
_RhpGcStressHijack@0 endp

FASTCALL_FUNC RhpHijackForGcStress, 0
_RhpHijackForGcStress@0 proc public
push ebp
mov ebp, esp

@@ -307,7 +307,7 @@ FASTCALL_FUNC RhpHijackForGcStress, 0
pop edx
pop ebp
ret
FASTCALL_ENDFUNC
_RhpHijackForGcStress@0 endp
endif ;; FEATURE_GC_STRESS

end
4 changes: 4 additions & 0 deletions src/coreclr/nativeaot/Runtime/inc/CommonTypes.h
@@ -61,4 +61,8 @@ typedef struct _GUID {
} GUID;
#endif // FEATURE_EVENT_TRACE && !_INC_WINDOWS

// Hijack funcs are not called, they are "returned to". And when done, they return to the actual caller.
// Thus they cannot have any parameters or return anything.
typedef void HijackFunc();

#endif // __COMMON_TYPES_H__
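Because a hijack function is never called, only "returned to", the typedef deliberately has no parameters, no return value, and (after this change) no __fastcall decoration. A self-contained toy example of the pointer shape — the names below are hypothetical, not runtime code:

```cpp
#include <cstdio>

// Same shape as the CommonTypes.h typedef: no parameters, nothing returned.
typedef void HijackFunc();

// Hypothetical stand-in; the real targets are assembly stubs such as RhpGcProbeHijack.
void FakeHijackTarget() { std::puts("probe, then resume to the real caller"); }

// Mirrors the PalGetHijackTarget pattern: prefer an OS-provided target, else the default.
HijackFunc* ChooseHijackTarget(HijackFunc* osProvided, HijackFunc* defaultTarget)
{
    return osProvided != nullptr ? osProvided : defaultTarget;
}

int main()
{
    HijackFunc* pfnTarget = ChooseHijackTarget(nullptr, &FakeHijackTarget);
    pfnTarget();   // only to show the pointer is well-formed; the runtime never calls it
    return 0;
}
```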
10 changes: 6 additions & 4 deletions src/coreclr/nativeaot/Runtime/thread.cpp
@@ -572,13 +572,13 @@ void Thread::GcScanRootsWorker(ScanFunc * pfnEnumCallback, ScanContext * pvCallb

#ifdef FEATURE_HIJACK

EXTERN_C void FASTCALL RhpGcProbeHijack();
EXTERN_C void FASTCALL RhpGcStressHijack();
EXTERN_C void RhpGcProbeHijack();
EXTERN_C void RhpGcStressHijack();

// static
bool Thread::IsHijackTarget(void* address)
{
if (&RhpGcProbeHijack == address)
if (PalGetHijackTarget(/*defaultHijackTarget*/&RhpGcProbeHijack) == address)
return true;
#ifdef FEATURE_GC_STRESS
if (&RhpGcStressHijack == address)
@@ -697,7 +697,9 @@ void Thread::HijackCallback(NATIVE_CONTEXT* pThreadContext, void* pThreadToHijac
#endif //FEATURE_SUSPEND_REDIRECTION
}

pThread->HijackReturnAddress(pThreadContext, &RhpGcProbeHijack);
pThread->HijackReturnAddress(
pThreadContext,
PalGetHijackTarget(/*defaultHijackTarget*/&RhpGcProbeHijack));
}

#ifdef FEATURE_GC_STRESS
8 changes: 1 addition & 7 deletions src/coreclr/nativeaot/Runtime/thread.h
@@ -173,16 +173,9 @@ class Thread : private RuntimeThreadLocals
#ifdef FEATURE_HIJACK
static void HijackCallback(NATIVE_CONTEXT* pThreadContext, void* pThreadToHijack);

//
// Hijack funcs are not called, they are "returned to". And when done, they return to the actual caller.
// Thus they cannot have any parameters or return anything.
//
typedef void FASTCALL HijackFunc();

void HijackReturnAddress(PAL_LIMITED_CONTEXT* pSuspendCtx, HijackFunc* pfnHijackFunction);
void HijackReturnAddress(NATIVE_CONTEXT* pSuspendCtx, HijackFunc* pfnHijackFunction);
void HijackReturnAddressWorker(StackFrameIterator* frameIterator, HijackFunc* pfnHijackFunction);
bool InlineSuspend(NATIVE_CONTEXT* interruptedContext);
void CrossThreadUnhijack();
void UnhijackWorker();
#else // FEATURE_HIJACK
Expand All @@ -209,6 +202,7 @@ class Thread : private RuntimeThreadLocals
static uint64_t s_DeadThreadsNonAllocBytes;

public:
bool InlineSuspend(NATIVE_CONTEXT* interruptedContext);

static uint64_t GetDeadThreadsNonAllocBytes();
