Skip to content

Commit

Permalink
Make GC Stress 4/8 work with CET (#71085)
Browse files Browse the repository at this point in the history
* Make GC Stress 4/8 work with CET

This change makes the GC stress 4/8 work without redirection. It also
fixes a problem with missing unwinding of the shadow stack pointer
that was not discovered before. I've found it while testing this
change - it has manifested itself as a shadow stack overflow.

And there is one more fix. The VSD Resolve stub was problematic to unwind
through when null reference was passed as this to it. The stub had a push rdx
as the first instruction and the dereference of this happened after that. So
in case of the null, the call stack in the vectored exception handler contained
a phantom frame caused by a problem unwinding from the stub. That
caused incorrect updating of the shadow SP. I've fixed it by moving the dereference
before the push.
  • Loading branch information
janvorli committed Jun 22, 2022
1 parent 93f46d2 commit 305144e
Show file tree
Hide file tree
Showing 18 changed files with 263 additions and 114 deletions.
2 changes: 1 addition & 1 deletion src/coreclr/debug/ee/debugger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1010,7 +1010,7 @@ MemoryRange Debugger::s_hijackFunction[kMaxHijackFunctions] =
RedirectedHandledJITCaseForDbgThreadControl_StubEnd),
GetMemoryRangeForFunction(RedirectedHandledJITCaseForUserSuspend_Stub,
RedirectedHandledJITCaseForUserSuspend_StubEnd)
#if defined(HAVE_GCCOVER) && defined(TARGET_AMD64)
#if defined(HAVE_GCCOVER) && defined(TARGET_AMD64) && defined(USE_REDIRECT_FOR_GCSTRESS)
,
GetMemoryRangeForFunction(RedirectedHandledJITCaseForGCStress_Stub,
RedirectedHandledJITCaseForGCStress_StubEnd)
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/debug/ee/debugger.h
Original file line number Diff line number Diff line change
Expand Up @@ -2938,7 +2938,7 @@ void RedirectedHandledJITCaseForDbgThreadControl_StubEnd();
void RedirectedHandledJITCaseForUserSuspend_Stub();
void RedirectedHandledJITCaseForUserSuspend_StubEnd();

#if defined(HAVE_GCCOVER) && defined(TARGET_AMD64)
#if defined(HAVE_GCCOVER) && defined(TARGET_AMD64) && defined(USE_REDIRECT_FOR_GCSTRESS)
void RedirectedHandledJITCaseForGCStress_Stub();
void RedirectedHandledJITCaseForGCStress_StubEnd();
#endif // HAVE_GCCOVER && TARGET_AMD64
Expand Down
16 changes: 12 additions & 4 deletions src/coreclr/vm/amd64/Context.asm
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ CONTEXT_CONTROL equ 1h
CONTEXT_INTEGER equ 2h
CONTEXT_FLOATING_POINT equ 8h

; Signature: EXTERN_C void STDCALL ClrRestoreNonvolatileContext(PCONTEXT ContextRecord);
NESTED_ENTRY ClrRestoreNonvolatileContext, _TEXT
; Signature: EXTERN_C void STDCALL ClrRestoreNonvolatileContextWorker(PCONTEXT ContextRecord, DWORD64 ssp);
NESTED_ENTRY ClrRestoreNonvolatileContextWorker, _TEXT
push_nonvol_reg rbp
set_frame rbp, 0
END_PROLOGUE
Expand All @@ -40,7 +40,15 @@ NESTED_ENTRY ClrRestoreNonvolatileContext, _TEXT
test byte ptr [rcx + OFFSETOF__CONTEXT__ContextFlags], CONTEXT_CONTROL
je Done_Restore_CONTEXT_CONTROL

test rdx, rdx
je No_Ssp_Update
rdsspq rax
sub rdx, rax
shr rdx, 3
incsspq rdx
No_Ssp_Update:

; When user-mode shadow stacks are enabled, and for example the intent is to continue execution in managed code after
; exception handling, iret and ret can't be used because their shadow stack enforcement would not allow that transition,
; and using them would require writing to the shadow stack, which is not preferable. Instead, iret is partially
Expand All @@ -55,6 +63,6 @@ NESTED_ENTRY ClrRestoreNonvolatileContext, _TEXT
; The function was not asked to restore the control registers so we return back to the caller
pop rbp
ret
NESTED_END ClrRestoreNonvolatileContext, _TEXT
NESTED_END ClrRestoreNonvolatileContextWorker, _TEXT

end
41 changes: 41 additions & 0 deletions src/coreclr/vm/amd64/cgencpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,47 @@ inline void SetSP(CONTEXT *context, TADDR rsp)
context->Rsp = rsp;
}

#if defined(TARGET_WINDOWS) && !defined(DACCESS_COMPILE)
inline DWORD64 GetSSP(const CONTEXT * context)
{
CONTRACTL
{
NOTHROW;
GC_NOTRIGGER;

PRECONDITION(CheckPointer(context));
}
CONTRACTL_END;

XSAVE_CET_U_FORMAT* pCET = (XSAVE_CET_U_FORMAT*)LocateXStateFeature(const_cast<PCONTEXT>(context), XSTATE_CET_U, NULL);
if ((pCET != NULL) && (pCET->Ia32CetUMsr != 0))
{
return pCET->Ia32Pl3SspMsr;
}

return 0;
}

inline void SetSSP(CONTEXT *context, DWORD64 ssp)
{
CONTRACTL
{
NOTHROW;
GC_NOTRIGGER;

PRECONDITION(CheckPointer(context));
}
CONTRACTL_END;

XSAVE_CET_U_FORMAT* pCET = (XSAVE_CET_U_FORMAT*)LocateXStateFeature(context, XSTATE_CET_U, NULL);
if (pCET != NULL)
{
pCET->Ia32Pl3SspMsr = ssp;
pCET->Ia32CetUMsr = 1;
}
}
#endif // TARGET_WINDOWS && !DACCESS_COMPILE

#define SetFP(context, ebp)
inline TADDR GetFP(const CONTEXT * context)
{
Expand Down
9 changes: 8 additions & 1 deletion src/coreclr/vm/amd64/excepamd64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,6 @@ AdjustContextForVirtualStub(
_ASSERTE(!"AV in ResolveStub at unknown instruction");
return FALSE;
}
SetSP(pContext, dac_cast<PCODE>(dac_cast<PTR_BYTE>(GetSP(pContext)) + sizeof(void*))); // rollback push rdx
}
else
{
Expand All @@ -634,6 +633,14 @@ AdjustContextForVirtualStub(
SetIP(pContext, callsite);
SetSP(pContext, dac_cast<PCODE>(dac_cast<PTR_BYTE>(GetSP(pContext)) + sizeof(void*))); // Move SP to where it was at the call site

#if defined(TARGET_WINDOWS)
DWORD64 ssp = GetSSP(pContext);
if (ssp != 0)
{
SetSSP(pContext, ssp + sizeof(void*));
}
#endif // TARGET_WINDOWS

return TRUE;
}

Expand Down
37 changes: 19 additions & 18 deletions src/coreclr/vm/amd64/virtualcallstubcpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -398,11 +398,12 @@ struct ResolveStub
private:
friend struct ResolveHolder;

BYTE _resolveEntryPoint[3];// resolveStub:
BYTE _resolveEntryPoint[6];// resolveStub:
// 48 8B XX mov rax, [THIS_REG]
// 52 push rdx
// 49 BA mov r10,
size_t _cacheAddress; // xx xx xx xx xx xx xx xx 64-bit address
BYTE part1 [15]; // 48 8B XX mov rax, [THIS_REG] ; Compute hash = ((MT + MT>>12) ^ prehash)
BYTE part1 [12]; // ; Compute hash = ((MT + MT>>12) ^ prehash)
// 48 8B D0 mov rdx, rax ; rdx <- current MethodTable
// 48 C1 E8 0C shr rax, 12
// 48 03 C2 add rax, rdx
Expand Down Expand Up @@ -687,25 +688,25 @@ void ResolveHolder::InitializeStatic()
{
static_assert_no_msg((sizeof(ResolveHolder) % sizeof(void*)) == 0);

resolveInit._resolveEntryPoint [0] = 0x52;
resolveInit._resolveEntryPoint [1] = 0x49;
resolveInit._resolveEntryPoint [2] = 0xBA;
resolveInit._resolveEntryPoint [0] = X64_INSTR_MOV_RAX_IND_THIS_REG & 0xff;
resolveInit._resolveEntryPoint [1] = (X64_INSTR_MOV_RAX_IND_THIS_REG >> 8) & 0xff;
resolveInit._resolveEntryPoint [2] = (X64_INSTR_MOV_RAX_IND_THIS_REG >> 16) & 0xff;
resolveInit._resolveEntryPoint [3] = 0x52;
resolveInit._resolveEntryPoint [4] = 0x49;
resolveInit._resolveEntryPoint [5] = 0xBA;
resolveInit._cacheAddress = 0xcccccccccccccccc;
resolveInit.part1 [ 0] = X64_INSTR_MOV_RAX_IND_THIS_REG & 0xff;
resolveInit.part1 [ 1] = (X64_INSTR_MOV_RAX_IND_THIS_REG >> 8) & 0xff;
resolveInit.part1 [ 2] = (X64_INSTR_MOV_RAX_IND_THIS_REG >> 16) & 0xff;
resolveInit.part1 [ 0] = 0x48;
resolveInit.part1 [ 1] = 0x8B;
resolveInit.part1 [ 2] = 0xD0;
resolveInit.part1 [ 3] = 0x48;
resolveInit.part1 [ 4] = 0x8B;
resolveInit.part1 [ 5] = 0xD0;
resolveInit.part1 [ 6] = 0x48;
resolveInit.part1 [ 7] = 0xC1;
resolveInit.part1 [ 8] = 0xE8;
resolveInit.part1 [ 9] = CALL_STUB_CACHE_NUM_BITS;
resolveInit.part1 [ 4] = 0xC1;
resolveInit.part1 [ 5] = 0xE8;
resolveInit.part1 [ 6] = CALL_STUB_CACHE_NUM_BITS;
resolveInit.part1 [ 7] = 0x48;
resolveInit.part1 [ 8] = 0x03;
resolveInit.part1 [ 9] = 0xC2;
resolveInit.part1 [10] = 0x48;
resolveInit.part1 [11] = 0x03;
resolveInit.part1 [12] = 0xC2;
resolveInit.part1 [13] = 0x48;
resolveInit.part1 [14] = 0x35;
resolveInit.part1 [11] = 0x35;
// Review truncation from unsigned __int64 to UINT32 of a constant value.
#if defined(_MSC_VER)
#pragma warning(push)
Expand Down
22 changes: 22 additions & 0 deletions src/coreclr/vm/ceemain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,26 @@ static BOOL WINAPI DbgCtrlCHandler(DWORD dwCtrlType)
// A host can specify that it only wants one version of hosting interface to be used.
BOOL g_singleVersionHosting;

#ifdef TARGET_WINDOWS
typedef BOOL(WINAPI* PINITIALIZECONTEXT2)(PVOID Buffer, DWORD ContextFlags, PCONTEXT* Context, PDWORD ContextLength, ULONG64 XStateCompactionMask);
PINITIALIZECONTEXT2 g_pfnInitializeContext2 = NULL;

#ifdef TARGET_X86
typedef VOID(__cdecl* PRTLRESTORECONTEXT)(PCONTEXT ContextRecord, struct _EXCEPTION_RECORD* ExceptionRecord);
PRTLRESTORECONTEXT g_pfnRtlRestoreContext = NULL;
#endif // TARGET_X86

void InitializeOptionalWindowsAPIPointers()
{
HMODULE hm = GetModuleHandleW(_T("kernel32.dll"));
g_pfnInitializeContext2 = (PINITIALIZECONTEXT2)GetProcAddress(hm, "InitializeContext2");

#ifdef TARGET_X86
hm = GetModuleHandleW(_T("ntdll.dll"));
g_pfnRtlRestoreContext = (PRTLRESTORECONTEXT)GetProcAddress(hm, "RtlRestoreContext");
#endif //TARGET_X86
}
#endif // TARGET_WINDOWS

void InitializeStartupFlags()
{
Expand Down Expand Up @@ -595,6 +614,9 @@ void EEStartupHelper()
::SetConsoleCtrlHandler(DbgCtrlCHandler, TRUE/*add*/);
#endif

#ifdef HOST_WINDOWS
InitializeOptionalWindowsAPIPointers();
#endif // HOST_WINDOWS

// SString initialization
// This needs to be done before config because config uses SString::Empty()
Expand Down
12 changes: 0 additions & 12 deletions src/coreclr/vm/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -269,18 +269,6 @@ namespace Loader
} LoadFlag;
}

#if !defined(DACCESS_COMPILE)
#if defined(TARGET_WINDOWS) && defined(TARGET_AMD64)
EXTERN_C void STDCALL ClrRestoreNonvolatileContext(PCONTEXT ContextRecord);
#elif !(defined(TARGET_WINDOWS) && defined(TARGET_X86)) // !(TARGET_WINDOWS && TARGET_AMD64) && !(TARGET_WINDOWS && TARGET_X86)
inline void ClrRestoreNonvolatileContext(PCONTEXT ContextRecord)
{
// Falling back to RtlRestoreContext() for now, though it should be possible to have simpler variants for these cases
RtlRestoreContext(ContextRecord, NULL);
}
#endif // TARGET_WINDOWS && TARGET_AMD64
#endif // !DACCESS_COMPILE

// src/inc
#include "utilcode.h"
#include "log.h"
Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/vm/encee.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,9 @@ NOINLINE void EditAndContinueModule::FixContextAndResume(
STATIC_CONTRACT_GC_TRIGGERS; // Sends IPC event
STATIC_CONTRACT_THROWS;

#if defined(TARGET_WINDOWS) && defined(TARGET_AMD64)
DWORD64 ssp = GetSSP(pContext);
#endif
// Create local copies of all structs passed as arguments to prevent them from being overwritten
CONTEXT context;
memcpy(&context, pContext, sizeof(CONTEXT));
Expand Down Expand Up @@ -832,6 +835,8 @@ NOINLINE void EditAndContinueModule::FixContextAndResume(

#if defined(TARGET_X86)
ResumeAtJit(pContext, oldSP);
#elif defined(TARGET_WINDOWS) && defined(TARGET_AMD64)
ClrRestoreNonvolatileContextWorker(pContext, ssp);
#else
ClrRestoreNonvolatileContext(pContext);
#endif
Expand Down
7 changes: 5 additions & 2 deletions src/coreclr/vm/excep.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7087,9 +7087,12 @@ VEH_ACTION WINAPI CLRVectoredExceptionHandlerPhase3(PEXCEPTION_POINTERS pExcepti
// NOTE: this is effectively ifdef (TARGET_AMD64 || TARGET_ARM), and does not actually trigger
// a GC. This will redirect the exception context to a stub which will
// push a frame and cause GC.
if (IsGcMarker(pContext, pExceptionRecord))
if (Thread::UseRedirectForGcStress())
{
return VEH_CONTINUE_EXECUTION;;
if (IsGcMarker(pContext, pExceptionRecord))
{
return VEH_CONTINUE_EXECUTION;;
}
}
#endif // USE_REDIRECT_FOR_GCSTRESS

Expand Down
8 changes: 0 additions & 8 deletions src/coreclr/vm/exceptionhandling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -950,14 +950,6 @@ ProcessCLRException(IN PEXCEPTION_RECORD pExceptionRecord
// begin Early Processing
//
{
#ifndef USE_REDIRECT_FOR_GCSTRESS
if (IsGcMarker(pContextRecord, pExceptionRecord))
{
returnDisposition = ExceptionContinueExecution;
goto lExit;
}
#endif // !USE_REDIRECT_FOR_GCSTRESS

EH_LOG((LL_INFO100, "..................................................................................\n"));
EH_LOG((LL_INFO100, "ProcessCLRException enter, sp = 0x%p, ControlPc = 0x%p\n", MemoryStackFp, pDispatcherContext->ControlPc));
DebugLogExceptionRecord(pExceptionRecord);
Expand Down
36 changes: 19 additions & 17 deletions src/coreclr/vm/gccover.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1359,19 +1359,21 @@ BOOL OnGcCoverageInterrupt(PCONTEXT regs)
#if defined(USE_REDIRECT_FOR_GCSTRESS) && !defined(TARGET_UNIX)
// If we're unable to redirect, then we simply won't test GC at this
// location.
if (!pThread->CheckForAndDoRedirectForGCStress(regs))
if (Thread::UseRedirectForGcStress())
{
RemoveGcCoverageInterrupt(instrPtr, savedInstrPtr, gcCover, offset);
if (!pThread->CheckForAndDoRedirectForGCStress(regs))
{
RemoveGcCoverageInterrupt(instrPtr, savedInstrPtr, gcCover, offset);
}
}

#else // !USE_REDIRECT_FOR_GCSTRESS

else
#endif // !USE_REDIRECT_FOR_GCSTRESS
{
#ifdef _DEBUG
if (!g_pConfig->SkipGCCoverage(pMD->GetModule()->GetSimpleName()))
if (!g_pConfig->SkipGCCoverage(pMD->GetModule()->GetSimpleName()))
#endif
DoGcStress(regs, codeInfo.GetNativeCodeVersion());

#endif // !USE_REDIRECT_FOR_GCSTRESS
DoGcStress(regs, codeInfo.GetNativeCodeVersion());
}

return TRUE;
}
Expand Down Expand Up @@ -1707,16 +1709,15 @@ void DoGcStress (PCONTEXT regs, NativeCodeVersion nativeCodeVersion)
}
#endif // 0


#if !defined(USE_REDIRECT_FOR_GCSTRESS)
//
// If we redirect for gc stress, we don't need this frame on the stack,
// the redirection will push a resumable frame.
//
FrameWithCookie<ResumableFrame> frame(regs);
frame.Push(pThread);
#endif // USE_REDIRECT_FOR_GCSTRESS

if (!Thread::UseRedirectForGcStress())
{
frame.Push(pThread);
}

DWORD_PTR retValRegs[2] = { 0 };
UINT numberOfRegs = 0;
Expand Down Expand Up @@ -1801,9 +1802,10 @@ void DoGcStress (PCONTEXT regs, NativeCodeVersion nativeCodeVersion)
}
}

#if !defined(USE_REDIRECT_FOR_GCSTRESS)
frame.Pop(pThread);
#endif // USE_REDIRECT_FOR_GCSTRESS
if (!Thread::UseRedirectForGcStress())
{
frame.Pop(pThread);
}

if (enableWhenDone)
{
Expand Down
Loading

0 comments on commit 305144e

Please sign in to comment.