Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RISC-V] Initial patch to fix RISCV64 interpreter #94548

Merged
merged 2 commits into from
Nov 10, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 53 additions & 9 deletions src/coreclr/vm/interpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -911,6 +911,9 @@ CorJitResult Interpreter::GenerateInterpreterStub(CEEInfo* comp,
// x8 through x15 are scratch registers on ARM64.
IntReg x8 = IntReg(8);
IntReg x9 = IntReg(9);

#elif defined(HOST_RISCV64)
#else
#error unsupported platform
#endif
}
Expand Down Expand Up @@ -1073,15 +1076,15 @@ CorJitResult Interpreter::GenerateInterpreterStub(CEEInfo* comp,
argState.AddArg(vaSigCookieIndex);
}

#if defined(HOST_ARM) || defined(HOST_AMD64) || defined(HOST_ARM64)
#if defined(HOST_ARM) || defined(HOST_AMD64) || defined(HOST_ARM64) || defined(HOST_RISCV64)
// Generics context comes before args on ARM. Would be better if I factored this out as a call,
// to avoid large swatches of duplicate code.
if (hasGenericsContextArg)
{
argPerm[genericsContextArgIndex] = physArgIndex; physArgIndex++;
argState.AddArg(genericsContextArgIndex);
}
#endif // HOST_ARM || HOST_AMD64 || HOST_ARM64
#endif // HOST_ARM || HOST_AMD64 || HOST_ARM64 || HOST_RISCV64

CORINFO_ARG_LIST_HANDLE argPtr = info->args.args;
// Some arguments are have been passed in registers, some in memory. We must generate code that
Expand Down Expand Up @@ -1432,7 +1435,7 @@ CorJitResult Interpreter::GenerateInterpreterStub(CEEInfo* comp,
sl.X86EmitPopReg(kEBP);
sl.X86EmitReturn(static_cast<WORD>(argState.callerArgStackSlots * sizeof(void*)));
#elif defined(UNIX_AMD64_ABI)
bool hasTowRetSlots = info->args.retType == CORINFO_TYPE_VALUECLASS &&
bool hasTwoRetSlots = info->args.retType == CORINFO_TYPE_VALUECLASS &&
getClassSize(info->args.retTypeClass) == 16;

int fixedTwoSlotSize = 16;
Expand Down Expand Up @@ -1484,7 +1487,7 @@ CorJitResult Interpreter::GenerateInterpreterStub(CEEInfo* comp,
sl.X86EmitRegLoad(ARGUMENT_kREG1, reinterpret_cast<UINT_PTR>(interpMethInfo));

sl.X86EmitCall(sl.NewExternalCodeLabel(interpretMethodFunc), 0);
if (hasTowRetSlots) {
if (hasTwoRetSlots) {
sl.X86EmitEspOffset(0x8b, kRAX, 0);
sl.X86EmitEspOffset(0x8b, kRDX, 8);
}
Expand Down Expand Up @@ -1635,7 +1638,40 @@ CorJitResult Interpreter::GenerateInterpreterStub(CEEInfo* comp,
#elif defined(HOST_LOONGARCH64)
assert(!"unimplemented on LOONGARCH yet");
#elif defined(HOST_RISCV64)
assert(!"unimplemented on RISCV64 yet");
bool hasTwoRetSlots = info->args.retType == CORINFO_TYPE_VALUECLASS &&
getClassSize(info->args.retTypeClass) == 16;

UINT stackFrameSize = argState.numFPRegArgSlots;

sl.EmitProlog(argState.numRegArgs, argState.numFPRegArgSlots, hasTwoRetSlots ? 2 * sizeof(void*) : 0);

#if INTERP_ILSTUBS
if (pMD->IsILStub())
{
// Third argument is stubcontext, in t2 (METHODDESC_REGISTER).
sl.EmitMovReg(IntReg(12), IntReg(7));
}
else
#endif
{
// For a non-ILStub method, push NULL as the third StubContext argument.
sl.EmitMovConstant(IntReg(12), 0);
}
// Second arg is pointer to the base of the ILargs arr -- i.e., the current stack value.
sl.EmitAddImm(IntReg(11), RegSp, sl.GetSavedRegArgsOffset());

// First arg is the pointer to the interpMethodInfo structure
sl.EmitMovConstant(IntReg(10), reinterpret_cast<UINT64>(interpMethInfo));

sl.EmitCallLabel(sl.NewExternalCodeLabel((LPVOID)interpretMethodFunc), FALSE, FALSE);
if (hasTwoRetSlots)
{
// TODO: handle return registers to use int or float registers
sl.EmitLoad(IntReg(10), RegSp, 0);
sl.EmitLoad(IntReg(11), RegSp, sizeof(void*));
}

sl.EmitEpilog();
#else
#error unsupported platform
#endif
Expand Down Expand Up @@ -2430,6 +2466,14 @@ void Interpreter::ExecuteMethod(ARG_SLOT* retVal, _Out_ bool* pDoJmpCall, _Out_
//The Fixed Two slot return buffer address
memcpy(m_ilArgs-16, OpStackGet<void*>(0), sz);
}
#elif defined(TARGET_RISCV64)
// Is it an struct contained in two slots
else if (m_methInfo->m_returnType == CORINFO_TYPE_VALUECLASS
&& sz == 16)
{
//The Fixed Two slot return buffer address
memcpy(m_ilArgs-32, OpStackGet<void*>(0), sz);
}
#endif
else if (CorInfoTypeIsFloatingPoint(m_methInfo->m_returnType) &&
CorInfoTypeIsFloatingPoint(retValIt.ToCorInfoType()))
Expand Down Expand Up @@ -9448,7 +9492,7 @@ void Interpreter::DoCallWork(bool virtualCall, void* thisArg, CORINFO_RESOLVED_T
HFAReturnArgSlots = (HFAReturnArgSlots + sizeof(ARG_SLOT) - 1) / sizeof(ARG_SLOT);
}
}
#elif defined(UNIX_AMD64_ABI)
#elif defined(UNIX_AMD64_ABI) || defined(TARGET_RISCV64)
unsigned HasTwoSlotBuf = sigInfo.retType == CORINFO_TYPE_VALUECLASS &&
getClassSize(sigInfo.retTypeClass) == 16;
#endif
Expand Down Expand Up @@ -9689,7 +9733,7 @@ void Interpreter::DoCallWork(bool virtualCall, void* thisArg, CORINFO_RESOLVED_T
// This is the argument slot that will be used to hold the return value.
// In UNIX_AMD64_ABI, return type may have need tow ARG_SLOTs.
ARG_SLOT retVals[2] = {0, 0};
#if !defined(HOST_ARM) && !defined(UNIX_AMD64_ABI)
#if !defined(HOST_ARM) && !defined(UNIX_AMD64_ABI) && !defined(TARGET_RISCV64)
_ASSERTE (NUMBER_RETURNVALUE_SLOTS == 1);
#endif

Expand Down Expand Up @@ -9968,7 +10012,7 @@ void Interpreter::DoCallWork(bool virtualCall, void* thisArg, CORINFO_RESOLVED_T
bool b = CycleTimer::GetThreadCyclesS(&startCycles); _ASSERTE(b);
#endif // INTERP_ILCYCLE_PROFILE

#if defined(UNIX_AMD64_ABI)
#if defined(UNIX_AMD64_ABI) || defined(TARGET_RISCV64)
mdcs.CallTargetWorker(args, retVals, HasTwoSlotBuf ? 16: 8);
#else
mdcs.CallTargetWorker(args, retVals, 8);
Expand Down Expand Up @@ -10114,7 +10158,7 @@ void Interpreter::DoCallWork(bool virtualCall, void* thisArg, CORINFO_RESOLVED_T
{
OpStackSet<INT64>(m_curStackHt, GetSmallStructValue(&smallStructRetVal, retTypeSz));
}
#if defined(UNIX_AMD64_ABI)
#if defined(UNIX_AMD64_ABI) || defined(TARGET_RISCV64)
else if (HasTwoSlotBuf)
{
void* dst = LargeStructOperandStackPush(16);
Expand Down
4 changes: 3 additions & 1 deletion src/coreclr/vm/riscv64/cgencpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,6 @@ class StubLinkerCPU : public StubLinker
void EmitComputedInstantiatingMethodStub(MethodDesc* pSharedMD, struct ShuffleEntry *pShuffleEntryArray, void* extraArg);
#endif // FEATURE_SHARE_GENERIC_CODE

private:
void EmitMovConstant(IntReg target, UINT64 constant);
void EmitJumpRegister(IntReg regTarget);
void EmitMovReg(IntReg dest, IntReg source);
Expand All @@ -380,6 +379,9 @@ class StubLinkerCPU : public StubLinker
void EmitLoad(FloatReg dest, IntReg srcAddr, int offset = 0);
void EmitStore(IntReg src, IntReg destAddr, int offset = 0);
void EmitStore(FloatReg src, IntReg destAddr, int offset = 0);

void EmitProlog(unsigned short cIntRegArgs, unsigned short cFpRegArgs, unsigned short cbStackSpace = 0);
void EmitEpilog();
};

extern "C" void SinglecastDelegateInvokeStub();
Expand Down
110 changes: 110 additions & 0 deletions src/coreclr/vm/riscv64/stubs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1104,6 +1104,116 @@ void StubLinkerCPU::EmitJumpRegister(IntReg regTarget)
Emit32(0x00000067 | (regTarget << 15));
}

void StubLinkerCPU::EmitProlog(unsigned short cIntRegArgs, unsigned short cFpRegArgs, unsigned short cbStackSpace)
{
_ASSERTE(!m_fProlog);

unsigned short numberOfEntriesOnStack = 2 + cIntRegArgs + cFpRegArgs; // 2 for fp, ra

// Stack needs to be 16 byte aligned. Compute the required padding before saving it
unsigned short totalPaddedFrameSize = static_cast<unsigned short>(ALIGN_UP(cbStackSpace + numberOfEntriesOnStack * sizeof(void*), 2 * sizeof(void*)));
// The padding is going to be applied to the local stack
cbStackSpace = totalPaddedFrameSize - numberOfEntriesOnStack * sizeof(void*);

// Record the parameters of this prolog so that we can generate a matching epilog and unwind info.
DescribeProlog(cIntRegArgs, cFpRegArgs, cbStackSpace);


// N.B Despite the range of a jump with a sub sp is 4KB, we're limiting to 504 to save from emitting right prolog that's
// expressable in unwind codes efficiently. The largest offset in typical unwindinfo encodings that we use is 504.
// so allocations larger than 504 bytes would require setting the SP in multiple strides, which would complicate both
// prolog and epilog generation as well as unwindinfo generation.
_ASSERTE((totalPaddedFrameSize <= 504) && "NYI:RISCV64 Implement StubLinker prologs with larger than 504 bytes of frame size");
if (totalPaddedFrameSize > 504)
COMPlusThrow(kNotSupportedException);

// Here is how the stack would look like (Stack grows up)
// [Low Address]
// +------------+
// SP -> | | <-+
// : : | Stack Frame, (i.e outgoing arguments) including padding
// | | <-+
// +------------+
// | FP |
// +------------+
// | RA |
// +------------+
// | F10 | <-+
// +------------+ |
// : : | Fp Args
// +------------+ |
// | F17 | <-+
// +------------+
// | X10 | <-+
// +------------+ |
// : : | Int Args
// +------------+ |
// | X17 | <-+
// +------------+
// Old SP -> |[Stack Args]|
// [High Address]

// Regarding the order of operations in the prolog and epilog;
// If the prolog and the epilog matches each other we can simplify emitting the unwind codes and save a few
// bytes of unwind codes by making prolog and epilog share the same unwind codes.
// In order to do that we need to make the epilog be the reverse of the prolog.
// But we wouldn't want to add restoring of the argument registers as that's completely unnecessary.
// Besides, saving argument registers cannot be expressed by the unwind code encodings.
// So, we'll push saving the argument registers to the very last in the prolog, skip restoring it in epilog,
// and also skip reporting it to the OS.
//
// Another bit that we can save is resetting the frame pointer.
// This is not necessary when the SP doesn't get modified beyond prolog and epilog. (i.e no alloca/localloc)
// And in that case we don't need to report setting up the FP either.

// 1. Relocate SP
EmitSubImm(RegSp, RegSp, totalPaddedFrameSize);

unsigned cbOffset = 2 * sizeof(void*) + cbStackSpace; // 2 is for fp, ra

// 2. Store FP/RA
EmitStore(RegFp, RegSp, cbStackSpace);
EmitStore(RegRa, RegSp, cbStackSpace + sizeof(void*));

// 3. Set the frame pointer
EmitMovReg(RegFp, RegSp);

// 4. Store floating point argument registers
_ASSERTE(cFpRegArgs <= 8);
for (unsigned short i = 0; i < cFpRegArgs; i++)
EmitStore(FloatReg(i + 10), RegSp, cbOffset + i * sizeof(void*));

// 5. Store int argument registers
cbOffset += cFpRegArgs * sizeof(void*);
_ASSERTE(cIntRegArgs <= 8);
for (unsigned short i = 0 ; i < cIntRegArgs; i++)
EmitStore(IntReg(i + 10), RegSp, cbOffset + i * sizeof(void*));
}

void StubLinkerCPU::EmitEpilog()
{
_ASSERTE(m_fProlog);

// 5. Restore int argument registers
// nop: We don't need to. They are scratch registers

// 4. Restore floating point argument registers
// nop: We don't need to. They are scratch registers

// 3. Restore the SP from FP
// N.B. We're assuming that the stublinker stubs doesn't do alloca, hence nop

// 2. Restore FP/RA
EmitLoad(RegFp, RegSp, m_cbStackSpace);
EmitLoad(RegRa, RegSp, m_cbStackSpace + sizeof(void*));

// 1. Restore SP
EmitAddImm(RegSp, RegSp, GetStackFrameSize());

// jalr x0, 0(ra)
EmitJumpRegister(1);
JongHeonChoi marked this conversation as resolved.
Show resolved Hide resolved
}

// Instruction types as per RISC-V Spec, Chapter 24 RV32/64G Instruction Set Listings
static unsigned ITypeInstr(unsigned opcode, unsigned funct3, unsigned rd, unsigned rs1, int imm12)
{
Expand Down
31 changes: 30 additions & 1 deletion src/coreclr/vm/stublink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,12 @@ StubLinker::StubLinker()
m_cbStackFrame = 0;
m_fPushArgRegs = FALSE;
#endif
#ifdef TARGET_RISCV64
m_fProlog = FALSE;
m_cIntRegArgs = 0;
m_cFpRegArgs = 0;
m_cbStackSpace = 0;
#endif
#ifdef STUBLINKER_GENERATES_UNWIND_INFO
#ifdef _DEBUG
m_pUnwindInfoCheckLabel = NULL;
Expand Down Expand Up @@ -1891,7 +1897,30 @@ UINT StubLinker::GetStackFrameSize()
return m_cbStackSpace + (2 + m_cCalleeSavedRegs + m_cIntRegArgs + m_cVecRegArgs)*sizeof(void*);
}

#endif // ifdef TARGET_ARM, elif defined(TARGET_ARM64)
#elif defined(TARGET_RISCV64)
void StubLinker::DescribeProlog(UINT cIntRegArgs, UINT cFpRegArgs, UINT cbStackSpace)
{
m_fProlog = TRUE;
m_cIntRegArgs = cIntRegArgs;
m_cFpRegArgs = cFpRegArgs;
m_cbStackSpace = cbStackSpace;
}

UINT StubLinker::GetSavedRegArgsOffset()
{
_ASSERTE(m_fProlog);
// This is the offset from SP
// We're assuming that the stublinker will push the arg registers to the bottom of the stack frame
return m_cbStackSpace + 2 * sizeof(void*); // 2 is for FP and LR
}

UINT StubLinker::GetStackFrameSize()
{
_ASSERTE(m_fProlog);
return m_cbStackSpace + (2 + m_cIntRegArgs + m_cFpRegArgs) * sizeof(void*);
}

#endif // ifdef TARGET_ARM, elif defined(TARGET_ARM64), elif defined(TARGET_RISCV64)

#endif // #ifndef DACCESS_COMPILE

Expand Down
12 changes: 12 additions & 0 deletions src/coreclr/vm/stublink.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,10 @@ class StubLinker
void DescribeProlog(UINT cIntRegArgs, UINT cVecRegArgs, UINT cCalleeSavedRegs, UINT cbStackFrame);
UINT GetSavedRegArgsOffset();
UINT GetStackFrameSize();
#elif defined(TARGET_RISCV64)
void DescribeProlog(UINT cIntRegArgs, UINT cVecRegArgs, UINT cbStackFrame);
UINT GetSavedRegArgsOffset();
UINT GetStackFrameSize();
#endif

//===========================================================================
Expand Down Expand Up @@ -304,6 +308,14 @@ class StubLinker
UINT m_cbStackSpace; // Additional stack space for return buffer and stack alignment
#endif // TARGET_ARM64

#ifdef TARGET_RISCV64
protected:
BOOL m_fProlog; // True if DescribeProlog has been called
UINT m_cIntRegArgs; // Count of int register arguments (x10 - x17)
UINT m_cFpRegArgs; // Count of FP register arguments (f10 - f17)
UINT m_cbStackSpace; // Additional stack space for return buffer and stack alignment
#endif // TARGET_RISCV64

#ifdef STUBLINKER_GENERATES_UNWIND_INFO

#ifdef _DEBUG
Expand Down
Loading