vertexjit: Only save extra regs on x64.
unknownbrackets committed Feb 1, 2021
1 parent 30b6f1f · commit c1fa495
Showing 1 changed file with 12 additions and 8 deletions.
20 changes: 12 additions & 8 deletions GPU/Common/VertexDecoderX86.cpp
@@ -53,7 +53,7 @@ alignas(16) static const float by16384[4] = {
 	1.0f / 16384.0f, 1.0f / 16384.0f, 1.0f / 16384.0f, 1.0f / 16384.0f,
 };
 
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 #ifdef _WIN32
 static const X64Reg tempReg1 = RAX;
 static const X64Reg tempReg2 = R9;
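
Note: the hunk above swaps an MSVC-specific predefined macro (`_M_X64`) for PPSSPP's portable PPSSPP_ARCH() check. As a hedged sketch, the macro machinery in ppsspp_config.h works roughly like this, with each architecture token expanding to 1 or 0 so it can be used inside #if across compilers (the exact contents of the real header may differ):

```cpp
// Sketch of a PPSSPP_ARCH()-style macro (assumption: mirrors ppsspp_config.h).
// MSVC defines _M_X64/_M_IX86; GCC and Clang define __x86_64__/__i386__.
#if defined(_M_X64) || defined(__x86_64__) || defined(__amd64__)
#define PPSSPP_ARCH_AMD64 1
#else
#define PPSSPP_ARCH_AMD64 0
#endif

#if defined(_M_IX86) || defined(__i386__)
#define PPSSPP_ARCH_X86 1
#else
#define PPSSPP_ARCH_X86 0
#endif

// Token-pasting dispatcher: PPSSPP_ARCH(AMD64) expands to PPSSPP_ARCH_AMD64.
#define PPSSPP_ARCH(PPSSPP_FEATURE) (PPSSPP_ARCH_##PPSSPP_FEATURE)
```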
@@ -197,8 +197,10 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
 	MOVUPS(MDisp(ESP, 16), XMM5);
 	MOVUPS(MDisp(ESP, 32), XMM6);
 	MOVUPS(MDisp(ESP, 48), XMM7);
+#if PPSSPP_ARCH(AMD64)
 	MOVUPS(MDisp(ESP, 64), XMM8);
 	MOVUPS(MDisp(ESP, 80), XMM9);
+#endif
 
 	bool prescaleStep = false;
 	// Look for prescaled texcoord steps
@@ -275,11 +277,13 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
 	MOVUPS(XMM5, MDisp(ESP, 16));
 	MOVUPS(XMM6, MDisp(ESP, 32));
 	MOVUPS(XMM7, MDisp(ESP, 48));
+#if PPSSPP_ARCH(AMD64)
 	MOVUPS(XMM8, MDisp(ESP, 64));
 	MOVUPS(XMM9, MDisp(ESP, 80));
+#endif
 	ADD(PTRBITS, R(ESP), Imm8(STACK_FIXED_ALLOC));
 
-#ifdef _M_IX86
+#if PPSSPP_ARCH(X86)
 	// Restore register values
 	POP(EBP);
 	POP(EBX);
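
Note: the point of the new guards is symmetry between prologue and epilogue. XMM8 through XMM15 only exist in 64-bit mode (their encodings need a REX prefix), so a 32-bit build must not emit the XMM8/XMM9 spills at all, and the #if around the stores has to match the #if around the loads. A condensed sketch of that invariant, using only emitter calls visible in this diff (the SUB reserving the stack space and the XMM4 slot at offset 0 are assumptions inferred from the ADD and the 16-byte stride shown above):

```cpp
// Prologue: reserve spill space, save the XMM regs the decode loop clobbers.
SUB(PTRBITS, R(ESP), Imm8(STACK_FIXED_ALLOC));  // assumed mirror of the ADD below
MOVUPS(MDisp(ESP, 0), XMM4);                    // offset 0 assumed from the stride
MOVUPS(MDisp(ESP, 16), XMM5);
MOVUPS(MDisp(ESP, 32), XMM6);
MOVUPS(MDisp(ESP, 48), XMM7);
#if PPSSPP_ARCH(AMD64)
MOVUPS(MDisp(ESP, 64), XMM8);                   // 64-bit only: no XMM8+ on x86-32
MOVUPS(MDisp(ESP, 80), XMM9);
#endif

// ... emitted decode loop ...

// Epilogue: restore under the same guard, then release the stack space.
MOVUPS(XMM4, MDisp(ESP, 0));
MOVUPS(XMM5, MDisp(ESP, 16));
MOVUPS(XMM6, MDisp(ESP, 32));
MOVUPS(XMM7, MDisp(ESP, 48));
#if PPSSPP_ARCH(AMD64)
MOVUPS(XMM8, MDisp(ESP, 64));
MOVUPS(XMM9, MDisp(ESP, 80));
#endif
ADD(PTRBITS, R(ESP), Imm8(STACK_FIXED_ALLOC));
```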
@@ -466,7 +470,7 @@ void VertexDecoderJitCache::Jit_WeightsFloat() {
 void VertexDecoderJitCache::Jit_WeightsU8Skin() {
 	MOV(PTRBITS, R(tempReg2), ImmPtr(&bones));
 
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 	if (dec_->nweights > 4) {
 		// This reads 8 bytes, we split the top 4 so we can expand each set of 4.
 		MOVQ_xmm(XMM8, MDisp(srcReg, dec_->weightoff));
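
Note: for more than four U8 weights, the emitted code pulls in all 8 source bytes with one MOVQ and widens them in two halves. A standalone SSE2-intrinsics stand-in for the same transform (hypothetical helper name; the 1/128 scale is an assumption based on how U8 fixed-point data is normalized elsewhere in this decoder):

```cpp
#include <cstdint>
#include <emmintrin.h>

// Hypothetical illustration, not the JIT emitter code: expand 8 u8 weights
// to 8 floats, mirroring the MOVQ load + two-half widening above.
static void ExpandWeightsU8(const uint8_t *src, float *dst) {
	const __m128i zero = _mm_setzero_si128();
	__m128i w8 = _mm_loadl_epi64((const __m128i *)src);  // one 8-byte load
	__m128i w16 = _mm_unpacklo_epi8(w8, zero);           // 8 x u16
	__m128i lo32 = _mm_unpacklo_epi16(w16, zero);        // weights 0-3 as u32
	__m128i hi32 = _mm_unpackhi_epi16(w16, zero);        // weights 4-7 as u32
	const __m128 scale = _mm_set1_ps(1.0f / 128.0f);     // assumed scale
	_mm_storeu_ps(dst + 0, _mm_mul_ps(_mm_cvtepi32_ps(lo32), scale));
	_mm_storeu_ps(dst + 4, _mm_mul_ps(_mm_cvtepi32_ps(hi32), scale));
}
```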
@@ -518,7 +522,7 @@ void VertexDecoderJitCache::Jit_WeightsU8Skin() {
 
 	for (int j = 0; j < dec_->nweights; j++) {
 		X64Reg weight = XMM1;
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 		X64Reg weightSrc = j < 4 ? XMM8 : XMM9;
 		if (j == 3 || j == dec_->nweights - 1) {
 			// In the previous iteration, we already spread this value to all lanes.
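
Note: inside the per-weight loop, each weight j is consumed as a four-lane broadcast so one multiply can scale an entire bone-matrix row; the comment above refers to that broadcast having happened one iteration early. A fixed-j intrinsics illustration of the lane spread (assumption: the emitter likely uses a SHUFPS-style shuffle with a mask built from j; this is not the verbatim emitted code):

```cpp
#include <xmmintrin.h>

// Broadcast lane 2 to all four lanes, i.e. what spreading weight j == 2
// across a register looks like with a compile-time shuffle mask.
static inline __m128 SpreadLane2(__m128 w) {
	return _mm_shuffle_ps(w, w, _MM_SHUFFLE(2, 2, 2, 2));
}
```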
@@ -576,7 +580,7 @@ void VertexDecoderJitCache::Jit_WeightsU8Skin() {
 void VertexDecoderJitCache::Jit_WeightsU16Skin() {
 	MOV(PTRBITS, R(tempReg2), ImmPtr(&bones));
 
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 	if (dec_->nweights > 6) {
 		// Since this is probably not aligned, two MOVQs are better than one MOVDQU.
 		MOVQ_xmm(XMM8, MDisp(srcReg, dec_->weightoff));
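
Note: vertex data is usually not 16-byte aligned, which is why the comment above prefers two MOVQs over one MOVDQU on the CPUs this JIT targets. An intrinsics stand-in for the u16 weight path (hypothetical helper; the 1/32768 scale is an assumption by analogy with the by16384 constant at the top of the file):

```cpp
#include <cstdint>
#include <emmintrin.h>

// Hypothetical illustration: expand 8 u16 weights to floats using two 8-byte
// loads in place of a single unaligned 16-byte load.
static void ExpandWeightsU16(const uint16_t *src, float *dst) {
	const __m128i zero = _mm_setzero_si128();
	__m128i lo16 = _mm_loadl_epi64((const __m128i *)(src + 0));  // weights 0-3
	__m128i hi16 = _mm_loadl_epi64((const __m128i *)(src + 4));  // weights 4-7
	const __m128 scale = _mm_set1_ps(1.0f / 32768.0f);           // assumed scale
	_mm_storeu_ps(dst + 0, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(lo16, zero)), scale));
	_mm_storeu_ps(dst + 4, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(hi16, zero)), scale));
}
```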
@@ -632,7 +636,7 @@ void VertexDecoderJitCache::Jit_WeightsU16Skin() {
 
 	for (int j = 0; j < dec_->nweights; j++) {
 		X64Reg weight = XMM1;
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 		X64Reg weightSrc = j < 4 ? XMM8 : XMM9;
 		if (j == 3 || j == dec_->nweights - 1) {
 			// In the previous iteration, we already spread this value to all lanes.
@@ -730,7 +734,7 @@ void VertexDecoderJitCache::Jit_TcU16ToFloat() {
 }
 
 void VertexDecoderJitCache::Jit_TcFloat() {
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 	MOV(64, R(tempReg1), MDisp(srcReg, dec_->tcoff));
 	MOV(64, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
 #else
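
Note: Jit_TcFloat just moves the two-float UV pair from the source vertex into the decoded vertex. On x64 a single 8-byte MOV through tempReg1 covers both floats; the 32-bit #else branch (truncated here) has to split the copy. The plain-C equivalent of what the emitted code does:

```cpp
#include <cstdint>
#include <cstring>

// Equivalent of the emitted copy: 8 bytes = two packed 32-bit floats (u, v).
// The offsets stand in for dec_->tcoff and dec_->decFmt.uvoff.
static inline void CopyTcFloat(const uint8_t *src, uint8_t *dst,
                               int tcoff, int uvoff) {
	std::memcpy(dst + uvoff, src + tcoff, 8);
}
```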
@@ -911,7 +915,7 @@ void VertexDecoderJitCache::Jit_TcU16ThroughToFloat() {
 }
 
 void VertexDecoderJitCache::Jit_TcFloatThrough() {
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 	MOV(64, R(tempReg1), MDisp(srcReg, dec_->tcoff));
 	MOV(64, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
 #else