diff --git a/GPU/Common/VertexDecoderX86.cpp b/GPU/Common/VertexDecoderX86.cpp
index 10e7cbe019df..a3c6f15e4149 100644
--- a/GPU/Common/VertexDecoderX86.cpp
+++ b/GPU/Common/VertexDecoderX86.cpp
@@ -53,7 +53,7 @@ alignas(16) static const float by16384[4] = {
 	1.0f / 16384.0f, 1.0f / 16384.0f, 1.0f / 16384.0f, 1.0f / 16384.0f,
 };
 
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 #ifdef _WIN32
 static const X64Reg tempReg1 = RAX;
 static const X64Reg tempReg2 = R9;
@@ -197,8 +197,10 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
 	MOVUPS(MDisp(ESP, 16), XMM5);
 	MOVUPS(MDisp(ESP, 32), XMM6);
 	MOVUPS(MDisp(ESP, 48), XMM7);
+#if PPSSPP_ARCH(AMD64)
 	MOVUPS(MDisp(ESP, 64), XMM8);
 	MOVUPS(MDisp(ESP, 80), XMM9);
+#endif
 
 	bool prescaleStep = false;
 	// Look for prescaled texcoord steps
@@ -275,11 +277,13 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
 	MOVUPS(XMM5, MDisp(ESP, 16));
 	MOVUPS(XMM6, MDisp(ESP, 32));
 	MOVUPS(XMM7, MDisp(ESP, 48));
+#if PPSSPP_ARCH(AMD64)
 	MOVUPS(XMM8, MDisp(ESP, 64));
 	MOVUPS(XMM9, MDisp(ESP, 80));
+#endif
 	ADD(PTRBITS, R(ESP), Imm8(STACK_FIXED_ALLOC));
 
-#ifdef _M_IX86
+#if PPSSPP_ARCH(X86)
 	// Restore register values
 	POP(EBP);
 	POP(EBX);
@@ -466,7 +470,7 @@ void VertexDecoderJitCache::Jit_WeightsFloat() {
 
 void VertexDecoderJitCache::Jit_WeightsU8Skin() {
 	MOV(PTRBITS, R(tempReg2), ImmPtr(&bones));
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 	if (dec_->nweights > 4) {
 		// This reads 8 bytes, we split the top 4 so we can expand each set of 4.
 		MOVQ_xmm(XMM8, MDisp(srcReg, dec_->weightoff));
@@ -518,7 +522,7 @@ void VertexDecoderJitCache::Jit_WeightsU8Skin() {
 
 	for (int j = 0; j < dec_->nweights; j++) {
 		X64Reg weight = XMM1;
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 		X64Reg weightSrc = j < 4 ? XMM8 : XMM9;
 		if (j == 3 || j == dec_->nweights - 1) {
 			// In the previous iteration, we already spread this value to all lanes.
@@ -576,7 +580,7 @@ void VertexDecoderJitCache::Jit_WeightsU8Skin() {
 
 void VertexDecoderJitCache::Jit_WeightsU16Skin() {
 	MOV(PTRBITS, R(tempReg2), ImmPtr(&bones));
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 	if (dec_->nweights > 6) {
 		// Since this is probably not aligned, two MOVQs are better than one MOVDQU.
 		MOVQ_xmm(XMM8, MDisp(srcReg, dec_->weightoff));
@@ -632,7 +636,7 @@ void VertexDecoderJitCache::Jit_WeightsU16Skin() {
 
 	for (int j = 0; j < dec_->nweights; j++) {
 		X64Reg weight = XMM1;
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 		X64Reg weightSrc = j < 4 ? XMM8 : XMM9;
 		if (j == 3 || j == dec_->nweights - 1) {
 			// In the previous iteration, we already spread this value to all lanes.
@@ -730,7 +734,7 @@ void VertexDecoderJitCache::Jit_TcU16ToFloat() {
 }
 
 void VertexDecoderJitCache::Jit_TcFloat() {
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 	MOV(64, R(tempReg1), MDisp(srcReg, dec_->tcoff));
 	MOV(64, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
 #else
@@ -911,7 +915,7 @@ void VertexDecoderJitCache::Jit_TcU16ThroughToFloat() {
 }
 
 void VertexDecoderJitCache::Jit_TcFloatThrough() {
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 	MOV(64, R(tempReg1), MDisp(srcReg, dec_->tcoff));
 	MOV(64, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
 #else
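
For reference: `_M_X64` and `_M_IX86` are MSVC-specific predefined macros, so the old `#ifdef` checks never selected the 64-bit paths when building with GCC or Clang; and since XMM8/XMM9 only exist in 64-bit mode, their save/restore in the prologue/epilogue is now gated on the architecture as well. Below is a minimal sketch of the kind of function-style architecture macro the new `#if PPSSPP_ARCH(...)` checks rely on; the exact compiler tests and the 0/1 fallbacks are assumptions for illustration, not a quote of PPSSPP's `ppsspp_config.h`.

```cpp
// Sketch: a function-style architecture macro usable inside #if.
// PPSSPP_ARCH(X) expands to the token PPSSPP_ARCH_X, which is defined
// to 1 or 0 below depending on the compiler's predefined macros.
#define PPSSPP_ARCH(PPSSPP_FEATURE) (PPSSPP_ARCH_##PPSSPP_FEATURE)

// _M_X64 is MSVC-only; __x86_64__/__amd64__ cover GCC and Clang.
#if defined(_M_X64) || defined(__x86_64__) || defined(__amd64__)
#define PPSSPP_ARCH_AMD64 1
#else
#define PPSSPP_ARCH_AMD64 0
#endif

// Likewise _M_IX86 is MSVC-only; __i386__ covers GCC and Clang.
#if defined(_M_IX86) || defined(__i386__)
#define PPSSPP_ARCH_X86 1
#else
#define PPSSPP_ARCH_X86 0
#endif
```

With a definition along those lines, `#if PPSSPP_ARCH(AMD64)` evaluates the same way under MSVC, GCC, and Clang, whereas the previous `#ifdef _M_X64` only ever fired under MSVC.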