Skip to content

Commit

Permalink
vertexjit: Fix a silly mistake in weights > 4.
Browse files: browse the repository at this point in the history
Darn switch, took me way too long to notice this.
  • Loading branch information
unknownbrackets committed Mar 24, 2014
1 parent 717e6db commit b589d3b
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 24 deletions.
4 changes: 2 additions & 2 deletions Common/ArmEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1634,7 +1634,7 @@ void ARMXEmitter::VMOV_neon(u32 Size, ARMReg Vd, ARMReg Rt, int lane)
_assert_msg_(JIT, false, "VMOV_neon unsupported size");
}

if (Vd < S0 && Rt >= D0)
if (Vd < S0 && Rt >= D0 && Rt < Q0)
{
// Oh, reading to reg, our params are backwards.
ARMReg Src = Rt;
Expand All @@ -1645,7 +1645,7 @@ void ARMXEmitter::VMOV_neon(u32 Size, ARMReg Vd, ARMReg Rt, int lane)

Write32(condition | (0xE1 << 20) | U | (opc1 << 21) | EncodeVn(Src) | (Dest << 12) | (0xB << 8) | (opc2 << 5) | (1 << 4));
}
else if (Rt < S0 && Vd >= D0)
else if (Rt < S0 && Vd >= D0 && Vd < Q0)
{
ARMReg Src = Rt;
ARMReg Dest = Vd;
Expand Down
32 changes: 10 additions & 22 deletions GPU/GLES/VertexDecoderArm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -430,16 +430,13 @@ void VertexDecoderJitCache::Jit_WeightsU8Skin() {
if (NEONSkinning) {
// Weight is first so srcReg is correct.
switch (dec_->nweights) {
case 1: LDRB(scratchReg2, srcReg, 0); break;
case 2: LDRH(scratchReg2, srcReg, 0); break;
case 3:
case 4:
case 1: VLD1_lane(I_8, neonScratchReg, srcReg, 0, false); break;
case 2: VLD1_lane(I_16, neonScratchReg, srcReg, 0, false); break;
default:
// For 3, we over read, for over 4, we read more later.
VLD1_lane(I_32, neonScratchReg, srcReg, 0, false);
break;
}
if (dec_->nweights == 1 || dec_->nweights == 2) {
VMOV_neon(I_32, neonScratchReg, scratchReg2, 0);
}
// This can be represented as a constant.
VMOV_neon(F_32, Q3, by128);
VMOVL(I_8 | I_UNSIGNED, neonScratchRegQ, neonScratchReg);
Expand All @@ -450,16 +447,13 @@ void VertexDecoderJitCache::Jit_WeightsU8Skin() {
if (dec_->nweights > 4) {
ADD(tempReg1, srcReg, 4 * sizeof(u8));
switch (dec_->nweights) {
case 5: LDRB(scratchReg2, tempReg1, 0); break;
case 6: LDRH(scratchReg2, tempReg1, 0); break;
case 5: VLD1_lane(I_8, neonScratchReg, tempReg1, 0, false); break;
case 6: VLD1_lane(I_16, neonScratchReg, tempReg1, 0, false); break;
case 7:
case 8:
VLD1_lane(I_32, neonScratchReg, tempReg1, 0, false);
break;
}
if (dec_->nweights == 5 || dec_->nweights == 6) {
VMOV_neon(I_32, neonScratchReg, scratchReg2, 0);
}
VMOVL(I_8 | I_UNSIGNED, neonScratchRegQ, neonScratchReg);
VMOVL(I_16 | I_UNSIGNED, neonScratchRegQ, neonScratchReg);
VCVT(F_32 | I_UNSIGNED, neonScratchRegQ, neonScratchRegQ);
Expand All @@ -480,13 +474,10 @@ void VertexDecoderJitCache::Jit_WeightsU8Skin() {
void VertexDecoderJitCache::Jit_WeightsU16Skin() {
if (NEONSkinning) {
switch (dec_->nweights) {
case 1:
LDRH(scratchReg, srcReg, 0);
VMOV_neon(I_32, neonScratchReg, scratchReg, 0);
break;
case 1: VLD1_lane(I_16, neonScratchReg, srcReg, 0, true); break;
case 2: VLD1_lane(I_32, neonScratchReg, srcReg, 0, false); break;
case 3:
case 4:
default:
// For 3, we over read, for over 4, we read more later.
VLD1(I_32, neonScratchReg, srcReg, 1, ALIGN_NONE);
break;
}
Expand All @@ -499,10 +490,7 @@ void VertexDecoderJitCache::Jit_WeightsU16Skin() {
if (dec_->nweights > 4) {
ADD(tempReg1, srcReg, 4 * sizeof(u16));
switch (dec_->nweights) {
case 5:
LDRH(scratchReg, tempReg1, 0);
VMOV_neon(I_32, neonScratchReg, scratchReg, 0);
break;
case 5: VLD1_lane(I_16, neonScratchReg, tempReg1, 0, true); break;
case 6: VLD1_lane(I_32, neonScratchReg, tempReg1, 0, false); break;
case 7:
case 8:
Expand Down

0 comments on commit b589d3b

Please sign in to comment.