diff --git a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go index 8721285507..3f062e975e 100644 --- a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go +++ b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go @@ -1102,20 +1102,32 @@ func (m *machine) lowerVMinMaxPseudo(instr *ssa.Instruction, max bool) { rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + creg := m.getOperand_NR(m.compiler.ValueDefinition(instr.Return()), extModeNone) + tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + + // creg is overwritten by BSL, so we need to move it to the result register before the instruction + // in case when it is used somewhere else. + rd := m.compiler.VRegOf(instr.Return()) + mov := m.allocateInstr() + mov.asFpuMov128(tmp.nr(), creg.nr()) + m.insert(mov) fcmgt := m.allocateInstr() if max { - fcmgt.asVecRRR(vecOpFcmgt, rd, rm, rn, arr) + fcmgt.asVecRRR(vecOpFcmgt, tmp, rm, rn, arr) } else { // If min, swap the args. - fcmgt.asVecRRR(vecOpFcmgt, rd, rn, rm, arr) + fcmgt.asVecRRR(vecOpFcmgt, tmp, rn, rm, arr) } m.insert(fcmgt) bsl := m.allocateInstr() - bsl.asVecRRR(vecOpBsl, rd, rm, rn, vecArrangement16B) + bsl.asVecRRR(vecOpBsl, tmp, rm, rn, vecArrangement16B) m.insert(bsl) + + mov2 := m.allocateInstr() + mov2.asFpuMov128(rd, tmp.nr()) + m.insert(mov2) } func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bit, signed bool) { diff --git a/internal/integration_test/fuzzcases/fuzzcases_test.go b/internal/integration_test/fuzzcases/fuzzcases_test.go index 10e8bf4949..df4a3ce397 100644 --- a/internal/integration_test/fuzzcases/fuzzcases_test.go +++ b/internal/integration_test/fuzzcases/fuzzcases_test.go @@ -667,13 +667,13 @@ func Test1817(t *testing.T) { }) } -// Test1817 tests that i16x8.narrow_i32x4_u assigns the dest register correctly. -func Test1818(t *testing.T) { +// Test1820 tests that i16x8.narrow_i32x4_u assigns the dest register correctly. +func Test1820(t *testing.T) { if !platform.CompilerSupported() { return } run(t, func(t *testing.T, r wazero.Runtime) { - mod, err := r.Instantiate(ctx, getWasmBinary(t, "1818")) + mod, err := r.Instantiate(ctx, getWasmBinary(t, "1820")) require.NoError(t, err) m := mod.(*wasm.ModuleInstance) _, err = m.ExportedFunction("").Call(ctx) @@ -682,3 +682,20 @@ func Test1818(t *testing.T) { require.Equal(t, uint64(0xFFFF), m.Globals[1].ValHi) }) } + +// Test1823 tests that f64x2.pmin lowers to BSL with the right register usage +// (condition register gets overwritten). +func Test1823(t *testing.T) { + if !platform.CompilerSupported() { + return + } + run(t, func(t *testing.T, r wazero.Runtime) { + mod, err := r.Instantiate(ctx, getWasmBinary(t, "1823")) + require.NoError(t, err) + m := mod.(*wasm.ModuleInstance) + _, err = m.ExportedFunction("").Call(ctx) + require.NoError(t, err) + require.Equal(t, uint64(17282609607625994159), m.Globals[0].Val) + require.Equal(t, uint64(4671060543367625455), m.Globals[0].ValHi) + }) +} diff --git a/internal/integration_test/fuzzcases/testdata/1818.wasm b/internal/integration_test/fuzzcases/testdata/1820.wasm similarity index 100% rename from internal/integration_test/fuzzcases/testdata/1818.wasm rename to internal/integration_test/fuzzcases/testdata/1820.wasm diff --git a/internal/integration_test/fuzzcases/testdata/1818.wat b/internal/integration_test/fuzzcases/testdata/1820.wat similarity index 100% rename from internal/integration_test/fuzzcases/testdata/1818.wat rename to internal/integration_test/fuzzcases/testdata/1820.wat diff --git a/internal/integration_test/fuzzcases/testdata/1823.wasm b/internal/integration_test/fuzzcases/testdata/1823.wasm new file mode 100644 index 0000000000..42fc5fb2ed Binary files /dev/null and b/internal/integration_test/fuzzcases/testdata/1823.wasm differ diff --git a/internal/integration_test/fuzzcases/testdata/1823.wat b/internal/integration_test/fuzzcases/testdata/1823.wat new file mode 100644 index 0000000000..940c1bcd0c --- /dev/null +++ b/internal/integration_test/fuzzcases/testdata/1823.wat @@ -0,0 +1,168 @@ +(module + (func (;1;) (result v128 v128 f64 v128 v128 v128 i32 v128 v128 v128 v128 v128) + v128.const i32x4 0xffffffff 0x0000ffff 0xfefff700 0xffffffff + v128.const i32x4 0xffffffff 0xffffffff 0x24ffffff 0x10108240 + f64.const -0x1.3e3e3e3e3e3e3p+575 (;=-153732818170537500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000;) + v128.const i32x4 0x02ffe3e3 0x00e3ff00 0x5151ffff 0x391b5151 + v128.const i32x4 0xe3e34000 0xff10e3e3 0xffff3aff 0x1082f1ff + v128.const i32x4 0x45ffffff 0x103735c2 0x243a0010 0x0000517f + i32.const 0 + v128.const i32x4 0xffffff00 0xffffff01 0xffffffff 0x505050ff + v128.const i32x4 0x00001071 0x00ffffff 0x00000000 0x50505050 + v128.const i32x4 0x50505050 0xff51514e 0xd60001ff 0x10505050 + v128.const i32x4 0x50505050 0x50105050 0x4e505010 0x50105151 + v128.const i32x4 0x50505050 0xffdc1050 0xffffffff 0x000000ff + ) + (func (;4;) + (local i32) + f64.const 0 + call 0 + i32x4.ge_u + f64x2.pmin + i16x8.narrow_i32x4_s + f64x2.pmin + memory.size + call 0 + v128.any_true + call 0 + v128.any_true + call 0 + i16x8.gt_s + v128.bitselect + global.get 0 + v128.xor + global.set 0 + global.get 0 + v128.xor + global.set 0 + global.get 1 + i32.xor + global.set 1 + global.get 0 + v128.xor + global.set 0 + global.get 0 + v128.xor + global.set 0 + global.get 0 + v128.xor + global.set 0 + i64.reinterpret_f64 + global.get 2 + i64.xor + global.set 2 + global.get 0 + v128.xor + global.set 0 + global.get 0 + v128.xor + global.set 0 + global.get 1 + i32.xor + global.set 1 + global.get 0 + v128.xor + global.set 0 + global.get 0 + v128.xor + global.set 0 + global.get 0 + v128.xor + global.set 0 + global.get 0 + v128.xor + global.set 0 + global.get 1 + i32.xor + global.set 1 + global.get 0 + v128.xor + global.set 0 + global.get 0 + v128.xor + global.set 0 + global.get 0 + v128.xor + global.set 0 + i64.reinterpret_f64 + global.get 2 + i64.xor + global.set 2 + global.get 0 + v128.xor + global.set 0 + global.get 0 + v128.xor + global.set 0 + global.get 1 + i32.xor + global.set 1 + global.get 0 + v128.xor + global.set 0 + global.get 0 + v128.xor + global.set 0 + global.get 0 + v128.xor + global.set 0 + global.get 0 + v128.xor + global.set 0 + global.get 1 + i32.xor + global.set 1 + drop + global.get 0 + v128.xor + global.set 0 + global.get 0 + v128.xor + global.set 0 + i64.reinterpret_f64 + global.get 2 + i64.xor + global.set 2 + global.get 0 + v128.xor + global.set 0 + global.get 0 + v128.xor + global.set 0 + global.get 1 + i32.xor + global.set 1 + global.get 0 + v128.xor + global.set 0 + global.get 1 + i32.xor + global.set 1 + global.get 0 + v128.xor + global.set 0 + drop + global.get 0 + v128.xor + global.set 0 + i64.reinterpret_f64 + global.get 2 + i64.xor + global.set 2 + global.get 0 + v128.xor + global.set 0 + global.get 0 + v128.xor + global.set 0 + drop + ) + (table (;0;) 1000 1000 externref) + (table (;1;) 1000 1000 externref) + (memory (;0;) 0 0) + (global (;0;) (mut v128) v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + (global (;1;) (mut i32) i32.const 0) + (global (;2;) (mut i64) i64.const 0) + (global (;3;) (mut i32) i32.const 1000) + (export "" (func 1)) +)