diff --git a/CMakeLists.txt b/CMakeLists.txt index d0abfdba6..62c48107b 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1007,5 +1007,10 @@ add_test(sse4_2 ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX86} -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests/test25 -D TEST_OUTPUT=tmpfile25.txt -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests/ref25.txt -P ${CMAKE_SOURCE_DIR}/runTest.cmake ) + +add_test(fpu_rounding ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX86} + -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests/test26 -D TEST_OUTPUT=tmpfile26.txt + -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests/ref26.txt + -P ${CMAKE_SOURCE_DIR}/runTest.cmake ) endif(BOX86LIB) diff --git a/src/dynarec/arm_emitter.h b/src/dynarec/arm_emitter.h index dde75b2ca..9e5244cbf 100755 --- a/src/dynarec/arm_emitter.h +++ b/src/dynarec/arm_emitter.h @@ -192,6 +192,9 @@ Op is 20-27 // and dst, src1, #imm ror rot*2 #define AND_IMM8_ROR(dst, src, imm8, rot) \ EMIT(0xe2000000 | ((dst) << 12) | ((src) << 16) | ((rot)<<8) | brIMM(imm8) ) +// and.c dst, src, #(imm8) +#define AND_IMM8_COND(cond, dst, src, imm8) \ + EMIT((cond) | 0x02000000 | ((dst) << 12) | ((src) << 16) | brIMM(imm8) ) // and.s dst, src, #(imm8) #define ANDS_IMM8(dst, src, imm8) \ EMIT(0xe2100000 | ((dst) << 12) | ((src) << 16) | brIMM(imm8) ) diff --git a/src/dynarec/dynarec_arm_d8.c b/src/dynarec/dynarec_arm_d8.c index 7cb6844aa..830c88246 100755 --- a/src/dynarec/dynarec_arm_d8.c +++ b/src/dynarec/dynarec_arm_d8.c @@ -1,3 +1,4 @@ +#include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <stddef.h> @@ -33,6 +34,7 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int d1; int fixedaddress; int parity; + uint8_t u8; MAYUSE(d1); MAYUSE(s0); @@ -52,11 +54,13 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FADD ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VADD_F32(v1, v1, v2); } else { VADD_F64(v1, v1, v2); } + x87_restoreround(dyn, ninst, u8); break; case 0xC8: case 0xC9: @@ -69,11 +73,13 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FMUL ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VMUL_F32(v1, v1, v2); } else { VMUL_F64(v1, v1, v2); } + x87_restoreround(dyn, ninst, u8); break; case 0xD0: case 0xD1: @@ -123,11 +129,13 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FSUB ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VSUB_F32(v1, v1, v2); } else { VSUB_F64(v1, v1, v2); } + x87_restoreround(dyn, ninst, u8); break; case 0xE8: case 0xE9: @@ -140,11 +148,13 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FSUBR ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VSUB_F32(v1, v2, v1); } else { VSUB_F64(v1, v2, v1); } + x87_restoreround(dyn, ninst, u8); break; case 0xF0: case 0xF1: @@ -157,11 +167,13 @@ uintptr_t
dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FDIV ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VDIV_F32(v1, v1, v2); } else { VDIV_F64(v1, v1, v2); } + x87_restoreround(dyn, ninst, u8); break; case 0xF8: case 0xF9: @@ -174,11 +186,13 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FDIVR ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VDIV_F32(v1, v2, v1); } else { VDIV_F64(v1, v2, v1); } + x87_restoreround(dyn, ninst, u8); break; default: @@ -196,12 +210,14 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, GETED; VMOVtoV(s0, ed); } + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VADD_F32(v1, v1, s0); } else { VCVT_F64_F32(d1, s0); VADD_F64(v1, v1, d1); } + x87_restoreround(dyn, ninst, u8); break; case 1: INST_NAME("FMUL ST0, float[ED]"); @@ -216,12 +232,14 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, GETED; VMOVtoV(s0, ed); } + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VMUL_F32(v1, v1, s0); } else { VCVT_F64_F32(d1, s0); VMUL_F64(v1, v1, d1); } + x87_restoreround(dyn, ninst, u8); break; case 2: INST_NAME("FCOM ST0, float[ED]"); @@ -279,12 +297,14 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, GETED; VMOVtoV(s0, ed); } + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VSUB_F32(v1, v1, s0); } else { VCVT_F64_F32(d1, s0); VSUB_F64(v1, v1, d1); } + x87_restoreround(dyn, ninst, u8); break; case 5: INST_NAME("FSUBR ST0, float[ED]"); @@ -299,12 +319,14 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, GETED; VMOVtoV(s0, ed); } + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VSUB_F32(v1, s0, v1); } else { VCVT_F64_F32(d1, s0); VSUB_F64(v1, d1, v1); } + x87_restoreround(dyn, ninst, u8); break; case 6: INST_NAME("FDIV ST0, float[ED]"); @@ -319,12 +341,14 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, GETED; VMOVtoV(s0, ed); } + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VDIV_F32(v1, v1, s0); } else { VCVT_F64_F32(d1, s0); VDIV_F64(v1, v1, d1); } + x87_restoreround(dyn, ninst, u8); break; case 7: INST_NAME("FDIVR ST0, float[ED]"); @@ -339,12 +363,14 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, GETED; VMOVtoV(s0, ed); } + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VDIV_F32(v1, s0, v1); } else { VCVT_F64_F32(d1, s0); VDIV_F64(v1, d1, v1); } + x87_restoreround(dyn, ninst, u8); break; default: DEFAULT; diff --git a/src/dynarec/dynarec_arm_d9.c b/src/dynarec/dynarec_arm_d9.c index f54f3cec9..8f7a688d2 100755 --- a/src/dynarec/dynarec_arm_d9.c +++ b/src/dynarec/dynarec_arm_d9.c @@ -281,19 +281,42 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, CALL(arm_f2xm1, -1, 0); #else v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); - //if(ST0.d!=0.0) - // ST0.d = exp2(ST0.d)-1.0; - VMOV_64(0, v1); - CALL_1D(exp2, 0); // return is d0 + VMRS_APSR(); + B_NEXT(cEQ); + LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, cw)); // hopefully cw is not too far for an imm8 
+ UBFX(x1, x1, 10, 2); // extract round... + UBFX(x2, x1, 1, 1); // swap bits 0 and 1 + BFI(x2, x1, 1, 1); + VMRS(x14); // get fpscr + MOV_REG(x3, x14); if((PK(0)==0xD9 && PK(1)==0xE8) && // next inst is FLD1 (PK(2)==0xDE && PK(3)==0xC1)) { MESSAGE(LOG_DUMP, "Hack for fld1 / faddp st1, st0\n"); + VCMP_F64_0(v1); + B_MARK(cGE); // if ST0 < 0 and if the rounding mode is toward 0, then use upward + TSTS_IMM8(x2, 0b01); + AND_IMM8_COND(cNE, x2, x2, 0b01); // 11 (TOWARDZERO) -> 01 (UPWARD), 01 -> 01 + MARK; + BFI(x3, x2, 22, 2); // inject new round + VMSR(x3); + + VMOV_64(0, v1); + CALL_1D(exp2, 1 << x14); // return is d0 VMOV_64(v1, 0); addr+=4; } else { - VMOV_i_64(v1, 0b01110000); // 1.0 - VSUB_F64(v1, 0, v1); + BFI(x3, x2, 22, 2); // inject new round + VMSR(x3); // put new fpscr + + //ST0.d = expm1(LN2 * ST0.d); + MOV32(x2, (&d_ln2)); + VLDR_64(0, x2, 0); + VMUL_F64(0, 0, v1); + x87_setround(dyn, ninst, x1, x2, -1); + CALL_1D(expm1, 1 << x14); // return is d0 + VMOV_64(v1, 0); } + VMSR(x14); #endif // should set C1 to 0 break; @@ -301,9 +324,33 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FYL2X"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); v2 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D); + + LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, cw)); // hopefully cw is not too far for an imm8 + UBFX(x1, x1, 10, 2); // extract round... + VCMP_F64_0(v2); + VMRS_APSR(); + B_MARK(cLT); // if ST1.d < 0 then don't swap bits 0 and 1 + BFI(x1, x1, 2, 1); // if ST1.d >= 0 then swap bits 0 and 1 + UBFX(x1, x1, 1, 2); + MARK; + s0 = fpu_get_scratch_double(dyn); + VMOV_i_64(s0, 0b01110000); // = 1.0 + VCMP_F64(v1, s0); + VMRS_APSR(); + B_MARK2(cGE); // if ST0 < 1 and if the rounding mode is toward 0, then use upward + TSTS_IMM8(x1, 0b01); + AND_IMM8_COND(cNE, x1, x1, 0b01); // 11 (TOWARDZERO) -> 01 (UPWARD), 01 -> 01 + MARK2; + VMRS(x14); // get fpscr + MOV_REG(x3, x14); + BFI(x3, x1, 22, 2); // inject new round + VMSR(x3); // put new fpscr + VMOV_64(0, v1); // prepare call to log2 - CALL_1D(log2, 0); + CALL_1D(log2, 1 << x14); + x87_setround(dyn, ninst, x1, x2, -1); VMUL_F64(v2, v2, 0); //ST(1).d = log2(ST0.d)*ST(1).d + VMSR(x14); x87_do_pop(dyn, ninst, x3); // should set C1 to 0 break; @@ -311,9 +358,12 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FPTAN"); v2 = x87_do_push(dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F); v1 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D); + // seems that tan of glibc doesn't follow the rounding direction mode + //u8 = x87_setround(dyn, ninst, x1, x2, x14); VMOV_64(0, v1); // prepare call to tan CALL_1D(tan, 0); VMOV_64(v1, 0); + //x87_restoreround(dyn, ninst, u8); //emu->sw.f.F87_C2 = 0; //emu->sw.f.F87_C1 = 0; LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, sw)); @@ -338,10 +388,12 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FPATAN"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); v2 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D); + u8 = x87_setround(dyn, ninst, x1, x2, x14); VMOV_64(0, v2); // prepare call to atan2 VMOV_64(1, v1); - CALL_2D(atan2, 0); + CALL_2D(atan2, 1 << u8); VMOV_64(v2, 0); //ST(1).d = atan2(ST1.d, ST0.d); + x87_restoreround(dyn, ninst, u8); x87_do_pop(dyn, ninst, x3); // should set C1 to 0 break; @@ -449,32 +501,55 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FYL2XP1"); v1 = x87_get_st(dyn, ninst, x1, x2,
0, NEON_CACHE_ST_D); v2 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D); - VMOV_i_64(0, 0b01110000); // D0 = 1.0 - VADD_F64(0, 0, v1); // prepare call to log2 - CALL_1D(log2, 0); - VMUL_F64(v2, v2, 0); //ST(1).d = log2(ST0.d + 1.0)*ST(1).d; + LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, cw)); // hopefully cw is not too far for an imm8 + UBFX(x1, x1, 10, 2); // extract round... + VCMP_F64_0(v2); + VMRS_APSR(); + B_MARK(cLT); // if ST1.d < 0 then don't swap bits 0 and 1 + BFI(x1, x1, 2, 1); // if ST1.d >= 0 then swap bits 0 and 1 + UBFX(x1, x1, 1, 2); + MARK; + VMRS(x14); // get fpscr + MOV_REG(x3, x14); + BFI(x3, x1, 22, 2); // inject new round + VMSR(x3); // put new fpscr + + //ST(1).d = (ST(1).d * log1p(ST0.d)) / M_LN2; + VMOV_64(0, v1); // prepare call to log1p + CALL_1D(log1p, 1 << x14); + x87_setround(dyn, ninst, x1, x2, -1); + VMUL_F64(v2, v2, 0); + MOV32(x2, (&d_ln2)); + VLDR_64(0, x2, 0); + VDIV_F64(v2, v2, 0); + VMSR(x14); x87_do_pop(dyn, ninst, x3); // should set C1 to 0 break; case 0xFA: INST_NAME("FSQRT"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VSQRT_F32(v1, v1); } else { VSQRT_F64(v1, v1); } + x87_restoreround(dyn, ninst, u8); // should set C1 to 0 break; case 0xFB: INST_NAME("FSINCOS"); v2 = x87_do_push(dyn, ninst, x3, NEON_CACHE_ST_D); v1 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D); + // seems that the sin and cos functions of glibc don't follow the rounding mode + //u8 = x87_setround(dyn, ninst, x1, x2, x14); VMOV_64(0, v1); CALL_1D(sin, 0); VSWP(v1, 0); CALL_1D(cos, 0); // would it be faster to do sqrt(1-sin()²) ??? VMOV_64(v2, 0); + //x87_restoreround(dyn, ninst, u8); //emu->sw.f.F87_C2 = 0; C1 too LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, sw)); BFC(x1, 9, 2); //C2 C1 = 0 0 @@ -509,26 +584,28 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); v2 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D); //if(ST0.d!=0.0) - // ST0.d *= exp2(trunc(ST1.d)); - VCMP_F64_0(v1); - VMRS_APSR(); - B_NEXT(cEQ); - if(!arm_v8) { - VMOV_64(0, v2); - CALL_1DD(trunc, exp2, 0); - } else { - VRINTZ_F64(0, v2); - CALL_1D(exp2, 0); - } - VMUL_F64(v1, v1, 0); + // ST0.d = ldexp(ST0.d, trunc(ST1.d)); + u8 = x87_setround(dyn, ninst, x1, x2, x14); + s0 = fpu_get_scratch_single(dyn); + // value of s0 = + // 2^31-1 (ST1 >= 2^31), -2^31 (ST1 < -2^31) or int(ST1) (other situations) + VCVT_S32_F64(s0, v2); + VMOVfrV(x2, s0); + VMOV_64(0, v1); + CALL_1DDR(ldexp, x2, x3, 1 << u8); + VMOV_64(v1, 0); + x87_restoreround(dyn, ninst, u8); // should set C1 to 0 break; case 0xFE: INST_NAME("FSIN"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); + // seems that sin of glibc doesn't follow the rounding direction mode + //u8 = x87_setround(dyn, ninst, x1, x2, x14); VMOV_64(0, v1); // prepare call to sin CALL_1D(sin, 0); VMOV_64(v1, 0); + //x87_restoreround(dyn, ninst, u8); //emu->sw.f.F87_C2 = 0; C1 too LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, sw)); BFC(x1, 9, 2); //C2 C1 = 0 0 @@ -537,9 +614,12 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, case 0xFF: INST_NAME("FCOS"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); + // seems that cos of glibc doesn't follow the rounding direction mode + //u8 = x87_setround(dyn, ninst, x1, x2, x14); VMOV_64(0, v1); // prepare call to cos CALL_1D(cos, 0); VMOV_64(v1, 0); + //x87_restoreround(dyn, ninst, u8); //emu->sw.f.F87_C2 = 0; C1 too LDRH_IMM8(x1, xEmu,
offsetof(x86emu_t, sw)); BFC(x1, 9, 2); //C2 C1 = 0 0 @@ -598,7 +678,9 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, s0 = v1; else { s0 = fpu_get_scratch_single(dyn); + u8 = x87_setround(dyn, ninst, x1, x2, x14); VCVT_F32_F64(s0, v1); + x87_restoreround(dyn, ninst, u8); } parity = getedparity(dyn, ninst, addr, nextop, 2); if(parity) { @@ -617,7 +699,9 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, s0 = v1; else { s0 = fpu_get_scratch_single(dyn); + u8 = x87_setround(dyn, ninst, x1, x2, x14); VCVT_F32_F64(s0, v1); + x87_restoreround(dyn, ninst, u8); } parity = getedparity(dyn, ninst, addr, nextop, 2); if(parity) { diff --git a/src/dynarec/dynarec_arm_da.c b/src/dynarec/dynarec_arm_da.c index 3a4f44d8d..971f5dd3d 100755 --- a/src/dynarec/dynarec_arm_da.c +++ b/src/dynarec/dynarec_arm_da.c @@ -33,6 +33,7 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int d0; int s0; int fixedaddress; + uint8_t u8; MAYUSE(s0); MAYUSE(d0); @@ -155,7 +156,9 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, s0 = fpu_get_scratch_single(dyn); VMOVtoV(s0, ed); VCVT_F64_S32(d0, s0); + u8 = x87_setround(dyn, ninst, x1, x2, x14); VADD_F64(v1, v1, d0); + x87_restoreround(dyn, ninst, u8); break; case 1: INST_NAME("FIMUL ST0, Ed"); @@ -165,7 +168,9 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, s0 = fpu_get_scratch_single(dyn); VMOVtoV(s0, ed); VCVT_F64_S32(d0, s0); + u8 = x87_setround(dyn, ninst, x1, x2, x14); VMUL_F64(v1, v1, d0); + x87_restoreround(dyn, ninst, u8); break; case 2: INST_NAME("FICOM ST0, Ed"); @@ -198,7 +203,9 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, s0 = fpu_get_scratch_single(dyn); VMOVtoV(s0, ed); VCVT_F64_S32(d0, s0); + u8 = x87_setround(dyn, ninst, x1, x2, x14); VSUB_F64(v1, v1, d0); + x87_restoreround(dyn, ninst, u8); break; case 5: INST_NAME("FISUBR ST0, Ed"); @@ -208,7 +215,9 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, s0 = fpu_get_scratch_single(dyn); VMOVtoV(s0, ed); VCVT_F64_S32(d0, s0); + u8 = x87_setround(dyn, ninst, x1, x2, x14); VSUB_F64(v1, d0, v1); + x87_restoreround(dyn, ninst, u8); break; case 6: INST_NAME("FIDIV ST0, Ed"); @@ -218,7 +227,9 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, s0 = fpu_get_scratch_single(dyn); VMOVtoV(s0, ed); VCVT_F64_S32(d0, s0); + u8 = x87_setround(dyn, ninst, x1, x2, x14); VDIV_F64(v1, v1, d0); + x87_restoreround(dyn, ninst, u8); break; case 7: INST_NAME("FIDIVR ST0, Ed"); @@ -228,7 +239,9 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, s0 = fpu_get_scratch_single(dyn); VMOVtoV(s0, ed); VCVT_F64_S32(d0, s0); + u8 = x87_setround(dyn, ninst, x1, x2, x14); VDIV_F64(v1, d0, v1); + x87_restoreround(dyn, ninst, u8); break; } } diff --git a/src/dynarec/dynarec_arm_dc.c b/src/dynarec/dynarec_arm_dc.c index baef4f6b3..4ec31e7d0 100755 --- a/src/dynarec/dynarec_arm_dc.c +++ b/src/dynarec/dynarec_arm_dc.c @@ -31,6 +31,7 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int d1; int fixedaddress; int parity; + uint8_t u8; MAYUSE(d1); MAYUSE(v2); @@ -48,11 +49,13 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FADD STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, 
nextop&7)); + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VADD_F32(v1, v1, v2); } else { VADD_F64(v1, v1, v2); } + x87_restoreround(dyn, ninst, u8); break; case 0xC8: case 0xC9: @@ -65,11 +68,13 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FMUL STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VMUL_F32(v1, v1, v2); } else { VMUL_F64(v1, v1, v2); } + x87_restoreround(dyn, ninst, u8); break; case 0xD0: case 0xD1: @@ -119,11 +124,13 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FSUBR STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VSUB_F32(v1, v2, v1); } else { VSUB_F64(v1, v2, v1); } + x87_restoreround(dyn, ninst, u8); break; case 0xE8: case 0xE9: @@ -136,11 +143,13 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FSUB STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VSUB_F32(v1, v1, v2); } else { VSUB_F64(v1, v1, v2); } + x87_restoreround(dyn, ninst, u8); break; case 0xF0: case 0xF1: @@ -153,11 +162,13 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FDIVR STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VDIV_F32(v1, v2, v1); } else { VDIV_F64(v1, v2, v1); } + x87_restoreround(dyn, ninst, u8); break; case 0xF8: case 0xF9: @@ -170,11 +181,13 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FDIV STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VDIV_F32(v1, v1, v2); } else { VDIV_F64(v1, v1, v2); } + x87_restoreround(dyn, ninst, u8); break; default: switch((nextop>>3)&7) { @@ -192,7 +205,9 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, LDR_IMM9(x3, wback, fixedaddress+4); VMOVtoV_D(d1, x2, x3); } + u8 = x87_setround(dyn, ninst, x1, x2, x14); VADD_F64(v1, v1, d1); + x87_restoreround(dyn, ninst, u8); break; case 1: INST_NAME("FMUL ST0, double[ED]"); @@ -208,7 +223,9 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, LDR_IMM9(x3, wback, fixedaddress+4); VMOVtoV_D(d1, x2, x3); } + u8 = x87_setround(dyn, ninst, x1, x2, x14); VMUL_F64(v1, v1, d1); + x87_restoreround(dyn, ninst, u8); break; case 2: INST_NAME("FCOM ST0, double[ED]"); @@ -259,7 +276,9 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, LDR_IMM9(x3, wback, fixedaddress+4); VMOVtoV_D(d1, x2, x3); } + u8 = x87_setround(dyn, ninst, x1, x2, x14); VSUB_F64(v1, v1, d1); + x87_restoreround(dyn, ninst, u8); break; case 5: INST_NAME("FSUBR ST0, double[ED]"); @@ -275,7 +294,9 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, LDR_IMM9(x3, wback, 
fixedaddress+4); VMOVtoV_D(d1, x2, x3); } + u8 = x87_setround(dyn, ninst, x1, x2, x14); VSUB_F64(v1, d1, v1); + x87_restoreround(dyn, ninst, u8); break; case 6: INST_NAME("FDIV ST0, double[ED]"); @@ -291,7 +312,9 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, LDR_IMM9(x3, wback, fixedaddress+4); VMOVtoV_D(d1, x2, x3); } + u8 = x87_setround(dyn, ninst, x1, x2, x14); VDIV_F64(v1, v1, d1); + x87_restoreround(dyn, ninst, u8); break; case 7: INST_NAME("FDIVR ST0, double[ED]"); @@ -307,7 +330,9 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, LDR_IMM9(x3, wback, fixedaddress+4); VMOVtoV_D(d1, x2, x3); } + u8 = x87_setround(dyn, ninst, x1, x2, x14); VDIV_F64(v1, d1, v1); + x87_restoreround(dyn, ninst, u8); break; } } diff --git a/src/dynarec/dynarec_arm_de.c b/src/dynarec/dynarec_arm_de.c index 28d169e06..7114debf7 100755 --- a/src/dynarec/dynarec_arm_de.c +++ b/src/dynarec/dynarec_arm_de.c @@ -27,6 +27,7 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, { uint8_t nextop = F8; int v1, v2; + uint8_t u8; MAYUSE(v2); MAYUSE(v1); @@ -43,11 +44,13 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FADDP STx, ST0"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VADD_F32(v2, v2, v1); } else { VADD_F64(v2, v2, v1); } + x87_restoreround(dyn, ninst, u8); x87_do_pop(dyn, ninst, x3); break; case 0xC8: @@ -61,11 +64,13 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FMULP STx, ST0"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VMUL_F32(v2, v2, v1); } else { VMUL_F64(v2, v2, v1); } + x87_restoreround(dyn, ninst, u8); x87_do_pop(dyn, ninst, x3); break; case 0xD0: @@ -112,11 +117,13 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FSUBRP STx, ST0"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VSUB_F32(v2, v1, v2); } else { VSUB_F64(v2, v1, v2); } + x87_restoreround(dyn, ninst, u8); x87_do_pop(dyn, ninst, x3); break; case 0xE8: @@ -130,11 +137,13 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FSUBP STx, ST0"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VSUB_F32(v2, v2, v1); } else { VSUB_F64(v2, v2, v1); } + x87_restoreround(dyn, ninst, u8); x87_do_pop(dyn, ninst, x3); break; case 0xF0: @@ -148,11 +157,13 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FDIVRP STx, ST0"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VDIV_F32(v2, v1, v2); } else { VDIV_F64(v2, v1, v2); } + x87_restoreround(dyn, ninst, u8); x87_do_pop(dyn, ninst, x3); break; case 0xF8: @@ -166,12 +177,17 @@ uintptr_t dynarecDE(dynarec_arm_t* 
dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FDIVP STx, ST0"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + VMRS(x14); // get fpscr if(!box86_dynarec_fastnan) { - VMRS(x14); // get fpscr ORR_IMM8(x3, x14, 0b010, 9); // enable exceptions BIC_IMM8(x3, x3, 0b10011111, 0); - VMSR(x3); - } + } else MOV_REG(x3, x14); + LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, cw)); // hopefully cw is not too far for an imm8 + UBFX(x1, x1, 10, 2); // extract round... + UBFX(x2, x1, 1, 1); // swap bits 0 and 1 + BFI(x2, x1, 1, 1); + BFI(x3, x2, 22, 2); // inject new round + VMSR(x3); // put new fpscr if(ST_IS_F(0)) { VDIV_F32(v2, v2, v1); } else { @@ -185,8 +201,8 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, } else { VNEG_F64_cond(cNE, v2, v2); } - VMSR(x14); // restore fpscr } + VMSR(x14); // restore fpscr x87_do_pop(dyn, ninst, x3); break; diff --git a/src/dynarec/dynarec_arm_helper.c b/src/dynarec/dynarec_arm_helper.c index 52c813f2d..2618db486 100755 --- a/src/dynarec/dynarec_arm_helper.c +++ b/src/dynarec/dynarec_arm_helper.c @@ -498,6 +498,55 @@ void call_d(dynarec_arm_t* dyn, int ninst, void* fnc, void* fnc2, int n, int reg } SET_NODF(); } +// call a function with 1 double arg (taking care of the SOFTFP / HARD call) and 1 non-float arg that returns a double +void call_ddr(dynarec_arm_t* dyn, int ninst, void* fnc, void* fnc2, int arg, int reg, int ret, uint32_t mask, int saveflags) +{ + if(ret!=-2 && !mask) { + // ARM ABI requires the stack to be 8-bytes aligned! + // so, if no mask asked, add one to stay 8-bytes aligned + if(ret!=xFlags) mask=1<<xFlags; else mask=1<<x3; + } + fpu_pushcache(dyn, ninst, reg); + if(saveflags) { + STR_IMM9(xFlags, xEmu, offsetof(x86emu_t, eflags)); + } + if(ret!=-2) { + PUSH(xSP, (1<<xEmu) | mask); + } + #ifdef ARM_SOFTFP + VMOVfrV_64(0, 1, 0); // D0 -> r0:r1 + MOV_REG(2, arg); + #else + MOV_REG(0, arg); + #endif + MOV32(reg, (uintptr_t)fnc); + BLX(reg); + if(fnc2) { + #ifdef ARM_SOFTFP + // result are already in r0:r1 for next call + #endif + MOV32(reg, (uintptr_t)fnc2); + BLX(reg); + } + #ifdef ARM_SOFTFP + if(n!=1) { + POP(xSP, (1<<2) | (1<<3)); + } + VMOVtoV_64(0, 0, 1); // load r0:r1 to D0 to simulate hardfp + #endif + fpu_popcache(dyn, ninst, reg); + if(ret>=0) { + MOV_REG(ret, 0); + } + if(ret!=-2) { + POP(xSP, (1<<xEmu) | mask); + } + if(saveflags) { + LDR_IMM9(xFlags, xEmu, offsetof(x86emu_t, eflags)); + } + SET_NODF(); +} + @@ -520,7 +569,8 @@ int x87_setround(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3) UBFX(s2, s1, 1, 1); BFI(s2, s1, 1, 1); // swap bits 0 and 1 VMRS(s1); // get fpscr - MOV_REG(s3, s1); + if(s3 >= 0) + MOV_REG(s3, s1); BFI(s1, s2, 22, 2); // inject new round VMSR(s1); // put new fpscr return s3; diff --git a/src/dynarec/dynarec_arm_helper.h b/src/dynarec/dynarec_arm_helper.h index e8ea15159..7068d555e 100755 --- a/src/dynarec/dynarec_arm_helper.h +++ b/src/dynarec/dynarec_arm_helper.h @@ -262,8 +262,10 @@ #define CALL_1DD(F, F2, M) call_d(dyn, ninst, F, F2, 1, x3, -1, M, 0) // CALL_1D_U64 will use S as scratch. Return value in ret/ret2, 1 ARG in D0 #define CALL_1DR_U64(R, ret, ret2, S, M) call_dr(dyn, ninst, R, 1, S, ret, ret2, M, 1) -// CALL_1D will use S as scratch. Return value in D0, 1 ARG in D0 +// CALL_1DR will use S as scratch. Return value in D0, 1 ARG in D0 #define CALL_1DR(R, S, M) call_dr(dyn, ninst, R, 1, S, -1, -1, M, 0) +// CALL_1DDR will use S as scratch. Return value in D0, 1 ARG in D0, 1 ARG in R0 +#define CALL_1DDR(F, R, S, M) call_ddr(dyn, ninst, F, NULL, R, S, -1, M, 0) // CALL_1RD will use S as scratch.
Return value in D0, 1 ARG in R #define CALL_1RD(F, R, S, M) call_rd(dyn, ninst, F, R, S, M, 0); @@ -463,6 +465,7 @@ void* arm_next(x86emu_t* emu, uintptr_t addr); #define iret_to_epilog STEPNAME(iret_to_epilog_) #define call_c STEPNAME(call_c_) #define call_d STEPNAME(call_d_) +#define call_ddr STEPNAME(call_ddr_) #define call_dr STEPNAME(call_dr_) #define call_rd STEPNAME(call_rd_) #define grab_fsdata STEPNAME(grab_fsdata_) @@ -616,6 +619,7 @@ void iret_to_epilog(dynarec_arm_t* dyn, int ninst); void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, uint32_t mask, int saveflags); void call_d(dynarec_arm_t* dyn, int ninst, void* fnc, void* fnc2, int n, int reg, int ret, uint32_t mask, int saveflags); void call_dr(dynarec_arm_t* dyn, int ninst, int reg, int n, int s1, int ret, int ret2, uint32_t mask, int saveflags); +void call_ddr(dynarec_arm_t* dyn, int ninst, void* fnc, void* fnc2, int arg, int reg, int ret, uint32_t mask, int saveflags); void call_rd(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int s1, uint32_t mask, int saveflags); void grab_fsdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg); void grab_tlsdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg); diff --git a/src/emu/x86rund8.c b/src/emu/x86rund8.c index 944f5df3a..392b74d53 100755 --- a/src/emu/x86rund8.c +++ b/src/emu/x86rund8.c @@ -1,3 +1,4 @@ +#include <fenv.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -14,6 +15,7 @@ #include "x86primop.h" #include "x86trace.h" #include "box86context.h" +#include "setround.h" #include "modrm.h" @@ -32,6 +34,7 @@ uintptr_t RunD8(x86emu_t *emu, uintptr_t addr) #ifdef TEST_INTERPRETER x86emu_t*emu = test->emu; #endif + int oldround = setround(emu); nextop = F8; switch (nextop) { @@ -192,9 +195,11 @@ uintptr_t RunD8(x86emu_t *emu, uintptr_t addr) } break; default: + fesetround(oldround); return 0; } } - return addr; + fesetround(oldround); + return addr; } #pragma GCC diagnostic pop diff --git a/src/emu/x86rund9.c b/src/emu/x86rund9.c index 753aa4421..87e74158c 100755 --- a/src/emu/x86rund9.c +++ b/src/emu/x86rund9.c @@ -1,4 +1,5 @@ #define _GNU_SOURCE +#include <fenv.h> #include <math.h> #include <stdint.h> #include <stdio.h> @@ -7,6 +8,7 @@ #include "debug.h" #include "box86stack.h" +#include "setround.h" #include "x86emu.h" #include "x86run.h" #include "x86emu_private.h" @@ -33,6 +35,7 @@ uintptr_t RunD9(x86emu_t *emu, uintptr_t addr) x86emu_t*emu = test->emu; #endif + int oldround = setround(emu); nextop = F8; switch (nextop) { case 0xC0: @@ -110,11 +113,30 @@ uintptr_t RunD9(x86emu_t *emu, uintptr_t addr) break; case 0xF0: /* F2XM1 */ - ST0.d = exp2(ST0.d) - 1.0; + if (ST0.d == 0) + break; + // Using expm1 instead of exp2(ST0)-1 avoids a large loss of precision, + // especially when ST0 is close to zero (where the final -1 would cancel most bits). + // printf("%a, %a\n", LN2 * ST0.d, expm1(LN2 * ST0.d)); + ST0.d = expm1(LN2 * ST0.d); + // = 2^ST0 - 1 + rounding error (mathematically).
emu->sw.f.F87_C1 = 0; break; case 0xF1: /* FYL2X */ - ST(1).d *= log2(ST0.d); + if (ST1.d < 0) { + switch (emu->cw.f.C87_RD) { + case ROUND_Up: + fesetround(FE_DOWNWARD); + break; + case ROUND_Down: + fesetround(FE_UPWARD); + } + } + if (ST0.d < 1 && emu->cw.f.C87_RD == ROUND_Chop) + fesetround(FE_UPWARD); + const double log2_st0 = log2(ST0.d); + setround(emu); + ST(1).d *= log2_st0; fpu_do_pop(emu); emu->sw.f.F87_C1 = 0; break; @@ -187,7 +209,21 @@ uintptr_t RunD9(x86emu_t *emu, uintptr_t addr) emu->top=(emu->top+1)&7; // this will probably break a few things break; case 0xF9: /* FYL2XP1 */ - ST(1).d *= log2(ST0.d + 1.0); + if (ST1.d < 0) { + switch (emu->cw.f.C87_RD) { + case ROUND_Up: + fesetround(FE_DOWNWARD); + break; + case ROUND_Down: + fesetround(FE_UPWARD); + } + } + // Using log1p instead of log2(ST0+1) avoids a large loss of precision, + // especially when ST0 is close to zero (where the +1 would cancel most bits). + const double log1p_st0 = log1p(ST0.d); + setround(emu); + ST(1).d = (ST(1).d * log1p_st0) / M_LN2; + // = ST1 * log2(ST0 + 1) + rounding error (mathematically). fpu_do_pop(emu); emu->sw.f.F87_C1 = 0; break; @@ -206,9 +242,14 @@ uintptr_t RunD9(x86emu_t *emu, uintptr_t addr) emu->sw.f.F87_C1 = 0; break; case 0xFD: /* FSCALE */ - // this could probably be done by just altering the exponant part of the float... + if (ST1.d > INT32_MAX) + tmp32s = INT32_MAX; + else if (ST1.d < INT32_MIN) + tmp32s = INT32_MIN; + else + tmp32s = ST1.d; if(ST0.d!=0.0) - ST0.d *= exp2(trunc(ST1.d)); + ST0.d = ldexp(ST0.d, tmp32s); emu->sw.f.F87_C1 = 0; break; case 0xFE: /* FSIN */ @@ -241,6 +282,7 @@ uintptr_t RunD9(x86emu_t *emu, uintptr_t addr) case 0xE6: case 0xE7: case 0xEF: + fesetround(oldround); return 0; default: switch((nextop>>3)&7) { @@ -300,8 +342,10 @@ uintptr_t RunD9(x86emu_t *emu, uintptr_t addr) EW->word[0] = emu->cw.x16; break; default: + fesetround(oldround); return 0; } } - return addr; + fesetround(oldround); + return addr; } diff --git a/src/emu/x86runda.c b/src/emu/x86runda.c index c797c96f6..d91721481 100755 --- a/src/emu/x86runda.c +++ b/src/emu/x86runda.c @@ -1,3 +1,4 @@ +#include <fenv.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -14,6 +15,7 @@ #include "x86primop.h" #include "x86trace.h" #include "box86context.h" +#include "setround.h" #include "modrm.h" @@ -24,7 +26,6 @@ uintptr_t RunDA(x86emu_t *emu, uintptr_t addr) #endif { uint8_t nextop; - int32_t tmp32s; int64_t ll; float f; reg32_t *oped; @@ -101,7 +102,8 @@ uintptr_t RunDA(x86emu_t *emu, uintptr_t addr) case 0xF9: case 0xFD: return 0; - default: + default:; + int oldround = setround(emu); switch((nextop>>3)&7) { case 0: /* FIADD ST0, Ed int */ GET_ED; @@ -137,6 +139,7 @@ uintptr_t RunDA(x86emu_t *emu, uintptr_t addr) ST0.d = (double)ED->sdword[0] / ST0.d; break; } + fesetround(oldround); } return addr; } \ No newline at end of file diff --git a/src/emu/x86rundc.c b/src/emu/x86rundc.c index ed1fbfb91..8c3c163e4 100755 --- a/src/emu/x86rundc.c +++ b/src/emu/x86rundc.c @@ -1,3 +1,4 @@ +#include <fenv.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -14,6 +15,7 @@ #include "x86primop.h" #include "x86trace.h" #include "box86context.h" +#include "setround.h" #include "modrm.h" @@ -32,6 +34,7 @@ uintptr_t RunDC(x86emu_t *emu, uintptr_t addr) x86emu_t*emu = test->emu; #endif + int oldround = setround(emu); nextop = F8; switch(nextop) { case 0xC0: @@ -184,8 +187,10 @@ uintptr_t RunDC(x86emu_t *emu, uintptr_t addr) } break; default: + fesetround(oldround); return 0; } } + fesetround(oldround); return addr; } \ No newline at end of file diff --git a/src/emu/x86runde.c
b/src/emu/x86runde.c index f44a3c694..ac6f1af7a 100755 --- a/src/emu/x86runde.c +++ b/src/emu/x86runde.c @@ -6,6 +6,7 @@ #include "debug.h" #include "box86stack.h" +#include "setround.h" #include "x86emu.h" #include "x86run.h" #include "x86emu_private.h" @@ -32,6 +33,7 @@ uintptr_t RunDE(x86emu_t *emu, uintptr_t addr) x86emu_t*emu = test->emu; #endif + int oldround = setround(emu); nextop = F8; switch (nextop) { case 0xC0: /* FADDP STx, ST0 */ @@ -158,8 +160,10 @@ uintptr_t RunDE(x86emu_t *emu, uintptr_t addr) ST0.d = (double)EW->sword[0] / ST0.d; break; default: + fesetround(oldround); return 0; } } + fesetround(oldround); return addr; } \ No newline at end of file diff --git a/src/emu/x86rundf.c b/src/emu/x86rundf.c index 45ef62e0e..431ca3b26 100755 --- a/src/emu/x86rundf.c +++ b/src/emu/x86rundf.c @@ -1,3 +1,4 @@ +#include <fenv.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -6,6 +7,7 @@ #include "debug.h" #include "box86stack.h" +#include "setround.h" #include "x86emu.h" #include "x86run.h" #include "x86emu_private.h" diff --git a/src/emu/x86test.c b/src/emu/x86test.c index ea6a9515b..d6a6f1b02 100644 --- a/src/emu/x86test.c +++ b/src/emu/x86test.c @@ -75,7 +75,7 @@ void x86test_check(x86emu_t* ref, uintptr_t ip) for(int i=0; i<ref->fpu_stack; ++i) { if(ref->x87[(ref->top+i)&7].d != emu->x87[(emu->top+i)&7].d) { BANNER; - printf_log(LOG_NONE, "ST%d: %g | %g\n", i, ref->x87[(ref->top+i)&7].d, emu->x87[(emu->top+i)&7].d); + printf_log(LOG_NONE, "ST%d: %g (%a) | %g (%a)\n", i, ref->x87[(ref->top+i)&7].d, ref->x87[(ref->top+i)&7].d, emu->x87[(emu->top+i)&7].d, emu->x87[(emu->top+i)&7].d); } } } diff --git a/src/emu/x87emu_private.c b/src/emu/x87emu_private.c index 3c38f519a..a983ccc25 100755 --- a/src/emu/x87emu_private.c +++ b/src/emu/x87emu_private.c @@ -1,3 +1,4 @@ +#include <fenv.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> diff --git a/src/include/setround.h b/src/include/setround.h new file mode 100644 index 000000000..ff182d7d2 --- /dev/null +++ b/src/include/setround.h @@ -0,0 +1,29 @@ +#ifndef __SETROUND_H__ +#define __SETROUND_H__ +#pragma STDC FENV_ACCESS ON +#include <fenv.h> +#include <stdint.h> +#include "x86emu.h" +#include "emu/x86emu_private.h" +// set the rounding mode to the emulator's one, and return the old one +static inline int setround(x86emu_t* emu) { + int ret = fegetround(); + int rounding_direction; + switch (emu->cw.f.C87_RD) { + case ROUND_Nearest: + rounding_direction = FE_TONEAREST; + break; + case ROUND_Down: + rounding_direction = FE_DOWNWARD; + break; + case ROUND_Up: + rounding_direction = FE_UPWARD; + break; + case ROUND_Chop: + rounding_direction = FE_TOWARDZERO; + break; + } + fesetround(rounding_direction); + return ret; +} +#endif diff --git a/tests/ref26.txt b/tests/ref26.txt new file mode 100644 index 000000000..358877e1a --- /dev/null +++ b/tests/ref26.txt @@ -0,0 +1,810 @@ +Testing: s = (0x1.123456789abcp2) -> (double)s +FE_TONEAREST 0x1.123456789abcp+2 +FE_DOWNWARD 0x1.123456789abcp+2 +FE_UPWARD 0x1.123456789abcp+2 +FE_TOWARDZERO 0x1.123456789abcp+2 + +Testing: s = (0x1.123456789abcp2) -> (float)s +FE_TONEAREST 0x1.123456p+2 +FE_DOWNWARD 0x1.123456p+2 +FE_UPWARD 0x1.123458p+2 +FE_TOWARDZERO 0x1.123456p+2 + +Testing: s = (-(0x1.123456789abcp2)) -> (double)s +FE_TONEAREST -0x1.123456789abcp+2 +FE_DOWNWARD -0x1.123456789abcp+2 +FE_UPWARD -0x1.123456789abcp+2 +FE_TOWARDZERO -0x1.123456789abcp+2 + +Testing: s = (-(0x1.123456789abcp2)) -> (float)s +FE_TONEAREST -0x1.123456p+2 +FE_DOWNWARD -0x1.123458p+2 +FE_UPWARD -0x1.123456p+2 +FE_TOWARDZERO -0x1.123456p+2 + +Testing: d = (0x1.123456789abcp512) -> (float)d +FE_TONEAREST inf
+FE_DOWNWARD 0x1.fffffep+127 +FE_UPWARD inf +FE_TOWARDZERO 0x1.fffffep+127 + +Testing: s = (0x1.123456789abcp29) -> (double)s +FE_TONEAREST 0x1.123456789abcp+29 +FE_DOWNWARD 0x1.123456789abcp+29 +FE_UPWARD 0x1.123456789abcp+29 +FE_TOWARDZERO 0x1.123456789abcp+29 + +Testing: s = (0x1.123456789abcp29) -> (float)s +FE_TONEAREST 0x1.123456p+29 +FE_DOWNWARD 0x1.123456p+29 +FE_UPWARD 0x1.123458p+29 +FE_TOWARDZERO 0x1.123456p+29 + +Testing: s = (0x1.123456789abcp29) -> (int16_t)s +FE_TONEAREST -32768 +FE_DOWNWARD -32768 +FE_UPWARD -32768 +FE_TOWARDZERO -32768 + +Testing: s = (0x1.123456789abcp29) -> (int8_t)s +FE_TONEAREST 0 +FE_DOWNWARD 0 +FE_UPWARD 0 +FE_TOWARDZERO 0 + +Testing: s = (0x1.123456789abcp29) -> (unsigned short)s +FE_TONEAREST 35535 +FE_DOWNWARD 35535 +FE_UPWARD 35535 +FE_TOWARDZERO 35535 + +Testing: s = (0x1.123456789abcp29) -> (unsigned char)s +FE_TONEAREST 0 +FE_DOWNWARD 0 +FE_UPWARD 0 +FE_TOWARDZERO 0 + +Testing: s = (-(0x1.123456789abcp29)) -> (double)s +FE_TONEAREST -0x1.123456789abcp+29 +FE_DOWNWARD -0x1.123456789abcp+29 +FE_UPWARD -0x1.123456789abcp+29 +FE_TOWARDZERO -0x1.123456789abcp+29 + +Testing: s = (-(0x1.123456789abcp29)) -> (float)s +FE_TONEAREST -0x1.123456p+29 +FE_DOWNWARD -0x1.123458p+29 +FE_UPWARD -0x1.123456p+29 +FE_TOWARDZERO -0x1.123456p+29 + +Testing: d = (-0x1.123456789abcp30) -> (int32_t)d +FE_TONEAREST -1150096798 +FE_DOWNWARD -1150096798 +FE_UPWARD -1150096798 +FE_TOWARDZERO -1150096798 + +Testing: d = (-0x1.123456789abcp62) -> (int64_t)d +FE_TONEAREST -4939628135293321216 +FE_DOWNWARD -4939628135293321216 +FE_UPWARD -4939628135293321216 +FE_TOWARDZERO -4939628135293321216 + +Testing: s = (0x1.123456789abcp2f) -> (double)s +FE_TONEAREST 0x1.123456p+2 +FE_DOWNWARD 0x1.123456p+2 +FE_UPWARD 0x1.123458p+2 +FE_TOWARDZERO 0x1.123456p+2 + +Testing: s = (0x1.123456789abcp2f) -> (float)s +FE_TONEAREST 0x1.123456p+2 +FE_DOWNWARD 0x1.123456p+2 +FE_UPWARD 0x1.123458p+2 +FE_TOWARDZERO 0x1.123456p+2 + +Testing: s = (-(0x1.123456789abcp2f)) -> (double)s +FE_TONEAREST -0x1.123456p+2 +FE_DOWNWARD -0x1.123458p+2 +FE_UPWARD -0x1.123456p+2 +FE_TOWARDZERO -0x1.123456p+2 + +Testing: s = (-(0x1.123456789abcp2f)) -> (float)s +FE_TONEAREST -0x1.123456p+2 +FE_DOWNWARD -0x1.123458p+2 +FE_UPWARD -0x1.123456p+2 +FE_TOWARDZERO -0x1.123456p+2 + +Testing: s = (0x1.123456789abcp29f) -> (double)s +FE_TONEAREST 0x1.123456p+29 +FE_DOWNWARD 0x1.123456p+29 +FE_UPWARD 0x1.123458p+29 +FE_TOWARDZERO 0x1.123456p+29 + +Testing: s = (0x1.123456789abcp29f) -> (float)s +FE_TONEAREST 0x1.123456p+29 +FE_DOWNWARD 0x1.123456p+29 +FE_UPWARD 0x1.123458p+29 +FE_TOWARDZERO 0x1.123456p+29 + +Testing: s = (0x1.123456789abcp29f) -> (int16_t)s +FE_TONEAREST -32768 +FE_DOWNWARD -32768 +FE_UPWARD -32768 +FE_TOWARDZERO -32768 + +Testing: s = (0x1.123456789abcp29f) -> (int8_t)s +FE_TONEAREST 0 +FE_DOWNWARD 0 +FE_UPWARD 0 +FE_TOWARDZERO 0 + +Testing: s = (0x1.123456789abcp29f) -> (unsigned short)s +FE_TONEAREST 35520 +FE_DOWNWARD 35520 +FE_UPWARD 35584 +FE_TOWARDZERO 35520 + +Testing: s = (0x1.123456789abcp29f) -> (unsigned char)s +FE_TONEAREST 0 +FE_DOWNWARD 0 +FE_UPWARD 0 +FE_TOWARDZERO 0 + +Testing: s = (-(0x1.123456789abcp29f)) -> (double)s +FE_TONEAREST -0x1.123456p+29 +FE_DOWNWARD -0x1.123458p+29 +FE_UPWARD -0x1.123456p+29 +FE_TOWARDZERO -0x1.123456p+29 + +Testing: s = (-(0x1.123456789abcp29f)) -> (float)s +FE_TONEAREST -0x1.123456p+29 +FE_DOWNWARD -0x1.123458p+29 +FE_UPWARD -0x1.123456p+29 +FE_TOWARDZERO -0x1.123456p+29 + +Testing: f = -0x1.123456789abcp30f -> (int32_t)f +FE_TONEAREST -1150096768 +FE_DOWNWARD 
-1150096896 +FE_UPWARD -1150096768 +FE_TOWARDZERO -1150096768 + +Testing: d = -0x1.1234567p0 -> (double)((int)d) +FE_TONEAREST -0x1p+0 +FE_DOWNWARD -0x1p+0 +FE_UPWARD -0x1p+0 +FE_TOWARDZERO -0x1p+0 + +Testing: d = 0x1.9234567p0 -> (double)((int)d) +FE_TONEAREST 0x1p+0 +FE_DOWNWARD 0x1p+0 +FE_UPWARD 0x1p+0 +FE_TOWARDZERO 0x1p+0 + +Testing: d = -0x1.9234567p0 -> (double)((int)d) +FE_TONEAREST -0x1p+0 +FE_DOWNWARD -0x1p+0 +FE_UPWARD -0x1p+0 +FE_TOWARDZERO -0x1p+0 + +Testing: d = 0x1.1234567p0 -> (double)((long int)d) +FE_TONEAREST 0x1p+0 +FE_DOWNWARD 0x1p+0 +FE_UPWARD 0x1p+0 +FE_TOWARDZERO 0x1p+0 + +Testing: d = -0x1.1234567p0 -> (double)((long int)d) +FE_TONEAREST -0x1p+0 +FE_DOWNWARD -0x1p+0 +FE_UPWARD -0x1p+0 +FE_TOWARDZERO -0x1p+0 + +Testing: d = 0x1.9234567p0 -> (double)((long int)d) +FE_TONEAREST 0x1p+0 +FE_DOWNWARD 0x1p+0 +FE_UPWARD 0x1p+0 +FE_TOWARDZERO 0x1p+0 + +Testing: d = -0x1.9234567p0 -> (double)((long int)d) +FE_TONEAREST -0x1p+0 +FE_DOWNWARD -0x1p+0 +FE_UPWARD -0x1p+0 +FE_TOWARDZERO -0x1p+0 + +Testing: (d1 = (1.0), d2 = (0x1.0000000000001p0)) -> d1 + d2 +FE_TONEAREST 0x1p+1 +FE_DOWNWARD 0x1p+1 +FE_UPWARD 0x1.0000000000001p+1 +FE_TOWARDZERO 0x1p+1 + +Testing: (d1 = -(1.0), d2 = (0x1.0000000000001p0)) -> d1 + d2 +FE_TONEAREST 0x1p-52 +FE_DOWNWARD 0x1p-52 +FE_UPWARD 0x1p-52 +FE_TOWARDZERO 0x1p-52 + +Testing: (d1 = (1.0), d2 = -(0x1.0000000000001p0)) -> d1 + d2 +FE_TONEAREST -0x1p-52 +FE_DOWNWARD -0x1p-52 +FE_UPWARD -0x1p-52 +FE_TOWARDZERO -0x1p-52 + +Testing: (d1 = -(1.0), d2 = -(0x1.0000000000001p0)) -> d1 + d2 +FE_TONEAREST -0x1p+1 +FE_DOWNWARD -0x1.0000000000001p+1 +FE_UPWARD -0x1p+1 +FE_TOWARDZERO -0x1p+1 + +Testing: (d1 = (1.0), d2 = (0x1.0000000000001p0)) -> d1 - d2 +FE_TONEAREST -0x1p-52 +FE_DOWNWARD -0x1p-52 +FE_UPWARD -0x1p-52 +FE_TOWARDZERO -0x1p-52 + +Testing: (d1 = -(1.0), d2 = (0x1.0000000000001p0)) -> d1 - d2 +FE_TONEAREST -0x1p+1 +FE_DOWNWARD -0x1.0000000000001p+1 +FE_UPWARD -0x1p+1 +FE_TOWARDZERO -0x1p+1 + +Testing: (d1 = (1.0), d2 = -(0x1.0000000000001p0)) -> d1 - d2 +FE_TONEAREST 0x1p+1 +FE_DOWNWARD 0x1p+1 +FE_UPWARD 0x1.0000000000001p+1 +FE_TOWARDZERO 0x1p+1 + +Testing: (d1 = -(1.0), d2 = -(0x1.0000000000001p0)) -> d1 - d2 +FE_TONEAREST 0x1p-52 +FE_DOWNWARD 0x1p-52 +FE_UPWARD 0x1p-52 +FE_TOWARDZERO 0x1p-52 + +Testing: (d1 = (1.0), d2 = (0x1.0000000000001p0)) -> d2 - d1 +FE_TONEAREST 0x1p-52 +FE_DOWNWARD 0x1p-52 +FE_UPWARD 0x1p-52 +FE_TOWARDZERO 0x1p-52 + +Testing: (d1 = -(1.0), d2 = (0x1.0000000000001p0)) -> d2 - d1 +FE_TONEAREST 0x1p+1 +FE_DOWNWARD 0x1p+1 +FE_UPWARD 0x1.0000000000001p+1 +FE_TOWARDZERO 0x1p+1 + +Testing: (d1 = (1.0), d2 = -(0x1.0000000000001p0)) -> d2 - d1 +FE_TONEAREST -0x1p+1 +FE_DOWNWARD -0x1.0000000000001p+1 +FE_UPWARD -0x1p+1 +FE_TOWARDZERO -0x1p+1 + +Testing: (d1 = -(1.0), d2 = -(0x1.0000000000001p0)) -> d2 - d1 +FE_TONEAREST -0x1p-52 +FE_DOWNWARD -0x1p-52 +FE_UPWARD -0x1p-52 +FE_TOWARDZERO -0x1p-52 + +Testing: (d1 = (1.0), d2 = (0x1.000000000000dp-4)) -> d1 + d2 +FE_TONEAREST 0x1.1000000000001p+0 +FE_DOWNWARD 0x1.1p+0 +FE_UPWARD 0x1.1000000000001p+0 +FE_TOWARDZERO 0x1.1p+0 + +Testing: (d1 = -(1.0), d2 = (0x1.000000000000dp-4)) -> d1 + d2 +FE_TONEAREST -0x1.dfffffffffffep-1 +FE_DOWNWARD -0x1.dffffffffffffp-1 +FE_UPWARD -0x1.dfffffffffffep-1 +FE_TOWARDZERO -0x1.dfffffffffffep-1 + +Testing: (d1 = (1.0), d2 = -(0x1.000000000000dp-4)) -> d1 + d2 +FE_TONEAREST 0x1.dfffffffffffep-1 +FE_DOWNWARD 0x1.dfffffffffffep-1 +FE_UPWARD 0x1.dffffffffffffp-1 +FE_TOWARDZERO 0x1.dfffffffffffep-1 + +Testing: (d1 = -(1.0), d2 = -(0x1.000000000000dp-4)) -> d1 + 
d2 +FE_TONEAREST -0x1.1000000000001p+0 +FE_DOWNWARD -0x1.1000000000001p+0 +FE_UPWARD -0x1.1p+0 +FE_TOWARDZERO -0x1.1p+0 + +Testing: (d1 = (1.0), d2 = (0x1.000000000000dp-4)) -> d1 - d2 +FE_TONEAREST 0x1.dfffffffffffep-1 +FE_DOWNWARD 0x1.dfffffffffffep-1 +FE_UPWARD 0x1.dffffffffffffp-1 +FE_TOWARDZERO 0x1.dfffffffffffep-1 + +Testing: (d1 = -(1.0), d2 = (0x1.000000000000dp-4)) -> d1 - d2 +FE_TONEAREST -0x1.1000000000001p+0 +FE_DOWNWARD -0x1.1000000000001p+0 +FE_UPWARD -0x1.1p+0 +FE_TOWARDZERO -0x1.1p+0 + +Testing: (d1 = (1.0), d2 = -(0x1.000000000000dp-4)) -> d1 - d2 +FE_TONEAREST 0x1.1000000000001p+0 +FE_DOWNWARD 0x1.1p+0 +FE_UPWARD 0x1.1000000000001p+0 +FE_TOWARDZERO 0x1.1p+0 + +Testing: (d1 = -(1.0), d2 = -(0x1.000000000000dp-4)) -> d1 - d2 +FE_TONEAREST -0x1.dfffffffffffep-1 +FE_DOWNWARD -0x1.dffffffffffffp-1 +FE_UPWARD -0x1.dfffffffffffep-1 +FE_TOWARDZERO -0x1.dfffffffffffep-1 + +Testing: (d1 = (1.0), d2 = (0x1.000000000000dp-4)) -> d2 - d1 +FE_TONEAREST -0x1.dfffffffffffep-1 +FE_DOWNWARD -0x1.dffffffffffffp-1 +FE_UPWARD -0x1.dfffffffffffep-1 +FE_TOWARDZERO -0x1.dfffffffffffep-1 + +Testing: (d1 = -(1.0), d2 = (0x1.000000000000dp-4)) -> d2 - d1 +FE_TONEAREST 0x1.1000000000001p+0 +FE_DOWNWARD 0x1.1p+0 +FE_UPWARD 0x1.1000000000001p+0 +FE_TOWARDZERO 0x1.1p+0 + +Testing: (d1 = (1.0), d2 = -(0x1.000000000000dp-4)) -> d2 - d1 +FE_TONEAREST -0x1.1000000000001p+0 +FE_DOWNWARD -0x1.1000000000001p+0 +FE_UPWARD -0x1.1p+0 +FE_TOWARDZERO -0x1.1p+0 + +Testing: (d1 = -(1.0), d2 = -(0x1.000000000000dp-4)) -> d2 - d1 +FE_TONEAREST 0x1.dfffffffffffep-1 +FE_DOWNWARD 0x1.dfffffffffffep-1 +FE_UPWARD 0x1.dffffffffffffp-1 +FE_TOWARDZERO 0x1.dfffffffffffep-1 + +Testing: (d1 = (0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 + d2 +FE_TONEAREST 0x1.2bc55ef8922bp+1 +FE_DOWNWARD 0x1.2bc55ef8922bp+1 +FE_UPWARD 0x1.2bc55ef8922bp+1 +FE_TOWARDZERO 0x1.2bc55ef8922bp+1 + +Testing: (d1 = -(0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 + d2 +FE_TONEAREST 0x1.12233445566p-4 +FE_DOWNWARD 0x1.12233445566p-4 +FE_UPWARD 0x1.12233445566p-4 +FE_TOWARDZERO 0x1.12233445566p-4 + +Testing: (d1 = (0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 + d2 +FE_TONEAREST -0x1.12233445566p-4 +FE_DOWNWARD -0x1.12233445566p-4 +FE_UPWARD -0x1.12233445566p-4 +FE_TOWARDZERO -0x1.12233445566p-4 + +Testing: (d1 = -(0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 + d2 +FE_TONEAREST -0x1.2bc55ef8922bp+1 +FE_DOWNWARD -0x1.2bc55ef8922bp+1 +FE_UPWARD -0x1.2bc55ef8922bp+1 +FE_TOWARDZERO -0x1.2bc55ef8922bp+1 + +Testing: (d1 = (0x1.233445566778p0f), d2 = (0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd404804dp+0 +FE_DOWNWARD 0x1.5ebd3ddf57ep+0 +FE_UPWARD 0x1.5ebd428e6d5cp+0 +FE_TOWARDZERO 0x1.5ebd3ddf57ep+0 + +Testing: (d1 = -(0x1.233445566778p0f), d2 = (0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST -0x1.5ebd404804dp+0 +FE_DOWNWARD -0x1.5ebd404804dp+0 +FE_UPWARD -0x1.5ebd4025c068p+0 +FE_TOWARDZERO -0x1.5ebd3ddf57ep+0 + +Testing: (d1 = (0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST -0x1.5ebd404804dp+0 +FE_DOWNWARD -0x1.5ebd4025c068p+0 +FE_UPWARD -0x1.5ebd404804dp+0 +FE_TOWARDZERO -0x1.5ebd3ddf57ep+0 + +Testing: (d1 = -(0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd404804dp+0 +FE_DOWNWARD 0x1.5ebd428e6d5cp+0 +FE_UPWARD 0x1.5ebd3ddf57ep+0 +FE_TOWARDZERO 0x1.5ebd3ddf57ep+0 + +Testing: (d1 = (0x1.233445566778p0f), d2 = (0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd40f80919p+0 +FE_DOWNWARD 0x1.5ebd3e8f5c27dp+0 +FE_UPWARD 0x1.5ebd40f809191p+0 +FE_TOWARDZERO 
0x1.5ebd3e8f5c27dp+0 + +Testing: (d1 = -(0x1.233445566778p0f), d2 = (0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST -0x1.5ebd40f80919p+0 +FE_DOWNWARD -0x1.5ebd40f809191p+0 +FE_UPWARD -0x1.5ebd3e8f5c27dp+0 +FE_TOWARDZERO -0x1.5ebd3e8f5c27dp+0 + +Testing: (d1 = (0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST -0x1.5ebd40f80919p+0 +FE_DOWNWARD -0x1.5ebd3e8f5c27ep+0 +FE_UPWARD -0x1.5ebd40f80919p+0 +FE_TOWARDZERO -0x1.5ebd3e8f5c27dp+0 + +Testing: (d1 = -(0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd40f80919p+0 +FE_DOWNWARD 0x1.5ebd40f80919p+0 +FE_UPWARD 0x1.5ebd3e8f5c27ep+0 +FE_TOWARDZERO 0x1.5ebd3e8f5c27dp+0 + +Testing: (d1 = (0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd402bc44c4p+0 +FE_DOWNWARD 0x1.5ebd402bc44c4p+0 +FE_UPWARD 0x1.5ebd402bc44c5p+0 +FE_TOWARDZERO 0x1.5ebd402bc44c4p+0 + +Testing: (d1 = -(0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST -0x1.5ebd402bc44c4p+0 +FE_DOWNWARD -0x1.5ebd402bc44c5p+0 +FE_UPWARD -0x1.5ebd402bc44c4p+0 +FE_TOWARDZERO -0x1.5ebd402bc44c4p+0 + +Testing: (d1 = (0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST -0x1.5ebd402bc44c4p+0 +FE_DOWNWARD -0x1.5ebd402bc44c5p+0 +FE_UPWARD -0x1.5ebd402bc44c4p+0 +FE_TOWARDZERO -0x1.5ebd402bc44c4p+0 + +Testing: (d1 = -(0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd402bc44c4p+0 +FE_DOWNWARD 0x1.5ebd402bc44c4p+0 +FE_UPWARD 0x1.5ebd402bc44c5p+0 +FE_TOWARDZERO 0x1.5ebd402bc44c4p+0 + +Testing: (d1 = (0x1.233445566778p0f), d2 = (0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd40f80919p+0 +FE_DOWNWARD 0x1.5ebd3e8f5c27dp+0 +FE_UPWARD 0x1.5ebd40f809191p+0 +FE_TOWARDZERO 0x1.5ebd3e8f5c27dp+0 + +Testing: (d1 = -(0x1.233445566778p0f), d2 = (0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST -0x1.5ebd40f80919p+0 +FE_DOWNWARD -0x1.5ebd40f809191p+0 +FE_UPWARD -0x1.5ebd3e8f5c27dp+0 +FE_TOWARDZERO -0x1.5ebd3e8f5c27dp+0 + +Testing: (d1 = (0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST -0x1.5ebd40f80919p+0 +FE_DOWNWARD -0x1.5ebd3e8f5c27ep+0 +FE_UPWARD -0x1.5ebd40f80919p+0 +FE_TOWARDZERO -0x1.5ebd3e8f5c27dp+0 + +Testing: (d1 = -(0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd40f80919p+0 +FE_DOWNWARD 0x1.5ebd40f80919p+0 +FE_UPWARD 0x1.5ebd3e8f5c27ep+0 +FE_TOWARDZERO 0x1.5ebd3e8f5c27dp+0 + +Testing: (d1 = (0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd3f7bc003ap+0 +FE_DOWNWARD 0x1.5ebd3f7bc003ap+0 +FE_UPWARD 0x1.5ebd41c2288e5p+0 +FE_TOWARDZERO 0x1.5ebd3f7bc003ap+0 + +Testing: (d1 = -(0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST -0x1.5ebd3f7bc003ap+0 +FE_DOWNWARD -0x1.5ebd3f7bc003bp+0 +FE_UPWARD -0x1.5ebd41c2288e4p+0 +FE_TOWARDZERO -0x1.5ebd3f7bc003ap+0 + +Testing: (d1 = (0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST -0x1.5ebd3f7bc003ap+0 +FE_DOWNWARD -0x1.5ebd41c2288e5p+0 +FE_UPWARD -0x1.5ebd3f7bc003ap+0 +FE_TOWARDZERO -0x1.5ebd3f7bc003ap+0 + +Testing: (d1 = -(0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd3f7bc003ap+0 +FE_DOWNWARD 0x1.5ebd41c2288e4p+0 +FE_UPWARD 0x1.5ebd3f7bc003bp+0 +FE_TOWARDZERO 0x1.5ebd3f7bc003ap+0 + +Testing: (d1 = (0x1.233445566778p0f), d2 = (0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd404804dp+0 +FE_DOWNWARD 0x1.5ebd3ddf57ep+0 +FE_UPWARD 0x1.5ebd428e6d5cp+0 +FE_TOWARDZERO 0x1.5ebd3ddf57ep+0 + +Testing: (d1 = -(0x1.233445566778p0f), d2 = (0x1.3456789abcdep0f)) 
-> d1 *d2 +FE_TONEAREST -0x1.5ebd404804dp+0 +FE_DOWNWARD -0x1.5ebd404804dp+0 +FE_UPWARD -0x1.5ebd4025c068p+0 +FE_TOWARDZERO -0x1.5ebd3ddf57ep+0 + +Testing: (d1 = (0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST -0x1.5ebd404804dp+0 +FE_DOWNWARD -0x1.5ebd4025c068p+0 +FE_UPWARD -0x1.5ebd404804dp+0 +FE_TOWARDZERO -0x1.5ebd3ddf57ep+0 + +Testing: (d1 = -(0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd404804dp+0 +FE_DOWNWARD 0x1.5ebd428e6d5cp+0 +FE_UPWARD 0x1.5ebd3ddf57ep+0 +FE_TOWARDZERO 0x1.5ebd3ddf57ep+0 + +Testing: (d1 = (0x1.233445566778p0), d2 = (5)) -> d1 *d2 +FE_TONEAREST 0x1.6c0156ac0156p+2 +FE_DOWNWARD 0x1.6c0156ac0156p+2 +FE_UPWARD 0x1.6c0156ac0156p+2 +FE_TOWARDZERO 0x1.6c0156ac0156p+2 + +Testing: (d1 = -(0x1.233445566778p0), d2 = (5)) -> d1 *d2 +FE_TONEAREST -0x1.6c0156ac0156p+2 +FE_DOWNWARD -0x1.6c0156ac0156p+2 +FE_UPWARD -0x1.6c0156ac0156p+2 +FE_TOWARDZERO -0x1.6c0156ac0156p+2 + +Testing: (d1 = (0x1.233445566778p0), d2 = -(5)) -> d1 *d2 +FE_TONEAREST -0x1.6c0156ac0156p+2 +FE_DOWNWARD -0x1.6c0156ac0156p+2 +FE_UPWARD -0x1.6c0156ac0156p+2 +FE_TOWARDZERO -0x1.6c0156ac0156p+2 + +Testing: (d1 = -(0x1.233445566778p0), d2 = -(5)) -> d1 *d2 +FE_TONEAREST 0x1.6c0156ac0156p+2 +FE_DOWNWARD 0x1.6c0156ac0156p+2 +FE_UPWARD 0x1.6c0156ac0156p+2 +FE_TOWARDZERO 0x1.6c0156ac0156p+2 + +Testing: (d1 = (15), d2 = (0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST 0x1.2111111111102p+4 +FE_DOWNWARD 0x1.2111111111102p+4 +FE_UPWARD 0x1.2111111111102p+4 +FE_TOWARDZERO 0x1.2111111111102p+4 + +Testing: (d1 = -(15), d2 = (0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST -0x1.2111111111102p+4 +FE_DOWNWARD -0x1.2111111111102p+4 +FE_UPWARD -0x1.2111111111102p+4 +FE_TOWARDZERO -0x1.2111111111102p+4 + +Testing: (d1 = (15), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST -0x1.2111111111102p+4 +FE_DOWNWARD -0x1.2111111111102p+4 +FE_UPWARD -0x1.2111111111102p+4 +FE_TOWARDZERO -0x1.2111111111102p+4 + +Testing: (d1 = -(15), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST 0x1.2111111111102p+4 +FE_DOWNWARD 0x1.2111111111102p+4 +FE_UPWARD 0x1.2111111111102p+4 +FE_TOWARDZERO 0x1.2111111111102p+4 + +Testing: (d1 = (0x1.233445566778p0f), d2 = (15)) -> d1 *d2 +FE_TONEAREST 0x1.110101ap+4 +FE_DOWNWARD 0x1.1100ffcp+4 +FE_UPWARD 0x1.110101ap+4 +FE_TOWARDZERO 0x1.1100ffcp+4 + +Testing: (d1 = -(0x1.233445566778p0f), d2 = (15)) -> d1 *d2 +FE_TONEAREST -0x1.110101ap+4 +FE_DOWNWARD -0x1.110101ap+4 +FE_UPWARD -0x1.1100ffcp+4 +FE_TOWARDZERO -0x1.1100ffcp+4 + +Testing: (d1 = (0x1.233445566778p0f), d2 = -(15)) -> d1 *d2 +FE_TONEAREST -0x1.110101ap+4 +FE_DOWNWARD -0x1.1100ffcp+4 +FE_UPWARD -0x1.110101ap+4 +FE_TOWARDZERO -0x1.1100ffcp+4 + +Testing: (d1 = -(0x1.233445566778p0f), d2 = -(15)) -> d1 *d2 +FE_TONEAREST 0x1.110101ap+4 +FE_DOWNWARD 0x1.110101ap+4 +FE_UPWARD 0x1.1100ffcp+4 +FE_TOWARDZERO 0x1.1100ffcp+4 + +Testing: (d1 = (15), d2 = (0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST 0x1.2111108p+4 +FE_DOWNWARD 0x1.2111108p+4 +FE_UPWARD 0x1.2111126p+4 +FE_TOWARDZERO 0x1.2111108p+4 + +Testing: (d1 = -(15), d2 = (0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST -0x1.2111108p+4 +FE_DOWNWARD -0x1.2111108p+4 +FE_UPWARD -0x1.2111126p+4 +FE_TOWARDZERO -0x1.2111108p+4 + +Testing: (d1 = (15), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST -0x1.2111108p+4 +FE_DOWNWARD -0x1.2111126p+4 +FE_UPWARD -0x1.2111108p+4 +FE_TOWARDZERO -0x1.2111108p+4 + +Testing: (d1 = -(15), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST 0x1.2111108p+4 +FE_DOWNWARD 0x1.2111126p+4 +FE_UPWARD 0x1.2111108p+4 
+FE_TOWARDZERO 0x1.2111108p+4 + +Testing: (d1 = (0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 / d2 +FE_TONEAREST 0x1.e38ca44203ab9p-1 +FE_DOWNWARD 0x1.e38ca44203ab8p-1 +FE_UPWARD 0x1.e38ca44203ab9p-1 +FE_TOWARDZERO 0x1.e38ca44203ab8p-1 + +Testing: (d1 = -(0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 / d2 +FE_TONEAREST -0x1.e38ca44203ab9p-1 +FE_DOWNWARD -0x1.e38ca44203ab9p-1 +FE_UPWARD -0x1.e38ca44203ab8p-1 +FE_TOWARDZERO -0x1.e38ca44203ab8p-1 + +Testing: (d1 = (0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 / d2 +FE_TONEAREST -0x1.e38ca44203ab9p-1 +FE_DOWNWARD -0x1.e38ca44203ab9p-1 +FE_UPWARD -0x1.e38ca44203ab8p-1 +FE_TOWARDZERO -0x1.e38ca44203ab8p-1 + +Testing: (d1 = -(0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 / d2 +FE_TONEAREST 0x1.e38ca44203ab9p-1 +FE_DOWNWARD 0x1.e38ca44203ab8p-1 +FE_UPWARD 0x1.e38ca44203ab9p-1 +FE_TOWARDZERO 0x1.e38ca44203ab8p-1 + +Testing: (d1 = (0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 / d2 +FE_TONEAREST 0x1.e38ca44203ab9p-1 +FE_DOWNWARD 0x1.e38ca44203ab8p-1 +FE_UPWARD 0x1.e38ca44203ab9p-1 +FE_TOWARDZERO 0x1.e38ca44203ab8p-1 + +Testing: (d1 = -(0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 / d2 +FE_TONEAREST -0x1.e38ca44203ab9p-1 +FE_DOWNWARD -0x1.e38ca44203ab9p-1 +FE_UPWARD -0x1.e38ca44203ab8p-1 +FE_TOWARDZERO -0x1.e38ca44203ab8p-1 + +Testing: (d1 = (0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 / d2 +FE_TONEAREST -0x1.e38ca44203ab9p-1 +FE_DOWNWARD -0x1.e38ca44203ab9p-1 +FE_UPWARD -0x1.e38ca44203ab8p-1 +FE_TOWARDZERO -0x1.e38ca44203ab8p-1 + +Testing: (d1 = -(0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 / d2 +FE_TONEAREST 0x1.e38ca44203ab9p-1 +FE_DOWNWARD 0x1.e38ca44203ab8p-1 +FE_UPWARD 0x1.e38ca44203ab9p-1 +FE_TOWARDZERO 0x1.e38ca44203ab8p-1 + +Testing: (d1 = (0x1.233445566778p0), d2 = (0x1.3456789abcdep0f)) -> d1 / d2 +FE_TONEAREST 0x1.e38ca534ae61p-1 +FE_DOWNWARD 0x1.e38ca534ae61p-1 +FE_UPWARD 0x1.e38ca211bd4adp-1 +FE_TOWARDZERO 0x1.e38ca534ae61p-1 + +Testing: (d1 = -(0x1.233445566778p0), d2 = (0x1.3456789abcdep0f)) -> d1 / d2 +FE_TONEAREST -0x1.e38ca534ae61p-1 +FE_DOWNWARD -0x1.e38ca534ae611p-1 +FE_UPWARD -0x1.e38ca211bd4acp-1 +FE_TOWARDZERO -0x1.e38ca534ae61p-1 + +Testing: (d1 = (0x1.233445566778p0), d2 = -(0x1.3456789abcdep0f)) -> d1 / d2 +FE_TONEAREST -0x1.e38ca534ae61p-1 +FE_DOWNWARD -0x1.e38ca211bd4adp-1 +FE_UPWARD -0x1.e38ca534ae61p-1 +FE_TOWARDZERO -0x1.e38ca534ae61p-1 + +Testing: (d1 = -(0x1.233445566778p0), d2 = -(0x1.3456789abcdep0f)) -> d1 / d2 +FE_TONEAREST 0x1.e38ca534ae61p-1 +FE_DOWNWARD 0x1.e38ca211bd4acp-1 +FE_UPWARD 0x1.e38ca534ae611p-1 +FE_TOWARDZERO 0x1.e38ca534ae61p-1 + +Testing: (d1 = (1.0), d2 = (0x1.0000000000001p0)) -> d2 - d1 +FE_TONEAREST 0x1p-52 +FE_DOWNWARD 0x1p-52 +FE_UPWARD 0x1p-52 +FE_TOWARDZERO 0x1p-52 + +Testing: (d1 = -(1.0), d2 = (0x1.0000000000001p0)) -> d2 - d1 +FE_TONEAREST 0x1p+1 +FE_DOWNWARD 0x1p+1 +FE_UPWARD 0x1.0000000000001p+1 +FE_TOWARDZERO 0x1p+1 + +Testing: (d1 = (1.0), d2 = -(0x1.0000000000001p0)) -> d2 - d1 +FE_TONEAREST -0x1p+1 +FE_DOWNWARD -0x1.0000000000001p+1 +FE_UPWARD -0x1p+1 +FE_TOWARDZERO -0x1p+1 + +Testing: (d1 = -(1.0), d2 = -(0x1.0000000000001p0)) -> d2 - d1 +FE_TONEAREST -0x1p-52 +FE_DOWNWARD -0x1p-52 +FE_UPWARD -0x1p-52 +FE_TOWARDZERO -0x1p-52 + +Testing: (d1 = (1.0), d2 = (0x1.000000000000dp-4)) -> d1 + d2 +FE_TONEAREST 0x1.1000000000001p+0 +FE_DOWNWARD 0x1.1p+0 +FE_UPWARD 0x1.1000000000001p+0 +FE_TOWARDZERO 0x1.1p+0 + +Testing: (d1 = -(1.0), d2 = (0x1.000000000000dp-4)) -> d1 + d2 +FE_TONEAREST -0x1.dfffffffffffep-1 
+FE_DOWNWARD -0x1.dffffffffffffp-1
+FE_UPWARD -0x1.dfffffffffffep-1
+FE_TOWARDZERO -0x1.dfffffffffffep-1
+
+Testing: (d1 = (1.0), d2 = -(0x1.000000000000dp-4)) -> d1 + d2
+FE_TONEAREST 0x1.dfffffffffffep-1
+FE_DOWNWARD 0x1.dfffffffffffep-1
+FE_UPWARD 0x1.dffffffffffffp-1
+FE_TOWARDZERO 0x1.dfffffffffffep-1
+
+Testing: (d1 = -(1.0), d2 = -(0x1.000000000000dp-4)) -> d1 + d2
+FE_TONEAREST -0x1.1000000000001p+0
+FE_DOWNWARD -0x1.1000000000001p+0
+FE_UPWARD -0x1.1p+0
+FE_TOWARDZERO -0x1.1p+0
+
+Testing: (d1 = (1.0), d2 = (0x1.000000000000dp-4)) -> d1 - d2
+FE_TONEAREST 0x1.dfffffffffffep-1
+FE_DOWNWARD 0x1.dfffffffffffep-1
+FE_UPWARD 0x1.dffffffffffffp-1
+FE_TOWARDZERO 0x1.dfffffffffffep-1
+
+Testing: (d1 = -(1.0), d2 = (0x1.000000000000dp-4)) -> d1 - d2
+FE_TONEAREST -0x1.1000000000001p+0
+FE_DOWNWARD -0x1.1000000000001p+0
+FE_UPWARD -0x1.1p+0
+FE_TOWARDZERO -0x1.1p+0
+
+Testing: (d1 = (1.0), d2 = -(0x1.000000000000dp-4)) -> d1 - d2
+FE_TONEAREST 0x1.1000000000001p+0
+FE_DOWNWARD 0x1.1p+0
+FE_UPWARD 0x1.1000000000001p+0
+FE_TOWARDZERO 0x1.1p+0
+
+Testing: (d1 = -(1.0), d2 = -(0x1.000000000000dp-4)) -> d1 - d2
+FE_TONEAREST -0x1.dfffffffffffep-1
+FE_DOWNWARD -0x1.dffffffffffffp-1
+FE_UPWARD -0x1.dfffffffffffep-1
+FE_TOWARDZERO -0x1.dfffffffffffep-1
+
+Testing: (d1 = (1.0), d2 = (0x1.000000000000dp-4)) -> d2 - d1
+FE_TONEAREST -0x1.dfffffffffffep-1
+FE_DOWNWARD -0x1.dffffffffffffp-1
+FE_UPWARD -0x1.dfffffffffffep-1
+FE_TOWARDZERO -0x1.dfffffffffffep-1
+
+Testing: (d1 = -(1.0), d2 = (0x1.000000000000dp-4)) -> d2 - d1
+FE_TONEAREST 0x1.1000000000001p+0
+FE_DOWNWARD 0x1.1p+0
+FE_UPWARD 0x1.1000000000001p+0
+FE_TOWARDZERO 0x1.1p+0
+
+Testing: (d1 = (1.0), d2 = -(0x1.000000000000dp-4)) -> d2 - d1
+FE_TONEAREST -0x1.1000000000001p+0
+FE_DOWNWARD -0x1.1000000000001p+0
+FE_UPWARD -0x1.1p+0
+FE_TOWARDZERO -0x1.1p+0
+
+Testing: (d1 = -(1.0), d2 = -(0x1.000000000000dp-4)) -> d2 - d1
+FE_TONEAREST 0x1.dfffffffffffep-1
+FE_DOWNWARD 0x1.dfffffffffffep-1
+FE_UPWARD 0x1.dffffffffffffp-1
+FE_TOWARDZERO 0x1.dfffffffffffep-1
+
+Testing X87 instruction: "FSQRT" (ST0 = 0x1p+2, ST1 = 0x0p+0)
+FE_TONEAREST ST0 = 0x1p+1
+FE_DOWNWARD ST0 = 0x1p+1
+FE_UPWARD ST0 = 0x1p+1
+FE_TOWARDZERO ST0 = 0x1p+1
+
+Testing X87 instruction: "FSQRT" (ST0 = 0x1.0000000000001p+1, ST1 = 0x0p+0)
+FE_TONEAREST ST0 = 0x1.6a09e667f3bcdp+0
+FE_DOWNWARD ST0 = 0x1.6a09e667f3bcdp+0
+FE_UPWARD ST0 = 0x1.6a09e667f3bcep+0
+FE_TOWARDZERO ST0 = 0x1.6a09e667f3bcdp+0
+
+Testing X87 instruction: "FSQRT" (ST0 = 0x1.123456789abcp+31, ST1 = 0x0p+0)
+FE_TONEAREST ST0 = 0x1.76b0aac9e6a5p+15
+FE_DOWNWARD ST0 = 0x1.76b0aac9e6a4fp+15
+FE_UPWARD ST0 = 0x1.76b0aac9e6a5p+15
+FE_TOWARDZERO ST0 = 0x1.76b0aac9e6a4fp+15
+
+Testing X87 instruction: "FSQRT" (ST0 = 0x1.123456789abdp+31, ST1 = 0x0p+0)
+FE_TONEAREST ST0 = 0x1.76b0aac9e6a5bp+15
+FE_DOWNWARD ST0 = 0x1.76b0aac9e6a5ap+15
+FE_UPWARD ST0 = 0x1.76b0aac9e6a5bp+15
+FE_TOWARDZERO ST0 = 0x1.76b0aac9e6a5ap+15
+
diff --git a/tests/test26 b/tests/test26
new file mode 100755
index 000000000..4efc881e5
Binary files /dev/null and b/tests/test26 differ
diff --git a/tests/test26.c b/tests/test26.c
new file mode 100644
index 000000000..f2097a18c
--- /dev/null
+++ b/tests/test26.c
@@ -0,0 +1,116 @@
+#include <fenv.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#define USE_ASM_ROUNDING
+#include "roundtest.h"
+
+// Build with
+// `gcc -march=core2 -O0 -m32 test26.c -o test26 -std=c99 -masm=intel
+// -mfpmath=387 -frounding-math`
+
+#define TEST_CONVERT_(stype, s_)                                              \
+  do {                                                                        \
+    stype s;                                                                  \
+    TEST_(s = (s_), (double)s, "%a");                                         \
+    TEST_(s = (s_), (float)s, "%a");                                          \
+    /* converting a too-large float to integer gives an undefined result,    \
+     * in both C99 and the FISTP instruction, so only convert values that    \
+     * are in range */                                                       \
+    if (INT64_MIN <= s && s <= INT64_MAX)                                     \
+      TEST_(s = (s_), (int64_t)s, "%" PRId64);                                \
+    if (INT32_MIN <= s && s <= INT32_MAX)                                     \
+      TEST_(s = (s_), (int32_t)s, "%" PRId32);                                \
+    if (INT16_MIN <= s && s <= INT16_MAX)                                     \
+      TEST_(s = (s_), (int16_t)s, "%" PRId16);                                \
+    if (INT8_MIN <= s && s <= INT8_MAX)                                       \
+      TEST_(s = (s_), (int8_t)s, "%" PRId8);                                  \
+    if (0 <= s && s <= UINT64_MAX)                                            \
+      TEST_(s = (s_), (uint64_t)s, "%" PRIu64);                               \
+    if (0 <= s && s <= UINT32_MAX)                                            \
+      TEST_(s = (s_), (unsigned int)s, "%" PRIu32);                           \
+    if (0 <= s && s <= UINT16_MAX)                                            \
+      TEST_(s = (s_), (unsigned short)s, "%" PRIu16);                         \
+    if (0 <= s && s <= UINT8_MAX)                                             \
+      TEST_(s = (s_), (unsigned char)s, "%" PRIu8);                           \
+  } while (0)
+
+#define TEST_CONVERT(stype, s_)                                               \
+  do {                                                                        \
+    TEST_CONVERT_(stype, s_);                                                 \
+    TEST_CONVERT_(stype, -(s_));                                              \
+  } while (0)
+
+#define TEST_2NUMBER(d1type, d1_, d2type, d2_, operation)                     \
+  do {                                                                        \
+    d1type d1;                                                                \
+    d2type d2;                                                                \
+    TEST((d1 = (d1_), d2 = (d2_)), operation);                                \
+    TEST((d1 = -(d1_), d2 = (d2_)), operation);                               \
+    TEST((d1 = (d1_), d2 = -(d2_)), operation);                               \
+    TEST((d1 = -(d1_), d2 = -(d2_)), operation);                              \
+  } while (0)
+
+int main() {
+  double d;
+  float f;
+  int64_t i64;
+  TEST_CONVERT(double, 0x1.123456789abcp2); // FISTTP
+  TEST_(d = (0x1.123456789abcp512), (float)d, "%a");
+  TEST_CONVERT(double, 0x1.123456789abcp29);
+  TEST_(d = (-0x1.123456789abcp30), (int32_t)d, "%" PRId32);
+  TEST_(d = (-0x1.123456789abcp62), (int64_t)d, "%" PRId64);
+
+  TEST_CONVERT(float, 0x1.123456789abcp2f);
+  TEST_CONVERT(float, 0x1.123456789abcp29f);
+  TEST_(f = -0x1.123456789abcp30f, (int32_t)f, "%" PRId32);
+  // to be fixed:
+  //TEST_(f = -0x1.123456789abcp62f, (int64_t)f, "%" PRId64);
+  // The direction of rounding when an integer is converted to a floating-point
+  // number that cannot exactly represent the original value is
+  // implementation-defined; see
+  // https://gcc.gnu.org/onlinedocs/gcc/Floating-point-implementation.html
+  // to be fixed:
+  //TEST_(i64 = INT64_MAX, (double)i64, "%a"); // FILD and FSTP
+  TEST(d = -0x1.1234567p0, (double)((int)d));
+  TEST(d = 0x1.9234567p0, (double)((int)d));
+  TEST(d = -0x1.9234567p0, (double)((int)d));
+
+  TEST(d = 0x1.1234567p0, (double)((long int)d));
+  TEST(d = -0x1.1234567p0, (double)((long int)d));
+  TEST(d = 0x1.9234567p0, (double)((long int)d));
+  TEST(d = -0x1.9234567p0, (double)((long int)d));
+
+  TEST_2NUMBER(double, 1.0, double, 0x1.0000000000001p0, d1 + d2);
+  TEST_2NUMBER(double, 1.0, double, 0x1.0000000000001p0, d1 - d2);
+  TEST_2NUMBER(double, 1.0, double, 0x1.0000000000001p0, d2 - d1);
+  TEST_2NUMBER(double, 1.0, double, 0x1.000000000000dp-4, d1 + d2);
+  TEST_2NUMBER(double, 1.0, double, 0x1.000000000000dp-4, d1 - d2);
+  TEST_2NUMBER(double, 1.0, double, 0x1.000000000000dp-4, d2 - d1);
+
+  TEST_2NUMBER(double, 0x1.233445566778p0, double, 0x1.3456789abcdep0, d1 + d2);
+  TEST_2NUMBER(float, 0x1.233445566778p0f, float, 0x1.3456789abcdep0f, d1 *d2);
+  TEST_2NUMBER(float, 0x1.233445566778p0f, double, 0x1.3456789abcdep0, d1 *d2);
+  TEST_2NUMBER(double, 0x1.233445566778p0, double, 0x1.3456789abcdep0, d1 *d2);
+  TEST_2NUMBER(float, 0x1.233445566778p0f, double, 0x1.3456789abcdep0, d1 *d2);
+  TEST_2NUMBER(double, 0x1.233445566778p0, float, 0x1.3456789abcdep0, d1 *d2);
+  TEST_2NUMBER(float, 0x1.233445566778p0f, float, 0x1.3456789abcdep0f, d1 *d2);
+  TEST_2NUMBER(double, 0x1.233445566778p0, int, 5, d1 *d2);
+  TEST_2NUMBER(int, 15, double, 0x1.3456789abcdep0f, d1 *d2);
+  TEST_2NUMBER(float, 0x1.233445566778p0f, int, 15, d1 *d2);
+  TEST_2NUMBER(int, 15, float, 0x1.3456789abcdep0f, d1 *d2);
+
+  TEST_2NUMBER(double, 0x1.233445566778p0, double, 0x1.3456789abcdep0, d1 / d2);
+  TEST_2NUMBER(double, 0x1.233445566778p0, double, 0x1.3456789abcdep0, d1 / d2);
+  TEST_2NUMBER(double, 0x1.233445566778p0, float, 0x1.3456789abcdep0f, d1 / d2);
+
+  TEST_2NUMBER(double, 1.0, double, 0x1.0000000000001p0, d2 - d1);
+  TEST_2NUMBER(double, 1.0, double, 0x1.000000000000dp-4, d1 + d2);
+  TEST_2NUMBER(double, 1.0, double, 0x1.000000000000dp-4, d1 - d2);
+  TEST_2NUMBER(double, 1.0, double, 0x1.000000000000dp-4, d2 - d1);
+
+  TEST_X87_1("FSQRT", 0x1.0000000000000p2);
+  TEST_X87_1("FSQRT", 0x1.0000000000001p1);
+  TEST_X87_1("FSQRT", 0x1.123456789abcp31);
+  TEST_X87_1("FSQRT", 0x1.123456789abdp31);
+
+  return 0;
+}
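
Note on the reference values above: every "Testing" block is one expression evaluated under the four C99 rounding modes. A minimal standalone sketch that reproduces such a block with fesetround() is shown below; the file name and the pair of constants are not part of this patch, they are just picked from one block above for illustration, and it must be built with -frounding-math (as the test26.c build comment already requires) so the compiler does not fold the product at compile time.

/* round_demo.c - hypothetical sketch, not part of this patch */
#include <fenv.h>
#include <stdio.h>

int main(void) {
    static const int modes[] = {FE_TONEAREST, FE_DOWNWARD,
                                FE_UPWARD, FE_TOWARDZERO};
    static const char *names[] = {"FE_TONEAREST", "FE_DOWNWARD",
                                  "FE_UPWARD", "FE_TOWARDZERO"};
    /* volatile keeps the operands out of compile-time constant folding */
    volatile double d1 = 0x1.233445566778p0, d2 = 0x1.3456789abcdep0;
    for (int i = 0; i < 4; i++) {
        fesetround(modes[i]);                 /* switch the dynamic rounding mode */
        printf("%s %a\n", names[i], d1 * d2); /* print the rounded product */
    }
    fesetround(FE_TONEAREST);                 /* restore the default mode */
    return 0;
}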
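The integer-conversion cases at the top of main() exercise a different subtlety: a C cast to an integer type always truncates toward zero regardless of the dynamic rounding mode (x87 code typically achieves this by temporarily forcing the control word to round-to-zero, or with FISTTP on SSE3-capable parts), whereas lrint() converts according to the current mode, as a plain FISTP does. A small sketch of the difference (again hypothetical, not part of this patch; link with -lm):

/* cast_vs_lrint.c - hypothetical sketch, not part of this patch */
#include <fenv.h>
#include <math.h>
#include <stdio.h>

int main(void) {
    volatile double d = 0x1.9234567p0;  /* about 1.571, as used in test26.c */
    fesetround(FE_TONEAREST);
    printf("(long)d=%ld lrint(d)=%ld\n", (long)d, lrint(d)); /* 1 and 2 */
    fesetround(FE_DOWNWARD);
    printf("(long)d=%ld lrint(d)=%ld\n", (long)d, lrint(d)); /* 1 and 1 */
    fesetround(FE_TONEAREST);           /* restore the default mode */
    return 0;
}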