diff --git a/src/dynarec/dynarec_arm_d8.c b/src/dynarec/dynarec_arm_d8.c
index 830c88246..c70e22358 100755
--- a/src/dynarec/dynarec_arm_d8.c
+++ b/src/dynarec/dynarec_arm_d8.c
@@ -1,4 +1,3 @@
-#include
 #include
 #include
 #include
@@ -54,13 +53,15 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             INST_NAME("FADD ST0, STx");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
-            u8 = x87_setround(dyn, ninst, x1, x2, x14);
+            if(!box86_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x14);
             if(ST_IS_F(0)) {
                 VADD_F32(v1, v1, v2);
             } else {
                 VADD_F64(v1, v1, v2);
             }
-            x87_restoreround(dyn, ninst, u8);
+            if(!box86_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         case 0xC8:
         case 0xC9:
@@ -129,13 +130,15 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             INST_NAME("FSUB ST0, STx");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
-            u8 = x87_setround(dyn, ninst, x1, x2, x14);
+            if(!box86_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x14);
             if(ST_IS_F(0)) {
                 VSUB_F32(v1, v1, v2);
             } else {
                 VSUB_F64(v1, v1, v2);
             }
-            x87_restoreround(dyn, ninst, u8);
+            if(!box86_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         case 0xE8:
         case 0xE9:
@@ -148,13 +151,15 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             INST_NAME("FSUBR ST0, STx");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
-            u8 = x87_setround(dyn, ninst, x1, x2, x14);
+            if(!box86_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x14);
             if(ST_IS_F(0)) {
                 VSUB_F32(v1, v2, v1);
             } else {
                 VSUB_F64(v1, v2, v1);
             }
-            x87_restoreround(dyn, ninst, u8);
+            if(!box86_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         case 0xF0:
         case 0xF1:
@@ -167,13 +172,15 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             INST_NAME("FDIV ST0, STx");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
-            u8 = x87_setround(dyn, ninst, x1, x2, x14);
+            if(!box86_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x14);
             if(ST_IS_F(0)) {
                 VDIV_F32(v1, v1, v2);
             } else {
                 VDIV_F64(v1, v1, v2);
             }
-            x87_restoreround(dyn, ninst, u8);
+            if(!box86_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         case 0xF8:
         case 0xF9:
@@ -186,13 +193,15 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             INST_NAME("FDIVR ST0, STx");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
-            u8 = x87_setround(dyn, ninst, x1, x2, x14);
+            if(!box86_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x14);
             if(ST_IS_F(0)) {
                 VDIV_F32(v1, v2, v1);
             } else {
                 VDIV_F64(v1, v2, v1);
             }
-            x87_restoreround(dyn, ninst, u8);
+            if(!box86_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         default:
@@ -210,14 +219,16 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                         GETED;
                         VMOVtoV(s0, ed);
                     }
-                    u8 = x87_setround(dyn, ninst, x1, x2, x14);
+                    if(!box86_dynarec_fastround)
+                        u8 = x87_setround(dyn, ninst, x1, x2, x14);
                     if(ST_IS_F(0)) {
                         VADD_F32(v1, v1, s0);
                     } else {
                         VCVT_F64_F32(d1, s0);
                         VADD_F64(v1, v1, d1);
                     }
-                    x87_restoreround(dyn, ninst, u8);
+                    if(!box86_dynarec_fastround)
+                        x87_restoreround(dyn, ninst, u8);
                     break;
                 case 1:
                     INST_NAME("FMUL ST0, float[ED]");
@@ -232,14 +243,16 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                         GETED;
                         VMOVtoV(s0, ed);
                     }
-                    u8 = x87_setround(dyn, ninst, x1, x2, x14);
+                    if(!box86_dynarec_fastround)
+                        u8 = x87_setround(dyn, ninst, x1, x2, x14);
                     if(ST_IS_F(0)) {
                         VMUL_F32(v1, v1, s0);
                     } else {
                         VCVT_F64_F32(d1, s0);
                         VMUL_F64(v1, v1, d1);
                     }
-                    x87_restoreround(dyn, ninst, u8);
+                    if(!box86_dynarec_fastround)
+                        x87_restoreround(dyn, ninst, u8);
                     break;
                 case 2:
                     INST_NAME("FCOM ST0, float[ED]");
@@ -297,14 +310,16 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                         GETED;
                         VMOVtoV(s0, ed);
                     }
-                    u8 = x87_setround(dyn, ninst, x1, x2, x14);
+                    if(!box86_dynarec_fastround)
+                        u8 = x87_setround(dyn, ninst, x1, x2, x14);
                     if(ST_IS_F(0)) {
                         VSUB_F32(v1, v1, s0);
                     } else {
                         VCVT_F64_F32(d1, s0);
                         VSUB_F64(v1, v1, d1);
                     }
-                    x87_restoreround(dyn, ninst, u8);
+                    if(!box86_dynarec_fastround)
+                        x87_restoreround(dyn, ninst, u8);
                     break;
                 case 5:
                     INST_NAME("FSUBR ST0, float[ED]");
@@ -319,14 +334,16 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                         GETED;
                         VMOVtoV(s0, ed);
                     }
-                    u8 = x87_setround(dyn, ninst, x1, x2, x14);
+                    if(!box86_dynarec_fastround)
+                        u8 = x87_setround(dyn, ninst, x1, x2, x14);
                     if(ST_IS_F(0)) {
                         VSUB_F32(v1, s0, v1);
                     } else {
                         VCVT_F64_F32(d1, s0);
                         VSUB_F64(v1, d1, v1);
                     }
-                    x87_restoreround(dyn, ninst, u8);
+                    if(!box86_dynarec_fastround)
+                        x87_restoreround(dyn, ninst, u8);
                     break;
                 case 6:
                     INST_NAME("FDIV ST0, float[ED]");
@@ -341,14 +358,16 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                        GETED;
                         VMOVtoV(s0, ed);
                     }
-                    u8 = x87_setround(dyn, ninst, x1, x2, x14);
+                    if(!box86_dynarec_fastround)
+                        u8 = x87_setround(dyn, ninst, x1, x2, x14);
                     if(ST_IS_F(0)) {
                         VDIV_F32(v1, v1, s0);
                     } else {
                         VCVT_F64_F32(d1, s0);
                         VDIV_F64(v1, v1, d1);
                     }
-                    x87_restoreround(dyn, ninst, u8);
+                    if(!box86_dynarec_fastround)
+                        x87_restoreround(dyn, ninst, u8);
                     break;
                 case 7:
                     INST_NAME("FDIVR ST0, float[ED]");
@@ -363,14 +382,16 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                         GETED;
                         VMOVtoV(s0, ed);
                     }
-                    u8 = x87_setround(dyn, ninst, x1, x2, x14);
+                    if(!box86_dynarec_fastround)
+                        u8 = x87_setround(dyn, ninst, x1, x2, x14);
                     if(ST_IS_F(0)) {
                         VDIV_F32(v1, s0, v1);
                     } else {
                         VCVT_F64_F32(d1, s0);
                         VDIV_F64(v1, d1, v1);
                     }
-                    x87_restoreround(dyn, ninst, u8);
+                    if(!box86_dynarec_fastround)
+                        x87_restoreround(dyn, ninst, u8);
                     break;
                 default:
                     DEFAULT;
diff --git a/src/dynarec/dynarec_arm_d9.c b/src/dynarec/dynarec_arm_d9.c
index 8f7a688d2..09747a221 100755
--- a/src/dynarec/dynarec_arm_d9.c
+++ b/src/dynarec/dynarec_arm_d9.c
@@ -281,42 +281,47 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             CALL(arm_f2xm1, -1, 0);
 #else
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-            VMRS_APSR();
-            B_NEXT(cEQ);
-            LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, cw));    // hopefully cw is not too far for an imm8
-            UBFX(x1, x1, 10, 2);    // extract round...
-            UBFX(x2, x1, 1, 1);     // swap bits 0 and 1
-            BFI(x2, x1, 1, 1);
-            VMRS(x14);              // get fpscr
-            MOV_REG(x3, x14);
+            if(!box86_dynarec_fastround) {
+                LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, cw));    // hopefully cw is not too far for an imm8
+                UBFX(x1, x1, 10, 2);    // extract round...
+                UBFX(x2, x1, 1, 1);     // swap bits 0 and 1
+                BFI(x2, x1, 1, 1);
+                VMRS(x14);              // get fpscr
+                MOV_REG(x3, x14);
+            }
             if((PK(0)==0xD9 && PK(1)==0xE8) &&  // next inst is FLD1
                 (PK(2)==0xDE && PK(3)==0xC1)) {
                 MESSAGE(LOG_DUMP, "Hack for fld1 / faddp st1, st0\n");
-                VCMP_F64_0(v1);
-                B_MARK(cGE);    // if ST0 < 0 and if the rounding mode is toward 0, then use upward
-                TSTS_IMM8(x2, 0b01);
-                AND_IMM8_COND(cNE, x2, x2, 0b01);   // 11 (TOWARDZERO) -> 01 (UPWARD), 01 -> 01
-                MARK;
-                BFI(x3, x2, 22, 2); // inject new round
-                VMSR(x3);
+                if(!box86_dynarec_fastround) {
+                    VCMP_F64_0(v1);
+                    B_MARK(cGE);    // if ST0 < 0 and if the rounding mode is toward 0, then use upward
+                    TSTS_IMM8(x2, 0b01);
+                    AND_IMM8_COND(cNE, x2, x2, 0b01);   // 11 (TOWARDZERO) -> 01 (UPWARD), 01 -> 01
+                    MARK;
+                    BFI(x3, x2, 22, 2); // inject new round
+                    VMSR(x3);
+                }
                 VMOV_64(0, v1);
-                CALL_1D(exp2, 1 << x14);    // return is d0
+                CALL_1D(exp2, box86_dynarec_fastround ? 0 : (1 << x14));    // return is d0
                 VMOV_64(v1, 0);
                 addr+=4;
             } else {
-                BFI(x3, x2, 22, 2); // inject new round
-                VMSR(x3);           // put new fpscr
+                if(!box86_dynarec_fastround) {
+                    BFI(x3, x2, 22, 2); // inject new round
+                    VMSR(x3);           // put new fpscr
+                }
                 //ST0.d = expm1(LN2 * ST0.d);
                 MOV32(x2, (&d_ln2));
                 VLDR_64(0, x2, 0);
                 VMUL_F64(0, 0, v1);
                 x87_setround(dyn, ninst, x1, x2, -1);
-                CALL_1D(expm1, 1 << x14);   // return is d0
+                CALL_1D(expm1, box86_dynarec_fastround ? 0 : (1 << x14));   // return is d0
                 VMOV_64(v1, 0);
             }
-            VMSR(x14);
+            if(!box86_dynarec_fastround)
+                VMSR(x14);
 #endif
             // should set C1 to 0
             break;
@@ -325,32 +330,36 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
             v2 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D);
-            LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, cw));    // hopefully cw is not too far for an imm8
-            UBFX(x1, x1, 10, 2);    // extract round...
-            VCMP_F64_0(v2);
-            VMRS_APSR();
-            B_MARK(cLT);    // if ST1.d < 0 then don't swap bits 0 and 1
-            BFI(x1, x1, 2, 1);  // if ST1.d >= 0 then swap bits 0 and 1
-            UBFX(x1, x1, 1, 2);
-            MARK;
-            s0 = fpu_get_scratch_double(dyn);
-            VMOV_i_64(s0, 0b01110000);  // = 1.0
-            VCMP_F64(v1, s0);
-            VMRS_APSR();
-            B_MARK2(cGE);   // if ST0 < 1 and if the rounding mode is toward 0, then use upward
-            TSTS_IMM8(x1, 0b01);
-            AND_IMM8_COND(cNE, x1, x1, 0b01);   // 11 (TOWARDZERO) -> 01 (UPWARD), 01 -> 01
-            MARK2;
-            VMRS(x14);      // get fpscr
-            MOV_REG(x3, x14);
-            BFI(x3, x1, 22, 2); // inject new round
-            VMSR(x3);       // put new fpscr
+            if(!box86_dynarec_fastround) {
+                LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, cw));    // hopefully cw is not too far for an imm8
+                UBFX(x1, x1, 10, 2);    // extract round...
+                VCMP_F64_0(v2);
+                VMRS_APSR();
+                B_MARK(cLT);    // if ST1.d < 0 then don't swap bits 0 and 1
+                BFI(x1, x1, 2, 1);  // if ST1.d >= 0 then swap bits 0 and 1
+                UBFX(x1, x1, 1, 2);
+                MARK;
+                s0 = fpu_get_scratch_double(dyn);
+                VMOV_i_64(s0, 0b01110000);  // = 1.0
+                VCMP_F64(v1, s0);
+                VMRS_APSR();
+                B_MARK2(cGE);   // if ST0 < 1 and if the rounding mode is toward 0, then use upward
+                TSTS_IMM8(x1, 0b01);
+                AND_IMM8_COND(cNE, x1, x1, 0b01);   // 11 (TOWARDZERO) -> 01 (UPWARD), 01 -> 01
+                MARK2;
+                VMRS(x14);      // get fpscr
+                MOV_REG(x3, x14);
+                BFI(x3, x1, 22, 2); // inject new round
+                VMSR(x3);       // put new fpscr
+            }
             VMOV_64(0, v1); // prepare call to log2
-            CALL_1D(log2, 1 << x14);
-            x87_setround(dyn, ninst, x1, x2, -1);
+            CALL_1D(log2, box86_dynarec_fastround ? 0 : (1 << x14));
+            if(!box86_dynarec_fastround)
+                x87_setround(dyn, ninst, x1, x2, -1);
             VMUL_F64(v2, v2, 0);    //ST(1).d = log2(ST0.d)*ST(1).d
-            VMSR(x14);
+            if(!box86_dynarec_fastround)
+                VMSR(x14);
             x87_do_pop(dyn, ninst, x3);
             // should set C1 to 0
             break;
@@ -388,12 +397,14 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             INST_NAME("FPATAN");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
             v2 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D);
-            u8 = x87_setround(dyn, ninst, x1, x2, x14);
+            if(!box86_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x14);
             VMOV_64(0, v2); // prepare call to atan2
             VMOV_64(1, v1);
-            CALL_2D(atan2, 1 << u8);
+            CALL_2D(atan2, box86_dynarec_fastround ? 0 : (1 << u8));
             VMOV_64(v2, 0); //ST(1).d = atan2(ST1.d, ST0.d);
-            x87_restoreround(dyn, ninst, u8);
+            if(!box86_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             x87_do_pop(dyn, ninst, x3);
             // should set C1 to 0
             break;
@@ -501,41 +512,47 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             INST_NAME("FYL2XP1");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
             v2 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D);
-            LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, cw));    // hopefully cw is not too far for an imm8
-            UBFX(x1, x1, 10, 2);    // extract round...
-            VCMP_F64_0(v2);
-            VMRS_APSR();
-            B_MARK(cLT);    // if ST1.d < 0 then don't swap bits 0 and 1
-            BFI(x1, x1, 2, 1);  // if ST1.d >= 0 then swap bits 0 and 1
-            UBFX(x1, x1, 1, 2);
-            MARK;
-            VMRS(x14);      // get fpscr
-            MOV_REG(x3, x14);
-            BFI(x3, x1, 22, 2); // inject new round
-            VMSR(x3);       // put new fpscr
+            if(!box86_dynarec_fastround) {
+                LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, cw));    // hopefully cw is not too far for an imm8
+                UBFX(x1, x1, 10, 2);    // extract round...
+                VCMP_F64_0(v2);
+                VMRS_APSR();
+                B_MARK(cLT);    // if ST1.d < 0 then don't swap bits 0 and 1
+                BFI(x1, x1, 2, 1);  // if ST1.d >= 0 then swap bits 0 and 1
+                UBFX(x1, x1, 1, 2);
+                MARK;
+                VMRS(x14);      // get fpscr
+                MOV_REG(x3, x14);
+                BFI(x3, x1, 22, 2); // inject new round
+                VMSR(x3);       // put new fpscr
+            }
             //ST(1).d = (ST(1).d * log1p(ST0.d)) / M_LN2;
             VMOV_64(0, v1); // prepare call to log1p
-            CALL_1D(log1p, 1 << x14);
-            x87_setround(dyn, ninst, x1, x2, -1);
+            CALL_1D(log1p, box86_dynarec_fastround ? 0 : (1 << x14));
+            if(!box86_dynarec_fastround)
+                x87_setround(dyn, ninst, x1, x2, -1);
             VMUL_F64(v2, v2, 0);
             MOV32(x2, (&d_ln2));
             VLDR_64(0, x2, 0);
             VDIV_F64(v2, v2, 0);
-            VMSR(x14);
+            if(!box86_dynarec_fastround)
+                VMSR(x14);
             x87_do_pop(dyn, ninst, x3);
             // should set C1 to 0
             break;
         case 0xFA:
             INST_NAME("FSQRT");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
-            u8 = x87_setround(dyn, ninst, x1, x2, x14);
+            if(!box86_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x14);
             if(ST_IS_F(0)) {
                 VSQRT_F32(v1, v1);
             } else {
                 VSQRT_F64(v1, v1);
             }
-            x87_restoreround(dyn, ninst, u8);
+            if(!box86_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             // should set C1 to 0
             break;
         case 0xFB:
@@ -585,16 +602,18 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             v2 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D);
             //if(ST0.d!=0.0)
             //    ST0.d = ldexp(ST0.d, trunc(ST1.d));
-            u8 = x87_setround(dyn, ninst, x1, x2, x14);
+            if(!box86_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x14);
             s0 = fpu_get_scratch_single(dyn);
             // value of s0 =
             // 2^31-1 (ST1 >= 2^31), -2^31 (ST1 < -2^31) or int(ST1) (other situations)
             VCVT_S32_F64(s0 , v2);
             VMOVfrV(x2, s0);
             VMOV_64(0, v1);
-            CALL_1DDR(ldexp, x2, x3, 1 << u8);
+            CALL_1DDR(ldexp, x2, x3, box86_dynarec_fastround ? 0 : (1 << u8));
             VMOV_64(v1, 0);
-            x87_restoreround(dyn, ninst, u8);
+            if(!box86_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             // should set C1 to 0
             break;
         case 0xFE:
@@ -678,9 +697,11 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                         s0 = v1;
                     else {
                         s0 = fpu_get_scratch_single(dyn);
-                        u8 = x87_setround(dyn, ninst, x1, x2, x14);
+                        if(!box86_dynarec_fastround)
+                            u8 = x87_setround(dyn, ninst, x1, x2, x14);
                         VCVT_F32_F64(s0, v1);
-                        x87_restoreround(dyn, ninst, u8);
+                        if(!box86_dynarec_fastround)
+                            x87_restoreround(dyn, ninst, u8);
                     }
                     parity = getedparity(dyn, ninst, addr, nextop, 2);
                     if(parity) {
@@ -699,9 +720,11 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                         s0 = v1;
                     else {
                         s0 = fpu_get_scratch_single(dyn);
-                        u8 = x87_setround(dyn, ninst, x1, x2, x14);
+                        if(!box86_dynarec_fastround)
+                            u8 = x87_setround(dyn, ninst, x1, x2, x14);
                         VCVT_F32_F64(s0, v1);
-                        x87_restoreround(dyn, ninst, u8);
+                        if(!box86_dynarec_fastround)
+                            x87_restoreround(dyn, ninst, u8);
                     }
                     parity = getedparity(dyn, ninst, addr, nextop, 2);
                     if(parity) {
diff --git a/src/dynarec/dynarec_arm_da.c b/src/dynarec/dynarec_arm_da.c
index 971f5dd3d..a2b27957b 100755
--- a/src/dynarec/dynarec_arm_da.c
+++ b/src/dynarec/dynarec_arm_da.c
@@ -156,9 +156,11 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                     s0 = fpu_get_scratch_single(dyn);
                     VMOVtoV(s0, ed);
                     VCVT_F64_S32(d0, s0);
-                    u8 = x87_setround(dyn, ninst, x1, x2, x14);
+                    if(!box86_dynarec_fastround)
+                        u8 = x87_setround(dyn, ninst, x1, x2, x14);
                     VADD_F64(v1, v1, d0);
-                    x87_restoreround(dyn, ninst, u8);
+                    if(!box86_dynarec_fastround)
+                        x87_restoreround(dyn, ninst, u8);
                     break;
                 case 1:
                     INST_NAME("FIMUL ST0, Ed");
@@ -168,9 +170,11 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                     s0 = fpu_get_scratch_single(dyn);
                     VMOVtoV(s0, ed);
                     VCVT_F64_S32(d0, s0);
-                    u8 = x87_setround(dyn, ninst, x1, x2, x14);
+                    if(!box86_dynarec_fastround)
+                        u8 = x87_setround(dyn, ninst, x1, x2, x14);
                     VMUL_F64(v1, v1, d0);
-                    x87_restoreround(dyn, ninst, u8);
+                    if(!box86_dynarec_fastround)
+                        x87_restoreround(dyn, ninst, u8);
                     break;
                 case 2:
                     INST_NAME("FICOM ST0, Ed");
@@ -203,9 +207,11 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                     s0 = fpu_get_scratch_single(dyn);
                     VMOVtoV(s0, ed);
                     VCVT_F64_S32(d0, s0);
-                    u8 = x87_setround(dyn, ninst, x1, x2, x14);
+                    if(!box86_dynarec_fastround)
+                        u8 = x87_setround(dyn, ninst, x1, x2, x14);
                     VSUB_F64(v1, v1, d0);
-                    x87_restoreround(dyn, ninst, u8);
+                    if(!box86_dynarec_fastround)
+                        x87_restoreround(dyn, ninst, u8);
                     break;
                 case 5:
                     INST_NAME("FISUBR ST0, Ed");
@@ -215,9 +221,11 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                     s0 = fpu_get_scratch_single(dyn);
                     VMOVtoV(s0, ed);
                     VCVT_F64_S32(d0, s0);
-                    u8 = x87_setround(dyn, ninst, x1, x2, x14);
+                    if(!box86_dynarec_fastround)
+                        u8 = x87_setround(dyn, ninst, x1, x2, x14);
                     VSUB_F64(v1, d0, v1);
-                    x87_restoreround(dyn, ninst, u8);
+                    if(!box86_dynarec_fastround)
+                        x87_restoreround(dyn, ninst, u8);
                     break;
                 case 6:
                     INST_NAME("FIDIV ST0, Ed");
@@ -227,9 +235,11 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                     s0 = fpu_get_scratch_single(dyn);
                     VMOVtoV(s0, ed);
                     VCVT_F64_S32(d0, s0);
-                    u8 = x87_setround(dyn, ninst, x1, x2, x14);
+                    if(!box86_dynarec_fastround)
+                        u8 = x87_setround(dyn, ninst, x1, x2, x14);
                     VDIV_F64(v1, v1, d0);
-                    x87_restoreround(dyn, ninst, u8);
+                    if(!box86_dynarec_fastround)
+                        x87_restoreround(dyn, ninst, u8);
                     break;
                 case 7:
                     INST_NAME("FIDIVR ST0, Ed");
@@ -239,9 +249,11 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                     s0 = fpu_get_scratch_single(dyn);
                     VMOVtoV(s0, ed);
                     VCVT_F64_S32(d0, s0);
-                    u8 = x87_setround(dyn, ninst, x1, x2, x14);
+                    if(!box86_dynarec_fastround)
+                        u8 = x87_setround(dyn, ninst, x1, x2, x14);
                     VDIV_F64(v1, d0, v1);
-                    x87_restoreround(dyn, ninst, u8);
+                    if(!box86_dynarec_fastround)
+                        x87_restoreround(dyn, ninst, u8);
                     break;
             }
     }
diff --git a/src/dynarec/dynarec_arm_dc.c b/src/dynarec/dynarec_arm_dc.c
index 4ec31e7d0..e9d6e67cf 100755
--- a/src/dynarec/dynarec_arm_dc.c
+++ b/src/dynarec/dynarec_arm_dc.c
@@ -49,13 +49,15 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             INST_NAME("FADD STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
-            u8 = x87_setround(dyn, ninst, x1, x2, x14);
+            if(!box86_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x14);
             if(ST_IS_F(0)) {
                 VADD_F32(v1, v1, v2);
             } else {
                 VADD_F64(v1, v1, v2);
             }
-            x87_restoreround(dyn, ninst, u8);
+            if(!box86_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         case 0xC8:
         case 0xC9:
@@ -68,13 +70,15 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             INST_NAME("FMUL STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
-            u8 = x87_setround(dyn, ninst, x1, x2, x14);
+            if(!box86_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x14);
             if(ST_IS_F(0)) {
                 VMUL_F32(v1, v1, v2);
             } else {
                 VMUL_F64(v1, v1, v2);
             }
-            x87_restoreround(dyn, ninst, u8);
+            if(!box86_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         case 0xD0:
         case 0xD1:
@@ -124,13 +128,15 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             INST_NAME("FSUBR STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
-            u8 = x87_setround(dyn, ninst, x1, x2, x14);
+            if(!box86_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x14);
             if(ST_IS_F(0)) {
                 VSUB_F32(v1, v2, v1);
             } else {
                 VSUB_F64(v1, v2, v1);
             }
-            x87_restoreround(dyn, ninst, u8);
+            if(!box86_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         case 0xE8:
         case 0xE9:
@@ -143,13 +149,15 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             INST_NAME("FSUB STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
-            u8 = x87_setround(dyn, ninst, x1, x2, x14);
+            if(!box86_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x14);
             if(ST_IS_F(0)) {
                 VSUB_F32(v1, v1, v2);
             } else {
                 VSUB_F64(v1, v1, v2);
             }
-            x87_restoreround(dyn, ninst, u8);
+            if(!box86_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         case 0xF0:
         case 0xF1:
@@ -162,13 +170,15 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             INST_NAME("FDIVR STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
-            u8 = x87_setround(dyn, ninst, x1, x2, x14);
+            if(!box86_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x14);
             if(ST_IS_F(0)) {
                 VDIV_F32(v1, v2, v1);
             } else {
                 VDIV_F64(v1, v2, v1);
             }
-            x87_restoreround(dyn, ninst, u8);
+            if(!box86_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         case 0xF8:
         case 0xF9:
@@ -181,13 +191,15 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             INST_NAME("FDIV STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
-            u8 = x87_setround(dyn, ninst, x1, x2, x14);
+            if(!box86_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x14);
             if(ST_IS_F(0)) {
                 VDIV_F32(v1, v1, v2);
             } else {
                 VDIV_F64(v1, v1, v2);
             }
-            x87_restoreround(dyn, ninst, u8);
+            if(!box86_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         default:
             switch((nextop>>3)&7) {
@@ -205,9 +217,11 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                         LDR_IMM9(x3, wback, fixedaddress+4);
                         VMOVtoV_D(d1, x2, x3);
                     }
-                    u8 = x87_setround(dyn, ninst, x1, x2, x14);
+                    if(!box86_dynarec_fastround)
+                        u8 = x87_setround(dyn, ninst, x1, x2, x14);
                     VADD_F64(v1, v1, d1);
-                    x87_restoreround(dyn, ninst, u8);
+                    if(!box86_dynarec_fastround)
+                        x87_restoreround(dyn, ninst, u8);
                     break;
                 case 1:
                     INST_NAME("FMUL ST0, double[ED]");
@@ -223,9 +237,11 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                         LDR_IMM9(x3, wback, fixedaddress+4);
                         VMOVtoV_D(d1, x2, x3);
                     }
-                    u8 = x87_setround(dyn, ninst, x1, x2, x14);
+                    if(!box86_dynarec_fastround)
+                        u8 = x87_setround(dyn, ninst, x1, x2, x14);
                     VMUL_F64(v1, v1, d1);
-                    x87_restoreround(dyn, ninst, u8);
+                    if(!box86_dynarec_fastround)
+                        x87_restoreround(dyn, ninst, u8);
                     break;
                 case 2:
                     INST_NAME("FCOM ST0, double[ED]");
@@ -276,9 +292,11 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                         LDR_IMM9(x3, wback, fixedaddress+4);
                         VMOVtoV_D(d1, x2, x3);
                     }
-                    u8 = x87_setround(dyn, ninst, x1, x2, x14);
+                    if(!box86_dynarec_fastround)
+                        u8 = x87_setround(dyn, ninst, x1, x2, x14);
                     VSUB_F64(v1, v1, d1);
-                    x87_restoreround(dyn, ninst, u8);
+                    if(!box86_dynarec_fastround)
+                        x87_restoreround(dyn, ninst, u8);
                     break;
                 case 5:
                     INST_NAME("FSUBR ST0, double[ED]");
@@ -294,9 +312,11 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                         LDR_IMM9(x3, wback, fixedaddress+4);
                         VMOVtoV_D(d1, x2, x3);
                     }
-                    u8 = x87_setround(dyn, ninst, x1, x2, x14);
+                    if(!box86_dynarec_fastround)
+                        u8 = x87_setround(dyn, ninst, x1, x2, x14);
                     VSUB_F64(v1, d1, v1);
-                    x87_restoreround(dyn, ninst, u8);
+                    if(!box86_dynarec_fastround)
+                        x87_restoreround(dyn, ninst, u8);
                     break;
                 case 6:
                     INST_NAME("FDIV ST0, double[ED]");
@@ -312,9 +332,11 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                         LDR_IMM9(x3, wback, fixedaddress+4);
                         VMOVtoV_D(d1, x2, x3);
                     }
-                    u8 = x87_setround(dyn, ninst, x1, x2, x14);
+                    if(!box86_dynarec_fastround)
+                        u8 = x87_setround(dyn, ninst, x1, x2, x14);
                     VDIV_F64(v1, v1, d1);
-                    x87_restoreround(dyn, ninst, u8);
+                    if(!box86_dynarec_fastround)
+                        x87_restoreround(dyn, ninst, u8);
                     break;
                 case 7:
                     INST_NAME("FDIVR ST0, double[ED]");
@@ -330,9 +352,11 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                         LDR_IMM9(x3, wback, fixedaddress+4);
                         VMOVtoV_D(d1, x2, x3);
                     }
-                    u8 = x87_setround(dyn, ninst, x1, x2, x14);
+                    if(!box86_dynarec_fastround)
+                        u8 = x87_setround(dyn, ninst, x1, x2, x14);
                     VDIV_F64(v1, d1, v1);
-                    x87_restoreround(dyn, ninst, u8);
+                    if(!box86_dynarec_fastround)
+                        x87_restoreround(dyn, ninst, u8);
                     break;
             }
     }
diff --git a/src/dynarec/dynarec_arm_de.c b/src/dynarec/dynarec_arm_de.c
index 7114debf7..bd8afdfad 100755
--- a/src/dynarec/dynarec_arm_de.c
+++ b/src/dynarec/dynarec_arm_de.c
@@ -44,13 +44,15 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             INST_NAME("FADDP STx, ST0");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
-            u8 = x87_setround(dyn, ninst, x1, x2, x14);
+            if(!box86_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x14);
             if(ST_IS_F(0)) {
                 VADD_F32(v2, v2, v1);
             } else {
                 VADD_F64(v2, v2, v1);
             }
-            x87_restoreround(dyn, ninst, u8);
+            if(!box86_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             x87_do_pop(dyn, ninst, x3);
             break;
         case 0xC8:
@@ -64,13 +66,15 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             INST_NAME("FMULP STx, ST0");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
-            u8 = x87_setround(dyn, ninst, x1, x2, x14);
+            if(!box86_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x14);
             if(ST_IS_F(0)) {
                 VMUL_F32(v2, v2, v1);
             } else {
                 VMUL_F64(v2, v2, v1);
             }
-            x87_restoreround(dyn, ninst, u8);
+            if(!box86_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             x87_do_pop(dyn, ninst, x3);
             break;
         case 0xD0:
@@ -117,13 +121,15 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             INST_NAME("FSUBRP STx, ST0");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
-            u8 = x87_setround(dyn, ninst, x1, x2, x14);
+            if(!box86_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x14);
             if(ST_IS_F(0)) {
                 VSUB_F32(v2, v1, v2);
             } else {
                 VSUB_F64(v2, v1, v2);
             }
-            x87_restoreround(dyn, ninst, u8);
+            if(!box86_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             x87_do_pop(dyn, ninst, x3);
             break;
         case 0xE8:
@@ -137,13 +143,15 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             INST_NAME("FSUBP STx, ST0");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
-            u8 = x87_setround(dyn, ninst, x1, x2, x14);
+            if(!box86_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x14);
             if(ST_IS_F(0)) {
                 VSUB_F32(v2, v2, v1);
             } else {
                 VSUB_F64(v2, v2, v1);
             }
-            x87_restoreround(dyn, ninst, u8);
+            if(!box86_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             x87_do_pop(dyn, ninst, x3);
             break;
         case 0xF0:
@@ -157,13 +165,15 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             INST_NAME("FDIVRP STx, ST0");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
-            u8 = x87_setround(dyn, ninst, x1, x2, x14);
+            if(!box86_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x14);
             if(ST_IS_F(0)) {
                 VDIV_F32(v2, v1, v2);
             } else {
                 VDIV_F64(v2, v1, v2);
             }
-            x87_restoreround(dyn, ninst, u8);
+            if(!box86_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             x87_do_pop(dyn, ninst, x3);
             break;
         case 0xF8:
@@ -177,17 +187,17 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             INST_NAME("FDIVP STx, ST0");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
-            VMRS(x14);      // get fpscr
             if(!box86_dynarec_fastnan) {
+                VMRS(x14);  // get fpscr
                 ORR_IMM8(x3, x14, 0b010, 9);    // enable exceptions
                 BIC_IMM8(x3, x3, 0b10011111, 0);
-            } else MOV_REG(x3, x14);
-            LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, cw));    // hopefully cw is not too far for an imm8
-            UBFX(x1, x1, 10, 2);    // extract round...
-            UBFX(x2, x1, 1, 1);     // swap bits 0 and 1
-            BFI(x2, x1, 1, 1);
-            BFI(x3, x2, 22, 2);     // inject new round
-            VMSR(x3);               // put new fpscr
+                LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, cw));    // hopefully cw is not too far for an imm8
+                UBFX(x1, x1, 10, 2);    // extract round...
+                UBFX(x2, x1, 1, 1);     // swap bits 0 and 1
+                BFI(x2, x1, 1, 1);
+                BFI(x3, x2, 22, 2);     // inject new round
+                VMSR(x3);               // put new fpscr
+            }
             if(ST_IS_F(0)) {
                 VDIV_F32(v2, v2, v1);
             } else {
@@ -201,8 +211,8 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                 } else {
                     VNEG_F64_cond(cNE, v2, v2);
                 }
+                VMSR(x14);      // restore fpscr
             }
-            VMSR(x14);      // restore fpscr
             x87_do_pop(dyn, ninst, x3);
             break;
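
Reviewer note (not part of the patch): every hunk above applies the same transformation, so here is a minimal standalone C sketch of the trade-off being made, expressed with the portable <fenv.h> API instead of box86's FPSCR-level x87_setround()/x87_restoreround() helpers. The function and variable names (add_with_rounding, fastround) are illustrative only and do not exist in box86.

#include <fenv.h>
#include <stdio.h>

/* Illustration only: when "fastround" is set, skip saving the current
 * rounding mode, switching to the emulated x87 control-word mode, and
 * restoring it afterwards -- the same save/switch/restore the patch now
 * gates behind box86_dynarec_fastround in the generated ARM code. */
static double add_with_rounding(double a, double b, int x87_round, int fastround)
{
    if (fastround)
        return a + b;             /* fast path: keep whatever mode is active */

    int saved = fegetround();     /* save current rounding mode (FPSCR analogue) */
    fesetround(x87_round);        /* apply the emulated x87 rounding mode */
    double r = a + b;             /* the actual operation */
    fesetround(saved);            /* restore the saved mode */
    return r;
}

int main(void)
{
    /* With FE_UPWARD the tiny addend is visible; the fast path keeps the
     * default round-to-nearest and drops it, which is the accepted
     * accuracy trade-off of the fastround option. */
    printf("%.20g\n", add_with_rounding(1.0, 1e-30, FE_UPWARD, 0));
    printf("%.20g\n", add_with_rounding(1.0, 1e-30, FE_UPWARD, 1));
    return 0;
}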