Skip to content

Commit

Permalink
[DYNAREC] Backported some Flags optimisation fixes from box64
Browse files Browse the repository at this point in the history
  • Loading branch information
ptitSeb committed May 19, 2024
1 parent 79e622a commit 25e1d8b
Show file tree
Hide file tree
Showing 11 changed files with 85 additions and 86 deletions.
11 changes: 4 additions & 7 deletions src/dynarec/dynarec_arm.c
Original file line number Diff line number Diff line change
Expand Up @@ -332,14 +332,10 @@ static int updateNeed(dynarec_arm_t* dyn, int ninst, uint8_t need) {
while (ninst>=0) {
// need pending but instruction is only a subset: remove pend and use an X_ALL instead
need |= dyn->insts[ninst].x86.need_after;
if((need&X_PEND) && (dyn->insts[ninst].x86.state_flags==SF_SUBSET)) {
if((need&X_PEND) && (dyn->insts[ninst].x86.state_flags==SF_SUBSET || dyn->insts[ninst].x86.state_flags==SF_SET || dyn->insts[ninst].x86.state_flags==SF_SET_NODF)) {
need &=~X_PEND;
need |= X_ALL;
}
if((need&X_PEND) && (dyn->insts[ninst].x86.state_flags==SF_SET)) {
need &=~X_PEND;
need |= dyn->insts[ninst].x86.set_flags; // SF_SET will compute all flags, it's not SUBSET!
}
if((need&X_PEND) && dyn->insts[ninst].x86.state_flags==SF_SUBSET_PENDING) {
need |= X_ALL&~(dyn->insts[ninst].x86.set_flags);
}
Expand All @@ -348,10 +344,11 @@ static int updateNeed(dynarec_arm_t* dyn, int ninst, uint8_t need) {
dyn->insts[ninst].x86.gen_flags |= X_PEND;
dyn->insts[ninst].x86.need_after = need;
need = dyn->insts[ninst].x86.need_after&~dyn->insts[ninst].x86.gen_flags;

if(dyn->insts[ninst].x86.may_set)
need |= dyn->insts[ninst].x86.gen_flags; // forward the flags
else if((need&X_PEND) && (dyn->insts[ninst].x86.set_flags&SF_PENDING))
need &=~X_PEND; // Consume X_PEND if relevant
need &=~X_PEND; // Consume X_PEND if relevant
need |= dyn->insts[ninst].x86.use_flags;
if(dyn->insts[ninst].x86.need_before == need)
return ninst - 1;
Expand Down Expand Up @@ -478,10 +475,10 @@ dynarec_log(LOG_DEBUG, "Asked to Fill block %p with %p\n", block, (void*)addr);
for(int ii=0; ii<helper.jmp_sz; ++ii) {
int i = helper.jmps[ii];
uintptr_t j = helper.insts[i].x86.jmp;
helper.insts[i].x86.jmp_insts = -1;
if(j<start || j>=end || j==helper.insts[i].x86.addr) {
if(j==helper.insts[i].x86.addr) // if there is a loop on some opcode, make the block "always to be tested"
helper.always_test = 1;
helper.insts[i].x86.jmp_insts = -1;
helper.insts[i].x86.need_after |= X_PEND;
} else {
// find jump address instruction
Expand Down
80 changes: 36 additions & 44 deletions src/dynarec/dynarec_arm_00.c

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions src/dynarec/dynarec_arm_0f.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ uintptr_t dynarec0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
case 0xD0:
INST_NAME("FAKE xgetbv");
addr = fakeed(dyn, addr, ninst, nextop);
SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state
SETFLAGS(X_ALL, SF_SET_NODF); // Hack to set flags in "don't care" state
//CALL(arm_ud, -1, 0);
SKIPTEST(x14);
UDF(0);
Expand Down Expand Up @@ -147,7 +147,7 @@ uintptr_t dynarec0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,

case 0x0B:
INST_NAME("UD2");
SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state
SETFLAGS(X_ALL, SF_SET_NODF); // Hack to set flags in "don't care" state
//CALL(arm_ud, -1, 0);
SKIPTEST(x14);
UDF(0);
Expand Down Expand Up @@ -444,7 +444,7 @@ uintptr_t dynarec0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
// no special check...
case 0x2F:
if(opcode==0x2F) {INST_NAME("COMISS Gx, Ex");} else {INST_NAME("UCOMISS Gx, Ex");}
SETFLAGS(X_ALL, SF_SET);
SETFLAGS(X_ALL, SF_SET_DF);
nextop = F8;
GETGX(v0, 0);
if(MODREG) {
Expand Down Expand Up @@ -1644,7 +1644,7 @@ uintptr_t dynarec0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
case 0xA5:
nextop = F8;
INST_NAME("SHLD Ed, Gd, CL");
SETFLAGS(X_ALL, SF_SET);
SETFLAGS(X_ALL, SF_SET_PENDING);
AND_IMM8(x3, xECX, 0x1f);
GETED;
GETGD;
Expand Down Expand Up @@ -1702,7 +1702,7 @@ uintptr_t dynarec0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
case 0xAC:
nextop = F8;
INST_NAME("SHRD Ed, Gd, Ib");
SETFLAGS(X_ALL, SF_SET);
SETFLAGS(X_ALL, SF_SET_PENDING);
GETED;
GETGD;
u8 = F8;
Expand All @@ -1712,7 +1712,7 @@ uintptr_t dynarec0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
case 0xAD:
nextop = F8;
INST_NAME("SHRD Ed, Gd, CL");
SETFLAGS(X_ALL, SF_SET);
SETFLAGS(X_ALL, SF_SET_PENDING);
AND_IMM8(x3, xECX, 0x1f);
GETED;
GETGD;
Expand Down Expand Up @@ -1785,7 +1785,7 @@ uintptr_t dynarec0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
case 5:
INST_NAME("XRSTOR Ed (not implemented");
FAKEED;
SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state
SETFLAGS(X_ALL, SF_SET_NODF); // Hack to set flags in "don't care" state
STM(xEmu, (1<<xEAX)|(1<<xECX)|(1<<xEDX)|(1<<xEBX)|(1<<xESP)|(1<<xEBP)|(1<<xESI)|(1<<xEDI)|(1<<xFlags));
STR_IMM9(xEIP, xEmu, offsetof(x86emu_t, ip));
CALL(arm_ud, -1, 0);
Expand Down
8 changes: 4 additions & 4 deletions src/dynarec/dynarec_arm_65.c
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,7 @@ uintptr_t dynarecGS(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
INST_NAME("RCL Ed, 1");
MESSAGE(LOG_DUMP, "Need Optimization\n");
READFLAGS(X_CF);
SETFLAGS(X_OF|X_CF, SF_SET);
SETFLAGS(X_OF|X_CF, SF_SET_DF);
MOVW(x2, 1);
GETEDO2(x14);
if(ed!=x1) {MOV_REG(x1, ed); wb = x1;}
Expand All @@ -519,7 +519,7 @@ uintptr_t dynarecGS(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
INST_NAME("RCR Ed, 1");
MESSAGE(LOG_DUMP, "Need Optimization\n");
READFLAGS(X_CF);
SETFLAGS(X_OF|X_CF, SF_SET);
SETFLAGS(X_OF|X_CF, SF_SET_DF);
MOVW(x2, 1);
if(ed!=x1) {MOV_REG(x1, ed); wb = x1;}
CALL_(rcr32, ed, (1<<x2));
Expand Down Expand Up @@ -601,7 +601,7 @@ uintptr_t dynarecGS(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
INST_NAME("RCL Ed, CL");
MESSAGE(LOG_DUMP, "Need Optimization\n");
READFLAGS(X_CF);
SETFLAGS(X_OF|X_CF, SF_SET);
SETFLAGS(X_OF|X_CF, SF_SET_DF);
AND_IMM8(x2, xECX, 0x1f);
if(ed!=x1) {MOV_REG(x1, ed); wb = x1;}
CALL_(rcl32, ed, (1<<x2));
Expand All @@ -611,7 +611,7 @@ uintptr_t dynarecGS(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
INST_NAME("RCR Ed, CL");
MESSAGE(LOG_DUMP, "Need Optimization\n");
READFLAGS(X_CF);
SETFLAGS(X_OF|X_CF, SF_SET);
SETFLAGS(X_OF|X_CF, SF_SET_DF);
AND_IMM8(x2, xECX, 0x1f);
if(ed!=x1) {MOV_REG(x1, ed); wb = x1;}
CALL_(rcr32, ed, (1<<x14));
Expand Down
21 changes: 11 additions & 10 deletions src/dynarec/dynarec_arm_66.c
Original file line number Diff line number Diff line change
Expand Up @@ -637,7 +637,7 @@ uintptr_t dynarec66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
break;
case 0x9D:
INST_NAME("POPF (16b)");
SETFLAGS(X_ALL, SF_SET); // lower 16bits is all flags handled in dynarec
SETFLAGS(X_ALL, SF_SET_DF); // lower 16bits is all flags handled in dynarec
LDRHA_IMM8(x2, xESP, 2);
MOV32(x1, 0x7FD7);
AND_REG_LSL_IMM5(x2, x2, x1, 0);
Expand Down Expand Up @@ -753,7 +753,7 @@ uintptr_t dynarec66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
case 2:
INST_NAME("RCL Ew, Ib");
READFLAGS(X_CF);
SETFLAGS(X_OF|X_CF, SF_SET);
SETFLAGS(X_OF|X_CF, SF_SET_DF);
GETEW(x1);
u8 = F8;
MOVW(x2, u8);
Expand All @@ -763,7 +763,7 @@ uintptr_t dynarec66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
case 3:
INST_NAME("RCR Ew, Ib");
READFLAGS(X_CF);
SETFLAGS(X_OF|X_CF, SF_SET);
SETFLAGS(X_OF|X_CF, SF_SET_DF);
GETEW(x1);
u8 = F8;
MOVW(x2, u8);
Expand Down Expand Up @@ -849,7 +849,7 @@ uintptr_t dynarec66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
case 2:
INST_NAME("RCL Ew, 1");
READFLAGS(X_CF);
SETFLAGS(X_OF|X_CF, SF_SET);
SETFLAGS(X_OF|X_CF, SF_SET_DF);
MOVW(x2, 1);
GETEW(x1);
CALL_(rcl16, x1, (1<<x3));
Expand All @@ -858,7 +858,7 @@ uintptr_t dynarec66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
case 3:
INST_NAME("RCR Ew, 1");
READFLAGS(X_CF);
SETFLAGS(X_OF|X_CF, SF_SET);
SETFLAGS(X_OF|X_CF, SF_SET_DF);
MOVW(x2, 1);
GETEW(x1);
CALL_(rcr16, x1, (1<<x3));
Expand Down Expand Up @@ -942,7 +942,7 @@ uintptr_t dynarec66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
case 2:
INST_NAME("RCL Ew, CL");
READFLAGS(X_CF);
SETFLAGS(X_OF|X_CF, SF_SET);
SETFLAGS(X_OF|X_CF, SF_SET_DF);
AND_IMM8(x2, xECX, 0x1f);
GETEW(x1);
CALL_(rcl16, x1, (1<<x3));
Expand All @@ -951,7 +951,7 @@ uintptr_t dynarec66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
case 3:
INST_NAME("RCR Ew, CL");
READFLAGS(X_CF);
SETFLAGS(X_OF|X_CF, SF_SET);
SETFLAGS(X_OF|X_CF, SF_SET_DF);
AND_IMM8(x2, xECX, 0x1f);
GETEW(x1);
CALL_(rcr16, x1, (1<<x3));
Expand Down Expand Up @@ -1161,7 +1161,8 @@ uintptr_t dynarec66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
INST_NAME("DIV Ew");
if(arm_div) {
GETEW(x1);
SETFLAGS(X_ALL, SF_SET);
SETFLAGS(X_ALL, SF_SET_DF);
SET_DFNONE(x2);
UXTH(x2, xEAX, 0);
ORR_REG_LSL_IMM5(x2, x2, xEDX, 16);
UDIV(x3, x2, ed);
Expand All @@ -1170,7 +1171,7 @@ uintptr_t dynarec66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
BFI(xEDX, x14, 0, 16);
} else {
MESSAGE(LOG_DUMP, "Need Optimization\n");
SETFLAGS(X_ALL, SF_SET);
SETFLAGS(X_ALL, SF_SET_DF);
GETEW(x1);
STM(xEmu, (1<<xEAX) | (1<<xECX) | (1<<xEDX));
CALL(div16, -1, 0);
Expand All @@ -1180,7 +1181,7 @@ uintptr_t dynarec66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
case 7:
INST_NAME("IDIV Ew");
MESSAGE(LOG_DUMP, "Need Optimization\n");
SETFLAGS(X_ALL, SF_SET);
SETFLAGS(X_ALL, SF_SET_DF);
GETEW(x1);
STM(xEmu, (1<<xEAX) | (1<<xECX) | (1<<xEDX));
CALL(idiv16, -1, 0);
Expand Down
8 changes: 4 additions & 4 deletions src/dynarec/dynarec_arm_660f.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ uintptr_t dynarec660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nins
// no special check...
case 0x2F:
if(opcode==0x2F) {INST_NAME("COMISD Gx, Ex");} else {INST_NAME("UCOMISD Gx, Ex");}
SETFLAGS(X_ALL, SF_SET);
SETFLAGS(X_ALL, SF_SET_DF);
nextop = F8;
gd = (nextop&0x38)>>3;
v0 = sse_get_reg(dyn, ninst, x1, gd, 0);
Expand Down Expand Up @@ -804,7 +804,7 @@ uintptr_t dynarec660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nins

case 0x63:
INST_NAME("PCMPISTRI Gx, Ex, Ib");
SETFLAGS(X_OF|X_CF|X_AF|X_ZF|X_SF|X_PF, SF_SET);
SETFLAGS(X_OF|X_CF|X_AF|X_ZF|X_SF|X_PF, SF_SET_DF);
nextop = F8;
GETG;
if(!sse_reflect_reg(dyn, ninst, gd, x2)) {
Expand Down Expand Up @@ -1836,7 +1836,7 @@ uintptr_t dynarec660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nins
INST_NAME("SHLD Ew, Gw, CL");
}
MESSAGE(LOG_DUMP, "Need Optimization\n");
SETFLAGS(X_ALL, SF_SET);
SETFLAGS(X_ALL, SF_SET_DF);
GETEWW(x14, x1);
GETGW(x2);
if(opcode==0xA4) {
Expand Down Expand Up @@ -1888,7 +1888,7 @@ uintptr_t dynarec660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nins
INST_NAME("SHRD Ew, Gw, CL");
}
MESSAGE(LOG_DUMP, "Need Optimization\n");
SETFLAGS(X_ALL, SF_SET);
SETFLAGS(X_ALL, SF_SET_DF);
GETEWW(x14, x1);
GETGW(x2);
if(opcode==0xAC) {
Expand Down
5 changes: 1 addition & 4 deletions src/dynarec/dynarec_arm_helper.c
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ void retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n)
void iret_to_epilog(dynarec_arm_t* dyn, int ninst)
{
MESSAGE(LOG_DUMP, "IRet epilog\n");
SET_DFNONE(x1);
// POP IP
POP1(xEIP);
// POP CS
Expand Down Expand Up @@ -395,7 +396,6 @@ void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, uint32_t
if(saveflags) {
LDR_IMM9(xFlags, xEmu, offsetof(x86emu_t, eflags));
}
SET_NODF();
}

#if defined(__ARM_PCS) && !defined(__ARM_PCS_VFP)
Expand Down Expand Up @@ -447,7 +447,6 @@ void call_dr(dynarec_arm_t* dyn, int ninst, int reg, int n, int s1, int ret, int
if(saveflags) {
LDR_IMM9(xFlags, xEmu, offsetof(x86emu_t, eflags));
}
SET_NODF();
}
// call a function with n double args (taking care of the SOFTFP / HARD call) that return a double too
void call_d(dynarec_arm_t* dyn, int ninst, void* fnc, void* fnc2, int n, int reg, int ret, uint32_t mask, int saveflags)
Expand Down Expand Up @@ -496,7 +495,6 @@ void call_d(dynarec_arm_t* dyn, int ninst, void* fnc, void* fnc2, int n, int reg
if(saveflags) {
LDR_IMM9(xFlags, xEmu, offsetof(x86emu_t, eflags));
}
SET_NODF();
}
// call a function with 1 double arg (taking care of the SOFTFP / HARD call) and 1 non-float arg that return a double
void call_ddr(dynarec_arm_t* dyn, int ninst, void* fnc, void* fnc2, int arg, int reg, int ret, uint32_t mask, int saveflags)
Expand Down Expand Up @@ -542,7 +540,6 @@ void call_ddr(dynarec_arm_t* dyn, int ninst, void* fnc, void* fnc2, int arg, int
if(saveflags) {
LDR_IMM9(xFlags, xEmu, offsetof(x86emu_t, eflags));
}
SET_NODF();
}

// call a function with 1 arg, (taking care of the SOFTFP / HARD call) that return a double, using s1 as scratch
Expand Down
2 changes: 2 additions & 0 deletions src/dynarec/dynarec_arm_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,8 @@
if(dyn->insts[ninst].x86.gen_flags) switch(B) { \
case SF_SUBSET: \
case SF_SET: dyn->f.pending = SF_SET; break; \
case SF_SET_DF: dyn->f.pending = SF_SET; dyn->f.dfnone = 1; break; \
case SF_SET_NODF: dyn->f.pending = SF_SET; dyn->f.dfnone = 0; break; \
case SF_PENDING: dyn->f.pending = SF_PENDING; break; \
case SF_SUBSET_PENDING: \
case SF_SET_PENDING: \
Expand Down
8 changes: 7 additions & 1 deletion src/dynarec/dynarec_arm_pass.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,14 @@ uintptr_t arm_pass(dynarec_arm_t* dyn, uintptr_t addr)
}
}
#else
// check if block needs to be stopped, because it's a 00 00 opcode (unreadable is already checked earlier)
if((ok>0) && !dyn->forward && !(*(uint8_t*)addr) && !(*(uint8_t*)(addr+1))) {
if(box86_dynarec_dump) dynarec_log(LOG_NONE, "Stopping block at %p reason: %s\n", (void*)addr, "Next opcode is 00 00");
ok = 0;
need_epilog = 1;
}
if(dyn->forward) {
if(dyn->forward_to == addr && !need_epilog) {
if(dyn->forward_to == addr && !need_epilog && ok>=0) {
// we made it!
if(box86_dynarec_dump) dynarec_log(LOG_NONE, "Forward extend block for %d bytes %s%p -> %p\n", dyn->forward_to-dyn->forward, dyn->insts[dyn->forward_ninst].x86.has_callret?"(opt. call) ":"", (void*)dyn->forward, (void*)dyn->forward_to);
if(dyn->insts[dyn->forward_ninst].x86.has_callret && !dyn->insts[dyn->forward_ninst].x86.has_next)
Expand Down
4 changes: 2 additions & 2 deletions src/dynarec/dynarec_arm_pass0.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
dyn->f.pending=SF_SET
#define SETFLAGS(A,B) \
dyn->insts[ninst].x86.set_flags = A; \
dyn->insts[ninst].x86.state_flags = B; \
dyn->insts[ninst].x86.state_flags = (B)&~SF_DF; \
dyn->f.pending=(B)&SF_SET_PENDING; \
dyn->f.dfnone=((B)&SF_SET)?1:0;
dyn->f.dfnone=((B)&SF_SET)?(((B)==SF_SET_NODF)?0:1):0;
#define EMIT(A)
#define JUMP(A, C) add_jump(dyn, ninst); add_next(dyn, (uintptr_t)A); SMEND(); dyn->insts[ninst].x86.jmp = A; dyn->insts[ninst].x86.jmp_cond = C
#define BARRIER(A) if(A!=BARRIER_MAYBE) {fpu_purgecache(dyn, ninst, 0, x1, x2, x3); dyn->insts[ninst].x86.barrier = A;} else dyn->insts[ninst].barrier_maybe = 1
Expand Down
10 changes: 7 additions & 3 deletions src/dynarec/dynarec_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,21 @@
#define SF_SUB 4
#define SF_SUBSET (SF_SUB|SF_SET)
#define SF_SUBSET_PENDING (SF_SUBSET|SF_PENDING)
#define SF_DF 8
#define SF_SET_DF (SF_SET|SF_DF)
#define SF_NODF 16
#define SF_SET_NODF (SF_SET|SF_NODF)

typedef struct instruction_x86_s {
uintptr_t addr; //address of the instruction
int32_t size; // size of the instruction
uintptr_t jmp; // offset to jump to, even if conditional (0 if not), no relative offset here
int jmp_insts; // instruction to jump to (-1 if out of the block)
uint8_t jmp_cond; // 1 if conditional jump
uint8_t has_next; // can this opcode continue to the next?
uint8_t jmp_cond:1; // 1 if conditional jump
uint8_t has_next:1; // can this opcode continue to the next?
uint8_t has_callret:1; // this instruction has an optimised call setup
uint8_t barrier; // next instruction is a jump point, so no optim allowed
uint8_t barrier_next; // next instruction needs a barrier
uint8_t has_callret; // this instruction has an optimised call setup
uint8_t state_flags;// One of SF_XXX state
uint8_t use_flags; // 0 or combination of X_?F
uint8_t set_flags; // 0 or combination of X_?F
Expand Down

0 comments on commit 25e1d8b

Please sign in to comment.