optimise away unneeded flag sets

- especially useful for thumb code and larger max block sizes
- can still be improved upon
This commit is contained in:
RSDuck
2019-08-25 12:28:48 +02:00
parent 316378092a
commit f378458c10
7 changed files with 241 additions and 104 deletions

View File

@ -111,6 +111,8 @@ OpArg Compiler::A_Comp_GetALUOp2(bool S, bool& carryUsed)
}
else
{
S = S && (CurInstr.SetFlags & 0x2);
int op = (CurInstr.Instr >> 5) & 0x3;
if (CurInstr.Instr & (1 << 4))
{
@ -215,7 +217,8 @@ void Compiler::A_Comp_MovOp()
if (S)
{
TEST(32, rd, rd);
if (FlagsNZRequired())
TEST(32, rd, rd);
Comp_RetriveFlags(false, false, carryUsed);
}
@ -263,12 +266,14 @@ void Compiler::Comp_MulOp(bool S, bool add, Gen::OpArg rd, Gen::OpArg rm, Gen::O
{
IMUL(32, RSCRATCH, rs);
LEA(32, rd.GetSimpleReg(), MRegSum(RSCRATCH, rn.GetSimpleReg()));
TEST(32, rd, rd);
if (S && FlagsNZRequired())
TEST(32, rd, rd);
}
else
{
IMUL(32, RSCRATCH, rs);
MOV(32, rd, R(RSCRATCH));
if (S && FlagsNZRequired())
TEST(32, R(RSCRATCH), R(RSCRATCH));
}
@ -331,7 +336,7 @@ void Compiler::A_Comp_SMULL_SMLAL()
else
{
IMUL(64, RSCRATCH2, R(RSCRATCH3));
if (S)
if (S && FlagsNZRequired())
TEST(64, R(RSCRATCH2), R(RSCRATCH2));
}
@ -345,9 +350,20 @@ void Compiler::A_Comp_SMULL_SMLAL()
void Compiler::Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed)
{
CPSRDirty = true;
if (CurInstr.SetFlags == 0)
return;
if (retriveCV && !(CurInstr.SetFlags & 0x3))
retriveCV = false;
bool carryOnly = !retriveCV && carryUsed;
if (carryOnly && !(CurInstr.SetFlags & 0x2))
{
carryUsed = false;
carryOnly = false;
}
CPSRDirty = true;
if (retriveCV)
{
SETcc(CC_O, R(RSCRATCH));
@ -355,19 +371,28 @@ void Compiler::Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed)
LEA(32, RSCRATCH2, MComplex(RSCRATCH, RSCRATCH3, SCALE_2, 0));
}
SETcc(CC_S, R(RSCRATCH));
SETcc(CC_Z, R(RSCRATCH3));
LEA(32, RSCRATCH, MComplex(RSCRATCH3, RSCRATCH, SCALE_2, 0));
int shiftAmount = 30;
if (retriveCV || carryUsed)
if (FlagsNZRequired())
{
LEA(32, RSCRATCH, MComplex(RSCRATCH2, RSCRATCH, carryOnly ? SCALE_2 : SCALE_4, 0));
shiftAmount = carryOnly ? 29 : 28;
}
SHL(32, R(RSCRATCH), Imm8(shiftAmount));
SETcc(CC_S, R(RSCRATCH));
SETcc(CC_Z, R(RSCRATCH3));
LEA(32, RSCRATCH, MComplex(RSCRATCH3, RSCRATCH, SCALE_2, 0));
int shiftAmount = 30;
if (retriveCV || carryUsed)
{
LEA(32, RSCRATCH, MComplex(RSCRATCH2, RSCRATCH, carryOnly ? SCALE_2 : SCALE_4, 0));
shiftAmount = carryOnly ? 29 : 28;
}
SHL(32, R(RSCRATCH), Imm8(shiftAmount));
AND(32, R(RCPSR), Imm32(0x3FFFFFFF & ~(carryUsed << 29) & ~((retriveCV ? 3 : 0) << 28)));
OR(32, R(RCPSR), R(RSCRATCH));
AND(32, R(RCPSR), Imm32(0x3FFFFFFF & ~(carryUsed << 29) & ~((retriveCV ? 3 : 0) << 28)));
OR(32, R(RCPSR), R(RSCRATCH));
}
else
{
SHL(32, R(RSCRATCH2), Imm8(carryOnly ? 29 : 28));
AND(32, R(RCPSR), Imm32(0xFFFFFFFF & ~(carryUsed << 29) & ~((retriveCV ? 3 : 0) << 28)));
OR(32, R(RCPSR), R(RSCRATCH2));
}
}
// always uses RSCRATCH, RSCRATCH2 only if S == true
@ -523,7 +548,8 @@ void Compiler::T_Comp_ShiftImm()
if (shifted != rd)
MOV(32, rd, shifted);
TEST(32, rd, rd);
if (FlagsNZRequired())
TEST(32, rd, rd);
Comp_RetriveFlags(false, false, carryUsed);
}
@ -557,7 +583,8 @@ void Compiler::T_Comp_ALU_Imm8()
{
case 0x0:
MOV(32, rd, imm);
TEST(32, rd, rd);
if (FlagsNZRequired())
TEST(32, rd, rd);
Comp_RetriveFlags(false, false, false);
return;
case 0x1:
@ -607,7 +634,8 @@ void Compiler::T_Comp_ALU()
int shiftOp = op == 0x7 ? 3 : op - 0x2;
bool carryUsed;
OpArg shifted = Comp_RegShiftReg(shiftOp, rs, rd, true, carryUsed);
TEST(32, shifted, shifted);
if (FlagsNZRequired())
TEST(32, shifted, shifted);
MOV(32, rd, shifted);
Comp_RetriveFlags(false, false, true);
}

View File

@ -342,6 +342,11 @@ const Compiler::CompileFunc T_Comp[ARMInstrInfo::tk_Count] = {
};
#undef F
bool Compiler::CanCompile(bool thumb, u16 kind)
{
return (thumb ? T_Comp[kind] : A_Comp[kind]) != NULL;
}
void Compiler::Reset()
{
memset(ResetStart, 0xcc, CodeMemSize);
@ -380,11 +385,15 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
// TODO: this is ugly as a whole, do better
RegCache = RegisterCache<Compiler, X64Reg>(this, instrs, instrsCount);
printf("block start %d\n", Thumb);
for (int i = 0; i < instrsCount; i++)
{
R15 += Thumb ? 2 : 4;
CurInstr = instrs[i];
printf("%x %d %d %d\n", CurInstr.Instr, CurInstr.SetFlags, CurInstr.Info.WriteFlags, CurInstr.Info.ReadFlags);
CompileFunc comp = Thumb
? T_Comp[CurInstr.Info.Kind]
: A_Comp[CurInstr.Info.Kind];

View File

@ -29,6 +29,8 @@ public:
void LoadReg(int reg, Gen::X64Reg nativeReg);
void SaveReg(int reg, Gen::X64Reg nativeReg);
bool CanCompile(bool thumb, u16 kind);
typedef void (Compiler::*CompileFunc)();
void Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR = false);
@ -64,7 +66,6 @@ public:
void A_Comp_BranchImm();
void A_Comp_BranchXchangeReg();
void T_Comp_ShiftImm();
void T_Comp_AddSub_();
void T_Comp_ALU_Imm8();
@ -121,6 +122,9 @@ public:
void LoadCPSR();
void SaveCPSR();
bool FlagsNZRequired()
{ return CurInstr.SetFlags & 0xC; }
Gen::FixupBranch CheckCondition(u32 cond);
Gen::OpArg MapReg(int reg)