From 1429fccb9724ab6ec0ee1d443d9838d0ad330764 Mon Sep 17 00:00:00 2001 From: magumagu Date: Mon, 23 Jun 2014 19:19:22 -0700 Subject: [PATCH] Initial unoptimized JITIL flag optimization. --- Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp | 93 +++++++++++++++++-- Source/Core/Core/PowerPC/JitILCommon/IR.cpp | 11 ++- Source/Core/Core/PowerPC/JitILCommon/IR.h | 9 ++ .../PowerPC/JitILCommon/JitILBase_Branch.cpp | 2 + .../JitILCommon/JitILBase_FloatingPoint.cpp | 2 +- .../PowerPC/JitILCommon/JitILBase_Integer.cpp | 4 +- .../JitILCommon/JitILBase_SystemRegisters.cpp | 11 ++- 7 files changed, 114 insertions(+), 18 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp index 585485271e..8abe9fd3f9 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp @@ -85,7 +85,7 @@ static unsigned regReadUse(RegInfo& R, InstLoc I) { return R.IInfo[I - R.FirstI] & 3; } -static unsigned SlotSet[1000]; +static u64 SlotSet[1000]; static u8 GC_ALIGNED16(FSlotSet[16*1000]); static OpArg regLocForSlot(RegInfo& RI, unsigned slot) { @@ -107,7 +107,7 @@ static void regSpill(RegInfo& RI, X64Reg reg) { unsigned slot = regGetSpill(RI, RI.regs[reg]); if (!slot) { slot = regCreateSpill(RI, RI.regs[reg]); - RI.Jit->MOV(32, regLocForSlot(RI, slot), R(reg)); + RI.Jit->MOV(64, regLocForSlot(RI, slot), R(reg)); } RI.regs[reg] = nullptr; } @@ -621,6 +621,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { case FPDup1: case FSNeg: case FDNeg: + case ConvertFromFastCR: + case ConvertToFastCR: if (thisUsed) regMarkUse(RI, I, getOp1(I), 1); break; @@ -763,8 +765,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { if (!thisUsed) break; X64Reg reg = regFindFreeReg(RI); unsigned ppcreg = *I >> 8; - // TODO(delroth): unbreak - //Jit->MOVZX(32, 8, reg, M(&PowerPC::ppcState.cr_fast[ppcreg])); + Jit->MOV(64, R(reg), M(&PowerPC::ppcState.cr_val[ppcreg])); RI.regs[reg] = I; break; } @@ -814,11 +815,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { break; } case StoreCR: { - Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); + Jit->MOV(64, R(RCX), regLocForInst(RI, getOp1(I))); unsigned ppcreg = *I >> 16; - // CAUTION: uses 8-bit reg! - // TODO(delroth): Unbreak. - //Jit->MOV(8, M(&PowerPC::ppcState.cr_fast[ppcreg]), R(ECX)); + Jit->MOV(64, M(&PowerPC::ppcState.cr_val[ppcreg]), R(RCX)); regNormalRegClear(RI, I); break; } @@ -1116,6 +1115,84 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { regNormalRegClear(RI, I); break; } + case ConvertFromFastCR: + { + if (!thisUsed) break; + X64Reg cr_val = regUReg(RI, I); + Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I))); + + Jit->XOR(32, R(EAX), R(EAX)); + + // SO: Bit 61 set. + Jit->MOV(64, R(RCX), R(cr_val)); + Jit->SHR(64, R(RCX), Imm8(61)); + Jit->AND(32, R(ECX), Imm8(1)); + Jit->OR(32, R(EAX), R(ECX)); + + // EQ: Bits 31-0 == 0. + Jit->XOR(32, R(ECX), R(ECX)); + Jit->TEST(32, R(cr_val), R(cr_val)); + Jit->SETcc(CC_Z, R(ECX)); + Jit->SHL(32, R(ECX), Imm8(1)); + Jit->OR(32, R(EAX), R(ECX)); + + // GT: Value > 0. + Jit->XOR(32, R(ECX), R(ECX)); + Jit->TEST(64, R(cr_val), R(cr_val)); + Jit->SETcc(CC_G, R(ECX)); + Jit->SHL(32, R(ECX), Imm8(2)); + Jit->OR(32, R(EAX), R(ECX)); + + // LT: Bit 62 set. + Jit->MOV(64, R(ECX), R(cr_val)); + Jit->SHR(64, R(ECX), Imm8(62 - 3)); + Jit->AND(32, R(ECX), Imm8(0x8)); + Jit->OR(32, R(EAX), R(ECX)); + + Jit->MOV(32, R(cr_val), R(EAX)); + RI.regs[cr_val] = I; + regNormalRegClear(RI, I); + break; + } + case ConvertToFastCR: + { + if (!thisUsed) break; + X64Reg cr_val = regUReg(RI, I); + Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I))); + + Jit->MOV(64, R(RCX), Imm64(1ull << 32)); + + // SO + Jit->MOV(64, R(RAX), R(cr_val)); + Jit->SHL(64, R(RAX), Imm8(63)); + Jit->SHR(64, R(RAX), Imm8(63 - 61)); + Jit->OR(64, R(RCX), R(RAX)); + + // EQ + Jit->MOV(64, R(RAX), R(cr_val)); + Jit->NOT(64, R(RAX)); + Jit->AND(64, R(RAX), Imm8(CR_EQ)); + Jit->OR(64, R(RCX), R(RAX)); + + // GT + Jit->MOV(64, R(RAX), R(cr_val)); + Jit->NOT(64, R(RAX)); + Jit->AND(64, R(RAX), Imm8(CR_GT)); + Jit->SHL(64, R(RAX), Imm8(63 - 2)); + Jit->OR(64, R(RCX), R(RAX)); + + // LT + Jit->MOV(64, R(RAX), R(cr_val)); + Jit->AND(64, R(RAX), Imm8(CR_LT)); + Jit->SHL(64, R(RAX), Imm8(62 - 3)); + Jit->OR(64, R(RCX), R(RAX)); + + Jit->MOV(64, R(cr_val), R(RCX)); + + RI.regs[cr_val] = I; + regNormalRegClear(RI, I); + break; + } case LoadSingle: { if (!thisUsed) break; X64Reg reg = fregFindFreeReg(RI); diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp index c7515e5bfc..0f0bb1c7f3 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp @@ -1130,7 +1130,7 @@ unsigned IRBuilder::getNumberOfOperands(InstLoc I) const { numberOfOperands[CInt32] = 0; static unsigned ZeroOp[] = {LoadCR, LoadLink, LoadMSR, LoadGReg, LoadCTR, InterpreterBranch, LoadCarry, RFIExit, LoadFReg, LoadFRegDENToZero, LoadGQR, Int3, }; - static unsigned UOp[] = {StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, }; + static unsigned UOp[] = {StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, ConvertFromFastCR, ConvertToFastCR}; static unsigned BiOp[] = {BranchCond, IdleBranch, And, Xor, Sub, Or, Add, Mul, Rol, Shl, Shrl, Sarl, ICmpEq, ICmpNe, ICmpUgt, ICmpUlt, ICmpSgt, ICmpSlt, ICmpSge, ICmpSle, Store8, Store16, Store32, ICmpCRSigned, ICmpCRUnsigned, FallBackToInterpreter, StoreSingle, StoreDouble, StorePaired, InsertDoubleInMReg, FSMul, FSAdd, FSSub, FDMul, FDAdd, FDSub, FPAdd, FPMul, FPSub, FPMerge00, FPMerge01, FPMerge10, FPMerge11, FDCmpCR, }; for (auto& op : ZeroOp) { numberOfOperands[op] = 0; @@ -1235,10 +1235,11 @@ static std::unique_ptr writer; static const std::string opcodeNames[] = { "Nop", "LoadGReg", "LoadLink", "LoadCR", "LoadCarry", "LoadCTR", "LoadMSR", "LoadGQR", "SExt8", "SExt16", "BSwap32", "BSwap16", "Cntlzw", - "Not", "Load8", "Load16", "Load32", "BranchUncond", "StoreGReg", - "StoreCR", "StoreLink", "StoreCarry", "StoreCTR", "StoreMSR", "StoreFPRF", - "StoreGQR", "StoreSRR", "FallBackToInterpreter", "Add", "Mul", "And", "Or", - "Xor", "MulHighUnsigned", "Sub", "Shl", "Shrl", "Sarl", "Rol", + "Not", "Load8", "Load16", "Load32", "BranchUncond", "ConvertFromFastCR", + "ConvertToFastCR", "StoreGReg", "StoreCR", "StoreLink", "StoreCarry", + "StoreCTR", "StoreMSR", "StoreFPRF", "StoreGQR", "StoreSRR", + "FallBackToInterpreter", "Add", "Mul", "And", "Or", "Xor", + "MulHighUnsigned", "Sub", "Shl", "Shrl", "Sarl", "Rol", "ICmpCRSigned", "ICmpCRUnsigned", "ICmpEq", "ICmpNe", "ICmpUgt", "ICmpUlt", "ICmpUge", "ICmpUle", "ICmpSgt", "ICmpSlt", "ICmpSge", "ICmpSle", "Store8", "Store16", "Store32", "BranchCond", "FResult_Start", diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.h b/Source/Core/Core/PowerPC/JitILCommon/IR.h index f84f29fe02..c09de3b9b2 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/IR.h +++ b/Source/Core/Core/PowerPC/JitILCommon/IR.h @@ -33,6 +33,9 @@ enum Opcode { Load8, // These loads zext Load16, Load32, + // CR conversions + ConvertFromFastCR, + ConvertToFastCR, // Branches BranchUncond, // Register store operators @@ -373,6 +376,12 @@ public: InstLoc EmitICmpCRUnsigned(InstLoc op1, InstLoc op2) { return FoldBiOp(ICmpCRUnsigned, op1, op2); } + InstLoc EmitConvertFromFastCR(InstLoc op1) { + return FoldUOp(ConvertFromFastCR, op1); + } + InstLoc EmitConvertToFastCR(InstLoc op1) { + return FoldUOp(ConvertToFastCR, op1); + } InstLoc EmitFallBackToInterpreter(InstLoc op1, InstLoc op2) { return FoldBiOp(FallBackToInterpreter, op1, op2); } diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp index 85601cd0c5..9cb80059ec 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp @@ -66,6 +66,7 @@ static IREmitter::InstLoc TestBranch(IREmitter::IRBuilder& ibuild, UGeckoInstruc if ((inst.BO & 16) == 0) // Test a CR bit { IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2); + CRReg = ibuild.EmitConvertFromFastCR(CRReg); IREmitter::InstLoc CRCmp = ibuild.EmitIntConst(8 >> (inst.BI & 3)); CRTest = ibuild.EmitAnd(CRReg, CRCmp); if (!(inst.BO & 8)) @@ -141,6 +142,7 @@ void JitILBase::bcctrx(UGeckoInstruction inst) if ((inst.BO & 16) == 0) // Test a CR bit { IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2); + CRReg = ibuild.EmitConvertFromFastCR(CRReg); IREmitter::InstLoc CRCmp = ibuild.EmitIntConst(8 >> (inst.BI & 3)); test = ibuild.EmitAnd(CRReg, CRCmp); if (!(inst.BO & 8)) diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp index 3184fbc16f..b97740ce5e 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp @@ -86,7 +86,7 @@ void JitILBase::fcmpx(UGeckoInstruction inst) int ordered = (inst.SUBOP10 == 32) ? 1 : 0; res = ibuild.EmitFDCmpCR(lhs, rhs, ordered); ibuild.EmitStoreFPRF(res); - ibuild.EmitStoreCR(res, inst.CRFD); + ibuild.EmitStoreCR(ibuild.EmitConvertToFastCR(res), inst.CRFD); } void JitILBase::fsign(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp index e7b96a6850..6f275184ee 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp @@ -12,7 +12,7 @@ static void ComputeRC(IREmitter::IRBuilder& ibuild, IREmitter::InstLoc val) { IREmitter::InstLoc res = ibuild.EmitICmpCRSigned(val, ibuild.EmitIntConst(0)); - ibuild.EmitStoreCR(res, 0); + ibuild.EmitStoreCR(ibuild.EmitConvertToFastCR(res), 0); } void JitILBase::reg_imm(UGeckoInstruction inst) @@ -114,7 +114,7 @@ void JitILBase::cmpXX(UGeckoInstruction inst) js.downcountAmount++; //TODO: should this be somewhere else? - ibuild.EmitStoreCR(res, inst.CRFD); + ibuild.EmitStoreCR(ibuild.EmitConvertToFastCR(res), inst.CRFD); } void JitILBase::boolX(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_SystemRegisters.cpp index dbd08d94bb..31c6ffa4b4 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_SystemRegisters.cpp @@ -107,8 +107,10 @@ void JitILBase::mfcr(UGeckoInstruction inst) IREmitter::InstLoc d = ibuild.EmitIntConst(0); for (int i = 0; i < 8; ++i) { - d = ibuild.EmitShl(d, ibuild.EmitIntConst(4)); - d = ibuild.EmitOr(d, ibuild.EmitLoadCR(i)); + IREmitter::InstLoc cr = ibuild.EmitLoadCR(i); + cr = ibuild.EmitConvertFromFastCR(cr); + cr = ibuild.EmitShl(cr, ibuild.EmitIntConst(28 - 4 * i)); + d = ibuild.EmitOr(d, cr); } ibuild.EmitStoreGReg(d, inst.RD); } @@ -126,6 +128,7 @@ void JitILBase::mtcrf(UGeckoInstruction inst) IREmitter::InstLoc value; value = ibuild.EmitShrl(s, ibuild.EmitIntConst(28 - i * 4)); value = ibuild.EmitAnd(value, ibuild.EmitIntConst(0xF)); + value = ibuild.EmitConvertToFastCR(value); ibuild.EmitStoreCR(value, i); } } @@ -150,6 +153,7 @@ void JitILBase::crXX(UGeckoInstruction inst) // Get bit CRBA in EAX aligned with bit CRBD const int shiftA = (inst.CRBD & 3) - (inst.CRBA & 3); IREmitter::InstLoc eax = ibuild.EmitLoadCR(inst.CRBA >> 2); + eax = ibuild.EmitConvertFromFastCR(eax); if (shiftA < 0) eax = ibuild.EmitShl(eax, ibuild.EmitIntConst(-shiftA)); else if (shiftA > 0) @@ -158,6 +162,7 @@ void JitILBase::crXX(UGeckoInstruction inst) // Get bit CRBB in ECX aligned with bit CRBD const int shiftB = (inst.CRBD & 3) - (inst.CRBB & 3); IREmitter::InstLoc ecx = ibuild.EmitLoadCR(inst.CRBB >> 2); + ecx = ibuild.EmitConvertFromFastCR(ecx); if (shiftB < 0) ecx = ibuild.EmitShl(ecx, ibuild.EmitIntConst(-shiftB)); else if (shiftB > 0) @@ -211,7 +216,9 @@ void JitILBase::crXX(UGeckoInstruction inst) // Store result bit in CRBD eax = ibuild.EmitAnd(eax, ibuild.EmitIntConst(0x8 >> (inst.CRBD & 3))); IREmitter::InstLoc bd = ibuild.EmitLoadCR(inst.CRBD >> 2); + bd = ibuild.EmitConvertFromFastCR(bd); bd = ibuild.EmitAnd(bd, ibuild.EmitIntConst(~(0x8 >> (inst.CRBD & 3)))); bd = ibuild.EmitOr(bd, eax); + bd = ibuild.EmitConvertToFastCR(bd); ibuild.EmitStoreCR(bd, inst.CRBD >> 2); }