diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
index e6d20a0b7e..2894429cd8 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
@@ -162,7 +162,9 @@ void Jit64::Init()
 		jo.enableBlocklink = false;
 	}
 	else
+	{
 		jo.enableBlocklink = !Core::g_CoreStartupParameter.bMMU;
+	}
 }
 jo.fpAccurateFcmp = Core::g_CoreStartupParameter.bEnableFPRF;
 jo.optimizeGatherPipe = true;
@@ -435,7 +437,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
 	ABI_CallFunction((void *)&ImHere); //Used to get a trace of the last few blocks before a crash, sometimes VERY useful
 	// Conditionally add profiling code.
-	if (Profiler::g_ProfileBlocks) {
+	if (Profiler::g_ProfileBlocks)
+	{
 		ADD(32, M(&b->runCount), Imm8(1));
 #ifdef _WIN32
 		b->ticCounter = 0;
@@ -617,7 +620,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
 			//NOTICE_LOG(DYNA_REC, "Unflushed register: %s", ppc_inst.c_str());
 		}
 #endif
-		if (js.skipnext) {
+		if (js.skipnext)
+		{
 			js.skipnext = false;
 			i++; // Skip next instruction
 		}
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h
index 3ea3ec81ab..95fbcd1f7b 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
@@ -68,18 +68,22 @@ public:
 	void ClearCache() override;
 
-	const u8 *GetDispatcher() {
+	const u8 *GetDispatcher()
+	{
 		return asm_routines.dispatcher;
 	}
-	const CommonAsmRoutines *GetAsmRoutines() override {
+
+	const CommonAsmRoutines *GetAsmRoutines() override
+	{
 		return &asm_routines;
 	}
-	const char *GetName() override {
+	const char *GetName() override
+	{
 		return "JIT64";
 	}
-	// Run!
+	// Run!
 	void Run() override;
 	void SingleStep() override;
diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.h b/Source/Core/Core/PowerPC/Jit64/JitAsm.h
index 19679247ec..e3cc4371f7 100644
--- a/Source/Core/Core/PowerPC/Jit64/JitAsm.h
+++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.h
@@ -27,13 +27,15 @@ private:
 	void GenerateCommon();
 
 public:
-	void Init() {
+	void Init()
+	{
 		AllocCodeSpace(8192);
 		Generate();
 		WriteProtect();
 	}
 
-	void Shutdown() {
+	void Shutdown()
+	{
 		FreeCodeSpace();
 	}
 };
diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp
index 28e9de7441..119c41612b 100644
--- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp
@@ -52,21 +52,35 @@ void RegCache::Start()
 void RegCache::Lock(int p1, int p2, int p3, int p4)
 {
 	regs[p1].locked = true;
-	if (p2 != 0xFF) regs[p2].locked = true;
-	if (p3 != 0xFF) regs[p3].locked = true;
-	if (p4 != 0xFF) regs[p4].locked = true;
+
+	if (p2 != 0xFF)
+		regs[p2].locked = true;
+
+	if (p3 != 0xFF)
+		regs[p3].locked = true;
+
+	if (p4 != 0xFF)
+		regs[p4].locked = true;
 }
 
 // these are x64 reg indices
 void RegCache::LockX(int x1, int x2, int x3, int x4)
 {
-	if (xregs[x1].locked) {
+	if (xregs[x1].locked)
+	{
 		PanicAlert("RegCache: x %i already locked!", x1);
 	}
+
 	xregs[x1].locked = true;
-	if (x2 != 0xFF) xregs[x2].locked = true;
-	if (x3 != 0xFF) xregs[x3].locked = true;
-	if (x4 != 0xFF) xregs[x4].locked = true;
+
+	if (x2 != 0xFF)
+		xregs[x2].locked = true;
+
+	if (x3 != 0xFF)
+		xregs[x3].locked = true;
+
+	if (x4 != 0xFF)
+		xregs[x4].locked = true;
 }
 
 void RegCache::UnlockAll()
@@ -321,6 +335,7 @@ void RegCache::Flush(FlushMode mode)
 		{
 			PanicAlert("Someone forgot to unlock PPC reg %" PRIx64 " (X64 reg %i).", i, RX(i));
 		}
+
 		if (regs[i].away)
 		{
 			if (regs[i].location.IsSimpleReg() || regs[i].location.IsImm())
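An aside on the hunks above: RegCache::Lock() and LockX() treat 0xFF as a "no register" sentinel for their optional trailing parameters. A minimal standalone sketch of that convention, with hypothetical names (kNoReg, LockRegs) that are not part of this patch:

static const int kNoReg = 0xFF; // sentinel meaning "parameter not supplied"

static void LockRegs(bool* locked, int p1, int p2 = kNoReg, int p3 = kNoReg, int p4 = kNoReg)
{
	locked[p1] = true;

	// Trailing parameters default to the sentinel, so callers can lock one to four registers.
	if (p2 != kNoReg)
		locked[p2] = true;

	if (p3 != kNoReg)
		locked[p3] = true;

	if (p4 != kNoReg)
		locked[p4] = true;
}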
diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.h b/Source/Core/Core/PowerPC/Jit64/JitRegCache.h
index d4e984f537..e01da5dc5f 100644
--- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.h
+++ b/Source/Core/Core/PowerPC/Jit64/JitRegCache.h
@@ -47,23 +47,34 @@ protected:
 public:
 	RegCache();
-	virtual ~RegCache() {}
+
 	void Start();
 
 	void DiscardRegContentsIfCached(size_t preg);
-	void SetEmitter(Gen::XEmitter *emitter) {emit = emitter;}
+	void SetEmitter(Gen::XEmitter *emitter)
+	{
+		emit = emitter;
+	}
 
 	void FlushR(Gen::X64Reg reg);
-	void FlushR(Gen::X64Reg reg, Gen::X64Reg reg2) {FlushR(reg); FlushR(reg2);}
-	void FlushLockX(Gen::X64Reg reg) {
+	void FlushR(Gen::X64Reg reg, Gen::X64Reg reg2)
+	{
+		FlushR(reg);
+		FlushR(reg2);
+	}
+
+	void FlushLockX(Gen::X64Reg reg)
+	{
 		FlushR(reg);
 		LockX(reg);
 	}
-	void FlushLockX(Gen::X64Reg reg1, Gen::X64Reg reg2) {
+
+	void FlushLockX(Gen::X64Reg reg1, Gen::X64Reg reg2)
+	{
 		FlushR(reg1);
 		FlushR(reg2);
 		LockX(reg1);
 		LockX(reg2);
 	}
+
 	void Flush(FlushMode mode = FLUSH_ALL);
 	void Flush(PPCAnalyst::CodeOp *op) {Flush();}
 	int SanityCheck() const;
@@ -76,7 +87,11 @@ public:
 	virtual void StoreRegister(size_t preg, Gen::OpArg newLoc) = 0;
 	virtual void LoadRegister(size_t preg, Gen::X64Reg newLoc) = 0;
 
-	const Gen::OpArg &R(size_t preg) const {return regs[preg].location;}
+	const Gen::OpArg &R(size_t preg) const
+	{
+		return regs[preg].location;
+	}
+
 	Gen::X64Reg RX(size_t preg) const
 	{
 		if (IsBound(preg))
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp
index 135e0c4f3c..8bef37cb51 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp
@@ -67,7 +67,8 @@ void Jit64::bx(UGeckoInstruction inst)
 	// If this is not the last instruction of a block,
 	// we will skip the rest process.
 	// Because PPCAnalyst::Flatten() merged the blocks.
-	if (!js.isLastInstruction) {
+	if (!js.isLastInstruction)
+	{
 		return;
 	}
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
index a35797b80c..7761c636d2 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
@@ -136,10 +136,13 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
 	fpr.BindToRegister(d, false);
 	//YES it is necessary to dupe the result :(
 	//TODO : analysis - does the top reg get used? If so, dupe, if not, don't.
-	if (single_precision) {
+	if (single_precision)
+	{
 		ForceSinglePrecisionS(XMM0);
 		MOVDDUP(fpr.RX(d), R(XMM0));
-	} else {
+	}
+	else
+	{
 		MOVSD(fpr.RX(d), R(XMM0));
 	}
 	// SMB checks flags after this op. Let's lie.
@@ -159,7 +162,8 @@ void Jit64::fsign(UGeckoInstruction inst)
 	fpr.Lock(b, d);
 	fpr.BindToRegister(d, true, true);
 	MOVSD(XMM0, fpr.R(b));
-	switch (inst.SUBOP10) {
+	switch (inst.SUBOP10)
+	{
 	case 40: // fnegx
 		PXOR(XMM0, M((void*)&psSignBits2));
 		break;
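The fnegx case above negates a double by PXOR-ing against a sign-bit mask rather than using an FP negate. The same bit trick in portable C++, as a sketch assuming IEEE-754 doubles (FlipSign is a hypothetical helper, not part of the patch):

#include <cstdint>
#include <cstring>

static double FlipSign(double x)
{
	std::uint64_t bits;
	std::memcpy(&bits, &x, sizeof(bits)); // type-pun safely through memcpy

	bits ^= 0x8000000000000000ULL; // toggle only the sign bit, like PXOR with psSignBits2

	std::memcpy(&x, &bits, sizeof(x));
	return x;
}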
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
index 6ee50b1a5c..dc43cbe5a9 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
@@ -137,10 +137,26 @@ void Jit64::ComputeRC(const Gen::OpArg & arg)
 	}
 }
 
-static u32 Add(u32 a, u32 b) {return a + b;}
-static u32 Or (u32 a, u32 b) {return a | b;}
-static u32 And(u32 a, u32 b) {return a & b;}
-static u32 Xor(u32 a, u32 b) {return a ^ b;}
+// The following static functions are used in conjunction with regimmop
+static u32 Add(u32 a, u32 b)
+{
+	return a + b;
+}
+
+static u32 Or(u32 a, u32 b)
+{
+	return a | b;
+}
+
+static u32 And(u32 a, u32 b)
+{
+	return a & b;
+}
+
+static u32 Xor(u32 a, u32 b)
+{
+	return a ^ b;
+}
 
 void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc, bool carry)
 {
@@ -196,7 +212,7 @@ void Jit64::reg_imm(UGeckoInstruction inst)
 	u32 d = inst.RD, a = inst.RA, s = inst.RS;
 	switch (inst.OPCD)
 	{
-	case 14: // addi
+	case 14: // addi
 		// occasionally used as MOV - emulate, with immediate propagation
 		if (gpr.R(a).IsImm() && d != a && a != 0)
 		{
@@ -244,18 +260,36 @@ void Jit64::reg_imm(UGeckoInstruction inst)
 			regimmop(d, a, false, (u32)inst.SIMM_16 << 16, Add, &XEmitter::ADD);
 		}
 		break;
-	case 24:
+	case 24: // ori
 		if (a == 0 && s == 0 && inst.UIMM == 0 && !inst.Rc) //check for nop
-		{NOP(); return;} //make the nop visible in the generated code. not much use but interesting if we see one.
+		{
+			// Make the NOP visible in the generated code. Not much use, but interesting if we see one.
+			NOP();
+			return;
+		}
 		regimmop(a, s, true, inst.UIMM, Or, &XEmitter::OR);
-		break; //ori
-	case 25: regimmop(a, s, true, inst.UIMM << 16, Or, &XEmitter::OR, false); break;//oris
-	case 28: regimmop(a, s, true, inst.UIMM, And, &XEmitter::AND, true); break;
-	case 29: regimmop(a, s, true, inst.UIMM << 16, And, &XEmitter::AND, true); break;
-	case 26: regimmop(a, s, true, inst.UIMM, Xor, &XEmitter::XOR, false); break; //xori
-	case 27: regimmop(a, s, true, inst.UIMM << 16, Xor, &XEmitter::XOR, false); break; //xoris
-	case 12: regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD, false, true); break; //addic
-	case 13: regimmop(d, a, true, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD, true, true); break; //addic_rc
+		break;
+	case 25: // oris
+		regimmop(a, s, true, inst.UIMM << 16, Or, &XEmitter::OR, false);
+		break;
+	case 28: // andi
+		regimmop(a, s, true, inst.UIMM, And, &XEmitter::AND, true);
+		break;
+	case 29: // andis
+		regimmop(a, s, true, inst.UIMM << 16, And, &XEmitter::AND, true);
+		break;
+	case 26: // xori
+		regimmop(a, s, true, inst.UIMM, Xor, &XEmitter::XOR, false);
+		break;
+	case 27: // xoris
+		regimmop(a, s, true, inst.UIMM << 16, Xor, &XEmitter::XOR, false);
+		break;
+	case 12: // addic
+		regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD, false, true);
+		break;
+	case 13: // addic_rc
+		regimmop(d, a, true, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD, true, true);
+		break;
 	default:
 		FALLBACK_IF(true);
 	}
@@ -274,20 +308,23 @@ void Jit64::cmpXX(UGeckoInstruction inst)
 	int test_crf = js.next_inst.BI >> 2;
 	// Check if the next instruction is a branch - if it is, merge the two.
 	if (((js.next_inst.OPCD == 16 /* bcx */) ||
-	    ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528) /* bcctrx */) ||
-	    ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16) /* bclrx */)) &&
-	    (js.next_inst.BO & BO_DONT_DECREMENT_FLAG) &&
-	    !(js.next_inst.BO & BO_DONT_CHECK_CONDITION)) {
+		((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528) /* bcctrx */) ||
+		((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16) /* bclrx */)) &&
+		(js.next_inst.BO & BO_DONT_DECREMENT_FLAG) &&
+		!(js.next_inst.BO & BO_DONT_CHECK_CONDITION))
+	{
 		// Looks like a decent conditional branch that we can merge with.
 		// It only test CR, not CTR.
-		if (test_crf == crf) {
+		if (test_crf == crf)
+		{
 			merge_branch = true;
 		}
 	}
 
 	OpArg comparand;
 	bool signedCompare;
-	if (inst.OPCD == 31) {
+	if (inst.OPCD == 31)
+	{
 		// cmp / cmpl
 		gpr.Lock(a, b);
 		comparand = gpr.R(b);
@@ -402,6 +439,7 @@ void Jit64::cmpXX(UGeckoInstruction inst)
 				MOV(64, R(RAX), Imm32((s32)gpr.R(a).offset));
 			else
 				MOVSX(64, 32, RAX, gpr.R(a));
+
 			if (!comparand.IsImm())
 			{
 				MOVSX(64, 32, ABI_PARAM1, comparand);
@@ -419,6 +457,7 @@ void Jit64::cmpXX(UGeckoInstruction inst)
 				MOV(32, R(ABI_PARAM1), comparand);
 			else
 				MOVZX(64, 32, ABI_PARAM1, comparand);
+
 			comparand = R(ABI_PARAM1);
 		}
 		SUB(64, R(RAX), comparand);
@@ -466,6 +505,7 @@ void Jit64::cmpXX(UGeckoInstruction inst)
 			{
 				if (js.next_inst.LK)
 					MOV(32, M(&LR), Imm32(js.compilerPC + 4));
+
 				MOV(32, R(EAX), M(&CTR));
 				AND(32, R(EAX), Imm32(0xFFFFFFFC));
 				WriteExitDestInEAX();
@@ -474,8 +514,10 @@ void Jit64::cmpXX(UGeckoInstruction inst)
 			{
 				MOV(32, R(EAX), M(&LR));
 				AND(32, R(EAX), Imm32(0xFFFFFFFC));
+
 				if (js.next_inst.LK)
 					MOV(32, M(&LR), Imm32(js.compilerPC + 4));
+
 				WriteExitDestInEAX();
 			}
 			else
@@ -506,22 +548,23 @@ void Jit64::boolX(UGeckoInstruction inst)
 
 	if (gpr.R(s).IsImm() && gpr.R(b).IsImm())
 	{
-		if (inst.SUBOP10 == 28) /* andx */
+		if (inst.SUBOP10 == 28) // andx
 			gpr.SetImmediate32(a, (u32)gpr.R(s).offset & (u32)gpr.R(b).offset);
-		else if (inst.SUBOP10 == 476) /* nandx */
+		else if (inst.SUBOP10 == 476) // nandx
 			gpr.SetImmediate32(a, ~((u32)gpr.R(s).offset & (u32)gpr.R(b).offset));
-		else if (inst.SUBOP10 == 60) /* andcx */
+		else if (inst.SUBOP10 == 60) // andcx
 			gpr.SetImmediate32(a, (u32)gpr.R(s).offset & (~(u32)gpr.R(b).offset));
-		else if (inst.SUBOP10 == 444) /* orx */
+		else if (inst.SUBOP10 == 444) // orx
 			gpr.SetImmediate32(a, (u32)gpr.R(s).offset | (u32)gpr.R(b).offset);
-		else if (inst.SUBOP10 == 124) /* norx */
+		else if (inst.SUBOP10 == 124) // norx
 			gpr.SetImmediate32(a, ~((u32)gpr.R(s).offset | (u32)gpr.R(b).offset));
-		else if (inst.SUBOP10 == 412) /* orcx */
+		else if (inst.SUBOP10 == 412) // orcx
 			gpr.SetImmediate32(a, (u32)gpr.R(s).offset | (~(u32)gpr.R(b).offset));
-		else if (inst.SUBOP10 == 316) /* xorx */
+		else if (inst.SUBOP10 == 316) // xorx
 			gpr.SetImmediate32(a, (u32)gpr.R(s).offset ^ (u32)gpr.R(b).offset);
-		else if (inst.SUBOP10 == 284) /* eqvx */
+		else if (inst.SUBOP10 == 284) // eqvx
 			gpr.SetImmediate32(a, ~((u32)gpr.R(s).offset ^ (u32)gpr.R(b).offset));
+
 		if (inst.Rc)
 		{
 			ComputeRC(gpr.R(a));
@@ -575,16 +618,16 @@ void Jit64::boolX(UGeckoInstruction inst)
 		OpArg operand = ((a == s) ? gpr.R(b) : gpr.R(s));
 		gpr.BindToRegister(a, true, true);
 
-		if (inst.SUBOP10 == 28) /* andx */
+		if (inst.SUBOP10 == 28) // andx
 		{
 			AND(32, gpr.R(a), operand);
 		}
-		else if (inst.SUBOP10 == 476) /* nandx */
+		else if (inst.SUBOP10 == 476) // nandx
 		{
 			AND(32, gpr.R(a), operand);
 			NOT(32, gpr.R(a));
 		}
-		else if (inst.SUBOP10 == 60) /* andcx */
+		else if (inst.SUBOP10 == 60) // andcx
 		{
 			if (a == b)
 			{
@@ -598,16 +641,16 @@ void Jit64::boolX(UGeckoInstruction inst)
 				AND(32, gpr.R(a), R(EAX));
 			}
 		}
-		else if (inst.SUBOP10 == 444) /* orx */
+		else if (inst.SUBOP10 == 444) // orx
 		{
 			OR(32, gpr.R(a), operand);
 		}
-		else if (inst.SUBOP10 == 124) /* norx */
+		else if (inst.SUBOP10 == 124) // norx
 		{
 			OR(32, gpr.R(a), operand);
 			NOT(32, gpr.R(a));
 		}
-		else if (inst.SUBOP10 == 412) /* orcx */
+		else if (inst.SUBOP10 == 412) // orcx
 		{
 			if (a == b)
 			{
@@ -621,11 +664,11 @@ void Jit64::boolX(UGeckoInstruction inst)
 				OR(32, gpr.R(a), R(EAX));
 			}
 		}
-		else if (inst.SUBOP10 == 316) /* xorx */
+		else if (inst.SUBOP10 == 316) // xorx
 		{
 			XOR(32, gpr.R(a), operand);
 		}
-		else if (inst.SUBOP10 == 284) /* eqvx */
+		else if (inst.SUBOP10 == 284) // eqvx
 		{
 			NOT(32, gpr.R(a));
 			XOR(32, gpr.R(a), operand);
@@ -643,46 +686,46 @@ void Jit64::boolX(UGeckoInstruction inst)
 		gpr.Lock(a,s,b);
 		gpr.BindToRegister(a, false, true);
 
-		if (inst.SUBOP10 == 28) /* andx */
+		if (inst.SUBOP10 == 28) // andx
 		{
 			MOV(32, gpr.R(a), gpr.R(s));
 			AND(32, gpr.R(a), gpr.R(b));
 		}
-		else if (inst.SUBOP10 == 476) /* nandx */
+		else if (inst.SUBOP10 == 476) // nandx
 		{
 			MOV(32, gpr.R(a), gpr.R(s));
 			AND(32, gpr.R(a), gpr.R(b));
 			NOT(32, gpr.R(a));
 		}
-		else if (inst.SUBOP10 == 60) /* andcx */
+		else if (inst.SUBOP10 == 60) // andcx
 		{
 			MOV(32, gpr.R(a), gpr.R(b));
 			NOT(32, gpr.R(a));
 			AND(32, gpr.R(a), gpr.R(s));
 		}
-		else if (inst.SUBOP10 == 444) /* orx */
+		else if (inst.SUBOP10 == 444) // orx
 		{
 			MOV(32, gpr.R(a), gpr.R(s));
 			OR(32, gpr.R(a), gpr.R(b));
 		}
-		else if (inst.SUBOP10 == 124) /* norx */
+		else if (inst.SUBOP10 == 124) // norx
 		{
 			MOV(32, gpr.R(a), gpr.R(s));
 			OR(32, gpr.R(a), gpr.R(b));
 			NOT(32, gpr.R(a));
 		}
-		else if (inst.SUBOP10 == 412) /* orcx */
+		else if (inst.SUBOP10 == 412) // orcx
 		{
 			MOV(32, gpr.R(a), gpr.R(b));
 			NOT(32, gpr.R(a));
 			OR(32, gpr.R(a), gpr.R(s));
 		}
-		else if (inst.SUBOP10 == 316) /* xorx */
+		else if (inst.SUBOP10 == 316) // xorx
 		{
 			MOV(32, gpr.R(a), gpr.R(s));
 			XOR(32, gpr.R(a), gpr.R(b));
 		}
-		else if (inst.SUBOP10 == 284) /* eqvx */
+		else if (inst.SUBOP10 == 284) // eqvx
 		{
 			MOV(32, gpr.R(a), gpr.R(s));
 			NOT(32, gpr.R(a));
@@ -992,13 +1035,25 @@ void Jit64::mulli(UGeckoInstruction inst)
 		else if ((imm & (imm - 1)) == 0)
 		{
 			u32 shift = 0;
-			if (imm & 0xFFFF0000) shift |= 16;
-			if (imm & 0xFF00FF00) shift |= 8;
-			if (imm & 0xF0F0F0F0) shift |= 4;
-			if (imm & 0xCCCCCCCC) shift |= 2;
-			if (imm & 0xAAAAAAAA) shift |= 1;
+
+			if (imm & 0xFFFF0000)
+				shift |= 16;
+
+			if (imm & 0xFF00FF00)
+				shift |= 8;
+
+			if (imm & 0xF0F0F0F0)
+				shift |= 4;
+
+			if (imm & 0xCCCCCCCC)
+				shift |= 2;
+
+			if (imm & 0xAAAAAAAA)
+				shift |= 1;
+
 			if (d != a)
 				MOV(32, gpr.R(d), gpr.R(a));
+
 			if (shift)
 				SHL(32, gpr.R(d), Imm8(shift));
 		}
@@ -1047,13 +1102,25 @@ void Jit64::mullwx(UGeckoInstruction inst)
 		else if ((imm & (imm - 1)) == 0 && !inst.OE)
 		{
 			u32 shift = 0;
-			if (imm & 0xFFFF0000) shift |= 16;
-			if (imm & 0xFF00FF00) shift |= 8;
-			if (imm & 0xF0F0F0F0) shift |= 4;
-			if (imm & 0xCCCCCCCC) shift |= 2;
-			if (imm & 0xAAAAAAAA) shift |= 1;
+
+			if (imm & 0xFFFF0000)
+				shift |= 16;
+
+			if (imm & 0xFF00FF00)
+				shift |= 8;
+
+			if (imm & 0xF0F0F0F0)
+				shift |= 4;
+
+			if (imm & 0xCCCCCCCC)
+				shift |= 2;
+
+			if (imm & 0xAAAAAAAA)
+				shift |= 1;
+
 			if (d != src)
 				MOV(32, gpr.R(d), gpr.R(src));
+
 			if (shift)
 				SHL(32, gpr.R(d), Imm8(shift));
 		}
@@ -1517,7 +1584,7 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
 	int s = inst.RS;
 	if (gpr.R(s).IsImm())
 	{
-		unsigned result = (int)gpr.R(s).offset;
+		u32 result = (int)gpr.R(s).offset;
 		if (inst.SH != 0)
 			result = _rotl(result, inst.SH);
 		result &= Helper_Mask(inst.MB, inst.ME);
@@ -1554,6 +1621,7 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
 		{
 			ROL(32, gpr.R(a), Imm8(inst.SH));
 		}
+
 		if (!(inst.MB==0 && inst.ME==31))
 		{
 			AND(32, gpr.R(a), Imm32(Helper_Mask(inst.MB, inst.ME)));
@@ -1604,10 +1672,12 @@ void Jit64::rlwimix(UGeckoInstruction inst)
 			{
 				MOV(32, gpr.R(a), gpr.R(s));
 			}
+
 			if (inst.SH)
 			{
 				ROL(32, gpr.R(a), Imm8(inst.SH));
 			}
+
 			if (inst.Rc)
 			{
 				ComputeRC(gpr.R(a));
@@ -1637,6 +1707,7 @@ void Jit64::rlwimix(UGeckoInstruction inst)
 			AND(32, R(EAX), Imm32(mask));
 			XOR(32, gpr.R(a), R(EAX));
 		}
+
 		if (inst.Rc)
 			ComputeRC(gpr.R(a));
 	}
@@ -1700,6 +1771,7 @@ void Jit64::negx(UGeckoInstruction inst)
 		{
 			ComputeRC(gpr.R(d));
 		}
+
 		if (inst.OE)
 		{
 			GenerateConstantOverflow(gpr.R(d).offset == 0x80000000);
@@ -1821,7 +1893,9 @@ void Jit64::srawx(UGeckoInstruction inst)
 	SetJumpTarget(nocarry);
 	gpr.UnlockAll();
 	gpr.UnlockAllX();
-	if (inst.Rc) {
+
+	if (inst.Rc)
+	{
 		ComputeRC(gpr.R(a));
 	}
 }
@@ -1888,8 +1962,10 @@ void Jit64::cntlzwx(UGeckoInstruction inst)
 		u32 mask = 0x80000000;
 		u32 i = 0;
 		for (; i < 32; i++, mask >>= 1)
+		{
 			if ((u32)gpr.R(s).offset & mask)
 				break;
+		}
 		gpr.SetImmediate32(a, i);
 	}
 	else
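The mulli/mullwx hunks above lower a multiply by a power of two to a shift, computing the shift amount with a branch-free binary search over the set bit. Extracted into a standalone sketch (hypothetical helper name; assumes imm is a nonzero power of two, as the (imm & (imm - 1)) == 0 guard ensures):

#include <cstdint>

static std::uint32_t Log2OfPowerOfTwo(std::uint32_t imm)
{
	std::uint32_t shift = 0;

	// Each mask covers the bit positions where the corresponding result bit is 1.
	if (imm & 0xFFFF0000) shift |= 16;
	if (imm & 0xFF00FF00) shift |= 8;
	if (imm & 0xF0F0F0F0) shift |= 4;
	if (imm & 0xCCCCCCCC) shift |= 2;
	if (imm & 0xAAAAAAAA) shift |= 1;

	return shift; // e.g. Log2OfPowerOfTwo(0x80) == 7
}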
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
index 484c072166..c671010c9d 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
@@ -30,26 +30,26 @@ void Jit64::lXXx(UGeckoInstruction inst)
 	bool signExtend = false;
 	switch (inst.OPCD)
 	{
-	case 32: /* lwz */
-	case 33: /* lwzu */
+	case 32: // lwz
+	case 33: // lwzu
 		accessSize = 32;
 		signExtend = false;
 		break;
-	case 34: /* lbz */
-	case 35: /* lbzu */
+	case 34: // lbz
+	case 35: // lbzu
 		accessSize = 8;
 		signExtend = false;
 		break;
-	case 40: /* lhz */
-	case 41: /* lhzu */
+	case 40: // lhz
+	case 41: // lhzu
 		accessSize = 16;
 		signExtend = false;
 		break;
-	case 42: /* lha */
-	case 43: /* lhau */
+	case 42: // lha
+	case 43: // lhau
 		accessSize = 16;
 		signExtend = true;
 		break;
@@ -57,25 +57,25 @@ void Jit64::lXXx(UGeckoInstruction inst)
 	case 31:
 		switch (inst.SUBOP10)
 		{
-		case 23: /* lwzx */
-		case 55: /* lwzux */
+		case 23: // lwzx
+		case 55: // lwzux
 			accessSize = 32;
 			signExtend = false;
 			break;
-		case 87: /* lbzx */
-		case 119: /* lbzux */
+		case 87: // lbzx
+		case 119: // lbzux
 			accessSize = 8;
 			signExtend = false;
 			break;
-		case 279: /* lhzx */
-		case 311: /* lhzux */
+		case 279: // lhzx
+		case 311: // lhzux
 			accessSize = 16;
 			signExtend = false;
 			break;
-		case 343: /* lhax */
-		case 375: /* lhaux */
+		case 343: // lhax
+		case 375: // lhaux
 			accessSize = 16;
 			signExtend = true;
 			break;
@@ -96,11 +96,11 @@ void Jit64::lXXx(UGeckoInstruction inst)
 	// ... maybe the throttle one already do that :p
 	// if (CommandProcessor::AllowIdleSkipping() && PixelEngine::AllowIdleSkipping())
 	if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle &&
-	    inst.OPCD == 32 &&
-	    (inst.hex & 0xFFFF0000) == 0x800D0000 &&
-	    (Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 ||
-	    (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) &&
-	    Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
+		inst.OPCD == 32 &&
+		(inst.hex & 0xFFFF0000) == 0x800D0000 &&
+		(Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 ||
+		(SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) &&
+		Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
 	{
 		// TODO(LinesPrower):
 		// - Rewrite this!
@@ -259,10 +259,18 @@ void Jit64::stX(UGeckoInstruction inst)
 	int accessSize;
 	switch (inst.OPCD & ~1)
 	{
-	case 36: accessSize = 32; break; //stw
-	case 44: accessSize = 16; break; //sth
-	case 38: accessSize = 8; break;  //stb
-	default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return;
+	case 36: // stw
+		accessSize = 32;
+		break;
+	case 44: // sth
+		accessSize = 16;
+		break;
+	case 38: // stb
+		accessSize = 8;
+		break;
+	default:
+		_assert_msg_(DYNA_REC, 0, "stX: Invalid access size.");
+		return;
 	}
 
 	if ((a == 0) || gpr.R(a).IsImm())
@@ -273,18 +281,27 @@ void Jit64::stX(UGeckoInstruction inst)
 		addr += offset;
 		if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe)
 		{
-			MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
+			// Helps external systems know which instruction triggered the write
+			MOV(32, M(&PC), Imm32(jit->js.compilerPC));
+
 			gpr.FlushLockX(ABI_PARAM1);
 			MOV(32, R(ABI_PARAM1), gpr.R(s));
 			if (update)
 				gpr.SetImmediate32(a, addr);
+
+			// No need to protect these, they don't touch any state
+			// question - should we inline them instead? Pro: Lose a CALL Con: Code bloat
 			switch (accessSize)
 			{
-			// No need to protect these, they don't touch any state
-			// question - should we inline them instead? Pro: Lose a CALL Con: Code bloat
-			case 8: CALL((void *)asm_routines.fifoDirectWrite8); break;
-			case 16: CALL((void *)asm_routines.fifoDirectWrite16); break;
-			case 32: CALL((void *)asm_routines.fifoDirectWrite32); break;
+			case 8:
+				CALL((void *)asm_routines.fifoDirectWrite8);
+				break;
+			case 16:
+				CALL((void *)asm_routines.fifoDirectWrite16);
+				break;
+			case 32:
+				CALL((void *)asm_routines.fifoDirectWrite32);
+				break;
 			}
 			js.fifoBytesThisBlock += accessSize >> 3;
 			gpr.UnlockAllX();
@@ -300,14 +317,22 @@ void Jit64::stX(UGeckoInstruction inst)
 		}
 		else
 		{
-			MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
+			// Helps external systems know which instruction triggered the write
+			MOV(32, M(&PC), Imm32(jit->js.compilerPC));
+
 			u32 registersInUse = RegistersInUse();
 			ABI_PushRegistersAndAdjustStack(registersInUse, false);
 			switch (accessSize)
 			{
-			case 32: ABI_CallFunctionAC(true ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), gpr.R(s), addr); break;
-			case 16: ABI_CallFunctionAC(true ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), gpr.R(s), addr); break;
-			case 8: ABI_CallFunctionAC((void *)&Memory::Write_U8, gpr.R(s), addr); break;
+			case 32:
+				ABI_CallFunctionAC(true ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), gpr.R(s), addr);
+				break;
+			case 16:
+				ABI_CallFunctionAC(true ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), gpr.R(s), addr);
+				break;
+			case 8:
+				ABI_CallFunctionAC((void *)&Memory::Write_U8, gpr.R(s), addr);
+				break;
 			}
 			ABI_PopRegistersAndAdjustStack(registersInUse, false);
 			if (update)
@@ -359,17 +384,29 @@ void Jit64::stXx(UGeckoInstruction inst)
 		ADD(32, gpr.R(a), gpr.R(b));
 		MOV(32, R(EDX), gpr.R(a));
 		MEMCHECK_END
-	} else {
+	}
+	else
+	{
 		MOV(32, R(EDX), gpr.R(a));
 		ADD(32, R(EDX), gpr.R(b));
 	}
+
 	int accessSize;
-	switch (inst.SUBOP10 & ~32) {
-	case 151: accessSize = 32; break;
-	case 407: accessSize = 16; break;
-	case 215: accessSize = 8; break;
-	default: PanicAlert("stXx: invalid access size");
-		accessSize = 0; break;
+	switch (inst.SUBOP10 & ~32)
+	{
+	case 151:
+		accessSize = 32;
+		break;
+	case 407:
+		accessSize = 16;
+		break;
+	case 215:
+		accessSize = 8;
+		break;
+	default:
+		PanicAlert("stXx: invalid access size");
+		accessSize = 0;
+		break;
 	}
 
 	MOV(32, R(ECX), gpr.R(s));
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
index fbff119cb7..1129d5e833 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
@@ -47,12 +47,15 @@ void Jit64::psq_st(UGeckoInstruction inst)
 	MOVZX(32, 8, EDX, R(AL));
 
 	// FIXME: Fix ModR/M encoding to allow [EDX*4+disp32] without a base register!
-	if (inst.W) {
+	if (inst.W)
+	{
 		// One value
 		PXOR(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions.
 		CVTSD2SS(XMM0, fpr.R(s));
 		CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)asm_routines.singleStoreQuantized));
-	} else {
+	}
+	else
+	{
 		// Pair of values
 		CVTPD2PS(XMM0, fpr.R(s));
 		CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized));
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
index 18bb56ebb9..6934f56d42 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
@@ -156,12 +156,21 @@ void Jit64::ps_arith(UGeckoInstruction inst)
 	switch (inst.SUBOP5)
 	{
-	case 18: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD); break; //div
-	case 20: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD); break; //sub
-	case 21: tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD); break; //add
-	case 25: tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD); break; //mul
+	case 18: // div
+		tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD);
+		break;
+	case 20: // sub
+		tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD);
+		break;
+	case 21: // add
+		tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD);
+		break;
+	case 25: // mul
+		tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD);
+		break;
 	default:
 		_assert_msg_(DYNA_REC, 0, "ps_arith WTF!!!");
+		break;
 	}
 }
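ps_arith above dispatches each opcode through a pointer-to-member emitter function (&XEmitter::ADDPD and friends), so a single tri_op body serves every arithmetic op. A minimal, runnable sketch of that C++ idiom with a hypothetical Emitter type (the real tri_op takes register indices and a reversibility flag):

#include <cstdio>

struct Emitter
{
	void ADDPD(int d, int s) { std::printf("addpd xmm%d, xmm%d\n", d, s); }
	void MULPD(int d, int s) { std::printf("mulpd xmm%d, xmm%d\n", d, s); }
};

// One body handles every op; the caller selects the instruction via a member pointer.
static void tri_op_demo(Emitter& emit, void (Emitter::*op)(int, int), int d, int s)
{
	(emit.*op)(d, s);
}

int main()
{
	Emitter e;
	tri_op_demo(e, &Emitter::ADDPD, 0, 1); // prints "addpd xmm0, xmm1"
	tri_op_demo(e, &Emitter::MULPD, 2, 3); // prints "mulpd xmm2, xmm3"
}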
diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp
index 85b15ffc1c..a3b4a91881 100644
--- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp
+++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp
@@ -36,7 +36,8 @@ using namespace Gen;
 
 static const unsigned int MAX_NUMBER_OF_REGS = 16;
 
-struct RegInfo {
+struct RegInfo
+{
 	JitIL *Jit;
 	IRBuilder* Build;
 	InstLoc FirstI;
@@ -48,8 +49,10 @@ struct RegInfo {
 	unsigned numFSpills;
 	unsigned exitNumber;
 
-	RegInfo(JitIL* j, InstLoc f, unsigned insts) : Jit(j), FirstI(f), IInfo(insts), lastUsed(insts) {
-		for (unsigned i = 0; i < MAX_NUMBER_OF_REGS; i++) {
+	RegInfo(JitIL* j, InstLoc f, unsigned insts) : Jit(j), FirstI(f), IInfo(insts), lastUsed(insts)
+	{
+		for (unsigned i = 0; i < MAX_NUMBER_OF_REGS; i++)
+		{
 			regs[i] = nullptr;
 			fregs[i] = nullptr;
 		}
@@ -62,7 +65,8 @@ struct RegInfo {
 	RegInfo(RegInfo&); // DO NOT IMPLEMENT
 };
 
-static u32 regsInUse(RegInfo& R) {
+static u32 regsInUse(RegInfo& R)
+{
 	u32 result = 0;
 	for (unsigned i = 0; i < MAX_NUMBER_OF_REGS; i++)
 	{
@@ -74,65 +78,88 @@ static u32 regsInUse(RegInfo& R) {
 	return result;
 }
 
-static void regMarkUse(RegInfo& R, InstLoc I, InstLoc Op, unsigned OpNum) {
+static void regMarkUse(RegInfo& R, InstLoc I, InstLoc Op, unsigned OpNum)
+{
 	unsigned& info = R.IInfo[Op - R.FirstI];
-	if (info == 0) R.IInfo[I - R.FirstI] |= 1 << (OpNum + 1);
-	if (info < 2) info++;
+
+	if (info == 0)
+		R.IInfo[I - R.FirstI] |= 1 << (OpNum + 1);
+
+	if (info < 2)
+		info++;
+
 	R.lastUsed[Op - R.FirstI] = std::max(R.lastUsed[Op - R.FirstI], I);
 }
 
-static unsigned regReadUse(RegInfo& R, InstLoc I) {
+static unsigned regReadUse(RegInfo& R, InstLoc I)
+{
 	return R.IInfo[I - R.FirstI] & 3;
 }
 
 static u64 SlotSet[1000];
 static u8 GC_ALIGNED16(FSlotSet[16*1000]);
 
-static OpArg regLocForSlot(RegInfo& RI, unsigned slot) {
+static OpArg regLocForSlot(RegInfo& RI, unsigned slot)
+{
 	return M(&SlotSet[slot - 1]);
 }
 
-static unsigned regCreateSpill(RegInfo& RI, InstLoc I) {
+static unsigned regCreateSpill(RegInfo& RI, InstLoc I)
+{
 	unsigned newSpill = ++RI.numSpills;
 	RI.IInfo[I - RI.FirstI] |= newSpill << 16;
 	return newSpill;
 }
 
-static unsigned regGetSpill(RegInfo& RI, InstLoc I) {
+static unsigned regGetSpill(RegInfo& RI, InstLoc I)
+{
 	return RI.IInfo[I - RI.FirstI] >> 16;
 }
 
-static void regSpill(RegInfo& RI, X64Reg reg) {
-	if (!RI.regs[reg]) return;
+static void regSpill(RegInfo& RI, X64Reg reg)
+{
+	if (!RI.regs[reg])
+		return;
+
 	unsigned slot = regGetSpill(RI, RI.regs[reg]);
-	if (!slot) {
+	if (!slot)
+	{
 		slot = regCreateSpill(RI, RI.regs[reg]);
 		RI.Jit->MOV(64, regLocForSlot(RI, slot), R(reg));
 	}
+
 	RI.regs[reg] = nullptr;
 }
 
-static OpArg fregLocForSlot(RegInfo& RI, unsigned slot) {
+static OpArg fregLocForSlot(RegInfo& RI, unsigned slot)
+{
 	return M(&FSlotSet[slot*16]);
 }
 
-static unsigned fregCreateSpill(RegInfo& RI, InstLoc I) {
+static unsigned fregCreateSpill(RegInfo& RI, InstLoc I)
+{
 	unsigned newSpill = ++RI.numFSpills;
 	RI.IInfo[I - RI.FirstI] |= newSpill << 16;
 	return newSpill;
 }
 
-static unsigned fregGetSpill(RegInfo& RI, InstLoc I) {
+static unsigned fregGetSpill(RegInfo& RI, InstLoc I)
+{
 	return RI.IInfo[I - RI.FirstI] >> 16;
 }
 
-static void fregSpill(RegInfo& RI, X64Reg reg) {
-	if (!RI.fregs[reg]) return;
+static void fregSpill(RegInfo& RI, X64Reg reg)
+{
+	if (!RI.fregs[reg])
+		return;
+
 	unsigned slot = fregGetSpill(RI, RI.fregs[reg]);
-	if (!slot) {
+	if (!slot)
+	{
 		slot = fregCreateSpill(RI, RI.fregs[reg]);
 		RI.Jit->MOVAPD(fregLocForSlot(RI, slot), reg);
 	}
+
 	RI.fregs[reg] = nullptr;
 }
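For context on the next hunk: when no host register is free, regFindFreeReg() evicts the live value whose recorded last use lies furthest ahead, a Belady-style heuristic. The selection loop, isolated into a sketch with hypothetical types (the real code compares InstLoc pointers into the IR stream):

#include <cstddef>
#include <vector>

// Returns the index of the victim: the live value whose last use is furthest away.
static std::size_t PickVictim(const std::vector<int>& lastUse)
{
	std::size_t best = 0;
	for (std::size_t i = 1; i < lastUse.size(); ++i)
	{
		if (lastUse[i] > lastUse[best])
			best = i;
	}
	return best;
}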
@@ -148,17 +175,23 @@ static const int RegAllocSize = sizeof(RegAllocOrder) / sizeof(X64Reg);
 static const X64Reg FRegAllocOrder[] = {XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5};
 static const int FRegAllocSize = sizeof(FRegAllocOrder) / sizeof(X64Reg);
 
-static X64Reg regFindFreeReg(RegInfo& RI) {
+static X64Reg regFindFreeReg(RegInfo& RI)
+{
 	for (auto& reg : RegAllocOrder)
+	{
 		if (RI.regs[reg] == nullptr)
 			return reg;
+	}
 
 	int bestIndex = -1;
 	InstLoc bestEnd = nullptr;
-	for (int i = 0; i < RegAllocSize; ++i) {
+	for (int i = 0; i < RegAllocSize; ++i)
+	{
 		const InstLoc start = RI.regs[RegAllocOrder[i]];
 		const InstLoc end = RI.lastUsed[start - RI.FirstI];
-		if (bestEnd < end) {
+
+		if (bestEnd < end)
+		{
 			bestEnd = end;
 			bestIndex = i;
 		}
@@ -169,17 +202,23 @@ static X64Reg regFindFreeReg(RegInfo& RI) {
 	return reg;
 }
 
-static X64Reg fregFindFreeReg(RegInfo& RI) {
+static X64Reg fregFindFreeReg(RegInfo& RI)
+{
 	for (auto& reg : FRegAllocOrder)
+	{
 		if (RI.fregs[reg] == nullptr)
 			return reg;
+	}
 
 	int bestIndex = -1;
 	InstLoc bestEnd = nullptr;
-	for (int i = 0; i < FRegAllocSize; ++i) {
+	for (int i = 0; i < FRegAllocSize; ++i)
+	{
 		const InstLoc start = RI.fregs[FRegAllocOrder[i]];
 		const InstLoc end = RI.lastUsed[start - RI.FirstI];
-		if (bestEnd < end) {
+
+		if (bestEnd < end)
+		{
 			bestEnd = end;
 			bestIndex = i;
 		}
@@ -190,10 +229,13 @@ static X64Reg fregFindFreeReg(RegInfo& RI) {
 	return reg;
 }
 
-static OpArg regLocForInst(RegInfo& RI, InstLoc I) {
+static OpArg regLocForInst(RegInfo& RI, InstLoc I)
+{
 	for (auto& reg : RegAllocOrder)
+	{
 		if (RI.regs[reg] == I)
 			return R(reg);
+	}
 
 	unsigned slot = regGetSpill(RI, I);
 	if (!slot)
@@ -201,10 +243,13 @@ static OpArg regLocForInst(RegInfo& RI, InstLoc I) {
 	return regLocForSlot(RI, slot);
 }
 
-static OpArg fregLocForInst(RegInfo& RI, InstLoc I) {
+static OpArg fregLocForInst(RegInfo& RI, InstLoc I)
+{
 	for (auto& reg : FRegAllocOrder)
+	{
 		if (RI.fregs[reg] == I)
 			return R(reg);
+	}
 
 	unsigned slot = fregGetSpill(RI, I);
 	if (!slot)
@@ -212,39 +257,54 @@ static OpArg fregLocForInst(RegInfo& RI, InstLoc I) {
 	return fregLocForSlot(RI, slot);
 }
 
-static void regClearInst(RegInfo& RI, InstLoc I) {
+static void regClearInst(RegInfo& RI, InstLoc I)
+{
 	for (auto& reg : RegAllocOrder)
+	{
 		if (RI.regs[reg] == I)
 			RI.regs[reg] = nullptr;
+	}
 }
 
-static void fregClearInst(RegInfo& RI, InstLoc I) {
+static void fregClearInst(RegInfo& RI, InstLoc I)
+{
 	for (auto& reg : FRegAllocOrder)
+	{
 		if (RI.fregs[reg] == I)
 			RI.fregs[reg] = nullptr;
+	}
 }
 
-static X64Reg regEnsureInReg(RegInfo& RI, InstLoc I) {
+static X64Reg regEnsureInReg(RegInfo& RI, InstLoc I)
+{
 	OpArg loc = regLocForInst(RI, I);
-	if (!loc.IsSimpleReg()) {
+
+	if (!loc.IsSimpleReg())
+	{
 		X64Reg newReg = regFindFreeReg(RI);
 		RI.Jit->MOV(32, R(newReg), loc);
 		loc = R(newReg);
 	}
+
 	return loc.GetSimpleReg();
 }
 
-static X64Reg fregEnsureInReg(RegInfo& RI, InstLoc I) {
+static X64Reg fregEnsureInReg(RegInfo& RI, InstLoc I)
+{
 	OpArg loc = fregLocForInst(RI, I);
-	if (!loc.IsSimpleReg()) {
+
+	if (!loc.IsSimpleReg())
+	{
 		X64Reg newReg = fregFindFreeReg(RI);
 		RI.Jit->MOVAPD(newReg, loc);
 		loc = R(newReg);
 	}
+
 	return loc.GetSimpleReg();
 }
 
-static void regSpillCallerSaved(RegInfo& RI) {
+static void regSpillCallerSaved(RegInfo& RI)
+{
 	regSpill(RI, RCX);
 	regSpill(RI, RDX);
 	regSpill(RI, RSI);
@@ -255,50 +315,70 @@ static void regSpillCallerSaved(RegInfo& RI) {
 	regSpill(RI, R11);
 }
 
-static X64Reg regUReg(RegInfo& RI, InstLoc I) {
+static X64Reg regUReg(RegInfo& RI, InstLoc I)
+{
 	const OpArg loc = regLocForInst(RI, getOp1(I));
-	if ((RI.IInfo[I - RI.FirstI] & 4) && loc.IsSimpleReg()) {
+
+	if ((RI.IInfo[I - RI.FirstI] & 4) && loc.IsSimpleReg())
+	{
 		return loc.GetSimpleReg();
 	}
+
 	return regFindFreeReg(RI);
 }
 
 // Recycle the register if the lifetime of op1 register ends at I.
-static X64Reg fregURegWithoutMov(RegInfo& RI, InstLoc I) {
+static X64Reg fregURegWithoutMov(RegInfo& RI, InstLoc I)
+{
 	const OpArg loc = fregLocForInst(RI, getOp1(I));
-	if ((RI.IInfo[I - RI.FirstI] & 4) && loc.IsSimpleReg()) {
+
+	if ((RI.IInfo[I - RI.FirstI] & 4) && loc.IsSimpleReg())
+	{
 		return loc.GetSimpleReg();
 	}
+
 	return fregFindFreeReg(RI);
 }
 
-static X64Reg fregURegWithMov(RegInfo& RI, InstLoc I) {
+static X64Reg fregURegWithMov(RegInfo& RI, InstLoc I)
+{
 	const OpArg loc = fregLocForInst(RI, getOp1(I));
-	if ((RI.IInfo[I - RI.FirstI] & 4) && loc.IsSimpleReg()) {
+
+	if ((RI.IInfo[I - RI.FirstI] & 4) && loc.IsSimpleReg())
+	{
 		return loc.GetSimpleReg();
 	}
+
 	X64Reg reg = fregFindFreeReg(RI);
 	RI.Jit->MOVAPD(reg, loc);
 	return reg;
 }
 
 // Recycle the register if the lifetime of op1 register ends at I.
-static X64Reg fregBinLHSRegWithMov(RegInfo& RI, InstLoc I) {
+static X64Reg fregBinLHSRegWithMov(RegInfo& RI, InstLoc I)
+{
 	const OpArg loc = fregLocForInst(RI, getOp1(I));
-	if ((RI.IInfo[I - RI.FirstI] & 4) && loc.IsSimpleReg()) {
+
+	if ((RI.IInfo[I - RI.FirstI] & 4) && loc.IsSimpleReg())
+	{
 		return loc.GetSimpleReg();
 	}
+
 	X64Reg reg = fregFindFreeReg(RI);
 	RI.Jit->MOVAPD(reg, loc);
 	return reg;
 }
 
 // Recycle the register if the lifetime of op2 register ends at I.
-static X64Reg fregBinRHSRegWithMov(RegInfo& RI, InstLoc I) {
+static X64Reg fregBinRHSRegWithMov(RegInfo& RI, InstLoc I)
+{
 	const OpArg loc = fregLocForInst(RI, getOp2(I));
-	if ((RI.IInfo[I - RI.FirstI] & 8) && loc.IsSimpleReg()) {
+
+	if ((RI.IInfo[I - RI.FirstI] & 8) && loc.IsSimpleReg())
+	{
 		return loc.GetSimpleReg();
 	}
+
 	X64Reg reg = fregFindFreeReg(RI);
 	RI.Jit->MOVAPD(reg, loc);
 	return reg;
@@ -306,36 +386,44 @@ static X64Reg fregBinRHSRegWithMov(RegInfo& RI, InstLoc I) {
 
 // If the lifetime of the register used by an operand ends at I,
 // return the register. Otherwise return a free register.
-static X64Reg regBinReg(RegInfo& RI, InstLoc I) {
+static X64Reg regBinReg(RegInfo& RI, InstLoc I)
+{
 	// FIXME: When regLocForInst() is extracted as a local variable,
 	// "Retrieving unknown spill slot?!" is shown.
-	if ((RI.IInfo[I - RI.FirstI] & 4) &&
-	    regLocForInst(RI, getOp1(I)).IsSimpleReg()) {
+	if ((RI.IInfo[I - RI.FirstI] & 4) && regLocForInst(RI, getOp1(I)).IsSimpleReg())
+	{
 		return regLocForInst(RI, getOp1(I)).GetSimpleReg();
-	} else if ((RI.IInfo[I - RI.FirstI] & 8) &&
-	           regLocForInst(RI, getOp2(I)).IsSimpleReg()) {
+	}
+	else if ((RI.IInfo[I - RI.FirstI] & 8) && regLocForInst(RI, getOp2(I)).IsSimpleReg())
+	{
 		return regLocForInst(RI, getOp2(I)).GetSimpleReg();
 	}
+
 	return regFindFreeReg(RI);
 }
 
-static X64Reg regBinLHSReg(RegInfo& RI, InstLoc I) {
-	if (RI.IInfo[I - RI.FirstI] & 4) {
+static X64Reg regBinLHSReg(RegInfo& RI, InstLoc I)
+{
+	if (RI.IInfo[I - RI.FirstI] & 4)
+	{
 		return regEnsureInReg(RI, getOp1(I));
 	}
+
 	X64Reg reg = regFindFreeReg(RI);
 	RI.Jit->MOV(32, R(reg), regLocForInst(RI, getOp1(I)));
 	return reg;
 }
 
-static void regNormalRegClear(RegInfo& RI, InstLoc I) {
+static void regNormalRegClear(RegInfo& RI, InstLoc I)
+{
 	if (RI.IInfo[I - RI.FirstI] & 4)
 		regClearInst(RI, getOp1(I));
 	if (RI.IInfo[I - RI.FirstI] & 8)
 		regClearInst(RI, getOp2(I));
 }
 
-static void fregNormalRegClear(RegInfo& RI, InstLoc I) {
+static void fregNormalRegClear(RegInfo& RI, InstLoc I)
+{
 	if (RI.IInfo[I - RI.FirstI] & 4)
 		fregClearInst(RI, getOp1(I));
 	if (RI.IInfo[I - RI.FirstI] & 8)
@@ -343,45 +431,65 @@ static void fregNormalRegClear(RegInfo& RI, InstLoc I) {
 }
 
 static void regEmitBinInst(RegInfo& RI, InstLoc I,
-                           void (JitIL::*op)(int, const OpArg&,
-                                             const OpArg&),
-                           bool commutable = false) {
+                           void (JitIL::*op)(int, const OpArg&, const OpArg&),
+                           bool commutable = false)
+{
 	X64Reg reg;
 	bool commuted = false;
-	if (RI.IInfo[I - RI.FirstI] & 4) {
+	if (RI.IInfo[I - RI.FirstI] & 4)
+	{
 		reg = regEnsureInReg(RI, getOp1(I));
-	} else if (commutable && (RI.IInfo[I - RI.FirstI] & 8)) {
+	}
+	else if (commutable && (RI.IInfo[I - RI.FirstI] & 8))
+	{
 		reg = regEnsureInReg(RI, getOp2(I));
 		commuted = true;
-	} else {
+	}
+	else
+	{
 		reg = regFindFreeReg(RI);
 		RI.Jit->MOV(32, R(reg), regLocForInst(RI, getOp1(I)));
 	}
-	if (isImm(*getOp2(I))) {
+
+	if (isImm(*getOp2(I)))
+	{
 		unsigned RHS = RI.Build->GetImmValue(getOp2(I));
-		if (RHS + 128 < 256) {
+		if (RHS + 128 < 256)
+		{
 			(RI.Jit->*op)(32, R(reg), Imm8(RHS));
-		} else {
+		}
+		else
+		{
 			(RI.Jit->*op)(32, R(reg), Imm32(RHS));
 		}
-	} else if (commuted) {
+	}
+	else if (commuted)
+	{
 		(RI.Jit->*op)(32, R(reg), regLocForInst(RI, getOp1(I)));
-	} else {
+	}
+	else
+	{
 		(RI.Jit->*op)(32, R(reg), regLocForInst(RI, getOp2(I)));
 	}
+
 	RI.regs[reg] = I;
 	regNormalRegClear(RI, I);
 }
 
-static void fregEmitBinInst(RegInfo& RI, InstLoc I,
-                            void (JitIL::*op)(X64Reg, OpArg)) {
+static void fregEmitBinInst(RegInfo& RI, InstLoc I, void (JitIL::*op)(X64Reg, OpArg))
+{
 	X64Reg reg;
-	if (RI.IInfo[I - RI.FirstI] & 4) {
+
+	if (RI.IInfo[I - RI.FirstI] & 4)
+	{
 		reg = fregEnsureInReg(RI, getOp1(I));
-	} else {
+	}
+	else
+	{
 		reg = fregFindFreeReg(RI);
 		RI.Jit->MOVAPD(reg, fregLocForInst(RI, getOp1(I)));
 	}
+
 	(RI.Jit->*op)(reg, fregLocForInst(RI, getOp2(I)));
 	RI.fregs[reg] = I;
 	fregNormalRegClear(RI, I);
@@ -389,16 +497,21 @@ static void fregEmitBinInst(RegInfo& RI, InstLoc I,
 
 // Mark and calculation routines for profiled load/store addresses
 // Could be extended to unprofiled addresses.
-static void regMarkMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, unsigned OpNum) {
-	if (isImm(*AI)) {
+static void regMarkMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, unsigned OpNum)
+{
+	if (isImm(*AI))
+	{
 		unsigned addr = RI.Build->GetImmValue(AI);
 		if (Memory::IsRAMAddress(addr))
 			return;
 	}
-	if (getOpcode(*AI) == Add && isImm(*getOp2(AI))) {
+
+	if (getOpcode(*AI) == Add && isImm(*getOp2(AI)))
+	{
 		regMarkUse(RI, I, getOp1(AI), OpNum);
 		return;
 	}
+
 	regMarkUse(RI, I, AI, OpNum);
 }
 
@@ -406,48 +519,65 @@ static void regMarkMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, unsigned OpNum
 static std::pair<OpArg, unsigned> regBuildMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, unsigned OpNum, unsigned Size, X64Reg* dest)
 {
-	if (isImm(*AI)) {
+	if (isImm(*AI))
+	{
 		unsigned addr = RI.Build->GetImmValue(AI);
-		if (Memory::IsRAMAddress(addr)) {
+		if (Memory::IsRAMAddress(addr))
+		{
 			if (dest)
 				*dest = regFindFreeReg(RI);
+
 			return std::make_pair(Imm32(addr), 0);
 		}
 	}
+
 	unsigned offset;
 	InstLoc AddrBase;
-	if (getOpcode(*AI) == Add && isImm(*getOp2(AI))) {
+	if (getOpcode(*AI) == Add && isImm(*getOp2(AI)))
+	{
 		offset = RI.Build->GetImmValue(getOp2(AI));
 		AddrBase = getOp1(AI);
-	} else {
+	}
+	else
+	{
 		offset = 0;
 		AddrBase = AI;
 	}
+
 	X64Reg baseReg;
 	// Ok, this stuff needs a comment or three :P -ector
-	if (RI.IInfo[I - RI.FirstI] & (2 << OpNum)) {
+	if (RI.IInfo[I - RI.FirstI] & (2 << OpNum))
+	{
 		baseReg = regEnsureInReg(RI, AddrBase);
 		regClearInst(RI, AddrBase);
 		if (dest)
 			*dest = baseReg;
-	} else if (dest) {
+	}
+	else if (dest)
+	{
 		X64Reg reg = regFindFreeReg(RI);
 		const OpArg loc = regLocForInst(RI, AddrBase);
-		if (!loc.IsSimpleReg()) {
+		if (!loc.IsSimpleReg())
+		{
 			RI.Jit->MOV(32, R(reg), loc);
 			baseReg = reg;
-		} else {
+		}
+		else
+		{
 			baseReg = loc.GetSimpleReg();
 		}
 		*dest = reg;
-	} else {
+	}
+	else
+	{
 		baseReg = regEnsureInReg(RI, AddrBase);
 	}
 
 	return std::make_pair(R(baseReg), offset);
 }
 
-static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) {
+static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size)
+{
 	X64Reg reg;
 	auto info = regBuildMemAddress(RI, I, getOp1(I), 1, Size, &reg);
@@ -456,29 +586,43 @@ static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) {
 	RI.regs[reg] = I;
 }
 
-static OpArg regImmForConst(RegInfo& RI, InstLoc I, unsigned Size) {
+static OpArg regImmForConst(RegInfo& RI, InstLoc I, unsigned Size)
+{
 	unsigned imm = RI.Build->GetImmValue(I);
-	if (Size == 32) {
+
+	if (Size == 32)
+	{
 		return Imm32(imm);
-	} else if (Size == 16) {
+	}
+	else if (Size == 16)
+	{
 		return Imm16(imm);
-	} else {
+	}
+	else
+	{
 		return Imm8(imm);
 	}
 }
 
-static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size) {
+static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size)
+{
 	auto info = regBuildMemAddress(RI, I, getOp2(I), 2, Size, nullptr);
 	if (info.first.IsImm())
 		RI.Jit->MOV(32, R(ECX), info.first);
 	else
 		RI.Jit->LEA(32, ECX, MDisp(info.first.GetSimpleReg(), info.second));
+
 	regSpill(RI, EAX);
-	if (isImm(*getOp1(I))) {
+
+	if (isImm(*getOp1(I)))
+	{
 		RI.Jit->MOV(Size, R(EAX), regImmForConst(RI, getOp1(I), Size));
-	} else {
+	}
+	else
+	{
 		RI.Jit->MOV(32, R(EAX), regLocForInst(RI, getOp1(I)));
 	}
+
 	RI.Jit->SafeWriteRegToReg(EAX, ECX, Size, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
 	if (RI.IInfo[I - RI.FirstI] & 4)
 		regClearInst(RI, getOp1(I));
@@ -487,42 +631,55 @@ static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size) {
 
 static void regEmitShiftInst(RegInfo& RI, InstLoc I, void (JitIL::*op)(int, OpArg, OpArg))
 {
 	X64Reg reg = regBinLHSReg(RI, I);
-	if (isImm(*getOp2(I))) {
+
+	if (isImm(*getOp2(I)))
+	{
 		unsigned RHS = RI.Build->GetImmValue(getOp2(I));
 		(RI.Jit->*op)(32, R(reg), Imm8(RHS));
 		RI.regs[reg] = I;
 		return;
 	}
+
 	RI.Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I)));
 	(RI.Jit->*op)(32, R(reg), R(ECX));
 	RI.regs[reg] = I;
 	regNormalRegClear(RI, I);
 }
 
-static void regStoreInstToConstLoc(RegInfo& RI, unsigned width, InstLoc I, void* loc) {
-	if (width != 32) {
+static void regStoreInstToConstLoc(RegInfo& RI, unsigned width, InstLoc I, void* loc)
+{
+	if (width != 32)
+	{
 		PanicAlert("Not implemented!");
 		return;
 	}
-	if (isImm(*I)) {
+
+	if (isImm(*I))
+	{
 		RI.Jit->MOV(32, M(loc), Imm32(RI.Build->GetImmValue(I)));
 		return;
 	}
+
 	X64Reg reg = regEnsureInReg(RI, I);
 	RI.Jit->MOV(32, M(loc), R(reg));
 }
 
-static void regEmitCmp(RegInfo& RI, InstLoc I) {
-	if (isImm(*getOp2(I))) {
+static void regEmitCmp(RegInfo& RI, InstLoc I)
+{
+	if (isImm(*getOp2(I)))
+	{
 		unsigned RHS = RI.Build->GetImmValue(getOp2(I));
 		RI.Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm32(RHS));
-	} else {
+	}
+	else
+	{
 		X64Reg reg = regEnsureInReg(RI, getOp1(I));
 		RI.Jit->CMP(32, R(reg), regLocForInst(RI, getOp2(I)));
 	}
 }
 
-static void regEmitICmpInst(RegInfo& RI, InstLoc I, CCFlags flag) {
+static void regEmitICmpInst(RegInfo& RI, InstLoc I, CCFlags flag)
+{
 	regEmitCmp(RI, I);
 	RI.Jit->SETcc(flag, R(ECX)); // Caution: SETCC uses 8-bit regs!
 	X64Reg reg = regBinReg(RI, I);
@@ -531,9 +688,11 @@ static void regEmitICmpInst(RegInfo& RI, InstLoc I, CCFlags flag) {
 	regNormalRegClear(RI, I);
 }
 
-static void regEmitICmpCRInst(RegInfo& RI, InstLoc I) {
+static void regEmitICmpCRInst(RegInfo& RI, InstLoc I)
+{
 	bool signed_compare = getOpcode(*I) == ICmpCRSigned;
 	X64Reg reg;
+
 	if (RI.IInfo[I - RI.FirstI] & 4)
 	{
 		reg = regEnsureInReg(RI, getOp1(I));
@@ -548,6 +707,7 @@ static void regEmitICmpCRInst(RegInfo& RI, InstLoc I) {
 		else
 			RI.Jit->MOV(32, R(reg), regLocForInst(RI, getOp1(I)));
 	}
+
 	if (isImm(*getOp2(I)))
 	{
 		unsigned RHS = RI.Build->GetImmValue(getOp2(I));
@@ -569,36 +729,47 @@ static void regEmitICmpCRInst(RegInfo& RI, InstLoc I) {
 		RI.Jit->MOV(32, R(EAX), regLocForInst(RI, getOp2(I)));
 		RI.Jit->SUB(64, R(reg), R(RAX));
 	}
+
 	RI.regs[reg] = I;
 	regNormalRegClear(RI, I);
 }
 
-static void regWriteExit(RegInfo& RI, InstLoc dest) {
-	if (isImm(*dest)) {
+static void regWriteExit(RegInfo& RI, InstLoc dest)
+{
+	if (isImm(*dest))
+	{
 		RI.exitNumber++;
 		RI.Jit->WriteExit(RI.Build->GetImmValue(dest));
-	} else {
+	}
+	else
+	{
 		RI.Jit->WriteExitDestInOpArg(regLocForInst(RI, dest));
 	}
 }
 
 // Helper function to check floating point exceptions
 static double GC_ALIGNED16(isSNANTemp[2][2]);
-static bool checkIsSNAN() {
+static bool checkIsSNAN()
+{
 	return MathUtil::IsSNAN(isSNANTemp[0][0]) || MathUtil::IsSNAN(isSNANTemp[1][0]);
 }
 
-static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
+static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
+{
 	//printf("Writing block: %x\n", js.blockStart);
 	RegInfo RI(Jit, ibuild->getFirstInst(), ibuild->getNumInsts());
 	RI.Build = ibuild;
+
 	// Pass to compute liveness
 	ibuild->StartBackPass();
-	for (unsigned int index = (unsigned int)RI.IInfo.size() - 1; index != -1U; --index) {
+	for (unsigned int index = (unsigned int)RI.IInfo.size() - 1; index != -1U; --index)
+	{
 		InstLoc I = ibuild->ReadBackward();
 		unsigned int op = getOpcode(*I);
 		bool thisUsed = regReadUse(RI, I) ? true : false;
-		switch (op) {
+
+		switch (op)
+		{
 		default:
 			PanicAlert("Unexpected inst!");
 		case Nop:
@@ -717,7 +888,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
 		case FPMerge11:
 		case FDCmpCR:
 		case InsertDoubleInMReg:
-			if (thisUsed) {
+			if (thisUsed)
+			{
 				regMarkUse(RI, I, getOp1(I), 1);
 				if (!isImm(*getOp2(I)))
 					regMarkUse(RI, I, getOp2(I), 2);
@@ -743,12 +915,16 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
 		case IdleBranch:
 			regMarkUse(RI, I, getOp1(getOp1(I)), 1);
 			break;
-		case BranchCond: {
-			if (isICmp(*getOp1(I))) {
+		case BranchCond:
+		{
+			if (isICmp(*getOp1(I)))
+			{
 				regMarkUse(RI, I, getOp1(getOp1(I)), 1);
 				if (!isImm(*getOp2(getOp1(I))))
 					regMarkUse(RI, I, getOp2(getOp1(I)), 2);
-			} else {
+			}
+			else
+			{
 				regMarkUse(RI, I, getOp1(I), 1);
 			}
 			if (!isImm(*getOp2(I)))
@@ -759,16 +935,20 @@
 	}
 
 	ibuild->StartForwardPass();
-	for (unsigned i = 0; i != RI.IInfo.size(); i++) {
+	for (unsigned i = 0; i != RI.IInfo.size(); i++)
+	{
 		InstLoc I = ibuild->ReadForward();
 		bool thisUsed = regReadUse(RI, I) ? true : false;
-		if (thisUsed) {
+		if (thisUsed)
+		{
 			// Needed for IR Writer
 			ibuild->SetMarkUsed(I);
 		}
 
-		switch (getOpcode(*I)) {
-		case FallBackToInterpreter: {
+		switch (getOpcode(*I))
+		{
+		case FallBackToInterpreter:
+		{
 			unsigned InstCode = ibuild->GetImmValue(getOp1(I));
 			unsigned InstLoc = ibuild->GetImmValue(getOp2(I));
 			// There really shouldn't be anything live across an
@@ -781,53 +961,74 @@
 			          InstCode);
 			break;
 		}
-		case LoadGReg: {
-			if (!thisUsed) break;
+		case LoadGReg:
+		{
+			if (!thisUsed)
+				break;
+
 			X64Reg reg = regFindFreeReg(RI);
 			unsigned ppcreg = *I >> 8;
 			Jit->MOV(32, R(reg), M(&PowerPC::ppcState.gpr[ppcreg]));
 			RI.regs[reg] = I;
 			break;
 		}
-		case LoadCR: {
-			if (!thisUsed) break;
+		case LoadCR:
+		{
+			if (!thisUsed)
+				break;
+
 			X64Reg reg = regFindFreeReg(RI);
 			unsigned ppcreg = *I >> 8;
 			Jit->MOV(64, R(reg), M(&PowerPC::ppcState.cr_val[ppcreg]));
 			RI.regs[reg] = I;
 			break;
 		}
-		case LoadCTR: {
-			if (!thisUsed) break;
+		case LoadCTR:
+		{
+			if (!thisUsed)
+				break;
+
 			X64Reg reg = regFindFreeReg(RI);
 			Jit->MOV(32, R(reg), M(&CTR));
 			RI.regs[reg] = I;
 			break;
 		}
-		case LoadLink: {
-			if (!thisUsed) break;
+		case LoadLink:
+		{
+			if (!thisUsed)
+				break;
+
 			X64Reg reg = regFindFreeReg(RI);
 			Jit->MOV(32, R(reg), M(&LR));
 			RI.regs[reg] = I;
 			break;
 		}
-		case LoadMSR: {
-			if (!thisUsed) break;
+		case LoadMSR:
+		{
+			if (!thisUsed)
+				break;
+
 			X64Reg reg = regFindFreeReg(RI);
 			Jit->MOV(32, R(reg), M(&MSR));
 			RI.regs[reg] = I;
 			break;
 		}
-		case LoadGQR: {
-			if (!thisUsed) break;
+		case LoadGQR:
+		{
+			if (!thisUsed)
+				break;
+
 			X64Reg reg = regFindFreeReg(RI);
 			unsigned gqr = *I >> 8;
 			Jit->MOV(32, R(reg), M(&GQR(gqr)));
 			RI.regs[reg] = I;
 			break;
 		}
-		case LoadCarry: {
-			if (!thisUsed) break;
+		case LoadCarry:
+		{
+			if (!thisUsed)
+				break;
+
 			X64Reg reg = regFindFreeReg(RI);
 			Jit->MOV(32, R(reg), M(&PowerPC::ppcState.spr[SPR_XER]));
 			Jit->SHR(32, R(reg), Imm8(29));
@@ -835,31 +1036,36 @@
 			RI.regs[reg] = I;
 			break;
 		}
-		case StoreGReg: {
+		case StoreGReg:
+		{
 			unsigned ppcreg = *I >> 16;
 			regStoreInstToConstLoc(RI, 32, getOp1(I), &PowerPC::ppcState.gpr[ppcreg]);
 			regNormalRegClear(RI, I);
 			break;
 		}
-		case StoreCR: {
+		case StoreCR:
+		{
 			X64Reg reg = regEnsureInReg(RI, getOp1(I));
 			unsigned ppcreg = *I >> 16;
 			Jit->MOV(64, M(&PowerPC::ppcState.cr_val[ppcreg]), R(reg));
regNormalRegClear(RI, I); break; } - case StoreLink: { + case StoreLink: + { regStoreInstToConstLoc(RI, 32, getOp1(I), &LR); regNormalRegClear(RI, I); break; } - case StoreCTR: { + case StoreCTR: + { regStoreInstToConstLoc(RI, 32, getOp1(I), &CTR); regNormalRegClear(RI, I); break; } - case StoreMSR: { + case StoreMSR: + { unsigned InstLoc = ibuild->GetImmValue(getOp2(I)); regStoreInstToConstLoc(RI, 32, getOp1(I), &MSR); regNormalRegClear(RI, I); @@ -882,20 +1088,23 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { Jit->SetJumpTarget(noExceptionsPending); break; } - case StoreGQR: { + case StoreGQR: + { unsigned gqr = *I >> 16; regStoreInstToConstLoc(RI, 32, getOp1(I), &GQR(gqr)); regNormalRegClear(RI, I); break; } - case StoreSRR: { + case StoreSRR: + { unsigned srr = *I >> 16; regStoreInstToConstLoc(RI, 32, getOp1(I), &PowerPC::ppcState.spr[SPR_SRR0+srr]); regNormalRegClear(RI, I); break; } - case StoreCarry: { + case StoreCarry: + { Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm8(0)); FixupBranch nocarry = Jit->J_CC(CC_Z); Jit->JitSetCA(); @@ -906,7 +1115,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { regNormalRegClear(RI, I); break; } - case StoreFPRF: { + case StoreFPRF: + { Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); Jit->AND(32, R(ECX), Imm8(0x1F)); Jit->SHL(32, R(ECX), Imm8(12)); @@ -915,32 +1125,41 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { regNormalRegClear(RI, I); break; } - case Load8: { + case Load8: + { regEmitMemLoad(RI, I, 8); break; } - case Load16: { + case Load16: + { regEmitMemLoad(RI, I, 16); break; } - case Load32: { + case Load32: + { regEmitMemLoad(RI, I, 32); break; } - case Store8: { + case Store8: + { regEmitMemStore(RI, I, 8); break; } - case Store16: { + case Store16: + { regEmitMemStore(RI, I, 16); break; } - case Store32: { + case Store32: + { regEmitMemStore(RI, I, 32); break; } - case SExt8: { - if (!thisUsed) break; + case SExt8: + { + if (!thisUsed) + break; + X64Reg reg = regUReg(RI, I); Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); Jit->MOVSX(32, 8, reg, R(ECX)); @@ -948,16 +1167,22 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { regNormalRegClear(RI, I); break; } - case SExt16: { - if (!thisUsed) break; + case SExt16: + { + if (!thisUsed) + break; + X64Reg reg = regUReg(RI, I); Jit->MOVSX(32, 16, reg, regLocForInst(RI, getOp1(I))); RI.regs[reg] = I; regNormalRegClear(RI, I); break; } - case Cntlzw: { - if (!thisUsed) break; + case Cntlzw: + { + if (!thisUsed) + break; + X64Reg reg = regUReg(RI, I); Jit->MOV(32, R(ECX), Imm32(63)); Jit->BSR(32, reg, regLocForInst(RI, getOp1(I))); @@ -967,66 +1192,95 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { regNormalRegClear(RI, I); break; } - case Not: { - if (!thisUsed) break; + case Not: + { + if (!thisUsed) + break; + X64Reg reg = regBinLHSReg(RI, I); Jit->NOT(32, R(reg)); RI.regs[reg] = I; regNormalRegClear(RI, I); break; } - case And: { - if (!thisUsed) break; + case And: + { + if (!thisUsed) + break; + regEmitBinInst(RI, I, &JitIL::AND, true); break; } - case Xor: { - if (!thisUsed) break; + case Xor: + { + if (!thisUsed) + break; + regEmitBinInst(RI, I, &JitIL::XOR, true); break; } - case Sub: { - if (!thisUsed) break; + case Sub: + { + if (!thisUsed) + break; + regEmitBinInst(RI, I, &JitIL::SUB); break; } - case Or: { - if (!thisUsed) break; + case Or: + { + if (!thisUsed) + break; + regEmitBinInst(RI, I, &JitIL::OR, true); break; } - case Add: 
{ - if (!thisUsed) break; + case Add: + { + if (!thisUsed) + break; + regEmitBinInst(RI, I, &JitIL::ADD, true); break; } - case Mul: { - if (!thisUsed) break; + case Mul: + { + if (!thisUsed) + break; + // FIXME: Use three-address capability of IMUL! X64Reg reg = regBinLHSReg(RI, I); - if (isImm(*getOp2(I))) { + if (isImm(*getOp2(I))) + { unsigned RHS = RI.Build->GetImmValue(getOp2(I)); - if (RHS + 128 < 256) { + if (RHS + 128 < 256) Jit->IMUL(32, reg, Imm8(RHS)); - } else { + else Jit->IMUL(32, reg, Imm32(RHS)); - } - } else { + } + else + { Jit->IMUL(32, reg, regLocForInst(RI, getOp2(I))); } RI.regs[reg] = I; regNormalRegClear(RI, I); break; } - case MulHighUnsigned: { - if (!thisUsed) break; + case MulHighUnsigned: + { + if (!thisUsed) + break; + regSpill(RI, EAX); regSpill(RI, EDX); X64Reg reg = regBinReg(RI, I); - if (isImm(*getOp2(I))) { + if (isImm(*getOp2(I))) + { unsigned RHS = RI.Build->GetImmValue(getOp2(I)); Jit->MOV(32, R(EAX), Imm32(RHS)); - } else { + } + else + { Jit->MOV(32, R(EAX), regLocForInst(RI, getOp2(I))); } Jit->MUL(32, regLocForInst(RI, getOp1(I))); @@ -1035,91 +1289,139 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { regNormalRegClear(RI, I); break; } - case Rol: { - if (!thisUsed) break; + case Rol: + { + if (!thisUsed) + break; + regEmitShiftInst(RI, I, &JitIL::ROL); break; } - case Shl: { - if (!thisUsed) break; + case Shl: + { + if (!thisUsed) + break; + regEmitShiftInst(RI, I, &JitIL::SHL); break; } - case Shrl: { - if (!thisUsed) break; + case Shrl: + { + if (!thisUsed) + break; + regEmitShiftInst(RI, I, &JitIL::SHR); break; } - case Sarl: { - if (!thisUsed) break; + case Sarl: + { + if (!thisUsed) + break; + regEmitShiftInst(RI, I, &JitIL::SAR); break; } - case ICmpEq: { - if (!thisUsed) break; + case ICmpEq: + { + if (!thisUsed) + break; + regEmitICmpInst(RI, I, CC_E); break; } - case ICmpNe: { - if (!thisUsed) break; + case ICmpNe: + { + if (!thisUsed) + break; + regEmitICmpInst(RI, I, CC_NE); break; } - case ICmpUgt: { - if (!thisUsed) break; + case ICmpUgt: + { + if (!thisUsed) + break; + regEmitICmpInst(RI, I, CC_A); break; } - case ICmpUlt: { - if (!thisUsed) break; + case ICmpUlt: + { + if (!thisUsed) + break; + regEmitICmpInst(RI, I, CC_B); break; } - case ICmpUge: { - if (!thisUsed) break; + case ICmpUge: + { + if (!thisUsed) + break; + regEmitICmpInst(RI, I, CC_AE); break; } - case ICmpUle: { - if (!thisUsed) break; + case ICmpUle: + { + if (!thisUsed) + break; + regEmitICmpInst(RI, I, CC_BE); break; } - case ICmpSgt: { - if (!thisUsed) break; + case ICmpSgt: + { + if (!thisUsed) + break; + regEmitICmpInst(RI, I, CC_G); break; } - case ICmpSlt: { - if (!thisUsed) break; + case ICmpSlt: + { + if (!thisUsed) + break; + regEmitICmpInst(RI, I, CC_L); break; } - case ICmpSge: { - if (!thisUsed) break; + case ICmpSge: + { + if (!thisUsed) + break; + regEmitICmpInst(RI, I, CC_GE); break; } - case ICmpSle: { - if (!thisUsed) break; + case ICmpSle: + { + if (!thisUsed) + break; + regEmitICmpInst(RI, I, CC_LE); break; } case ICmpCRUnsigned: { - if (!thisUsed) break; + if (!thisUsed) + break; + regEmitICmpCRInst(RI, I); break; } case ICmpCRSigned: { - if (!thisUsed) break; + if (!thisUsed) + break; + regEmitICmpCRInst(RI, I); break; } case ConvertFromFastCR: { - if (!thisUsed) break; + if (!thisUsed) + break; + X64Reg cr_val = regUReg(RI, I); Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I))); @@ -1158,7 +1460,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { } case ConvertToFastCR: { - if 
(!thisUsed) break; + if (!thisUsed) + break; + X64Reg cr_val = regUReg(RI, I); Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I))); @@ -1197,7 +1501,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { } case FastCRSOSet: { - if (!thisUsed) break; + if (!thisUsed) + break; + X64Reg reg = regUReg(RI, I); Jit->MOV(64, R(RAX), Imm64(1ull << 61)); Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RAX)); @@ -1209,7 +1515,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { } case FastCREQSet: { - if (!thisUsed) break; + if (!thisUsed) + break; + X64Reg reg = regUReg(RI, I); Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm32(0)); Jit->SETcc(CC_Z, R(AL)); @@ -1220,7 +1528,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { } case FastCRGTSet: { - if (!thisUsed) break; + if (!thisUsed) + break; + X64Reg reg = regUReg(RI, I); Jit->CMP(64, regLocForInst(RI, getOp1(I)), Imm8(0)); Jit->SETcc(CC_G, R(AL)); @@ -1231,7 +1541,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { } case FastCRLTSet: { - if (!thisUsed) break; + if (!thisUsed) + break; + X64Reg reg = regUReg(RI, I); Jit->MOV(64, R(RAX), Imm64(1ull << 62)); Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RAX)); @@ -1241,8 +1553,11 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { regNormalRegClear(RI, I); break; } - case LoadSingle: { - if (!thisUsed) break; + case LoadSingle: + { + if (!thisUsed) + break; + X64Reg reg = fregFindFreeReg(RI); Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); RI.Jit->SafeLoadToReg(ECX, R(ECX), 32, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); @@ -1251,8 +1566,11 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { regNormalRegClear(RI, I); break; } - case LoadDouble: { - if (!thisUsed) break; + case LoadDouble: + { + if (!thisUsed) + break; + X64Reg reg = fregFindFreeReg(RI); const OpArg loc = regLocForInst(RI, getOp1(I)); Jit->MOV(32, R(ECX), loc); @@ -1262,8 +1580,11 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { regNormalRegClear(RI, I); break; } - case LoadPaired: { - if (!thisUsed) break; + case LoadPaired: + { + if (!thisUsed) + break; + regSpill(RI, EAX); regSpill(RI, EDX); X64Reg reg = fregFindFreeReg(RI); @@ -1286,14 +1607,15 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { regNormalRegClear(RI, I); break; } - case StoreSingle: { + case StoreSingle: + { regSpill(RI, EAX); const OpArg loc1 = fregLocForInst(RI, getOp1(I)); - if (loc1.IsSimpleReg()) { + if (loc1.IsSimpleReg()) Jit->MOVD_xmm(R(EAX), loc1.GetSimpleReg()); - } else { + else Jit->MOV(32, R(EAX), loc1); - } + Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I))); RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); if (RI.IInfo[I - RI.FirstI] & 4) @@ -1302,7 +1624,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { regClearInst(RI, getOp2(I)); break; } - case StoreDouble: { + case StoreDouble: + { regSpill(RI, EAX); OpArg value = fregLocForInst(RI, getOp1(I)); @@ -1318,7 +1641,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { regClearInst(RI, getOp2(I)); break; } - case StorePaired: { + case StorePaired: + { regSpill(RI, EAX); regSpill(RI, EDX); u32 quantreg = *I >> 24; @@ -1335,7 +1659,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { regClearInst(RI, getOp2(I)); break; } - case 
DupSingleToMReg: { + case DupSingleToMReg: + { if (!thisUsed) break; X64Reg input = fregEnsureInReg(RI, getOp1(I)); @@ -1346,7 +1671,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { fregNormalRegClear(RI, I); break; } - case InsertDoubleInMReg: { + case InsertDoubleInMReg: + { if (!thisUsed) break; // r[0] = op1[0]; r[1] = op2[1]; @@ -1354,9 +1680,12 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { // recycled. (SHUFPD may not be so fast.) X64Reg reg = fregBinRHSRegWithMov(RI, I); OpArg loc1 = fregLocForInst(RI, getOp1(I)); - if (loc1.IsSimpleReg()) { + if (loc1.IsSimpleReg()) + { Jit->MOVSD(reg, loc1); - } else { + } + else + { // If op1 is in FSlotSet, we have to mov loc1 to XMM0 // before MOVSD/MOVSS. // Because register<->memory transfer with MOVSD/MOVSS @@ -1368,78 +1697,102 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { fregNormalRegClear(RI, I); break; } - case ExpandPackedToMReg: { - if (!thisUsed) break; + case ExpandPackedToMReg: + { + if (!thisUsed) + break; + X64Reg reg = fregURegWithoutMov(RI, I); Jit->CVTPS2PD(reg, fregLocForInst(RI, getOp1(I))); RI.fregs[reg] = I; fregNormalRegClear(RI, I); break; } - case CompactMRegToPacked: { - if (!thisUsed) break; + case CompactMRegToPacked: + { + if (!thisUsed) + break; + X64Reg reg = fregURegWithoutMov(RI, I); Jit->CVTPD2PS(reg, fregLocForInst(RI, getOp1(I))); RI.fregs[reg] = I; fregNormalRegClear(RI, I); break; } - case FSNeg: { - if (!thisUsed) break; + case FSNeg: + { + if (!thisUsed) + break; + X64Reg reg = fregURegWithMov(RI, I); - static const u32 GC_ALIGNED16(ssSignBits[4]) = - {0x80000000}; + static const u32 GC_ALIGNED16(ssSignBits[4]) = {0x80000000}; Jit->PXOR(reg, M((void*)&ssSignBits)); RI.fregs[reg] = I; fregNormalRegClear(RI, I); break; } - case FDNeg: { - if (!thisUsed) break; + case FDNeg: + { + if (!thisUsed) + break; + X64Reg reg = fregURegWithMov(RI, I); - static const u64 GC_ALIGNED16(sdSignBits[2]) = - {0x8000000000000000ULL}; + static const u64 GC_ALIGNED16(sdSignBits[2]) = {0x8000000000000000ULL}; Jit->PXOR(reg, M((void*)&sdSignBits)); RI.fregs[reg] = I; fregNormalRegClear(RI, I); break; } - case FPNeg: { - if (!thisUsed) break; + case FPNeg: + { + if (!thisUsed) + break; + X64Reg reg = fregURegWithMov(RI, I); - static const u32 GC_ALIGNED16(psSignBits[4]) = - {0x80000000, 0x80000000}; + static const u32 GC_ALIGNED16(psSignBits[4]) = {0x80000000, 0x80000000}; Jit->PXOR(reg, M((void*)&psSignBits)); RI.fregs[reg] = I; fregNormalRegClear(RI, I); break; } - case FPDup0: { - if (!thisUsed) break; + case FPDup0: + { + if (!thisUsed) + break; + X64Reg reg = fregURegWithMov(RI, I); Jit->PUNPCKLDQ(reg, R(reg)); RI.fregs[reg] = I; fregNormalRegClear(RI, I); break; } - case FPDup1: { - if (!thisUsed) break; + case FPDup1: + { + if (!thisUsed) + break; + X64Reg reg = fregURegWithMov(RI, I); Jit->SHUFPS(reg, R(reg), 0xE5); RI.fregs[reg] = I; fregNormalRegClear(RI, I); break; } - case LoadFReg: { - if (!thisUsed) break; + case LoadFReg: + { + if (!thisUsed) + break; + X64Reg reg = fregFindFreeReg(RI); unsigned ppcreg = *I >> 8; Jit->MOVAPD(reg, M(&PowerPC::ppcState.ps[ppcreg])); RI.fregs[reg] = I; break; } - case LoadFRegDENToZero: { - if (!thisUsed) break; + case LoadFRegDENToZero: + { + if (!thisUsed) + break; + X64Reg reg = fregFindFreeReg(RI); unsigned ppcreg = *I >> 8; char *p = (char*)&(PowerPC::ppcState.ps[ppcreg][0]); @@ -1454,15 +1807,18 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { RI.fregs[reg] = 
I; break; } - case StoreFReg: { + case StoreFReg: + { unsigned ppcreg = *I >> 16; Jit->MOVAPD(M(&PowerPC::ppcState.ps[ppcreg]), fregEnsureInReg(RI, getOp1(I))); fregNormalRegClear(RI, I); break; } - case DoubleToSingle: { - if (!thisUsed) break; + case DoubleToSingle: + { + if (!thisUsed) + break; X64Reg input = fregEnsureInReg(RI, getOp1(I)); X64Reg output = fregURegWithoutMov(RI, I); @@ -1472,37 +1828,56 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { fregNormalRegClear(RI, I); break; } - case FSMul: { - if (!thisUsed) break; + case FSMul: + { + if (!thisUsed) + break; + fregEmitBinInst(RI, I, &JitIL::MULSS); break; } - case FSAdd: { - if (!thisUsed) break; + case FSAdd: + { + if (!thisUsed) + break; + fregEmitBinInst(RI, I, &JitIL::ADDSS); break; } - case FSSub: { - if (!thisUsed) break; + case FSSub: + { + if (!thisUsed) + break; + fregEmitBinInst(RI, I, &JitIL::SUBSS); break; } - case FDMul: { - if (!thisUsed) break; + case FDMul: + { + if (!thisUsed) + break; + fregEmitBinInst(RI, I, &JitIL::MULSD); break; } - case FDAdd: { - if (!thisUsed) break; + case FDAdd: + { + if (!thisUsed) + break; + fregEmitBinInst(RI, I, &JitIL::ADDSD); break; } - case FDSub: { - if (!thisUsed) break; + case FDSub: + { + if (!thisUsed) + break; + fregEmitBinInst(RI, I, &JitIL::SUBSD); break; } - case FDCmpCR: { + case FDCmpCR: + { const u32 ordered = *I >> 24; X64Reg destreg = regFindFreeReg(RI); // TODO: Remove an extra MOVSD if loc1.IsSimpleReg() @@ -1525,15 +1900,19 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { static const u32 FPSCR_VXSNAN = (u32)1 << (31 - 7); static const u32 FPSCR_FX = (u32)1 << (31 - 0); - if (ordered) { + if (ordered) + { // fcmpo // TODO: Optimize the following code if slow. // SNAN check may not be needed // because it does not happen so much. Jit->MOVSD(M(isSNANTemp[0]), XMM0); - if (loc2.IsSimpleReg()) { + if (loc2.IsSimpleReg()) + { Jit->MOVSD(M(isSNANTemp[1]), loc2.GetSimpleReg()); - } else { + } + else + { Jit->MOVSD(XMM0, loc2); Jit->MOVSD(M(isSNANTemp[1]), XMM0); } @@ -1551,13 +1930,18 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; Jit->SetJumpTarget(finish0); Jit->SetJumpTarget(finish1); - } else { + } + else + { // fcmpu // TODO: Optimize the following code if slow Jit->MOVSD(M(isSNANTemp[0]), XMM0); - if (loc2.IsSimpleReg()) { + if (loc2.IsSimpleReg()) + { Jit->MOVSD(M(isSNANTemp[1]), loc2.GetSimpleReg()); - } else { + } + else + { Jit->MOVSD(XMM0, loc2); Jit->MOVSD(M(isSNANTemp[1]), XMM0); } @@ -1584,24 +1968,36 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { fregNormalRegClear(RI, I); break; } - case FPAdd: { - if (!thisUsed) break; + case FPAdd: + { + if (!thisUsed) + break; + fregEmitBinInst(RI, I, &JitIL::ADDPS); break; } - case FPMul: { - if (!thisUsed) break; + case FPMul: + { + if (!thisUsed) + break; + fregEmitBinInst(RI, I, &JitIL::MULPS); break; } - case FPSub: { - if (!thisUsed) break; + case FPSub: + { + if (!thisUsed) + break; + fregEmitBinInst(RI, I, &JitIL::SUBPS); break; } - case FPMerge00: { + case FPMerge00: + { // r[0] = op1[0]; r[1] = op2[0]; - if (!thisUsed) break; + if (!thisUsed) + break; + // TODO: Optimize the case that the register of only op2 can be // recycled. 
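
[Aside for readers skimming the FPMerge cases in this hunk: each variant selects one lane from each operand, mirroring the GameCube/Wii ps_merge00/01/10/11 instructions, as the r[0]/r[1] comments state. A minimal scalar model — the PS struct is an illustrative stand-in, not Dolphin's register type:]

    // Scalar model of the four paired-single merges (ps_merge00..ps_merge11).
    #include <cstdio>

    struct PS { double p0, p1; };

    static PS Merge00(PS a, PS b) { return { a.p0, b.p0 }; } // r[0]=op1[0]; r[1]=op2[0]
    static PS Merge01(PS a, PS b) { return { a.p0, b.p1 }; } // r[0]=op1[0]; r[1]=op2[1]
    static PS Merge10(PS a, PS b) { return { a.p1, b.p0 }; } // r[0]=op1[1]; r[1]=op2[0]
    static PS Merge11(PS a, PS b) { return { a.p1, b.p1 }; } // r[0]=op1[1]; r[1]=op2[1]

    int main()
    {
        PS r = Merge10({ 1.0, 2.0 }, { 3.0, 4.0 });
        printf("%g %g\n", r.p0, r.p1); // prints "2 3"
    }
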
X64Reg reg = fregBinLHSRegWithMov(RI, I); @@ -1610,16 +2006,22 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { fregNormalRegClear(RI, I); break; } - case FPMerge01: { + case FPMerge01: + { // r[0] = op1[0]; r[1] = op2[1]; - if (!thisUsed) break; + if (!thisUsed) + break; + // TODO: Optimize the case that the register of only op1 can be // recycled. X64Reg reg = fregBinRHSRegWithMov(RI, I); OpArg loc1 = fregLocForInst(RI, getOp1(I)); - if (loc1.IsSimpleReg()) { + if (loc1.IsSimpleReg()) + { Jit->MOVSS(reg, loc1); - } else { + } + else + { Jit->MOVAPD(XMM0, loc1); Jit->MOVSS(reg, R(XMM0)); } @@ -1627,16 +2029,22 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { fregNormalRegClear(RI, I); break; } - case FPMerge10: { + case FPMerge10: + { // r[0] = op1[1]; r[1] = op2[0]; - if (!thisUsed) break; + if (!thisUsed) + break; + // TODO: Optimize the case that the register of only op2 can be // recycled. X64Reg reg = fregBinLHSRegWithMov(RI, I); OpArg loc2 = fregLocForInst(RI, getOp2(I)); - if (loc2.IsSimpleReg()) { + if (loc2.IsSimpleReg()) + { Jit->MOVSS(reg, loc2); - } else { + } + else + { Jit->MOVAPD(XMM0, loc2); Jit->MOVSS(reg, R(XMM0)); } @@ -1645,9 +2053,12 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { fregNormalRegClear(RI, I); break; } - case FPMerge11: { + case FPMerge11: + { // r[0] = op1[1]; r[1] = op2[1]; - if (!thisUsed) break; + if (!thisUsed) + break; + // TODO: Optimize the case that the register of only op2 can be // recycled. X64Reg reg = fregBinLHSRegWithMov(RI, I); @@ -1660,8 +2071,11 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { break; } case CInt32: - case CInt16: { - if (!thisUsed) break; + case CInt16: + { + if (!thisUsed) + break; + X64Reg reg = regFindFreeReg(RI); u64 val = ibuild->GetImmValue64(I); if ((u32)val == val) @@ -1677,7 +2091,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { case BlockEnd: break; - case IdleBranch: { + case IdleBranch: + { Jit->CMP(32, regLocForInst(RI, getOp1(getOp1(I))), Imm32(RI.Build->GetImmValue(getOp2(getOp1(I))))); FixupBranch cont = Jit->J_CC(CC_NE); @@ -1696,22 +2111,48 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { break; } - case BranchCond: { - if (isICmp(*getOp1(I))) { + case BranchCond: + { + if (isICmp(*getOp1(I))) + { regEmitCmp(RI, getOp1(I)); CCFlags flag; - switch (getOpcode(*getOp1(I))) { - case ICmpEq: flag = CC_NE; break; - case ICmpNe: flag = CC_E; break; - case ICmpUgt: flag = CC_BE; break; - case ICmpUlt: flag = CC_AE; break; - case ICmpUge: flag = CC_B; break; - case ICmpUle: flag = CC_A; break; - case ICmpSgt: flag = CC_LE; break; - case ICmpSlt: flag = CC_GE; break; - case ICmpSge: flag = CC_L; break; - case ICmpSle: flag = CC_G; break; - default: PanicAlert("cmpXX"); flag = CC_O; break; + switch (getOpcode(*getOp1(I))) + { + case ICmpEq: + flag = CC_NE; + break; + case ICmpNe: + flag = CC_E; + break; + case ICmpUgt: + flag = CC_BE; + break; + case ICmpUlt: + flag = CC_AE; + break; + case ICmpUge: + flag = CC_B; + break; + case ICmpUle: + flag = CC_A; + break; + case ICmpSgt: + flag = CC_LE; + break; + case ICmpSlt: + flag = CC_GE; + break; + case ICmpSge: + flag = CC_L; + break; + case ICmpSle: + flag = CC_G; + break; + default: + PanicAlert("cmpXX"); + flag = CC_O; + break; } FixupBranch cont = Jit->J_CC(flag); regWriteExit(RI, getOp2(I)); @@ -1721,7 +2162,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { if 
(RI.IInfo[I - RI.FirstI] & 8) regClearInst(RI, getOp2(getOp1(I))); } - else { + else + { Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm8(0)); FixupBranch cont = Jit->J_CC(CC_Z); regWriteExit(RI, getOp2(I)); @@ -1733,19 +2175,22 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { regClearInst(RI, getOp2(I)); break; } - case BranchUncond: { + case BranchUncond: + { regWriteExit(RI, getOp1(I)); regNormalRegClear(RI, I); break; } - case ShortIdleLoop: { + case ShortIdleLoop: + { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); Jit->ABI_CallFunction((void *)&CoreTiming::Idle); Jit->MOV(32, M(&PC), Imm32(InstLoc)); Jit->WriteExceptionExit(); break; } - case SystemCall: { + case SystemCall: + { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); Jit->LOCK(); Jit->OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_SYSCALL)); @@ -1753,12 +2198,14 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { Jit->WriteExceptionExit(); break; } - case InterpreterBranch: { + case InterpreterBranch: + { Jit->MOV(32, R(EAX), M(&NPC)); Jit->WriteExitDestInOpArg(R(EAX)); break; } - case RFIExit: { + case RFIExit: + { // See Interpreter rfi for details const u32 mask = 0x87C0FFFF; // MSR = (MSR & ~mask) | (SRR1 & mask); @@ -1775,7 +2222,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { Jit->WriteRfiExitDestInOpArg(R(EAX)); break; } - case FPExceptionCheck: { + case FPExceptionCheck: + { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); //This instruction uses FPU - needs to add FP exception bailout Jit->TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); // Test FP enabled bit @@ -1790,7 +2238,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { Jit->SetJumpTarget(b1); break; } - case DSIExceptionCheck: { + case DSIExceptionCheck: + { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); Jit->TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_DSI)); FixupBranch noMemException = Jit->J_CC(CC_Z); @@ -1802,7 +2251,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { Jit->SetJumpTarget(noMemException); break; } - case ISIException: { + case ISIException: + { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); // Address of instruction could not be translated @@ -1815,7 +2265,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { Jit->WriteExceptionExit(); break; } - case ExtExceptionCheck: { + case ExtExceptionCheck: + { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); Jit->TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT)); @@ -1836,7 +2287,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { Jit->SetJumpTarget(clearInt); break; } - case BreakPointCheck: { + case BreakPointCheck: + { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); Jit->MOV(32, M(&PC), Imm32(InstLoc)); @@ -1847,7 +2299,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { Jit->SetJumpTarget(noBreakpoint); break; } - case Int3: { + case Int3: + { Jit->INT3(); break; } @@ -1859,13 +2312,17 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { } } - for (unsigned i = 0; i < MAX_NUMBER_OF_REGS; i++) { - if (RI.regs[i]) { + for (unsigned i = 0; i < MAX_NUMBER_OF_REGS; i++) + { + if (RI.regs[i]) + { // Start a game in Burnout 2 to get this. Or animal crossing. 
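
[Stepping back to the RFIExit case above: the `mask` constant implements exactly the rfi semantics quoted in its comment. A plain-arithmetic sketch, assuming nothing beyond that comment:]

    // Sketch of the rfi MSR update from the RFIExit case: only the bits
    // selected by `mask` are copied from SRR1 into MSR.
    #include <cstdint>
    #include <cstdio>

    static uint32_t RfiMsr(uint32_t msr, uint32_t srr1)
    {
        const uint32_t mask = 0x87C0FFFF; // same constant as in RFIExit
        return (msr & ~mask) | (srr1 & mask);
    }

    int main()
    {
        printf("%08x\n", RfiMsr(0x00000000, 0xFFFFFFFF)); // prints 87c0ffff
    }
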
PanicAlert("Incomplete cleanup! (regs)"); exit(1); } - if (RI.fregs[i]) { + + if (RI.fregs[i]) + { PanicAlert("Incomplete cleanup! (fregs)"); exit(1); } @@ -1875,6 +2332,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { Jit->UD2(); } -void JitIL::WriteCode(u32 exitAddress) { +void JitIL::WriteCode(u32 exitAddress) +{ DoWriteCode(&ibuild, this, exitAddress); } diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp index 1e133d5e99..a805c813c7 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp @@ -174,8 +174,12 @@ namespace JitILProfiler u64 codeHash; u64 totalElapsed; u64 numberOfCalls; - Block() : index(0), codeHash(0), totalElapsed(0), numberOfCalls(0) { } + + Block() : index(0), codeHash(0), totalElapsed(0), numberOfCalls(0) + { + } }; + static std::vector blocks; static u32 blockIndex; static u64 beginTime; @@ -188,6 +192,7 @@ namespace JitILProfiler block.codeHash = codeHash; return block; } + // These functions need to be static because they are called with // ABI_CallFunction(). static void Begin(u32 index) @@ -195,6 +200,7 @@ namespace JitILProfiler blockIndex = index; beginTime = __rdtsc(); } + static void End() { const u64 endTime = __rdtsc(); @@ -203,6 +209,7 @@ namespace JitILProfiler block.totalElapsed += duration; ++block.numberOfCalls; } + struct JitILProfilerFinalizer { virtual ~JitILProfilerFinalizer() @@ -221,11 +228,13 @@ namespace JitILProfiler } } }; + static std::unique_ptr finalizer; static void Init() { finalizer = std::make_unique(); } + static void Shutdown() { finalizer.reset(); @@ -246,10 +255,14 @@ void JitIL::Init() else { if (!Core::g_CoreStartupParameter.bJITBlockLinking) + { jo.enableBlocklink = false; + } else + { // Speed boost, but not 100% safe jo.enableBlocklink = !Core::g_CoreStartupParameter.bMMU; + } } jo.fpAccurateFcmp = false; @@ -268,7 +281,8 @@ void JitIL::Init() code_block.m_gpa = &js.gpa; code_block.m_fpa = &js.fpa; - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) { + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) + { JitILProfiler::Init(); } } @@ -282,7 +296,8 @@ void JitIL::ClearCache() void JitIL::Shutdown() { - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) { + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) + { JitILProfiler::Shutdown(); } @@ -352,11 +367,14 @@ static void ImHere() PowerPC::ppcState.gpr[5], PowerPC::ppcState.gpr[6]); f.Flush(); } - if (been_here.find(PC) != been_here.end()) { + + if (been_here.find(PC) != been_here.end()) + { been_here.find(PC)->second++; if ((been_here.find(PC)->second) & 1023) return; } + DEBUG_LOG(DYNA_REC, "I'm here - PC = %08x , LR = %08x", PC, LR); been_here[PC] = 1; } @@ -376,7 +394,8 @@ void JitIL::Cleanup() void JitIL::WriteExit(u32 destination) { Cleanup(); - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) { + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) + { ABI_CallFunction((void *)JitILProfiler::End); } SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); @@ -408,7 +427,8 @@ void JitIL::WriteExitDestInOpArg(const Gen::OpArg& arg) { MOV(32, M(&PC), arg); Cleanup(); - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) { + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) + { ABI_CallFunction((void *)JitILProfiler::End); } SUB(32, 
M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); @@ -420,7 +440,8 @@ void JitIL::WriteRfiExitDestInOpArg(const Gen::OpArg& arg) MOV(32, M(&PC), arg); MOV(32, M(&NPC), arg); Cleanup(); - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) { + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) + { ABI_CallFunction((void *)JitILProfiler::End); } ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExceptions)); @@ -431,7 +452,8 @@ void JitIL::WriteRfiExitDestInOpArg(const Gen::OpArg& arg) void JitIL::WriteExceptionExit() { Cleanup(); - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) { + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) + { ABI_CallFunction((void *)JitILProfiler::End); } MOV(32, R(EAX), M(&PC)); @@ -554,7 +576,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc u64 codeHash = -1; if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling || - SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILOutputIR) + SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILOutputIR) { // For profiling and IR Writer for (u32 i = 0; i < code_block.m_num_instructions; i++) @@ -577,7 +599,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc js.downcountAmount = 0; if (!Core::g_CoreStartupParameter.bEnableDebugging) - js.downcountAmount += PatchEngine::GetSpeedhackCycles(code_block.m_address); + js.downcountAmount += PatchEngine::GetSpeedhackCycles(code_block.m_address); // Translate instructions for (u32 i = 0; i < code_block.m_num_instructions; i++) diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.h b/Source/Core/Core/PowerPC/Jit64IL/JitIL.h index 9e8a40ef7d..792726d31d 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.h +++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.h @@ -67,19 +67,22 @@ public: bool IsInCodeSpace(u8 *ptr) override { return IsInSpace(ptr); } void ClearCache() override; - const u8 *GetDispatcher() { + const u8 *GetDispatcher() + { return asm_routines.dispatcher; // asm_routines.dispatcher } - const CommonAsmRoutines *GetAsmRoutines() override { + + const CommonAsmRoutines *GetAsmRoutines() override + { return &asm_routines; } - const char *GetName() override { + const char *GetName() override + { return "JIT64IL"; } // Run! 
- void Run() override; void SingleStep() override; diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL_Tables.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL_Tables.cpp index c3214f6e18..8d7e1811b6 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL_Tables.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL_Tables.cpp @@ -376,15 +376,19 @@ void CompileInstruction(PPCAnalyst::CodeOp & op) JitIL *jitil = (JitIL *)jit; (jitil->*dynaOpTable[op.inst.OPCD])(op.inst); GekkoOPInfo *info = op.opinfo; - if (info) { + if (info) + { #ifdef OPLOG - if (!strcmp(info->opname, OP_TO_LOG)){ ///"mcrfs" + if (!strcmp(info->opname, OP_TO_LOG)) // "mcrfs" + { rsplocations.push_back(jit.js.compilerPC); } #endif info->compileCount++; info->lastUse = jit->js.compilerPC; - } else { + } + else + { PanicAlert("Tried to compile illegal (or unknown) instruction %08x, at %08x", op.inst.hex, jit->js.compilerPC); } } diff --git a/Source/Core/Core/PowerPC/JitArm32/Jit.cpp b/Source/Core/Core/PowerPC/JitArm32/Jit.cpp index 97ef0644ec..efe05df218 100644 --- a/Source/Core/Core/PowerPC/JitArm32/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/Jit.cpp @@ -108,12 +108,14 @@ static void ImHere() } fprintf(f.GetHandle(), "%08x\n", PC); } + if (been_here.find(PC) != been_here.end()) { been_here.find(PC)->second++; if ((been_here.find(PC)->second) & 1023) return; } + DEBUG_LOG(DYNA_REC, "I'm here - PC = %08x , LR = %08x", PC, LR); been_here[PC] = 1; } @@ -374,8 +376,10 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo SetCC(); gpr.Unlock(A, C); } + // Conditionally add profiling code. - if (Profiler::g_ProfileBlocks) { + if (Profiler::g_ProfileBlocks) + { ARMReg rA = gpr.GetReg(); ARMReg rB = gpr.GetReg(); MOVI2R(rA, (u32)&b->runCount); // Load in to register @@ -415,7 +419,8 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo // WARNING - cmp->branch merging will screw this up. js.isLastInstruction = true; js.next_inst = 0; - if (Profiler::g_ProfileBlocks) { + if (Profiler::g_ProfileBlocks) + { // CAUTION!!! 
push on stack regs you use, do your stuff, then pop PROFILER_VPUSH; // get end tic @@ -431,6 +436,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo js.next_inst = ops[i + 1].inst; js.next_compilerPC = ops[i + 1].address; } + if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32) { js.fifoBytesThisBlock -= 32; @@ -438,6 +444,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo QuickCallFunction(R14, (void*)&GPFifo::CheckGatherPipe); POP(4, R0, R1, R2, R3); } + if (Core::g_CoreStartupParameter.bEnableDebugging) { // Add run count @@ -457,6 +464,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo fpr.Unlock(VA); fpr.Unlock(VB); } + if (!ops[i].skip) { PrintDebug(ops[i].inst, DEBUG_OUTPUT); @@ -474,6 +482,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo } } } + if (code_block.m_memory_exception) BKPT(0x500); diff --git a/Source/Core/Core/PowerPC/JitArm32/Jit.h b/Source/Core/Core/PowerPC/JitArm32/Jit.h index c4cfcaa115..214ff31971 100644 --- a/Source/Core/Core/PowerPC/JitArm32/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm32/Jit.h @@ -74,18 +74,22 @@ public: void ClearCache(); - const u8 *GetDispatcher() { + const u8 *GetDispatcher() + { return asm_routines.dispatcher; } - CommonAsmRoutinesBase *GetAsmRoutines() { + + CommonAsmRoutinesBase *GetAsmRoutines() + { return &asm_routines; } - const char *GetName() { + const char *GetName() + { return "JITARM"; } - // Run! + // Run! void Run(); void SingleStep(); diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArmCache.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArmCache.cpp index 2fdd195140..f06e863033 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArmCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArmCache.cpp @@ -16,20 +16,21 @@ using namespace ArmGen; - void JitArmBlockCache::WriteLinkBlock(u8* location, const u8* address) - { - ARMXEmitter emit(location); - emit.B(address); - emit.FlushIcache(); - } - void JitArmBlockCache::WriteDestroyBlock(const u8* location, u32 address) - { - ARMXEmitter emit((u8 *)location); - emit.MOVI2R(R11, address); - emit.MOVI2R(R12, (u32)jit->GetAsmRoutines()->dispatcher); - emit.STR(R11, R9, PPCSTATE_OFF(pc)); - emit.B(R12); - emit.FlushIcache(); - } +void JitArmBlockCache::WriteLinkBlock(u8* location, const u8* address) +{ + ARMXEmitter emit(location); + emit.B(address); + emit.FlushIcache(); +} + +void JitArmBlockCache::WriteDestroyBlock(const u8* location, u32 address) +{ + ARMXEmitter emit((u8 *)location); + emit.MOVI2R(R11, address); + emit.MOVI2R(R12, (u32)jit->GetAsmRoutines()->dispatcher); + emit.STR(R11, R9, PPCSTATE_OFF(pc)); + emit.B(R12); + emit.FlushIcache(); +} diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp index 09d4b33acb..c7a578ef7c 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp @@ -97,10 +97,12 @@ void JitArm::bx(UGeckoInstruction inst) STR(rA, R9, PPCSTATE_OFF(spr[SPR_LR])); //ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); } + // If this is not the last instruction of a block, // we will skip the rest process. // Because PPCAnalyst::Flatten() merged the blocks. 
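
[An aside on the JitArmBlockCache pair re-indented above: WriteLinkBlock patches a block exit to branch directly into the target block, while WriteDestroyBlock rewrites it to store the PPC PC and fall back to the dispatcher. A schematic sketch — EmitBranch and EmitStorePC are illustrative placeholders, not Dolphin APIs, and the sample address is made up:]

    // Schematic model of block linking and unlinking.
    #include <cstdint>

    static void EmitBranch(uint8_t* /*at*/, const uint8_t* /*target*/) {}
    static void EmitStorePC(uint8_t* /*at*/, uint32_t /*pc*/) {}

    struct BlockExit
    {
        uint8_t* exitPtr;     // patchable code written when the block was compiled
        uint32_t exitAddress; // PPC address this exit leads to
        bool linkStatus;      // mirrors JitBlock::LinkData
    };

    // Linking: once the target block exists, branch straight into its code.
    static void LinkExit(BlockExit& e, const uint8_t* targetCode)
    {
        EmitBranch(e.exitPtr, targetCode);
        e.linkStatus = true;
    }

    // Destroying: store the PPC PC, then branch to the dispatcher, as
    // WriteDestroyBlock does with MOVI2R/STR/B above.
    static void DestroyExit(BlockExit& e, const uint8_t* dispatcher)
    {
        EmitStorePC(e.exitPtr, e.exitAddress);
        EmitBranch(e.exitPtr, dispatcher);
    }

    int main()
    {
        static uint8_t patchSite[16];
        BlockExit e{ patchSite, 0x80003100, false }; // hypothetical PPC address
        DestroyExit(e, nullptr);
        LinkExit(e, patchSite);
        return e.linkStatus ? 0 : 1;
    }

[In the real emitters, ARMXEmitter/XEmitter write the actual machine code at the patch location, and the ARM path flushes the instruction cache afterwards, as shown above.]
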
- if (!js.isLastInstruction) { + if (!js.isLastInstruction) + { return; } @@ -231,7 +233,8 @@ void JitArm::bcctrx(UGeckoInstruction inst) LDR(rA, R9, PPCSTATE_OFF(spr[SPR_CTR])); BIC(rA, rA, 0x3); - if (inst.LK_3){ + if (inst.LK_3) + { u32 Jumpto = js.compilerPC + 4; MOVI2R(rB, Jumpto); STR(rB, R9, PPCSTATE_OFF(spr[SPR_LR])); @@ -285,7 +288,8 @@ void JitArm::bclrx(UGeckoInstruction inst) //AND(32, R(EAX), Imm32(0xFFFFFFFC)); LDR(rA, R9, PPCSTATE_OFF(spr[SPR_LR])); BIC(rA, rA, 0x3); - if (inst.LK){ + if (inst.LK) + { u32 Jumpto = js.compilerPC + 4; MOVI2R(rB, Jumpto); STR(rB, R9, PPCSTATE_OFF(spr[SPR_LR])); diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_FloatingPoint.cpp index 79a0b79885..47611d201a 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_FloatingPoint.cpp @@ -117,7 +117,8 @@ void JitArm::fctiwx(UGeckoInstruction inst) NEONXEmitter nemit(this); nemit.VORR(vD, vD, V0); - if (inst.Rc) Helper_UpdateCR1(fpscrReg, rA); + if (inst.Rc) + Helper_UpdateCR1(fpscrReg, rA); STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); gpr.Unlock(rA); @@ -197,7 +198,8 @@ void JitArm::fctiwzx(UGeckoInstruction inst) NEONXEmitter nemit(this); nemit.VORR(vD, vD, V0); - if (inst.Rc) Helper_UpdateCR1(fpscrReg, rA); + if (inst.Rc) + Helper_UpdateCR1(fpscrReg, rA); STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); gpr.Unlock(rA); diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp index 6f7e6854dc..6e6bdb3b36 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp @@ -14,7 +14,8 @@ #include "Core/PowerPC/JitArm32/JitAsm.h" #include "Core/PowerPC/JitArm32/JitRegCache.h" -void JitArm::ComputeRC(ARMReg value, int cr) { +void JitArm::ComputeRC(ARMReg value, int cr) +{ ARMReg rB = gpr.GetReg(); Operand2 ASRReg(value, ST_ASR, 31); @@ -25,7 +26,9 @@ void JitArm::ComputeRC(ARMReg value, int cr) { gpr.Unlock(rB); } -void JitArm::ComputeRC(s32 value, int cr) { + +void JitArm::ComputeRC(s32 value, int cr) +{ ARMReg rB = gpr.GetReg(); Operand2 ASRReg(rB, ST_ASR, 31); @@ -51,6 +54,7 @@ void JitArm::ComputeCarry() STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); gpr.Unlock(tmp); } + void JitArm::ComputeCarry(bool Carry) { ARMReg tmp = gpr.GetReg(); @@ -162,12 +166,35 @@ void JitArm::subfic(UGeckoInstruction inst) // This instruction has no RC flag } -u32 Add(u32 a, u32 b) {return a + b;} -u32 Sub(u32 a, u32 b) {return a - b;} -u32 Mul(u32 a, u32 b) {return a * b;} -u32 Or (u32 a, u32 b) {return a | b;} -u32 And(u32 a, u32 b) {return a & b;} -u32 Xor(u32 a, u32 b) {return a ^ b;} +u32 Add(u32 a, u32 b) +{ + return a + b; +} + +u32 Sub(u32 a, u32 b) +{ + return a - b; +} + +u32 Mul(u32 a, u32 b) +{ + return a * b; +} + +u32 Or (u32 a, u32 b) +{ + return a | b; +} + +u32 And(u32 a, u32 b) +{ + return a & b; +} + +u32 Xor(u32 a, u32 b) +{ + return a ^ b; +} void JitArm::arith(UGeckoInstruction inst) { @@ -410,8 +437,13 @@ void JitArm::arith(UGeckoInstruction inst) } break; } - if (carry) ComputeCarry(hasCarry); - if (Rc) ComputeRC(gpr.GetImm(dest), 0); + + if (carry) + ComputeCarry(hasCarry); + + if (Rc) + ComputeRC(gpr.GetImm(dest), 0); + return; } @@ -452,7 +484,9 @@ void JitArm::arith(UGeckoInstruction inst) gpr.Unlock(rA); } else + { gpr.SetImmediate(d, Imm[1]); + } break; case 24: case 25: @@ -603,8 +637,12 @@ void JitArm::arith(UGeckoInstruction inst) } break; } - if (carry) 
ComputeCarry(); - if (Rc) ComputeRC(gpr.R(dest)); + + if (carry) + ComputeCarry(); + + if (Rc) + ComputeRC(gpr.R(dest)); } void JitArm::addex(UGeckoInstruction inst) @@ -623,7 +661,10 @@ void JitArm::addex(UGeckoInstruction inst) GetCarryAndClear(rA); ADDS(RD, RA, RB); FinalizeCarry(rA); - if (inst.Rc) ComputeRC(RD); + + if (inst.Rc) + ComputeRC(RD); + gpr.Unlock(rA); } @@ -652,7 +693,9 @@ void JitArm::mulhwux(UGeckoInstruction inst) ARMReg RD = gpr.R(d); ARMReg rA = gpr.GetReg(false); UMULL(rA, RD, RA, RB); - if (inst.Rc) ComputeRC(RD); + + if (inst.Rc) + ComputeRC(RD); } void JitArm::extshx(UGeckoInstruction inst) @@ -664,7 +707,10 @@ void JitArm::extshx(UGeckoInstruction inst) if (gpr.IsImm(s)) { gpr.SetImmediate(a, (u32)(s32)(s16)gpr.GetImm(s)); - if (inst.Rc) ComputeRC(gpr.GetImm(a), 0); + + if (inst.Rc) + ComputeRC(gpr.GetImm(a), 0); + return; } ARMReg rA = gpr.R(a); @@ -682,7 +728,10 @@ void JitArm::extsbx(UGeckoInstruction inst) if (gpr.IsImm(s)) { gpr.SetImmediate(a, (u32)(s32)(s8)gpr.GetImm(s)); - if (inst.Rc) ComputeRC(gpr.GetImm(a), 0); + + if (inst.Rc) + ComputeRC(gpr.GetImm(a), 0); + return; } ARMReg rA = gpr.R(a); @@ -865,7 +914,9 @@ void JitArm::twx(UGeckoInstruction inst) MOV(RA, inst.TO); if (inst.OPCD == 3) // twi + { CMP(gpr.R(a), gpr.R(inst.RB)); + } else // tw { MOVI2R(RB, (s32)(s16)inst.SIMM_16); diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp index 1e95d372dc..d0a022fb36 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp @@ -68,7 +68,9 @@ void JitArm::SafeStoreFromReg(bool fastmem, s32 dest, u32 value, s32 regOffset, NOP(1); } else + { MOVI2R(R10, (u32)offset, false); + } if (dest != -1) ADD(R10, R10, RA); @@ -439,11 +441,11 @@ void JitArm::lXX(UGeckoInstruction inst) // LWZ idle skipping if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle && - inst.OPCD == 32 && - (inst.hex & 0xFFFF0000) == 0x800D0000 && - (Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 || - (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) && - Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8) + inst.OPCD == 32 && + (inst.hex & 0xFFFF0000) == 0x800D0000 && + (Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 || + (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) && + Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8) { ARMReg RD = gpr.R(d); diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp index 8d14362688..033af88bf8 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp @@ -89,11 +89,12 @@ void JitArm::lfXX(UGeckoInstruction inst) ADD(rB, rB, RA); } else + { ADD(rB, gpr.R(offsetReg), RA); + } } else { - if (zeroA) { if (offsetReg == -1) @@ -105,7 +106,9 @@ void JitArm::lfXX(UGeckoInstruction inst) ADD(rB, rB, RA); } else + { MOVI2R(rB, (u32)offset); + } } else { @@ -116,7 +119,9 @@ void JitArm::lfXX(UGeckoInstruction inst) ADD(rB, RB, RA); } else + { MOV(rB, RB); + } } } } @@ -248,11 +253,12 @@ void JitArm::stfXX(UGeckoInstruction inst) ADD(rB, rB, RA); } else + { ADD(rB, gpr.R(offsetReg), RA); + } } else { - if (zeroA) { if (offsetReg == -1) @@ -264,7 +270,9 @@ void 
JitArm::stfXX(UGeckoInstruction inst) ADD(rB, rB, RA); } else + { MOVI2R(rB, (u32)offset); + } } else { @@ -275,7 +283,9 @@ void JitArm::stfXX(UGeckoInstruction inst) ADD(rB, RB, RA); } else + { MOV(rB, RB); + } } } } @@ -320,7 +330,6 @@ void JitArm::stfXX(UGeckoInstruction inst) MOV(R1, rB); BL(rA); - } else { @@ -332,7 +341,6 @@ void JitArm::stfXX(UGeckoInstruction inst) VMOV(D0, v0); MOV(R0, rB); #endif - BL(rA); } POP(4, R0, R1, R2, R3); @@ -361,8 +369,9 @@ void JitArm::stfs(UGeckoInstruction inst) ADD(rB, rB, RA); } else + { MOVI2R(rB, (u32)inst.SIMM_16); - + } MOVI2R(rA, (u32)&Memory::Write_U32); PUSH(4, R0, R1, R2, R3); diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp index 754b97b19a..ea4aa72850 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp @@ -77,7 +77,9 @@ void JitArm::psq_lx(UGeckoInstruction inst) ADD(R10, gpr.R(inst.RB), gpr.R(inst.RA)); } else + { MOV(R10, gpr.R(inst.RB)); + } if (update) MOV(gpr.R(inst.RA), R10); @@ -128,7 +130,9 @@ void JitArm::psq_st(UGeckoInstruction inst) ADD(R10, gpr.R(inst.RA), R14); } else + { MOVI2R(R10, (u32)offset); + } if (update) MOV(gpr.R(inst.RA), R10); @@ -171,7 +175,9 @@ void JitArm::psq_stx(UGeckoInstruction inst) ADD(R10, gpr.R(inst.RA), gpr.R(inst.RB)); } else + { MOV(R10, gpr.R(inst.RB)); + } if (update) MOV(gpr.R(inst.RA), R10); diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_SystemRegisters.cpp index d17e61a88f..712355acc5 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_SystemRegisters.cpp @@ -98,12 +98,14 @@ void JitArm::mtspr(UGeckoInstruction inst) ARMReg RD = gpr.R(inst.RD); STR(RD, R9, PPCSTATE_OFF(spr) + iIndex * 4); } + void JitArm::mftb(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); mfspr(inst); } + void JitArm::mfspr(UGeckoInstruction inst) { INSTRUCTION_START diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_Tables.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_Tables.cpp index d7c1882f44..698cfba91a 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_Tables.cpp @@ -378,9 +378,12 @@ void CompileInstruction(PPCAnalyst::CodeOp & op) JitArm *jitarm = (JitArm *)jit; (jitarm->*dynaOpTable[op.inst.OPCD])(op.inst); GekkoOPInfo *info = op.opinfo; - if (info) { + + if (info) + { #ifdef OPLOG - if (!strcmp(info->opname, OP_TO_LOG)){ ///"mcrfs" + if (!strcmp(info->opname, OP_TO_LOG)) // "mcrfs" + { rsplocations.push_back(jit.js.compilerPC); } #endif diff --git a/Source/Core/Core/PowerPC/JitArm32/JitAsm.h b/Source/Core/Core/PowerPC/JitArm32/JitAsm.h index f9d8aefa00..41cd248336 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitAsm.h +++ b/Source/Core/Core/PowerPC/JitArm32/JitAsm.h @@ -14,13 +14,15 @@ private: void GenerateCommon(); public: - void Init() { + void Init() + { AllocCodeSpace(8192); Generate(); WriteProtect(); } - void Shutdown() { + void Shutdown() + { FreeCodeSpace(); } }; diff --git a/Source/Core/Core/PowerPC/JitArm32/JitFPRCache.cpp b/Source/Core/Core/PowerPC/JitArm32/JitFPRCache.cpp index 9f5d1bfc7d..fb533888e7 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitFPRCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitFPRCache.cpp @@ -83,6 +83,7 @@ ARMReg *ArmFPRCache::GetAllocationOrder(int &count) ARMReg 
ArmFPRCache::GetReg(bool AutoLock) { for (u8 a = 0; a < NUMARMREG; ++a) + { if (ArmRegs[a].free) { // Alright, this one is free @@ -90,6 +91,8 @@ ARMReg ArmFPRCache::GetReg(bool AutoLock) ArmRegs[a].free = false; return ArmRegs[a].Reg; } + } + // Uh Oh, we have all them locked.... _assert_msg_(_DYNA_REC_, false, "All available registers are locked dumb dumb"); return D31; @@ -109,9 +112,11 @@ u32 ArmFPRCache::GetLeastUsedRegister(bool increment) { u32 HighestUsed = 0; u8 lastRegIndex = 0; - for (u8 a = 0; a < NUMPPCREG; ++a){ + for (u8 a = 0; a < NUMPPCREG; ++a) + { if (increment) ++ArmCRegs[a].LastLoad; + if (ArmCRegs[a].LastLoad > HighestUsed) { HighestUsed = ArmCRegs[a].LastLoad; @@ -123,11 +128,13 @@ u32 ArmFPRCache::GetLeastUsedRegister(bool increment) bool ArmFPRCache::FindFreeRegister(u32 ®index) { for (u8 a = 0; a < NUMPPCREG; ++a) + { if (ArmCRegs[a].PPCReg == 33) { regindex = a; return true; } + } return false; } diff --git a/Source/Core/Core/PowerPC/JitArm32/JitRegCache.cpp b/Source/Core/Core/PowerPC/JitArm32/JitRegCache.cpp index ab51781adf..fcfb7fd874 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitRegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitRegCache.cpp @@ -28,6 +28,7 @@ void ArmRegCache::Init(ARMXEmitter *emitter) ArmRegs[a].free = true; } } + void ArmRegCache::Start(PPCAnalyst::BlockRegStats &stats) { // Make sure the state is wiped on Start @@ -71,6 +72,7 @@ ARMReg *ArmRegCache::GetAllocationOrder(int &count) ARMReg ArmRegCache::GetReg(bool AutoLock) { for (u8 a = 0; a < NUMARMREG; ++a) + { if (ArmRegs[a].free) { // Alright, this one is free @@ -78,6 +80,8 @@ ARMReg ArmRegCache::GetReg(bool AutoLock) ArmRegs[a].free = false; return ArmRegs[a].Reg; } + } + // Uh Oh, we have all them locked.... _assert_msg_(_DYNA_REC_, false, "All available registers are locked dumb dumb"); return R0; @@ -92,11 +96,18 @@ void ArmRegCache::Unlock(ARMReg R0, ARMReg R1, ARMReg R2, ARMReg R3) _assert_msg_(_DYNA_REC, !ArmRegs[RegNum].free, "This register is already unlocked"); ArmRegs[RegNum].free = true; } - if ( R1 != INVALID_REG && ArmRegs[RegNum].Reg == R1) ArmRegs[RegNum].free = true; - if ( R2 != INVALID_REG && ArmRegs[RegNum].Reg == R2) ArmRegs[RegNum].free = true; - if ( R3 != INVALID_REG && ArmRegs[RegNum].Reg == R3) ArmRegs[RegNum].free = true; + + if (R1 != INVALID_REG && ArmRegs[RegNum].Reg == R1) + ArmRegs[RegNum].free = true; + + if (R2 != INVALID_REG && ArmRegs[RegNum].Reg == R2) + ArmRegs[RegNum].free = true; + + if (R3 != INVALID_REG && ArmRegs[RegNum].Reg == R3) + ArmRegs[RegNum].free = true; } } + u32 ArmRegCache::GetLeastUsedRegister(bool increment) { u32 HighestUsed = 0; @@ -113,14 +124,17 @@ u32 ArmRegCache::GetLeastUsedRegister(bool increment) } return lastRegIndex; } + bool ArmRegCache::FindFreeRegister(u32 ®index) { for (u8 a = 0; a < NUMPPCREG; ++a) + { if (ArmCRegs[a].PPCReg == 33) { regindex = a; return true; } + } return false; } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp index 6e8a21ebfa..5b74980e0d 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp @@ -247,13 +247,16 @@ void CommonAsmRoutines::GenQuantizedSingleStores() SafeWriteF32ToReg(XMM0, ECX, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); RET(); /* - if (cpu_info.bSSSE3) { + if (cpu_info.bSSSE3) + { PSHUFB(XMM0, M((void *)pbswapShuffle2x4)); // TODO: SafeWriteFloat MOVSS(M(&psTemp[0]), XMM0); MOV(32, R(EAX), 
M(&psTemp[0])); SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); - } else { + } + else + { MOVSS(M(&psTemp[0]), XMM0); MOV(32, R(EAX), M(&psTemp[0])); SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); @@ -320,10 +323,13 @@ void CommonAsmRoutines::GenQuantizedLoads() UD2(); const u8* loadPairedFloatTwo = AlignCode4(); - if (cpu_info.bSSSE3) { + if (cpu_info.bSSSE3) + { MOVQ_xmm(XMM0, MComplex(RBX, RCX, 1, 0)); PSHUFB(XMM0, M((void *)pbswapShuffle2x4)); - } else { + } + else + { LoadAndSwap(64, RCX, MComplex(RBX, RCX, 1, 0)); ROL(64, R(RCX), Imm8(32)); MOVQ_xmm(XMM0, R(RCX)); @@ -331,11 +337,14 @@ void CommonAsmRoutines::GenQuantizedLoads() RET(); const u8* loadPairedFloatOne = AlignCode4(); - if (cpu_info.bSSSE3) { + if (cpu_info.bSSSE3) + { MOVD_xmm(XMM0, MComplex(RBX, RCX, 1, 0)); PSHUFB(XMM0, M((void *)pbswapShuffle1x4)); UNPCKLPS(XMM0, M((void*)m_one)); - } else { + } + else + { LoadAndSwap(32, RCX, MComplex(RBX, RCX, 1, 0)); MOVD_xmm(XMM0, R(RCX)); UNPCKLPS(XMM0, M((void*)m_one)); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp index dde1e16d1f..71bcbf252b 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp @@ -21,7 +21,8 @@ using namespace Gen; extern u8 *trampolineCodePtr; -static void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress) { +static void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress) +{ u64 code_addr = (u64)codePtr; disassembler disasm; char disbuf[256]; @@ -61,9 +62,10 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re if (addrReg != ABI_PARAM1) MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg)); - if (info.displacement) { + + if (info.displacement) ADD(32, R(ABI_PARAM1), Imm32(info.displacement)); - } + ABI_PushRegistersAndAdjustStack(registersInUse, true); switch (info.operandSize) { diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index dbf7b69ff6..2b927ba0d9 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -135,9 +135,9 @@ using namespace Gen; { // check if any endpoint is inside the other range if ((s1 >= s2 && s1 <= e2) || - (e1 >= s2 && e1 <= e2) || - (s2 >= s1 && s2 <= e1) || - (e2 >= s1 && e2 <= e1)) + (e1 >= s2 && e1 <= e2) || + (s2 >= s1 && s2 <= e1) || + (e2 >= s1 && e2 <= e1)) return true; else return false; @@ -360,11 +360,13 @@ using namespace Gen; } } } + void JitBlockCache::WriteLinkBlock(u8* location, const u8* address) { XEmitter emit(location); emit.JMP(address, true); } + void JitBlockCache::WriteDestroyBlock(const u8* location, u32 address) { XEmitter emit((u8 *)location); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index 2b0db88c64..0d711dcfa2 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -42,7 +42,8 @@ struct JitBlock bool invalid; - struct LinkData { + struct LinkData + { u8 *exitPtrs; // to be able to rewrite the exit jum u32 exitAddress; bool linkStatus; // is it already linked? 
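
[An aside on the endpoint test in JitCache.cpp above: the four clauses check whether any endpoint of one range falls inside the other, which for well-formed closed ranges (s <= e) is equivalent to the standard two-comparison overlap test. A small self-check of that equivalence — illustrative only; the patch leaves the original form in place:]

    // Verifies by exhaustion over small ranges that the four-clause
    // endpoint test equals the two-comparison interval-overlap test.
    #include <cassert>
    #include <cstdint>

    static bool OverlapFourClause(uint32_t s1, uint32_t e1, uint32_t s2, uint32_t e2)
    {
        return (s1 >= s2 && s1 <= e2) || (e1 >= s2 && e1 <= e2) ||
               (s2 >= s1 && s2 <= e1) || (e2 >= s1 && e2 <= e1);
    }

    static bool OverlapTwoClause(uint32_t s1, uint32_t e1, uint32_t s2, uint32_t e2)
    {
        return s1 <= e2 && s2 <= e1;
    }

    int main()
    {
        for (uint32_t s1 = 0; s1 < 8; s1++)
        for (uint32_t e1 = s1; e1 < 8; e1++)
        for (uint32_t s2 = 0; s2 < 8; s2++)
        for (uint32_t e2 = s2; e2 < 8; e2++)
            assert(OverlapFourClause(s1, e1, s2, e2) == OverlapTwoClause(s1, e1, s2, e2));
    }
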
@@ -81,18 +82,22 @@ public: m_valid_block.reset(new u32[VALID_BLOCK_ALLOC_ELEMENTS]); ClearAll(); } + void Set(u32 bit) { m_valid_block[bit / 32] |= 1u << (bit % 32); } + void Clear(u32 bit) { m_valid_block[bit / 32] &= ~(1u << (bit % 32)); } + void ClearAll() { memset(m_valid_block.get(), 0, sizeof(u32) * VALID_BLOCK_ALLOC_ELEMENTS); } + bool Test(u32 bit) { return (m_valid_block[bit / 32] & (1u << (bit % 32))) != 0; @@ -125,7 +130,10 @@ class JitBaseBlockCache public: JitBaseBlockCache() : blockCodePointers(nullptr), blocks(nullptr), num_blocks(0), - iCache(nullptr), iCacheEx(nullptr), iCacheVMEM(nullptr) {} + iCache(nullptr), iCacheEx(nullptr), iCacheVMEM(nullptr) + { + } + int AllocateBlock(u32 em_address); void FinalizeBlock(int block_num, bool block_link, const u8 *code_ptr); diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index 5a71aa2be5..2734e02715 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -77,7 +77,8 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int ac // offsets with the wrong sign, so whatever. Since the original code // *could* try to wrap an address around, however, this is the correct // place to address the issue.) - if ((u32) offset >= 0x1000) { + if ((u32) offset >= 0x1000) + { LEA(32, reg_value, MDisp(opAddress.GetSimpleReg(), offset)); opAddress = R(reg_value); offset = 0; @@ -186,7 +187,9 @@ private: // then mask, then sign extend if needed (1 instr vs. 2/3). u32 all_ones = (1ULL << sbits) - 1; if ((all_ones & mask) == all_ones) + { MoveOpArgToReg(sbits, MDisp(EAX, 0)); + } else { m_code->MOVZX(32, sbits, m_dst_reg, MDisp(EAX, 0)); @@ -342,10 +345,18 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, ABI_PushRegistersAndAdjustStack(registersInUse, false); switch (accessSize) { - case 64: ABI_CallFunctionA((void *)&Memory::Read_U64, addr_loc); break; - case 32: ABI_CallFunctionA((void *)&Memory::Read_U32, addr_loc); break; - case 16: ABI_CallFunctionA((void *)&Memory::Read_U16_ZX, addr_loc); break; - case 8: ABI_CallFunctionA((void *)&Memory::Read_U8_ZX, addr_loc); break; + case 64: + ABI_CallFunctionA((void *)&Memory::Read_U64, addr_loc); + break; + case 32: + ABI_CallFunctionA((void *)&Memory::Read_U32, addr_loc); + break; + case 16: + ABI_CallFunctionA((void *)&Memory::Read_U16_ZX, addr_loc); + break; + case 8: + ABI_CallFunctionA((void *)&Memory::Read_U8_ZX, addr_loc); + break; } ABI_PopRegistersAndAdjustStack(registersInUse, false); @@ -373,11 +384,12 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap) { - u8 *result; - if (accessSize == 8 && reg_value >= 4) { + if (accessSize == 8 && reg_value >= 4) + { PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!"); } - result = GetWritableCodePtr(); + + u8* result = GetWritableCodePtr(); OpArg dest = MComplex(RBX, reg_addr, SCALE_1, offset); if (swap) { @@ -396,6 +408,7 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acc { MOV(accessSize, dest, R(reg_value)); } + return result; } @@ -450,10 +463,18 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce ABI_PushRegistersAndAdjustStack(registersInUse, noProlog); switch (accessSize) { - case 64: ABI_CallFunctionRR(swap ? 
((void *)&Memory::Write_U64) : ((void *)&Memory::Write_U64_Swap), reg_value, reg_addr, false); break; - case 32: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), reg_value, reg_addr, false); break; - case 16: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), reg_value, reg_addr, false); break; - case 8: ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr, false); break; + case 64: + ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U64) : ((void *)&Memory::Write_U64_Swap), reg_value, reg_addr, false); + break; + case 32: + ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), reg_value, reg_addr, false); + break; + case 16: + ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), reg_value, reg_addr, false); + break; + case 8: + ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr, false); + break; } ABI_PopRegistersAndAdjustStack(registersInUse, noProlog); FixupBranch exit = J(); @@ -478,7 +499,8 @@ void EmuCodeBlock::WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 a MOV(accessSize, MDisp(RBX, address & 0x3FFFFFFF), R(arg)); } -void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm) { +void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm) +{ // Most games don't need these. Zelda requires it though - some platforms get stuck without them. if (jit->jo.accurateSinglePrecision) { @@ -487,7 +509,8 @@ void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm) { } } -void EmuCodeBlock::ForceSinglePrecisionP(X64Reg xmm) { +void EmuCodeBlock::ForceSinglePrecisionP(X64Reg xmm) +{ // Most games don't need these. Zelda requires it though - some platforms get stuck without them. 
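
[For context on the two ForceSinglePrecision helpers here: "forcing single precision" amounts to a double-to-float-to-double round trip that re-rounds the mantissa to single precision, and the comments above explain why it is gated behind jo.accurateSinglePrecision. A scalar sketch in plain C++ — the SSE sequence in the function bodies is not shown in this hunk:]

    // Scalar equivalent of the single-precision clamp: narrowing to float
    // and widening back re-rounds the value to 24-bit precision.
    #include <cstdio>

    static double ForceSingle(double x)
    {
        return static_cast<double>(static_cast<float>(x));
    }

    int main()
    {
        // 1.0 + 1e-12 is not representable as a float; the excess bits are
        // rounded away, so this prints 1.
        printf("%.17g\n", ForceSingle(1.0 + 1e-12));
    }
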
if (jit->jo.accurateSinglePrecision) { @@ -600,10 +623,13 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) MOVSD(XMM1, R(src)); FLD(64, M(&temp64)); CCFlags cond; - if (cpu_info.bSSE4_1) { + if (cpu_info.bSSE4_1) + { PTEST(XMM1, M((void *)&double_exponent)); cond = CC_NC; - } else { + } + else + { // emulate PTEST; checking FPU flags is incorrect because the NaN bits // are sticky (persist between instructions) MOVSD(XMM0, M((void *)&double_exponent)); @@ -619,9 +645,12 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) PANDN(XMM1, M((void *)&double_qnan_bit)); PSRLQ(XMM1, 29); - if (cpu_info.bAVX) { + if (cpu_info.bAVX) + { VPANDN(XMM0, XMM1, R(XMM0)); - } else { + } + else + { PANDN(XMM1, R(XMM0)); MOVSS(XMM0, R(XMM1)); } @@ -633,19 +662,26 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr) { - if (src_is_gpr) { + if (src_is_gpr) + { MOV(32, M(&temp32), R(src)); MOVD_xmm(XMM1, R(src)); - } else { + } + else + { MOVSS(M(&temp32), src); MOVSS(R(XMM1), src); } + FLD(32, M(&temp32)); CCFlags cond; - if (cpu_info.bSSE4_1) { + if (cpu_info.bSSE4_1) + { PTEST(XMM1, M((void *)&single_exponent)); cond = CC_NC; - } else { + } + else + { // emulate PTEST; checking FPU flags is incorrect because the NaN bits // are sticky (persist between instructions) MOVSS(XMM0, M((void *)&single_exponent)); @@ -661,9 +697,12 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr PANDN(XMM1, M((void *)&single_qnan_bit)); PSLLQ(XMM1, 29); - if (cpu_info.bAVX) { + if (cpu_info.bAVX) + { VPANDN(dst, XMM1, R(dst)); - } else { + } + else + { PANDN(XMM1, R(dst)); MOVSD(dst, R(XMM1)); } diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp index 8bb10142d1..f078a4cac9 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp @@ -136,71 +136,87 @@ TODO (in no particular order): #include "Core/PowerPC/JitILCommon/IR.h" using namespace Gen; -namespace IREmitter { +namespace IREmitter +{ -InstLoc IRBuilder::EmitZeroOp(unsigned Opcode, unsigned extra = 0) { +InstLoc IRBuilder::EmitZeroOp(unsigned Opcode, unsigned extra = 0) +{ InstLoc curIndex = InstList.data() + InstList.size(); InstList.push_back(Opcode | (extra << 8)); MarkUsed.push_back(false); return curIndex; } -InstLoc IRBuilder::EmitUOp(unsigned Opcode, InstLoc Op1, unsigned extra) { +InstLoc IRBuilder::EmitUOp(unsigned Opcode, InstLoc Op1, unsigned extra) +{ InstLoc curIndex = InstList.data() + InstList.size(); unsigned backOp1 = (s32)(curIndex - 1 - Op1); - if (backOp1 >= 256) { + if (backOp1 >= 256) + { InstList.push_back(Tramp | backOp1 << 8); MarkUsed.push_back(false); backOp1 = 0; curIndex++; } + InstList.push_back(Opcode | (backOp1 << 8) | (extra << 16)); MarkUsed.push_back(false); return curIndex; } -InstLoc IRBuilder::EmitBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, unsigned extra) { +InstLoc IRBuilder::EmitBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, unsigned extra) +{ InstLoc curIndex = InstList.data() + InstList.size(); unsigned backOp1 = (s32)(curIndex - 1 - Op1); - if (backOp1 >= 255) { + if (backOp1 >= 255) + { InstList.push_back(Tramp | backOp1 << 8); MarkUsed.push_back(false); backOp1 = 0; curIndex++; } + unsigned backOp2 = (s32)(curIndex - 1 - Op2); - if (backOp2 >= 256) { + if (backOp2 >= 256) + { InstList.push_back(Tramp | backOp2 << 8); MarkUsed.push_back(false); backOp2 = 0; 
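// (Aside, not part of this patch: the Tramp emission above exists because
// each 32-bit IR word packs its operands as backwards distances in 8-bit
// fields -- see the final push_back: Opcode | (backOp1 << 8) |
// (backOp2 << 16) | (extra << 24). An operand more than 255 slots back
// cannot be encoded directly, so a trampoline word carrying the full
// distance is emitted first and the instruction refers to that instead.
// Op1 is checked against 255 rather than 256 because a subsequent Op2
// trampoline pushes the instruction one slot further from Op1; the
// backOp1++ just below is that adjustment.)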
backOp1++; curIndex++; } + InstList.push_back(Opcode | (backOp1 << 8) | (backOp2 << 16) | (extra << 24)); MarkUsed.push_back(false); return curIndex; } #if 0 -InstLoc IRBuilder::EmitTriOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, InstLoc Op3) { +InstLoc IRBuilder::EmitTriOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, InstLoc Op3) +{ InstLoc curIndex = InstList.data() + InstList.size(); unsigned backOp1 = curIndex - 1 - Op1; - if (backOp1 >= 254) { + if (backOp1 >= 254) + { InstList.push_back(Tramp | backOp1 << 8); MarkUsed.push_back(false); backOp1 = 0; curIndex++; } + unsigned backOp2 = curIndex - 1 - Op2; - if (backOp2 >= 255) { + if (backOp2 >= 255) + { InstList.push_back((Tramp | backOp2 << 8)); MarkUsed.push_back(false); backOp2 = 0; backOp1++; curIndex++; } + unsigned backOp3 = curIndex - 1 - Op3; - if (backOp3 >= 256) { + if (backOp3 >= 256) + { InstList.push_back(Tramp | (backOp3 << 8)); MarkUsed.push_back(false); backOp3 = 0; @@ -208,14 +224,17 @@ InstLoc IRBuilder::EmitTriOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, InstLoc backOp1++; curIndex++; } + InstList.push_back(Opcode | (backOp1 << 8) | (backOp2 << 16) | (backOp3 << 24)); MarkUsed.push_back(false); return curIndex; } #endif -unsigned IRBuilder::ComputeKnownZeroBits(InstLoc I) const { - switch (getOpcode(*I)) { +unsigned IRBuilder::ComputeKnownZeroBits(InstLoc I) const +{ + switch (getOpcode(*I)) + { case Load8: return 0xFFFFFF00; case Or: @@ -225,21 +244,24 @@ unsigned IRBuilder::ComputeKnownZeroBits(InstLoc I) const { return ComputeKnownZeroBits(getOp1(I)) | ComputeKnownZeroBits(getOp2(I)); case Shl: - if (isImm(*getOp2(I))) { + if (isImm(*getOp2(I))) + { unsigned samt = GetImmValue(getOp2(I)) & 31; return (ComputeKnownZeroBits(getOp1(I)) << samt) | ~(-1U << samt); } return 0; case Shrl: - if (isImm(*getOp2(I))) { + if (isImm(*getOp2(I))) + { unsigned samt = GetImmValue(getOp2(I)) & 31; return (ComputeKnownZeroBits(getOp1(I)) >> samt) | ~(-1U >> samt); } return 0; case Rol: - if (isImm(*getOp2(I))) { + if (isImm(*getOp2(I))) + { return _rotl(ComputeKnownZeroBits(getOp1(I)), GetImmValue(getOp2(I))); } @@ -248,37 +270,44 @@ unsigned IRBuilder::ComputeKnownZeroBits(InstLoc I) const { } } -InstLoc IRBuilder::FoldZeroOp(unsigned Opcode, unsigned extra) { - if (Opcode == LoadGReg) { +InstLoc IRBuilder::FoldZeroOp(unsigned Opcode, unsigned extra) +{ + if (Opcode == LoadGReg) + { // Reg load folding: if we already loaded the value, // load it again if (!GRegCache[extra]) GRegCache[extra] = EmitZeroOp(LoadGReg, extra); return GRegCache[extra]; } - if (Opcode == LoadFReg) { + else if (Opcode == LoadFReg) + { // Reg load folding: if we already loaded the value, // load it again if (!FRegCache[extra]) FRegCache[extra] = EmitZeroOp(LoadFReg, extra); return FRegCache[extra]; } - if (Opcode == LoadFRegDENToZero) { + else if (Opcode == LoadFRegDENToZero) + { FRegCacheStore[extra] = nullptr; // prevent previous store operation from zapping FRegCache[extra] = EmitZeroOp(LoadFRegDENToZero, extra); return FRegCache[extra]; } - if (Opcode == LoadCarry) { + else if (Opcode == LoadCarry) + { if (!CarryCache) CarryCache = EmitZeroOp(LoadCarry, extra); return CarryCache; } - if (Opcode == LoadCR) { + else if (Opcode == LoadCR) + { if (!CRCache[extra]) CRCache[extra] = EmitZeroOp(LoadCR, extra); return CRCache[extra]; } - if (Opcode == LoadCTR) { + else if (Opcode == LoadCTR) + { if (!CTRCache) CTRCache = EmitZeroOp(LoadCTR, extra); return CTRCache; @@ -287,76 +316,88 @@ InstLoc IRBuilder::FoldZeroOp(unsigned Opcode, unsigned extra) { return 
EmitZeroOp(Opcode, extra); } -InstLoc IRBuilder::FoldUOp(unsigned Opcode, InstLoc Op1, unsigned extra) { - if (Opcode == StoreGReg) { +InstLoc IRBuilder::FoldUOp(unsigned Opcode, InstLoc Op1, unsigned extra) +{ + if (Opcode == StoreGReg) + { // Reg store folding: save the value for load folding. // If there's a previous store, zap it because it's dead. GRegCache[extra] = Op1; - if (GRegCacheStore[extra]) { + if (GRegCacheStore[extra]) *GRegCacheStore[extra] = 0; - } + GRegCacheStore[extra] = EmitUOp(StoreGReg, Op1, extra); return GRegCacheStore[extra]; } - if (Opcode == StoreFReg) { + else if (Opcode == StoreFReg) + { FRegCache[extra] = Op1; - if (FRegCacheStore[extra]) { + if (FRegCacheStore[extra]) *FRegCacheStore[extra] = 0; - } + FRegCacheStore[extra] = EmitUOp(StoreFReg, Op1, extra); return FRegCacheStore[extra]; } - if (Opcode == StoreCarry) { + else if (Opcode == StoreCarry) + { CarryCache = Op1; - if (CarryCacheStore) { + if (CarryCacheStore) *CarryCacheStore = 0; - } + CarryCacheStore = EmitUOp(StoreCarry, Op1, extra); return CarryCacheStore; } - if (Opcode == StoreCR) { + else if (Opcode == StoreCR) + { CRCache[extra] = Op1; - if (CRCacheStore[extra]) { + if (CRCacheStore[extra]) *CRCacheStore[extra] = 0; - } + CRCacheStore[extra] = EmitUOp(StoreCR, Op1, extra); return CRCacheStore[extra]; } - if (Opcode == StoreCTR) { + else if (Opcode == StoreCTR) + { CTRCache = Op1; - if (CTRCacheStore) { + if (CTRCacheStore) *CTRCacheStore = 0; - } + CTRCacheStore = EmitUOp(StoreCTR, Op1, extra); return CTRCacheStore; } - if (Opcode == CompactMRegToPacked) { + else if (Opcode == CompactMRegToPacked) + { if (getOpcode(*Op1) == ExpandPackedToMReg) return getOp1(Op1); } - if (Opcode == DoubleToSingle) { + else if (Opcode == DoubleToSingle) + { if (getOpcode(*Op1) == DupSingleToMReg) return getOp1(Op1); - if (getOpcode(*Op1) >= FDMul && getOpcode(*Op1) <= FDSub) { + + if (getOpcode(*Op1) >= FDMul && getOpcode(*Op1) <= FDSub) + { InstLoc OOp1 = getOp1(Op1), OOp2 = getOp2(Op1); if (getOpcode(*OOp1) == DupSingleToMReg && - getOpcode(*OOp2) == DupSingleToMReg) { - if (getOpcode(*Op1) == FDMul) { + getOpcode(*OOp2) == DupSingleToMReg) + { + if (getOpcode(*Op1) == FDMul) return FoldBiOp(FSMul, getOp1(OOp1), getOp2(OOp2)); - } else if (getOpcode(*Op1) == FDAdd) { + else if (getOpcode(*Op1) == FDAdd) return FoldBiOp(FSAdd, getOp1(OOp1), getOp2(OOp2)); - } else if (getOpcode(*Op1) == FDSub) { + else if (getOpcode(*Op1) == FDSub) return FoldBiOp(FSSub, getOp1(OOp1), getOp2(OOp2)); - } } } } - if (Opcode == Not) { - if (getOpcode(*Op1) == Not) { + else if (Opcode == Not) + { + if (getOpcode(*Op1) == Not) + { return getOp1(Op1); } } - if (Opcode == FastCRGTSet) + else if (Opcode == FastCRGTSet) { if (getOpcode(*Op1) == ICmpCRSigned) return EmitICmpSgt(getOp1(Op1), getOp2(Op1)); @@ -365,7 +406,7 @@ InstLoc IRBuilder::FoldUOp(unsigned Opcode, InstLoc Op1, unsigned extra) { if (isImm(*Op1)) return EmitIntConst((s64)GetImmValue64(Op1) > 0); } - if (Opcode == FastCRLTSet) + else if (Opcode == FastCRLTSet) { if (getOpcode(*Op1) == ICmpCRSigned) return EmitICmpSlt(getOp1(Op1), getOp2(Op1)); @@ -374,7 +415,7 @@ InstLoc IRBuilder::FoldUOp(unsigned Opcode, InstLoc Op1, unsigned extra) { if (isImm(*Op1)) return EmitIntConst(!!(GetImmValue64(Op1) & (1ull << 62))); } - if (Opcode == FastCREQSet) + else if (Opcode == FastCREQSet) { if (getOpcode(*Op1) == ICmpCRSigned || getOpcode(*Op1) == ICmpCRUnsigned) return EmitICmpEq(getOp1(Op1), getOp2(Op1)); @@ -386,33 +427,40 @@ InstLoc IRBuilder::FoldUOp(unsigned Opcode, InstLoc 
Op1, unsigned extra) { } // Fold Add opcode. Some rules are ported from LLVM -InstLoc IRBuilder::FoldAdd(InstLoc Op1, InstLoc Op2) { +InstLoc IRBuilder::FoldAdd(InstLoc Op1, InstLoc Op2) +{ simplifyCommutative(Add, Op1, Op2); // i0 + i1 => (i0 + i1) - if (isImm(*Op1) && isImm(*Op2)) { + if (isImm(*Op1) && isImm(*Op2)) + { return EmitIntConst(GetImmValue(Op1) + GetImmValue(Op2)); } // x + 0 => x - if (isImm(*Op2) && GetImmValue(Op2) == 0) { + if (isImm(*Op2) && GetImmValue(Op2) == 0) + { return Op1; } // x + (y - x) --> y - if (getOpcode(*Op2) == Sub && isSameValue(Op1, getOp2(Op2))) { + if (getOpcode(*Op2) == Sub && isSameValue(Op1, getOp2(Op2))) + { return getOp1(Op2); } // (x - y) + y => x - if (getOpcode(*Op1) == Sub && isSameValue(getOp2(Op1), Op2)) { + if (getOpcode(*Op1) == Sub && isSameValue(getOp2(Op1), Op2)) + { return getOp1(Op1); } - if (InstLoc negOp1 = isNeg(Op1)) { + if (InstLoc negOp1 = isNeg(Op1)) + { //// TODO: Test the folding below //// -A + -B --> -(A + B) - //if (InstLoc negOp2 = isNeg(Op2)) { + //if (InstLoc negOp2 = isNeg(Op2)) + //{ // return FoldSub(EmitIntConst(0), FoldAdd(negOp1, negOp2)); //} @@ -421,45 +469,57 @@ InstLoc IRBuilder::FoldAdd(InstLoc Op1, InstLoc Op2) { } // A + -B --> A - B - if (InstLoc negOp2 = isNeg(Op2)) { + if (InstLoc negOp2 = isNeg(Op2)) + { return FoldSub(Op1, negOp2); } // (x * i0) + x => x * (i0 + 1) - if (getOpcode(*Op1) == Mul && isImm(*getOp2(Op1)) && isSameValue(getOp1(Op1), Op2)) { + if (getOpcode(*Op1) == Mul && isImm(*getOp2(Op1)) && isSameValue(getOp1(Op1), Op2)) + { return FoldMul(getOp1(Op1), EmitIntConst(GetImmValue(getOp2(Op1)) + 1)); } //// TODO: Test the folding below //// (x * i0) + (x * i1) => x * (i0 + i1) - //if (getOpcode(*Op1) == Mul && getOpcode(*Op2) == Mul && isSameValue(getOp1(Op1), getOp1(Op2)) && isImm(*getOp2(Op1)) && isImm(*getOp2(Op2))) { + //if (getOpcode(*Op1) == Mul && getOpcode(*Op2) == Mul && isSameValue(getOp1(Op1), getOp1(Op2)) && isImm(*getOp2(Op1)) && isImm(*getOp2(Op2))) + //{ // return FoldMul(getOp1(Op1), EmitIntConst(GetImmValue(getOp2(Op1)) + GetImmValue(getOp2(Op2)))); //} // x + x * i0 => x * (i0 + 1) - if (getOpcode(*Op2) == Mul && isImm(*getOp2(Op2)) && isSameValue(Op1, getOp1(Op2))) { + if (getOpcode(*Op2) == Mul && isImm(*getOp2(Op2)) && isSameValue(Op1, getOp1(Op2))) + { return FoldMul(Op1, EmitIntConst(GetImmValue(getOp2(Op2)) + 1)); } // w * x + y * z => w * (x + z) iff w == y - if (getOpcode(*Op1) == Mul && getOpcode(*Op2) == Mul) { + if (getOpcode(*Op1) == Mul && getOpcode(*Op2) == Mul) + { InstLoc w = getOp1(Op1); InstLoc x = getOp2(Op1); InstLoc y = getOp1(Op2); InstLoc z = getOp2(Op2); - if (!isSameValue(w, y)) { - if (isSameValue(w, z)) { + if (!isSameValue(w, y)) + { + if (isSameValue(w, z)) + { std::swap(y, z); - } else if (isSameValue(y, x)) { + } + else if (isSameValue(y, x)) + { std::swap(w, x); - } else if (isSameValue(x, z)) { + } + else if (isSameValue(x, z)) + { std::swap(y, z); std::swap(w, x); } } - if (isSameValue(w, y)) { + if (isSameValue(w, y)) + { return FoldMul(w, FoldAdd(x, z)); } } @@ -468,108 +528,135 @@ InstLoc IRBuilder::FoldAdd(InstLoc Op1, InstLoc Op2) { } // Fold Sub opcode. 
Some rules are ported from LLVM -InstLoc IRBuilder::FoldSub(InstLoc Op1, InstLoc Op2) { +InstLoc IRBuilder::FoldSub(InstLoc Op1, InstLoc Op2) +{ // (x - x) => 0 - if (isSameValue(Op1, Op2)) { + if (isSameValue(Op1, Op2)) + { return EmitIntConst(0); } // x - (-A) => x + A - if (InstLoc negOp2 = isNeg(Op2)) { + if (InstLoc negOp2 = isNeg(Op2)) + { return FoldAdd(Op1, negOp2); } // (x - i0) => x + -i0 - if (isImm(*Op2)) { + if (isImm(*Op2)) + { return FoldAdd(Op1, EmitIntConst(-GetImmValue(Op2))); } - if (getOpcode(*Op2) == Add) { + if (getOpcode(*Op2) == Add) + { // x - (x + y) => -y - if (isSameValue(Op1, getOp1(Op2))) { + if (isSameValue(Op1, getOp1(Op2))) + { return FoldSub(EmitIntConst(0), getOp2(Op2)); } // x - (y + x) => -y - if (isSameValue(Op1, getOp2(Op2))) { + if (isSameValue(Op1, getOp2(Op2))) + { return FoldSub(EmitIntConst(0), getOp1(Op2)); } // i0 - (x + i1) => (i0 - i1) - x - if (isImm(*Op1) && isImm(*getOp2(Op2))) { + if (isImm(*Op1) && isImm(*getOp2(Op2))) + { return FoldSub(EmitIntConst(GetImmValue(Op1) - GetImmValue(getOp2(Op2))), getOp1(Op2)); } } //// TODO: Test the folding below //// 0 - (C << X) -> (-C << X) - //if (isImm(*Op1) && GetImmValue(Op1) == 0 && getOpcode(*Op2) == Shl && isImm(*getOp1(Op2))) { + //if (isImm(*Op1) && GetImmValue(Op1) == 0 && getOpcode(*Op2) == Shl && isImm(*getOp1(Op2))) + //{ // return FoldShl(EmitIntConst(-GetImmValue(getOp1(Op2))), getOp2(Op2)); //} //// TODO: Test the folding below //// x - x * i0 = x * (1 - i0) - //if (getOpcode(*Op2) == Mul && isImm(*getOp2(Op2)) && isSameValue(Op1, getOp1(Op2))) { + //if (getOpcode(*Op2) == Mul && isImm(*getOp2(Op2)) && isSameValue(Op1, getOp1(Op2))) + //{ // return FoldMul(Op1, EmitIntConst(1 - GetImmValue(getOp2(Op2)))); //} - if (getOpcode(*Op1) == Add) { + if (getOpcode(*Op1) == Add) + { // (x + y) - x => y - if (isSameValue(getOp1(Op1), Op2)) { + if (isSameValue(getOp1(Op1), Op2)) + { return getOp2(Op1); } // (x + y) - y => x - if (isSameValue(getOp2(Op1), Op2)) { + if (isSameValue(getOp2(Op1), Op2)) + { return getOp1(Op1); } } - //if (getOpcode(*Op1) == Sub) { + //if (getOpcode(*Op1) == Sub) + //{ // // TODO: Test the folding below // // (x - y) - x => -y - // if (isSameValue(getOp1(Op1), Op2)) { + // if (isSameValue(getOp1(Op1), Op2)) + // { // return FoldSub(EmitIntConst(0), getOp2(Op1)); // } //} - if (getOpcode(*Op1) == Mul) { + if (getOpcode(*Op1) == Mul) + { // x * i0 - x => x * (i0 - 1) - if (isImm(*getOp2(Op1)) && isSameValue(getOp1(Op1), Op2)) { + if (isImm(*getOp2(Op1)) && isSameValue(getOp1(Op1), Op2)) + { return FoldMul(getOp1(Op1), EmitIntConst(GetImmValue(getOp2(Op1)) - 1)); } //// TODO: Test the folding below //// x * i0 - x * i1 => x * (i0 - i1) - //if (getOpcode(*Op2) == Mul && isSameValue(getOp1(Op1), getOp1(Op2)) && isImm(*getOp2(Op1)) && isImm(*getOp2(Op2))) { + //if (getOpcode(*Op2) == Mul && isSameValue(getOp1(Op1), getOp1(Op2)) && isImm(*getOp2(Op1)) && isImm(*getOp2(Op2))) + //{ // return FoldMul(getOp1(Op1), EmitIntConst(GetImmValue(getOp2(Op1)) + GetImmValue(getOp2(Op2)))); //} } // (x + i0) - (y + i1) => (x - y) + (i0 - i1) - if (getOpcode(*Op1) == Add && getOpcode(*Op2) == Add && isImm(*getOp2(Op1)) && isImm(*getOp2(Op2))) { + if (getOpcode(*Op1) == Add && getOpcode(*Op2) == Add && isImm(*getOp2(Op1)) && isImm(*getOp2(Op2))) + { return FoldAdd(FoldSub(getOp1(Op1), getOp1(Op2)), EmitIntConst(GetImmValue(getOp2(Op1)) - GetImmValue(getOp2(Op2)))); } // w * x - y * z => w * (x - z) iff w == y - if (getOpcode(*Op1) == Mul && getOpcode(*Op2) == Mul) { + if (getOpcode(*Op1) == 
Mul && getOpcode(*Op2) == Mul) + { InstLoc w = getOp1(Op1); InstLoc x = getOp2(Op1); InstLoc y = getOp1(Op2); InstLoc z = getOp2(Op2); - if (!isSameValue(w, y)) { - if (isSameValue(w, z)) { + if (!isSameValue(w, y)) + { + if (isSameValue(w, z)) + { std::swap(y, z); - } else if (isSameValue(y, x)) { + } + else if (isSameValue(y, x)) + { std::swap(w, x); - } else if (isSameValue(x, z)) { + } + else if (isSameValue(x, z)) + { std::swap(y, z); std::swap(w, x); } } - if (isSameValue(w, y)) { + if (isSameValue(w, y)) + { return FoldMul(w, FoldSub(x, z)); } } @@ -578,36 +665,44 @@ InstLoc IRBuilder::FoldSub(InstLoc Op1, InstLoc Op2) { } // Fold Mul opcode. Some rules are ported from LLVM -InstLoc IRBuilder::FoldMul(InstLoc Op1, InstLoc Op2) { +InstLoc IRBuilder::FoldMul(InstLoc Op1, InstLoc Op2) +{ simplifyCommutative(Mul, Op1, Op2); // i0 * i1 => (i0 * i1) - if (isImm(*Op1) && isImm(*Op2)) { + if (isImm(*Op1) && isImm(*Op2)) + { return EmitIntConst(GetImmValue(Op1) * GetImmValue(Op2)); } // (x << i0) * i1 => x * (i1 << i0) - if (getOpcode(*Op1) == Shl && isImm(*getOp2(Op1)) && isImm(*Op2)) { + if (getOpcode(*Op1) == Shl && isImm(*getOp2(Op1)) && isImm(*Op2)) + { return FoldMul(getOp1(Op1), EmitIntConst(GetImmValue(Op2) << GetImmValue(getOp2(Op1)))); } - if (isImm(*Op2)) { + if (isImm(*Op2)) + { const unsigned imm = GetImmValue(Op2); // x * 0 => 0 - if (imm == 0) { + if (imm == 0) + { return EmitIntConst(0); } // x * -1 => 0 - x - if (imm == -1U) { + if (imm == -1U) + { return FoldSub(EmitIntConst(0), Op1); } - for (unsigned i0 = 0; i0 < 30; ++i0) { + for (unsigned i0 = 0; i0 < 30; ++i0) + { // x * (1 << i0) => x << i0 // One "shl" is faster than one "imul". - if (imm == (1U << i0)) { + if (imm == (1U << i0)) + { return FoldShl(Op1, EmitIntConst(i0)); } } @@ -615,65 +710,79 @@ InstLoc IRBuilder::FoldMul(InstLoc Op1, InstLoc Op2) { // (x + i0) * i1 => x * i1 + i0 * i1 // The later format can be folded by other rules, again. 
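// --- Illustrative sketch (editor's addition, not part of the original patch) ---
// The FoldAdd/FoldSub/FoldMul rules above all lean on 32-bit wraparound
// algebra, so each identity can be spot-checked with plain uint32_t values
// standing in for IR nodes. A minimal standalone check (all names here are
// hypothetical, chosen for the sketch only):
#include <cassert>
#include <cstdint>

int main()
{
    uint32_t x = 0xDEADBEEFu, y = 0x12345678u, i0 = 1000u, i1 = 77u;

    // FoldAdd: x + (y - x) --> y, and (x - y) + y --> x
    assert(x + (y - x) == y);
    assert((x - y) + y == x);

    // FoldAdd/FoldSub: (x * i0) + x --> x * (i0 + 1), x * i0 - x --> x * (i0 - 1)
    assert(x * i0 + x == x * (i0 + 1));
    assert(x * i0 - x == x * (i0 - 1));

    // FoldSub: i0 - (x + i1) --> (i0 - i1) - x, and x - (x + y) --> -y
    assert(i0 - (x + i1) == (i0 - i1) - x);
    assert(x - (x + y) == 0u - y);

    // FoldMul: x * -1 --> 0 - x, and x * (1 << i0) --> x << i0
    assert(x * 0xFFFFFFFFu == 0u - x);
    for (unsigned k = 0; k < 30; ++k)
        assert(x * (1u << k) == x << k);

    return 0;
}
// The hunk continuing below implements the (x + i0) * i1 => x * i1 + i0 * i1
// distribution rule described in the comment just above.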
-	if (getOpcode(*Op1) == Add && isImm(*getOp2(Op1)) && isImm(*Op2)) {
+	if (getOpcode(*Op1) == Add && isImm(*getOp2(Op1)) && isImm(*Op2))
+	{
 		return FoldAdd(FoldMul(getOp1(Op1), Op2), EmitIntConst(GetImmValue(getOp2(Op1)) * GetImmValue(Op2)));
 	}
 	//// TODO: Test the folding below
 	//// -X * -Y => X * Y
-	//if (InstLoc negOp1 = isNeg(Op1)) {
-	//	if (InstLoc negOp2 = isNeg(Op2)) {
+	//if (InstLoc negOp1 = isNeg(Op1))
+	//{
+	//	if (InstLoc negOp2 = isNeg(Op2))
+	//	{
 	//		return FoldMul(negOp1, negOp2);
 	//	}
 	//}
 	//// TODO: Test the folding below
 	//// x * (1 << y) => x << y
-	//if (getOpcode(*Op2) == Shl && isImm(*getOp1(Op2)) && GetImmValue(getOp1(Op2)) == 1) {
+	//if (getOpcode(*Op2) == Shl && isImm(*getOp1(Op2)) && GetImmValue(getOp1(Op2)) == 1)
+	//{
 	//	return FoldShl(Op1, getOp2(Op2));
 	//}
 	//// TODO: Test the folding below
 	//// (1 << y) * x => x << y
-	//if (getOpcode(*Op1) == Shl && isImm(*getOp1(Op1)) && GetImmValue(getOp1(Op1)) == 1) {
+	//if (getOpcode(*Op1) == Shl && isImm(*getOp1(Op1)) && GetImmValue(getOp1(Op1)) == 1)
+	//{
 	//	return FoldShl(Op2, getOp2(Op1));
 	//}
 	// x * y (where y is 0 or 1) => (0 - y) & x
-	if (ComputeKnownZeroBits(Op2) == -2U) {
+	if (ComputeKnownZeroBits(Op2) == -2U)
+	{
 		return FoldAnd(FoldSub(EmitIntConst(0), Op2), Op1);
 	}
 	// x * y (where x is 0 or 1) => (0 - x) & y
-	if (ComputeKnownZeroBits(Op1) == -2U) {
+	if (ComputeKnownZeroBits(Op1) == -2U)
+	{
 		return FoldAnd(FoldSub(EmitIntConst(0), Op1), Op2);
 	}
 	return EmitBiOp(Mul, Op1, Op2);
 }
-InstLoc IRBuilder::FoldMulHighUnsigned(InstLoc Op1, InstLoc Op2) {
+InstLoc IRBuilder::FoldMulHighUnsigned(InstLoc Op1, InstLoc Op2)
+{
 	// (i0 * i1) >> 32
-	if (isImm(*Op1) && isImm(*Op2)) {
+	if (isImm(*Op1) && isImm(*Op2))
+	{
 		return EmitIntConst((u32)(((u64)GetImmValue(Op1) * (u64)GetImmValue(Op2)) >> 32));
 	}
-	if (isImm(*Op1) && !isImm(*Op2)) {
+	if (isImm(*Op1) && !isImm(*Op2))
+	{
 		return FoldMulHighUnsigned(Op2, Op1);
 	}
-	if (isImm(*Op2)) {
+	if (isImm(*Op2))
+	{
 		const unsigned imm = GetImmValue(Op2);
 		// (x * 0) >> 32 => 0
-		if (imm == 0) {
+		if (imm == 0)
+		{
 			return EmitIntConst(0);
 		}
-		for (unsigned i0 = 0; i0 < 30; ++i0) {
+		for (unsigned i0 = 0; i0 < 30; ++i0)
+		{
 			// (x * (1 << i0)) >> 32 => x >> (32 - i0)
 			// One "shr" is faster than one "imul".
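// --- Illustrative sketch (editor's addition, not part of the original patch) ---
// The rule the comment above describes, checked standalone: multiplying a
// 32-bit value by 1 << i0 and keeping only the high 32 bits is a plain right
// shift. Note the identity requires i0 > 0, since at i0 == 0 the equivalent
// shift count would be 32, out of range for a 32-bit value.
#include <cassert>
#include <cstdint>

int main()
{
    uint32_t x = 0xDEADBEEFu;
    for (unsigned i0 = 1; i0 < 32; ++i0)
    {
        uint32_t high = (uint32_t)(((uint64_t)x * ((uint64_t)1 << i0)) >> 32);
        assert(high == x >> (32 - i0));
    }
    return 0;
}
// The "imm == (1U << i0)" test continuing below is the fold itself.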
- if (imm == (1U << i0)) { + if (imm == (1U << i0)) + { return FoldShrl(Op1, EmitIntConst(32 - i0)); } } @@ -682,43 +791,62 @@ InstLoc IRBuilder::FoldMulHighUnsigned(InstLoc Op1, InstLoc Op2) { return EmitBiOp(MulHighUnsigned, Op1, Op2); } -InstLoc IRBuilder::FoldAnd(InstLoc Op1, InstLoc Op2) { +InstLoc IRBuilder::FoldAnd(InstLoc Op1, InstLoc Op2) +{ simplifyCommutative(And, Op1, Op2); - if (isImm(*Op1) && isImm(*Op2)) { + if (isImm(*Op1) && isImm(*Op2)) + { return EmitIntConst(GetImmValue(Op1) & GetImmValue(Op2)); } - if (isImm(*Op2)) { - if (!GetImmValue(Op2)) return EmitIntConst(0); - if (GetImmValue(Op2) == -1U) return Op1; - if (getOpcode(*Op1) == And && isImm(*getOp2(Op1))) { - unsigned RHS = GetImmValue(Op2) & - GetImmValue(getOp2(Op1)); + + if (isImm(*Op2)) + { + if (!GetImmValue(Op2)) + return EmitIntConst(0); + + if (GetImmValue(Op2) == -1U) + return Op1; + + if (getOpcode(*Op1) == And && isImm(*getOp2(Op1))) + { + unsigned RHS = GetImmValue(Op2) & GetImmValue(getOp2(Op1)); return FoldAnd(getOp1(Op1), EmitIntConst(RHS)); - } else if (getOpcode(*Op1) == Rol && isImm(*getOp2(Op1))) { + } + else if (getOpcode(*Op1) == Rol && isImm(*getOp2(Op1))) + { unsigned shiftMask1 = -1U << (GetImmValue(getOp2(Op1)) & 31); + if (GetImmValue(Op2) == shiftMask1) return FoldShl(getOp1(Op1), getOp2(Op1)); + unsigned shiftAmt2 = ((32 - GetImmValue(getOp2(Op1))) & 31); unsigned shiftMask2 = -1U >> shiftAmt2; - if (GetImmValue(Op2) == shiftMask2) { + + if (GetImmValue(Op2) == shiftMask2) + { return FoldShrl(getOp1(Op1), EmitIntConst(shiftAmt2)); } } - if (!(~ComputeKnownZeroBits(Op1) & ~GetImmValue(Op2))) { + + if (!(~ComputeKnownZeroBits(Op1) & ~GetImmValue(Op2))) + { return Op1; } - //if (getOpcode(*Op1) == Xor || getOpcode(*Op1) == Or) { + //if (getOpcode(*Op1) == Xor || getOpcode(*Op1) == Or) + //{ // // TODO: Test the folding below // // (x op y) & z => (x & z) op y if (y & z) == 0 - // if ((~ComputeKnownZeroBits(getOp2(Op1)) & ~ComputeKnownZeroBits(Op2)) == 0) { + // if ((~ComputeKnownZeroBits(getOp2(Op1)) & ~ComputeKnownZeroBits(Op2)) == 0) + // { // return FoldBiOp(getOpcode(*Op1), FoldAnd(getOp1(Op1), Op2), getOp2(Op1)); // } // // TODO: Test the folding below // // (x op y) & z => (y & z) op x if (x & z) == 0 - // if ((~ComputeKnownZeroBits(getOp1(Op1)) & ~ComputeKnownZeroBits(Op2)) == 0) { + // if ((~ComputeKnownZeroBits(getOp1(Op1)) & ~ComputeKnownZeroBits(Op2)) == 0) + // { // return FoldBiOp(getOpcode(*Op1), FoldAnd(getOp2(Op1), Op2), getOp1(Op1)); // } //} @@ -726,7 +854,8 @@ InstLoc IRBuilder::FoldAnd(InstLoc Op1, InstLoc Op2) { //// TODO: Test the folding below //// (x >> z) & (y >> z) => (x & y) >> z - //if (getOpcode(*Op1) == Shrl && getOpcode(*Op2) == Shrl && isSameValue(getOp2(Op1), getOp2(Op2))) { + //if (getOpcode(*Op1) == Shrl && getOpcode(*Op2) == Shrl && isSameValue(getOp2(Op1), getOp2(Op2))) + //{ // return FoldShl(FoldAnd(getOp1(Op1), getOp2(Op1)), getOp2(Op1)); //} @@ -736,69 +865,87 @@ InstLoc IRBuilder::FoldAnd(InstLoc Op1, InstLoc Op2) { //// ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 //// ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 //if ((getOpcode(*Op1) == Add || getOpcode(*Op1) == Sub) && - // (getOpcode(*getOp1(Op1)) == Or || getOpcode(*getOp1(Op1)) == Xor)) + // (getOpcode(*getOp1(Op1)) == Or || getOpcode(*getOp1(Op1)) == Xor)) //{ // const InstLoc A = getOp1(getOp1(Op1)); // const InstLoc N = getOp2(getOp1(Op1)); // const InstLoc B = getOp2(Op1); // const InstLoc AndRHS = Op2; - // if ((~ComputeKnownZeroBits(N) & 
~ComputeKnownZeroBits(AndRHS)) == 0) { + // if ((~ComputeKnownZeroBits(N) & ~ComputeKnownZeroBits(AndRHS)) == 0) + // { // return FoldAnd(FoldBiOp(getOpcode(*Op1), A, B), AndRHS); // } //} //// TODO: Test the folding below //// (~A & ~B) == (~(A | B)) - De Morgan's Law - //if (InstLoc notOp1 = isNot(Op1)) { - // if (InstLoc notOp2 = isNot(Op2)) { + //if (InstLoc notOp1 = isNot(Op1)) + //{ + // if (InstLoc notOp2 = isNot(Op2)) + // { // return FoldXor(EmitIntConst(-1U), FoldOr(notOp1, notOp2)); // } //} //// TODO: Test the folding below //// (X^C)|Y -> (X|Y)^C iff Y&C == 0 - //if (getOpcode(*Op1) == Xor && isImm(*getOp2(Op1)) && (~ComputeKnownZeroBits(Op2) & GetImmValue(getOp2(Op1))) == 0) { + //if (getOpcode(*Op1) == Xor && isImm(*getOp2(Op1)) && (~ComputeKnownZeroBits(Op2) & GetImmValue(getOp2(Op1))) == 0) + //{ // return FoldXor(FoldOr(getOp1(Op1), Op2), getOp2(Op1)); //} - if (Op1 == Op2) return Op1; + if (Op1 == Op2) + return Op1; return EmitBiOp(And, Op1, Op2); } -InstLoc IRBuilder::FoldOr(InstLoc Op1, InstLoc Op2) { +InstLoc IRBuilder::FoldOr(InstLoc Op1, InstLoc Op2) +{ simplifyCommutative(Or, Op1, Op2); - if (isImm(*Op1) && isImm(*Op2)) { + if (isImm(*Op1) && isImm(*Op2)) + { return EmitIntConst(GetImmValue(Op1) | GetImmValue(Op2)); } - if (isImm(*Op2)) { - if (!GetImmValue(Op2)) return Op1; - if (GetImmValue(Op2) == -1U) return EmitIntConst(-1U); - if (getOpcode(*Op1) == Or && isImm(*getOp2(Op1))) { - unsigned RHS = GetImmValue(Op2) | - GetImmValue(getOp2(Op1)); + + if (isImm(*Op2)) + { + if (!GetImmValue(Op2)) + return Op1; + + if (GetImmValue(Op2) == -1U) + return EmitIntConst(-1U); + + if (getOpcode(*Op1) == Or && isImm(*getOp2(Op1))) + { + unsigned RHS = GetImmValue(Op2) | GetImmValue(getOp2(Op1)); + return FoldOr(getOp1(Op1), EmitIntConst(RHS)); } // (X & C1) | C2 --> (X | C2) & (C1|C2) // iff (C1 & C2) == 0. 
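// --- Illustrative sketch (editor's addition, not part of the original patch) ---
// Spot checks for the bitwise rules in FoldAnd/FoldOr above, again with plain
// uint32_t values in place of IR nodes. rotl32 is a portable stand-in for the
// _rotl that FoldRol uses further below:
#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t v, unsigned n)
{
    n &= 31;
    return n ? (v << n) | (v >> (32 - n)) : v;
}

int main()
{
    uint32_t x = 0xDEADBEEFu;

    // FoldAnd: a rotate whose wrapped-around bits are masked off is a shift.
    for (unsigned n = 0; n < 32; ++n)
    {
        uint32_t shiftMask1 = 0xFFFFFFFFu << (n & 31);
        assert((rotl32(x, n) & shiftMask1) == x << n);          // rol+mask --> shl

        unsigned shiftAmt2 = (32 - n) & 31;
        uint32_t shiftMask2 = 0xFFFFFFFFu >> shiftAmt2;
        assert((rotl32(x, n) & shiftMask2) == x >> shiftAmt2);  // rol+mask --> shrl
    }

    // FoldOr: De Morgan's law, as rewritten for the Not/Not case.
    uint32_t a = 0xF0F0F0F0u, b = 0x12345678u;
    assert((~a | ~b) == ~(a & b));

    // The "(X & C1) | C2" rule quoted in the comment just above, with
    // constants satisfying C1 & C2 == 0:
    uint32_t C1 = 0x00FF0000u, C2 = 0x000000FFu;
    assert(((x & C1) | C2) == ((x | C2) & (C1 | C2)));

    return 0;
}
// The hunk continuing below implements that "(X & C1) | C2" rewrite in FoldOr.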
- if (getOpcode(*Op1) == And && isImm(*getOp2(Op1)) && (GetImmValue(getOp2(Op1)) & GetImmValue(Op2)) == 0) { + if (getOpcode(*Op1) == And && isImm(*getOp2(Op1)) && (GetImmValue(getOp2(Op1)) & GetImmValue(Op2)) == 0) + { return FoldAnd(FoldOr(getOp1(Op1), Op2), EmitIntConst(GetImmValue(getOp2(Op1)) | GetImmValue(Op2))); } // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2) - if (getOpcode(*Op1) == Xor && isImm(*getOp2(Op1)) && isImm(*Op2)) { + if (getOpcode(*Op1) == Xor && isImm(*getOp2(Op1)) && isImm(*Op2)) + { return FoldXor(FoldOr(getOp1(Op1), Op2), EmitIntConst(GetImmValue(getOp2(Op1)) & ~GetImmValue(Op2))); } } // (~A | ~B) == (~(A & B)) - De Morgan's Law - if (getOpcode(*Op1) == Not && getOpcode(*Op2) == Not) { + if (getOpcode(*Op1) == Not && getOpcode(*Op2) == Not) + { return EmitNot(FoldAnd(getOp1(Op1), getOp1(Op2))); } - if (Op1 == Op2) return Op1; + if (Op1 == Op2) + return Op1; return EmitBiOp(Or, Op1, Op2); } @@ -828,42 +975,56 @@ static unsigned ICmpInverseOp(unsigned op) case ICmpSge: return ICmpSlt; } + PanicAlert("Bad opcode"); return Nop; } -InstLoc IRBuilder::FoldXor(InstLoc Op1, InstLoc Op2) { +InstLoc IRBuilder::FoldXor(InstLoc Op1, InstLoc Op2) +{ simplifyCommutative(Xor, Op1, Op2); - if (isImm(*Op1) && isImm(*Op2)) { + if (isImm(*Op1) && isImm(*Op2)) + { return EmitIntConst(GetImmValue(Op1) ^ GetImmValue(Op2)); } - if (isImm(*Op2)) { - if (!GetImmValue(Op2)) return Op1; - if (GetImmValue(Op2) == 0xFFFFFFFFU) { + + if (isImm(*Op2)) + { + if (!GetImmValue(Op2)) + return Op1; + + if (GetImmValue(Op2) == 0xFFFFFFFFU) + { return EmitNot(Op1); } - if (getOpcode(*Op1) == Xor && isImm(*getOp2(Op1))) { + + if (getOpcode(*Op1) == Xor && isImm(*getOp2(Op1))) + { unsigned RHS = GetImmValue(Op2) ^ GetImmValue(getOp2(Op1)); return FoldXor(getOp1(Op1), EmitIntConst(RHS)); } + if (isICmp(getOpcode(*Op1)) && GetImmValue(Op2) == 1) { return FoldBiOp(ICmpInverseOp(getOpcode(*Op1)), getOp1(Op1), getOp2(Op1)); - } } - if (Op1 == Op2) return EmitIntConst(0); + if (Op1 == Op2) + return EmitIntConst(0); return EmitBiOp(Xor, Op1, Op2); } -InstLoc IRBuilder::FoldShl(InstLoc Op1, InstLoc Op2) { - if (isImm(*Op2)) { +InstLoc IRBuilder::FoldShl(InstLoc Op1, InstLoc Op2) +{ + if (isImm(*Op2)) + { // Shl x 0 => x - if (!GetImmValue(Op2)) { + if (!GetImmValue(Op2)) + { return Op1; } @@ -871,59 +1032,71 @@ InstLoc IRBuilder::FoldShl(InstLoc Op1, InstLoc Op2) { return EmitIntConst(GetImmValue(Op1) << (GetImmValue(Op2) & 31)); // ((x * i0) << i1) == x * (i0 << i1) - if (getOpcode(*Op1) == Mul && isImm(*getOp2(Op1))) { + if (getOpcode(*Op1) == Mul && isImm(*getOp2(Op1))) + { return FoldMul(getOp1(Op1), EmitIntConst(GetImmValue(getOp2(Op1)) << GetImmValue(Op2))); } } // 0 << x => 0 - if (isImm(*Op1) && GetImmValue(Op1) == 0) { + if (isImm(*Op1) && GetImmValue(Op1) == 0) + { return EmitIntConst(0); } return EmitBiOp(Shl, Op1, Op2); } -InstLoc IRBuilder::FoldShrl(InstLoc Op1, InstLoc Op2) { - if (isImm(*Op2)) { - if (isImm(*Op1)) - return EmitIntConst(GetImmValue(Op1) >> (GetImmValue(Op2) & 31)); +InstLoc IRBuilder::FoldShrl(InstLoc Op1, InstLoc Op2) +{ + if (isImm(*Op1) && isImm(*Op2)) + { + return EmitIntConst(GetImmValue(Op1) >> (GetImmValue(Op2) & 31)); } + return EmitBiOp(Shrl, Op1, Op2); } -InstLoc IRBuilder::FoldRol(InstLoc Op1, InstLoc Op2) { - if (isImm(*Op2)) { +InstLoc IRBuilder::FoldRol(InstLoc Op1, InstLoc Op2) +{ + if (isImm(*Op2)) + { if (isImm(*Op1)) - return EmitIntConst(_rotl(GetImmValue(Op1), - GetImmValue(Op2))); - if (!(GetImmValue(Op2) & 31)) return Op1; + return 
EmitIntConst(_rotl(GetImmValue(Op1), GetImmValue(Op2))); + + if (!(GetImmValue(Op2) & 31)) + return Op1; } return EmitBiOp(Rol, Op1, Op2); } -InstLoc IRBuilder::FoldBranchCond(InstLoc Op1, InstLoc Op2) { - if (isImm(*Op1)) { +InstLoc IRBuilder::FoldBranchCond(InstLoc Op1, InstLoc Op2) +{ + if (isImm(*Op1)) + { if (GetImmValue(Op1)) return EmitBranchUncond(Op2); + return nullptr; } + return EmitBiOp(BranchCond, Op1, Op2); } -InstLoc IRBuilder::FoldIdleBranch(InstLoc Op1, InstLoc Op2) { - return EmitBiOp( - IdleBranch, - EmitICmpEq(getOp1(getOp1(Op1)), - getOp2(getOp1(Op1))), Op2 - ); +InstLoc IRBuilder::FoldIdleBranch(InstLoc Op1, InstLoc Op2) +{ + return EmitBiOp(IdleBranch, EmitICmpEq(getOp1(getOp1(Op1)), getOp2(getOp1(Op1))), Op2); } -InstLoc IRBuilder::FoldICmp(unsigned Opcode, InstLoc Op1, InstLoc Op2) { - if (isImm(*Op1)) { - if (isImm(*Op2)) { +InstLoc IRBuilder::FoldICmp(unsigned Opcode, InstLoc Op1, InstLoc Op2) +{ + if (isImm(*Op1)) + { + if (isImm(*Op2)) + { unsigned result = 0; - switch (Opcode) { + switch (Opcode) + { case ICmpEq: result = GetImmValue(Op1) == GetImmValue(Op2); break; @@ -943,25 +1116,22 @@ InstLoc IRBuilder::FoldICmp(unsigned Opcode, InstLoc Op1, InstLoc Op2) { result = GetImmValue(Op1) <= GetImmValue(Op2); break; case ICmpSgt: - result = (signed)GetImmValue(Op1) > - (signed)GetImmValue(Op2); + result = (signed)GetImmValue(Op1) > (signed)GetImmValue(Op2); break; case ICmpSlt: - result = (signed)GetImmValue(Op1) < - (signed)GetImmValue(Op2); + result = (signed)GetImmValue(Op1) < (signed)GetImmValue(Op2); break; case ICmpSge: - result = (signed)GetImmValue(Op1) >= - (signed)GetImmValue(Op2); + result = (signed)GetImmValue(Op1) >= (signed)GetImmValue(Op2); break; case ICmpSle: - result = (signed)GetImmValue(Op1) <= - (signed)GetImmValue(Op2); + result = (signed)GetImmValue(Op1) <= (signed)GetImmValue(Op2); break; } return EmitIntConst(result); } - switch (Opcode) { + switch (Opcode) + { case ICmpEq: return FoldICmp(ICmpEq, Op2, Op1); case ICmpNe: @@ -984,86 +1154,120 @@ InstLoc IRBuilder::FoldICmp(unsigned Opcode, InstLoc Op1, InstLoc Op2) { return FoldICmp(ICmpSle, Op2, Op1); } } + return EmitBiOp(Opcode, Op1, Op2); } -InstLoc IRBuilder::FoldICmpCRSigned(InstLoc Op1, InstLoc Op2) { - if (isImm(*Op1)) { - if (isImm(*Op2)) { - s64 diff = (s64)(s32)GetImmValue(Op1) - (s64)(s32)GetImmValue(Op2); - return EmitIntConst64((u64)diff); - } +InstLoc IRBuilder::FoldICmpCRSigned(InstLoc Op1, InstLoc Op2) +{ + if (isImm(*Op1) && isImm(*Op2)) + { + s64 diff = (s64)(s32)GetImmValue(Op1) - (s64)(s32)GetImmValue(Op2); + return EmitIntConst64((u64)diff); } + return EmitBiOp(ICmpCRSigned, Op1, Op2); } -InstLoc IRBuilder::FoldICmpCRUnsigned(InstLoc Op1, InstLoc Op2) { - if (isImm(*Op1)) { - if (isImm(*Op2)) { - u64 diff = (u64)GetImmValue(Op1) - (u64)GetImmValue(Op2); - return EmitIntConst64(diff); - } +InstLoc IRBuilder::FoldICmpCRUnsigned(InstLoc Op1, InstLoc Op2) +{ + if (isImm(*Op1) && isImm(*Op2)) + { + u64 diff = (u64)GetImmValue(Op1) - (u64)GetImmValue(Op2); + return EmitIntConst64(diff); } + return EmitBiOp(ICmpCRUnsigned, Op1, Op2); } -InstLoc IRBuilder::FoldFallBackToInterpreter(InstLoc Op1, InstLoc Op2) { - for (unsigned i = 0; i < 32; i++) { +InstLoc IRBuilder::FoldFallBackToInterpreter(InstLoc Op1, InstLoc Op2) +{ + for (unsigned i = 0; i < 32; i++) + { GRegCache[i] = nullptr; GRegCacheStore[i] = nullptr; FRegCache[i] = nullptr; FRegCacheStore[i] = nullptr; } + CarryCache = nullptr; CarryCacheStore = nullptr; - for (unsigned i = 0; i < 8; i++) { + + for (unsigned i = 0; 
i < 8; i++) + { CRCache[i] = nullptr; CRCacheStore[i] = nullptr; } + CTRCache = nullptr; CTRCacheStore = nullptr; return EmitBiOp(FallBackToInterpreter, Op1, Op2); } -InstLoc IRBuilder::FoldDoubleBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2) { - if (getOpcode(*Op1) == InsertDoubleInMReg) { +InstLoc IRBuilder::FoldDoubleBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2) +{ + if (getOpcode(*Op1) == InsertDoubleInMReg) + { return FoldDoubleBiOp(Opcode, getOp1(Op1), Op2); } - if (getOpcode(*Op2) == InsertDoubleInMReg) { + if (getOpcode(*Op2) == InsertDoubleInMReg) + { return FoldDoubleBiOp(Opcode, Op1, getOp1(Op2)); } return EmitBiOp(Opcode, Op1, Op2); } -InstLoc IRBuilder::FoldBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, unsigned extra) { - switch (Opcode) { - case Add: return FoldAdd(Op1, Op2); - case Sub: return FoldSub(Op1, Op2); - case Mul: return FoldMul(Op1, Op2); - case MulHighUnsigned: return FoldMulHighUnsigned(Op1, Op2); - case And: return FoldAnd(Op1, Op2); - case Or: return FoldOr(Op1, Op2); - case Xor: return FoldXor(Op1, Op2); - case Shl: return FoldShl(Op1, Op2); - case Shrl: return FoldShrl(Op1, Op2); - case Rol: return FoldRol(Op1, Op2); - case BranchCond: return FoldBranchCond(Op1, Op2); - case IdleBranch: return FoldIdleBranch(Op1, Op2); +InstLoc IRBuilder::FoldBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, unsigned extra) +{ + switch (Opcode) + { + case Add: + return FoldAdd(Op1, Op2); + case Sub: + return FoldSub(Op1, Op2); + case Mul: + return FoldMul(Op1, Op2); + case MulHighUnsigned: + return FoldMulHighUnsigned(Op1, Op2); + case And: + return FoldAnd(Op1, Op2); + case Or: + return FoldOr(Op1, Op2); + case Xor: + return FoldXor(Op1, Op2); + case Shl: + return FoldShl(Op1, Op2); + case Shrl: + return FoldShrl(Op1, Op2); + case Rol: + return FoldRol(Op1, Op2); + case BranchCond: + return FoldBranchCond(Op1, Op2); + case IdleBranch: + return FoldIdleBranch(Op1, Op2); case ICmpEq: case ICmpNe: case ICmpUgt: case ICmpUlt: case ICmpUge: case ICmpUle: case ICmpSgt: case ICmpSlt: case ICmpSge: case ICmpSle: return FoldICmp(Opcode, Op1, Op2); - case ICmpCRSigned: return FoldICmpCRSigned(Op1, Op2); - case ICmpCRUnsigned: return FoldICmpCRUnsigned(Op1, Op2); - case FallBackToInterpreter: return FoldFallBackToInterpreter(Op1, Op2); - case FDMul: case FDAdd: case FDSub: return FoldDoubleBiOp(Opcode, Op1, Op2); - default: return EmitBiOp(Opcode, Op1, Op2, extra); + case ICmpCRSigned: + return FoldICmpCRSigned(Op1, Op2); + case ICmpCRUnsigned: + return FoldICmpCRUnsigned(Op1, Op2); + case FallBackToInterpreter: + return FoldFallBackToInterpreter(Op1, Op2); + case FDMul: + case FDAdd: + case FDSub: + return FoldDoubleBiOp(Opcode, Op1, Op2); + default: + return EmitBiOp(Opcode, Op1, Op2, extra); } } -InstLoc IRBuilder::EmitIntConst64(u64 value) { +InstLoc IRBuilder::EmitIntConst64(u64 value) +{ InstLoc curIndex = InstList.data() + InstList.size(); InstList.push_back(CInt32 | ((unsigned int)ConstList.size() << 8)); MarkUsed.push_back(false); @@ -1071,31 +1275,37 @@ InstLoc IRBuilder::EmitIntConst64(u64 value) { return curIndex; } -u64 IRBuilder::GetImmValue64(InstLoc I) const { +u64 IRBuilder::GetImmValue64(InstLoc I) const +{ return ConstList[*I >> 8]; } -void IRBuilder::SetMarkUsed(InstLoc I) { +void IRBuilder::SetMarkUsed(InstLoc I) +{ const unsigned i = (unsigned)(I - InstList.data()); MarkUsed[i] = true; } -bool IRBuilder::IsMarkUsed(InstLoc I) const { +bool IRBuilder::IsMarkUsed(InstLoc I) const +{ const unsigned i = (unsigned)(I - InstList.data()); return MarkUsed[i]; } -bool 
IRBuilder::isSameValue(InstLoc Op1, InstLoc Op2) const { - if (Op1 == Op2) { +bool IRBuilder::isSameValue(InstLoc Op1, InstLoc Op2) const +{ + if (Op1 == Op2) + { return true; } - if (isImm(*Op1) && isImm(*Op2) && GetImmValue(Op1) == GetImmValue(Op2)) { + if (isImm(*Op1) && isImm(*Op2) && GetImmValue(Op1) == GetImmValue(Op2)) + { return true; } if (getNumberOfOperands(Op1) == 2 && getOpcode(*Op1) != StorePaired && getOpcode(*Op1) == getOpcode(*Op2) && - isSameValue(getOp1(Op1), getOp1(Op2)) && isSameValue(getOp2(Op1), getOp2(Op2))) + isSameValue(getOp1(Op1), getOp1(Op2)) && isSameValue(getOp2(Op1), getOp2(Op2))) { return true; } @@ -1110,14 +1320,17 @@ bool IRBuilder::isSameValue(InstLoc Op1, InstLoc Op2) const { // 2 -> ZeroOp // 3 -> UOp // 4 -> BiOp -unsigned IRBuilder::getComplexity(InstLoc I) const { +unsigned IRBuilder::getComplexity(InstLoc I) const +{ const unsigned Opcode = getOpcode(*I); - if (Opcode == Nop || Opcode == CInt16 || Opcode == CInt32) { + if (Opcode == Nop || Opcode == CInt16 || Opcode == CInt32) + { return 1; } const unsigned numberOfOperands = getNumberOfOperands(I); - if (numberOfOperands == -1U) { + if (numberOfOperands == -1U) + { return 0; } @@ -1125,10 +1338,12 @@ unsigned IRBuilder::getComplexity(InstLoc I) const { } -unsigned IRBuilder::getNumberOfOperands(InstLoc I) const { +unsigned IRBuilder::getNumberOfOperands(InstLoc I) const +{ static unsigned numberOfOperands[256]; static bool initialized = false; - if (!initialized) { + if (!initialized) + { initialized = true; std::fill_n(numberOfOperands, sizeof(numberOfOperands) / sizeof(numberOfOperands[0]), -1U); @@ -1139,15 +1354,14 @@ unsigned IRBuilder::getNumberOfOperands(InstLoc I) const { static unsigned ZeroOp[] = { LoadCR, LoadLink, LoadMSR, LoadGReg, LoadCTR, InterpreterBranch, LoadCarry, RFIExit, LoadFReg, LoadFRegDENToZero, LoadGQR, Int3, }; static unsigned UOp[] = { StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, ConvertFromFastCR, ConvertToFastCR, FastCRSOSet, FastCREQSet, FastCRGTSet, FastCRLTSet, }; static unsigned BiOp[] = { BranchCond, IdleBranch, And, Xor, Sub, Or, Add, Mul, Rol, Shl, Shrl, Sarl, ICmpEq, ICmpNe, ICmpUgt, ICmpUlt, ICmpSgt, ICmpSlt, ICmpSge, ICmpSle, Store8, Store16, Store32, ICmpCRSigned, ICmpCRUnsigned, FallBackToInterpreter, StoreSingle, StoreDouble, StorePaired, InsertDoubleInMReg, FSMul, FSAdd, FSSub, FDMul, FDAdd, FDSub, FPAdd, FPMul, FPSub, FPMerge00, FPMerge01, FPMerge10, FPMerge11, FDCmpCR, }; - for (auto& op : ZeroOp) { + for (auto& op : ZeroOp) numberOfOperands[op] = 0; - } - for (auto& op : UOp) { + + for (auto& op : UOp) numberOfOperands[op] = 1; - } - for (auto& op : BiOp) { + + for (auto& op : BiOp) numberOfOperands[op] = 2; - } } return numberOfOperands[getOpcode(*I)]; @@ -1155,16 +1369,19 @@ unsigned IRBuilder::getNumberOfOperands(InstLoc I) const { // Performs a few simplifications for commutative operators // Ported from InstructionCombining.cpp in LLVM -void IRBuilder::simplifyCommutative(unsigned Opcode, InstLoc& Op1, InstLoc& Op2) { +void IRBuilder::simplifyCommutative(unsigned Opcode, InstLoc& Op1, InstLoc& Op2) +{ // Order operands such that they are listed from right (least complex) to // left (most complex). 
This puts constants before unary operators before // binary operators. - if (getComplexity(Op1) < getComplexity(Op2)) { + if (getComplexity(Op1) < getComplexity(Op2)) + { std::swap(Op1, Op2); } // Is this associative? - switch (Opcode) { + switch (Opcode) + { case Add: case Mul: case And: @@ -1176,7 +1393,8 @@ void IRBuilder::simplifyCommutative(unsigned Opcode, InstLoc& Op1, InstLoc& Op2) } // (V op C1) op C2 => V + (C1 + C2) - if (getOpcode(*Op1) == Opcode && isImm(*getOp2(Op1)) && isImm(*Op2)) { + if (getOpcode(*Op1) == Opcode && isImm(*getOp2(Op1)) && isImm(*Op2)) + { const InstLoc Op1Old = Op1; const InstLoc Op2Old = Op2; Op1 = getOp1(Op1Old); @@ -1185,7 +1403,8 @@ void IRBuilder::simplifyCommutative(unsigned Opcode, InstLoc& Op1, InstLoc& Op2) // ((V1 op C1) op (V2 op C2)) => ((V1 op V2) op (C1 op C2)) // Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) - if (getOpcode(*Op1) == Opcode && isImm(*getOp2(Op1)) && getOpcode(*Op2) == Opcode && isImm(*getOp2(Op2))) { + if (getOpcode(*Op1) == Opcode && isImm(*getOp2(Op1)) && getOpcode(*Op2) == Opcode && isImm(*getOp2(Op2))) + { const InstLoc Op1Old = Op1; const InstLoc Op2Old = Op2; Op1 = FoldBiOp(Opcode, getOp1(Op1Old), getOp1(Op2Old)); @@ -1195,7 +1414,8 @@ void IRBuilder::simplifyCommutative(unsigned Opcode, InstLoc& Op1, InstLoc& Op2) // FIXME: Following code has a bug. // ((w op x) op (y op z)) => (((w op x) op y) op z) /* - if (getOpcode(*Op1) == Opcode && getOpcode(*Op2) == Opcode) { + if (getOpcode(*Op1) == Opcode && getOpcode(*Op2) == Opcode) + { // Sort the operands where the complexities will be descending order. std::pair ops[4]; ops[0] = std::make_pair(getComplexity(getOp1(Op1)), getOp1(Op1)); @@ -1210,13 +1430,16 @@ void IRBuilder::simplifyCommutative(unsigned Opcode, InstLoc& Op1, InstLoc& Op2) */ } -bool IRBuilder::maskedValueIsZero(InstLoc Op1, InstLoc Op2) const { +bool IRBuilder::maskedValueIsZero(InstLoc Op1, InstLoc Op2) const +{ return (~ComputeKnownZeroBits(Op1) & ~ComputeKnownZeroBits(Op2)) == 0; } // Returns I' if I == (0 - I') -InstLoc IRBuilder::isNeg(InstLoc I) const { - if (getOpcode(*I) == Sub && isImm(*getOp1(I)) && GetImmValue(getOp1(I)) == 0) { +InstLoc IRBuilder::isNeg(InstLoc I) const +{ + if (getOpcode(*I) == Sub && isImm(*getOp1(I)) && GetImmValue(getOp1(I)) == 0) + { return getOp2(I); } @@ -1287,10 +1510,12 @@ static const std::set extra8Regs(extra8RegList, extra8RegList + sizeof static const std::set extra16Regs(extra16RegList, extra16RegList + sizeof(extra16RegList) / sizeof(extra16RegList[0])); static const std::set extra24Regs(extra24RegList, extra24RegList + sizeof(extra24RegList) / sizeof(extra24RegList[0])); -void IRBuilder::WriteToFile(u64 codeHash) { +void IRBuilder::WriteToFile(u64 codeHash) +{ _assert_(sizeof(opcodeNames) / sizeof(opcodeNames[0]) == Int3 + 1); - if (!writer.get()) { + if (!writer.get()) + { writer = std::make_unique(); } @@ -1300,7 +1525,8 @@ void IRBuilder::WriteToFile(u64 codeHash) { const InstLoc lastCurReadPtr = curReadPtr; StartForwardPass(); const unsigned numInsts = getNumInsts(); - for (unsigned int i = 0; i < numInsts; ++i) { + for (unsigned int i = 0; i < numInsts; ++i) + { const InstLoc I = ReadForward(); const unsigned opcode = getOpcode(*I); const bool thisUsed = IsMarkUsed(I) || @@ -1309,9 +1535,8 @@ void IRBuilder::WriteToFile(u64 codeHash) { // Line number fprintf(file, "%4u", i); - if (!thisUsed) { + if (!thisUsed) fprintf(file, "%*c", 32, ' '); - } // Opcode const std::string& opcodeName = opcodeNames[opcode]; @@ -1319,38 +1544,38 @@ 
void IRBuilder::WriteToFile(u64 codeHash) { const unsigned numberOfOperands = getNumberOfOperands(I); // Op1 - if (numberOfOperands >= 1) { + if (numberOfOperands >= 1) + { const IREmitter::InstLoc inst = getOp1(I); - if (isImm(*inst)) { + + if (isImm(*inst)) fprintf(file, " 0x%08x", GetImmValue(inst)); - } else { + else fprintf(file, " %10u", i - (unsigned int)(I - inst)); - } } // Op2 - if (numberOfOperands >= 2) { + if (numberOfOperands >= 2) + { const IREmitter::InstLoc inst = getOp2(I); - if (isImm(*inst)) { + + if (isImm(*inst)) fprintf(file, " 0x%08x", GetImmValue(inst)); - } else { + else fprintf(file, " %10u", i - (unsigned int)(I - inst)); - } } - if (extra8Regs.count(opcode)) { + if (extra8Regs.count(opcode)) fprintf(file, " R%d", *I >> 8); - } - if (extra16Regs.count(opcode)) { - fprintf(file, " R%d", *I >> 16); - } - if (extra24Regs.count(opcode)) { - fprintf(file, " R%d", *I >> 24); - } - if (opcode == CInt32 || opcode == CInt16) { + if (extra16Regs.count(opcode)) + fprintf(file, " R%d", *I >> 16); + + if (extra24Regs.count(opcode)) + fprintf(file, " R%d", *I >> 24); + + if (opcode == CInt32 || opcode == CInt16) fprintf(file, " 0x%08x", GetImmValue(I)); - } fprintf(file, "\n"); } diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.h b/Source/Core/Core/PowerPC/JitILCommon/IR.h index 58202ffd9b..851c3c70c9 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/IR.h +++ b/Source/Core/Core/PowerPC/JitILCommon/IR.h @@ -8,8 +8,11 @@ #include "Common/x64Emitter.h" -namespace IREmitter { -enum Opcode { +namespace IREmitter +{ + +enum Opcode +{ Nop = 0, // "Zero-operand" operators @@ -177,46 +180,57 @@ enum Opcode { typedef unsigned Inst; typedef Inst* InstLoc; -unsigned inline getOpcode(Inst i) { +unsigned inline getOpcode(Inst i) +{ return i & 255; } -unsigned inline isImm(Inst i) { +unsigned inline isImm(Inst i) +{ return getOpcode(i) >= CInt16 && getOpcode(i) <= CInt32; } -unsigned inline isICmp(Inst i) { +unsigned inline isICmp(Inst i) +{ return getOpcode(i) >= ICmpEq && getOpcode(i) <= ICmpSle; } -unsigned inline isFResult(Inst i) { +unsigned inline isFResult(Inst i) +{ return getOpcode(i) > FResult_Start && getOpcode(i) < FResult_End; } -InstLoc inline getOp1(InstLoc i) { +InstLoc inline getOp1(InstLoc i) +{ i = i - 1 - ((*i >> 8) & 255); - if (getOpcode(*i) == Tramp) { + + if (getOpcode(*i) == Tramp) + { i = i - 1 - (*i >> 8); } + return i; } -InstLoc inline getOp2(InstLoc i) { +InstLoc inline getOp2(InstLoc i) +{ i = i - 1 - ((*i >> 16) & 255); - if (getOpcode(*i) == Tramp) { + + if (getOpcode(*i) == Tramp) + { i = i - 1 - (*i >> 8); } + return i; } -class IRBuilder { +class IRBuilder +{ private: InstLoc EmitZeroOp(unsigned Opcode, unsigned extra); - InstLoc EmitUOp(unsigned OpCode, InstLoc Op1, - unsigned extra = 0); - InstLoc EmitBiOp(unsigned OpCode, InstLoc Op1, InstLoc Op2, - unsigned extra = 0); + InstLoc EmitUOp(unsigned OpCode, InstLoc Op1, unsigned extra = 0); + InstLoc EmitBiOp(unsigned OpCode, InstLoc Op1, InstLoc Op2, unsigned extra = 0); InstLoc FoldAdd(InstLoc Op1, InstLoc Op2); InstLoc FoldSub(InstLoc Op1, InstLoc Op2); @@ -238,316 +252,515 @@ private: InstLoc FoldFallBackToInterpreter(InstLoc Op1, InstLoc Op2); InstLoc FoldZeroOp(unsigned Opcode, unsigned extra); - InstLoc FoldUOp(unsigned OpCode, InstLoc Op1, - unsigned extra = 0); - InstLoc FoldBiOp(unsigned OpCode, InstLoc Op1, InstLoc Op2, - unsigned extra = 0); + InstLoc FoldUOp(unsigned OpCode, InstLoc Op1, unsigned extra = 0); + InstLoc FoldBiOp(unsigned OpCode, InstLoc Op1, InstLoc Op2, unsigned extra = 
0); unsigned ComputeKnownZeroBits(InstLoc I) const; public: InstLoc EmitIntConst(unsigned value) { return EmitIntConst64(value); } InstLoc EmitIntConst64(u64 value); - InstLoc EmitStoreLink(InstLoc val) { + + InstLoc EmitStoreLink(InstLoc val) + { return FoldUOp(StoreLink, val); } - InstLoc EmitBranchUncond(InstLoc val) { + + InstLoc EmitBranchUncond(InstLoc val) + { return FoldUOp(BranchUncond, val); } - InstLoc EmitBranchCond(InstLoc check, InstLoc dest) { + + InstLoc EmitBranchCond(InstLoc check, InstLoc dest) + { return FoldBiOp(BranchCond, check, dest); } - InstLoc EmitIdleBranch(InstLoc check, InstLoc dest) { + + InstLoc EmitIdleBranch(InstLoc check, InstLoc dest) + { return FoldBiOp(IdleBranch, check, dest); } - InstLoc EmitLoadCR(unsigned crreg) { + + InstLoc EmitLoadCR(unsigned crreg) + { return FoldZeroOp(LoadCR, crreg); } - InstLoc EmitStoreCR(InstLoc value, unsigned crreg) { + + InstLoc EmitStoreCR(InstLoc value, unsigned crreg) + { return FoldUOp(StoreCR, value, crreg); } - InstLoc EmitLoadLink() { + + InstLoc EmitLoadLink() + { return FoldZeroOp(LoadLink, 0); } - InstLoc EmitLoadMSR() { + + InstLoc EmitLoadMSR() + { return FoldZeroOp(LoadMSR, 0); } - InstLoc EmitStoreMSR(InstLoc val, InstLoc pc) { + + InstLoc EmitStoreMSR(InstLoc val, InstLoc pc) + { return FoldBiOp(StoreMSR, val, pc); } - InstLoc EmitStoreFPRF(InstLoc value) { + + InstLoc EmitStoreFPRF(InstLoc value) + { return FoldUOp(StoreFPRF, value); } - InstLoc EmitLoadGReg(unsigned reg) { + + InstLoc EmitLoadGReg(unsigned reg) + { return FoldZeroOp(LoadGReg, reg); } - InstLoc EmitStoreGReg(InstLoc value, unsigned reg) { + + InstLoc EmitStoreGReg(InstLoc value, unsigned reg) + { return FoldUOp(StoreGReg, value, reg); } - InstLoc EmitNot(InstLoc op1) { + + InstLoc EmitNot(InstLoc op1) + { return FoldUOp(Not, op1); } - InstLoc EmitAnd(InstLoc op1, InstLoc op2) { + + InstLoc EmitAnd(InstLoc op1, InstLoc op2) + { return FoldBiOp(And, op1, op2); } - InstLoc EmitXor(InstLoc op1, InstLoc op2) { + + InstLoc EmitXor(InstLoc op1, InstLoc op2) + { return FoldBiOp(Xor, op1, op2); } - InstLoc EmitSub(InstLoc op1, InstLoc op2) { + + InstLoc EmitSub(InstLoc op1, InstLoc op2) + { return FoldBiOp(Sub, op1, op2); } - InstLoc EmitOr(InstLoc op1, InstLoc op2) { + + InstLoc EmitOr(InstLoc op1, InstLoc op2) + { return FoldBiOp(Or, op1, op2); } - InstLoc EmitAdd(InstLoc op1, InstLoc op2) { + + InstLoc EmitAdd(InstLoc op1, InstLoc op2) + { return FoldBiOp(Add, op1, op2); } - InstLoc EmitMul(InstLoc op1, InstLoc op2) { + + InstLoc EmitMul(InstLoc op1, InstLoc op2) + { return FoldBiOp(Mul, op1, op2); } - InstLoc EmitMulHighUnsigned(InstLoc op1, InstLoc op2) { + + InstLoc EmitMulHighUnsigned(InstLoc op1, InstLoc op2) + { return FoldBiOp(MulHighUnsigned, op1, op2); } - InstLoc EmitRol(InstLoc op1, InstLoc op2) { + + InstLoc EmitRol(InstLoc op1, InstLoc op2) + { return FoldBiOp(Rol, op1, op2); } - InstLoc EmitShl(InstLoc op1, InstLoc op2) { + + InstLoc EmitShl(InstLoc op1, InstLoc op2) + { return FoldBiOp(Shl, op1, op2); } - InstLoc EmitShrl(InstLoc op1, InstLoc op2) { + + InstLoc EmitShrl(InstLoc op1, InstLoc op2) + { return FoldBiOp(Shrl, op1, op2); } - InstLoc EmitSarl(InstLoc op1, InstLoc op2) { + + InstLoc EmitSarl(InstLoc op1, InstLoc op2) + { return FoldBiOp(Sarl, op1, op2); } - InstLoc EmitLoadCTR() { + + InstLoc EmitLoadCTR() + { return FoldZeroOp(LoadCTR, 0); } - InstLoc EmitStoreCTR(InstLoc op1) { + + InstLoc EmitStoreCTR(InstLoc op1) + { return FoldUOp(StoreCTR, op1); } - InstLoc EmitICmpEq(InstLoc op1, InstLoc op2) { + + InstLoc 
EmitICmpEq(InstLoc op1, InstLoc op2) + { return FoldBiOp(ICmpEq, op1, op2); } - InstLoc EmitICmpNe(InstLoc op1, InstLoc op2) { + + InstLoc EmitICmpNe(InstLoc op1, InstLoc op2) + { return FoldBiOp(ICmpNe, op1, op2); } - InstLoc EmitICmpUgt(InstLoc op1, InstLoc op2) { + + InstLoc EmitICmpUgt(InstLoc op1, InstLoc op2) + { return FoldBiOp(ICmpUgt, op1, op2); } - InstLoc EmitICmpUlt(InstLoc op1, InstLoc op2) { + + InstLoc EmitICmpUlt(InstLoc op1, InstLoc op2) + { return FoldBiOp(ICmpUlt, op1, op2); } - InstLoc EmitICmpSgt(InstLoc op1, InstLoc op2) { + + InstLoc EmitICmpSgt(InstLoc op1, InstLoc op2) + { return FoldBiOp(ICmpSgt, op1, op2); } - InstLoc EmitICmpSlt(InstLoc op1, InstLoc op2) { + + InstLoc EmitICmpSlt(InstLoc op1, InstLoc op2) + { return FoldBiOp(ICmpSlt, op1, op2); } - InstLoc EmitICmpSge(InstLoc op1, InstLoc op2) { + + InstLoc EmitICmpSge(InstLoc op1, InstLoc op2) + { return FoldBiOp(ICmpSge, op1, op2); } - InstLoc EmitICmpSle(InstLoc op1, InstLoc op2) { + + InstLoc EmitICmpSle(InstLoc op1, InstLoc op2) + { return FoldBiOp(ICmpSle, op1, op2); } - InstLoc EmitLoad8(InstLoc op1) { + + InstLoc EmitLoad8(InstLoc op1) + { return FoldUOp(Load8, op1); } - InstLoc EmitLoad16(InstLoc op1) { + + InstLoc EmitLoad16(InstLoc op1) + { return FoldUOp(Load16, op1); } - InstLoc EmitLoad32(InstLoc op1) { + + InstLoc EmitLoad32(InstLoc op1) + { return FoldUOp(Load32, op1); } - InstLoc EmitStore8(InstLoc op1, InstLoc op2) { + + InstLoc EmitStore8(InstLoc op1, InstLoc op2) + { return FoldBiOp(Store8, op1, op2); } - InstLoc EmitStore16(InstLoc op1, InstLoc op2) { + + InstLoc EmitStore16(InstLoc op1, InstLoc op2) + { return FoldBiOp(Store16, op1, op2); } - InstLoc EmitStore32(InstLoc op1, InstLoc op2) { + + InstLoc EmitStore32(InstLoc op1, InstLoc op2) + { return FoldBiOp(Store32, op1, op2); } - InstLoc EmitSExt16(InstLoc op1) { + + InstLoc EmitSExt16(InstLoc op1) + { return FoldUOp(SExt16, op1); } - InstLoc EmitSExt8(InstLoc op1) { + + InstLoc EmitSExt8(InstLoc op1) + { return FoldUOp(SExt8, op1); } - InstLoc EmitCntlzw(InstLoc op1) { + + InstLoc EmitCntlzw(InstLoc op1) + { return FoldUOp(Cntlzw, op1); } - InstLoc EmitICmpCRSigned(InstLoc op1, InstLoc op2) { + + InstLoc EmitICmpCRSigned(InstLoc op1, InstLoc op2) + { return FoldBiOp(ICmpCRSigned, op1, op2); } - InstLoc EmitICmpCRUnsigned(InstLoc op1, InstLoc op2) { + + InstLoc EmitICmpCRUnsigned(InstLoc op1, InstLoc op2) + { return FoldBiOp(ICmpCRUnsigned, op1, op2); } - InstLoc EmitConvertFromFastCR(InstLoc op1) { + + InstLoc EmitConvertFromFastCR(InstLoc op1) + { return FoldUOp(ConvertFromFastCR, op1); } - InstLoc EmitConvertToFastCR(InstLoc op1) { + + InstLoc EmitConvertToFastCR(InstLoc op1) + { return FoldUOp(ConvertToFastCR, op1); } - InstLoc EmitFastCRSOSet(InstLoc op1) { + + InstLoc EmitFastCRSOSet(InstLoc op1) + { return FoldUOp(FastCRSOSet, op1); } - InstLoc EmitFastCREQSet(InstLoc op1) { + + InstLoc EmitFastCREQSet(InstLoc op1) + { return FoldUOp(FastCREQSet, op1); } - InstLoc EmitFastCRLTSet(InstLoc op1) { + + InstLoc EmitFastCRLTSet(InstLoc op1) + { return FoldUOp(FastCRLTSet, op1); } - InstLoc EmitFastCRGTSet(InstLoc op1) { + + InstLoc EmitFastCRGTSet(InstLoc op1) + { return FoldUOp(FastCRGTSet, op1); } - InstLoc EmitFallBackToInterpreter(InstLoc op1, InstLoc op2) { + + InstLoc EmitFallBackToInterpreter(InstLoc op1, InstLoc op2) + { return FoldBiOp(FallBackToInterpreter, op1, op2); } - InstLoc EmitInterpreterBranch() { + + InstLoc EmitInterpreterBranch() + { return FoldZeroOp(InterpreterBranch, 0); } - InstLoc EmitLoadCarry() { + + InstLoc 
EmitLoadCarry() + { return FoldZeroOp(LoadCarry, 0); } - InstLoc EmitStoreCarry(InstLoc op1) { + + InstLoc EmitStoreCarry(InstLoc op1) + { return FoldUOp(StoreCarry, op1); } - InstLoc EmitSystemCall(InstLoc pc) { + + InstLoc EmitSystemCall(InstLoc pc) + { return FoldUOp(SystemCall, pc); } - InstLoc EmitFPExceptionCheck(InstLoc pc) { + + InstLoc EmitFPExceptionCheck(InstLoc pc) + { return EmitUOp(FPExceptionCheck, pc); } - InstLoc EmitDSIExceptionCheck(InstLoc pc) { + + InstLoc EmitDSIExceptionCheck(InstLoc pc) + { return EmitUOp(DSIExceptionCheck, pc); } - InstLoc EmitISIException(InstLoc dest) { + + InstLoc EmitISIException(InstLoc dest) + { return EmitUOp(ISIException, dest); } - InstLoc EmitExtExceptionCheck(InstLoc pc) { + + InstLoc EmitExtExceptionCheck(InstLoc pc) + { return EmitUOp(ExtExceptionCheck, pc); } - InstLoc EmitBreakPointCheck(InstLoc pc) { + + InstLoc EmitBreakPointCheck(InstLoc pc) + { return EmitUOp(BreakPointCheck, pc); } - InstLoc EmitRFIExit() { + + InstLoc EmitRFIExit() + { return FoldZeroOp(RFIExit, 0); } - InstLoc EmitShortIdleLoop(InstLoc pc) { + + InstLoc EmitShortIdleLoop(InstLoc pc) + { return FoldUOp(ShortIdleLoop, pc); } - InstLoc EmitLoadSingle(InstLoc addr) { + + InstLoc EmitLoadSingle(InstLoc addr) + { return FoldUOp(LoadSingle, addr); } - InstLoc EmitLoadDouble(InstLoc addr) { + + InstLoc EmitLoadDouble(InstLoc addr) + { return FoldUOp(LoadDouble, addr); } - InstLoc EmitLoadPaired(InstLoc addr, unsigned quantReg) { + + InstLoc EmitLoadPaired(InstLoc addr, unsigned quantReg) + { return FoldUOp(LoadPaired, addr, quantReg); } - InstLoc EmitStoreSingle(InstLoc value, InstLoc addr) { + + InstLoc EmitStoreSingle(InstLoc value, InstLoc addr) + { return FoldBiOp(StoreSingle, value, addr); } - InstLoc EmitStoreDouble(InstLoc value, InstLoc addr) { + + InstLoc EmitStoreDouble(InstLoc value, InstLoc addr) + { return FoldBiOp(StoreDouble, value, addr); } - InstLoc EmitStorePaired(InstLoc value, InstLoc addr, unsigned quantReg) { + + InstLoc EmitStorePaired(InstLoc value, InstLoc addr, unsigned quantReg) + { return FoldBiOp(StorePaired, value, addr, quantReg); } - InstLoc EmitLoadFReg(unsigned freg) { + + InstLoc EmitLoadFReg(unsigned freg) + { return FoldZeroOp(LoadFReg, freg); } - InstLoc EmitLoadFRegDENToZero(unsigned freg) { + + InstLoc EmitLoadFRegDENToZero(unsigned freg) + { return FoldZeroOp(LoadFRegDENToZero, freg); } - InstLoc EmitStoreFReg(InstLoc val, unsigned freg) { + + InstLoc EmitStoreFReg(InstLoc val, unsigned freg) + { return FoldUOp(StoreFReg, val, freg); } - InstLoc EmitDupSingleToMReg(InstLoc val) { + + InstLoc EmitDupSingleToMReg(InstLoc val) + { return FoldUOp(DupSingleToMReg, val); } - InstLoc EmitDupSingleToPacked(InstLoc val) { + + InstLoc EmitDupSingleToPacked(InstLoc val) + { return FoldUOp(DupSingleToPacked, val); } - InstLoc EmitInsertDoubleInMReg(InstLoc val, InstLoc reg) { + + InstLoc EmitInsertDoubleInMReg(InstLoc val, InstLoc reg) + { return FoldBiOp(InsertDoubleInMReg, val, reg); } - InstLoc EmitExpandPackedToMReg(InstLoc val) { + + InstLoc EmitExpandPackedToMReg(InstLoc val) + { return FoldUOp(ExpandPackedToMReg, val); } - InstLoc EmitCompactMRegToPacked(InstLoc val) { + + InstLoc EmitCompactMRegToPacked(InstLoc val) + { return FoldUOp(CompactMRegToPacked, val); } - InstLoc EmitFSMul(InstLoc op1, InstLoc op2) { + + InstLoc EmitFSMul(InstLoc op1, InstLoc op2) + { return FoldBiOp(FSMul, op1, op2); } - InstLoc EmitFSAdd(InstLoc op1, InstLoc op2) { + + InstLoc EmitFSAdd(InstLoc op1, InstLoc op2) + { return FoldBiOp(FSAdd, op1, op2); } - 
InstLoc EmitFSSub(InstLoc op1, InstLoc op2) { + + InstLoc EmitFSSub(InstLoc op1, InstLoc op2) + { return FoldBiOp(FSSub, op1, op2); } - InstLoc EmitFSNeg(InstLoc op1) { + + InstLoc EmitFSNeg(InstLoc op1) + { return FoldUOp(FSNeg, op1); } - InstLoc EmitFDMul(InstLoc op1, InstLoc op2) { + + InstLoc EmitFDMul(InstLoc op1, InstLoc op2) + { return FoldBiOp(FDMul, op1, op2); } - InstLoc EmitFDAdd(InstLoc op1, InstLoc op2) { + + InstLoc EmitFDAdd(InstLoc op1, InstLoc op2) + { return FoldBiOp(FDAdd, op1, op2); } - InstLoc EmitFDSub(InstLoc op1, InstLoc op2) { + + InstLoc EmitFDSub(InstLoc op1, InstLoc op2) + { return FoldBiOp(FDSub, op1, op2); } - InstLoc EmitFDNeg(InstLoc op1) { + + InstLoc EmitFDNeg(InstLoc op1) + { return FoldUOp(FDNeg, op1); } - InstLoc EmitFPAdd(InstLoc op1, InstLoc op2) { + + InstLoc EmitFPAdd(InstLoc op1, InstLoc op2) + { return FoldBiOp(FPAdd, op1, op2); } - InstLoc EmitFPMul(InstLoc op1, InstLoc op2) { + + InstLoc EmitFPMul(InstLoc op1, InstLoc op2) + { return FoldBiOp(FPMul, op1, op2); } - InstLoc EmitFPSub(InstLoc op1, InstLoc op2) { + + InstLoc EmitFPSub(InstLoc op1, InstLoc op2) + { return FoldBiOp(FPSub, op1, op2); } - InstLoc EmitFPMerge00(InstLoc op1, InstLoc op2) { + + InstLoc EmitFPMerge00(InstLoc op1, InstLoc op2) + { return FoldBiOp(FPMerge00, op1, op2); } - InstLoc EmitFPMerge01(InstLoc op1, InstLoc op2) { + + InstLoc EmitFPMerge01(InstLoc op1, InstLoc op2) + { return FoldBiOp(FPMerge01, op1, op2); } - InstLoc EmitFPMerge10(InstLoc op1, InstLoc op2) { + + InstLoc EmitFPMerge10(InstLoc op1, InstLoc op2) + { return FoldBiOp(FPMerge10, op1, op2); } - InstLoc EmitFPMerge11(InstLoc op1, InstLoc op2) { + + InstLoc EmitFPMerge11(InstLoc op1, InstLoc op2) + { return FoldBiOp(FPMerge11, op1, op2); } - InstLoc EmitFPDup0(InstLoc op1) { + + InstLoc EmitFPDup0(InstLoc op1) + { return FoldUOp(FPDup0, op1); } - InstLoc EmitFPDup1(InstLoc op1) { + + InstLoc EmitFPDup1(InstLoc op1) + { return FoldUOp(FPDup1, op1); } - InstLoc EmitFPNeg(InstLoc op1) { + + InstLoc EmitFPNeg(InstLoc op1) + { return FoldUOp(FPNeg, op1); } - InstLoc EmitDoubleToSingle(InstLoc op1) { + + InstLoc EmitDoubleToSingle(InstLoc op1) + { return FoldUOp(DoubleToSingle, op1); } - InstLoc EmitFDCmpCR(InstLoc op1, InstLoc op2, int ordered) { + + InstLoc EmitFDCmpCR(InstLoc op1, InstLoc op2, int ordered) + { return FoldBiOp(FDCmpCR, op1, op2, ordered); } - InstLoc EmitLoadGQR(unsigned gqr) { + + InstLoc EmitLoadGQR(unsigned gqr) + { return FoldZeroOp(LoadGQR, gqr); } - InstLoc EmitStoreGQR(InstLoc op1, unsigned gqr) { + + InstLoc EmitStoreGQR(InstLoc op1, unsigned gqr) + { return FoldUOp(StoreGQR, op1, gqr); } - InstLoc EmitStoreSRR(InstLoc op1, unsigned srr) { + + InstLoc EmitStoreSRR(InstLoc op1, unsigned srr) + { return FoldUOp(StoreSRR, op1, srr); } + InstLoc EmitINT3() { return FoldZeroOp(Int3, 0); @@ -566,28 +779,38 @@ public: bool IsMarkUsed(InstLoc I) const; void WriteToFile(u64 codeHash); - void Reset() { + void Reset() + { InstList.clear(); InstList.reserve(100000); MarkUsed.clear(); MarkUsed.reserve(100000); - for (unsigned i = 0; i < 32; i++) { + + for (unsigned i = 0; i < 32; i++) + { GRegCache[i] = nullptr; GRegCacheStore[i] = nullptr; FRegCache[i] = nullptr; FRegCacheStore[i] = nullptr; } + CarryCache = nullptr; CarryCacheStore = nullptr; - for (unsigned i = 0; i < 8; i++) { + + for (unsigned i = 0; i < 8; i++) + { CRCache[i] = nullptr; CRCacheStore[i] = nullptr; } + CTRCache = nullptr; CTRCacheStore = nullptr; } - IRBuilder() { Reset(); } + IRBuilder() + { + Reset(); + } private: 
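// --- Illustrative sketch (editor's addition, not part of the original patch) ---
// Reset() above nulls the same per-register caches that the Fold* store rules
// maintain: each Store* fold remembers the last value written to a guest
// register and turns the previous, now-dead store instruction into a Nop.
// A minimal standalone model of that pattern (hypothetical names, assuming a
// Nop encoding of 0):
#include <cassert>
#include <cstdint>

static uint32_t  instList[64];            // grossly simplified instruction stream
static unsigned  instCount = 0;
static uint32_t* regCacheStore[32] = {};  // last store emitted per guest reg

static uint32_t* EmitStore(unsigned reg, uint32_t encodedInst)
{
    if (regCacheStore[reg])
        *regCacheStore[reg] = 0;  // zap the previous store: it is dead now

    instList[instCount] = encodedInst;
    regCacheStore[reg] = &instList[instCount];
    return &instList[instCount++];
}

int main()
{
    uint32_t* first  = EmitStore(3, 0xAA);
    uint32_t* second = EmitStore(3, 0xBB);  // same register: first becomes Nop
    assert(*first == 0 && *second == 0xBB);
    return 0;
}
// (IRBuilder's private section continues below.)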
IRBuilder(IRBuilder&); // DO NOT IMPLEMENT diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp index c60080e6a9..81158bab25 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp @@ -43,7 +43,8 @@ void JitILBase::bx(UGeckoInstruction inst) // If this is not the last instruction of a block, // we will skip the rest process. // Because PPCAnalyst::Flatten() merged the blocks. - if (!js.isLastInstruction) { + if (!js.isLastInstruction) + { return; } @@ -53,7 +54,8 @@ void JitILBase::bx(UGeckoInstruction inst) else destination = js.compilerPC + SignExt26(inst.LI << 2); - if (destination == js.compilerPC) { + if (destination == js.compilerPC) + { ibuild.EmitShortIdleLoop(ibuild.EmitIntConst(js.compilerPC)); return; } @@ -85,36 +87,40 @@ static IREmitter::InstLoc EmitCRTest(IREmitter::IRBuilder& ibuild, UGeckoInstruc return CRTest; } -static IREmitter::InstLoc TestBranch(IREmitter::IRBuilder& ibuild, UGeckoInstruction inst) { +static IREmitter::InstLoc TestBranch(IREmitter::IRBuilder& ibuild, UGeckoInstruction inst) +{ IREmitter::InstLoc CRTest = nullptr, CTRTest = nullptr; if ((inst.BO & 16) == 0) // Test a CR bit { CRTest = EmitCRTest(ibuild, inst); } - if ((inst.BO & 4) == 0) { + if ((inst.BO & 4) == 0) + { IREmitter::InstLoc c = ibuild.EmitLoadCTR(); c = ibuild.EmitSub(c, ibuild.EmitIntConst(1)); ibuild.EmitStoreCTR(c); - if (inst.BO & 2) { - CTRTest = ibuild.EmitICmpEq(c, - ibuild.EmitIntConst(0)); - } else { + + if (inst.BO & 2) + CTRTest = ibuild.EmitICmpEq(c, ibuild.EmitIntConst(0)); + else CTRTest = c; - } } IREmitter::InstLoc Test = CRTest; - if (CTRTest) { + if (CTRTest) + { if (Test) Test = ibuild.EmitAnd(Test, CTRTest); else Test = CTRTest; } - if (!Test) { + if (!Test) + { Test = ibuild.EmitIntConst(1); } + return Test; } @@ -122,8 +128,7 @@ void JitILBase::bcx(UGeckoInstruction inst) { NORMALBRANCH_START if (inst.LK) - ibuild.EmitStoreLink( - ibuild.EmitIntConst(js.compilerPC + 4)); + ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4)); IREmitter::InstLoc Test = TestBranch(ibuild, inst); @@ -134,11 +139,11 @@ void JitILBase::bcx(UGeckoInstruction inst) destination = js.compilerPC + SignExt16(inst.BD << 2); if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle && - inst.hex == 0x4182fff8 && - (Memory::ReadUnchecked_U32(js.compilerPC - 8) & 0xFFFF0000) == 0x800D0000 && - (Memory::ReadUnchecked_U32(js.compilerPC - 4) == 0x28000000 || - (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC - 4) == 0x2C000000)) - ) + inst.hex == 0x4182fff8 && + (Memory::ReadUnchecked_U32(js.compilerPC - 8) & 0xFFFF0000) == 0x800D0000 && + (Memory::ReadUnchecked_U32(js.compilerPC - 4) == 0x28000000 || + (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC - 4) == 0x2C000000)) + ) { ibuild.EmitIdleBranch(Test, ibuild.EmitIntConst(destination)); } @@ -152,11 +157,13 @@ void JitILBase::bcx(UGeckoInstruction inst) void JitILBase::bcctrx(UGeckoInstruction inst) { NORMALBRANCH_START - if ((inst.BO & 4) == 0) { + if ((inst.BO & 4) == 0) + { IREmitter::InstLoc c = ibuild.EmitLoadCTR(); c = ibuild.EmitSub(c, ibuild.EmitIntConst(1)); ibuild.EmitStoreCTR(c); } + IREmitter::InstLoc test; if ((inst.BO & 16) == 0) // Test a CR bit { @@ -181,16 +188,19 @@ void JitILBase::bclrx(UGeckoInstruction inst) NORMALBRANCH_START if (!js.isLastInstruction && - 
(inst.BO & (1 << 4)) && (inst.BO & (1 << 2))) { + (inst.BO & (1 << 4)) && (inst.BO & (1 << 2))) + { if (inst.LK) ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4)); return; } - if (inst.hex == 0x4e800020) { + if (inst.hex == 0x4e800020) + { ibuild.EmitBranchUncond(ibuild.EmitLoadLink()); return; } + IREmitter::InstLoc test = TestBranch(ibuild, inst); test = ibuild.EmitICmpEq(test, ibuild.EmitIntConst(0)); ibuild.EmitBranchCond(test, ibuild.EmitIntConst(js.compilerPC + 4)); diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp index b97740ce5e..79ce746072 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp @@ -30,10 +30,13 @@ void JitILBase::fp_arith_s(UGeckoInstruction inst) _assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!"); } - if (inst.OPCD == 59) { + if (inst.OPCD == 59) + { val = ibuild.EmitDoubleToSingle(val); val = ibuild.EmitDupSingleToMReg(val); - } else { + } + else + { val = ibuild.EmitInsertDoubleInMReg(val, ibuild.EmitLoadFReg(inst.FD)); } ibuild.EmitStoreFReg(val, inst.FD); @@ -50,18 +53,25 @@ void JitILBase::fmaddXX(UGeckoInstruction inst) IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.FA); val = ibuild.EmitFDMul(val, ibuild.EmitLoadFReg(inst.FC)); + if (inst.SUBOP5 & 1) val = ibuild.EmitFDAdd(val, ibuild.EmitLoadFReg(inst.FB)); else val = ibuild.EmitFDSub(val, ibuild.EmitLoadFReg(inst.FB)); + if (inst.SUBOP5 & 2) val = ibuild.EmitFDNeg(val); - if (inst.OPCD == 59) { + + if (inst.OPCD == 59) + { val = ibuild.EmitDoubleToSingle(val); val = ibuild.EmitDupSingleToMReg(val); - } else { + } + else + { val = ibuild.EmitInsertDoubleInMReg(val, ibuild.EmitLoadFReg(inst.FD)); } + ibuild.EmitStoreFReg(val, inst.FD); } @@ -97,7 +107,8 @@ void JitILBase::fsign(UGeckoInstruction inst) FALLBACK_IF(true); // TODO - switch (inst.SUBOP10) { + switch (inst.SUBOP10) + { case 40: // fnegx break; case 264: // fabsx diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp index e7b96a6850..10449eebb9 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp @@ -198,7 +198,7 @@ void JitILBase::subfic(UGeckoInstruction inst) JITDISABLE(bJITIntegerOff); IREmitter::InstLoc nota, lhs, val, test; nota = ibuild.EmitXor(ibuild.EmitLoadGReg(inst.RA), - ibuild.EmitIntConst(-1)); + ibuild.EmitIntConst(-1)); if (inst.SIMM_16 == -1) { @@ -220,7 +220,10 @@ void JitILBase::subfcx(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITIntegerOff); - if (inst.OE) PanicAlert("OE: subfcx"); + + if (inst.OE) + PanicAlert("OE: subfcx"); + IREmitter::InstLoc val, test, lhs, rhs; lhs = ibuild.EmitLoadGReg(inst.RB); rhs = ibuild.EmitLoadGReg(inst.RA); @@ -229,6 +232,7 @@ void JitILBase::subfcx(UGeckoInstruction inst) test = ibuild.EmitICmpEq(rhs, ibuild.EmitIntConst(0)); test = ibuild.EmitOr(test, ibuild.EmitICmpUgt(lhs, val)); ibuild.EmitStoreCarry(test); + if (inst.Rc) ComputeRC(ibuild, val); } @@ -237,7 +241,10 @@ void JitILBase::subfex(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITIntegerOff); - if (inst.OE) PanicAlert("OE: subfex"); + + if (inst.OE) + PanicAlert("OE: subfex"); + IREmitter::InstLoc val, test, lhs, rhs, carry; rhs = ibuild.EmitLoadGReg(inst.RA); carry = ibuild.EmitLoadCarry(); @@ -250,6 +257,7 @@ void JitILBase::subfex(UGeckoInstruction 
@@ -250,6 +257,7 @@ void JitILBase::subfex(UGeckoInstruction inst)
 	ibuild.EmitStoreGReg(val, inst.RD);
 	test = ibuild.EmitOr(test, ibuild.EmitICmpUgt(lhs, val));
 	ibuild.EmitStoreCarry(test);
+
 	if (inst.Rc)
 		ComputeRC(ibuild, val);
 }
@@ -258,10 +266,14 @@ void JitILBase::subfx(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(bJITIntegerOff);
-	if (inst.OE) PanicAlert("OE: subfx");
+
+	if (inst.OE)
+		PanicAlert("OE: subfx");
+
 	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB);
 	val = ibuild.EmitSub(val, ibuild.EmitLoadGReg(inst.RA));
 	ibuild.EmitStoreGReg(val, inst.RD);
+
 	if (inst.Rc)
 		ComputeRC(ibuild, val);
 }
@@ -282,6 +294,7 @@ void JitILBase::mullwx(UGeckoInstruction inst)
 	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB);
 	val = ibuild.EmitMul(ibuild.EmitLoadGReg(inst.RA), val);
 	ibuild.EmitStoreGReg(val, inst.RD);
+
 	if (inst.Rc)
 		ComputeRC(ibuild, val);
 }
@@ -295,6 +308,7 @@ void JitILBase::mulhwux(UGeckoInstruction inst)
 	IREmitter::InstLoc a = ibuild.EmitLoadGReg(inst.RA);
 	IREmitter::InstLoc b = ibuild.EmitLoadGReg(inst.RB);
 	IREmitter::InstLoc d = ibuild.EmitMulHighUnsigned(a, b);
 	ibuild.EmitStoreGReg(d, inst.RD);
+
 	if (inst.Rc)
 		ComputeRC(ibuild, d);
 }
@@ -326,7 +340,9 @@ void JitILBase::divwux(UGeckoInstruction inst)
 	MOV(32, gpr.R(d), R(EAX));
 	gpr.UnlockAll();
 	gpr.UnlockAllX();
-	if (inst.Rc) {
+
+	if (inst.Rc)
+	{
 		CALL((u8*)asm_routines.computeRc);
 	}
 #endif
@@ -339,6 +355,7 @@ void JitILBase::addx(UGeckoInstruction inst)
 	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB);
 	val = ibuild.EmitAdd(ibuild.EmitLoadGReg(inst.RA), val);
 	ibuild.EmitStoreGReg(val, inst.RD);
+
 	if (inst.Rc)
 		ComputeRC(ibuild, val);
 }
@@ -347,12 +364,12 @@ void JitILBase::addzex(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(bJITIntegerOff);
-	IREmitter::InstLoc lhs = ibuild.EmitLoadGReg(inst.RA),
-		val, newcarry;
+	IREmitter::InstLoc lhs = ibuild.EmitLoadGReg(inst.RA), val, newcarry;
 	val = ibuild.EmitAdd(lhs, ibuild.EmitLoadCarry());
 	ibuild.EmitStoreGReg(val, inst.RD);
 	newcarry = ibuild.EmitICmpUlt(val, lhs);
 	ibuild.EmitStoreCarry(newcarry);
+
 	if (inst.Rc)
 		ComputeRC(ibuild, val);
 }
@@ -375,7 +392,9 @@ void JitILBase::addex(UGeckoInstruction inst)
 	ibuild.EmitStoreGReg(abc, inst.RD);
 	ibuild.EmitStoreCarry(new_carry);
 
-	if (inst.OE) PanicAlert("OE: addex");
+	if (inst.OE)
+		PanicAlert("OE: addex");
+
 	if (inst.Rc)
 		ComputeRC(ibuild, abc);
 }
@@ -389,6 +408,7 @@ void JitILBase::rlwinmx(UGeckoInstruction inst)
 	val = ibuild.EmitRol(val, ibuild.EmitIntConst(inst.SH));
 	val = ibuild.EmitAnd(val, ibuild.EmitIntConst(mask));
 	ibuild.EmitStoreGReg(val, inst.RA);
+
 	if (inst.Rc)
 		ComputeRC(ibuild, val);
 }
@@ -406,6 +426,7 @@ void JitILBase::rlwimix(UGeckoInstruction inst)
 	ival = ibuild.EmitAnd(ival, ibuild.EmitIntConst(~mask));
 	val = ibuild.EmitOr(ival, val);
 	ibuild.EmitStoreGReg(val, inst.RA);
+
 	if (inst.Rc)
 		ComputeRC(ibuild, val);
 }
@@ -419,6 +440,7 @@ void JitILBase::rlwnmx(UGeckoInstruction inst)
 	val = ibuild.EmitRol(val, ibuild.EmitLoadGReg(inst.RB));
 	val = ibuild.EmitAnd(val, ibuild.EmitIntConst(mask));
 	ibuild.EmitStoreGReg(val, inst.RA);
+
 	if (inst.Rc)
 		ComputeRC(ibuild, val);
 }
@@ -430,6 +452,7 @@ void JitILBase::negx(UGeckoInstruction inst)
 	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RA);
 	val = ibuild.EmitSub(ibuild.EmitIntConst(0), val);
 	ibuild.EmitStoreGReg(val, inst.RD);
+
 	if (inst.Rc)
 		ComputeRC(ibuild, val);
 }
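The rlwinmx/rlwimix/rlwnmx hunks above all boil down to a 32-bit left rotate followed by an MB..ME bit mask (PowerPC numbers bits from the most-significant end). A self-contained sketch of those semantics; Rlwinm() and the mask builder below are illustrative stand-ins, not the helpers the JIT actually calls:

#include <cstdint>

static uint32_t RotL32(uint32_t x, unsigned n)
{
    n &= 31;
    return (x << n) | (x >> ((32 - n) & 31));
}

// Mask with big-endian bits mb..me set, wrapping around when mb > me.
static uint32_t MaskFromMbMe(unsigned mb, unsigned me)
{
    uint32_t begin = 0xFFFFFFFFu >> mb;  // bits mb..31
    uint32_t end = 0x7FFFFFFFu >> me;    // bits me+1..31
    return (mb <= me) ? (begin ^ end) : (begin | ~end);
}

// rlwinm ra, rs, sh, mb, me  =>  ra = rotl(rs, sh) & mask(mb, me)
static uint32_t Rlwinm(uint32_t rs, unsigned sh, unsigned mb, unsigned me)
{
    return RotL32(rs, sh) & MaskFromMbMe(mb, me);
}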
@@ -438,9 +461,11 @@ void JitILBase::srwx(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(bJITIntegerOff);
-	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS),
-		samt = ibuild.EmitLoadGReg(inst.RB),
-		corr;
+
+	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS);
+	IREmitter::InstLoc samt = ibuild.EmitLoadGReg(inst.RB);
+	IREmitter::InstLoc corr;
+
 	// FIXME: We can do better with a cmov
 	// FIXME: We can do better on 64-bit
 	val = ibuild.EmitShrl(val, samt);
@@ -449,6 +474,7 @@ void JitILBase::srwx(UGeckoInstruction inst)
 	corr = ibuild.EmitXor(corr, ibuild.EmitIntConst(-1));
 	val = ibuild.EmitAnd(corr, val);
 	ibuild.EmitStoreGReg(val, inst.RA);
+
 	if (inst.Rc)
 		ComputeRC(ibuild, val);
 }
@@ -457,9 +483,11 @@ void JitILBase::slwx(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(bJITIntegerOff);
-	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS),
-		samt = ibuild.EmitLoadGReg(inst.RB),
-		corr;
+
+	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS);
+	IREmitter::InstLoc samt = ibuild.EmitLoadGReg(inst.RB);
+	IREmitter::InstLoc corr;
+
 	// FIXME: We can do better with a cmov
 	// FIXME: We can do better on 64-bit
 	val = ibuild.EmitShl(val, samt);
@@ -468,6 +496,7 @@ void JitILBase::slwx(UGeckoInstruction inst)
 	corr = ibuild.EmitXor(corr, ibuild.EmitIntConst(-1));
 	val = ibuild.EmitAnd(corr, val);
 	ibuild.EmitStoreGReg(val, inst.RA);
+
 	if (inst.Rc)
 		ComputeRC(ibuild, val);
 }
@@ -524,6 +553,7 @@ void JitILBase::cntlzwx(UGeckoInstruction inst)
 	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS);
 	val = ibuild.EmitCntlzw(val);
 	ibuild.EmitStoreGReg(val, inst.RA);
+
 	if (inst.Rc)
 		ComputeRC(ibuild, val);
 }
diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStore.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStore.cpp
index ad02e34df6..3801ac7ba7 100644
--- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStore.cpp
@@ -35,11 +35,21 @@ void JitILBase::lXz(UGeckoInstruction inst)
 	IREmitter::InstLoc val;
 	switch (inst.OPCD & ~0x1)
 	{
-	case 32: val = ibuild.EmitLoad32(addr); break; //lwz
-	case 40: val = ibuild.EmitLoad16(addr); break; //lhz
-	case 34: val = ibuild.EmitLoad8(addr); break; //lbz
-	default: PanicAlert("lXz: invalid access size"); val = nullptr; break;
+	case 32: // lwz
+		val = ibuild.EmitLoad32(addr);
+		break;
+	case 40: // lhz
+		val = ibuild.EmitLoad16(addr);
+		break;
+	case 34: // lbz
+		val = ibuild.EmitLoad8(addr);
+		break;
+	default:
+		PanicAlert("lXz: invalid access size");
+		val = nullptr;
+		break;
 	}
+
 	ibuild.EmitStoreGReg(val, inst.RD);
 }
@@ -87,10 +97,17 @@ void JitILBase::lXzx(UGeckoInstruction inst)
 	IREmitter::InstLoc val;
 	switch (inst.SUBOP10 & ~32)
 	{
-	default: PanicAlert("lXzx: invalid access size");
-	case 23: val = ibuild.EmitLoad32(addr); break; //lwzx
-	case 279: val = ibuild.EmitLoad16(addr); break; //lhzx
-	case 87: val = ibuild.EmitLoad8(addr); break; //lbzx
+	default:
+		PanicAlert("lXzx: invalid access size");
+	case 23: // lwzx
+		val = ibuild.EmitLoad32(addr);
+		break;
+	case 279: // lhzx
+		val = ibuild.EmitLoad16(addr);
+		break;
+	case 87: // lbzx
+		val = ibuild.EmitLoad8(addr);
+		break;
 	}
 	ibuild.EmitStoreGReg(val, inst.RD);
 }
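Context for the srwx/slwx hunks earlier in this file: PowerPC srw/slw take a 6-bit shift amount, so counts 32..63 must produce zero, while x86 shift instructions mask the count to 5 bits; that is why the IR builds a correction mask instead of shifting directly. A plain scalar model of the required result (illustrative, not Dolphin code):

#include <cstdint>

// srw/slw: if bit 0x20 of rb is set, the result is 0 rather than
// rs shifted by (rb & 31), which is what a bare x86 shift would give.
static uint32_t Srw(uint32_t rs, uint32_t rb)
{
    return (rb & 0x20) ? 0 : (rs >> (rb & 0x1F));
}

static uint32_t Slw(uint32_t rs, uint32_t rb)
{
    return (rb & 0x20) ? 0 : (rs << (rb & 0x1F));
}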
@@ -115,7 +132,10 @@ void JitILBase::dcbz(UGeckoInstruction inst)
 	// TODO!
 #if 0
 	if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff)
-	{Default(inst); return;} // turn off from debugger
+	{
+		Default(inst);
+		return;
+	}
 	INSTRUCTION_START;
 	MOV(32, R(EAX), gpr.R(inst.RB));
 	if (inst.RA)
@@ -149,10 +169,18 @@ void JitILBase::stX(UGeckoInstruction inst)
 	switch (inst.OPCD & ~1)
 	{
-	case 36: ibuild.EmitStore32(value, addr); break; //stw
-	case 44: ibuild.EmitStore16(value, addr); break; //sth
-	case 38: ibuild.EmitStore8(value, addr); break; //stb
-	default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return;
+	case 36: // stw
+		ibuild.EmitStore32(value, addr);
+		break;
+	case 44: // sth
+		ibuild.EmitStore16(value, addr);
+		break;
+	case 38: // stb
+		ibuild.EmitStore8(value, addr);
+		break;
+	default:
+		_assert_msg_(DYNA_REC, 0, "stX: Invalid access size.");
+		return;
 	}
 }
@@ -172,10 +200,18 @@ void JitILBase::stXx(UGeckoInstruction inst)
 	switch (inst.SUBOP10 & ~32)
 	{
-	case 151: ibuild.EmitStore32(value, addr); break; //stw
-	case 407: ibuild.EmitStore16(value, addr); break; //sth
-	case 215: ibuild.EmitStore8(value, addr); break; //stb
-	default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return;
+	case 151: // stw
+		ibuild.EmitStore32(value, addr);
+		break;
+	case 407: // sth
+		ibuild.EmitStore16(value, addr);
+		break;
+	case 215: // stb
+		ibuild.EmitStore8(value, addr);
+		break;
+	default:
+		_assert_msg_(DYNA_REC, 0, "stXx: Invalid store size.");
+		return;
 	}
 }
diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Paired.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Paired.cpp
index 2641e2b880..b26d729909 100644
--- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Paired.cpp
+++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Paired.cpp
@@ -129,7 +129,8 @@ void JitILBase::ps_maddXX(UGeckoInstruction inst)
 	switch (inst.SUBOP5)
 	{
-	case 14: {//madds0
+	case 14: // madds0
+	{
 		op2 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC));
 		op2 = ibuild.EmitFPDup0(op2);
 		val = ibuild.EmitFPMul(val, op2);
@@ -137,7 +138,8 @@ void JitILBase::ps_maddXX(UGeckoInstruction inst)
 		val = ibuild.EmitFPAdd(val, op3);
 		break;
 	}
-	case 15: {//madds1
+	case 15: // madds1
+	{
 		op2 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC));
 		op2 = ibuild.EmitFPDup1(op2);
 		val = ibuild.EmitFPMul(val, op2);
@@ -145,21 +147,24 @@ void JitILBase::ps_maddXX(UGeckoInstruction inst)
 		val = ibuild.EmitFPAdd(val, op3);
 		break;
 	}
-	case 28: {//msub
+	case 28: // msub
+	{
 		op2 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC));
 		val = ibuild.EmitFPMul(val, op2);
 		op3 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB));
 		val = ibuild.EmitFPSub(val, op3);
 		break;
 	}
-	case 29: {//madd
+	case 29: // madd
+	{
 		op2 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC));
 		val = ibuild.EmitFPMul(val, op2);
 		op3 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB));
 		val = ibuild.EmitFPAdd(val, op3);
 		break;
 	}
-	case 30: {//nmsub
+	case 30: // nmsub
+	{
 		op2 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC));
 		val = ibuild.EmitFPMul(val, op2);
 		op3 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB));
@@ -167,7 +172,8 @@ void JitILBase::ps_maddXX(UGeckoInstruction inst)
 		val = ibuild.EmitFPNeg(val);
 		break;
 	}
-	case 31: {//nmadd
+	case 31: // nmadd
+	{
 		op2 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC));
 		val = ibuild.EmitFPMul(val, op2);
 		op3 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB));
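The SUBOP5 cases above form the paired-single fused multiply-add family: madds0/madds1 first splat one element of FC across the pair, and the nm* forms negate the result. A rough reference model under those assumptions (illustrative struct and names; this ignores the single-precision rounding and NaN details the emulator has to care about):

// ps0/ps1 are the two paired singles, held as doubles as in the emulator.
struct PS { double ps0, ps1; };

static PS PsMadd(PS a, PS c, PS b)   { return { a.ps0 * c.ps0 + b.ps0, a.ps1 * c.ps1 + b.ps1 }; } // 29: madd
static PS PsMsub(PS a, PS c, PS b)   { return { a.ps0 * c.ps0 - b.ps0, a.ps1 * c.ps1 - b.ps1 }; } // 28: msub
static PS PsMadds0(PS a, PS c, PS b) { return { a.ps0 * c.ps0 + b.ps0, a.ps1 * c.ps0 + b.ps1 }; } // 14: FPDup0(c)
static PS PsMadds1(PS a, PS c, PS b) { return { a.ps0 * c.ps1 + b.ps0, a.ps1 * c.ps1 + b.ps1 }; } // 15: FPDup1(c)
// 30 (nmsub) and 31 (nmadd) are PsMsub/PsMadd followed by negating both elements.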
diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_SystemRegisters.cpp
index 31c6ffa4b4..e1ad7ee1c7 100644
--- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_SystemRegisters.cpp
+++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_SystemRegisters.cpp
@@ -170,42 +170,35 @@ void JitILBase::crXX(UGeckoInstruction inst)
 	// Compute combined bit
 	const unsigned subop = inst.SUBOP10;
-	switch (subop) {
-	case 257:
-		// crand
+	switch (subop)
+	{
+	case 257: // crand
 		eax = ibuild.EmitAnd(eax, ecx);
 		break;
-	case 129:
-		// crandc
+	case 129: // crandc
 		ecx = ibuild.EmitNot(ecx);
 		eax = ibuild.EmitAnd(eax, ecx);
 		break;
-	case 289:
-		// creqv
+	case 289: // creqv
 		eax = ibuild.EmitXor(eax, ecx);
 		eax = ibuild.EmitNot(eax);
 		break;
-	case 225:
-		// crnand
+	case 225: // crnand
 		eax = ibuild.EmitAnd(eax, ecx);
 		eax = ibuild.EmitNot(eax);
 		break;
-	case 33:
-		// crnor
+	case 33: // crnor
 		eax = ibuild.EmitOr(eax, ecx);
 		eax = ibuild.EmitNot(eax);
 		break;
-	case 449:
-		// cror
+	case 449: // cror
 		eax = ibuild.EmitOr(eax, ecx);
 		break;
-	case 417:
-		// crorc
+	case 417: // crorc
 		ecx = ibuild.EmitNot(ecx);
 		eax = ibuild.EmitOr(eax, ecx);
 		break;
-	case 193:
-		// crxor
+	case 193: // crxor
 		eax = ibuild.EmitXor(eax, ecx);
 		break;
 	default:
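For reference, the crXX subop decode above amounts to a small boolean-op table over two condition-register bits. An illustrative standalone version (invented helper, not Dolphin code; only bit 0 of the result is meaningful):

#include <cstdint>

static uint32_t CrOp(unsigned subop, uint32_t a, uint32_t b)
{
    switch (subop)
    {
    case 257: return (a & b) & 1;  // crand
    case 129: return (a & ~b) & 1; // crandc
    case 289: return ~(a ^ b) & 1; // creqv
    case 225: return ~(a & b) & 1; // crnand
    case 33:  return ~(a | b) & 1; // crnor
    case 449: return (a | b) & 1;  // cror
    case 417: return (a | ~b) & 1; // crorc
    case 193: return (a ^ b) & 1;  // crxor
    default:  return 0;
    }
}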