From 48891c6359cc1465bf083789fb8052b14262df36 Mon Sep 17 00:00:00 2001 From: comex Date: Mon, 1 Sep 2014 01:41:40 -0400 Subject: [PATCH] Reserve a register for ppcState. The register is RBP, previously in the GPR allocation order. The next commit will investigate whether there are too few GPRs (now or before), but for now there is no replacement. Previously, ppcState was accessed RIP-relatively; using RBP, anything in the first 0x100 bytes of ppcState (including all the GPRs) can be accessed with three fewer bytes. Code to access ppcState is generated constantly (mostly by register save/load), so in principle, this should improve instruction cache footprint significantly. It seems that this makes a significant performance difference in practice. The vast majority of this commit is mechanically replacing M(&PowerPC::ppcState.x) with a new macro PPCSTATE(x). Version 2: gets most of the cases which were using the register access macros. --- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 62 +++++------ Source/Core/Core/PowerPC/Jit64/JitAsm.cpp | 11 +- .../Core/Core/PowerPC/Jit64/JitRegCache.cpp | 6 +- Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp | 38 +++---- .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp | 12 +-- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 48 ++++----- .../Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp | 6 +- .../PowerPC/Jit64/Jit_LoadStorePaired.cpp | 2 +- .../PowerPC/Jit64/Jit_SystemRegisters.cpp | 54 +++++----- Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp | 100 +++++++++--------- Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp | 38 +++---- .../Core/PowerPC/JitCommon/JitAsmCommon.cpp | 18 ++-- .../Core/PowerPC/JitCommon/JitBackpatch.cpp | 2 +- .../Core/Core/PowerPC/JitCommon/JitCache.cpp | 2 +- .../Core/Core/PowerPC/JitCommon/Jit_Util.cpp | 14 +-- Source/Core/Core/PowerPC/JitCommon/Jit_Util.h | 12 ++- Source/Core/Core/PowerPC/PowerPC.h | 25 ++++- 17 files changed, 238 insertions(+), 212 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp 
b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 102900ebe9..4ec492b1ab 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -210,8 +210,8 @@ void Jit64::WriteCallInterpreter(UGeckoInstruction inst) fpr.Flush(); if (js.isLastInstruction) { - MOV(32, M(&PC), Imm32(js.compilerPC)); - MOV(32, M(&NPC), Imm32(js.compilerPC + 4)); + MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); + MOV(32, PPCSTATE(npc), Imm32(js.compilerPC + 4)); } Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst); ABI_CallFunctionC((void*)instr, inst.hex); @@ -279,7 +279,7 @@ void Jit64::WriteExit(u32 destination) { Cleanup(); - SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); //If nobody has taken care of this yet (this can be removed when all branches are done) JitBlock *b = js.curBlock; @@ -298,7 +298,7 @@ void Jit64::WriteExit(u32 destination) } else { - MOV(32, M(&PC), Imm32(destination)); + MOV(32, PPCSTATE(pc), Imm32(destination)); JMP(asm_routines.dispatcher, true); } @@ -307,39 +307,39 @@ void Jit64::WriteExit(u32 destination) void Jit64::WriteExitDestInEAX() { - MOV(32, M(&PC), R(EAX)); + MOV(32, PPCSTATE(pc), R(EAX)); Cleanup(); - SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } void Jit64::WriteRfiExitDestInEAX() { - MOV(32, M(&PC), R(EAX)); - MOV(32, M(&NPC), R(EAX)); + MOV(32, PPCSTATE(pc), R(EAX)); + MOV(32, PPCSTATE(npc), R(EAX)); Cleanup(); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExceptions)); - SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } void Jit64::WriteExceptionExit() { Cleanup(); - MOV(32, R(EAX), M(&PC)); - MOV(32, M(&NPC), R(EAX)); + MOV(32, R(EAX), PPCSTATE(pc)); + MOV(32, PPCSTATE(npc), R(EAX)); 
ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExceptions)); - SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } void Jit64::WriteExternalExceptionExit() { Cleanup(); - MOV(32, R(EAX), M(&PC)); - MOV(32, M(&NPC), R(EAX)); + MOV(32, R(EAX), PPCSTATE(pc)); + MOV(32, PPCSTATE(npc), R(EAX)); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExternalExceptions)); - SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } @@ -426,7 +426,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc // Downcount flag check. The last block decremented downcounter, and the flag should still be available. FixupBranch skip = J_CC(CC_NBE); - MOV(32, M(&PC), Imm32(js.blockStart)); + MOV(32, PPCSTATE(pc), Imm32(js.blockStart)); JMP(asm_routines.doTiming, true); // downcount hit zero - go doTiming. 
SetJumpTarget(skip); @@ -452,7 +452,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc } #if defined(_DEBUG) || defined(DEBUGFAST) || defined(NAN_CHECK) // should help logged stack-traces become more accurate - MOV(32, M(&PC), Imm32(js.blockStart)); + MOV(32, PPCSTATE(pc), Imm32(js.blockStart)); #endif // Start up the register allocators @@ -501,7 +501,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32) { js.fifoBytesThisBlock -= 32; - MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write + MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write u32 registersInUse = CallerSavedRegistersInUse(); ABI_PushRegistersAndAdjustStack(registersInUse, false); ABI_CallFunction((void *)&GPFifo::CheckGatherPipe); @@ -520,7 +520,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc HLEFunction(function); if (type == HLE::HLE_HOOK_REPLACE) { - MOV(32, R(EAX), M(&NPC)); + MOV(32, R(EAX), PPCSTATE(npc)); js.downcountAmount += js.st.numCycles; WriteExitDestInEAX(); break; @@ -537,13 +537,13 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc fpr.Flush(); //This instruction uses FPU - needs to add FP exception bailout - TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); // Test FP enabled bit + TEST(32, PPCSTATE(msr), Imm32(1 << 13)); // Test FP enabled bit FixupBranch b1 = J_CC(CC_NZ, true); // If a FPU exception occurs, the exception handler will read // from PC. Update PC with the latest value in case that happens. 
- MOV(32, M(&PC), Imm32(ops[i].address)); - OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); + MOV(32, PPCSTATE(pc), Imm32(ops[i].address)); + OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); WriteExceptionExit(); SetJumpTarget(b1); @@ -557,16 +557,16 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc gpr.Flush(); fpr.Flush(); - TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT)); + TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT)); FixupBranch clearInt = J_CC(CC_NZ, true); - TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT)); + TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT)); FixupBranch noExtException = J_CC(CC_Z, true); - TEST(32, M((void *)&PowerPC::ppcState.msr), Imm32(0x0008000)); + TEST(32, PPCSTATE(msr), Imm32(0x0008000)); FixupBranch noExtIntEnable = J_CC(CC_Z, true); TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH)); FixupBranch noCPInt = J_CC(CC_Z, true); - MOV(32, M(&PC), Imm32(ops[i].address)); + MOV(32, PPCSTATE(pc), Imm32(ops[i].address)); WriteExternalExceptionExit(); SetJumpTarget(noCPInt); @@ -580,7 +580,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc gpr.Flush(); fpr.Flush(); - MOV(32, M(&PC), Imm32(ops[i].address)); + MOV(32, PPCSTATE(pc), Imm32(ops[i].address)); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckBreakPoints)); TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); FixupBranch noBreakpoint = J_CC(CC_Z); @@ -597,12 +597,12 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, 
JitBloc gpr.Flush(); fpr.Flush(); - TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_DSI)); + TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI)); FixupBranch noMemException = J_CC(CC_Z, true); // If a memory exception occurs, the exception handler will read // from PC. Update PC with the latest value in case that happens. - MOV(32, M(&PC), Imm32(ops[i].address)); + MOV(32, PPCSTATE(pc), Imm32(ops[i].address)); WriteExceptionExit(); SetJumpTarget(noMemException); } @@ -645,9 +645,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc if (code_block.m_memory_exception) { // Address of instruction could not be translated - MOV(32, M(&NPC), Imm32(js.compilerPC)); + MOV(32, PPCSTATE(npc), Imm32(js.compilerPC)); - OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI)); + OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI)); // Remove the invalid instruction from the icache, forcing a recompile MOV(64, R(RAX), ImmPtr(jit->GetBlockCache()->GetICachePtr(js.compilerPC))); diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index 2682ea80f2..b4c3bb9bc5 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -29,6 +29,7 @@ void Jit64AsmRoutineManager::Generate() // Two statically allocated registers. 
MOV(64, R(RBX), Imm64((u64)Memory::base)); MOV(64, R(R15), Imm64((u64)jit->GetBlockCache()->GetCodePointers())); //It's below 2GB so 32 bits are good enough + MOV(64, R(RBP), Imm64((u64)&PowerPC::ppcState + 0x80)); const u8* outerLoop = GetCodePtr(); ABI_CallFunction(reinterpret_cast(&CoreTiming::Advance)); @@ -55,7 +56,7 @@ void Jit64AsmRoutineManager::Generate() SetJumpTarget(skipToRealDispatch); dispatcherNoCheck = GetCodePtr(); - MOV(32, R(EAX), M(&PowerPC::ppcState.pc)); + MOV(32, R(EAX), PPCSTATE(pc)); dispatcherPcInEAX = GetCodePtr(); u32 mask = 0; @@ -113,7 +114,7 @@ void Jit64AsmRoutineManager::Generate() SetJumpTarget(notfound); //Ok, no block, let's jit - MOV(32, R(ABI_PARAM1), M(&PowerPC::ppcState.pc)); + MOV(32, R(ABI_PARAM1), PPCSTATE(pc)); CALL((void *)&Jit); JMP(dispatcherNoCheck); // no point in special casing this @@ -122,10 +123,10 @@ void Jit64AsmRoutineManager::Generate() doTiming = GetCodePtr(); // Test external exceptions. - TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT | EXCEPTION_PERFORMANCE_MONITOR | EXCEPTION_DECREMENTER)); + TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT | EXCEPTION_PERFORMANCE_MONITOR | EXCEPTION_DECREMENTER)); FixupBranch noExtException = J_CC(CC_Z); - MOV(32, R(EAX), M(&PC)); - MOV(32, M(&NPC), R(EAX)); + MOV(32, R(EAX), PPCSTATE(pc)); + MOV(32, PPCSTATE(npc), R(EAX)); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExternalExceptions)); SetJumpTarget(noExtException); diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp index e2e0ed6a6c..8a329eb723 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp @@ -202,7 +202,7 @@ const int* GPRRegCache::GetAllocationOrder(size_t& count) #ifdef _WIN32 RSI, RDI, R13, R14, R8, R9, R10, R11, R12, //, RCX #else - RBP, R13, R14, R8, R9, R10, R11, R12, //, RCX + R13, R14, R8, R9, R10, R11, R12, //, RCX #endif }; count = 
sizeof(allocationOrder) / sizeof(const int); @@ -221,12 +221,12 @@ const int* FPURegCache::GetAllocationOrder(size_t& count) OpArg GPRRegCache::GetDefaultLocation(size_t reg) const { - return M(&ppcState.gpr[reg]); + return PPCSTATE(gpr[reg]); } OpArg FPURegCache::GetDefaultLocation(size_t reg) const { - return M(&ppcState.ps[reg][0]); + return PPCSTATE(ps[reg][0]); } void RegCache::KillImmediate(size_t preg, bool doLoad, bool makeDirty) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp index 8bef37cb51..65b2cad7e8 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp @@ -28,9 +28,9 @@ void Jit64::sc(UGeckoInstruction inst) gpr.Flush(); fpr.Flush(); - MOV(32, M(&PC), Imm32(js.compilerPC + 4)); + MOV(32, PPCSTATE(pc), Imm32(js.compilerPC + 4)); LOCK(); - OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_SYSCALL)); + OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_SYSCALL)); WriteExceptionExit(); } @@ -45,12 +45,12 @@ void Jit64::rfi(UGeckoInstruction inst) const u32 mask = 0x87C0FFFF; const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13] // MSR = ((MSR & ~mask) | (SRR1 & mask)) & clearMSR13; - AND(32, M(&MSR), Imm32((~mask) & clearMSR13)); - MOV(32, R(EAX), M(&SRR1)); + AND(32, PPCSTATE(msr), Imm32((~mask) & clearMSR13)); + MOV(32, R(EAX), PPCSTATE_SRR1); AND(32, R(EAX), Imm32(mask & clearMSR13)); - OR(32, M(&MSR), R(EAX)); + OR(32, PPCSTATE(msr), R(EAX)); // NPC = SRR0; - MOV(32, R(EAX), M(&SRR0)); + MOV(32, R(EAX), PPCSTATE_SRR0); WriteRfiExitDestInEAX(); } @@ -62,7 +62,7 @@ void Jit64::bx(UGeckoInstruction inst) // We must always process the following sentence // even if the blocks are merged by PPCAnalyst::Flatten(). if (inst.LK) - MOV(32, M(&LR), Imm32(js.compilerPC + 4)); + MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // If this is not the last instruction of a block, // we will skip the rest process. 
@@ -82,7 +82,7 @@ void Jit64::bx(UGeckoInstruction inst) destination = js.compilerPC + SignExt26(inst.LI << 2); #ifdef ACID_TEST if (inst.LK) - AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xFF000000))); + AND(32, PPCSTATE(cr), Imm32(~(0xFF000000))); #endif if (destination == js.compilerPC) { @@ -108,7 +108,7 @@ void Jit64::bcx(UGeckoInstruction inst) FixupBranch pCTRDontBranch; if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR { - SUB(32, M(&CTR), Imm8(1)); + SUB(32, PPCSTATE_CTR, Imm8(1)); if (inst.BO & BO_BRANCH_IF_CTR_0) pCTRDontBranch = J_CC(CC_NZ, true); else @@ -123,7 +123,7 @@ void Jit64::bcx(UGeckoInstruction inst) } if (inst.LK) - MOV(32, M(&LR), Imm32(js.compilerPC + 4)); + MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); u32 destination; if (inst.AA) @@ -164,9 +164,9 @@ void Jit64::bcctrx(UGeckoInstruction inst) gpr.Flush(); fpr.Flush(); - MOV(32, R(EAX), M(&CTR)); + MOV(32, R(EAX), PPCSTATE_CTR); if (inst.LK_3) - MOV(32, M(&LR), Imm32(js.compilerPC + 4)); // LR = PC + 4; + MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4; AND(32, R(EAX), Imm32(0xFFFFFFFC)); WriteExitDestInEAX(); } @@ -179,11 +179,11 @@ void Jit64::bcctrx(UGeckoInstruction inst) FixupBranch b = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), !(inst.BO_2 & BO_BRANCH_IF_TRUE)); - MOV(32, R(EAX), M(&CTR)); + MOV(32, R(EAX), PPCSTATE_CTR); AND(32, R(EAX), Imm32(0xFFFFFFFC)); - //MOV(32, M(&PC), R(EAX)); => Already done in WriteExitDestInEAX() + //MOV(32, PPCSTATE(pc), R(EAX)); => Already done in WriteExitDestInEAX() if (inst.LK_3) - MOV(32, M(&LR), Imm32(js.compilerPC + 4)); // LR = PC + 4; + MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4; gpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE); @@ -204,7 +204,7 @@ void Jit64::bclrx(UGeckoInstruction inst) FixupBranch pCTRDontBranch; if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR { - SUB(32, M(&CTR), Imm8(1)); + SUB(32, PPCSTATE_CTR, Imm8(1)); if (inst.BO & 
BO_BRANCH_IF_CTR_0) pCTRDontBranch = J_CC(CC_NZ, true); else @@ -221,13 +221,13 @@ void Jit64::bclrx(UGeckoInstruction inst) // This below line can be used to prove that blr "eats flags" in practice. // This observation will let us do a lot of fun observations. #ifdef ACID_TEST - AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xFF000000))); + AND(32, PPCSTATE(cr), Imm32(~(0xFF000000))); #endif - MOV(32, R(EAX), M(&LR)); + MOV(32, R(EAX), PPCSTATE_LR); AND(32, R(EAX), Imm32(0xFFFFFFFC)); if (inst.LK) - MOV(32, M(&LR), Imm32(js.compilerPC + 4)); + MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); gpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index 537f02db0d..88a23e8a19 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -248,7 +248,7 @@ void Jit64::fcmpx(UGeckoInstruction inst) fpr.BindToRegister(b, true); if (fprf) - AND(32, M(&FPSCR), Imm32(~FPRF_MASK)); + AND(32, PPCSTATE(fpscr), Imm32(~FPRF_MASK)); // Are we masking sNaN invalid floating point exceptions? If not this could crash if we don't handle the exception? 
UCOMISD(fpr.R(b).GetSimpleReg(), fpr.R(a)); @@ -273,14 +273,14 @@ void Jit64::fcmpx(UGeckoInstruction inst) MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_EQ))); if (fprf) - OR(32, M(&FPSCR), Imm32(CR_EQ << FPRF_SHIFT)); + OR(32, PPCSTATE(fpscr), Imm32(CR_EQ << FPRF_SHIFT)); continue1 = J(); SetJumpTarget(pNaN); MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_SO))); if (fprf) - OR(32, M(&FPSCR), Imm32(CR_SO << FPRF_SHIFT)); + OR(32, PPCSTATE(fpscr), Imm32(CR_SO << FPRF_SHIFT)); if (a != b) { @@ -289,13 +289,13 @@ void Jit64::fcmpx(UGeckoInstruction inst) SetJumpTarget(pGreater); MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_GT))); if (fprf) - OR(32, M(&FPSCR), Imm32(CR_GT << FPRF_SHIFT)); + OR(32, PPCSTATE(fpscr), Imm32(CR_GT << FPRF_SHIFT)); continue3 = J(); SetJumpTarget(pLesser); MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_LT))); if (fprf) - OR(32, M(&FPSCR), Imm32(CR_LT << FPRF_SHIFT)); + OR(32, PPCSTATE(fpscr), Imm32(CR_LT << FPRF_SHIFT)); } SetJumpTarget(continue1); @@ -305,7 +305,7 @@ void Jit64::fcmpx(UGeckoInstruction inst) SetJumpTarget(continue3); } - MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX)); + MOV(64, PPCSTATE(cr_val[crf]), R(RAX)); fpr.UnlockAll(); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index a6b60f8f23..82b1e6e5a3 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -21,12 +21,12 @@ void Jit64::GenerateConstantOverflow(bool overflow) if (overflow) { //XER[OV/SO] = 1 - OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); + OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); } else { //XER[OV] = 0 - AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_OV_MASK)); + AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_OV_MASK)); } } @@ -34,11 +34,11 @@ void Jit64::GenerateOverflow() { FixupBranch jno = J_CC(CC_NO); //XER[OV/SO] = 1 - OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_SO_MASK | 
XER_OV_MASK)); + OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); FixupBranch exit = J(); SetJumpTarget(jno); //XER[OV] = 0 - AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_OV_MASK)); + AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_OV_MASK)); SetJumpTarget(exit); } @@ -54,7 +54,7 @@ void Jit64::FinalizeCarryOverflow(bool oe, bool inv) JitSetCA(); SetJumpTarget(carry1); //XER[OV/SO] = 1 - OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); + OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); FixupBranch exit = J(); SetJumpTarget(jno); // Do carry @@ -74,7 +74,7 @@ void Jit64::FinalizeCarryOverflow(bool oe, bool inv) void Jit64::GetCarryEAXAndClear() { - MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); + MOV(32, R(EAX), PPCSTATE(spr[SPR_XER])); BTR(32, R(EAX), Imm8(29)); } @@ -109,7 +109,7 @@ void Jit64::FinalizeCarryGenerateOverflowEAX(bool oe, bool inv) SetJumpTarget(carry1); } // Dump EAX back into XER - MOV(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX)); + MOV(32, PPCSTATE(spr[SPR_XER]), R(EAX)); } // Assumes that the flags were just set through an addition. 
@@ -117,10 +117,10 @@ void Jit64::GenerateCarry() { // USES_XER FixupBranch pNoCarry = J_CC(CC_NC); - OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); + OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_CA_MASK)); FixupBranch pContinue = J(); SetJumpTarget(pNoCarry); - AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(XER_CA_MASK))); + AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~(XER_CA_MASK))); SetJumpTarget(pContinue); } @@ -128,12 +128,12 @@ void Jit64::ComputeRC(const Gen::OpArg & arg) { if (arg.IsImm()) { - MOV(64, M(&PowerPC::ppcState.cr_val[0]), Imm32((s32)arg.offset)); + MOV(64, PPCSTATE(cr_val[0]), Imm32((s32)arg.offset)); } else { MOVSX(64, 32, RAX, arg); - MOV(64, M(&PowerPC::ppcState.cr_val[0]), R(RAX)); + MOV(64, PPCSTATE(cr_val[0]), R(RAX)); } } @@ -375,7 +375,7 @@ void Jit64::cmpXX(UGeckoInstruction inst) compareResult = CR_LT; } MOV(64, R(RAX), Imm64(PPCCRToInternal(compareResult))); - MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX)); + MOV(64, PPCSTATE(cr_val[crf]), R(RAX)); gpr.UnlockAll(); if (merge_branch) @@ -393,7 +393,7 @@ void Jit64::cmpXX(UGeckoInstruction inst) if (js.next_inst.OPCD == 16) // bcx { if (js.next_inst.LK) - MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); + MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4)); u32 destination; if (js.next_inst.AA) @@ -405,16 +405,16 @@ void Jit64::cmpXX(UGeckoInstruction inst) else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx { if (js.next_inst.LK) - MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); - MOV(32, R(EAX), M(&CTR)); + MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4)); + MOV(32, R(EAX), PPCSTATE_CTR); AND(32, R(EAX), Imm32(0xFFFFFFFC)); WriteExitDestInEAX(); } else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx { - MOV(32, R(EAX), M(&LR)); + MOV(32, R(EAX), PPCSTATE_LR); if (js.next_inst.LK) - MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); + MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4)); WriteExitDestInEAX(); } else @@ 
-461,7 +461,7 @@ void Jit64::cmpXX(UGeckoInstruction inst) comparand = R(ABI_PARAM1); } SUB(64, R(RAX), comparand); - MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX)); + MOV(64, PPCSTATE(cr_val[crf]), R(RAX)); if (merge_branch) { @@ -492,7 +492,7 @@ void Jit64::cmpXX(UGeckoInstruction inst) if (js.next_inst.OPCD == 16) // bcx { if (js.next_inst.LK) - MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); + MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4)); u32 destination; if (js.next_inst.AA) @@ -504,19 +504,19 @@ void Jit64::cmpXX(UGeckoInstruction inst) else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx { if (js.next_inst.LK) - MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); + MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4)); - MOV(32, R(EAX), M(&CTR)); + MOV(32, R(EAX), PPCSTATE_CTR); AND(32, R(EAX), Imm32(0xFFFFFFFC)); WriteExitDestInEAX(); } else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx { - MOV(32, R(EAX), M(&LR)); + MOV(32, R(EAX), PPCSTATE_LR); AND(32, R(EAX), Imm32(0xFFFFFFFC)); if (js.next_inst.LK) - MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); + MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4)); WriteExitDestInEAX(); } @@ -2020,7 +2020,7 @@ void Jit64::twx(UGeckoInstruction inst) SetJumpTarget(fixup); } LOCK(); - OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_PROGRAM)); + OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_PROGRAM)); gpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index f085284ed8..5042018cc9 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -123,7 +123,7 @@ void Jit64::lXXx(UGeckoInstruction inst) ABI_PopRegistersAndAdjustStack(registersInUse, false); // ! 
we must continue executing of the loop after exception handling, maybe there is still 0 in r0 - //MOV(32, M(&PowerPC::ppcState.pc), Imm32(js.compilerPC)); + //MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); WriteExceptionExit(); SetJumpTarget(noIdle); @@ -331,7 +331,7 @@ void Jit64::stX(UGeckoInstruction inst) if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe) { // Helps external systems know which instruction triggered the write - MOV(32, M(&PC), Imm32(jit->js.compilerPC)); + MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); gpr.FlushLockX(ABI_PARAM1); MOV(32, R(ABI_PARAM1), gpr.R(s)); @@ -367,7 +367,7 @@ void Jit64::stX(UGeckoInstruction inst) else { // Helps external systems know which instruction triggered the write - MOV(32, M(&PC), Imm32(jit->js.compilerPC)); + MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); u32 registersInUse = CallerSavedRegistersInUse(); ABI_PushRegistersAndAdjustStack(registersInUse, false); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp index 1129d5e833..894d96789d 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -43,7 +43,7 @@ void Jit64::psq_st(UGeckoInstruction inst) // UU[SCALE]UUUUU[TYPE] where SCALE is 6 bits and TYPE is 3 bits, so we have to AND with // 0b0011111100000111, or 0x3F07. MOV(32, R(EAX), Imm32(0x3F07)); - AND(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_GQR0 + inst.I])); + AND(32, R(EAX), PPCSTATE(spr[SPR_GQR0 + inst.I])); MOVZX(32, 8, EDX, R(AL)); // FIXME: Fix ModR/M encoding to allow [EDX*4+disp32] without a base register! 
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index 9c00e70be9..e430144aa8 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -16,22 +16,22 @@ void Jit64::GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate) switch (bit) { case CR_SO_BIT: // check bit 61 set - BT(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(61)); + BT(64, PPCSTATE(cr_val[field]), Imm8(61)); SETcc(negate ? CC_NC : CC_C, R(out)); break; case CR_EQ_BIT: // check bits 31-0 == 0 - CMP(32, M(&PowerPC::ppcState.cr_val[field]), Imm8(0)); + CMP(32, PPCSTATE(cr_val[field]), Imm8(0)); SETcc(negate ? CC_NZ : CC_Z, R(out)); break; case CR_GT_BIT: // check val > 0 - CMP(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(0)); + CMP(64, PPCSTATE(cr_val[field]), Imm8(0)); SETcc(negate ? CC_NG : CC_G, R(out)); break; case CR_LT_BIT: // check bit 62 set - BT(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(62)); + BT(64, PPCSTATE(cr_val[field]), Imm8(62)); SETcc(negate ? 
CC_NC : CC_C, R(out)); break; @@ -42,7 +42,7 @@ void Jit64::GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate) void Jit64::SetCRFieldBit(int field, int bit, Gen::X64Reg in) { - MOV(64, R(ABI_PARAM1), M(&PowerPC::ppcState.cr_val[field])); + MOV(64, R(ABI_PARAM1), PPCSTATE(cr_val[field])); MOVZX(32, 8, in, R(in)); switch (bit) @@ -75,7 +75,7 @@ void Jit64::SetCRFieldBit(int field, int bit, Gen::X64Reg in) } BTS(64, R(ABI_PARAM1), Imm8(32)); - MOV(64, M(&PowerPC::ppcState.cr_val[field]), R(ABI_PARAM1)); + MOV(64, PPCSTATE(cr_val[field]), R(ABI_PARAM1)); } FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) @@ -83,19 +83,19 @@ FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) switch (bit) { case CR_SO_BIT: // check bit 61 set - BT(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(61)); + BT(64, PPCSTATE(cr_val[field]), Imm8(61)); return J_CC(jump_if_set ? CC_C : CC_NC, true); case CR_EQ_BIT: // check bits 31-0 == 0 - CMP(32, M(&PowerPC::ppcState.cr_val[field]), Imm8(0)); + CMP(32, PPCSTATE(cr_val[field]), Imm8(0)); return J_CC(jump_if_set ? CC_Z : CC_NZ, true); case CR_GT_BIT: // check val > 0 - CMP(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(0)); + CMP(64, PPCSTATE(cr_val[field]), Imm8(0)); return J_CC(jump_if_set ? CC_G : CC_LE, true); case CR_LT_BIT: // check bit 62 set - BT(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(62)); + BT(64, PPCSTATE(cr_val[field]), Imm8(62)); return J_CC(jump_if_set ? 
CC_C : CC_NC, true); default: @@ -154,7 +154,7 @@ void Jit64::mtspr(UGeckoInstruction inst) gpr.Lock(d); gpr.BindToRegister(d, true, false); } - MOV(32, M(&PowerPC::ppcState.spr[iIndex]), gpr.R(d)); + MOV(32, PPCSTATE(spr[iIndex]), gpr.R(d)); gpr.UnlockAll(); } @@ -190,7 +190,7 @@ void Jit64::mfspr(UGeckoInstruction inst) LEA(64, RAX, MComplex(RAX, RDX, SCALE_1, offset)); else ADD(64, R(RAX), R(RDX)); - MOV(64, M(&TL), R(RAX)); + MOV(64, PPCSTATE(spr[SPR_TL]), R(RAX)); // Two calls of TU/TL next to each other are extremely common in typical usage, so merge them // if we can. @@ -234,7 +234,7 @@ void Jit64::mfspr(UGeckoInstruction inst) default: gpr.Lock(d); gpr.BindToRegister(d, false); - MOV(32, gpr.R(d), M(&PowerPC::ppcState.spr[iIndex])); + MOV(32, gpr.R(d), PPCSTATE(spr[iIndex])); break; } gpr.UnlockAll(); @@ -251,7 +251,7 @@ void Jit64::mtmsr(UGeckoInstruction inst) gpr.Lock(inst.RS); gpr.BindToRegister(inst.RS, true, false); } - MOV(32, M(&MSR), gpr.R(inst.RS)); + MOV(32, PPCSTATE(msr), gpr.R(inst.RS)); gpr.UnlockAll(); gpr.Flush(); fpr.Flush(); @@ -259,17 +259,17 @@ void Jit64::mtmsr(UGeckoInstruction inst) // If some exceptions are pending and EE are now enabled, force checking // external exceptions when going out of mtmsr in order to execute delayed // interrupts as soon as possible. 
- TEST(32, M(&MSR), Imm32(0x8000)); + TEST(32, PPCSTATE(msr), Imm32(0x8000)); FixupBranch eeDisabled = J_CC(CC_Z); - TEST(32, M((void*)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT | EXCEPTION_PERFORMANCE_MONITOR | EXCEPTION_DECREMENTER)); + TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT | EXCEPTION_PERFORMANCE_MONITOR | EXCEPTION_DECREMENTER)); FixupBranch noExceptionsPending = J_CC(CC_Z); // Check if a CP interrupt is waiting and keep the GPU emulation in sync (issue 4336) TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP)); FixupBranch cpInt = J_CC(CC_NZ); - MOV(32, M(&PC), Imm32(js.compilerPC + 4)); + MOV(32, PPCSTATE(pc), Imm32(js.compilerPC + 4)); WriteExternalExceptionExit(); SetJumpTarget(cpInt); @@ -288,7 +288,7 @@ void Jit64::mfmsr(UGeckoInstruction inst) //Privileged? gpr.Lock(inst.RD); gpr.BindToRegister(inst.RD, false, true); - MOV(32, gpr.R(inst.RD), M(&MSR)); + MOV(32, gpr.R(inst.RD), PPCSTATE(msr)); gpr.UnlockAll(); } @@ -318,7 +318,7 @@ void Jit64::mfcr(UGeckoInstruction inst) if (i != 0) SHL(32, gpr.R(d), Imm8(4)); - MOV(64, R(cr_val), M(&PowerPC::ppcState.cr_val[i])); + MOV(64, R(cr_val), PPCSTATE(cr_val[i])); // EQ: Bits 31-0 == 0; set flag bit 1 TEST(32, R(cr_val), R(cr_val)); @@ -360,12 +360,12 @@ void Jit64::mtcrf(UGeckoInstruction inst) u64 newcrval = PPCCRToInternal(newcr); if ((s64)newcrval == (s32)newcrval) { - MOV(64, M(&PowerPC::ppcState.cr_val[i]), Imm32((s32)newcrval)); + MOV(64, PPCSTATE(cr_val[i]), Imm32((s32)newcrval)); } else { MOV(64, R(RAX), Imm64(newcrval)); - MOV(64, M(&PowerPC::ppcState.cr_val[i]), R(RAX)); + MOV(64, PPCSTATE(cr_val[i]), R(RAX)); } } } @@ -384,7 +384,7 @@ void Jit64::mtcrf(UGeckoInstruction inst) if (i != 0) AND(32, R(EAX), Imm8(0xF)); MOV(64, R(EAX), MScaled(EAX, SCALE_8, (u32)(u64)m_crTable)); - MOV(64, M(&PowerPC::ppcState.cr_val[i]), R(EAX)); + MOV(64, PPCSTATE(cr_val[i]), R(EAX)); } } gpr.UnlockAll(); @@ -400,8 +400,8 @@ void 
Jit64::mcrf(UGeckoInstruction inst) // USES_CR if (inst.CRFS != inst.CRFD) { - MOV(64, R(EAX), M(&PowerPC::ppcState.cr_val[inst.CRFS])); - MOV(64, M(&PowerPC::ppcState.cr_val[inst.CRFD]), R(EAX)); + MOV(64, R(EAX), PPCSTATE(cr_val[inst.CRFS])); + MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(EAX)); } } @@ -413,14 +413,14 @@ void Jit64::mcrxr(UGeckoInstruction inst) // USES_CR // Copy XER[0-3] into CR[inst.CRFD] - MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); + MOV(32, R(EAX), PPCSTATE(spr[SPR_XER])); SHR(32, R(EAX), Imm8(28)); MOV(64, R(EAX), MScaled(EAX, SCALE_8, (u32)(u64)m_crTable)); - MOV(64, M(&PowerPC::ppcState.cr_val[inst.CRFD]), R(EAX)); + MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(EAX)); // Clear XER[0-3] - AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(0x0FFFFFFF)); + AND(32, PPCSTATE(spr[SPR_XER]), Imm32(0x0FFFFFFF)); } void Jit64::crXXX(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp index 6798f390cc..8e9a2e5107 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp @@ -163,7 +163,7 @@ static void fregSpill(RegInfo& RI, X64Reg reg) #ifdef _WIN32 static const X64Reg RegAllocOrder[] = {RSI, RDI, R12, R13, R14, R8, R9, R10, R11}; #else -static const X64Reg RegAllocOrder[] = {RBP, R12, R13, R14, R8, R9, R10, R11}; +static const X64Reg RegAllocOrder[] = {R12, R13, R14, R8, R9, R10, R11}; #endif static const int RegAllocSize = sizeof(RegAllocOrder) / sizeof(X64Reg); static const X64Reg FRegAllocOrder[] = {XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5}; @@ -949,8 +949,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) // interpreter call at the moment, but optimizing interpreter // calls isn't completely out of the question... 
regSpillCallerSaved(RI); - Jit->MOV(32, M(&PC), Imm32(InstLoc)); - Jit->MOV(32, M(&NPC), Imm32(InstLoc+4)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); + Jit->MOV(32, PPCSTATE(npc), Imm32(InstLoc+4)); Jit->ABI_CallFunctionC((void*)GetInterpreterOp(InstCode), InstCode); break; @@ -962,7 +962,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = regFindFreeReg(RI); unsigned ppcreg = *I >> 8; - Jit->MOV(32, R(reg), M(&PowerPC::ppcState.gpr[ppcreg])); + Jit->MOV(32, R(reg), PPCSTATE(gpr[ppcreg])); RI.regs[reg] = I; break; } @@ -973,7 +973,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = regFindFreeReg(RI); unsigned ppcreg = *I >> 8; - Jit->MOV(64, R(reg), M(&PowerPC::ppcState.cr_val[ppcreg])); + Jit->MOV(64, R(reg), PPCSTATE(cr_val[ppcreg])); RI.regs[reg] = I; break; } @@ -983,7 +983,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = regFindFreeReg(RI); - Jit->MOV(32, R(reg), M(&CTR)); + Jit->MOV(32, R(reg), PPCSTATE_CTR); RI.regs[reg] = I; break; } @@ -993,7 +993,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = regFindFreeReg(RI); - Jit->MOV(32, R(reg), M(&LR)); + Jit->MOV(32, R(reg), PPCSTATE_LR); RI.regs[reg] = I; break; } @@ -1003,7 +1003,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = regFindFreeReg(RI); - Jit->MOV(32, R(reg), M(&MSR)); + Jit->MOV(32, R(reg), PPCSTATE(msr)); RI.regs[reg] = I; break; } @@ -1014,7 +1014,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = regFindFreeReg(RI); unsigned gqr = *I >> 8; - Jit->MOV(32, R(reg), M(&GQR(gqr))); + Jit->MOV(32, R(reg), PPCSTATE(spr[SPR_GQR0 + gqr])); RI.regs[reg] = I; break; } @@ -1024,7 +1024,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = regFindFreeReg(RI); - Jit->MOV(32, R(reg), 
M(&PowerPC::ppcState.spr[SPR_XER])); + Jit->MOV(32, R(reg), PPCSTATE(spr[SPR_XER])); Jit->SHR(32, R(reg), Imm8(29)); Jit->AND(32, R(reg), Imm8(1)); RI.regs[reg] = I; @@ -1042,7 +1042,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { X64Reg reg = regEnsureInReg(RI, getOp1(I)); unsigned ppcreg = *I >> 16; - Jit->MOV(64, M(&PowerPC::ppcState.cr_val[ppcreg]), R(reg)); + Jit->MOV(64, PPCSTATE(cr_val[ppcreg]), R(reg)); regNormalRegClear(RI, I); break; } @@ -1067,15 +1067,15 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) // If some exceptions are pending and EE are now enabled, force checking // external exceptions when going out of mtmsr in order to execute delayed // interrupts as soon as possible. - Jit->MOV(32, R(EAX), M(&MSR)); + Jit->MOV(32, R(EAX), PPCSTATE(msr)); Jit->TEST(32, R(EAX), Imm32(0x8000)); FixupBranch eeDisabled = Jit->J_CC(CC_Z); - Jit->MOV(32, R(EAX), M((void*)&PowerPC::ppcState.Exceptions)); + Jit->MOV(32, R(EAX), PPCSTATE(Exceptions)); Jit->TEST(32, R(EAX), R(EAX)); FixupBranch noExceptionsPending = Jit->J_CC(CC_Z); - Jit->MOV(32, M(&PC), Imm32(InstLoc + 4)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc + 4)); Jit->WriteExceptionExit(); // TODO: Implement WriteExternalExceptionExit for JitIL Jit->SetJumpTarget(eeDisabled); @@ -1114,8 +1114,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); Jit->AND(32, R(ECX), Imm8(0x1F)); Jit->SHL(32, R(ECX), Imm8(12)); - Jit->AND(32, M(&FPSCR), Imm32(~(0x1F << 12))); - Jit->OR(32, M(&FPSCR), R(ECX)); + Jit->AND(32, PPCSTATE(fpscr), Imm32(~(0x1F << 12))); + Jit->OR(32, PPCSTATE(fpscr), R(ECX)); regNormalRegClear(RI, I); break; } @@ -1641,7 +1641,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) regSpill(RI, EDX); u32 quantreg = *I >> 24; Jit->MOV(32, R(EAX), Imm32(0x3F07)); - Jit->AND(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_GQR0 + quantreg])); + 
Jit->AND(32, R(EAX), PPCSTATE(spr[SPR_GQR0 + quantreg])); Jit->MOVZX(32, 8, EDX, R(AL)); Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I))); @@ -1778,7 +1778,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = fregFindFreeReg(RI); unsigned ppcreg = *I >> 8; - Jit->MOVAPD(reg, M(&PowerPC::ppcState.ps[ppcreg])); + Jit->MOVAPD(reg, PPCSTATE(ps[ppcreg])); RI.fregs[reg] = I; break; } @@ -1797,14 +1797,14 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) Jit->AND(32, M(p+4), Imm32(0x80000000)); Jit->MOV(32, M(p), Imm32(0)); Jit->SetJumpTarget(ok); - Jit->MOVAPD(reg, M(&PowerPC::ppcState.ps[ppcreg])); + Jit->MOVAPD(reg, PPCSTATE(ps[ppcreg])); RI.fregs[reg] = I; break; } case StoreFReg: { unsigned ppcreg = *I >> 16; - Jit->MOVAPD(M(&PowerPC::ppcState.ps[ppcreg]), + Jit->MOVAPD(PPCSTATE(ps[ppcreg]), fregEnsureInReg(RI, getOp1(I))); fregNormalRegClear(RI, I); break; @@ -1913,15 +1913,15 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) Jit->ABI_CallFunction((void*)checkIsSNAN); Jit->TEST(8, R(EAX), R(EAX)); FixupBranch ok = Jit->J_CC(CC_Z); - Jit->OR(32, M(&FPSCR), Imm32(FPSCR_FX)); // FPSCR.FX = 1; - Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXSNAN)); // FPSCR.Hex |= mask; - Jit->TEST(32, M(&FPSCR), Imm32(FPSCR_VE)); + Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); // FPSCR.FX = 1; + Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXSNAN)); // FPSCR.Hex |= mask; + Jit->TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_VE)); FixupBranch finish0 = Jit->J_CC(CC_NZ); - Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; + Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; FixupBranch finish1 = Jit->J(); Jit->SetJumpTarget(ok); - Jit->OR(32, M(&FPSCR), Imm32(FPSCR_FX)); // FPSCR.FX = 1; - Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; + Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); // FPSCR.FX = 1; + Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= 
mask; Jit->SetJumpTarget(finish0); Jit->SetJumpTarget(finish1); } @@ -1942,8 +1942,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) Jit->ABI_CallFunction((void*)checkIsSNAN); Jit->TEST(8, R(EAX), R(EAX)); FixupBranch finish = Jit->J_CC(CC_Z); - Jit->OR(32, M(&FPSCR), Imm32(FPSCR_FX)); // FPSCR.FX = 1; - Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; + Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); // FPSCR.FX = 1; + Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; Jit->SetJumpTarget(finish); } @@ -2094,7 +2094,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) RI.Jit->Cleanup(); // is it needed? Jit->ABI_CallFunction((void *)&PowerPC::OnIdleIL); - Jit->MOV(32, M(&PC), Imm32(ibuild->GetImmValue( getOp2(I) ))); + Jit->MOV(32, PPCSTATE(pc), Imm32(ibuild->GetImmValue( getOp2(I) ))); Jit->WriteExceptionExit(); Jit->SetJumpTarget(cont); @@ -2179,7 +2179,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); Jit->ABI_CallFunction((void *)&CoreTiming::Idle); - Jit->MOV(32, M(&PC), Imm32(InstLoc)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); Jit->WriteExceptionExit(); break; } @@ -2187,14 +2187,14 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); Jit->LOCK(); - Jit->OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_SYSCALL)); - Jit->MOV(32, M(&PC), Imm32(InstLoc + 4)); + Jit->OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_SYSCALL)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc + 4)); Jit->WriteExceptionExit(); break; } case InterpreterBranch: { - Jit->MOV(32, R(EAX), M(&NPC)); + Jit->MOV(32, R(EAX), PPCSTATE(npc)); Jit->WriteExitDestInOpArg(R(EAX)); break; } @@ -2203,16 +2203,16 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) // See Interpreter rfi for details const u32 mask = 0x87C0FFFF; // 
MSR = (MSR & ~mask) | (SRR1 & mask); - Jit->MOV(32, R(EAX), M(&MSR)); - Jit->MOV(32, R(ECX), M(&SRR1)); + Jit->MOV(32, R(EAX), PPCSTATE(msr)); + Jit->MOV(32, R(ECX), PPCSTATE_SRR1); Jit->AND(32, R(EAX), Imm32(~mask)); Jit->AND(32, R(ECX), Imm32(mask)); Jit->OR(32, R(EAX), R(ECX)); // MSR &= 0xFFFBFFFF; // Mask used to clear the bit MSR[13] Jit->AND(32, R(EAX), Imm32(0xFFFBFFFF)); - Jit->MOV(32, M(&MSR), R(EAX)); + Jit->MOV(32, PPCSTATE(msr), R(EAX)); // NPC = SRR0; - Jit->MOV(32, R(EAX), M(&SRR0)); + Jit->MOV(32, R(EAX), PPCSTATE_SRR0); Jit->WriteRfiExitDestInOpArg(R(EAX)); break; } @@ -2220,14 +2220,14 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); //This instruction uses FPU - needs to add FP exception bailout - Jit->TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); // Test FP enabled bit + Jit->TEST(32, PPCSTATE(msr), Imm32(1 << 13)); // Test FP enabled bit FixupBranch b1 = Jit->J_CC(CC_NZ); // If a FPU exception occurs, the exception handler will read // from PC. Update PC with the latest value in case that happens. 
- Jit->MOV(32, M(&PC), Imm32(InstLoc)); - Jit->SUB(32, M(&PowerPC::ppcState.downcount), Imm32(Jit->js.downcountAmount)); - Jit->OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); + Jit->SUB(32, PPCSTATE(downcount), Imm32(Jit->js.downcountAmount)); + Jit->OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); Jit->WriteExceptionExit(); Jit->SetJumpTarget(b1); break; @@ -2235,12 +2235,12 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) case DSIExceptionCheck: { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); - Jit->TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_DSI)); + Jit->TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI)); FixupBranch noMemException = Jit->J_CC(CC_Z); // If a memory exception occurs, the exception handler will read // from PC. Update PC with the latest value in case that happens. - Jit->MOV(32, M(&PC), Imm32(InstLoc)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); Jit->WriteExceptionExit(); Jit->SetJumpTarget(noMemException); break; @@ -2250,8 +2250,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); // Address of instruction could not be translated - Jit->MOV(32, M(&NPC), Imm32(InstLoc)); - Jit->OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI)); + Jit->MOV(32, PPCSTATE(npc), Imm32(InstLoc)); + Jit->OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI)); // Remove the invalid instruction from the icache, forcing a recompile Jit->MOV(64, R(RAX), ImmPtr(jit->GetBlockCache()->GetICachePtr(InstLoc))); @@ -2263,16 +2263,16 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); - Jit->TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | 
EXCEPTION_ALIGNMENT)); + Jit->TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT)); FixupBranch clearInt = Jit->J_CC(CC_NZ); - Jit->TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT)); + Jit->TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT)); FixupBranch noExtException = Jit->J_CC(CC_Z); - Jit->TEST(32, M((void *)&PowerPC::ppcState.msr), Imm32(0x0008000)); + Jit->TEST(32, PPCSTATE(msr), Imm32(0x0008000)); FixupBranch noExtIntEnable = Jit->J_CC(CC_Z); Jit->TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH)); FixupBranch noCPInt = Jit->J_CC(CC_Z); - Jit->MOV(32, M(&PC), Imm32(InstLoc)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); Jit->WriteExceptionExit(); Jit->SetJumpTarget(noCPInt); @@ -2285,7 +2285,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); - Jit->MOV(32, M(&PC), Imm32(InstLoc)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); Jit->ABI_CallFunction(reinterpret_cast(&PowerPC::CheckBreakPoints)); Jit->TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); FixupBranch noBreakpoint = Jit->J_CC(CC_Z); diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp index 7b9cd785f2..dcc43c33e1 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp @@ -313,14 +313,14 @@ void JitIL::WriteCallInterpreter(UGeckoInstruction inst) { if (js.isLastInstruction) { - MOV(32, M(&PC), Imm32(js.compilerPC)); - MOV(32, M(&NPC), Imm32(js.compilerPC + 4)); + MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); + MOV(32, PPCSTATE(npc), Imm32(js.compilerPC + 4)); } Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst); 
ABI_CallFunctionC((void*)instr, inst.hex); if (js.isLastInstruction) { - MOV(32, R(EAX), M(&NPC)); + MOV(32, R(EAX), PPCSTATE(npc)); WriteRfiExitDestInOpArg(R(EAX)); } } @@ -341,7 +341,7 @@ void JitIL::FallBackToInterpreter(UGeckoInstruction _inst) void JitIL::HLEFunction(UGeckoInstruction _inst) { ABI_CallFunctionCC((void*)&HLE::Execute, js.compilerPC, _inst.hex); - MOV(32, R(EAX), M(&NPC)); + MOV(32, R(EAX), PPCSTATE(npc)); WriteExitDestInOpArg(R(EAX)); } @@ -398,7 +398,7 @@ void JitIL::WriteExit(u32 destination) { ABI_CallFunction((void *)JitILProfiler::End); } - SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); //If nobody has taken care of this yet (this can be removed when all branches are done) JitBlock *b = js.curBlock; @@ -417,7 +417,7 @@ void JitIL::WriteExit(u32 destination) } else { - MOV(32, M(&PC), Imm32(destination)); + MOV(32, PPCSTATE(pc), Imm32(destination)); JMP(asm_routines.dispatcher, true); } b->linkData.push_back(linkData); @@ -425,27 +425,27 @@ void JitIL::WriteExit(u32 destination) void JitIL::WriteExitDestInOpArg(const Gen::OpArg& arg) { - MOV(32, M(&PC), arg); + MOV(32, PPCSTATE(pc), arg); Cleanup(); if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) { ABI_CallFunction((void *)JitILProfiler::End); } - SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } void JitIL::WriteRfiExitDestInOpArg(const Gen::OpArg& arg) { - MOV(32, M(&PC), arg); - MOV(32, M(&NPC), arg); + MOV(32, PPCSTATE(pc), arg); + MOV(32, PPCSTATE(npc), arg); Cleanup(); if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) { ABI_CallFunction((void *)JitILProfiler::End); } ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExceptions)); - SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); + SUB(32, PPCSTATE(downcount), 
Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } @@ -456,10 +456,10 @@ void JitIL::WriteExceptionExit() { ABI_CallFunction((void *)JitILProfiler::End); } - MOV(32, R(EAX), M(&PC)); - MOV(32, M(&NPC), R(EAX)); + MOV(32, R(EAX), PPCSTATE(pc)); + MOV(32, PPCSTATE(npc), R(EAX)); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExceptions)); - SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } @@ -548,7 +548,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc // Downcount flag check. The last block decremented downcounter, and the flag should still be available. FixupBranch skip = J_CC(CC_NBE); - MOV(32, M(&PC), Imm32(js.blockStart)); + MOV(32, PPCSTATE(pc), Imm32(js.blockStart)); JMP(asm_routines.doTiming, true); // downcount hit zero - go doTiming. SetJumpTarget(skip); @@ -561,13 +561,13 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc if (js.fpa.any) { // This block uses FPU - needs to add FP exception bailout - TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); //Test FP enabled bit + TEST(32, PPCSTATE(msr), Imm32(1 << 13)); //Test FP enabled bit FixupBranch b1 = J_CC(CC_NZ); // If a FPU exception occurs, the exception handler will read // from PC. Update PC with the latest value in case that happens. 
- MOV(32, M(&PC), Imm32(js.blockStart)); - OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); + MOV(32, PPCSTATE(pc), Imm32(js.blockStart)); + OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); WriteExceptionExit(); SetJumpTarget(b1); @@ -635,7 +635,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc HLEFunction(function); if (type == HLE::HLE_HOOK_REPLACE) { - MOV(32, R(EAX), M(&NPC)); + MOV(32, R(EAX), PPCSTATE(npc)); jit->js.downcountAmount += jit->js.st.numCycles; WriteExitDestInOpArg(R(EAX)); break; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp index 7ab095bf36..68390396b8 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp @@ -93,20 +93,20 @@ void CommonAsmRoutines::GenFrsqrte() // Exception flags for zero input. SetJumpTarget(zero); - TEST(32, M(&FPSCR), Imm32(FPSCR_ZX)); + TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX)); FixupBranch skip_set_fx1 = J_CC(CC_NZ); - OR(32, M(&FPSCR), Imm32(FPSCR_FX)); + OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); SetJumpTarget(skip_set_fx1); - OR(32, M(&FPSCR), Imm32(FPSCR_ZX)); + OR(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX)); FixupBranch complex3 = J(); // Exception flags for negative input. SetJumpTarget(negative); - TEST(32, M(&FPSCR), Imm32(FPSCR_VXSQRT)); + TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_VXSQRT)); FixupBranch skip_set_fx2 = J_CC(CC_NZ); - OR(32, M(&FPSCR), Imm32(FPSCR_FX)); + OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); SetJumpTarget(skip_set_fx2); - OR(32, M(&FPSCR), Imm32(FPSCR_VXSQRT)); + OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXSQRT)); SetJumpTarget(complex1); SetJumpTarget(complex2); @@ -162,11 +162,11 @@ void CommonAsmRoutines::GenFres() // Exception flags for zero input. 
SetJumpTarget(zero); - TEST(32, M(&FPSCR), Imm32(FPSCR_ZX)); + TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX)); FixupBranch skip_set_fx1 = J_CC(CC_NZ); - OR(32, M(&FPSCR), Imm32(FPSCR_FX)); + OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); SetJumpTarget(skip_set_fx1); - OR(32, M(&FPSCR), Imm32(FPSCR_ZX)); + OR(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX)); SetJumpTarget(complex1); SetJumpTarget(complex2); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp index ac7ed17986..1fe88f9e28 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp @@ -113,7 +113,7 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r // check anyway. // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs - MOV(32, M(&PC), Imm32(pc)); + MOV(32, PPCSTATE(pc), Imm32(pc)); if (dataReg == ABI_PARAM2) PanicAlert("Incorrect use of SafeWriteRegToReg"); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index 2b927ba0d9..46c4be1715 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -370,6 +370,6 @@ using namespace Gen; void JitBlockCache::WriteDestroyBlock(const u8* location, u32 address) { XEmitter emit((u8 *)location); - emit.MOV(32, M(&PC), Imm32(address)); + emit.MOV(32, PPCSTATE(pc), Imm32(address)); emit.JMP(jit->GetAsmRoutines()->dispatcher, true); } diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index 58340b072e..4dd81015e8 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -468,7 +468,7 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce TEST(32, R(reg_addr), Imm32(mem_mask)); FixupBranch fast = J_CC(CC_Z, true); // PC is 
used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs - MOV(32, M(&PC), Imm32(jit->js.compilerPC)); + MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); bool noProlog = (0 != (flags & SAFE_LOADSTORE_NO_PROLOG)); bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP); ABI_PushRegistersAndAdjustStack(registersInUse, noProlog); @@ -718,7 +718,7 @@ static const u64 GC_ALIGNED16(psDoubleNoSign[2]) = {0x7FFFFFFFFFFFFFFFULL, 0}; // quite that necessary. void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm) { - AND(32, M(&FPSCR), Imm32(~FPRF_MASK)); + AND(32, PPCSTATE(fpscr), Imm32(~FPRF_MASK)); FixupBranch continue1, continue2, continue3, continue4; if (cpu_info.bSSE4_1) @@ -799,24 +799,24 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm) SetJumpTarget(continue3); SetJumpTarget(continue4); SHL(32, R(EAX), Imm8(FPRF_SHIFT)); - OR(32, M(&FPSCR), R(EAX)); + OR(32, PPCSTATE(fpscr), R(EAX)); } void EmuCodeBlock::JitClearCA() { - AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0 + AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0 } void EmuCodeBlock::JitSetCA() { - OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1 + OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1 } void EmuCodeBlock::JitClearCAOV(bool oe) { if (oe) - AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK & ~XER_OV_MASK)); //XER.CA, XER.OV = 0 + AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK & ~XER_OV_MASK)); //XER.CA, XER.OV = 0 else - AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0 + AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0 } diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h index addce16e93..2e865e50e6 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h @@ -13,13 +13,23 @@ namespace MMIO { class Mapping; } #define MEMCHECK_START 
\ Gen::FixupBranch memException; \ if (jit->js.memcheck) \ - { TEST(32, Gen::M((void *)&PowerPC::ppcState.Exceptions), Gen::Imm32(EXCEPTION_DSI)); \ + { TEST(32, PPCSTATE(Exceptions), Gen::Imm32(EXCEPTION_DSI)); \ memException = J_CC(Gen::CC_NZ, true); } #define MEMCHECK_END \ if (jit->js.memcheck) \ SetJumpTarget(memException); +// We offset by 0x80 because the range of one byte memory offsets is +// -0x80..0x7f. +#define PPCSTATE(x) MDisp(RBP, \ + (int) ((char *) &PowerPC::ppcState.x - (char *) &PowerPC::ppcState) - 0x80) +// In case you want to disable the ppcstate register: +// #define PPCSTATE(x) M((void*) &PowerPC::ppcState.x) +#define PPCSTATE_LR PPCSTATE(spr[SPR_LR]) +#define PPCSTATE_CTR PPCSTATE(spr[SPR_CTR]) +#define PPCSTATE_SRR0 PPCSTATE(spr[SPR_SRR0]) +#define PPCSTATE_SRR1 PPCSTATE(spr[SPR_SRR1]) // Like XCodeBlock but has some utilities for memory access. class EmuCodeBlock : public Gen::X64CodeBlock diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 7dd59f1573..26e4aa75c4 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -4,6 +4,8 @@ #pragma once +#include <tuple> + #include "Common/BreakPoints.h" #include "Common/Common.h" @@ -30,11 +32,6 @@ struct GC_ALIGNED64(PowerPCState) { u32 gpr[32]; // General purpose registers. r1 = stack pointer. - // The paired singles are strange : PS0 is stored in the full 64 bits of each FPR - // but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits. - // Since we want to use SIMD, SSE2 is the only viable alternative - 2x double. - u64 ps[32][2]; - u32 pc; // program counter u32 npc; @@ -64,6 +61,20 @@ struct GC_ALIGNED64(PowerPCState) // This variable should be inside of the CoreTiming namespace if we wanted to be correct. int downcount; +#if _M_X86_64 + // This member exists for the purpose of an assertion in x86 JitBase.cpp + // that its offset <= 0x100.
To minimize code size on x86, we want as much + // useful stuff in the one-byte offset range as possible - which is why ps + // is sitting down here. It currently doesn't make a difference on other + // supported architectures. + std::tuple<> above_fits_in_first_0x100; +#endif + + // The paired singles are strange : PS0 is stored in the full 64 bits of each FPR + // but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits. + // Since we want to use SIMD, SSE2 is the only viable alternative - 2x double. + GC_ALIGNED16(u64 ps[32][2]); + u32 sr[16]; // Segment registers. // special purpose registers - controls quantizers, DMA, and lots of other misc extensions. @@ -84,6 +95,10 @@ struct GC_ALIGNED64(PowerPCState) InstructionCache iCache; }; +#if _M_X86_64 +static_assert(offsetof(PowerPC::PowerPCState, above_fits_in_first_0x100) <= 0x100, "top of PowerPCState too big"); +#endif + enum CPUState { CPU_RUNNING = 0,