From 48891c6359cc1465bf083789fb8052b14262df36 Mon Sep 17 00:00:00 2001 From: comex Date: Mon, 1 Sep 2014 01:41:40 -0400 Subject: [PATCH 1/6] Reserve a register for ppcState. The register is RBP, previously in the GPR allocation order. The next commit will investigate whether there are too few GPRs (now or before), but for now there is no replacement. Previously, it was accessed RIP relatively; using RBP, anything in the first 0x100 bytes of ppcState (including all the GPRs) can be accessed with three fewer bytes. Code to access ppcState is generated constantly (mostly by register save/load), so in principle, this should improve instruction cache footprint significantly. It seems that this makes a significant performance difference in practice. The vast majority of this commit is mechanically replacing M(&PowerPC::ppcState.x) with a new macro PPCSTATE(x). Version 2: gets most of the cases which were using the register access macros. --- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 62 +++++------ Source/Core/Core/PowerPC/Jit64/JitAsm.cpp | 11 +- .../Core/Core/PowerPC/Jit64/JitRegCache.cpp | 6 +- Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp | 38 +++---- .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp | 12 +-- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 48 ++++----- .../Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp | 6 +- .../PowerPC/Jit64/Jit_LoadStorePaired.cpp | 2 +- .../PowerPC/Jit64/Jit_SystemRegisters.cpp | 54 +++++----- Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp | 100 +++++++++--------- Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp | 38 +++---- .../Core/PowerPC/JitCommon/JitAsmCommon.cpp | 18 ++-- .../Core/PowerPC/JitCommon/JitBackpatch.cpp | 2 +- .../Core/Core/PowerPC/JitCommon/JitCache.cpp | 2 +- .../Core/Core/PowerPC/JitCommon/Jit_Util.cpp | 14 +-- Source/Core/Core/PowerPC/JitCommon/Jit_Util.h | 12 ++- Source/Core/Core/PowerPC/PowerPC.h | 25 ++++- 17 files changed, 238 insertions(+), 212 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 102900ebe9..4ec492b1ab 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -210,8 +210,8 @@ void Jit64::WriteCallInterpreter(UGeckoInstruction inst) fpr.Flush(); if (js.isLastInstruction) { - MOV(32, M(&PC), Imm32(js.compilerPC)); - MOV(32, M(&NPC), Imm32(js.compilerPC + 4)); + MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); + MOV(32, PPCSTATE(npc), Imm32(js.compilerPC + 4)); } Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst); ABI_CallFunctionC((void*)instr, inst.hex); @@ -279,7 +279,7 @@ void Jit64::WriteExit(u32 destination) { Cleanup(); - SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); //If nobody has taken care of this yet (this can be removed when all branches are done) JitBlock *b = js.curBlock; @@ -298,7 +298,7 @@ void Jit64::WriteExit(u32 destination) } else { - MOV(32, M(&PC), Imm32(destination)); + MOV(32, PPCSTATE(pc), Imm32(destination)); JMP(asm_routines.dispatcher, true); } @@ -307,39 +307,39 @@ void Jit64::WriteExit(u32 destination) void Jit64::WriteExitDestInEAX() { - MOV(32, M(&PC), R(EAX)); + MOV(32, PPCSTATE(pc), R(EAX)); Cleanup(); - SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } void Jit64::WriteRfiExitDestInEAX() { - MOV(32, M(&PC), R(EAX)); - MOV(32, M(&NPC), R(EAX)); + MOV(32, PPCSTATE(pc), R(EAX)); + MOV(32, 
PPCSTATE(npc), R(EAX)); Cleanup(); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExceptions)); - SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } void Jit64::WriteExceptionExit() { Cleanup(); - MOV(32, R(EAX), M(&PC)); - MOV(32, M(&NPC), R(EAX)); + MOV(32, R(EAX), PPCSTATE(pc)); + MOV(32, PPCSTATE(npc), R(EAX)); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExceptions)); - SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } void Jit64::WriteExternalExceptionExit() { Cleanup(); - MOV(32, R(EAX), M(&PC)); - MOV(32, M(&NPC), R(EAX)); + MOV(32, R(EAX), PPCSTATE(pc)); + MOV(32, PPCSTATE(npc), R(EAX)); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExternalExceptions)); - SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } @@ -426,7 +426,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc // Downcount flag check. The last block decremented downcounter, and the flag should still be available. FixupBranch skip = J_CC(CC_NBE); - MOV(32, M(&PC), Imm32(js.blockStart)); + MOV(32, PPCSTATE(pc), Imm32(js.blockStart)); JMP(asm_routines.doTiming, true); // downcount hit zero - go doTiming. SetJumpTarget(skip); @@ -452,7 +452,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc } #if defined(_DEBUG) || defined(DEBUGFAST) || defined(NAN_CHECK) // should help logged stack-traces become more accurate - MOV(32, M(&PC), Imm32(js.blockStart)); + MOV(32, PPCSTATE(pc), Imm32(js.blockStart)); #endif // Start up the register allocators @@ -501,7 +501,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32) { js.fifoBytesThisBlock -= 32; - MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write + MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write u32 registersInUse = CallerSavedRegistersInUse(); ABI_PushRegistersAndAdjustStack(registersInUse, false); ABI_CallFunction((void *)&GPFifo::CheckGatherPipe); @@ -520,7 +520,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc HLEFunction(function); if (type == HLE::HLE_HOOK_REPLACE) { - MOV(32, R(EAX), M(&NPC)); + MOV(32, R(EAX), PPCSTATE(npc)); js.downcountAmount += js.st.numCycles; WriteExitDestInEAX(); break; @@ -537,13 +537,13 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc fpr.Flush(); //This instruction uses FPU - needs to add FP exception bailout - TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); // Test FP enabled bit + TEST(32, PPCSTATE(msr), Imm32(1 << 13)); // Test FP enabled bit FixupBranch b1 = J_CC(CC_NZ, true); // If a FPU exception occurs, the exception handler will read // from PC. Update PC with the latest value in case that happens. 
- MOV(32, M(&PC), Imm32(ops[i].address)); - OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); + MOV(32, PPCSTATE(pc), Imm32(ops[i].address)); + OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); WriteExceptionExit(); SetJumpTarget(b1); @@ -557,16 +557,16 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc gpr.Flush(); fpr.Flush(); - TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT)); + TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT)); FixupBranch clearInt = J_CC(CC_NZ, true); - TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT)); + TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT)); FixupBranch noExtException = J_CC(CC_Z, true); - TEST(32, M((void *)&PowerPC::ppcState.msr), Imm32(0x0008000)); + TEST(32, PPCSTATE(msr), Imm32(0x0008000)); FixupBranch noExtIntEnable = J_CC(CC_Z, true); TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH)); FixupBranch noCPInt = J_CC(CC_Z, true); - MOV(32, M(&PC), Imm32(ops[i].address)); + MOV(32, PPCSTATE(pc), Imm32(ops[i].address)); WriteExternalExceptionExit(); SetJumpTarget(noCPInt); @@ -580,7 +580,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc gpr.Flush(); fpr.Flush(); - MOV(32, M(&PC), Imm32(ops[i].address)); + MOV(32, PPCSTATE(pc), Imm32(ops[i].address)); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckBreakPoints)); TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); FixupBranch noBreakpoint = J_CC(CC_Z); @@ -597,12 +597,12 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc gpr.Flush(); fpr.Flush(); - TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_DSI)); + TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI)); FixupBranch noMemException = J_CC(CC_Z, true); // If a memory exception occurs, the exception handler will read // from PC. Update PC with the latest value in case that happens. - MOV(32, M(&PC), Imm32(ops[i].address)); + MOV(32, PPCSTATE(pc), Imm32(ops[i].address)); WriteExceptionExit(); SetJumpTarget(noMemException); } @@ -645,9 +645,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc if (code_block.m_memory_exception) { // Address of instruction could not be translated - MOV(32, M(&NPC), Imm32(js.compilerPC)); + MOV(32, PPCSTATE(npc), Imm32(js.compilerPC)); - OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI)); + OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI)); // Remove the invalid instruction from the icache, forcing a recompile MOV(64, R(RAX), ImmPtr(jit->GetBlockCache()->GetICachePtr(js.compilerPC))); diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index 2682ea80f2..b4c3bb9bc5 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -29,6 +29,7 @@ void Jit64AsmRoutineManager::Generate() // Two statically allocated registers. 
MOV(64, R(RBX), Imm64((u64)Memory::base)); MOV(64, R(R15), Imm64((u64)jit->GetBlockCache()->GetCodePointers())); //It's below 2GB so 32 bits are good enough + MOV(64, R(RBP), Imm64((u64)&PowerPC::ppcState + 0x80)); const u8* outerLoop = GetCodePtr(); ABI_CallFunction(reinterpret_cast(&CoreTiming::Advance)); @@ -55,7 +56,7 @@ void Jit64AsmRoutineManager::Generate() SetJumpTarget(skipToRealDispatch); dispatcherNoCheck = GetCodePtr(); - MOV(32, R(EAX), M(&PowerPC::ppcState.pc)); + MOV(32, R(EAX), PPCSTATE(pc)); dispatcherPcInEAX = GetCodePtr(); u32 mask = 0; @@ -113,7 +114,7 @@ void Jit64AsmRoutineManager::Generate() SetJumpTarget(notfound); //Ok, no block, let's jit - MOV(32, R(ABI_PARAM1), M(&PowerPC::ppcState.pc)); + MOV(32, R(ABI_PARAM1), PPCSTATE(pc)); CALL((void *)&Jit); JMP(dispatcherNoCheck); // no point in special casing this @@ -122,10 +123,10 @@ void Jit64AsmRoutineManager::Generate() doTiming = GetCodePtr(); // Test external exceptions. - TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT | EXCEPTION_PERFORMANCE_MONITOR | EXCEPTION_DECREMENTER)); + TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT | EXCEPTION_PERFORMANCE_MONITOR | EXCEPTION_DECREMENTER)); FixupBranch noExtException = J_CC(CC_Z); - MOV(32, R(EAX), M(&PC)); - MOV(32, M(&NPC), R(EAX)); + MOV(32, R(EAX), PPCSTATE(pc)); + MOV(32, PPCSTATE(npc), R(EAX)); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExternalExceptions)); SetJumpTarget(noExtException); diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp index e2e0ed6a6c..8a329eb723 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp @@ -202,7 +202,7 @@ const int* GPRRegCache::GetAllocationOrder(size_t& count) #ifdef _WIN32 RSI, RDI, R13, R14, R8, R9, R10, R11, R12, //, RCX #else - RBP, R13, R14, R8, R9, R10, R11, R12, //, RCX + R13, R14, R8, R9, R10, R11, R12, //, RCX #endif }; count = sizeof(allocationOrder) / sizeof(const int); @@ -221,12 +221,12 @@ const int* FPURegCache::GetAllocationOrder(size_t& count) OpArg GPRRegCache::GetDefaultLocation(size_t reg) const { - return M(&ppcState.gpr[reg]); + return PPCSTATE(gpr[reg]); } OpArg FPURegCache::GetDefaultLocation(size_t reg) const { - return M(&ppcState.ps[reg][0]); + return PPCSTATE(ps[reg][0]); } void RegCache::KillImmediate(size_t preg, bool doLoad, bool makeDirty) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp index 8bef37cb51..65b2cad7e8 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp @@ -28,9 +28,9 @@ void Jit64::sc(UGeckoInstruction inst) gpr.Flush(); fpr.Flush(); - MOV(32, M(&PC), Imm32(js.compilerPC + 4)); + MOV(32, PPCSTATE(pc), Imm32(js.compilerPC + 4)); LOCK(); - OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_SYSCALL)); + OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_SYSCALL)); WriteExceptionExit(); } @@ -45,12 +45,12 @@ void Jit64::rfi(UGeckoInstruction inst) const u32 mask = 0x87C0FFFF; const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13] // MSR = ((MSR & ~mask) | (SRR1 & mask)) & clearMSR13; - AND(32, M(&MSR), Imm32((~mask) & clearMSR13)); - MOV(32, R(EAX), M(&SRR1)); + AND(32, PPCSTATE(msr), Imm32((~mask) & clearMSR13)); + MOV(32, R(EAX), PPCSTATE_SRR1); AND(32, R(EAX), Imm32(mask & clearMSR13)); - OR(32, M(&MSR), R(EAX)); + OR(32, PPCSTATE(msr), R(EAX)); // NPC = SRR0; - MOV(32, R(EAX), 
M(&SRR0)); + MOV(32, R(EAX), PPCSTATE_SRR0); WriteRfiExitDestInEAX(); } @@ -62,7 +62,7 @@ void Jit64::bx(UGeckoInstruction inst) // We must always process the following sentence // even if the blocks are merged by PPCAnalyst::Flatten(). if (inst.LK) - MOV(32, M(&LR), Imm32(js.compilerPC + 4)); + MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // If this is not the last instruction of a block, // we will skip the rest process. @@ -82,7 +82,7 @@ void Jit64::bx(UGeckoInstruction inst) destination = js.compilerPC + SignExt26(inst.LI << 2); #ifdef ACID_TEST if (inst.LK) - AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xFF000000))); + AND(32, PPCSTATE(cr), Imm32(~(0xFF000000))); #endif if (destination == js.compilerPC) { @@ -108,7 +108,7 @@ void Jit64::bcx(UGeckoInstruction inst) FixupBranch pCTRDontBranch; if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR { - SUB(32, M(&CTR), Imm8(1)); + SUB(32, PPCSTATE_CTR, Imm8(1)); if (inst.BO & BO_BRANCH_IF_CTR_0) pCTRDontBranch = J_CC(CC_NZ, true); else @@ -123,7 +123,7 @@ void Jit64::bcx(UGeckoInstruction inst) } if (inst.LK) - MOV(32, M(&LR), Imm32(js.compilerPC + 4)); + MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); u32 destination; if (inst.AA) @@ -164,9 +164,9 @@ void Jit64::bcctrx(UGeckoInstruction inst) gpr.Flush(); fpr.Flush(); - MOV(32, R(EAX), M(&CTR)); + MOV(32, R(EAX), PPCSTATE_CTR); if (inst.LK_3) - MOV(32, M(&LR), Imm32(js.compilerPC + 4)); // LR = PC + 4; + MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4; AND(32, R(EAX), Imm32(0xFFFFFFFC)); WriteExitDestInEAX(); } @@ -179,11 +179,11 @@ void Jit64::bcctrx(UGeckoInstruction inst) FixupBranch b = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), !(inst.BO_2 & BO_BRANCH_IF_TRUE)); - MOV(32, R(EAX), M(&CTR)); + MOV(32, R(EAX), PPCSTATE_CTR); AND(32, R(EAX), Imm32(0xFFFFFFFC)); - //MOV(32, M(&PC), R(EAX)); => Already done in WriteExitDestInEAX() + //MOV(32, PPCSTATE(pc), R(EAX)); => Already done in WriteExitDestInEAX() if (inst.LK_3) - MOV(32, M(&LR), Imm32(js.compilerPC + 4)); // LR = PC + 4; + MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4; gpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE); @@ -204,7 +204,7 @@ void Jit64::bclrx(UGeckoInstruction inst) FixupBranch pCTRDontBranch; if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR { - SUB(32, M(&CTR), Imm8(1)); + SUB(32, PPCSTATE_CTR, Imm8(1)); if (inst.BO & BO_BRANCH_IF_CTR_0) pCTRDontBranch = J_CC(CC_NZ, true); else @@ -221,13 +221,13 @@ void Jit64::bclrx(UGeckoInstruction inst) // This below line can be used to prove that blr "eats flags" in practice. // This observation will let us do a lot of fun observations. 
#ifdef ACID_TEST - AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xFF000000))); + AND(32, PPCSTATE(cr), Imm32(~(0xFF000000))); #endif - MOV(32, R(EAX), M(&LR)); + MOV(32, R(EAX), PPCSTATE_LR); AND(32, R(EAX), Imm32(0xFFFFFFFC)); if (inst.LK) - MOV(32, M(&LR), Imm32(js.compilerPC + 4)); + MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); gpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index 537f02db0d..88a23e8a19 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -248,7 +248,7 @@ void Jit64::fcmpx(UGeckoInstruction inst) fpr.BindToRegister(b, true); if (fprf) - AND(32, M(&FPSCR), Imm32(~FPRF_MASK)); + AND(32, PPCSTATE(fpscr), Imm32(~FPRF_MASK)); // Are we masking sNaN invalid floating point exceptions? If not this could crash if we don't handle the exception? UCOMISD(fpr.R(b).GetSimpleReg(), fpr.R(a)); @@ -273,14 +273,14 @@ void Jit64::fcmpx(UGeckoInstruction inst) MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_EQ))); if (fprf) - OR(32, M(&FPSCR), Imm32(CR_EQ << FPRF_SHIFT)); + OR(32, PPCSTATE(fpscr), Imm32(CR_EQ << FPRF_SHIFT)); continue1 = J(); SetJumpTarget(pNaN); MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_SO))); if (fprf) - OR(32, M(&FPSCR), Imm32(CR_SO << FPRF_SHIFT)); + OR(32, PPCSTATE(fpscr), Imm32(CR_SO << FPRF_SHIFT)); if (a != b) { @@ -289,13 +289,13 @@ void Jit64::fcmpx(UGeckoInstruction inst) SetJumpTarget(pGreater); MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_GT))); if (fprf) - OR(32, M(&FPSCR), Imm32(CR_GT << FPRF_SHIFT)); + OR(32, PPCSTATE(fpscr), Imm32(CR_GT << FPRF_SHIFT)); continue3 = J(); SetJumpTarget(pLesser); MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_LT))); if (fprf) - OR(32, M(&FPSCR), Imm32(CR_LT << FPRF_SHIFT)); + OR(32, PPCSTATE(fpscr), Imm32(CR_LT << FPRF_SHIFT)); } SetJumpTarget(continue1); @@ -305,7 +305,7 @@ void Jit64::fcmpx(UGeckoInstruction inst) SetJumpTarget(continue3); } - MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX)); + MOV(64, PPCSTATE(cr_val[crf]), R(RAX)); fpr.UnlockAll(); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index a6b60f8f23..82b1e6e5a3 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -21,12 +21,12 @@ void Jit64::GenerateConstantOverflow(bool overflow) if (overflow) { //XER[OV/SO] = 1 - OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); + OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); } else { //XER[OV] = 0 - AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_OV_MASK)); + AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_OV_MASK)); } } @@ -34,11 +34,11 @@ void Jit64::GenerateOverflow() { FixupBranch jno = J_CC(CC_NO); //XER[OV/SO] = 1 - OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); + OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); FixupBranch exit = J(); SetJumpTarget(jno); //XER[OV] = 0 - AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_OV_MASK)); + AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_OV_MASK)); SetJumpTarget(exit); } @@ -54,7 +54,7 @@ void Jit64::FinalizeCarryOverflow(bool oe, bool inv) JitSetCA(); SetJumpTarget(carry1); //XER[OV/SO] = 1 - OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); + OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); FixupBranch exit = J(); 
SetJumpTarget(jno); // Do carry @@ -74,7 +74,7 @@ void Jit64::FinalizeCarryOverflow(bool oe, bool inv) void Jit64::GetCarryEAXAndClear() { - MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); + MOV(32, R(EAX), PPCSTATE(spr[SPR_XER])); BTR(32, R(EAX), Imm8(29)); } @@ -109,7 +109,7 @@ void Jit64::FinalizeCarryGenerateOverflowEAX(bool oe, bool inv) SetJumpTarget(carry1); } // Dump EAX back into XER - MOV(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX)); + MOV(32, PPCSTATE(spr[SPR_XER]), R(EAX)); } // Assumes that the flags were just set through an addition. @@ -117,10 +117,10 @@ void Jit64::GenerateCarry() { // USES_XER FixupBranch pNoCarry = J_CC(CC_NC); - OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); + OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_CA_MASK)); FixupBranch pContinue = J(); SetJumpTarget(pNoCarry); - AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(XER_CA_MASK))); + AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~(XER_CA_MASK))); SetJumpTarget(pContinue); } @@ -128,12 +128,12 @@ void Jit64::ComputeRC(const Gen::OpArg & arg) { if (arg.IsImm()) { - MOV(64, M(&PowerPC::ppcState.cr_val[0]), Imm32((s32)arg.offset)); + MOV(64, PPCSTATE(cr_val[0]), Imm32((s32)arg.offset)); } else { MOVSX(64, 32, RAX, arg); - MOV(64, M(&PowerPC::ppcState.cr_val[0]), R(RAX)); + MOV(64, PPCSTATE(cr_val[0]), R(RAX)); } } @@ -375,7 +375,7 @@ void Jit64::cmpXX(UGeckoInstruction inst) compareResult = CR_LT; } MOV(64, R(RAX), Imm64(PPCCRToInternal(compareResult))); - MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX)); + MOV(64, PPCSTATE(cr_val[crf]), R(RAX)); gpr.UnlockAll(); if (merge_branch) @@ -393,7 +393,7 @@ void Jit64::cmpXX(UGeckoInstruction inst) if (js.next_inst.OPCD == 16) // bcx { if (js.next_inst.LK) - MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); + MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4)); u32 destination; if (js.next_inst.AA) @@ -405,16 +405,16 @@ void Jit64::cmpXX(UGeckoInstruction inst) else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx { if (js.next_inst.LK) - MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); - MOV(32, R(EAX), M(&CTR)); + MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4)); + MOV(32, R(EAX), PPCSTATE_CTR); AND(32, R(EAX), Imm32(0xFFFFFFFC)); WriteExitDestInEAX(); } else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx { - MOV(32, R(EAX), M(&LR)); + MOV(32, R(EAX), PPCSTATE_LR); if (js.next_inst.LK) - MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); + MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4)); WriteExitDestInEAX(); } else @@ -461,7 +461,7 @@ void Jit64::cmpXX(UGeckoInstruction inst) comparand = R(ABI_PARAM1); } SUB(64, R(RAX), comparand); - MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX)); + MOV(64, PPCSTATE(cr_val[crf]), R(RAX)); if (merge_branch) { @@ -492,7 +492,7 @@ void Jit64::cmpXX(UGeckoInstruction inst) if (js.next_inst.OPCD == 16) // bcx { if (js.next_inst.LK) - MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); + MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4)); u32 destination; if (js.next_inst.AA) @@ -504,19 +504,19 @@ void Jit64::cmpXX(UGeckoInstruction inst) else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx { if (js.next_inst.LK) - MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); + MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4)); - MOV(32, R(EAX), M(&CTR)); + MOV(32, R(EAX), PPCSTATE_CTR); AND(32, R(EAX), Imm32(0xFFFFFFFC)); WriteExitDestInEAX(); } else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx { - MOV(32, R(EAX), M(&LR)); 
+ MOV(32, R(EAX), PPCSTATE_LR); AND(32, R(EAX), Imm32(0xFFFFFFFC)); if (js.next_inst.LK) - MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); + MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4)); WriteExitDestInEAX(); } @@ -2020,7 +2020,7 @@ void Jit64::twx(UGeckoInstruction inst) SetJumpTarget(fixup); } LOCK(); - OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_PROGRAM)); + OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_PROGRAM)); gpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index f085284ed8..5042018cc9 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -123,7 +123,7 @@ void Jit64::lXXx(UGeckoInstruction inst) ABI_PopRegistersAndAdjustStack(registersInUse, false); // ! we must continue executing of the loop after exception handling, maybe there is still 0 in r0 - //MOV(32, M(&PowerPC::ppcState.pc), Imm32(js.compilerPC)); + //MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); WriteExceptionExit(); SetJumpTarget(noIdle); @@ -331,7 +331,7 @@ void Jit64::stX(UGeckoInstruction inst) if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe) { // Helps external systems know which instruction triggered the write - MOV(32, M(&PC), Imm32(jit->js.compilerPC)); + MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); gpr.FlushLockX(ABI_PARAM1); MOV(32, R(ABI_PARAM1), gpr.R(s)); @@ -367,7 +367,7 @@ void Jit64::stX(UGeckoInstruction inst) else { // Helps external systems know which instruction triggered the write - MOV(32, M(&PC), Imm32(jit->js.compilerPC)); + MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); u32 registersInUse = CallerSavedRegistersInUse(); ABI_PushRegistersAndAdjustStack(registersInUse, false); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp index 1129d5e833..894d96789d 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -43,7 +43,7 @@ void Jit64::psq_st(UGeckoInstruction inst) // UU[SCALE]UUUUU[TYPE] where SCALE is 6 bits and TYPE is 3 bits, so we have to AND with // 0b0011111100000111, or 0x3F07. MOV(32, R(EAX), Imm32(0x3F07)); - AND(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_GQR0 + inst.I])); + AND(32, R(EAX), PPCSTATE(spr[SPR_GQR0 + inst.I])); MOVZX(32, 8, EDX, R(AL)); // FIXME: Fix ModR/M encoding to allow [EDX*4+disp32] without a base register! diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index 9c00e70be9..e430144aa8 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -16,22 +16,22 @@ void Jit64::GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate) switch (bit) { case CR_SO_BIT: // check bit 61 set - BT(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(61)); + BT(64, PPCSTATE(cr_val[field]), Imm8(61)); SETcc(negate ? CC_NC : CC_C, R(out)); break; case CR_EQ_BIT: // check bits 31-0 == 0 - CMP(32, M(&PowerPC::ppcState.cr_val[field]), Imm8(0)); + CMP(32, PPCSTATE(cr_val[field]), Imm8(0)); SETcc(negate ? CC_NZ : CC_Z, R(out)); break; case CR_GT_BIT: // check val > 0 - CMP(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(0)); + CMP(64, PPCSTATE(cr_val[field]), Imm8(0)); SETcc(negate ? 
CC_NG : CC_G, R(out)); break; case CR_LT_BIT: // check bit 62 set - BT(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(62)); + BT(64, PPCSTATE(cr_val[field]), Imm8(62)); SETcc(negate ? CC_NC : CC_C, R(out)); break; @@ -42,7 +42,7 @@ void Jit64::GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate) void Jit64::SetCRFieldBit(int field, int bit, Gen::X64Reg in) { - MOV(64, R(ABI_PARAM1), M(&PowerPC::ppcState.cr_val[field])); + MOV(64, R(ABI_PARAM1), PPCSTATE(cr_val[field])); MOVZX(32, 8, in, R(in)); switch (bit) @@ -75,7 +75,7 @@ void Jit64::SetCRFieldBit(int field, int bit, Gen::X64Reg in) } BTS(64, R(ABI_PARAM1), Imm8(32)); - MOV(64, M(&PowerPC::ppcState.cr_val[field]), R(ABI_PARAM1)); + MOV(64, PPCSTATE(cr_val[field]), R(ABI_PARAM1)); } FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) @@ -83,19 +83,19 @@ FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) switch (bit) { case CR_SO_BIT: // check bit 61 set - BT(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(61)); + BT(64, PPCSTATE(cr_val[field]), Imm8(61)); return J_CC(jump_if_set ? CC_C : CC_NC, true); case CR_EQ_BIT: // check bits 31-0 == 0 - CMP(32, M(&PowerPC::ppcState.cr_val[field]), Imm8(0)); + CMP(32, PPCSTATE(cr_val[field]), Imm8(0)); return J_CC(jump_if_set ? CC_Z : CC_NZ, true); case CR_GT_BIT: // check val > 0 - CMP(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(0)); + CMP(64, PPCSTATE(cr_val[field]), Imm8(0)); return J_CC(jump_if_set ? CC_G : CC_LE, true); case CR_LT_BIT: // check bit 62 set - BT(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(62)); + BT(64, PPCSTATE(cr_val[field]), Imm8(62)); return J_CC(jump_if_set ? CC_C : CC_NC, true); default: @@ -154,7 +154,7 @@ void Jit64::mtspr(UGeckoInstruction inst) gpr.Lock(d); gpr.BindToRegister(d, true, false); } - MOV(32, M(&PowerPC::ppcState.spr[iIndex]), gpr.R(d)); + MOV(32, PPCSTATE(spr[iIndex]), gpr.R(d)); gpr.UnlockAll(); } @@ -190,7 +190,7 @@ void Jit64::mfspr(UGeckoInstruction inst) LEA(64, RAX, MComplex(RAX, RDX, SCALE_1, offset)); else ADD(64, R(RAX), R(RDX)); - MOV(64, M(&TL), R(RAX)); + MOV(64, PPCSTATE(spr[SPR_TL]), R(RAX)); // Two calls of TU/TL next to each other are extremely common in typical usage, so merge them // if we can. @@ -234,7 +234,7 @@ void Jit64::mfspr(UGeckoInstruction inst) default: gpr.Lock(d); gpr.BindToRegister(d, false); - MOV(32, gpr.R(d), M(&PowerPC::ppcState.spr[iIndex])); + MOV(32, gpr.R(d), PPCSTATE(spr[iIndex])); break; } gpr.UnlockAll(); @@ -251,7 +251,7 @@ void Jit64::mtmsr(UGeckoInstruction inst) gpr.Lock(inst.RS); gpr.BindToRegister(inst.RS, true, false); } - MOV(32, M(&MSR), gpr.R(inst.RS)); + MOV(32, PPCSTATE(msr), gpr.R(inst.RS)); gpr.UnlockAll(); gpr.Flush(); fpr.Flush(); @@ -259,17 +259,17 @@ void Jit64::mtmsr(UGeckoInstruction inst) // If some exceptions are pending and EE are now enabled, force checking // external exceptions when going out of mtmsr in order to execute delayed // interrupts as soon as possible. 
- TEST(32, M(&MSR), Imm32(0x8000)); + TEST(32, PPCSTATE(msr), Imm32(0x8000)); FixupBranch eeDisabled = J_CC(CC_Z); - TEST(32, M((void*)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT | EXCEPTION_PERFORMANCE_MONITOR | EXCEPTION_DECREMENTER)); + TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT | EXCEPTION_PERFORMANCE_MONITOR | EXCEPTION_DECREMENTER)); FixupBranch noExceptionsPending = J_CC(CC_Z); // Check if a CP interrupt is waiting and keep the GPU emulation in sync (issue 4336) TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP)); FixupBranch cpInt = J_CC(CC_NZ); - MOV(32, M(&PC), Imm32(js.compilerPC + 4)); + MOV(32, PPCSTATE(pc), Imm32(js.compilerPC + 4)); WriteExternalExceptionExit(); SetJumpTarget(cpInt); @@ -288,7 +288,7 @@ void Jit64::mfmsr(UGeckoInstruction inst) //Privileged? gpr.Lock(inst.RD); gpr.BindToRegister(inst.RD, false, true); - MOV(32, gpr.R(inst.RD), M(&MSR)); + MOV(32, gpr.R(inst.RD), PPCSTATE(msr)); gpr.UnlockAll(); } @@ -318,7 +318,7 @@ void Jit64::mfcr(UGeckoInstruction inst) if (i != 0) SHL(32, gpr.R(d), Imm8(4)); - MOV(64, R(cr_val), M(&PowerPC::ppcState.cr_val[i])); + MOV(64, R(cr_val), PPCSTATE(cr_val[i])); // EQ: Bits 31-0 == 0; set flag bit 1 TEST(32, R(cr_val), R(cr_val)); @@ -360,12 +360,12 @@ void Jit64::mtcrf(UGeckoInstruction inst) u64 newcrval = PPCCRToInternal(newcr); if ((s64)newcrval == (s32)newcrval) { - MOV(64, M(&PowerPC::ppcState.cr_val[i]), Imm32((s32)newcrval)); + MOV(64, PPCSTATE(cr_val[i]), Imm32((s32)newcrval)); } else { MOV(64, R(RAX), Imm64(newcrval)); - MOV(64, M(&PowerPC::ppcState.cr_val[i]), R(RAX)); + MOV(64, PPCSTATE(cr_val[i]), R(RAX)); } } } @@ -384,7 +384,7 @@ void Jit64::mtcrf(UGeckoInstruction inst) if (i != 0) AND(32, R(EAX), Imm8(0xF)); MOV(64, R(EAX), MScaled(EAX, SCALE_8, (u32)(u64)m_crTable)); - MOV(64, M(&PowerPC::ppcState.cr_val[i]), R(EAX)); + MOV(64, PPCSTATE(cr_val[i]), R(EAX)); } } gpr.UnlockAll(); @@ -400,8 +400,8 @@ void Jit64::mcrf(UGeckoInstruction inst) // USES_CR if (inst.CRFS != inst.CRFD) { - MOV(64, R(EAX), M(&PowerPC::ppcState.cr_val[inst.CRFS])); - MOV(64, M(&PowerPC::ppcState.cr_val[inst.CRFD]), R(EAX)); + MOV(64, R(EAX), PPCSTATE(cr_val[inst.CRFS])); + MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(EAX)); } } @@ -413,14 +413,14 @@ void Jit64::mcrxr(UGeckoInstruction inst) // USES_CR // Copy XER[0-3] into CR[inst.CRFD] - MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); + MOV(32, R(EAX), PPCSTATE(spr[SPR_XER])); SHR(32, R(EAX), Imm8(28)); MOV(64, R(EAX), MScaled(EAX, SCALE_8, (u32)(u64)m_crTable)); - MOV(64, M(&PowerPC::ppcState.cr_val[inst.CRFD]), R(EAX)); + MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(EAX)); // Clear XER[0-3] - AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(0x0FFFFFFF)); + AND(32, PPCSTATE(spr[SPR_XER]), Imm32(0x0FFFFFFF)); } void Jit64::crXXX(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp index 6798f390cc..8e9a2e5107 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp @@ -163,7 +163,7 @@ static void fregSpill(RegInfo& RI, X64Reg reg) #ifdef _WIN32 static const X64Reg RegAllocOrder[] = {RSI, RDI, R12, R13, R14, R8, R9, R10, R11}; #else -static const X64Reg RegAllocOrder[] = {RBP, R12, R13, R14, R8, R9, R10, R11}; +static const X64Reg RegAllocOrder[] = {R12, R13, R14, R8, R9, R10, R11}; #endif static const int RegAllocSize = sizeof(RegAllocOrder) / sizeof(X64Reg); static const X64Reg 
FRegAllocOrder[] = {XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5}; @@ -949,8 +949,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) // interpreter call at the moment, but optimizing interpreter // calls isn't completely out of the question... regSpillCallerSaved(RI); - Jit->MOV(32, M(&PC), Imm32(InstLoc)); - Jit->MOV(32, M(&NPC), Imm32(InstLoc+4)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); + Jit->MOV(32, PPCSTATE(npc), Imm32(InstLoc+4)); Jit->ABI_CallFunctionC((void*)GetInterpreterOp(InstCode), InstCode); break; @@ -962,7 +962,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = regFindFreeReg(RI); unsigned ppcreg = *I >> 8; - Jit->MOV(32, R(reg), M(&PowerPC::ppcState.gpr[ppcreg])); + Jit->MOV(32, R(reg), PPCSTATE(gpr[ppcreg])); RI.regs[reg] = I; break; } @@ -973,7 +973,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = regFindFreeReg(RI); unsigned ppcreg = *I >> 8; - Jit->MOV(64, R(reg), M(&PowerPC::ppcState.cr_val[ppcreg])); + Jit->MOV(64, R(reg), PPCSTATE(cr_val[ppcreg])); RI.regs[reg] = I; break; } @@ -983,7 +983,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = regFindFreeReg(RI); - Jit->MOV(32, R(reg), M(&CTR)); + Jit->MOV(32, R(reg), PPCSTATE_CTR); RI.regs[reg] = I; break; } @@ -993,7 +993,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = regFindFreeReg(RI); - Jit->MOV(32, R(reg), M(&LR)); + Jit->MOV(32, R(reg), PPCSTATE_LR); RI.regs[reg] = I; break; } @@ -1003,7 +1003,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = regFindFreeReg(RI); - Jit->MOV(32, R(reg), M(&MSR)); + Jit->MOV(32, R(reg), PPCSTATE(msr)); RI.regs[reg] = I; break; } @@ -1014,7 +1014,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = regFindFreeReg(RI); unsigned gqr = *I >> 8; - Jit->MOV(32, R(reg), M(&GQR(gqr))); + Jit->MOV(32, R(reg), PPCSTATE(spr[SPR_GQR0 + gqr])); RI.regs[reg] = I; break; } @@ -1024,7 +1024,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = regFindFreeReg(RI); - Jit->MOV(32, R(reg), M(&PowerPC::ppcState.spr[SPR_XER])); + Jit->MOV(32, R(reg), PPCSTATE(spr[SPR_XER])); Jit->SHR(32, R(reg), Imm8(29)); Jit->AND(32, R(reg), Imm8(1)); RI.regs[reg] = I; @@ -1042,7 +1042,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { X64Reg reg = regEnsureInReg(RI, getOp1(I)); unsigned ppcreg = *I >> 16; - Jit->MOV(64, M(&PowerPC::ppcState.cr_val[ppcreg]), R(reg)); + Jit->MOV(64, PPCSTATE(cr_val[ppcreg]), R(reg)); regNormalRegClear(RI, I); break; } @@ -1067,15 +1067,15 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) // If some exceptions are pending and EE are now enabled, force checking // external exceptions when going out of mtmsr in order to execute delayed // interrupts as soon as possible. 
- Jit->MOV(32, R(EAX), M(&MSR)); + Jit->MOV(32, R(EAX), PPCSTATE(msr)); Jit->TEST(32, R(EAX), Imm32(0x8000)); FixupBranch eeDisabled = Jit->J_CC(CC_Z); - Jit->MOV(32, R(EAX), M((void*)&PowerPC::ppcState.Exceptions)); + Jit->MOV(32, R(EAX), PPCSTATE(Exceptions)); Jit->TEST(32, R(EAX), R(EAX)); FixupBranch noExceptionsPending = Jit->J_CC(CC_Z); - Jit->MOV(32, M(&PC), Imm32(InstLoc + 4)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc + 4)); Jit->WriteExceptionExit(); // TODO: Implement WriteExternalExceptionExit for JitIL Jit->SetJumpTarget(eeDisabled); @@ -1114,8 +1114,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); Jit->AND(32, R(ECX), Imm8(0x1F)); Jit->SHL(32, R(ECX), Imm8(12)); - Jit->AND(32, M(&FPSCR), Imm32(~(0x1F << 12))); - Jit->OR(32, M(&FPSCR), R(ECX)); + Jit->AND(32, PPCSTATE(fpscr), Imm32(~(0x1F << 12))); + Jit->OR(32, PPCSTATE(fpscr), R(ECX)); regNormalRegClear(RI, I); break; } @@ -1641,7 +1641,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) regSpill(RI, EDX); u32 quantreg = *I >> 24; Jit->MOV(32, R(EAX), Imm32(0x3F07)); - Jit->AND(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_GQR0 + quantreg])); + Jit->AND(32, R(EAX), PPCSTATE(spr[SPR_GQR0 + quantreg])); Jit->MOVZX(32, 8, EDX, R(AL)); Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I))); @@ -1778,7 +1778,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = fregFindFreeReg(RI); unsigned ppcreg = *I >> 8; - Jit->MOVAPD(reg, M(&PowerPC::ppcState.ps[ppcreg])); + Jit->MOVAPD(reg, PPCSTATE(ps[ppcreg])); RI.fregs[reg] = I; break; } @@ -1797,14 +1797,14 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) Jit->AND(32, M(p+4), Imm32(0x80000000)); Jit->MOV(32, M(p), Imm32(0)); Jit->SetJumpTarget(ok); - Jit->MOVAPD(reg, M(&PowerPC::ppcState.ps[ppcreg])); + Jit->MOVAPD(reg, PPCSTATE(ps[ppcreg])); RI.fregs[reg] = I; break; } case StoreFReg: { unsigned ppcreg = *I >> 16; - Jit->MOVAPD(M(&PowerPC::ppcState.ps[ppcreg]), + Jit->MOVAPD(PPCSTATE(ps[ppcreg]), fregEnsureInReg(RI, getOp1(I))); fregNormalRegClear(RI, I); break; @@ -1913,15 +1913,15 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) Jit->ABI_CallFunction((void*)checkIsSNAN); Jit->TEST(8, R(EAX), R(EAX)); FixupBranch ok = Jit->J_CC(CC_Z); - Jit->OR(32, M(&FPSCR), Imm32(FPSCR_FX)); // FPSCR.FX = 1; - Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXSNAN)); // FPSCR.Hex |= mask; - Jit->TEST(32, M(&FPSCR), Imm32(FPSCR_VE)); + Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); // FPSCR.FX = 1; + Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXSNAN)); // FPSCR.Hex |= mask; + Jit->TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_VE)); FixupBranch finish0 = Jit->J_CC(CC_NZ); - Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; + Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; FixupBranch finish1 = Jit->J(); Jit->SetJumpTarget(ok); - Jit->OR(32, M(&FPSCR), Imm32(FPSCR_FX)); // FPSCR.FX = 1; - Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; + Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); // FPSCR.FX = 1; + Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; Jit->SetJumpTarget(finish0); Jit->SetJumpTarget(finish1); } @@ -1942,8 +1942,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) Jit->ABI_CallFunction((void*)checkIsSNAN); Jit->TEST(8, R(EAX), R(EAX)); FixupBranch finish = Jit->J_CC(CC_Z); - Jit->OR(32, M(&FPSCR), Imm32(FPSCR_FX)); // FPSCR.FX = 
1; - Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; + Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); // FPSCR.FX = 1; + Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; Jit->SetJumpTarget(finish); } @@ -2094,7 +2094,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) RI.Jit->Cleanup(); // is it needed? Jit->ABI_CallFunction((void *)&PowerPC::OnIdleIL); - Jit->MOV(32, M(&PC), Imm32(ibuild->GetImmValue( getOp2(I) ))); + Jit->MOV(32, PPCSTATE(pc), Imm32(ibuild->GetImmValue( getOp2(I) ))); Jit->WriteExceptionExit(); Jit->SetJumpTarget(cont); @@ -2179,7 +2179,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); Jit->ABI_CallFunction((void *)&CoreTiming::Idle); - Jit->MOV(32, M(&PC), Imm32(InstLoc)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); Jit->WriteExceptionExit(); break; } @@ -2187,14 +2187,14 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); Jit->LOCK(); - Jit->OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_SYSCALL)); - Jit->MOV(32, M(&PC), Imm32(InstLoc + 4)); + Jit->OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_SYSCALL)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc + 4)); Jit->WriteExceptionExit(); break; } case InterpreterBranch: { - Jit->MOV(32, R(EAX), M(&NPC)); + Jit->MOV(32, R(EAX), PPCSTATE(npc)); Jit->WriteExitDestInOpArg(R(EAX)); break; } @@ -2203,16 +2203,16 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) // See Interpreter rfi for details const u32 mask = 0x87C0FFFF; // MSR = (MSR & ~mask) | (SRR1 & mask); - Jit->MOV(32, R(EAX), M(&MSR)); - Jit->MOV(32, R(ECX), M(&SRR1)); + Jit->MOV(32, R(EAX), PPCSTATE(msr)); + Jit->MOV(32, R(ECX), PPCSTATE_SRR1); Jit->AND(32, R(EAX), Imm32(~mask)); Jit->AND(32, R(ECX), Imm32(mask)); Jit->OR(32, R(EAX), R(ECX)); // MSR &= 0xFFFBFFFF; // Mask used to clear the bit MSR[13] Jit->AND(32, R(EAX), Imm32(0xFFFBFFFF)); - Jit->MOV(32, M(&MSR), R(EAX)); + Jit->MOV(32, PPCSTATE(msr), R(EAX)); // NPC = SRR0; - Jit->MOV(32, R(EAX), M(&SRR0)); + Jit->MOV(32, R(EAX), PPCSTATE_SRR0); Jit->WriteRfiExitDestInOpArg(R(EAX)); break; } @@ -2220,14 +2220,14 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); //This instruction uses FPU - needs to add FP exception bailout - Jit->TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); // Test FP enabled bit + Jit->TEST(32, PPCSTATE(msr), Imm32(1 << 13)); // Test FP enabled bit FixupBranch b1 = Jit->J_CC(CC_NZ); // If a FPU exception occurs, the exception handler will read // from PC. Update PC with the latest value in case that happens. 
- Jit->MOV(32, M(&PC), Imm32(InstLoc)); - Jit->SUB(32, M(&PowerPC::ppcState.downcount), Imm32(Jit->js.downcountAmount)); - Jit->OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); + Jit->SUB(32, PPCSTATE(downcount), Imm32(Jit->js.downcountAmount)); + Jit->OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); Jit->WriteExceptionExit(); Jit->SetJumpTarget(b1); break; @@ -2235,12 +2235,12 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) case DSIExceptionCheck: { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); - Jit->TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_DSI)); + Jit->TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI)); FixupBranch noMemException = Jit->J_CC(CC_Z); // If a memory exception occurs, the exception handler will read // from PC. Update PC with the latest value in case that happens. - Jit->MOV(32, M(&PC), Imm32(InstLoc)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); Jit->WriteExceptionExit(); Jit->SetJumpTarget(noMemException); break; @@ -2250,8 +2250,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); // Address of instruction could not be translated - Jit->MOV(32, M(&NPC), Imm32(InstLoc)); - Jit->OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI)); + Jit->MOV(32, PPCSTATE(npc), Imm32(InstLoc)); + Jit->OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI)); // Remove the invalid instruction from the icache, forcing a recompile Jit->MOV(64, R(RAX), ImmPtr(jit->GetBlockCache()->GetICachePtr(InstLoc))); @@ -2263,16 +2263,16 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); - Jit->TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT)); + Jit->TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT)); FixupBranch clearInt = Jit->J_CC(CC_NZ); - Jit->TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT)); + Jit->TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT)); FixupBranch noExtException = Jit->J_CC(CC_Z); - Jit->TEST(32, M((void *)&PowerPC::ppcState.msr), Imm32(0x0008000)); + Jit->TEST(32, PPCSTATE(msr), Imm32(0x0008000)); FixupBranch noExtIntEnable = Jit->J_CC(CC_Z); Jit->TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH)); FixupBranch noCPInt = Jit->J_CC(CC_Z); - Jit->MOV(32, M(&PC), Imm32(InstLoc)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); Jit->WriteExceptionExit(); Jit->SetJumpTarget(noCPInt); @@ -2285,7 +2285,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); - Jit->MOV(32, M(&PC), Imm32(InstLoc)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); Jit->ABI_CallFunction(reinterpret_cast(&PowerPC::CheckBreakPoints)); Jit->TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); FixupBranch noBreakpoint = Jit->J_CC(CC_Z); diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp index 7b9cd785f2..dcc43c33e1 100644 --- 
a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp @@ -313,14 +313,14 @@ void JitIL::WriteCallInterpreter(UGeckoInstruction inst) { if (js.isLastInstruction) { - MOV(32, M(&PC), Imm32(js.compilerPC)); - MOV(32, M(&NPC), Imm32(js.compilerPC + 4)); + MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); + MOV(32, PPCSTATE(npc), Imm32(js.compilerPC + 4)); } Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst); ABI_CallFunctionC((void*)instr, inst.hex); if (js.isLastInstruction) { - MOV(32, R(EAX), M(&NPC)); + MOV(32, R(EAX), PPCSTATE(npc)); WriteRfiExitDestInOpArg(R(EAX)); } } @@ -341,7 +341,7 @@ void JitIL::FallBackToInterpreter(UGeckoInstruction _inst) void JitIL::HLEFunction(UGeckoInstruction _inst) { ABI_CallFunctionCC((void*)&HLE::Execute, js.compilerPC, _inst.hex); - MOV(32, R(EAX), M(&NPC)); + MOV(32, R(EAX), PPCSTATE(npc)); WriteExitDestInOpArg(R(EAX)); } @@ -398,7 +398,7 @@ void JitIL::WriteExit(u32 destination) { ABI_CallFunction((void *)JitILProfiler::End); } - SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); //If nobody has taken care of this yet (this can be removed when all branches are done) JitBlock *b = js.curBlock; @@ -417,7 +417,7 @@ void JitIL::WriteExit(u32 destination) } else { - MOV(32, M(&PC), Imm32(destination)); + MOV(32, PPCSTATE(pc), Imm32(destination)); JMP(asm_routines.dispatcher, true); } b->linkData.push_back(linkData); @@ -425,27 +425,27 @@ void JitIL::WriteExit(u32 destination) void JitIL::WriteExitDestInOpArg(const Gen::OpArg& arg) { - MOV(32, M(&PC), arg); + MOV(32, PPCSTATE(pc), arg); Cleanup(); if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) { ABI_CallFunction((void *)JitILProfiler::End); } - SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } void JitIL::WriteRfiExitDestInOpArg(const Gen::OpArg& arg) { - MOV(32, M(&PC), arg); - MOV(32, M(&NPC), arg); + MOV(32, PPCSTATE(pc), arg); + MOV(32, PPCSTATE(npc), arg); Cleanup(); if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) { ABI_CallFunction((void *)JitILProfiler::End); } ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExceptions)); - SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } @@ -456,10 +456,10 @@ void JitIL::WriteExceptionExit() { ABI_CallFunction((void *)JitILProfiler::End); } - MOV(32, R(EAX), M(&PC)); - MOV(32, M(&NPC), R(EAX)); + MOV(32, R(EAX), PPCSTATE(pc)); + MOV(32, PPCSTATE(npc), R(EAX)); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExceptions)); - SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } @@ -548,7 +548,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc // Downcount flag check. The last block decremented downcounter, and the flag should still be available. FixupBranch skip = J_CC(CC_NBE); - MOV(32, M(&PC), Imm32(js.blockStart)); + MOV(32, PPCSTATE(pc), Imm32(js.blockStart)); JMP(asm_routines.doTiming, true); // downcount hit zero - go doTiming. 
SetJumpTarget(skip); @@ -561,13 +561,13 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc if (js.fpa.any) { // This block uses FPU - needs to add FP exception bailout - TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); //Test FP enabled bit + TEST(32, PPCSTATE(msr), Imm32(1 << 13)); //Test FP enabled bit FixupBranch b1 = J_CC(CC_NZ); // If a FPU exception occurs, the exception handler will read // from PC. Update PC with the latest value in case that happens. - MOV(32, M(&PC), Imm32(js.blockStart)); - OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); + MOV(32, PPCSTATE(pc), Imm32(js.blockStart)); + OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); WriteExceptionExit(); SetJumpTarget(b1); @@ -635,7 +635,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc HLEFunction(function); if (type == HLE::HLE_HOOK_REPLACE) { - MOV(32, R(EAX), M(&NPC)); + MOV(32, R(EAX), PPCSTATE(npc)); jit->js.downcountAmount += jit->js.st.numCycles; WriteExitDestInOpArg(R(EAX)); break; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp index 7ab095bf36..68390396b8 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp @@ -93,20 +93,20 @@ void CommonAsmRoutines::GenFrsqrte() // Exception flags for zero input. SetJumpTarget(zero); - TEST(32, M(&FPSCR), Imm32(FPSCR_ZX)); + TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX)); FixupBranch skip_set_fx1 = J_CC(CC_NZ); - OR(32, M(&FPSCR), Imm32(FPSCR_FX)); + OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); SetJumpTarget(skip_set_fx1); - OR(32, M(&FPSCR), Imm32(FPSCR_ZX)); + OR(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX)); FixupBranch complex3 = J(); // Exception flags for negative input. SetJumpTarget(negative); - TEST(32, M(&FPSCR), Imm32(FPSCR_VXSQRT)); + TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_VXSQRT)); FixupBranch skip_set_fx2 = J_CC(CC_NZ); - OR(32, M(&FPSCR), Imm32(FPSCR_FX)); + OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); SetJumpTarget(skip_set_fx2); - OR(32, M(&FPSCR), Imm32(FPSCR_VXSQRT)); + OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXSQRT)); SetJumpTarget(complex1); SetJumpTarget(complex2); @@ -162,11 +162,11 @@ void CommonAsmRoutines::GenFres() // Exception flags for zero input. SetJumpTarget(zero); - TEST(32, M(&FPSCR), Imm32(FPSCR_ZX)); + TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX)); FixupBranch skip_set_fx1 = J_CC(CC_NZ); - OR(32, M(&FPSCR), Imm32(FPSCR_FX)); + OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); SetJumpTarget(skip_set_fx1); - OR(32, M(&FPSCR), Imm32(FPSCR_ZX)); + OR(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX)); SetJumpTarget(complex1); SetJumpTarget(complex2); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp index ac7ed17986..1fe88f9e28 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp @@ -113,7 +113,7 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r // check anyway. 
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs - MOV(32, M(&PC), Imm32(pc)); + MOV(32, PPCSTATE(pc), Imm32(pc)); if (dataReg == ABI_PARAM2) PanicAlert("Incorrect use of SafeWriteRegToReg"); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index 2b927ba0d9..46c4be1715 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -370,6 +370,6 @@ using namespace Gen; void JitBlockCache::WriteDestroyBlock(const u8* location, u32 address) { XEmitter emit((u8 *)location); - emit.MOV(32, M(&PC), Imm32(address)); + emit.MOV(32, PPCSTATE(pc), Imm32(address)); emit.JMP(jit->GetAsmRoutines()->dispatcher, true); } diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index 58340b072e..4dd81015e8 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -468,7 +468,7 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce TEST(32, R(reg_addr), Imm32(mem_mask)); FixupBranch fast = J_CC(CC_Z, true); // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs - MOV(32, M(&PC), Imm32(jit->js.compilerPC)); + MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); bool noProlog = (0 != (flags & SAFE_LOADSTORE_NO_PROLOG)); bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP); ABI_PushRegistersAndAdjustStack(registersInUse, noProlog); @@ -718,7 +718,7 @@ static const u64 GC_ALIGNED16(psDoubleNoSign[2]) = {0x7FFFFFFFFFFFFFFFULL, 0}; // quite that necessary. void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm) { - AND(32, M(&FPSCR), Imm32(~FPRF_MASK)); + AND(32, PPCSTATE(fpscr), Imm32(~FPRF_MASK)); FixupBranch continue1, continue2, continue3, continue4; if (cpu_info.bSSE4_1) @@ -799,24 +799,24 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm) SetJumpTarget(continue3); SetJumpTarget(continue4); SHL(32, R(EAX), Imm8(FPRF_SHIFT)); - OR(32, M(&FPSCR), R(EAX)); + OR(32, PPCSTATE(fpscr), R(EAX)); } void EmuCodeBlock::JitClearCA() { - AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0 + AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0 } void EmuCodeBlock::JitSetCA() { - OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1 + OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1 } void EmuCodeBlock::JitClearCAOV(bool oe) { if (oe) - AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK & ~XER_OV_MASK)); //XER.CA, XER.OV = 0 + AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK & ~XER_OV_MASK)); //XER.CA, XER.OV = 0 else - AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0 + AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0 } diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h index addce16e93..2e865e50e6 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h @@ -13,13 +13,23 @@ namespace MMIO { class Mapping; } #define MEMCHECK_START \ Gen::FixupBranch memException; \ if (jit->js.memcheck) \ - { TEST(32, Gen::M((void *)&PowerPC::ppcState.Exceptions), Gen::Imm32(EXCEPTION_DSI)); \ + { TEST(32, PPCSTATE(Exceptions), Gen::Imm32(EXCEPTION_DSI)); \ memException = J_CC(Gen::CC_NZ, true); } #define MEMCHECK_END \ if (jit->js.memcheck) \ SetJumpTarget(memException); 
+// We offset by 0x80 because the range of one byte memory offsets is +// -0x80..0x7f. +#define PPCSTATE(x) MDisp(RBP, \ + (int) ((char *) &PowerPC::ppcState.x - (char *) &PowerPC::ppcState) - 0x80) +// In case you want to disable the ppcstate register: +// #define PPCSTATE(x) M((void*) &PowerPC::ppcState.x) +#define PPCSTATE_LR PPCSTATE(spr[SPR_LR]) +#define PPCSTATE_CTR PPCSTATE(spr[SPR_CTR]) +#define PPCSTATE_SRR0 PPCSTATE(spr[SPR_SRR0]) +#define PPCSTATE_SRR1 PPCSTATE(spr[SPR_SRR1]) // Like XCodeBlock but has some utilities for memory access. class EmuCodeBlock : public Gen::X64CodeBlock diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 7dd59f1573..26e4aa75c4 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -4,6 +4,8 @@ #pragma once +#include + #include "Common/BreakPoints.h" #include "Common/Common.h" @@ -30,11 +32,6 @@ struct GC_ALIGNED64(PowerPCState) { u32 gpr[32]; // General purpose registers. r1 = stack pointer. - // The paired singles are strange : PS0 is stored in the full 64 bits of each FPR - // but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits. - // Since we want to use SIMD, SSE2 is the only viable alternative - 2x double. - u64 ps[32][2]; - u32 pc; // program counter u32 npc; @@ -64,6 +61,20 @@ struct GC_ALIGNED64(PowerPCState) // This variable should be inside of the CoreTiming namespace if we wanted to be correct. int downcount; +#if _M_X86_64 + // This member exists for the purpose of an assertion in x86 JitBase.cpp + // that its offset <= 0x100. To minimize code size on x86, we want as much + // useful stuff in the one-byte offset range as possible - which is why ps + // is sitting down here. It currently doesn't make a difference on other + // supported architectures. + std::tuple<> above_fits_in_first_0x100; +#endif + + // The paired singles are strange : PS0 is stored in the full 64 bits of each FPR + // but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits. + // Since we want to use SIMD, SSE2 is the only viable alternative - 2x double. + GC_ALIGNED16(u64 ps[32][2]); + u32 sr[16]; // Segment registers. // special purpose registers - controls quantizers, DMA, and lots of other misc extensions. @@ -84,6 +95,10 @@ struct GC_ALIGNED64(PowerPCState) InstructionCache iCache; }; +#if _M_X86_64 +static_assert(offsetof(PowerPC::PowerPCState, above_fits_in_first_0x100) <= 0x100, "top of PowerPCState too big"); +#endif + enum CPUState { CPU_RUNNING = 0, From 487eb967eb3696a76f15aed3a53b0bdd4b0cfc03 Mon Sep 17 00:00:00 2001 From: comex Date: Wed, 3 Sep 2014 02:19:32 -0400 Subject: [PATCH 2/6] Fix a bug with update loads in memcheck mode. In two cases, my old code was using a temporary register but not saving it properly; it basically worked by accident (an otherwise useless FlushLock was causing CallerSavedRegistersInUse to think it was in use by the GPR cache, even though it was actually a temporary). I'm going to modify this in the next commit to use RDX, but I didn't want to leave a broken revision in the middle. 
--- Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index 5042018cc9..57af170b42 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -228,7 +228,13 @@ void Jit64::lXXx(UGeckoInstruction inst) gpr.Lock(a, b, d); gpr.BindToRegister(d, js.memcheck, true); - SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, CallerSavedRegistersInUse(), signExtend); + u32 registersInUse = CallerSavedRegistersInUse(); + if (update && storeAddress) + { + // We need to save the (usually scratch) address register for the update. + registersInUse |= (1 << ABI_PARAM1); + } + SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, registersInUse, signExtend); if (update && storeAddress) { @@ -482,7 +488,7 @@ void Jit64::lmw(UGeckoInstruction inst) ADD(32, R(ECX), gpr.R(inst.RA)); for (int i = inst.RD; i < 32; i++) { - SafeLoadToReg(EAX, R(ECX), 32, (i - inst.RD) * 4, CallerSavedRegistersInUse(), false); + SafeLoadToReg(EAX, R(ECX), 32, (i - inst.RD) * 4, CallerSavedRegistersInUse() | (1 << ECX), false); gpr.BindToRegister(i, false, true); MOV(32, gpr.R(i), R(EAX)); } From 67cdb6e07a9c1c3ddbfd6e61036756259344e289 Mon Sep 17 00:00:00 2001 From: comex Date: Thu, 4 Sep 2014 01:02:21 -0400 Subject: [PATCH 3/6] Factor code from ABI_CallFunctionRR and GetWriteTrampoline into a helper, and fix a special case. The special case is where the registers are actually to be swapped (i.e. func(ABI_PARAM2, ABI_PARAM1)); this was previously impossible but would be ugly not to handle anyway. --- Source/Core/Common/x64ABI.cpp | 39 ++++++++++++------- Source/Core/Common/x64Emitter.h | 3 ++ .../Core/PowerPC/JitCommon/JitBackpatch.cpp | 17 +------- 3 files changed, 29 insertions(+), 30 deletions(-) diff --git a/Source/Core/Common/x64ABI.cpp b/Source/Core/Common/x64ABI.cpp index c86c8f8b24..45465619bd 100644 --- a/Source/Core/Common/x64ABI.cpp +++ b/Source/Core/Common/x64ABI.cpp @@ -353,20 +353,7 @@ void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1) void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2, bool noProlog) { ABI_AlignStack(0, noProlog); - if (reg2 != ABI_PARAM1) - { - if (reg1 != ABI_PARAM1) - MOV(64, R(ABI_PARAM1), R(reg1)); - if (reg2 != ABI_PARAM2) - MOV(64, R(ABI_PARAM2), R(reg2)); - } - else - { - if (reg2 != ABI_PARAM2) - MOV(64, R(ABI_PARAM2), R(reg2)); - if (reg1 != ABI_PARAM1) - MOV(64, R(ABI_PARAM1), R(reg1)); - } + MOVTwo(64, ABI_PARAM1, reg1, ABI_PARAM2, reg2, ABI_PARAM3); u64 distance = u64(func) - (u64(code) + 5); if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) @@ -382,6 +369,30 @@ void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2, bool noP ABI_RestoreStack(0, noProlog); } +void XEmitter::MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, Gen::X64Reg dst2, Gen::X64Reg src2, X64Reg temp) +{ + if (dst1 == src2 && dst2 == src1) + { + // need a temporary + MOV(bits, R(temp), R(src1)); + src1 = temp; + } + if (src2 != dst1) + { + if (dst1 != src1) + MOV(bits, R(dst1), R(src1)); + if (dst2 != src2) + MOV(bits, R(dst2), R(src2)); + } + else + { + if (dst2 != src2) + MOV(bits, R(dst2), R(src2)); + if (dst1 != src1) + MOV(bits, R(dst1), R(src1)); + } +} + void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2) { ABI_AlignStack(0); diff --git
a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h index 558af41767..d6f0699e84 100644 --- a/Source/Core/Common/x64Emitter.h +++ b/Source/Core/Common/x64Emitter.h @@ -753,6 +753,9 @@ public: void ABI_CallFunctionR(void *func, X64Reg reg1); void ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2, bool noProlog = false); + // Helper method for the above, or can be used separately. + void MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, Gen::X64Reg dst2, Gen::X64Reg src2, Gen::X64Reg temp); + // A function that doesn't have any control over what it will do to regs, // such as the dispatcher, should be surrounded by these. void ABI_PushAllCalleeSavedRegsAndAdjustStack(); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp index 1fe88f9e28..a51aac6480 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp @@ -115,22 +115,7 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs MOV(32, PPCSTATE(pc), Imm32(pc)); - if (dataReg == ABI_PARAM2) - PanicAlert("Incorrect use of SafeWriteRegToReg"); - if (addrReg != ABI_PARAM1) - { - if (ABI_PARAM1 != dataReg) - MOV(64, R(ABI_PARAM1), R((X64Reg)dataReg)); - if (ABI_PARAM2 != addrReg) - MOV(64, R(ABI_PARAM2), R((X64Reg)addrReg)); - } - else - { - if (ABI_PARAM2 != addrReg) - MOV(64, R(ABI_PARAM2), R((X64Reg)addrReg)); - if (ABI_PARAM1 != dataReg) - MOV(64, R(ABI_PARAM1), R((X64Reg)dataReg)); - } + MOVTwo(64, ABI_PARAM1, dataReg, ABI_PARAM2, addrReg, ABI_PARAM3); if (info.displacement) { From 8dea26762dcc842ebcb227c0b9b9bbefc487624c Mon Sep 17 00:00:00 2001 From: comex Date: Tue, 2 Sep 2014 18:54:46 -0400 Subject: [PATCH 4/6] Rationalize temporary register usage. Rather than using a variety of registers including RSI, ABI_PARAM1 (either RCX or RDI), RCX, and RDX, the rule is: - RDI and RSI are never used. This allows them to be allocated on Unix, bringing parity with Windows. - RDX is a permanent temporary register along with RAX (and is thus not FlushLocked). It's used frequently enough that allocating it would probably be a bad idea, as it would constantly get flushed. - RCX is allocatable, but is flushed in two situations: - Non-immediate shifts (rlwnm), because x86 requires RCX to be used. - Paired single loads and stores, because they require three temporary registers: the helper functions take two integer arguments, and another register is used as an index to get the function address. These should be relatively rare. While we're at it, in stores, use the registers directly where possible rather than always using temporaries (by making SafeWriteRegToReg clobber less). The address doesn't need to be clobbered in the usual case, and on CPUs with MOVBE, neither does the value. Oh, and get rid of a useless MEMCHECK. This commit does not actually add new registers to the allocation order; it is intended to test for any performance or correctness issues separately. 
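The MOVBE point above reduces to a one-line predicate, which appears later in this series as WriteClobbersRegValue in Jit_Util.h: a byte-swapping store destroys the value register only when it has to fall back to BSWAP+MOV, since BSWAP mutates its operand in place while MOVBE swaps on the way to memory, and 8-bit accesses never swap at all. A standalone sketch with cpu_info mocked instead of pulled from Common/CPUDetect.h:

    #include <cstdio>

    // Mock of the relevant feature flag; the real field is cpu_info.bMOVBE
    // from Common/CPUDetect.h.
    static struct { bool bMOVBE; } cpu_info = { false };

    // Same body as the helper this series adds to Jit_Util.h.
    static bool WriteClobbersRegValue(int accessSize, bool swap)
    {
        return swap && !cpu_info.bMOVBE && accessSize > 8;
    }

    int main()
    {
        printf("32-bit swapped store clobbers value: %d\n", WriteClobbersRegValue(32, true)); // 1
        cpu_info.bMOVBE = true;
        printf("same store with MOVBE:               %d\n", WriteClobbersRegValue(32, true)); // 0
        printf("8-bit store:                         %d\n", WriteClobbersRegValue(8, true));  // 0
        return 0;
    }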
--- Source/Core/Core/PowerPC/Jit64/Jit.h | 5 +- Source/Core/Core/PowerPC/Jit64/JitAsm.cpp | 25 ++-- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 15 +-- .../Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp | 64 ++++++---- .../PowerPC/Jit64/Jit_LoadStoreFloating.cpp | 22 ++-- .../PowerPC/Jit64/Jit_LoadStorePaired.cpp | 6 +- .../PowerPC/Jit64/Jit_SystemRegisters.cpp | 40 +++--- Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp | 115 +++++++++--------- .../Core/PowerPC/JitCommon/JitAsmCommon.cpp | 12 +- .../Core/PowerPC/JitCommon/JitBackpatch.cpp | 5 +- .../Core/Core/PowerPC/JitCommon/Jit_Util.cpp | 27 ++-- Source/Core/Core/PowerPC/JitCommon/Jit_Util.h | 13 +- Source/Core/Core/PowerPC/JitILCommon/IR.cpp | 2 +- 13 files changed, 179 insertions(+), 172 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index c0b5c73260..76abfcf0d4 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -107,10 +107,9 @@ public: void GenerateRC(); void ComputeRC(const Gen::OpArg & arg); - // Reads a given bit of a given CR register part. Clobbers ABI_PARAM1, - // don't forget to xlock it before. + // Reads a given bit of a given CR register part. void GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate = false); - // Clobbers ABI_PARAM1, xlock it before. + // Clobbers RDX. void SetCRFieldBit(int field, int bit, Gen::X64Reg in); // Generates a branch that will check if a given bit of a CR register part diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index b4c3bb9bc5..5c56bfe8c1 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -9,13 +9,12 @@ using namespace Gen; -//GLOBAL STATIC ALLOCATIONS x86 -//EAX - ubiquitous scratch register - EVERYBODY scratches this - -//GLOBAL STATIC ALLOCATIONS x64 -//EAX - ubiquitous scratch register - EVERYBODY scratches this -//RBX - Base pointer of memory -//R15 - Pointer to array of block pointers +// GLOBAL STATIC ALLOCATIONS x64 +// RAX - ubiquitous scratch register - EVERYBODY scratches this +// RDX - second scratch register +// RBX - Base pointer of memory +// R15 - Pointer to array of block pointers +// RBP - Pointer to ppcState+0x80 // PLAN: no more block numbers - crazy opcodes just contain offset within // dynarec buffer @@ -73,8 +72,8 @@ void Jit64AsmRoutineManager::Generate() no_mem = J_CC(CC_NZ); } AND(32, R(EAX), Imm32(JIT_ICACHE_MASK)); - MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->iCache)); - MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0)); + MOV(64, R(RDX), Imm64((u64)jit->GetBlockCache()->iCache)); + MOV(32, R(EAX), MComplex(RDX, EAX, SCALE_1, 0)); if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack) { @@ -86,8 +85,8 @@ void Jit64AsmRoutineManager::Generate() TEST(32, R(EAX), Imm32(JIT_ICACHE_VMEM_BIT)); FixupBranch no_vmem = J_CC(CC_Z); AND(32, R(EAX), Imm32(JIT_ICACHE_MASK)); - MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->iCacheVMEM)); - MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0)); + MOV(64, R(RDX), Imm64((u64)jit->GetBlockCache()->iCacheVMEM)); + MOV(32, R(EAX), MComplex(RDX, EAX, SCALE_1, 0)); if (Core::g_CoreStartupParameter.bWii) exit_vmem = J(); SetJumpTarget(no_vmem); @@ -97,8 +96,8 @@ void Jit64AsmRoutineManager::Generate() TEST(32, R(EAX), Imm32(JIT_ICACHE_EXRAM_BIT)); FixupBranch no_exram = J_CC(CC_Z); AND(32, R(EAX), Imm32(JIT_ICACHEEX_MASK)); - MOV(64, R(RSI), 
Imm64((u64)jit->GetBlockCache()->iCacheEx)); - MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0)); + MOV(64, R(RDX), Imm64((u64)jit->GetBlockCache()->iCacheEx)); + MOV(32, R(EAX), MComplex(RDX, EAX, SCALE_1, 0)); SetJumpTarget(no_exram); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 82b1e6e5a3..dd4db95c55 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -442,8 +442,8 @@ void Jit64::cmpXX(UGeckoInstruction inst) if (!comparand.IsImm()) { - MOVSX(64, 32, ABI_PARAM1, comparand); - comparand = R(ABI_PARAM1); + MOVSX(64, 32, RDX, comparand); + comparand = R(RDX); } } else @@ -454,11 +454,11 @@ void Jit64::cmpXX(UGeckoInstruction inst) MOVZX(64, 32, RAX, gpr.R(a)); if (comparand.IsImm()) - MOV(32, R(ABI_PARAM1), comparand); + MOV(32, R(RDX), comparand); else - MOVZX(64, 32, ABI_PARAM1, comparand); + MOVZX(64, 32, RDX, comparand); - comparand = R(ABI_PARAM1); + comparand = R(RDX); } SUB(64, R(RAX), comparand); MOV(64, PPCSTATE(cr_val[crf]), R(RAX)); @@ -1170,7 +1170,6 @@ void Jit64::mulhwXx(UGeckoInstruction inst) } else { - gpr.FlushLockX(EDX); gpr.Lock(a, b, d); gpr.BindToRegister(d, (d == a || d == b), true); if (gpr.RX(d) == EDX) @@ -1288,7 +1287,6 @@ void Jit64::divwux(UGeckoInstruction inst) } else { - gpr.FlushLockX(EDX); gpr.Lock(a, b, d); gpr.BindToRegister(d, (d == a || d == b), true); MOV(32, R(EAX), gpr.R(a)); @@ -1349,7 +1347,6 @@ void Jit64::divwx(UGeckoInstruction inst) } else { - gpr.FlushLockX(EDX); gpr.Lock(a, b, d); gpr.BindToRegister(d, (d == a || d == b), true); MOV(32, R(EAX), gpr.R(a)); @@ -1881,8 +1878,8 @@ void Jit64::srawx(UGeckoInstruction inst) int a = inst.RA; int b = inst.RB; int s = inst.RS; - gpr.Lock(a, s, b); gpr.FlushLockX(ECX); + gpr.Lock(a, s, b); gpr.BindToRegister(a, (a == s || a == b), true); JitClearCA(); MOV(32, R(ECX), gpr.R(b)); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index 57af170b42..c1a5913e37 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -197,14 +197,13 @@ void Jit64::lXXx(UGeckoInstruction inst) else { // In this case we need an extra temporary register. - gpr.FlushLockX(ABI_PARAM1); - opAddress = R(ABI_PARAM1); + opAddress = R(RDX); storeAddress = true; if (use_constant_offset) { if (gpr.R(a).IsSimpleReg() && offset != 0) { - LEA(32, ABI_PARAM1, MDisp(gpr.RX(a), offset)); + LEA(32, RDX, MDisp(gpr.RX(a), offset)); } else { @@ -215,7 +214,7 @@ void Jit64::lXXx(UGeckoInstruction inst) } else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) { - LEA(32, ABI_PARAM1, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); + LEA(32, RDX, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); } else { @@ -232,7 +231,7 @@ void Jit64::lXXx(UGeckoInstruction inst) if (update && storeAddress) { // We need to save the (usually scratch) address register for the update. 
- registersInUse |= (1 << ABI_PARAM1); + registersInUse |= (1 << RDX); } SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, registersInUse, signExtend); @@ -339,8 +338,7 @@ void Jit64::stX(UGeckoInstruction inst) // Helps external systems know which instruction triggered the write MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); - gpr.FlushLockX(ABI_PARAM1); - MOV(32, R(ABI_PARAM1), gpr.R(s)); + MOV(32, R(EDX), gpr.R(s)); if (update) gpr.SetImmediate32(a, addr); @@ -396,24 +394,31 @@ void Jit64::stX(UGeckoInstruction inst) } } - gpr.FlushLockX(ECX, EDX); - gpr.Lock(s, a); - MOV(32, R(EDX), gpr.R(a)); - MOV(32, R(ECX), gpr.R(s)); - SafeWriteRegToReg(ECX, EDX, accessSize, offset, CallerSavedRegistersInUse()); + gpr.Lock(a, s); + gpr.BindToRegister(a, true, false); + X64Reg reg_value; + if (WriteClobbersRegValue(accessSize, /* swap */ true)) + { + MOV(32, R(EDX), gpr.R(s)); + reg_value = EDX; + } + else + { + gpr.BindToRegister(s, true, false); + reg_value = gpr.RX(s); + } + SafeWriteRegToReg(reg_value, gpr.RX(a), accessSize, offset, CallerSavedRegistersInUse(), SAFE_LOADSTORE_CLOBBER_EAX_INSTEAD_OF_ADDR); if (update && offset) { - gpr.KillImmediate(a, true, true); MEMCHECK_START + gpr.KillImmediate(a, true, true); ADD(32, gpr.R(a), Imm32((u32)offset)); MEMCHECK_END } - gpr.UnlockAll(); - gpr.UnlockAllX(); } else { @@ -430,15 +435,12 @@ void Jit64::stXx(UGeckoInstruction inst) FALLBACK_IF(!a || a == s || a == b); gpr.Lock(a, b, s); - gpr.FlushLockX(ECX, EDX); if (inst.SUBOP10 & 32) { - MEMCHECK_START gpr.BindToRegister(a, true, true); ADD(32, gpr.R(a), gpr.R(b)); MOV(32, R(EDX), gpr.R(a)); - MEMCHECK_END } else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) { @@ -468,8 +470,18 @@ void Jit64::stXx(UGeckoInstruction inst) break; } - MOV(32, R(ECX), gpr.R(s)); - SafeWriteRegToReg(ECX, EDX, accessSize, 0, CallerSavedRegistersInUse()); + X64Reg reg_value; + if (WriteClobbersRegValue(accessSize, /* swap */ true)) + { + MOV(32, R(EAX), gpr.R(s)); + reg_value = EAX; + } + else + { + gpr.BindToRegister(s, true, false); + reg_value = gpr.RX(s); + } + SafeWriteRegToReg(reg_value, EDX, accessSize, 0, CallerSavedRegistersInUse()); gpr.UnlockAll(); gpr.UnlockAllX(); @@ -482,13 +494,12 @@ void Jit64::lmw(UGeckoInstruction inst) JITDISABLE(bJITLoadStoreOff); // TODO: This doesn't handle rollback on DSI correctly - gpr.FlushLockX(ECX); - MOV(32, R(ECX), Imm32((u32)(s32)inst.SIMM_16)); + MOV(32, R(EDX), Imm32((u32)(s32)inst.SIMM_16)); if (inst.RA) - ADD(32, R(ECX), gpr.R(inst.RA)); + ADD(32, R(EDX), gpr.R(inst.RA)); for (int i = inst.RD; i < 32; i++) { - SafeLoadToReg(EAX, R(ECX), 32, (i - inst.RD) * 4, CallerSavedRegistersInUse() | (1 << ECX), false); + SafeLoadToReg(EAX, R(EDX), 32, (i - inst.RD) * 4, CallerSavedRegistersInUse() | (1 << ECX), false); gpr.BindToRegister(i, false, true); MOV(32, gpr.R(i), R(EAX)); } @@ -501,15 +512,14 @@ void Jit64::stmw(UGeckoInstruction inst) JITDISABLE(bJITLoadStoreOff); // TODO: This doesn't handle rollback on DSI correctly - gpr.FlushLockX(ECX); for (int i = inst.RD; i < 32; i++) { if (inst.RA) MOV(32, R(EAX), gpr.R(inst.RA)); else XOR(32, R(EAX), R(EAX)); - MOV(32, R(ECX), gpr.R(i)); - SafeWriteRegToReg(ECX, EAX, 32, (i - inst.RD) * 4 + (u32)(s32)inst.SIMM_16, CallerSavedRegistersInUse()); + MOV(32, R(EDX), gpr.R(i)); + SafeWriteRegToReg(EDX, EAX, 32, (i - inst.RD) * 4 + (u32)(s32)inst.SIMM_16, CallerSavedRegistersInUse()); } gpr.UnlockAllX(); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp 
b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index d1f7ca9f8f..5880c22b7e 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -96,24 +96,23 @@ void Jit64::stfXXX(UGeckoInstruction inst) FALLBACK_IF(!indexed && !a); s32 offset = 0; - gpr.FlushLockX(ABI_PARAM1); if (indexed) { if (update) { gpr.BindToRegister(a, true, true); ADD(32, gpr.R(a), gpr.R(b)); - MOV(32, R(ABI_PARAM1), gpr.R(a)); + MOV(32, R(RDX), gpr.R(a)); } else { if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) - LEA(32, ABI_PARAM1, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); + LEA(32, RDX, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); else { - MOV(32, R(ABI_PARAM1), gpr.R(b)); + MOV(32, R(RDX), gpr.R(b)); if (a) - ADD(32, R(ABI_PARAM1), gpr.R(a)); + ADD(32, R(RDX), gpr.R(a)); } } } @@ -128,14 +127,14 @@ void Jit64::stfXXX(UGeckoInstruction inst) { offset = (s32)(s16)inst.SIMM_16; } - MOV(32, R(ABI_PARAM1), gpr.R(a)); + MOV(32, R(RDX), gpr.R(a)); } if (single) { fpr.BindToRegister(s, true, false); ConvertDoubleToSingle(XMM0, fpr.RX(s)); - SafeWriteF32ToReg(XMM0, ABI_PARAM1, offset, CallerSavedRegistersInUse()); + SafeWriteF32ToReg(XMM0, RDX, offset, CallerSavedRegistersInUse()); fpr.UnlockAll(); } else @@ -144,7 +143,7 @@ void Jit64::stfXXX(UGeckoInstruction inst) MOVQ_xmm(R(RAX), fpr.RX(s)); else MOV(64, R(RAX), fpr.R(s)); - SafeWriteRegToReg(RAX, ABI_PARAM1, 64, offset, CallerSavedRegistersInUse()); + SafeWriteRegToReg(RAX, RDX, 64, offset, CallerSavedRegistersInUse()); } gpr.UnlockAll(); gpr.UnlockAllX(); @@ -160,15 +159,14 @@ void Jit64::stfiwx(UGeckoInstruction inst) int a = inst.RA; int b = inst.RB; - gpr.FlushLockX(ABI_PARAM1); - MOV(32, R(ABI_PARAM1), gpr.R(b)); + MOV(32, R(RDX), gpr.R(b)); if (a) - ADD(32, R(ABI_PARAM1), gpr.R(a)); + ADD(32, R(RDX), gpr.R(a)); if (fpr.R(s).IsSimpleReg()) MOVD_xmm(R(EAX), fpr.RX(s)); else MOV(32, R(EAX), fpr.R(s)); - SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, CallerSavedRegistersInUse()); + SafeWriteRegToReg(EAX, RDX, 32, 0, CallerSavedRegistersInUse()); gpr.UnlockAllX(); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp index 894d96789d..47cd9061ba 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -28,8 +28,7 @@ void Jit64::psq_st(UGeckoInstruction inst) int a = inst.RA; int s = inst.RS; // Fp numbers - gpr.FlushLockX(EAX, EDX); - gpr.FlushLockX(ECX); + gpr.FlushLockX(EAX, ECX); if (update) gpr.BindToRegister(inst.RA, true, true); fpr.BindToRegister(inst.RS, true, false); @@ -73,8 +72,7 @@ void Jit64::psq_l(UGeckoInstruction inst) bool update = inst.OPCD == 57; int offset = inst.SIMM_12; - gpr.FlushLockX(EAX, EDX); - gpr.FlushLockX(ECX); + gpr.FlushLockX(EAX, ECX); gpr.BindToRegister(inst.RA, true, update && offset); fpr.BindToRegister(inst.RS, false, true); if (offset) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index e430144aa8..59ea14cf8d 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -42,40 +42,40 @@ void Jit64::GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate) void Jit64::SetCRFieldBit(int field, int bit, Gen::X64Reg in) { - MOV(64, R(ABI_PARAM1), PPCSTATE(cr_val[field])); + MOV(64, R(RDX), PPCSTATE(cr_val[field])); MOVZX(32, 8, in, R(in)); 
switch (bit) { case CR_SO_BIT: // set bit 61 to input - BTR(64, R(ABI_PARAM1), Imm8(61)); + BTR(64, R(RDX), Imm8(61)); SHL(64, R(in), Imm8(61)); - OR(64, R(ABI_PARAM1), R(in)); + OR(64, R(RDX), R(in)); break; case CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input - SHR(64, R(ABI_PARAM1), Imm8(32)); - SHL(64, R(ABI_PARAM1), Imm8(32)); + SHR(64, R(RDX), Imm8(32)); + SHL(64, R(RDX), Imm8(32)); XOR(32, R(in), Imm8(1)); - OR(64, R(ABI_PARAM1), R(in)); + OR(64, R(RDX), R(in)); break; case CR_GT_BIT: // set bit 63 to !input - BTR(64, R(ABI_PARAM1), Imm8(63)); + BTR(64, R(RDX), Imm8(63)); NOT(32, R(in)); SHL(64, R(in), Imm8(63)); - OR(64, R(ABI_PARAM1), R(in)); + OR(64, R(RDX), R(in)); break; case CR_LT_BIT: // set bit 62 to input - BTR(64, R(ABI_PARAM1), Imm8(62)); + BTR(64, R(RDX), Imm8(62)); SHL(64, R(in), Imm8(62)); - OR(64, R(ABI_PARAM1), R(in)); + OR(64, R(RDX), R(in)); break; } - BTS(64, R(ABI_PARAM1), Imm8(32)); - MOV(64, PPCSTATE(cr_val[field]), R(ABI_PARAM1)); + BTS(64, R(RDX), Imm8(32)); + MOV(64, PPCSTATE(cr_val[field]), R(RDX)); } FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) @@ -308,8 +308,7 @@ void Jit64::mfcr(UGeckoInstruction inst) gpr.BindToRegister(d, false, true); XOR(32, gpr.R(d), gpr.R(d)); - gpr.FlushLockX(ABI_PARAM1); - X64Reg cr_val = ABI_PARAM1; + X64Reg cr_val = RDX; // we only need to zero the high bits of EAX once XOR(32, R(EAX), R(EAX)); for (int i = 0; i < 8; i++) @@ -439,9 +438,8 @@ void Jit64::crXXX(UGeckoInstruction inst) // crnand or crnor bool negateB = inst.SUBOP10 == 225 || inst.SUBOP10 == 33; - gpr.FlushLockX(ABI_PARAM1); - GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), ABI_PARAM1, negateA); - GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), EAX, negateB); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), DL, negateA); + GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), AL, negateB); // Compute combined bit switch (inst.SUBOP10) @@ -449,23 +447,23 @@ void Jit64::crXXX(UGeckoInstruction inst) case 33: // crnor: ~(A || B) == (~A && ~B) case 129: // crandc case 257: // crand - AND(8, R(EAX), R(ABI_PARAM1)); + AND(8, R(AL), R(DL)); break; case 193: // crxor case 289: // creqv - XOR(8, R(EAX), R(ABI_PARAM1)); + XOR(8, R(AL), R(DL)); break; case 225: // crnand: ~(A && B) == (~A || ~B) case 417: // crorc case 449: // cror - OR(8, R(EAX), R(ABI_PARAM1)); + OR(8, R(AL), R(DL)); break; } // Store result bit in CRBD - SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), EAX); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), AL); gpr.UnlockAllX(); } diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp index 8e9a2e5107..8ae451ed34 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp @@ -157,7 +157,9 @@ static void fregSpill(RegInfo& RI, X64Reg reg) RI.fregs[reg] = nullptr; } -// ECX is scratch, so we don't allocate it +// RAX and RDX are scratch, so we don't allocate them +// (TODO: if we could lock RCX here too then we could allocate it - needed for +// shifts) // 64-bit - calling conventions differ between linux & windows, so... 
#ifdef _WIN32 @@ -602,9 +604,9 @@ static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size) { auto info = regBuildMemAddress(RI, I, getOp2(I), 2, Size, nullptr); if (info.first.IsImm()) - RI.Jit->MOV(32, R(ECX), info.first); + RI.Jit->MOV(32, R(EDX), info.first); else - RI.Jit->LEA(32, ECX, MDisp(info.first.GetSimpleReg(), info.second)); + RI.Jit->LEA(32, EDX, MDisp(info.first.GetSimpleReg(), info.second)); regSpill(RI, EAX); @@ -617,7 +619,7 @@ static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size) RI.Jit->MOV(32, R(EAX), regLocForInst(RI, getOp1(I))); } - RI.Jit->SafeWriteRegToReg(EAX, ECX, Size, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); + RI.Jit->SafeWriteRegToReg(EAX, EDX, Size, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); if (RI.IInfo[I - RI.FirstI] & 4) regClearInst(RI, getOp1(I)); } @@ -675,9 +677,9 @@ static void regEmitCmp(RegInfo& RI, InstLoc I) static void regEmitICmpInst(RegInfo& RI, InstLoc I, CCFlags flag) { regEmitCmp(RI, I); - RI.Jit->SETcc(flag, R(ECX)); // Caution: SETCC uses 8-bit regs! + RI.Jit->SETcc(flag, R(EDX)); // Caution: SETCC uses 8-bit regs! X64Reg reg = regBinReg(RI, I); - RI.Jit->MOVZX(32, 8, reg, R(ECX)); + RI.Jit->MOVZX(32, 8, reg, R(EDX)); RI.regs[reg] = I; regNormalRegClear(RI, I); } @@ -1111,11 +1113,11 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) } case StoreFPRF: { - Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); - Jit->AND(32, R(ECX), Imm8(0x1F)); - Jit->SHL(32, R(ECX), Imm8(12)); + Jit->MOV(32, R(EDX), regLocForInst(RI, getOp1(I))); + Jit->AND(32, R(EDX), Imm8(0x1F)); + Jit->SHL(32, R(EDX), Imm8(12)); Jit->AND(32, PPCSTATE(fpscr), Imm32(~(0x1F << 12))); - Jit->OR(32, PPCSTATE(fpscr), R(ECX)); + Jit->OR(32, PPCSTATE(fpscr), R(EDX)); regNormalRegClear(RI, I); break; } @@ -1155,8 +1157,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = regUReg(RI, I); - Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); - Jit->MOVSX(32, 8, reg, R(ECX)); + Jit->MOV(32, R(EDX), regLocForInst(RI, getOp1(I))); + Jit->MOVSX(32, 8, reg, R(EDX)); RI.regs[reg] = I; regNormalRegClear(RI, I); break; @@ -1178,9 +1180,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = regUReg(RI, I); - Jit->MOV(32, R(ECX), Imm32(63)); + Jit->MOV(32, R(EDX), Imm32(63)); Jit->BSR(32, reg, regLocForInst(RI, getOp1(I))); - Jit->CMOVcc(32, reg, R(ECX), CC_Z); + Jit->CMOVcc(32, reg, R(EDX), CC_Z); Jit->XOR(32, R(reg), Imm8(31)); RI.regs[reg] = I; regNormalRegClear(RI, I); @@ -1422,30 +1424,30 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) Jit->XOR(32, R(EAX), R(EAX)); // SO: Bit 61 set. - Jit->MOV(64, R(RCX), R(cr_val)); - Jit->SHR(64, R(RCX), Imm8(61)); - Jit->AND(32, R(ECX), Imm8(1)); - Jit->OR(32, R(EAX), R(ECX)); + Jit->MOV(64, R(RDX), R(cr_val)); + Jit->SHR(64, R(RDX), Imm8(61)); + Jit->AND(32, R(EDX), Imm8(1)); + Jit->OR(32, R(EAX), R(EDX)); // EQ: Bits 31-0 == 0. - Jit->XOR(32, R(ECX), R(ECX)); + Jit->XOR(32, R(EDX), R(EDX)); Jit->TEST(32, R(cr_val), R(cr_val)); - Jit->SETcc(CC_Z, R(ECX)); - Jit->SHL(32, R(ECX), Imm8(1)); - Jit->OR(32, R(EAX), R(ECX)); + Jit->SETcc(CC_Z, R(EDX)); + Jit->SHL(32, R(EDX), Imm8(1)); + Jit->OR(32, R(EAX), R(EDX)); // GT: Value > 0. 
- Jit->XOR(32, R(ECX), R(ECX)); + Jit->XOR(32, R(EDX), R(EDX)); Jit->TEST(64, R(cr_val), R(cr_val)); - Jit->SETcc(CC_G, R(ECX)); - Jit->SHL(32, R(ECX), Imm8(2)); - Jit->OR(32, R(EAX), R(ECX)); + Jit->SETcc(CC_G, R(EDX)); + Jit->SHL(32, R(EDX), Imm8(2)); + Jit->OR(32, R(EAX), R(EDX)); // LT: Bit 62 set. - Jit->MOV(64, R(ECX), R(cr_val)); - Jit->SHR(64, R(ECX), Imm8(62 - 3)); - Jit->AND(32, R(ECX), Imm8(0x8)); - Jit->OR(32, R(EAX), R(ECX)); + Jit->MOV(64, R(EDX), R(cr_val)); + Jit->SHR(64, R(EDX), Imm8(62 - 3)); + Jit->AND(32, R(EDX), Imm8(0x8)); + Jit->OR(32, R(EAX), R(EDX)); Jit->MOV(32, R(cr_val), R(EAX)); RI.regs[cr_val] = I; @@ -1460,34 +1462,34 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg cr_val = regUReg(RI, I); Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I))); - Jit->MOV(64, R(RCX), Imm64(1ull << 32)); + Jit->MOV(64, R(RDX), Imm64(1ull << 32)); // SO Jit->MOV(64, R(RAX), R(cr_val)); Jit->SHL(64, R(RAX), Imm8(63)); Jit->SHR(64, R(RAX), Imm8(63 - 61)); - Jit->OR(64, R(RCX), R(RAX)); + Jit->OR(64, R(RDX), R(RAX)); // EQ Jit->MOV(64, R(RAX), R(cr_val)); Jit->NOT(64, R(RAX)); Jit->AND(64, R(RAX), Imm8(CR_EQ)); - Jit->OR(64, R(RCX), R(RAX)); + Jit->OR(64, R(RDX), R(RAX)); // GT Jit->MOV(64, R(RAX), R(cr_val)); Jit->NOT(64, R(RAX)); Jit->AND(64, R(RAX), Imm8(CR_GT)); Jit->SHL(64, R(RAX), Imm8(63 - 2)); - Jit->OR(64, R(RCX), R(RAX)); + Jit->OR(64, R(RDX), R(RAX)); // LT Jit->MOV(64, R(RAX), R(cr_val)); Jit->AND(64, R(RAX), Imm8(CR_LT)); Jit->SHL(64, R(RAX), Imm8(62 - 3)); - Jit->OR(64, R(RCX), R(RAX)); + Jit->OR(64, R(RDX), R(RAX)); - Jit->MOV(64, R(cr_val), R(RCX)); + Jit->MOV(64, R(cr_val), R(RDX)); RI.regs[cr_val] = I; regNormalRegClear(RI, I); @@ -1553,9 +1555,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = fregFindFreeReg(RI); - Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); - RI.Jit->SafeLoadToReg(ECX, R(ECX), 32, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); - Jit->MOVD_xmm(reg, R(ECX)); + Jit->MOV(32, R(EDX), regLocForInst(RI, getOp1(I))); + RI.Jit->SafeLoadToReg(EDX, R(EDX), 32, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); + Jit->MOVD_xmm(reg, R(EDX)); RI.fregs[reg] = I; regNormalRegClear(RI, I); break; @@ -1567,9 +1569,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = fregFindFreeReg(RI); const OpArg loc = regLocForInst(RI, getOp1(I)); - Jit->MOV(32, R(ECX), loc); - RI.Jit->SafeLoadToReg(RCX, R(ECX), 64, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); - Jit->MOVQ_xmm(reg, R(RCX)); + Jit->MOV(32, R(EDX), loc); + RI.Jit->SafeLoadToReg(RDX, R(EDX), 64, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); + Jit->MOVQ_xmm(reg, R(RDX)); RI.fregs[reg] = I; regNormalRegClear(RI, I); break; @@ -1591,11 +1593,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) // 0b0011111100000111, or 0x3F07. 
Jit->MOV(32, R(EAX), Imm32(0x3F07)); Jit->AND(32, R(EAX), M(((char *)&GQR(quantreg)) + 2)); - Jit->MOVZX(32, 8, EDX, R(AL)); - Jit->OR(32, R(EDX), Imm8(w << 3)); + Jit->OR(32, R(EAX), Imm8(w << 3)); - Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); - Jit->CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedLoadQuantized))); + Jit->MOV(32, R(EDX), regLocForInst(RI, getOp1(I))); + Jit->CALLptr(MScaled(EAX, SCALE_8, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedLoadQuantized))); Jit->MOVAPD(reg, R(XMM0)); RI.fregs[reg] = I; regNormalRegClear(RI, I); @@ -1610,8 +1611,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) else Jit->MOV(32, R(EAX), loc1); - Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I))); - RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); + Jit->MOV(32, R(EDX), regLocForInst(RI, getOp2(I))); + RI.Jit->SafeWriteRegToReg(EAX, EDX, 32, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); if (RI.IInfo[I - RI.FirstI] & 4) fregClearInst(RI, getOp1(I)); if (RI.IInfo[I - RI.FirstI] & 8) @@ -1626,8 +1627,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) OpArg address = regLocForInst(RI, getOp2(I)); Jit->MOVAPD(XMM0, value); Jit->MOVQ_xmm(R(RAX), XMM0); - Jit->MOV(32, R(ECX), address); - RI.Jit->SafeWriteRegToReg(RAX, ECX, 64, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); + Jit->MOV(32, R(EDX), address); + RI.Jit->SafeWriteRegToReg(RAX, EDX, 64, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); if (RI.IInfo[I - RI.FirstI] & 4) fregClearInst(RI, getOp1(I)); @@ -1644,7 +1645,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) Jit->AND(32, R(EAX), PPCSTATE(spr[SPR_GQR0 + quantreg])); Jit->MOVZX(32, 8, EDX, R(AL)); - Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I))); + Jit->MOV(32, R(EDX), regLocForInst(RI, getOp2(I))); Jit->MOVAPD(XMM0, fregLocForInst(RI, getOp1(I))); Jit->CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedStoreQuantized))); if (RI.IInfo[I - RI.FirstI] & 4) @@ -1790,9 +1791,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = fregFindFreeReg(RI); unsigned ppcreg = *I >> 8; char *p = (char*)&(PowerPC::ppcState.ps[ppcreg][0]); - Jit->MOV(32, R(ECX), M(p+4)); - Jit->AND(32, R(ECX), Imm32(0x7ff00000)); - Jit->CMP(32, R(ECX), Imm32(0x38000000)); + Jit->MOV(32, R(EDX), M(p+4)); + Jit->AND(32, R(EDX), Imm32(0x7ff00000)); + Jit->CMP(32, R(EDX), Imm32(0x38000000)); FixupBranch ok = Jit->J_CC(CC_AE); Jit->AND(32, M(p+4), Imm32(0x80000000)); Jit->MOV(32, M(p), Imm32(0)); @@ -2204,10 +2205,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) const u32 mask = 0x87C0FFFF; // MSR = (MSR & ~mask) | (SRR1 & mask); Jit->MOV(32, R(EAX), PPCSTATE(msr)); - Jit->MOV(32, R(ECX), PPCSTATE_SRR1); + Jit->MOV(32, R(EDX), PPCSTATE_SRR1); Jit->AND(32, R(EAX), Imm32(~mask)); - Jit->AND(32, R(ECX), Imm32(mask)); - Jit->OR(32, R(EAX), R(ECX)); + Jit->AND(32, R(EDX), Imm32(mask)); + Jit->OR(32, R(EAX), R(EDX)); // MSR &= 0xFFFBFFFF; // Mask used to clear the bit MSR[13] Jit->AND(32, R(EAX), Imm32(0xFFFBFFFF)); Jit->MOV(32, PPCSTATE(msr), R(EAX)); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp index 68390396b8..c7b35dd186 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp @@ -9,7 +9,7 @@ #include 
"Core/PowerPC/JitCommon/JitAsmCommon.h" #include "Core/PowerPC/JitCommon/JitBase.h" -#define QUANTIZED_REGS_TO_SAVE (ABI_ALL_CALLER_SAVED & ~((1 << RAX) | (1 << RCX) | (1 << RDX) | \ +#define QUANTIZED_REGS_TO_SAVE (ABI_ALL_CALLER_SAVED & ~((1 << RAX) | (1 << RCX) | \ (1 << (XMM0+16)) | (1 << (XMM1+16)))) using namespace Gen; @@ -18,19 +18,15 @@ static int temp32; void CommonAsmRoutines::GenFifoWrite(int size) { - // Assume value in ABI_PARAM1 + // Assume value in EDX PUSH(ESI); - if (size != 32) - PUSH(EDX); MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe)); MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount)); - SwapAndStore(size, MComplex(RAX, RSI, 1, 0), ABI_PARAM1); + SwapAndStore(size, MComplex(RAX, RSI, 1, 0), EDX); ADD(32, R(ESI), Imm8(size >> 3)); MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI)); - if (size != 32) - POP(EDX); POP(ESI); RET(); } @@ -39,7 +35,6 @@ void CommonAsmRoutines::GenFifoFloatWrite() { // Assume value in XMM0 PUSH(ESI); - PUSH(EDX); MOVSS(M(&temp32), XMM0); MOV(32, R(EDX), M(&temp32)); MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe)); @@ -47,7 +42,6 @@ void CommonAsmRoutines::GenFifoFloatWrite() SwapAndStore(32, MComplex(RAX, RSI, 1, 0), EDX); ADD(32, R(ESI), Imm8(4)); MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI)); - POP(EDX); POP(ESI); RET(); } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp index a51aac6480..29ee9146bd 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp @@ -59,6 +59,7 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re // It ought to be necessary to align the stack here. Since it seems to not // affect anybody, I'm not going to add it just to be completely safe about // performance. + ABI_PushRegistersAndAdjustStack(registersInUse, true); if (addrReg != ABI_PARAM1) MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg)); @@ -66,7 +67,6 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re if (info.displacement) ADD(32, R(ABI_PARAM1), Imm32(info.displacement)); - ABI_PushRegistersAndAdjustStack(registersInUse, true); switch (info.operandSize) { case 4: @@ -115,6 +115,8 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs MOV(32, PPCSTATE(pc), Imm32(pc)); + ABI_PushRegistersAndAdjustStack(registersInUse, true); + MOVTwo(64, ABI_PARAM1, dataReg, ABI_PARAM2, addrReg, ABI_PARAM3); if (info.displacement) @@ -122,7 +124,6 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r ADD(32, R(ABI_PARAM2), Imm32(info.displacement)); } - ABI_PushRegistersAndAdjustStack(registersInUse, true); switch (info.operandSize) { case 8: diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index 4dd81015e8..5847216920 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -5,7 +5,6 @@ #include #include "Common/Common.h" -#include "Common/CPUDetect.h" #include "Common/MathUtil.h" #include "Core/HW/MMIO.h" @@ -248,13 +247,11 @@ void EmuCodeBlock::MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value, } } -// Always clobbers EAX. Preserves the address. -// Preserves the value if the load fails and js.memcheck is enabled. 
void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags) { if (!jit->js.memcheck) { - registersInUse &= ~(1 << RAX | 1 << reg_value); + registersInUse &= ~(1 << reg_value); } if (!Core::g_CoreStartupParameter.bMMU && Core::g_CoreStartupParameter.bFastmem && @@ -395,11 +392,6 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap) { - if (accessSize == 8 && reg_value >= 4) - { - PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!"); - } - u8* result = GetWritableCodePtr(); OpArg dest = MComplex(RBX, reg_addr, SCALE_1, offset); if (swap) @@ -410,7 +402,8 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acc } else { - BSWAP(accessSize, reg_value); + if (accessSize > 8) + BSWAP(accessSize, reg_value); result = GetWritableCodePtr(); MOV(accessSize, dest, R(reg_value)); } @@ -423,10 +416,8 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acc return result; } -// Destroys both arg registers void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags) { - registersInUse &= ~(1 << RAX); if (!Core::g_CoreStartupParameter.bMMU && Core::g_CoreStartupParameter.bFastmem && !(flags & (SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_FASTMEM)) @@ -449,7 +440,17 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce } if (offset) - ADD(32, R(reg_addr), Imm32((u32)offset)); + { + if (flags & SAFE_LOADSTORE_CLOBBER_EAX_INSTEAD_OF_ADDR) + { + LEA(32, EAX, MDisp(reg_addr, (u32)offset)); + reg_addr = EAX; + } + else + { + ADD(32, R(reg_addr), Imm32((u32)offset)); + } + } u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS; diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h index 2e865e50e6..ca073854eb 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h @@ -6,6 +6,7 @@ #include +#include "Common/CPUDetect.h" #include "Common/x64Emitter.h" namespace MMIO { class Mapping; } @@ -52,11 +53,21 @@ public: { SAFE_LOADSTORE_NO_SWAP = 1, SAFE_LOADSTORE_NO_PROLOG = 2, - SAFE_LOADSTORE_NO_FASTMEM = 4 + SAFE_LOADSTORE_NO_FASTMEM = 4, + SAFE_LOADSTORE_CLOBBER_EAX_INSTEAD_OF_ADDR = 8 }; + void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags = 0); + // Clobbers EAX or reg_addr depending on the relevant flag. Preserves + // reg_value if the load fails and js.memcheck is enabled. 
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags = 0); + // applies to safe and unsafe WriteRegToReg + bool WriteClobbersRegValue(int accessSize, bool swap) + { + return swap && !cpu_info.bMOVBE && accessSize > 8; + } + void SafeWriteF32ToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, s32 offset, u32 registersInUse, int flags = 0); void WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap = false); diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp index f078a4cac9..c07e1a1216 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp @@ -40,7 +40,7 @@ instruction and generates code. Dead code elimination works in this step, by simply skipping unused instructions. The register allocator is a dumb, greedy allocator: at the moment, it's really a bit too dumb, but it's actually not as bad as it looks: unless a block is relatively long, spills -are rarely needed. ECX is used as a scratch register: requiring a scratch +are rarely needed. EDX is used as a scratch register: requiring a scratch register isn't ideal, but the register allocator is too dumb to handle instructions that need a specific register at the moment. From 100a7ac97b090e85eae9c4dfbd6e14c15a89f896 Mon Sep 17 00:00:00 2001 From: comex Date: Tue, 2 Sep 2014 23:18:38 -0400 Subject: [PATCH 5/6] Actually add RCX, plus RDI and RSI on Unix. And switch to a register order that consistently prefers callee-save to caller-save. phire suggested putting rdi/rsi first, even though they're caller-save, to save code space; this is more conservative and I can do that later. --- Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp index 8a329eb723..11eb9de2c7 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp @@ -200,9 +200,9 @@ const int* GPRRegCache::GetAllocationOrder(size_t& count) { // R12, when used as base register, for example in a LEA, can generate bad code! Need to look into this. #ifdef _WIN32 - RSI, RDI, R13, R14, R8, R9, R10, R11, R12, //, RCX + RSI, RDI, R13, R14, R8, R9, R10, R11, R12, RCX #else - R13, R14, R8, R9, R10, R11, R12, //, RCX + R12, R13, R14, RSI, RDI, R8, R9, R10, R11, RCX #endif }; count = sizeof(allocationOrder) / sizeof(const int); From 6fd0333c14ddf5990ae5540a14d84f71226fc9af Mon Sep 17 00:00:00 2001 From: comex Date: Fri, 5 Sep 2014 20:17:13 -0400 Subject: [PATCH 6/6] Symbolicize explicit uses of x86 registers where possible (GPRs only for now). Uses are split into three categories: - Arbitrary (except for size savings) - constants like RSCRATCH are used. - ABI (i.e. RAX as return value) - ABI_RETURN is used. - Fixed by architecture (RCX shifts, RDX/RAX for some instructions) - explicit register is kept. In theory this allows the assignments to be modified easily. I verified that I was able to run Melee with all the registers changed, although there may be issues if RSCRATCH[2] and ABI_PARAM{1,2} conflict. 
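The symbolic names used throughout the diffs below come from JitBase.h (it is in the diffstat, though its hunk is not quoted here). The mapping can be inferred from which concrete register each change replaces; here is a standalone sketch of that inferred mapping - an educated guess, not a quotation of the header:

    // Mirrors hardware register numbering, like Dolphin's Gen::X64Reg enum.
    enum X64RegSketch { RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
                        R8, R9, R10, R11, R12, R13, R14, R15 };

    // Inferred from the diffs: EAX->RSCRATCH, RDX->RSCRATCH2,
    // ECX->RSCRATCH_EXTRA, RBX->RMEM, R15->RCODE_POINTERS, RBP->RPPCSTATE.
    constexpr X64RegSketch RSCRATCH       = RAX; // primary scratch
    constexpr X64RegSketch RSCRATCH2      = RDX; // secondary scratch
    constexpr X64RegSketch RSCRATCH_EXTRA = RCX; // flushed only for shifts and paired load/store
    constexpr X64RegSketch RMEM           = RBX; // base of emulated memory
    constexpr X64RegSketch RCODE_POINTERS = R15; // array of JIT block pointers
    constexpr X64RegSketch RPPCSTATE      = RBP; // &PowerPC::ppcState + 0x80

    int main() { return (RSCRATCH == RAX && RPPCSTATE == RBP) ? 0 : 1; }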
--- Source/Core/Common/x64ABI.h | 2 + Source/Core/Core/PowerPC/Jit64/Jit.cpp | 34 +- Source/Core/Core/PowerPC/Jit64/Jit.h | 8 +- Source/Core/Core/PowerPC/Jit64/JitAsm.cpp | 53 ++- Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp | 30 +- .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp | 16 +- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 195 +++++------ .../Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp | 64 ++-- .../PowerPC/Jit64/Jit_LoadStoreFloating.cpp | 38 +-- .../PowerPC/Jit64/Jit_LoadStorePaired.cpp | 38 +-- .../PowerPC/Jit64/Jit_SystemRegisters.cpp | 96 +++--- Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp | 241 +++++++------- Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp | 8 +- .../Core/PowerPC/JitCommon/JitAsmCommon.cpp | 307 +++++++++--------- .../Core/PowerPC/JitCommon/JitAsmCommon.h | 8 +- .../Core/PowerPC/JitCommon/JitBackpatch.cpp | 10 +- Source/Core/Core/PowerPC/JitCommon/JitBase.h | 17 + .../Core/Core/PowerPC/JitCommon/Jit_Util.cpp | 142 ++++---- Source/Core/Core/PowerPC/JitCommon/Jit_Util.h | 9 +- .../PowerPC/JitILCommon/JitILBase_Integer.cpp | 8 +- .../JitILCommon/JitILBase_LoadStore.cpp | 16 +- 21 files changed, 672 insertions(+), 668 deletions(-) diff --git a/Source/Core/Common/x64ABI.h b/Source/Core/Common/x64ABI.h index 66abeee5ef..abc9236ef7 100644 --- a/Source/Core/Common/x64ABI.h +++ b/Source/Core/Common/x64ABI.h @@ -53,3 +53,5 @@ #endif // WIN32 +#define ABI_RETURN RAX + diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 4ec492b1ab..a3707dbbe1 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -39,14 +39,6 @@ using namespace PowerPC; // Various notes below -// Register allocation -// RAX - Generic quicktemp register -// RBX - point to base of memory map -// RSI RDI R12 R13 R14 R15 - free for allocation -// RCX RDX R8 R9 R10 R11 - allocate in emergencies. These need to be flushed before functions are called. -// RSP - stack pointer, do not generally use, very dangerous -// RBP - ? - // IMPORTANT: // Make sure that all generated code and all emulator state sits under the 2GB boundary so that // RIP addressing can be used easily. Windows will always allocate static code under the 2GB boundary. 
@@ -305,18 +297,18 @@ void Jit64::WriteExit(u32 destination) b->linkData.push_back(linkData); } -void Jit64::WriteExitDestInEAX() +void Jit64::WriteExitDestInRSCRATCH() { - MOV(32, PPCSTATE(pc), R(EAX)); + MOV(32, PPCSTATE(pc), R(RSCRATCH)); Cleanup(); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } -void Jit64::WriteRfiExitDestInEAX() +void Jit64::WriteRfiExitDestInRSCRATCH() { - MOV(32, PPCSTATE(pc), R(EAX)); - MOV(32, PPCSTATE(npc), R(EAX)); + MOV(32, PPCSTATE(pc), R(RSCRATCH)); + MOV(32, PPCSTATE(npc), R(RSCRATCH)); Cleanup(); ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions)); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); @@ -326,8 +318,8 @@ void Jit64::WriteRfiExitDestInEAX() void Jit64::WriteExceptionExit() { Cleanup(); - MOV(32, R(EAX), PPCSTATE(pc)); - MOV(32, PPCSTATE(npc), R(EAX)); + MOV(32, R(RSCRATCH), PPCSTATE(pc)); + MOV(32, PPCSTATE(npc), R(RSCRATCH)); ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions)); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); @@ -336,8 +328,8 @@ void Jit64::WriteExceptionExit() void Jit64::WriteExternalExceptionExit() { Cleanup(); - MOV(32, R(EAX), PPCSTATE(pc)); - MOV(32, PPCSTATE(npc), R(EAX)); + MOV(32, R(RSCRATCH), PPCSTATE(pc)); + MOV(32, PPCSTATE(npc), R(RSCRATCH)); ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExternalExceptions)); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); @@ -520,9 +512,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc HLEFunction(function); if (type == HLE::HLE_HOOK_REPLACE) { - MOV(32, R(EAX), PPCSTATE(npc)); + MOV(32, R(RSCRATCH), PPCSTATE(npc)); js.downcountAmount += js.st.numCycles; - WriteExitDestInEAX(); + WriteExitDestInRSCRATCH(); break; } } @@ -650,8 +642,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI)); // Remove the invalid instruction from the icache, forcing a recompile - MOV(64, R(RAX), ImmPtr(jit->GetBlockCache()->GetICachePtr(js.compilerPC))); - MOV(32,MatR(RAX),Imm32(JIT_ICACHE_INVALID_WORD)); + MOV(64, R(RSCRATCH), ImmPtr(jit->GetBlockCache()->GetICachePtr(js.compilerPC))); + MOV(32,MatR(RSCRATCH),Imm32(JIT_ICACHE_INVALID_WORD)); WriteExceptionExit(); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index 76abfcf0d4..de95967df0 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -90,10 +90,10 @@ public: // Utilities for use by opcodes void WriteExit(u32 destination); - void WriteExitDestInEAX(); + void WriteExitDestInRSCRATCH(); void WriteExceptionExit(); void WriteExternalExceptionExit(); - void WriteRfiExitDestInEAX(); + void WriteRfiExitDestInRSCRATCH(); void WriteCallInterpreter(UGeckoInstruction _inst); void Cleanup(); @@ -101,8 +101,8 @@ public: void GenerateConstantOverflow(s64 val); void GenerateOverflow(); void FinalizeCarryOverflow(bool oe, bool inv = false); - void GetCarryEAXAndClear(); - void FinalizeCarryGenerateOverflowEAX(bool oe, bool inv = false); + void GetCarryRSCRATCHAndClear(); + void FinalizeCarryGenerateOverflowRSCRATCH(bool oe, bool inv = false); void GenerateCarry(); void GenerateRC(); void ComputeRC(const Gen::OpArg & arg); diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index 5c56bfe8c1..1c5b78666f 100644 ---
a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -9,13 +9,6 @@ using namespace Gen; -// GLOBAL STATIC ALLOCATIONS x64 -// RAX - ubiquitous scratch register - EVERYBODY scratches this -// RDX - second scratch register -// RBX - Base pointer of memory -// R15 - Pointer to array of block pointers -// RBP - Pointer to ppcState+0x80 - // PLAN: no more block numbers - crazy opcodes just contain offset within // dynarec buffer // At this offset - 4, there is an int specifying the block number. @@ -26,9 +19,9 @@ void Jit64AsmRoutineManager::Generate() ABI_PushAllCalleeSavedRegsAndAdjustStack(); // Two statically allocated registers. - MOV(64, R(RBX), Imm64((u64)Memory::base)); - MOV(64, R(R15), Imm64((u64)jit->GetBlockCache()->GetCodePointers())); //It's below 2GB so 32 bits are good enough - MOV(64, R(RBP), Imm64((u64)&PowerPC::ppcState + 0x80)); + MOV(64, R(RMEM), Imm64((u64)Memory::base)); + MOV(64, R(RCODE_POINTERS), Imm64((u64)jit->GetBlockCache()->GetCodePointers())); //It's below 2GB so 32 bits are good enough + MOV(64, R(RPPCSTATE), Imm64((u64)&PowerPC::ppcState + 0x80)); const u8* outerLoop = GetCodePtr(); ABI_CallFunction(reinterpret_cast<void *>(&CoreTiming::Advance)); @@ -55,8 +48,8 @@ void Jit64AsmRoutineManager::Generate() SetJumpTarget(skipToRealDispatch); dispatcherNoCheck = GetCodePtr(); - MOV(32, R(EAX), PPCSTATE(pc)); - dispatcherPcInEAX = GetCodePtr(); + MOV(32, R(RSCRATCH), PPCSTATE(pc)); + dispatcherPcInRSCRATCH = GetCodePtr(); u32 mask = 0; FixupBranch no_mem; @@ -68,12 +61,12 @@ void Jit64AsmRoutineManager::Generate() mask |= JIT_ICACHE_VMEM_BIT; if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack) { - TEST(32, R(EAX), Imm32(mask)); + TEST(32, R(RSCRATCH), Imm32(mask)); no_mem = J_CC(CC_NZ); } - AND(32, R(EAX), Imm32(JIT_ICACHE_MASK)); - MOV(64, R(RDX), Imm64((u64)jit->GetBlockCache()->iCache)); - MOV(32, R(EAX), MComplex(RDX, EAX, SCALE_1, 0)); + AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK)); + MOV(64, R(RSCRATCH2), Imm64((u64)jit->GetBlockCache()->iCache)); + MOV(32, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_1, 0)); if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack) { @@ -82,22 +75,22 @@ void Jit64AsmRoutineManager::Generate() } if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack) { - TEST(32, R(EAX), Imm32(JIT_ICACHE_VMEM_BIT)); + TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_VMEM_BIT)); FixupBranch no_vmem = J_CC(CC_Z); - AND(32, R(EAX), Imm32(JIT_ICACHE_MASK)); - MOV(64, R(RDX), Imm64((u64)jit->GetBlockCache()->iCacheVMEM)); - MOV(32, R(EAX), MComplex(RDX, EAX, SCALE_1, 0)); + AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK)); + MOV(64, R(RSCRATCH2), Imm64((u64)jit->GetBlockCache()->iCacheVMEM)); + MOV(32, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_1, 0)); if (Core::g_CoreStartupParameter.bWii) exit_vmem = J(); SetJumpTarget(no_vmem); } if (Core::g_CoreStartupParameter.bWii) { - TEST(32, R(EAX), Imm32(JIT_ICACHE_EXRAM_BIT)); + TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_EXRAM_BIT)); FixupBranch no_exram = J_CC(CC_Z); - AND(32, R(EAX), Imm32(JIT_ICACHEEX_MASK)); - MOV(64, R(RDX), Imm64((u64)jit->GetBlockCache()->iCacheEx)); - MOV(32, R(EAX), MComplex(RDX, EAX, SCALE_1, 0)); + AND(32, R(RSCRATCH), Imm32(JIT_ICACHEEX_MASK)); + MOV(64, R(RSCRATCH2), Imm64((u64)jit->GetBlockCache()->iCacheEx)); + MOV(32, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_1, 0)); SetJumpTarget(no_exram); }
@@ -106,10 +99,10 @@ void Jit64AsmRoutineManager::Generate() if (Core::g_CoreStartupParameter.bWii && (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack)) SetJumpTarget(exit_vmem); - TEST(32, R(EAX), R(EAX)); + TEST(32, R(RSCRATCH), R(RSCRATCH)); FixupBranch notfound = J_CC(CC_L); //grab from list and jump to it - JMPptr(MComplex(R15, RAX, 8, 0)); + JMPptr(MComplex(RCODE_POINTERS, RSCRATCH, 8, 0)); SetJumpTarget(notfound); //Ok, no block, let's jit @@ -124,8 +117,8 @@ void Jit64AsmRoutineManager::Generate() // Test external exceptions. TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT | EXCEPTION_PERFORMANCE_MONITOR | EXCEPTION_DECREMENTER)); FixupBranch noExtException = J_CC(CC_Z); - MOV(32, R(EAX), PPCSTATE(pc)); - MOV(32, PPCSTATE(npc), R(EAX)); + MOV(32, R(RSCRATCH), PPCSTATE(pc)); + MOV(32, PPCSTATE(npc), R(RSCRATCH)); ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExternalExceptions)); SetJumpTarget(noExtException); @@ -168,8 +161,8 @@ void Jit64AsmRoutineManager::GenerateCommon() const u8 *fastMemWrite8 = AlignCode16(); CMP(32, R(ABI_PARAM2), Imm32(0xCC008000)); FixupBranch skip_fast_write = J_CC(CC_NE, false); - MOV(32, EAX, M(&m_gatherPipeCount)); - MOV(8, MDisp(EAX, (u32)&m_gatherPipe), ABI_PARAM1); + MOV(32, RSCRATCH, M(&m_gatherPipeCount)); + MOV(8, MDisp(RSCRATCH, (u32)&m_gatherPipe), ABI_PARAM1); ADD(32, 1, M(&m_gatherPipeCount)); RET(); SetJumpTarget(skip_fast_write); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp index 65b2cad7e8..ddeddb1fb4 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp @@ -46,12 +46,12 @@ void Jit64::rfi(UGeckoInstruction inst) const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13] // MSR = ((MSR & ~mask) | (SRR1 & mask)) & clearMSR13; AND(32, PPCSTATE(msr), Imm32((~mask) & clearMSR13)); - MOV(32, R(EAX), PPCSTATE_SRR1); - AND(32, R(EAX), Imm32(mask & clearMSR13)); - OR(32, PPCSTATE(msr), R(EAX)); + MOV(32, R(RSCRATCH), PPCSTATE_SRR1); + AND(32, R(RSCRATCH), Imm32(mask & clearMSR13)); + OR(32, PPCSTATE(msr), R(RSCRATCH)); // NPC = SRR0; - MOV(32, R(EAX), PPCSTATE_SRR0); - WriteRfiExitDestInEAX(); + MOV(32, R(RSCRATCH), PPCSTATE_SRR0); + WriteRfiExitDestInRSCRATCH(); } void Jit64::bx(UGeckoInstruction inst) @@ -164,11 +164,11 @@ void Jit64::bcctrx(UGeckoInstruction inst) gpr.Flush(); fpr.Flush(); - MOV(32, R(EAX), PPCSTATE_CTR); + MOV(32, R(RSCRATCH), PPCSTATE_CTR); if (inst.LK_3) MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4; - AND(32, R(EAX), Imm32(0xFFFFFFFC)); - WriteExitDestInEAX(); + AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC)); + WriteExitDestInRSCRATCH(); } else { @@ -179,15 +179,15 @@ void Jit64::bcctrx(UGeckoInstruction inst) FixupBranch b = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), !(inst.BO_2 & BO_BRANCH_IF_TRUE)); - MOV(32, R(EAX), PPCSTATE_CTR); - AND(32, R(EAX), Imm32(0xFFFFFFFC)); - //MOV(32, PPCSTATE(pc), R(EAX)); => Already done in WriteExitDestInEAX() + MOV(32, R(RSCRATCH), PPCSTATE_CTR); + AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC)); + //MOV(32, PPCSTATE(pc), R(RSCRATCH)); => Already done in WriteExitDestInRSCRATCH() if (inst.LK_3) MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4; gpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE); - WriteExitDestInEAX(); + WriteExitDestInRSCRATCH(); // Would really like to continue the block here, but it ends. TODO.
SetJumpTarget(b); @@ -224,14 +224,14 @@ void Jit64::bclrx(UGeckoInstruction inst) AND(32, PPCSTATE(cr), Imm32(~(0xFF000000))); #endif - MOV(32, R(EAX), PPCSTATE_LR); - AND(32, R(EAX), Imm32(0xFFFFFFFC)); + MOV(32, R(RSCRATCH), PPCSTATE_LR); + AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC)); if (inst.LK) MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); gpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE); - WriteExitDestInEAX(); + WriteExitDestInRSCRATCH(); if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) SetJumpTarget( pConditionDontBranch ); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index 88a23e8a19..54c5f22275 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -271,14 +271,14 @@ void Jit64::fcmpx(UGeckoInstruction inst) pGreater = J_CC(CC_B); } - MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_EQ))); + MOV(64, R(RSCRATCH), Imm64(PPCCRToInternal(CR_EQ))); if (fprf) OR(32, PPCSTATE(fpscr), Imm32(CR_EQ << FPRF_SHIFT)); continue1 = J(); SetJumpTarget(pNaN); - MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_SO))); + MOV(64, R(RSCRATCH), Imm64(PPCCRToInternal(CR_SO))); if (fprf) OR(32, PPCSTATE(fpscr), Imm32(CR_SO << FPRF_SHIFT)); @@ -287,13 +287,13 @@ void Jit64::fcmpx(UGeckoInstruction inst) continue2 = J(); SetJumpTarget(pGreater); - MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_GT))); + MOV(64, R(RSCRATCH), Imm64(PPCCRToInternal(CR_GT))); if (fprf) OR(32, PPCSTATE(fpscr), Imm32(CR_GT << FPRF_SHIFT)); continue3 = J(); SetJumpTarget(pLesser); - MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_LT))); + MOV(64, R(RSCRATCH), Imm64(PPCCRToInternal(CR_LT))); if (fprf) OR(32, PPCSTATE(fpscr), Imm32(CR_LT << FPRF_SHIFT)); } @@ -305,7 +305,7 @@ void Jit64::fcmpx(UGeckoInstruction inst) SetJumpTarget(continue3); } - MOV(64, PPCSTATE(cr_val[crf]), R(RAX)); + MOV(64, PPCSTATE(cr_val[crf]), R(RSCRATCH)); fpr.UnlockAll(); } @@ -375,8 +375,7 @@ void Jit64::frsqrtex(UGeckoInstruction inst) int b = inst.FB; int d = inst.FD; - // rsqrtex requires ECX and EDX free - gpr.FlushLockX(ECX, EDX); + gpr.FlushLockX(RSCRATCH_EXTRA); fpr.Lock(b, d); fpr.BindToRegister(d, d == b); MOVSD(XMM0, fpr.R(b)); @@ -395,8 +394,7 @@ void Jit64::fresx(UGeckoInstruction inst) int b = inst.FB; int d = inst.FD; - // resx requires ECX and EDX free - gpr.FlushLockX(ECX, EDX); + gpr.FlushLockX(RSCRATCH_EXTRA); fpr.Lock(b, d); fpr.BindToRegister(d, d == b); MOVSD(XMM0, fpr.R(b)); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index dd4db95c55..6a37e9bd29 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -72,14 +72,14 @@ void Jit64::FinalizeCarryOverflow(bool oe, bool inv) } } -void Jit64::GetCarryEAXAndClear() +void Jit64::GetCarryRSCRATCHAndClear() { - MOV(32, R(EAX), PPCSTATE(spr[SPR_XER])); - BTR(32, R(EAX), Imm8(29)); + MOV(32, R(RSCRATCH), PPCSTATE(spr[SPR_XER])); + BTR(32, R(RSCRATCH), Imm8(29)); } -// Assumes that XER is in EAX and that the CA bit is clear. -void Jit64::FinalizeCarryGenerateOverflowEAX(bool oe, bool inv) +// Assumes that XER is in RSCRATCH and that the CA bit is clear. +void Jit64::FinalizeCarryGenerateOverflowRSCRATCH(bool oe, bool inv) { // USES_XER if (oe) @@ -87,29 +87,29 @@ void Jit64::FinalizeCarryGenerateOverflowEAX(bool oe, bool inv) FixupBranch jno = J_CC(CC_NO); // Do carry FixupBranch carry1 = J_CC(inv ? 
CC_C : CC_NC); - OR(32, R(EAX), Imm32(XER_CA_MASK)); + OR(32, R(RSCRATCH), Imm32(XER_CA_MASK)); SetJumpTarget(carry1); //XER[OV/SO] = 1 - OR(32, R(EAX), Imm32(XER_SO_MASK | XER_OV_MASK)); + OR(32, R(RSCRATCH), Imm32(XER_SO_MASK | XER_OV_MASK)); FixupBranch exit = J(); SetJumpTarget(jno); // Do carry FixupBranch carry2 = J_CC(inv ? CC_C : CC_NC); - OR(32, R(EAX), Imm32(XER_CA_MASK)); + OR(32, R(RSCRATCH), Imm32(XER_CA_MASK)); SetJumpTarget(carry2); //XER[OV] = 0 - AND(32, R(EAX), Imm32(~XER_OV_MASK)); + AND(32, R(RSCRATCH), Imm32(~XER_OV_MASK)); SetJumpTarget(exit); } else { // Do carry FixupBranch carry1 = J_CC(inv ? CC_C : CC_NC); - OR(32, R(EAX), Imm32(XER_CA_MASK)); + OR(32, R(RSCRATCH), Imm32(XER_CA_MASK)); SetJumpTarget(carry1); } - // Dump EAX back into XER - MOV(32, PPCSTATE(spr[SPR_XER]), R(EAX)); + // Dump RSCRATCH back into XER + MOV(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); } // Assumes that the flags were just set through an addition. @@ -132,8 +132,8 @@ void Jit64::ComputeRC(const Gen::OpArg & arg) } else { - MOVSX(64, 32, RAX, arg); - MOV(64, PPCSTATE(cr_val[0]), R(RAX)); + MOVSX(64, 32, RSCRATCH, arg); + MOV(64, PPCSTATE(cr_val[0]), R(RSCRATCH)); } } @@ -374,8 +374,8 @@ void Jit64::cmpXX(UGeckoInstruction inst) else compareResult = CR_LT; } - MOV(64, R(RAX), Imm64(PPCCRToInternal(compareResult))); - MOV(64, PPCSTATE(cr_val[crf]), R(RAX)); + MOV(64, R(RSCRATCH), Imm64(PPCCRToInternal(compareResult))); + MOV(64, PPCSTATE(cr_val[crf]), R(RSCRATCH)); gpr.UnlockAll(); if (merge_branch) @@ -406,16 +406,16 @@ void Jit64::cmpXX(UGeckoInstruction inst) { if (js.next_inst.LK) MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4)); - MOV(32, R(EAX), PPCSTATE_CTR); - AND(32, R(EAX), Imm32(0xFFFFFFFC)); - WriteExitDestInEAX(); + MOV(32, R(RSCRATCH), PPCSTATE_CTR); + AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC)); + WriteExitDestInRSCRATCH(); } else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx { - MOV(32, R(EAX), PPCSTATE_LR); + MOV(32, R(RSCRATCH), PPCSTATE_LR); if (js.next_inst.LK) MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4)); - WriteExitDestInEAX(); + WriteExitDestInRSCRATCH(); } else { @@ -436,32 +436,32 @@ void Jit64::cmpXX(UGeckoInstruction inst) if (signedCompare) { if (gpr.R(a).IsImm()) - MOV(64, R(RAX), Imm32((s32)gpr.R(a).offset)); + MOV(64, R(RSCRATCH), Imm32((s32)gpr.R(a).offset)); else - MOVSX(64, 32, RAX, gpr.R(a)); + MOVSX(64, 32, RSCRATCH, gpr.R(a)); if (!comparand.IsImm()) { - MOVSX(64, 32, RDX, comparand); - comparand = R(RDX); + MOVSX(64, 32, RSCRATCH2, comparand); + comparand = R(RSCRATCH2); } } else { if (gpr.R(a).IsImm()) - MOV(32, R(RAX), Imm32((u32)gpr.R(a).offset)); + MOV(32, R(RSCRATCH), Imm32((u32)gpr.R(a).offset)); else - MOVZX(64, 32, RAX, gpr.R(a)); + MOVZX(64, 32, RSCRATCH, gpr.R(a)); if (comparand.IsImm()) - MOV(32, R(RDX), comparand); + MOV(32, R(RSCRATCH2), comparand); else - MOVZX(64, 32, RDX, comparand); + MOVZX(64, 32, RSCRATCH2, comparand); - comparand = R(RDX); + comparand = R(RSCRATCH2); } - SUB(64, R(RAX), comparand); - MOV(64, PPCSTATE(cr_val[crf]), R(RAX)); + SUB(64, R(RSCRATCH), comparand); + MOV(64, PPCSTATE(cr_val[crf]), R(RSCRATCH)); if (merge_branch) { @@ -506,19 +506,19 @@ void Jit64::cmpXX(UGeckoInstruction inst) if (js.next_inst.LK) MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4)); - MOV(32, R(EAX), PPCSTATE_CTR); - AND(32, R(EAX), Imm32(0xFFFFFFFC)); - WriteExitDestInEAX(); + MOV(32, R(RSCRATCH), PPCSTATE_CTR); + AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC)); + WriteExitDestInRSCRATCH(); } else if ((js.next_inst.OPCD 
== 19) && (js.next_inst.SUBOP10 == 16)) // bclrx { - MOV(32, R(EAX), PPCSTATE_LR); - AND(32, R(EAX), Imm32(0xFFFFFFFC)); + MOV(32, R(RSCRATCH), PPCSTATE_LR); + AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC)); if (js.next_inst.LK) MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4)); - WriteExitDestInEAX(); + WriteExitDestInRSCRATCH(); } else { @@ -636,9 +636,9 @@ void Jit64::boolX(UGeckoInstruction inst) } else { - MOV(32, R(EAX), operand); - NOT(32, R(EAX)); - AND(32, gpr.R(a), R(EAX)); + MOV(32, R(RSCRATCH), operand); + NOT(32, R(RSCRATCH)); + AND(32, gpr.R(a), R(RSCRATCH)); } } else if (inst.SUBOP10 == 444) // orx @@ -659,9 +659,9 @@ void Jit64::boolX(UGeckoInstruction inst) } else { - MOV(32, R(EAX), operand); - NOT(32, R(EAX)); - OR(32, gpr.R(a), R(EAX)); + MOV(32, R(RSCRATCH), operand); + NOT(32, R(RSCRATCH)); + OR(32, gpr.R(a), R(RSCRATCH)); } } else if (inst.SUBOP10 == 316) // xorx @@ -755,11 +755,7 @@ void Jit64::extsbx(UGeckoInstruction inst) { gpr.Lock(a, s); gpr.BindToRegister(a, a == s, true); - // Always force moving to EAX because it isn't possible - // to refer to the lowest byte of some registers, at least in - // 32-bit mode. - MOV(32, R(EAX), gpr.R(s)); - MOVSX(32, 8, gpr.RX(a), R(AL)); // watch out for ah and friends + MOVSX(32, 8, gpr.RX(a), gpr.R(s)); gpr.UnlockAll(); } @@ -863,9 +859,9 @@ void Jit64::subfcx(UGeckoInstruction inst) } else if (d == a) { - MOV(32, R(EAX), gpr.R(a)); + MOV(32, R(RSCRATCH), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(b)); - SUB(32, gpr.R(d), R(EAX)); + SUB(32, gpr.R(d), R(RSCRATCH)); } else { @@ -887,7 +883,7 @@ void Jit64::subfex(UGeckoInstruction inst) gpr.Lock(a, b, d); gpr.BindToRegister(d, (d == a || d == b), true); - GetCarryEAXAndClear(); + GetCarryRSCRATCHAndClear(); bool invertedCarry = false; if (d == b) @@ -908,7 +904,7 @@ void Jit64::subfex(UGeckoInstruction inst) NOT(32, gpr.R(d)); ADC(32, gpr.R(d), gpr.R(b)); } - FinalizeCarryGenerateOverflowEAX(inst.OE, invertedCarry); + FinalizeCarryGenerateOverflowRSCRATCH(inst.OE, invertedCarry); if (inst.Rc) ComputeRC(gpr.R(d)); @@ -924,14 +920,14 @@ void Jit64::subfmex(UGeckoInstruction inst) gpr.Lock(a, d); gpr.BindToRegister(d, d == a); - GetCarryEAXAndClear(); + GetCarryRSCRATCHAndClear(); if (d != a) { MOV(32, gpr.R(d), gpr.R(a)); } NOT(32, gpr.R(d)); ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); - FinalizeCarryGenerateOverflowEAX(inst.OE); + FinalizeCarryGenerateOverflowRSCRATCH(inst.OE); if (inst.Rc) ComputeRC(gpr.R(d)); gpr.UnlockAll(); @@ -947,14 +943,14 @@ void Jit64::subfzex(UGeckoInstruction inst) gpr.Lock(a, d); gpr.BindToRegister(d, d == a); - GetCarryEAXAndClear(); + GetCarryRSCRATCHAndClear(); if (d != a) { MOV(32, gpr.R(d), gpr.R(a)); } NOT(32, gpr.R(d)); ADC(32, gpr.R(d), Imm8(0)); - FinalizeCarryGenerateOverflowEAX(inst.OE); + FinalizeCarryGenerateOverflowRSCRATCH(inst.OE); if (inst.Rc) ComputeRC(gpr.R(d)); @@ -990,9 +986,9 @@ void Jit64::subfx(UGeckoInstruction inst) } else if (d == a) { - MOV(32, R(EAX), gpr.R(a)); + MOV(32, R(RSCRATCH), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(b)); - SUB(32, gpr.R(d), R(EAX)); + SUB(32, gpr.R(d), R(RSCRATCH)); } else { @@ -1171,9 +1167,9 @@ void Jit64::mulhwXx(UGeckoInstruction inst) else { gpr.Lock(a, b, d); + // no register choice + gpr.FlushLockX(EDX, EAX); gpr.BindToRegister(d, (d == a || d == b), true); - if (gpr.RX(d) == EDX) - PanicAlert("mulhwux : WTF"); MOV(32, R(EAX), gpr.R(a)); gpr.KillImmediate(b, true, false); if (sign) @@ -1252,11 +1248,11 @@ void Jit64::divwux(UGeckoInstruction inst) // If failed, use slower round-down method gpr.Lock(a, b, d); 
gpr.BindToRegister(d, d == a, true); - MOV(32, R(EAX), Imm32(magic)); + MOV(32, R(RSCRATCH), Imm32(magic)); if (d != a) MOV(32, gpr.R(d), gpr.R(a)); - IMUL(64, gpr.RX(d), R(RAX)); - ADD(64, gpr.R(d), R(RAX)); + IMUL(64, gpr.RX(d), R(RSCRATCH)); + ADD(64, gpr.R(d), R(RSCRATCH)); SHR(64, gpr.R(d), Imm8(shift+32)); } else @@ -1267,8 +1263,8 @@ void Jit64::divwux(UGeckoInstruction inst) gpr.BindToRegister(d, false, true); if (d == a) { - MOV(32, R(EAX), Imm32(magic+1)); - IMUL(64, gpr.RX(d), R(RAX)); + MOV(32, R(RSCRATCH), Imm32(magic+1)); + IMUL(64, gpr.RX(d), R(RSCRATCH)); } else { @@ -1288,6 +1284,8 @@ void Jit64::divwux(UGeckoInstruction inst) else { gpr.Lock(a, b, d); + // no register choice (do we need to do this?) + gpr.FlushLockX(EAX, EDX); gpr.BindToRegister(d, (d == a || d == b), true); MOV(32, R(EAX), gpr.R(a)); XOR(32, R(EDX), R(EDX)); @@ -1299,7 +1297,7 @@ void Jit64::divwux(UGeckoInstruction inst) { GenerateConstantOverflow(true); } - //MOV(32, R(EAX), gpr.R(d)); + //MOV(32, R(RAX), gpr.R(d)); FixupBranch end = J(); SetJumpTarget(not_div_by_zero); DIV(32, gpr.R(b)); @@ -1348,6 +1346,8 @@ void Jit64::divwx(UGeckoInstruction inst) else { gpr.Lock(a, b, d); + // no register choice + gpr.FlushLockX(EAX, EDX); gpr.BindToRegister(d, (d == a || d == b), true); MOV(32, R(EAX), gpr.R(a)); CDQ(); @@ -1456,9 +1456,9 @@ void Jit64::addex(UGeckoInstruction inst) gpr.Lock(a, b, d); gpr.BindToRegister(d, true); - GetCarryEAXAndClear(); + GetCarryRSCRATCHAndClear(); ADC(32, gpr.R(d), gpr.R((d == a) ? b : a)); - FinalizeCarryGenerateOverflowEAX(inst.OE); + FinalizeCarryGenerateOverflowRSCRATCH(inst.OE); if (inst.Rc) ComputeRC(gpr.R(d)); gpr.UnlockAll(); @@ -1468,10 +1468,10 @@ void Jit64::addex(UGeckoInstruction inst) gpr.Lock(a, b, d); gpr.BindToRegister(d, false); - GetCarryEAXAndClear(); + GetCarryRSCRATCHAndClear(); MOV(32, gpr.R(d), gpr.R(a)); ADC(32, gpr.R(d), gpr.R(b)); - FinalizeCarryGenerateOverflowEAX(inst.OE); + FinalizeCarryGenerateOverflowRSCRATCH(inst.OE); if (inst.Rc) ComputeRC(gpr.R(d)); gpr.UnlockAll(); @@ -1522,9 +1522,9 @@ void Jit64::addmex(UGeckoInstruction inst) gpr.Lock(d); gpr.BindToRegister(d, true); - GetCarryEAXAndClear(); + GetCarryRSCRATCHAndClear(); ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); - FinalizeCarryGenerateOverflowEAX(inst.OE); + FinalizeCarryGenerateOverflowRSCRATCH(inst.OE); if (inst.Rc) ComputeRC(gpr.R(d)); gpr.UnlockAll(); @@ -1534,10 +1534,10 @@ void Jit64::addmex(UGeckoInstruction inst) gpr.Lock(a, d); gpr.BindToRegister(d, false); - GetCarryEAXAndClear(); + GetCarryRSCRATCHAndClear(); MOV(32, gpr.R(d), gpr.R(a)); ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); - FinalizeCarryGenerateOverflowEAX(inst.OE); + FinalizeCarryGenerateOverflowRSCRATCH(inst.OE); if (inst.Rc) ComputeRC(gpr.R(d)); gpr.UnlockAll(); @@ -1556,9 +1556,9 @@ void Jit64::addzex(UGeckoInstruction inst) gpr.Lock(d); gpr.BindToRegister(d, true); - GetCarryEAXAndClear(); + GetCarryRSCRATCHAndClear(); ADC(32, gpr.R(d), Imm8(0)); - FinalizeCarryGenerateOverflowEAX(inst.OE); + FinalizeCarryGenerateOverflowRSCRATCH(inst.OE); if (inst.Rc) ComputeRC(gpr.R(d)); gpr.UnlockAll(); @@ -1568,10 +1568,10 @@ void Jit64::addzex(UGeckoInstruction inst) gpr.Lock(a, d); gpr.BindToRegister(d, false); - GetCarryEAXAndClear(); + GetCarryRSCRATCHAndClear(); MOV(32, gpr.R(d), gpr.R(a)); ADC(32, gpr.R(d), Imm8(0)); - FinalizeCarryGenerateOverflowEAX(inst.OE); + FinalizeCarryGenerateOverflowRSCRATCH(inst.OE); if (inst.Rc) ComputeRC(gpr.R(d)); gpr.UnlockAll(); @@ -1689,25 +1689,25 @@ void Jit64::rlwimix(UGeckoInstruction inst) { 
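// (Sketch, not from the source: the three cases below in scalar terms are
//    mask == ~0u << SH       :  a = (a & ~mask) | (s << SH);
//    mask == (1u << SH) - 1  :  a = (a & ~mask) | (s >> (32 - SH));
//    any other mask          :  a ^= (rotl(s, SH) ^ a) & mask;
//  the xor-merge in the general case takes the bits inside mask from
//  rotl(s, SH) and keeps the bits outside mask from a, with one temporary.)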
if (mask == 0U - (1U << inst.SH)) { - MOV(32, R(EAX), gpr.R(s)); - SHL(32, R(EAX), Imm8(inst.SH)); + MOV(32, R(RSCRATCH), gpr.R(s)); + SHL(32, R(RSCRATCH), Imm8(inst.SH)); AND(32, gpr.R(a), Imm32(~mask)); - OR(32, gpr.R(a), R(EAX)); + OR(32, gpr.R(a), R(RSCRATCH)); } else if (mask == (1U << inst.SH) - 1) { - MOV(32, R(EAX), gpr.R(s)); - SHR(32, R(EAX), Imm8(32-inst.SH)); + MOV(32, R(RSCRATCH), gpr.R(s)); + SHR(32, R(RSCRATCH), Imm8(32-inst.SH)); AND(32, gpr.R(a), Imm32(~mask)); - OR(32, gpr.R(a), R(EAX)); + OR(32, gpr.R(a), R(RSCRATCH)); } else { - MOV(32, R(EAX), gpr.R(s)); - ROL(32, R(EAX), Imm8(inst.SH)); - XOR(32, R(EAX), gpr.R(a)); - AND(32, R(EAX), Imm32(mask)); - XOR(32, gpr.R(a), R(EAX)); + MOV(32, R(RSCRATCH), gpr.R(s)); + ROL(32, R(RSCRATCH), Imm8(inst.SH)); + XOR(32, R(RSCRATCH), gpr.R(a)); + AND(32, R(RSCRATCH), Imm32(mask)); + XOR(32, gpr.R(a), R(RSCRATCH)); } if (inst.Rc) @@ -1742,6 +1742,7 @@ void Jit64::rlwnmx(UGeckoInstruction inst) } else { + // no register choice gpr.FlushLockX(ECX); gpr.Lock(a, b, s); gpr.BindToRegister(a, (a == b || a == s), true); @@ -1809,6 +1810,7 @@ void Jit64::srwx(UGeckoInstruction inst) } else { + // no register choice gpr.FlushLockX(ECX); gpr.Lock(a, b, s); gpr.BindToRegister(a, (a == b || a == s), true); @@ -1847,6 +1849,7 @@ void Jit64::slwx(UGeckoInstruction inst) } else { + // no register choice gpr.FlushLockX(ECX); gpr.Lock(a, b, s); gpr.BindToRegister(a, (a == b || a == s), true); @@ -1887,9 +1890,9 @@ void Jit64::srawx(UGeckoInstruction inst) MOV(32, gpr.R(a), gpr.R(s)); SHL(64, gpr.R(a), Imm8(32)); SAR(64, gpr.R(a), R(ECX)); - MOV(32, R(EAX), gpr.R(a)); + MOV(32, R(RSCRATCH), gpr.R(a)); SHR(64, gpr.R(a), Imm8(32)); - TEST(32, gpr.R(a), R(EAX)); + TEST(32, gpr.R(a), R(RSCRATCH)); FixupBranch nocarry = J_CC(CC_Z); JitSetCA(); SetJumpTarget(nocarry); @@ -1914,16 +1917,16 @@ void Jit64::srawix(UGeckoInstruction inst) gpr.Lock(a, s); gpr.BindToRegister(a, a == s, true); JitClearCA(); - MOV(32, R(EAX), gpr.R(s)); + MOV(32, R(RSCRATCH), gpr.R(s)); if (a != s) { - MOV(32, gpr.R(a), R(EAX)); + MOV(32, gpr.R(a), R(RSCRATCH)); } SAR(32, gpr.R(a), Imm8(amount)); if (inst.Rc) ComputeRC(gpr.R(a)); - SHL(32, R(EAX), Imm8(32-amount)); - TEST(32, R(EAX), gpr.R(a)); + SHL(32, R(RSCRATCH), Imm8(32-amount)); + TEST(32, R(RSCRATCH), gpr.R(a)); FixupBranch nocarry = J_CC(CC_Z); JitSetCA(); SetJumpTarget(nocarry); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index c1a5913e37..ba9cf8b293 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -197,13 +197,13 @@ void Jit64::lXXx(UGeckoInstruction inst) else { // In this case we need an extra temporary register. - opAddress = R(RDX); + opAddress = R(RSCRATCH2); storeAddress = true; if (use_constant_offset) { if (gpr.R(a).IsSimpleReg() && offset != 0) { - LEA(32, RDX, MDisp(gpr.RX(a), offset)); + LEA(32, RSCRATCH2, MDisp(gpr.RX(a), offset)); } else { @@ -214,7 +214,7 @@ void Jit64::lXXx(UGeckoInstruction inst) } else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) { - LEA(32, RDX, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); + LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); } else { @@ -231,7 +231,7 @@ void Jit64::lXXx(UGeckoInstruction inst) if (update && storeAddress) { // We need to save the (usually scratch) address register for the update. 
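// (Illustrative note: registersInUse is a bitmask with one bit per X64Reg;
// the safe load/store helpers use it to decide which volatile registers to
// preserve around a possible slow-path C call, so setting the RSCRATCH2 bit
// here keeps the computed address alive for the post-load update.)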
- registersInUse |= (1 << RDX); + registersInUse |= (1 << RSCRATCH2); } SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, registersInUse, signExtend); @@ -274,11 +274,11 @@ void Jit64::dcbz(UGeckoInstruction inst) if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack) mem_mask |= Memory::ADDR_MASK_MEM1; - MOV(32, R(EAX), gpr.R(b)); + MOV(32, R(RSCRATCH), gpr.R(b)); if (a) - ADD(32, R(EAX), gpr.R(a)); - AND(32, R(EAX), Imm32(~31)); - TEST(32, R(EAX), Imm32(mem_mask)); + ADD(32, R(RSCRATCH), gpr.R(a)); + AND(32, R(RSCRATCH), Imm32(~31)); + TEST(32, R(RSCRATCH), Imm32(mem_mask)); FixupBranch fast = J_CC(CC_Z, true); // Should this code ever run? I can't find any games that use DCBZ on non-physical addresses, but @@ -286,14 +286,14 @@ void Jit64::dcbz(UGeckoInstruction inst) MOV(32, M(&PC), Imm32(jit->js.compilerPC)); u32 registersInUse = CallerSavedRegistersInUse(); ABI_PushRegistersAndAdjustStack(registersInUse, false); - ABI_CallFunctionR((void *)&Memory::ClearCacheLine, EAX); + ABI_CallFunctionR((void *)&Memory::ClearCacheLine, RSCRATCH); ABI_PopRegistersAndAdjustStack(registersInUse, false); FixupBranch exit = J(); SetJumpTarget(fast); PXOR(XMM0, R(XMM0)); - MOVAPS(MComplex(RBX, RAX, SCALE_1, 0), XMM0); - MOVAPS(MComplex(RBX, RAX, SCALE_1, 16), XMM0); + MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), XMM0); + MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 16), XMM0); SetJumpTarget(exit); } @@ -338,7 +338,7 @@ void Jit64::stX(UGeckoInstruction inst) // Helps external systems know which instruction triggered the write MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); - MOV(32, R(EDX), gpr.R(s)); + MOV(32, R(RSCRATCH2), gpr.R(s)); if (update) gpr.SetImmediate32(a, addr); @@ -362,8 +362,8 @@ void Jit64::stX(UGeckoInstruction inst) } else if (Memory::IsRAMAddress(addr)) { - MOV(32, R(EAX), gpr.R(s)); - WriteToConstRamAddress(accessSize, EAX, addr, true); + MOV(32, R(RSCRATCH), gpr.R(s)); + WriteToConstRamAddress(accessSize, RSCRATCH, addr, true); if (update) gpr.SetImmediate32(a, addr); return; @@ -399,15 +399,15 @@ void Jit64::stX(UGeckoInstruction inst) X64Reg reg_value; if (WriteClobbersRegValue(accessSize, /* swap */ true)) { - MOV(32, R(EDX), gpr.R(s)); - reg_value = EDX; + MOV(32, R(RSCRATCH2), gpr.R(s)); + reg_value = RSCRATCH2; } else { gpr.BindToRegister(s, true, false); reg_value = gpr.RX(s); } - SafeWriteRegToReg(reg_value, gpr.RX(a), accessSize, offset, CallerSavedRegistersInUse(), SAFE_LOADSTORE_CLOBBER_EAX_INSTEAD_OF_ADDR); + SafeWriteRegToReg(reg_value, gpr.RX(a), accessSize, offset, CallerSavedRegistersInUse(), SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR); if (update && offset) { @@ -440,16 +440,16 @@ void Jit64::stXx(UGeckoInstruction inst) { gpr.BindToRegister(a, true, true); ADD(32, gpr.R(a), gpr.R(b)); - MOV(32, R(EDX), gpr.R(a)); + MOV(32, R(RSCRATCH2), gpr.R(a)); } else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) { - LEA(32, EDX, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); + LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); } else { - MOV(32, R(EDX), gpr.R(a)); - ADD(32, R(EDX), gpr.R(b)); + MOV(32, R(RSCRATCH2), gpr.R(a)); + ADD(32, R(RSCRATCH2), gpr.R(b)); } int accessSize; @@ -473,15 +473,15 @@ void Jit64::stXx(UGeckoInstruction inst) X64Reg reg_value; if (WriteClobbersRegValue(accessSize, /* swap */ true)) { - MOV(32, R(EAX), gpr.R(s)); - reg_value = EAX; + MOV(32, R(RSCRATCH), gpr.R(s)); + reg_value = RSCRATCH; } else { gpr.BindToRegister(s, true, false); reg_value = gpr.RX(s); } - SafeWriteRegToReg(reg_value, 
EDX, accessSize, 0, CallerSavedRegistersInUse()); + SafeWriteRegToReg(reg_value, RSCRATCH2, accessSize, 0, CallerSavedRegistersInUse()); gpr.UnlockAll(); gpr.UnlockAllX(); @@ -494,14 +494,14 @@ void Jit64::lmw(UGeckoInstruction inst) JITDISABLE(bJITLoadStoreOff); // TODO: This doesn't handle rollback on DSI correctly - MOV(32, R(EDX), Imm32((u32)(s32)inst.SIMM_16)); + MOV(32, R(RSCRATCH2), Imm32((u32)(s32)inst.SIMM_16)); if (inst.RA) - ADD(32, R(EDX), gpr.R(inst.RA)); + ADD(32, R(RSCRATCH2), gpr.R(inst.RA)); for (int i = inst.RD; i < 32; i++) { - SafeLoadToReg(EAX, R(EDX), 32, (i - inst.RD) * 4, CallerSavedRegistersInUse() | (1 << ECX), false); + SafeLoadToReg(RSCRATCH, R(RSCRATCH2), 32, (i - inst.RD) * 4, CallerSavedRegistersInUse() | (1 << RSCRATCH_EXTRA), false); gpr.BindToRegister(i, false, true); - MOV(32, gpr.R(i), R(EAX)); + MOV(32, gpr.R(i), R(RSCRATCH)); } gpr.UnlockAllX(); } @@ -515,11 +515,11 @@ void Jit64::stmw(UGeckoInstruction inst) for (int i = inst.RD; i < 32; i++) { if (inst.RA) - MOV(32, R(EAX), gpr.R(inst.RA)); + MOV(32, R(RSCRATCH), gpr.R(inst.RA)); else - XOR(32, R(EAX), R(EAX)); - MOV(32, R(EDX), gpr.R(i)); - SafeWriteRegToReg(EDX, EAX, 32, (i - inst.RD) * 4 + (u32)(s32)inst.SIMM_16, CallerSavedRegistersInUse()); + XOR(32, R(RSCRATCH), R(RSCRATCH)); + MOV(32, R(RSCRATCH2), gpr.R(i)); + SafeWriteRegToReg(RSCRATCH2, RSCRATCH, 32, (i - inst.RD) * 4 + (u32)(s32)inst.SIMM_16, CallerSavedRegistersInUse()); } gpr.UnlockAllX(); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index 5880c22b7e..4e6ea7ad09 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -42,9 +42,9 @@ void Jit64::lfXXX(UGeckoInstruction inst) } else { - addr = R(EAX); + addr = R(RSCRATCH); if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) - LEA(32, EAX, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); + LEA(32, RSCRATCH, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); else { MOV(32, addr, gpr.R(b)); @@ -61,18 +61,18 @@ void Jit64::lfXXX(UGeckoInstruction inst) offset = (s32)(s16)inst.SIMM_16; } - SafeLoadToReg(RAX, addr, single ? 32 : 64, offset, CallerSavedRegistersInUse(), false); + SafeLoadToReg(RSCRATCH, addr, single ? 
32 : 64, offset, CallerSavedRegistersInUse(), false); fpr.Lock(d); fpr.BindToRegister(d, js.memcheck || !single); MEMCHECK_START if (single) { - ConvertSingleToDouble(fpr.RX(d), EAX, true); + ConvertSingleToDouble(fpr.RX(d), RSCRATCH, true); } else { - MOVQ_xmm(XMM0, R(RAX)); + MOVQ_xmm(XMM0, R(RSCRATCH)); MOVSD(fpr.RX(d), R(XMM0)); } MEMCHECK_END @@ -102,17 +102,17 @@ void Jit64::stfXXX(UGeckoInstruction inst) { gpr.BindToRegister(a, true, true); ADD(32, gpr.R(a), gpr.R(b)); - MOV(32, R(RDX), gpr.R(a)); + MOV(32, R(RSCRATCH2), gpr.R(a)); } else { if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) - LEA(32, RDX, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); + LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); else { - MOV(32, R(RDX), gpr.R(b)); + MOV(32, R(RSCRATCH2), gpr.R(b)); if (a) - ADD(32, R(RDX), gpr.R(a)); + ADD(32, R(RSCRATCH2), gpr.R(a)); } } } @@ -127,23 +127,23 @@ void Jit64::stfXXX(UGeckoInstruction inst) { offset = (s32)(s16)inst.SIMM_16; } - MOV(32, R(RDX), gpr.R(a)); + MOV(32, R(RSCRATCH2), gpr.R(a)); } if (single) { fpr.BindToRegister(s, true, false); ConvertDoubleToSingle(XMM0, fpr.RX(s)); - SafeWriteF32ToReg(XMM0, RDX, offset, CallerSavedRegistersInUse()); + SafeWriteF32ToReg(XMM0, RSCRATCH2, offset, CallerSavedRegistersInUse()); fpr.UnlockAll(); } else { if (fpr.R(s).IsSimpleReg()) - MOVQ_xmm(R(RAX), fpr.RX(s)); + MOVQ_xmm(R(RSCRATCH), fpr.RX(s)); else - MOV(64, R(RAX), fpr.R(s)); - SafeWriteRegToReg(RAX, RDX, 64, offset, CallerSavedRegistersInUse()); + MOV(64, R(RSCRATCH), fpr.R(s)); + SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 64, offset, CallerSavedRegistersInUse()); } gpr.UnlockAll(); gpr.UnlockAllX(); @@ -159,14 +159,14 @@ void Jit64::stfiwx(UGeckoInstruction inst) int a = inst.RA; int b = inst.RB; - MOV(32, R(RDX), gpr.R(b)); + MOV(32, R(RSCRATCH2), gpr.R(b)); if (a) - ADD(32, R(RDX), gpr.R(a)); + ADD(32, R(RSCRATCH2), gpr.R(a)); if (fpr.R(s).IsSimpleReg()) - MOVD_xmm(R(EAX), fpr.RX(s)); + MOVD_xmm(R(RSCRATCH), fpr.RX(s)); else - MOV(32, R(EAX), fpr.R(s)); - SafeWriteRegToReg(EAX, RDX, 32, 0, CallerSavedRegistersInUse()); + MOV(32, R(RSCRATCH), fpr.R(s)); + SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 32, 0, CallerSavedRegistersInUse()); gpr.UnlockAllX(); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp index 47cd9061ba..2630395630 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -28,36 +28,36 @@ void Jit64::psq_st(UGeckoInstruction inst) int a = inst.RA; int s = inst.RS; // Fp numbers - gpr.FlushLockX(EAX, ECX); + gpr.FlushLockX(RSCRATCH, RSCRATCH_EXTRA); if (update) gpr.BindToRegister(inst.RA, true, true); fpr.BindToRegister(inst.RS, true, false); - MOV(32, R(ECX), gpr.R(inst.RA)); + MOV(32, R(RSCRATCH_EXTRA), gpr.R(inst.RA)); if (offset) - ADD(32, R(ECX), Imm32((u32)offset)); + ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset)); if (update && offset) - MOV(32, gpr.R(a), R(ECX)); + MOV(32, gpr.R(a), R(RSCRATCH_EXTRA)); // Some games (e.g. Dirt 2) incorrectly set the unused bits which breaks the lookup table code. // Hence, we need to mask out the unused bits. The layout of the GQR register is // UU[SCALE]UUUUU[TYPE] where SCALE is 6 bits and TYPE is 3 bits, so we have to AND with // 0b0011111100000111, or 0x3F07. 
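// (Worked example, not from the source: with the layout UU[SCALE]UUUUU[TYPE]
// in a 16-bit halfword, TYPE occupies bits 0-2 (0x0007) and SCALE occupies
// bits 8-13 (0x3F00), so the combined mask is 0x3F00 | 0x0007 == 0x3F07 and
// the AND clears every unused bit.)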
- MOV(32, R(EAX), Imm32(0x3F07)); - AND(32, R(EAX), PPCSTATE(spr[SPR_GQR0 + inst.I])); - MOVZX(32, 8, EDX, R(AL)); + MOV(32, R(RSCRATCH), Imm32(0x3F07)); + AND(32, R(RSCRATCH), PPCSTATE(spr[SPR_GQR0 + inst.I])); + MOVZX(32, 8, RSCRATCH2, R(RSCRATCH)); - // FIXME: Fix ModR/M encoding to allow [EDX*4+disp32] without a base register! + // FIXME: Fix ModR/M encoding to allow [RSCRATCH2*4+disp32] without a base register! if (inst.W) { // One value PXOR(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions. CVTSD2SS(XMM0, fpr.R(s)); - CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)asm_routines.singleStoreQuantized)); + CALLptr(MScaled(RSCRATCH2, SCALE_8, (u32)(u64)asm_routines.singleStoreQuantized)); } else { // Pair of values CVTPD2PS(XMM0, fpr.R(s)); - CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized)); + CALLptr(MScaled(RSCRATCH2, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized)); } gpr.UnlockAll(); gpr.UnlockAllX(); @@ -72,23 +72,23 @@ void Jit64::psq_l(UGeckoInstruction inst) bool update = inst.OPCD == 57; int offset = inst.SIMM_12; - gpr.FlushLockX(EAX, ECX); + gpr.FlushLockX(RSCRATCH, RSCRATCH_EXTRA); gpr.BindToRegister(inst.RA, true, update && offset); fpr.BindToRegister(inst.RS, false, true); if (offset) - LEA(32, ECX, MDisp(gpr.RX(inst.RA), offset)); + LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(inst.RA), offset)); else - MOV(32, R(ECX), gpr.R(inst.RA)); + MOV(32, R(RSCRATCH_EXTRA), gpr.R(inst.RA)); if (update && offset) - MOV(32, gpr.R(inst.RA), R(ECX)); - MOV(32, R(EAX), Imm32(0x3F07)); - AND(32, R(EAX), M(((char *)&GQR(inst.I)) + 2)); - MOVZX(32, 8, EDX, R(AL)); + MOV(32, gpr.R(inst.RA), R(RSCRATCH_EXTRA)); + MOV(32, R(RSCRATCH), Imm32(0x3F07)); + AND(32, R(RSCRATCH), M(((char *)&GQR(inst.I)) + 2)); + MOVZX(32, 8, RSCRATCH2, R(RSCRATCH)); if (inst.W) - OR(32, R(EDX), Imm8(8)); + OR(32, R(RSCRATCH2), Imm8(8)); ABI_AlignStack(0); - CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)asm_routines.pairedLoadQuantized)); + CALLptr(MScaled(RSCRATCH2, SCALE_8, (u32)(u64)asm_routines.pairedLoadQuantized)); ABI_RestoreStack(0); // MEMCHECK_START // FIXME: MMU does not work here because of unsafe memory access diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index 59ea14cf8d..f7278e9a55 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -42,40 +42,40 @@ void Jit64::GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate) void Jit64::SetCRFieldBit(int field, int bit, Gen::X64Reg in) { - MOV(64, R(RDX), PPCSTATE(cr_val[field])); + MOV(64, R(RSCRATCH2), PPCSTATE(cr_val[field])); MOVZX(32, 8, in, R(in)); switch (bit) { case CR_SO_BIT: // set bit 61 to input - BTR(64, R(RDX), Imm8(61)); + BTR(64, R(RSCRATCH2), Imm8(61)); SHL(64, R(in), Imm8(61)); - OR(64, R(RDX), R(in)); + OR(64, R(RSCRATCH2), R(in)); break; case CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input - SHR(64, R(RDX), Imm8(32)); - SHL(64, R(RDX), Imm8(32)); + SHR(64, R(RSCRATCH2), Imm8(32)); + SHL(64, R(RSCRATCH2), Imm8(32)); XOR(32, R(in), Imm8(1)); - OR(64, R(RDX), R(in)); + OR(64, R(RSCRATCH2), R(in)); break; case CR_GT_BIT: // set bit 63 to !input - BTR(64, R(RDX), Imm8(63)); + BTR(64, R(RSCRATCH2), Imm8(63)); NOT(32, R(in)); SHL(64, R(in), Imm8(63)); - OR(64, R(RDX), R(in)); + OR(64, R(RSCRATCH2), R(in)); break; case CR_LT_BIT: // set bit 62 to input - BTR(64, R(RDX), Imm8(62)); + BTR(64, R(RSCRATCH2), 
Imm8(62)); SHL(64, R(in), Imm8(62)); - OR(64, R(RDX), R(in)); + OR(64, R(RSCRATCH2), R(in)); break; } - BTS(64, R(RDX), Imm8(32)); - MOV(64, PPCSTATE(cr_val[field]), R(RDX)); + BTS(64, R(RSCRATCH2), Imm8(32)); + MOV(64, PPCSTATE(cr_val[field]), R(RSCRATCH2)); } FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) @@ -173,8 +173,10 @@ void Jit64::mfspr(UGeckoInstruction inst) // typical use of this instruction is to call it three times, e.g. mftbu/mftbl/mftbu/cmpw/bne // to deal with possible timer wraparound. This makes the second two (out of three) completely // redundant for the JIT. + // no register choice + + gpr.FlushLockX(RDX, RAX); u32 offset = js.downcountAmount / SystemTimers::TIMER_RATIO; - gpr.FlushLockX(EDX); // An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the // cost of calling out to C for this is actually significant. @@ -205,14 +207,14 @@ void Jit64::mfspr(UGeckoInstruction inst) gpr.BindToRegister(d, false); gpr.BindToRegister(n, false); if (iIndex == SPR_TL) - MOV(32, gpr.R(d), R(EAX)); + MOV(32, gpr.R(d), R(RAX)); if (nextIndex == SPR_TL) - MOV(32, gpr.R(n), R(EAX)); + MOV(32, gpr.R(n), R(RAX)); SHR(64, R(RAX), Imm8(32)); if (iIndex == SPR_TU) - MOV(32, gpr.R(d), R(EAX)); + MOV(32, gpr.R(d), R(RAX)); if (nextIndex == SPR_TU) - MOV(32, gpr.R(n), R(EAX)); + MOV(32, gpr.R(n), R(RAX)); } else { @@ -220,8 +222,9 @@ void Jit64::mfspr(UGeckoInstruction inst) gpr.BindToRegister(d, false); if (iIndex == SPR_TU) SHR(64, R(RAX), Imm8(32)); - MOV(32, gpr.R(d), R(EAX)); + MOV(32, gpr.R(d), R(RAX)); } + gpr.UnlockAllX(); break; } case SPR_WPAR: @@ -238,7 +241,6 @@ void Jit64::mfspr(UGeckoInstruction inst) break; } gpr.UnlockAll(); - gpr.UnlockAllX(); } void Jit64::mtmsr(UGeckoInstruction inst) @@ -308,9 +310,9 @@ void Jit64::mfcr(UGeckoInstruction inst) gpr.BindToRegister(d, false, true); XOR(32, gpr.R(d), gpr.R(d)); - X64Reg cr_val = RDX; - // we only need to zero the high bits of EAX once - XOR(32, R(EAX), R(EAX)); + X64Reg cr_val = RSCRATCH2; + // we only need to zero the high bits of RSCRATCH once + XOR(32, R(RSCRATCH), R(RSCRATCH)); for (int i = 0; i < 8; i++) { static const u8 m_flagTable[8] = {0x0,0x1,0x8,0x9,0x0,0x1,0x8,0x9}; @@ -321,19 +323,19 @@ void Jit64::mfcr(UGeckoInstruction inst) // EQ: Bits 31-0 == 0; set flag bit 1 TEST(32, R(cr_val), R(cr_val)); - SETcc(CC_Z, R(EAX)); - LEA(32, gpr.RX(d), MComplex(gpr.RX(d), EAX, SCALE_2, 0)); + SETcc(CC_Z, R(RSCRATCH)); + LEA(32, gpr.RX(d), MComplex(gpr.RX(d), RSCRATCH, SCALE_2, 0)); // GT: Value > 0; set flag bit 2 TEST(64, R(cr_val), R(cr_val)); - SETcc(CC_G, R(EAX)); - LEA(32, gpr.RX(d), MComplex(gpr.RX(d), EAX, SCALE_4, 0)); + SETcc(CC_G, R(RSCRATCH)); + LEA(32, gpr.RX(d), MComplex(gpr.RX(d), RSCRATCH, SCALE_4, 0)); // SO: Bit 61 set; set flag bit 0 // LT: Bit 62 set; set flag bit 3 SHR(64, R(cr_val), Imm8(61)); - MOVZX(32, 8, EAX, MDisp(cr_val, (u32)(u64)m_flagTable)); - OR(32, gpr.R(d), R(EAX)); + MOVZX(32, 8, RSCRATCH, MDisp(cr_val, (u32)(u64)m_flagTable)); + OR(32, gpr.R(d), R(RSCRATCH)); } gpr.UnlockAll(); @@ -363,8 +365,8 @@ void Jit64::mtcrf(UGeckoInstruction inst) } else { - MOV(64, R(RAX), Imm64(newcrval)); - MOV(64, PPCSTATE(cr_val[i]), R(RAX)); + MOV(64, R(RSCRATCH), Imm64(newcrval)); + MOV(64, PPCSTATE(cr_val[i]), R(RSCRATCH)); } } } @@ -377,13 +379,13 @@ void Jit64::mtcrf(UGeckoInstruction inst) { if ((crm & (0x80 >> i)) != 0) { - MOV(32, R(EAX), gpr.R(inst.RS)); + MOV(32, R(RSCRATCH), gpr.R(inst.RS)); if (i != 7) - SHR(32, R(EAX), Imm8(28 - (i * 4))); + 
SHR(32, R(RSCRATCH), Imm8(28 - (i * 4))); if (i != 0) - AND(32, R(EAX), Imm8(0xF)); - MOV(64, R(EAX), MScaled(EAX, SCALE_8, (u32)(u64)m_crTable)); - MOV(64, PPCSTATE(cr_val[i]), R(EAX)); + AND(32, R(RSCRATCH), Imm8(0xF)); + MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_8, (u32)(u64)m_crTable)); + MOV(64, PPCSTATE(cr_val[i]), R(RSCRATCH)); } } gpr.UnlockAll(); @@ -399,8 +401,8 @@ void Jit64::mcrf(UGeckoInstruction inst) // USES_CR if (inst.CRFS != inst.CRFD) { - MOV(64, R(EAX), PPCSTATE(cr_val[inst.CRFS])); - MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(EAX)); + MOV(64, R(RSCRATCH), PPCSTATE(cr_val[inst.CRFS])); + MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH)); } } @@ -412,11 +414,11 @@ void Jit64::mcrxr(UGeckoInstruction inst) // USES_CR // Copy XER[0-3] into CR[inst.CRFD] - MOV(32, R(EAX), PPCSTATE(spr[SPR_XER])); - SHR(32, R(EAX), Imm8(28)); + MOV(32, R(RSCRATCH), PPCSTATE(spr[SPR_XER])); + SHR(32, R(RSCRATCH), Imm8(28)); - MOV(64, R(EAX), MScaled(EAX, SCALE_8, (u32)(u64)m_crTable)); - MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(EAX)); + MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_8, (u32)(u64)m_crTable)); + MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH)); // Clear XER[0-3] AND(32, PPCSTATE(spr[SPR_XER]), Imm32(0x0FFFFFFF)); @@ -438,8 +440,8 @@ void Jit64::crXXX(UGeckoInstruction inst) // crnand or crnor bool negateB = inst.SUBOP10 == 225 || inst.SUBOP10 == 33; - GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), DL, negateA); - GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), AL, negateB); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), RSCRATCH2, negateA); + GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), RSCRATCH, negateB); // Compute combined bit switch (inst.SUBOP10) @@ -447,23 +449,23 @@ void Jit64::crXXX(UGeckoInstruction inst) case 33: // crnor: ~(A || B) == (~A && ~B) case 129: // crandc case 257: // crand - AND(8, R(AL), R(DL)); + AND(8, R(RSCRATCH), R(RSCRATCH2)); break; case 193: // crxor case 289: // creqv - XOR(8, R(AL), R(DL)); + XOR(8, R(RSCRATCH), R(RSCRATCH2)); break; case 225: // crnand: ~(A && B) == (~A || ~B) case 417: // crorc case 449: // cror - OR(8, R(AL), R(DL)); + OR(8, R(RSCRATCH), R(RSCRATCH2)); break; } // Store result bit in CRBD - SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), AL); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), RSCRATCH); gpr.UnlockAllX(); } diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp index 8ae451ed34..d266023df5 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp @@ -604,22 +604,22 @@ static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size) { auto info = regBuildMemAddress(RI, I, getOp2(I), 2, Size, nullptr); if (info.first.IsImm()) - RI.Jit->MOV(32, R(EDX), info.first); + RI.Jit->MOV(32, R(RSCRATCH2), info.first); else - RI.Jit->LEA(32, EDX, MDisp(info.first.GetSimpleReg(), info.second)); + RI.Jit->LEA(32, RSCRATCH2, MDisp(info.first.GetSimpleReg(), info.second)); - regSpill(RI, EAX); + regSpill(RI, RSCRATCH); if (isImm(*getOp1(I))) { - RI.Jit->MOV(Size, R(EAX), regImmForConst(RI, getOp1(I), Size)); + RI.Jit->MOV(Size, R(RSCRATCH), regImmForConst(RI, getOp1(I), Size)); } else { - RI.Jit->MOV(32, R(EAX), regLocForInst(RI, getOp1(I))); + RI.Jit->MOV(32, R(RSCRATCH), regLocForInst(RI, getOp1(I))); } - RI.Jit->SafeWriteRegToReg(EAX, EDX, Size, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); + RI.Jit->SafeWriteRegToReg(RSCRATCH, RSCRATCH2, Size, 0, regsInUse(RI), 
EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); if (RI.IInfo[I - RI.FirstI] & 4) regClearInst(RI, getOp1(I)); } @@ -677,9 +677,9 @@ static void regEmitCmp(RegInfo& RI, InstLoc I) static void regEmitICmpInst(RegInfo& RI, InstLoc I, CCFlags flag) { regEmitCmp(RI, I); - RI.Jit->SETcc(flag, R(EDX)); // Caution: SETCC uses 8-bit regs! + RI.Jit->SETcc(flag, R(RSCRATCH2)); // Caution: SETCC uses 8-bit regs! X64Reg reg = regBinReg(RI, I); - RI.Jit->MOVZX(32, 8, reg, R(EDX)); + RI.Jit->MOVZX(32, 8, reg, R(RSCRATCH2)); RI.regs[reg] = I; regNormalRegClear(RI, I); } @@ -709,8 +709,8 @@ static void regEmitICmpCRInst(RegInfo& RI, InstLoc I) unsigned RHS = RI.Build->GetImmValue(getOp2(I)); if (!signed_compare && (RHS & 0x80000000U)) { - RI.Jit->MOV(32, R(EAX), Imm32(RHS)); - RI.Jit->SUB(64, R(reg), R(RAX)); + RI.Jit->MOV(32, R(RSCRATCH), Imm32(RHS)); + RI.Jit->SUB(64, R(reg), R(RSCRATCH)); } else if (RHS) { @@ -720,10 +720,10 @@ static void regEmitICmpCRInst(RegInfo& RI, InstLoc I) else { if (signed_compare) - RI.Jit->MOVSX(64, 32, RAX, regLocForInst(RI, getOp2(I))); + RI.Jit->MOVSX(64, 32, RSCRATCH, regLocForInst(RI, getOp2(I))); else - RI.Jit->MOV(32, R(EAX), regLocForInst(RI, getOp2(I))); - RI.Jit->SUB(64, R(reg), R(RAX)); + RI.Jit->MOV(32, R(RSCRATCH), regLocForInst(RI, getOp2(I))); + RI.Jit->SUB(64, R(reg), R(RSCRATCH)); } RI.regs[reg] = I; @@ -1069,12 +1069,12 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) // If some exceptions are pending and EE are now enabled, force checking // external exceptions when going out of mtmsr in order to execute delayed // interrupts as soon as possible. - Jit->MOV(32, R(EAX), PPCSTATE(msr)); - Jit->TEST(32, R(EAX), Imm32(0x8000)); + Jit->MOV(32, R(RSCRATCH), PPCSTATE(msr)); + Jit->TEST(32, R(RSCRATCH), Imm32(0x8000)); FixupBranch eeDisabled = Jit->J_CC(CC_Z); - Jit->MOV(32, R(EAX), PPCSTATE(Exceptions)); - Jit->TEST(32, R(EAX), R(EAX)); + Jit->MOV(32, R(RSCRATCH), PPCSTATE(Exceptions)); + Jit->TEST(32, R(RSCRATCH), R(RSCRATCH)); FixupBranch noExceptionsPending = Jit->J_CC(CC_Z); Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc + 4)); @@ -1113,11 +1113,11 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) } case StoreFPRF: { - Jit->MOV(32, R(EDX), regLocForInst(RI, getOp1(I))); - Jit->AND(32, R(EDX), Imm8(0x1F)); - Jit->SHL(32, R(EDX), Imm8(12)); + Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp1(I))); + Jit->AND(32, R(RSCRATCH2), Imm8(0x1F)); + Jit->SHL(32, R(RSCRATCH2), Imm8(12)); Jit->AND(32, PPCSTATE(fpscr), Imm32(~(0x1F << 12))); - Jit->OR(32, PPCSTATE(fpscr), R(EDX)); + Jit->OR(32, PPCSTATE(fpscr), R(RSCRATCH2)); regNormalRegClear(RI, I); break; } @@ -1157,8 +1157,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = regUReg(RI, I); - Jit->MOV(32, R(EDX), regLocForInst(RI, getOp1(I))); - Jit->MOVSX(32, 8, reg, R(EDX)); + Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp1(I))); + Jit->MOVSX(32, 8, reg, R(RSCRATCH2)); RI.regs[reg] = I; regNormalRegClear(RI, I); break; @@ -1180,9 +1180,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = regUReg(RI, I); - Jit->MOV(32, R(EDX), Imm32(63)); + Jit->MOV(32, R(RSCRATCH2), Imm32(63)); Jit->BSR(32, reg, regLocForInst(RI, getOp1(I))); - Jit->CMOVcc(32, reg, R(EDX), CC_Z); + Jit->CMOVcc(32, reg, R(RSCRATCH2), CC_Z); Jit->XOR(32, R(reg), Imm8(31)); RI.regs[reg] = I; regNormalRegClear(RI, I); @@ -1267,6 +1267,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) if (!thisUsed) 
break; + // no register choice regSpill(RI, EAX); regSpill(RI, EDX); X64Reg reg = regBinReg(RI, I); @@ -1421,35 +1422,35 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg cr_val = regUReg(RI, I); Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I))); - Jit->XOR(32, R(EAX), R(EAX)); + Jit->XOR(32, R(RSCRATCH), R(RSCRATCH)); // SO: Bit 61 set. - Jit->MOV(64, R(RDX), R(cr_val)); - Jit->SHR(64, R(RDX), Imm8(61)); - Jit->AND(32, R(EDX), Imm8(1)); - Jit->OR(32, R(EAX), R(EDX)); + Jit->MOV(64, R(RSCRATCH2), R(cr_val)); + Jit->SHR(64, R(RSCRATCH2), Imm8(61)); + Jit->AND(32, R(RSCRATCH2), Imm8(1)); + Jit->OR(32, R(RSCRATCH), R(RSCRATCH2)); // EQ: Bits 31-0 == 0. - Jit->XOR(32, R(EDX), R(EDX)); + Jit->XOR(32, R(RSCRATCH2), R(RSCRATCH2)); Jit->TEST(32, R(cr_val), R(cr_val)); - Jit->SETcc(CC_Z, R(EDX)); - Jit->SHL(32, R(EDX), Imm8(1)); - Jit->OR(32, R(EAX), R(EDX)); + Jit->SETcc(CC_Z, R(RSCRATCH2)); + Jit->SHL(32, R(RSCRATCH2), Imm8(1)); + Jit->OR(32, R(RSCRATCH), R(RSCRATCH2)); // GT: Value > 0. - Jit->XOR(32, R(EDX), R(EDX)); + Jit->XOR(32, R(RSCRATCH2), R(RSCRATCH2)); Jit->TEST(64, R(cr_val), R(cr_val)); - Jit->SETcc(CC_G, R(EDX)); - Jit->SHL(32, R(EDX), Imm8(2)); - Jit->OR(32, R(EAX), R(EDX)); + Jit->SETcc(CC_G, R(RSCRATCH2)); + Jit->SHL(32, R(RSCRATCH2), Imm8(2)); + Jit->OR(32, R(RSCRATCH), R(RSCRATCH2)); // LT: Bit 62 set. - Jit->MOV(64, R(EDX), R(cr_val)); - Jit->SHR(64, R(EDX), Imm8(62 - 3)); - Jit->AND(32, R(EDX), Imm8(0x8)); - Jit->OR(32, R(EAX), R(EDX)); + Jit->MOV(64, R(RSCRATCH2), R(cr_val)); + Jit->SHR(64, R(RSCRATCH2), Imm8(62 - 3)); + Jit->AND(32, R(RSCRATCH2), Imm8(0x8)); + Jit->OR(32, R(RSCRATCH), R(RSCRATCH2)); - Jit->MOV(32, R(cr_val), R(EAX)); + Jit->MOV(32, R(cr_val), R(RSCRATCH)); RI.regs[cr_val] = I; regNormalRegClear(RI, I); break; @@ -1462,34 +1463,34 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg cr_val = regUReg(RI, I); Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I))); - Jit->MOV(64, R(RDX), Imm64(1ull << 32)); + Jit->MOV(64, R(RSCRATCH2), Imm64(1ull << 32)); // SO - Jit->MOV(64, R(RAX), R(cr_val)); - Jit->SHL(64, R(RAX), Imm8(63)); - Jit->SHR(64, R(RAX), Imm8(63 - 61)); - Jit->OR(64, R(RDX), R(RAX)); + Jit->MOV(64, R(RSCRATCH), R(cr_val)); + Jit->SHL(64, R(RSCRATCH), Imm8(63)); + Jit->SHR(64, R(RSCRATCH), Imm8(63 - 61)); + Jit->OR(64, R(RSCRATCH2), R(RSCRATCH)); // EQ - Jit->MOV(64, R(RAX), R(cr_val)); - Jit->NOT(64, R(RAX)); - Jit->AND(64, R(RAX), Imm8(CR_EQ)); - Jit->OR(64, R(RDX), R(RAX)); + Jit->MOV(64, R(RSCRATCH), R(cr_val)); + Jit->NOT(64, R(RSCRATCH)); + Jit->AND(64, R(RSCRATCH), Imm8(CR_EQ)); + Jit->OR(64, R(RSCRATCH2), R(RSCRATCH)); // GT - Jit->MOV(64, R(RAX), R(cr_val)); - Jit->NOT(64, R(RAX)); - Jit->AND(64, R(RAX), Imm8(CR_GT)); - Jit->SHL(64, R(RAX), Imm8(63 - 2)); - Jit->OR(64, R(RDX), R(RAX)); + Jit->MOV(64, R(RSCRATCH), R(cr_val)); + Jit->NOT(64, R(RSCRATCH)); + Jit->AND(64, R(RSCRATCH), Imm8(CR_GT)); + Jit->SHL(64, R(RSCRATCH), Imm8(63 - 2)); + Jit->OR(64, R(RSCRATCH2), R(RSCRATCH)); // LT - Jit->MOV(64, R(RAX), R(cr_val)); - Jit->AND(64, R(RAX), Imm8(CR_LT)); - Jit->SHL(64, R(RAX), Imm8(62 - 3)); - Jit->OR(64, R(RDX), R(RAX)); + Jit->MOV(64, R(RSCRATCH), R(cr_val)); + Jit->AND(64, R(RSCRATCH), Imm8(CR_LT)); + Jit->SHL(64, R(RSCRATCH), Imm8(62 - 3)); + Jit->OR(64, R(RSCRATCH2), R(RSCRATCH)); - Jit->MOV(64, R(cr_val), R(RDX)); + Jit->MOV(64, R(cr_val), R(RSCRATCH2)); RI.regs[cr_val] = I; regNormalRegClear(RI, I); @@ -1501,10 +1502,10 @@ static void DoWriteCode(IRBuilder* ibuild, 
JitIL* Jit, u32 exitAddress) break; X64Reg reg = regUReg(RI, I); - Jit->MOV(64, R(RAX), Imm64(1ull << 61)); - Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RAX)); - Jit->SETcc(CC_NZ, R(AL)); - Jit->MOVZX(32, 8, reg, R(AL)); + Jit->MOV(64, R(RSCRATCH), Imm64(1ull << 61)); + Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RSCRATCH)); + Jit->SETcc(CC_NZ, R(RSCRATCH)); + Jit->MOVZX(32, 8, reg, R(RSCRATCH)); RI.regs[reg] = I; regNormalRegClear(RI, I); break; @@ -1516,8 +1517,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = regUReg(RI, I); Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm32(0)); - Jit->SETcc(CC_Z, R(AL)); - Jit->MOVZX(32, 8, reg, R(AL)); + Jit->SETcc(CC_Z, R(RSCRATCH)); + Jit->MOVZX(32, 8, reg, R(RSCRATCH)); RI.regs[reg] = I; regNormalRegClear(RI, I); break; @@ -1529,8 +1530,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = regUReg(RI, I); Jit->CMP(64, regLocForInst(RI, getOp1(I)), Imm8(0)); - Jit->SETcc(CC_G, R(AL)); - Jit->MOVZX(32, 8, reg, R(AL)); + Jit->SETcc(CC_G, R(RSCRATCH)); + Jit->MOVZX(32, 8, reg, R(RSCRATCH)); RI.regs[reg] = I; regNormalRegClear(RI, I); break; @@ -1541,10 +1542,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = regUReg(RI, I); - Jit->MOV(64, R(RAX), Imm64(1ull << 62)); - Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RAX)); - Jit->SETcc(CC_NZ, R(AL)); - Jit->MOVZX(32, 8, reg, R(AL)); + Jit->MOV(64, R(RSCRATCH), Imm64(1ull << 62)); + Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RSCRATCH)); + Jit->SETcc(CC_NZ, R(RSCRATCH)); + Jit->MOVZX(32, 8, reg, R(RSCRATCH)); RI.regs[reg] = I; regNormalRegClear(RI, I); break; @@ -1555,9 +1556,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = fregFindFreeReg(RI); - Jit->MOV(32, R(EDX), regLocForInst(RI, getOp1(I))); - RI.Jit->SafeLoadToReg(EDX, R(EDX), 32, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); - Jit->MOVD_xmm(reg, R(EDX)); + Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp1(I))); + RI.Jit->SafeLoadToReg(RSCRATCH2, R(RSCRATCH2), 32, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); + Jit->MOVD_xmm(reg, R(RSCRATCH2)); RI.fregs[reg] = I; regNormalRegClear(RI, I); break; @@ -1569,9 +1570,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = fregFindFreeReg(RI); const OpArg loc = regLocForInst(RI, getOp1(I)); - Jit->MOV(32, R(EDX), loc); - RI.Jit->SafeLoadToReg(RDX, R(EDX), 64, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); - Jit->MOVQ_xmm(reg, R(RDX)); + Jit->MOV(32, R(RSCRATCH2), loc); + RI.Jit->SafeLoadToReg(RSCRATCH2, R(RSCRATCH2), 64, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); + Jit->MOVQ_xmm(reg, R(RSCRATCH2)); RI.fregs[reg] = I; regNormalRegClear(RI, I); break; @@ -1581,8 +1582,6 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) if (!thisUsed) break; - regSpill(RI, EAX); - regSpill(RI, EDX); X64Reg reg = fregFindFreeReg(RI); // The lower 3 bits is for GQR index. The next 1 bit is for inst.W unsigned int quantreg = (*I >> 16) & 0x7; @@ -1591,12 +1590,12 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) // Hence, we need to mask out the unused bits. The layout of the GQR register is // UU[SCALE]UUUUU[TYPE] where SCALE is 6 bits and TYPE is 3 bits, so we have to AND with // 0b0011111100000111, or 0x3F07. 
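// (Sketch of the dispatch convention, as far as this patch shows it: TYPE
// stays in bits 0-2, with inst.W folded in as bit 3 to select the routine,
// while SCALE stays in bits 8-13, where the quantized routines can turn it
// into a float-table offset with a single shift: (scale << 8) >> 6 == scale * 4.)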
- Jit->MOV(32, R(EAX), Imm32(0x3F07)); - Jit->AND(32, R(EAX), M(((char *)&GQR(quantreg)) + 2)); - Jit->OR(32, R(EAX), Imm8(w << 3)); + Jit->MOV(32, R(RSCRATCH), Imm32(0x3F07)); + Jit->AND(32, R(RSCRATCH), M(((char *)&GQR(quantreg)) + 2)); + Jit->OR(32, R(RSCRATCH), Imm8(w << 3)); - Jit->MOV(32, R(EDX), regLocForInst(RI, getOp1(I))); - Jit->CALLptr(MScaled(EAX, SCALE_8, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedLoadQuantized))); + Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp1(I))); + Jit->CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedLoadQuantized))); Jit->MOVAPD(reg, R(XMM0)); RI.fregs[reg] = I; regNormalRegClear(RI, I); @@ -1604,15 +1603,15 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) } case StoreSingle: { - regSpill(RI, EAX); + regSpill(RI, RSCRATCH); const OpArg loc1 = fregLocForInst(RI, getOp1(I)); if (loc1.IsSimpleReg()) - Jit->MOVD_xmm(R(EAX), loc1.GetSimpleReg()); + Jit->MOVD_xmm(R(RSCRATCH), loc1.GetSimpleReg()); else - Jit->MOV(32, R(EAX), loc1); + Jit->MOV(32, R(RSCRATCH), loc1); - Jit->MOV(32, R(EDX), regLocForInst(RI, getOp2(I))); - RI.Jit->SafeWriteRegToReg(EAX, EDX, 32, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); + Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp2(I))); + RI.Jit->SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 32, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); if (RI.IInfo[I - RI.FirstI] & 4) fregClearInst(RI, getOp1(I)); if (RI.IInfo[I - RI.FirstI] & 8) @@ -1621,14 +1620,14 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) } case StoreDouble: { - regSpill(RI, EAX); + regSpill(RI, RSCRATCH); OpArg value = fregLocForInst(RI, getOp1(I)); OpArg address = regLocForInst(RI, getOp2(I)); Jit->MOVAPD(XMM0, value); - Jit->MOVQ_xmm(R(RAX), XMM0); - Jit->MOV(32, R(EDX), address); - RI.Jit->SafeWriteRegToReg(RAX, EDX, 64, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); + Jit->MOVQ_xmm(R(RSCRATCH), XMM0); + Jit->MOV(32, R(RSCRATCH2), address); + RI.Jit->SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 64, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); if (RI.IInfo[I - RI.FirstI] & 4) fregClearInst(RI, getOp1(I)); @@ -1638,16 +1637,16 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) } case StorePaired: { - regSpill(RI, EAX); - regSpill(RI, EDX); + regSpill(RI, RSCRATCH); + regSpill(RI, RSCRATCH2); u32 quantreg = *I >> 24; - Jit->MOV(32, R(EAX), Imm32(0x3F07)); - Jit->AND(32, R(EAX), PPCSTATE(spr[SPR_GQR0 + quantreg])); - Jit->MOVZX(32, 8, EDX, R(AL)); + Jit->MOV(32, R(RSCRATCH), Imm32(0x3F07)); + Jit->AND(32, R(RSCRATCH), PPCSTATE(spr[SPR_GQR0 + quantreg])); + Jit->MOVZX(32, 8, RSCRATCH2, R(RSCRATCH)); - Jit->MOV(32, R(EDX), regLocForInst(RI, getOp2(I))); + Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp2(I))); Jit->MOVAPD(XMM0, fregLocForInst(RI, getOp1(I))); - Jit->CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedStoreQuantized))); + Jit->CALLptr(MScaled(RSCRATCH2, SCALE_8, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedStoreQuantized))); if (RI.IInfo[I - RI.FirstI] & 4) fregClearInst(RI, getOp1(I)); if (RI.IInfo[I - RI.FirstI] & 8) @@ -1791,9 +1790,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = fregFindFreeReg(RI); unsigned ppcreg = *I >> 8; char *p = (char*)&(PowerPC::ppcState.ps[ppcreg][0]); - Jit->MOV(32, R(EDX), M(p+4)); - Jit->AND(32, R(EDX), Imm32(0x7ff00000)); - Jit->CMP(32, R(EDX), Imm32(0x38000000)); + Jit->MOV(32, R(RSCRATCH2), 
M(p+4)); + Jit->AND(32, R(RSCRATCH2), Imm32(0x7ff00000)); + Jit->CMP(32, R(RSCRATCH2), Imm32(0x38000000)); FixupBranch ok = Jit->J_CC(CC_AE); Jit->AND(32, M(p+4), Imm32(0x80000000)); Jit->MOV(32, M(p), Imm32(0)); @@ -1912,7 +1911,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) Jit->MOVSD(M(isSNANTemp[1]), XMM0); } Jit->ABI_CallFunction((void*)checkIsSNAN); - Jit->TEST(8, R(EAX), R(EAX)); + Jit->TEST(8, R(ABI_RETURN), R(ABI_RETURN)); FixupBranch ok = Jit->J_CC(CC_Z); Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); // FPSCR.FX = 1; Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXSNAN)); // FPSCR.Hex |= mask; @@ -1941,7 +1940,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) Jit->MOVSD(M(isSNANTemp[1]), XMM0); } Jit->ABI_CallFunction((void*)checkIsSNAN); - Jit->TEST(8, R(EAX), R(EAX)); + Jit->TEST(8, R(ABI_RETURN), R(ABI_RETURN)); FixupBranch finish = Jit->J_CC(CC_Z); Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); // FPSCR.FX = 1; Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; @@ -2195,8 +2194,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) } case InterpreterBranch: { - Jit->MOV(32, R(EAX), PPCSTATE(npc)); - Jit->WriteExitDestInOpArg(R(EAX)); + Jit->MOV(32, R(RSCRATCH), PPCSTATE(npc)); + Jit->WriteExitDestInOpArg(R(RSCRATCH)); break; } case RFIExit: @@ -2204,17 +2203,17 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) // See Interpreter rfi for details const u32 mask = 0x87C0FFFF; // MSR = (MSR & ~mask) | (SRR1 & mask); - Jit->MOV(32, R(EAX), PPCSTATE(msr)); - Jit->MOV(32, R(EDX), PPCSTATE_SRR1); - Jit->AND(32, R(EAX), Imm32(~mask)); - Jit->AND(32, R(EDX), Imm32(mask)); - Jit->OR(32, R(EAX), R(EDX)); + Jit->MOV(32, R(RSCRATCH), PPCSTATE(msr)); + Jit->MOV(32, R(RSCRATCH2), PPCSTATE_SRR1); + Jit->AND(32, R(RSCRATCH), Imm32(~mask)); + Jit->AND(32, R(RSCRATCH2), Imm32(mask)); + Jit->OR(32, R(RSCRATCH), R(RSCRATCH2)); // MSR &= 0xFFFBFFFF; // Mask used to clear the bit MSR[13] - Jit->AND(32, R(EAX), Imm32(0xFFFBFFFF)); - Jit->MOV(32, PPCSTATE(msr), R(EAX)); + Jit->AND(32, R(RSCRATCH), Imm32(0xFFFBFFFF)); + Jit->MOV(32, PPCSTATE(msr), R(RSCRATCH)); // NPC = SRR0; - Jit->MOV(32, R(EAX), PPCSTATE_SRR0); - Jit->WriteRfiExitDestInOpArg(R(EAX)); + Jit->MOV(32, R(RSCRATCH), PPCSTATE_SRR0); + Jit->WriteRfiExitDestInOpArg(R(RSCRATCH)); break; } case FPExceptionCheck: @@ -2255,8 +2254,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) Jit->OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI)); // Remove the invalid instruction from the icache, forcing a recompile - Jit->MOV(64, R(RAX), ImmPtr(jit->GetBlockCache()->GetICachePtr(InstLoc))); - Jit->MOV(32, MatR(RAX), Imm32(JIT_ICACHE_INVALID_WORD)); + Jit->MOV(64, R(RSCRATCH), ImmPtr(jit->GetBlockCache()->GetICachePtr(InstLoc))); + Jit->MOV(32, MatR(RSCRATCH), Imm32(JIT_ICACHE_INVALID_WORD)); Jit->WriteExceptionExit(); break; } diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp index dcc43c33e1..8f1c36e1fa 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp @@ -320,8 +320,8 @@ void JitIL::WriteCallInterpreter(UGeckoInstruction inst) ABI_CallFunctionC((void*)instr, inst.hex); if (js.isLastInstruction) { - MOV(32, R(EAX), PPCSTATE(npc)); - WriteRfiExitDestInOpArg(R(EAX)); + MOV(32, R(RSCRATCH), PPCSTATE(npc)); + WriteRfiExitDestInOpArg(R(RSCRATCH)); } } @@ -341,8 +341,8 @@ void 
JitIL::FallBackToInterpreter(UGeckoInstruction _inst) void JitIL::HLEFunction(UGeckoInstruction _inst) { ABI_CallFunctionCC((void*)&HLE::Execute, js.compilerPC, _inst.hex); - MOV(32, R(EAX), PPCSTATE(npc)); - WriteExitDestInOpArg(R(EAX)); + MOV(32, R(RSCRATCH), PPCSTATE(npc)); + WriteExitDestInOpArg(R(RSCRATCH)); } void JitIL::DoNothing(UGeckoInstruction _inst) diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp index c7b35dd186..d5cce9882e 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp @@ -9,8 +9,13 @@ #include "Core/PowerPC/JitCommon/JitAsmCommon.h" #include "Core/PowerPC/JitCommon/JitBase.h" -#define QUANTIZED_REGS_TO_SAVE (ABI_ALL_CALLER_SAVED & ~((1 << RAX) | (1 << RCX) | \ - (1 << (XMM0+16)) | (1 << (XMM1+16)))) +#define QUANTIZED_REGS_TO_SAVE \ + (ABI_ALL_CALLER_SAVED & ~(\ + (1 << RSCRATCH) | \ + (1 << RSCRATCH2) | \ + (1 << RSCRATCH_EXTRA)| \ + (1 << (XMM0+16)) | \ + (1 << (XMM1+16)))) using namespace Gen; @@ -18,12 +23,12 @@ static int temp32; void CommonAsmRoutines::GenFifoWrite(int size) { - // Assume value in EDX + // Assume value in RSCRATCH2 PUSH(ESI); - MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe)); + MOV(32, R(RSCRATCH), Imm32((u32)(u64)GPFifo::m_gatherPipe)); MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount)); - SwapAndStore(size, MComplex(RAX, RSI, 1, 0), EDX); + SwapAndStore(size, MComplex(RSCRATCH, ESI, 1, 0), RSCRATCH2); ADD(32, R(ESI), Imm8(size >> 3)); MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI)); @@ -36,10 +41,10 @@ void CommonAsmRoutines::GenFifoFloatWrite() // Assume value in XMM0 PUSH(ESI); MOVSS(M(&temp32), XMM0); - MOV(32, R(EDX), M(&temp32)); - MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe)); + MOV(32, R(RSCRATCH2), M(&temp32)); + MOV(32, R(RSCRATCH), Imm32((u32)(u64)GPFifo::m_gatherPipe)); MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount)); - SwapAndStore(32, MComplex(RAX, RSI, 1, 0), EDX); + SwapAndStore(32, MComplex(RSCRATCH, RSI, 1, 0), RSCRATCH2); ADD(32, R(ESI), Imm8(4)); MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI)); POP(ESI); @@ -49,40 +54,40 @@ void CommonAsmRoutines::GenFifoFloatWrite() void CommonAsmRoutines::GenFrsqrte() { // Assume input in XMM0. - // This function clobbers EAX, ECX, and EDX. - MOVQ_xmm(R(RAX), XMM0); + // This function clobbers all three RSCRATCH. + MOVQ_xmm(R(RSCRATCH), XMM0); // Negative and zero inputs set an exception and take the complex path. - TEST(64, R(RAX), R(RAX)); + TEST(64, R(RSCRATCH), R(RSCRATCH)); FixupBranch zero = J_CC(CC_Z, true); FixupBranch negative = J_CC(CC_S, true); - MOV(64, R(RCX), R(RAX)); - SHR(64, R(RCX), Imm8(52)); + MOV(64, R(RSCRATCH_EXTRA), R(RSCRATCH)); + SHR(64, R(RSCRATCH_EXTRA), Imm8(52)); // Zero and max exponents (non-normal floats) take the complex path. 
FixupBranch complex1 = J_CC(CC_Z, true); - CMP(32, R(ECX), Imm32(0x7FF)); + CMP(32, R(RSCRATCH_EXTRA), Imm32(0x7FF)); FixupBranch complex2 = J_CC(CC_E, true); - SUB(32, R(ECX), Imm32(0x3FD)); - SAR(32, R(ECX), Imm8(1)); - MOV(32, R(EDX), Imm32(0x3FF)); - SUB(32, R(EDX), R(ECX)); - SHL(64, R(RDX), Imm8(52)); // exponent = ((0x3FFLL << 52) - ((exponent - (0x3FELL << 52)) / 2)) & (0x7FFLL << 52); + SUB(32, R(RSCRATCH_EXTRA), Imm32(0x3FD)); + SAR(32, R(RSCRATCH_EXTRA), Imm8(1)); + MOV(32, R(RSCRATCH2), Imm32(0x3FF)); + SUB(32, R(RSCRATCH2), R(RSCRATCH_EXTRA)); + SHL(64, R(RSCRATCH2), Imm8(52)); // exponent = ((0x3FFLL << 52) - ((exponent - (0x3FELL << 52)) / 2)) & (0x7FFLL << 52); - MOV(64, R(RCX), R(RAX)); - SHR(64, R(RCX), Imm8(48)); - AND(32, R(ECX), Imm8(0x1F)); - XOR(32, R(ECX), Imm8(0x10)); // int index = i / 2048 + (odd_exponent ? 16 : 0); + MOV(64, R(RSCRATCH_EXTRA), R(RSCRATCH)); + SHR(64, R(RSCRATCH_EXTRA), Imm8(48)); + AND(32, R(RSCRATCH_EXTRA), Imm8(0x1F)); + XOR(32, R(RSCRATCH_EXTRA), Imm8(0x10)); // int index = i / 2048 + (odd_exponent ? 16 : 0); - SHR(64, R(RAX), Imm8(37)); - AND(32, R(EAX), Imm32(0x7FF)); - IMUL(32, EAX, MScaled(RCX, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_dec)); - MOV(32, R(ECX), MScaled(RCX, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_base)); - SUB(32, R(ECX), R(EAX)); - SHL(64, R(RCX), Imm8(26)); - OR(64, R(RDX), R(RCX)); // vali |= (s64)(frsqrte_expected_base[index] - frsqrte_expected_dec[index] * (i % 2048)) << 26; - MOVQ_xmm(XMM0, R(RDX)); + SHR(64, R(RSCRATCH), Imm8(37)); + AND(32, R(RSCRATCH), Imm32(0x7FF)); + IMUL(32, RSCRATCH, MScaled(RSCRATCH_EXTRA, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_dec)); + MOV(32, R(RSCRATCH_EXTRA), MScaled(RSCRATCH_EXTRA, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_base)); + SUB(32, R(RSCRATCH_EXTRA), R(RSCRATCH)); + SHL(64, R(RSCRATCH_EXTRA), Imm8(26)); + OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(frsqrte_expected_base[index] - frsqrte_expected_dec[index] * (i % 2048)) << 26; + MOVQ_xmm(XMM0, R(RSCRATCH2)); RET(); // Exception flags for zero input. @@ -114,44 +119,44 @@ void CommonAsmRoutines::GenFrsqrte() void CommonAsmRoutines::GenFres() { // Assume input in XMM0. - // This function clobbers EAX, ECX, and EDX. - MOVQ_xmm(R(RAX), XMM0); + // This function clobbers all three RSCRATCH. + MOVQ_xmm(R(RSCRATCH), XMM0); // Zero inputs set an exception and take the complex path. - TEST(64, R(RAX), R(RAX)); + TEST(64, R(RSCRATCH), R(RSCRATCH)); FixupBranch zero = J_CC(CC_Z); - MOV(64, R(RCX), R(RAX)); - SHR(64, R(RCX), Imm8(52)); - MOV(32, R(EDX), R(ECX)); - AND(32, R(ECX), Imm32(0x7FF)); // exp - AND(32, R(EDX), Imm32(0x800)); // sign - CMP(32, R(ECX), Imm32(895)); + MOV(64, R(RSCRATCH_EXTRA), R(RSCRATCH)); + SHR(64, R(RSCRATCH_EXTRA), Imm8(52)); + MOV(32, R(RSCRATCH2), R(RSCRATCH_EXTRA)); + AND(32, R(RSCRATCH_EXTRA), Imm32(0x7FF)); // exp + AND(32, R(RSCRATCH2), Imm32(0x800)); // sign + CMP(32, R(RSCRATCH_EXTRA), Imm32(895)); // Take the complex path for very large/small exponents. 
FixupBranch complex1 = J_CC(CC_L); - CMP(32, R(ECX), Imm32(1149)); + CMP(32, R(RSCRATCH_EXTRA), Imm32(1149)); FixupBranch complex2 = J_CC(CC_GE); - SUB(32, R(ECX), Imm32(0x7FD)); - NEG(32, R(ECX)); - OR(32, R(ECX), R(EDX)); - SHL(64, R(RCX), Imm8(52)); // vali = sign | exponent + SUB(32, R(RSCRATCH_EXTRA), Imm32(0x7FD)); + NEG(32, R(RSCRATCH_EXTRA)); + OR(32, R(RSCRATCH_EXTRA), R(RSCRATCH2)); + SHL(64, R(RSCRATCH_EXTRA), Imm8(52)); // vali = sign | exponent - MOV(64, R(RDX), R(RAX)); - SHR(64, R(RAX), Imm8(37)); - SHR(64, R(RDX), Imm8(47)); - AND(32, R(EAX), Imm32(0x3FF)); // i % 1024 - AND(32, R(RDX), Imm8(0x1F)); // i / 1024 + MOV(64, R(RSCRATCH2), R(RSCRATCH)); + SHR(64, R(RSCRATCH), Imm8(37)); + SHR(64, R(RSCRATCH2), Imm8(47)); + AND(32, R(RSCRATCH), Imm32(0x3FF)); // i % 1024 + AND(32, R(RSCRATCH2), Imm8(0x1F)); // i / 1024 - IMUL(32, EAX, MScaled(RDX, SCALE_4, (u32)(u64)MathUtil::fres_expected_dec)); - ADD(32, R(EAX), Imm8(1)); - SHR(32, R(EAX), Imm8(1)); + IMUL(32, RSCRATCH, MScaled(RSCRATCH2, SCALE_4, (u32)(u64)MathUtil::fres_expected_dec)); + ADD(32, R(RSCRATCH), Imm8(1)); + SHR(32, R(RSCRATCH), Imm8(1)); - MOV(32, R(EDX), MScaled(RDX, SCALE_4, (u32)(u64)MathUtil::fres_expected_base)); - SUB(32, R(EDX), R(EAX)); - SHL(64, R(RDX), Imm8(29)); - OR(64, R(RDX), R(RCX)); // vali |= (s64)(fres_expected_base[i / 1024] - (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2) << 29 - MOVQ_xmm(XMM0, R(RDX)); + MOV(32, R(RSCRATCH2), MScaled(RSCRATCH2, SCALE_4, (u32)(u64)MathUtil::fres_expected_base)); + SUB(32, R(RSCRATCH2), R(RSCRATCH)); + SHL(64, R(RSCRATCH2), Imm8(29)); + OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(fres_expected_base[i / 1024] - (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2) << 29 + MOVQ_xmm(XMM0, R(RSCRATCH2)); RET(); // Exception flags for zero input. 
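Note: in scalar terms, the fres fast path above computes the commented formula directly. A minimal C++ sketch, assuming Dolphin's u32/u64 typedefs and the MathUtil::fres_expected_base/_dec tables that the MScaled operands index (the emitted assembly, not this sketch, is authoritative):

static u64 FresFastPath(u64 vali) // vali = bit pattern of the input double
{
	u32 exp  = (u32)(vali >> 52) & 0x7FF; // biased exponent
	u32 sign = (u32)(vali >> 52) & 0x800; // sign bit, still shifted down
	// Inputs with exp < 895 or exp >= 1149 take the complex path instead.
	u64 result = (u64)(sign | (0x7FD - exp)) << 52; // negated, rebiased exponent
	u32 i   = (u32)(vali >> 37) & 0x3FF;  // i % 1024: interpolation term
	u32 idx = (u32)(vali >> 47) & 0x1F;   // i / 1024: table index
	result |= (u64)(MathUtil::fres_expected_base[idx] -
	                (MathUtil::fres_expected_dec[idx] * i + 1) / 2) << 29;
	return result; // bit pattern of the reciprocal estimate
}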
@@ -247,21 +252,21 @@ void CommonAsmRoutines::GenQuantizedStores() SHUFPS(XMM0, R(XMM0), 1); MOVQ_xmm(M(&psTemp[0]), XMM0); - TEST(32, R(ECX), Imm32(0x0C000000)); + TEST(32, R(RSCRATCH_EXTRA), Imm32(0x0C000000)); FixupBranch too_complex = J_CC(CC_NZ, true); - MOV(64, R(RAX), M(&psTemp[0])); - SwapAndStore(64, MComplex(RBX, RCX, SCALE_1, 0), RAX); + MOV(64, R(RSCRATCH), M(&psTemp[0])); + SwapAndStore(64, MComplex(RMEM, RSCRATCH_EXTRA, SCALE_1, 0), RSCRATCH); FixupBranch skip_complex = J(true); SetJumpTarget(too_complex); ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true); - ABI_CallFunctionR((void *)&WriteDual32, RCX); + ABI_CallFunctionR((void *)&WriteDual32, RSCRATCH_EXTRA); ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true); SetJumpTarget(skip_complex); RET(); const u8* storePairedU8 = AlignCode4(); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS)); PUNPCKLDQ(XMM1, R(XMM1)); MULPS(XMM0, R(XMM1)); #ifdef QUANTIZE_OVERFLOW_SAFE @@ -272,14 +277,14 @@ void CommonAsmRoutines::GenQuantizedStores() CVTTPS2DQ(XMM0, R(XMM0)); PACKSSDW(XMM0, R(XMM0)); PACKUSWB(XMM0, R(XMM0)); - MOVD_xmm(R(EAX), XMM0); - SafeWriteRegToReg(AX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + MOVD_xmm(R(RSCRATCH), XMM0); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); RET(); const u8* storePairedS8 = AlignCode4(); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS)); PUNPCKLDQ(XMM1, R(XMM1)); MULPS(XMM0, R(XMM1)); #ifdef QUANTIZE_OVERFLOW_SAFE @@ -290,15 +295,15 @@ void CommonAsmRoutines::GenQuantizedStores() CVTTPS2DQ(XMM0, R(XMM0)); PACKSSDW(XMM0, R(XMM0)); PACKSSWB(XMM0, R(XMM0)); - MOVD_xmm(R(EAX), XMM0); + MOVD_xmm(R(RSCRATCH), XMM0); - SafeWriteRegToReg(AX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); RET(); const u8* storePairedU16 = AlignCode4(); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS)); PUNPCKLDQ(XMM1, R(XMM1)); MULPS(XMM0, R(XMM1)); @@ -313,18 +318,18 @@ void CommonAsmRoutines::GenQuantizedStores() MOVQ_xmm(M(psTemp), XMM0); // place ps[0] into the higher word, ps[1] into the lower // so no need in ROL after BSWAP - MOVZX(32, 16, EAX, M((char*)psTemp + 0)); - SHL(32, R(EAX), Imm8(16)); - MOV(16, R(AX), M((char*)psTemp + 4)); + MOVZX(32, 16, RSCRATCH, M((char*)psTemp + 0)); + SHL(32, R(RSCRATCH), Imm8(16)); + MOV(16, R(RSCRATCH), M((char*)psTemp + 4)); - BSWAP(32, EAX); - SafeWriteRegToReg(EAX, ECX, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + BSWAP(32, RSCRATCH); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); RET(); const u8* storePairedS16 = AlignCode4(); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + 
SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS)); // SHUFPS or UNPCKLPS might be a better choice here. The last one might just be an alias though. PUNPCKLDQ(XMM1, R(XMM1)); MULPS(XMM0, R(XMM1)); @@ -335,10 +340,10 @@ void CommonAsmRoutines::GenQuantizedStores() #endif CVTTPS2DQ(XMM0, R(XMM0)); PACKSSDW(XMM0, R(XMM0)); - MOVD_xmm(R(EAX), XMM0); - BSWAP(32, EAX); - ROL(32, R(EAX), Imm8(16)); - SafeWriteRegToReg(EAX, ECX, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + MOVD_xmm(R(RSCRATCH), XMM0); + BSWAP(32, RSCRATCH); + ROL(32, R(RSCRATCH), Imm8(16)); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); RET(); @@ -363,7 +368,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() // Easy! const u8* storeSingleFloat = AlignCode4(); - SafeWriteF32ToReg(XMM0, ECX, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + SafeWriteF32ToReg(XMM0, RSCRATCH_EXTRA, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); RET(); /* if (cpu_info.bSSSE3) @@ -371,56 +376,56 @@ void CommonAsmRoutines::GenQuantizedSingleStores() PSHUFB(XMM0, M((void *)pbswapShuffle2x4)); // TODO: SafeWriteFloat MOVSS(M(&psTemp[0]), XMM0); - MOV(32, R(EAX), M(&psTemp[0])); - SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + MOV(32, R(RSCRATCH), M(&psTemp[0])); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); } else { MOVSS(M(&psTemp[0]), XMM0); - MOV(32, R(EAX), M(&psTemp[0])); - SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + MOV(32, R(RSCRATCH), M(&psTemp[0])); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); }*/ const u8* storeSingleU8 = AlignCode4(); // Used by MKWii - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS)); MULSS(XMM0, R(XMM1)); PXOR(XMM1, R(XMM1)); MAXSS(XMM0, R(XMM1)); MINSS(XMM0, M((void *)&m_255)); - CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(AL, ECX, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + CVTTSS2SI(RSCRATCH, R(XMM0)); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); RET(); const u8* storeSingleS8 = AlignCode4(); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS)); MULSS(XMM0, R(XMM1)); MAXSS(XMM0, M((void *)&m_m128)); MINSS(XMM0, M((void *)&m_127)); - CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(AL, ECX, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + CVTTSS2SI(RSCRATCH, R(XMM0)); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); RET(); const u8* storeSingleU16 = AlignCode4(); // Used by MKWii - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS)); MULSS(XMM0, R(XMM1)); PXOR(XMM1, R(XMM1)); 
MAXSS(XMM0, R(XMM1)); MINSS(XMM0, M((void *)&m_65535)); - CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(EAX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + CVTTSS2SI(RSCRATCH, R(XMM0)); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); RET(); const u8* storeSingleS16 = AlignCode4(); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS)); MULSS(XMM0, R(XMM1)); MAXSS(XMM0, M((void *)&m_m32768)); MINSS(XMM0, M((void *)&m_32767)); - CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(EAX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + CVTTSS2SI(RSCRATCH, R(XMM0)); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); RET(); singleStoreQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16())); @@ -444,126 +449,126 @@ void CommonAsmRoutines::GenQuantizedLoads() const u8* loadPairedFloatTwo = AlignCode4(); if (cpu_info.bSSSE3) { - MOVQ_xmm(XMM0, MComplex(RBX, RCX, 1, 0)); + MOVQ_xmm(XMM0, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0)); PSHUFB(XMM0, M((void *)pbswapShuffle2x4)); } else { - LoadAndSwap(64, RCX, MComplex(RBX, RCX, 1, 0)); - ROL(64, R(RCX), Imm8(32)); - MOVQ_xmm(XMM0, R(RCX)); + LoadAndSwap(64, RSCRATCH_EXTRA, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0)); + ROL(64, R(RSCRATCH_EXTRA), Imm8(32)); + MOVQ_xmm(XMM0, R(RSCRATCH_EXTRA)); } RET(); const u8* loadPairedFloatOne = AlignCode4(); if (cpu_info.bSSSE3) { - MOVD_xmm(XMM0, MComplex(RBX, RCX, 1, 0)); + MOVD_xmm(XMM0, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0)); PSHUFB(XMM0, M((void *)pbswapShuffle1x4)); UNPCKLPS(XMM0, M((void*)m_one)); } else { - LoadAndSwap(32, RCX, MComplex(RBX, RCX, 1, 0)); - MOVD_xmm(XMM0, R(RCX)); + LoadAndSwap(32, RSCRATCH_EXTRA, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0)); + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); UNPCKLPS(XMM0, M((void*)m_one)); } RET(); const u8* loadPairedU8Two = AlignCode4(); - UnsafeLoadRegToRegNoSwap(ECX, ECX, 16, 0); - MOVD_xmm(XMM0, R(ECX)); + UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0); + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); PXOR(XMM1, R(XMM1)); PUNPCKLBW(XMM0, R(XMM1)); PUNPCKLWD(XMM0, R(XMM1)); CVTDQ2PS(XMM0, R(XMM0)); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS)); PUNPCKLDQ(XMM1, R(XMM1)); MULPS(XMM0, R(XMM1)); RET(); const u8* loadPairedU8One = AlignCode4(); - UnsafeLoadRegToRegNoSwap(ECX, ECX, 8, 0); // ECX = 0x000000xx - MOVD_xmm(XMM0, R(ECX)); + UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0); // RSCRATCH_EXTRA = 0x000000xx + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); CVTDQ2PS(XMM0, R(XMM0)); // Is CVTSI2SS better?
- SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS)); MULSS(XMM0, R(XMM1)); UNPCKLPS(XMM0, M((void*)m_one)); RET(); const u8* loadPairedS8Two = AlignCode4(); - UnsafeLoadRegToRegNoSwap(ECX, ECX, 16, 0); - MOVD_xmm(XMM0, R(ECX)); + UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0); + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); PUNPCKLBW(XMM0, R(XMM0)); PUNPCKLWD(XMM0, R(XMM0)); PSRAD(XMM0, 24); CVTDQ2PS(XMM0, R(XMM0)); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS)); PUNPCKLDQ(XMM1, R(XMM1)); MULPS(XMM0, R(XMM1)); RET(); const u8* loadPairedS8One = AlignCode4(); - UnsafeLoadRegToRegNoSwap(ECX, ECX, 8, 0); - SHL(32, R(ECX), Imm8(24)); - SAR(32, R(ECX), Imm8(24)); - MOVD_xmm(XMM0, R(ECX)); + UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0); + SHL(32, R(RSCRATCH_EXTRA), Imm8(24)); + SAR(32, R(RSCRATCH_EXTRA), Imm8(24)); + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); CVTDQ2PS(XMM0, R(XMM0)); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS)); MULSS(XMM0, R(XMM1)); UNPCKLPS(XMM0, M((void*)m_one)); RET(); const u8* loadPairedU16Two = AlignCode4(); - UnsafeLoadRegToReg(ECX, ECX, 32, 0, false); - ROL(32, R(ECX), Imm8(16)); - MOVD_xmm(XMM0, R(ECX)); + UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 32, 0, false); + ROL(32, R(RSCRATCH_EXTRA), Imm8(16)); + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); PXOR(XMM1, R(XMM1)); PUNPCKLWD(XMM0, R(XMM1)); CVTDQ2PS(XMM0, R(XMM0)); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS)); PUNPCKLDQ(XMM1, R(XMM1)); MULPS(XMM0, R(XMM1)); RET(); const u8* loadPairedU16One = AlignCode4(); - UnsafeLoadRegToReg(ECX, ECX, 32, 0, false); - SHR(32, R(ECX), Imm8(16)); - MOVD_xmm(XMM0, R(ECX)); + UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 32, 0, false); + SHR(32, R(RSCRATCH_EXTRA), Imm8(16)); + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); CVTDQ2PS(XMM0, R(XMM0)); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS)); MULSS(XMM0, R(XMM1)); UNPCKLPS(XMM0, M((void*)m_one)); RET(); const u8* loadPairedS16Two = AlignCode4(); - UnsafeLoadRegToReg(ECX, ECX, 32, 0, false); - ROL(32, R(ECX), Imm8(16)); - MOVD_xmm(XMM0, R(ECX)); + UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 32, 0, false); + ROL(32, R(RSCRATCH_EXTRA), Imm8(16)); + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); PUNPCKLWD(XMM0, R(XMM0)); PSRAD(XMM0, 16); CVTDQ2PS(XMM0, R(XMM0)); - SHR(32, R(EAX), Imm8(6)); - AND(32, R(EAX), Imm32(0xFC)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + AND(32, R(RSCRATCH), Imm32(0xFC)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS)); PUNPCKLDQ(XMM1, R(XMM1)); MULPS(XMM0, R(XMM1)); RET(); const u8* loadPairedS16One = AlignCode4(); - UnsafeLoadRegToReg(ECX, ECX, 32, 0, false); - SAR(32, R(ECX), Imm8(16)); - MOVD_xmm(XMM0, R(ECX)); + UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 32, 0, false); + SAR(32, R(RSCRATCH_EXTRA), Imm8(16)); + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); CVTDQ2PS(XMM0, R(XMM0)); - SHR(32, R(EAX), 
Imm8(6)); - AND(32, R(EAX), Imm32(0xFC)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + AND(32, R(RSCRATCH), Imm32(0xFC)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS)); MULSS(XMM0, R(XMM1)); UNPCKLPS(XMM0, M((void*)m_one)); RET(); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h index 1ae548bce1..2702db95e1 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h @@ -19,9 +19,9 @@ public: const u8 *dispatcher; const u8 *dispatcherNoCheck; - const u8 *dispatcherPcInEAX; + const u8 *dispatcherPcInRSCRATCH; - const u8 *dispatchPcInEAX; + const u8 *dispatchPcInRSCRATCH; const u8 *doTiming; const u8 *frsqrte; @@ -31,14 +31,14 @@ public: // In: ECX: Address to read from. // Out: XMM0: Bottom two 32-bit slots hold the read value, // converted to a pair of floats. - // Trashes: EAX ECX EDX + // Trashes: all three RSCRATCH const u8 **pairedLoadQuantized; // In: array index: GQR to use. // In: ECX: Address to write to. // In: XMM0: Bottom two 32-bit slots hold the pair of floats to be written. // Out: Nothing. - // Trashes: EAX ECX EDX + // Trashes: all three RSCRATCH const u8 **pairedStoreQuantized; // In: array index: GQR to use. diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp index 29ee9146bd..c1a6436e62 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp @@ -74,7 +74,7 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re break; case 2: CALL((void *)&Memory::Read_U16); - SHL(32, R(EAX), Imm8(16)); + SHL(32, R(ABI_RETURN), Imm8(16)); break; case 1: CALL((void *)&Memory::Read_U8); @@ -84,11 +84,11 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re if (info.signExtend && info.operandSize == 1) { // Need to sign extend value from Read_U8. - MOVSX(32, 8, dataReg, R(EAX)); + MOVSX(32, 8, dataReg, R(ABI_RETURN)); } else if (dataReg != EAX) { - MOV(32, R(dataReg), R(EAX)); + MOV(32, R(dataReg), R(ABI_RETURN)); } ABI_PopRegistersAndAdjustStack(registersInUse, true); @@ -166,9 +166,9 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void) return nullptr; } - if (info.otherReg != RBX) + if (info.otherReg != RMEM) { - PanicAlert("BackPatch : Base reg not RBX." + PanicAlert("BackPatch : Base reg not RMEM." "\n\nAttempted to access %08x.", emAddress); return nullptr; } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index fa842679e7..95cd723d6d 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -27,6 +27,23 @@ #include "Core/PowerPC/JitCommon/JitBackpatch.h" #include "Core/PowerPC/JitCommon/JitCache.h" +// TODO: find a better place for x86-specific stuff +// The following register assignments are common to Jit64 and Jit64IL: +// RSCRATCH and RSCRATCH2 are always scratch registers and can be used without +// limitation. +#define RSCRATCH RAX +#define RSCRATCH2 RDX +// RSCRATCH_EXTRA may be in the allocation order, so it has to be flushed +// before use. +#define RSCRATCH_EXTRA RCX +// RMEM points to the start of emulated memory. +#define RMEM RBX +// RCODE_POINTERS does what it says. +#define RCODE_POINTERS R15 +// RPPCSTATE points to ppcState + 0x80. 
It's offset because we want to be able +// to address as much as possible in a one-byte offset form. +#define RPPCSTATE RBP + // Use these to control the instruction selection // #define INSTRUCTION_START FallBackToInterpreter(inst); return; // #define INSTRUCTION_START PPCTables::CountInstruction(inst); diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index 5847216920..be43680e88 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -41,7 +41,7 @@ void EmuCodeBlock::SwapAndStore(int size, const Gen::OpArg& dst, Gen::X64Reg src void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend) { - MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset)); + MOVZX(32, accessSize, reg_value, MComplex(RMEM, reg_addr, SCALE_1, offset)); if (accessSize == 32) { BSWAP(32, reg_value); @@ -63,7 +63,7 @@ void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int acc void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset) { - MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset)); + MOVZX(32, accessSize, reg_value, MComplex(RMEM, reg_addr, SCALE_1, offset)); } u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessSize, s32 offset, bool signExtend) @@ -85,16 +85,16 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessS offset = 0; } - memOperand = MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset); + memOperand = MComplex(RMEM, opAddress.GetSimpleReg(), SCALE_1, offset); } else if (opAddress.IsImm()) { - memOperand = MDisp(RBX, (opAddress.offset + offset) & 0x3FFFFFFF); + memOperand = MDisp(RMEM, (opAddress.offset + offset) & 0x3FFFFFFF); } else { MOV(32, R(reg_value), opAddress); - memOperand = MComplex(RBX, reg_value, SCALE_1, offset); + memOperand = MComplex(RMEM, reg_value, SCALE_1, offset); } result = GetWritableCodePtr(); @@ -129,7 +129,7 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessS return result; } -// Visitor that generates code to read a MMIO value to EAX. +// Visitor that generates code to read a MMIO value. template <typename T> class MMIOReadCodeGenerator : public MMIO::ReadHandlingMethodVisitor<T> { @@ -181,9 +181,9 @@ private: void LoadAddrMaskToReg(int sbits, const void* ptr, u32 mask) { #ifdef _ARCH_64 - m_code->MOV(64, R(EAX), ImmPtr(ptr)); + m_code->MOV(64, R(RSCRATCH), ImmPtr(ptr)); #else - m_code->MOV(32, R(EAX), ImmPtr(ptr)); + m_code->MOV(32, R(RSCRATCH), ImmPtr(ptr)); #endif // If we do not need to mask, we can do the sign extend while loading // from memory.
If masking is required, we have to first zero extend, @@ -191,11 +191,11 @@ private: u32 all_ones = (1ULL << sbits) - 1; if ((all_ones & mask) == all_ones) { - MoveOpArgToReg(sbits, MDisp(EAX, 0)); + MoveOpArgToReg(sbits, MDisp(RSCRATCH, 0)); } else { - m_code->MOVZX(32, sbits, m_dst_reg, MDisp(EAX, 0)); + m_code->MOVZX(32, sbits, m_dst_reg, MDisp(RSCRATCH, 0)); m_code->AND(32, R(m_dst_reg), Imm32(mask)); if (m_sign_extend) m_code->MOVSX(32, sbits, m_dst_reg, R(m_dst_reg)); @@ -207,7 +207,7 @@ private: m_code->ABI_PushRegistersAndAdjustStack(m_registers_in_use, false); m_code->ABI_CallLambdaC(lambda, m_address); m_code->ABI_PopRegistersAndAdjustStack(m_registers_in_use, false); - MoveOpArgToReg(sbits, R(EAX)); + MoveOpArgToReg(sbits, R(ABI_RETURN)); } Gen::X64CodeBlock* m_code; @@ -320,11 +320,11 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, if (signExtend && accessSize < 32) { // Need to sign extend values coming from the Read_U* functions. - MOVSX(32, accessSize, reg_value, R(EAX)); + MOVSX(32, accessSize, reg_value, R(ABI_RETURN)); } - else if (reg_value != EAX) + else if (reg_value != ABI_RETURN) { - MOVZX(64, accessSize, reg_value, R(EAX)); + MOVZX(64, accessSize, reg_value, R(ABI_RETURN)); } MEMCHECK_END @@ -335,15 +335,15 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, OpArg addr_loc = opAddress; if (offset) { - addr_loc = R(EAX); + addr_loc = R(RSCRATCH); if (opAddress.IsSimpleReg()) { - LEA(32, EAX, MDisp(opAddress.GetSimpleReg(), offset)); + LEA(32, RSCRATCH, MDisp(opAddress.GetSimpleReg(), offset)); } else { - MOV(32, R(EAX), opAddress); - ADD(32, R(EAX), Imm32(offset)); + MOV(32, R(RSCRATCH), opAddress); + ADD(32, R(RSCRATCH), Imm32(offset)); } } TEST(32, addr_loc, Imm32(mem_mask)); @@ -373,11 +373,11 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, if (signExtend && accessSize < 32) { // Need to sign extend values coming from the Read_U* functions. - MOVSX(32, accessSize, reg_value, R(EAX)); + MOVSX(32, accessSize, reg_value, R(ABI_RETURN)); } - else if (reg_value != EAX) + else if (reg_value != ABI_RETURN) { - MOVZX(64, accessSize, reg_value, R(EAX)); + MOVZX(64, accessSize, reg_value, R(ABI_RETURN)); } MEMCHECK_END @@ -393,7 +393,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap) { u8* result = GetWritableCodePtr(); - OpArg dest = MComplex(RBX, reg_addr, SCALE_1, offset); + OpArg dest = MComplex(RMEM, reg_addr, SCALE_1, offset); if (swap) { if (cpu_info.bMOVBE) @@ -441,10 +441,10 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce if (offset) { - if (flags & SAFE_LOADSTORE_CLOBBER_EAX_INSTEAD_OF_ADDR) + if (flags & SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR) { - LEA(32, EAX, MDisp(reg_addr, (u32)offset)); - reg_addr = EAX; + LEA(32, RSCRATCH, MDisp(reg_addr, (u32)offset)); + reg_addr = RSCRATCH; } else { @@ -495,20 +495,20 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce SetJumpTarget(exit); } -// Destroys both arg registers and EAX +// Destroys the same as SafeWrite plus RSCRATCH. TODO: see if we can avoid temporaries here void EmuCodeBlock::SafeWriteF32ToReg(X64Reg xmm_value, X64Reg reg_addr, s32 offset, u32 registersInUse, int flags) { // TODO: PSHUFB might be faster if fastmem supported MOVSS. 
- MOVD_xmm(R(EAX), xmm_value); - SafeWriteRegToReg(EAX, reg_addr, 32, offset, registersInUse, flags); + MOVD_xmm(R(RSCRATCH), xmm_value); + SafeWriteRegToReg(RSCRATCH, reg_addr, 32, offset, registersInUse, flags); } void EmuCodeBlock::WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap) { if (swap) - SwapAndStore(accessSize, MDisp(RBX, address & 0x3FFFFFFF), arg); + SwapAndStore(accessSize, MDisp(RMEM, address & 0x3FFFFFFF), arg); else - MOV(accessSize, MDisp(RBX, address & 0x3FFFFFFF), R(arg)); + MOV(accessSize, MDisp(RMEM, address & 0x3FFFFFFF), R(arg)); } void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm) @@ -585,20 +585,20 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) // Grab Exponent PAND(XMM1, M((void *)&double_exponent)); PSRLQ(XMM1, 52); - MOVD_xmm(R(EAX), XMM1); + MOVD_xmm(R(RSCRATCH), XMM1); // Check if the double is in the range of valid single subnormal - CMP(16, R(EAX), Imm16(896)); + CMP(16, R(RSCRATCH), Imm16(896)); FixupBranch NoDenormalize = J_CC(CC_G); - CMP(16, R(EAX), Imm16(874)); + CMP(16, R(RSCRATCH), Imm16(874)); FixupBranch NoDenormalize2 = J_CC(CC_L); // Denormalise // shift = (905 - Exponent) plus the 21 bit double to single shift - MOV(16, R(EAX), Imm16(905 + 21)); - MOVD_xmm(XMM0, R(EAX)); + MOV(16, R(RSCRATCH), Imm16(905 + 21)); + MOVD_xmm(XMM0, R(RSCRATCH)); PSUBQ(XMM0, R(XMM1)); // xmm1 = fraction | 0x0010000000000000 @@ -649,12 +649,12 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) // Changing the FPU mode is very expensive, so we can't do that. // Here, check to see if the exponent is small enough that it will result in a denormal, and pass it to the x87 unit // if it is. - MOVQ_xmm(R(RAX), src); - SHR(64, R(RAX), Imm8(55)); + MOVQ_xmm(R(RSCRATCH), src); + SHR(64, R(RSCRATCH), Imm8(55)); // Exponents 0x369 <= x <= 0x380 are denormal. This code accepts the range 0x368 <= x <= 0x387 // to save an instruction, since diverting a few more floats to the slow path can't hurt much. - SUB(8, R(AL), Imm8(0x6D)); - CMP(8, R(AL), Imm8(0x3)); + SUB(8, R(RSCRATCH), Imm8(0x6D)); + CMP(8, R(RSCRATCH), Imm8(0x3)); FixupBranch x87Conversion = J_CC(CC_BE); CVTSD2SS(dst, R(src)); FixupBranch continue1 = J(); @@ -675,7 +675,7 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr { // If the input isn't denormal, just do things the simple way -- otherwise, go through the x87 unit, which has // flush-to-zero off. - X64Reg gprsrc = src_is_gpr ? src : EAX; + X64Reg gprsrc = src_is_gpr ? src : RSCRATCH; if (src_is_gpr) { MOVD_xmm(dst, R(src)); @@ -684,7 +684,7 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr { if (dst != src) MOVAPD(dst, R(src)); - MOVD_xmm(EAX, R(src)); + MOVD_xmm(RSCRATCH, R(src)); } // A sneaky hack: floating-point zero is rather common and we don't want to confuse it for denormals and // needlessly send it through the slow path. If we subtract 1 before doing the comparison, it turns @@ -724,14 +724,14 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm) FixupBranch continue1, continue2, continue3, continue4; if (cpu_info.bSSE4_1) { - MOVQ_xmm(R(RAX), xmm); - SHR(64, R(RAX), Imm8(63)); // Get the sign bit; almost all the branches need it. + MOVQ_xmm(R(RSCRATCH), xmm); + SHR(64, R(RSCRATCH), Imm8(63)); // Get the sign bit; almost all the branches need it. PTEST(xmm, M((void*)psDoubleExp)); FixupBranch maxExponent = J_CC(CC_C); FixupBranch zeroExponent = J_CC(CC_Z); // Nice normalized number: sign ? 
PPC_FPCLASS_NN : PPC_FPCLASS_PN; - LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NN - MathUtil::PPC_FPCLASS_PN, MathUtil::PPC_FPCLASS_PN)); + LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_NN - MathUtil::PPC_FPCLASS_PN, MathUtil::PPC_FPCLASS_PN)); continue1 = J(); SetJumpTarget(maxExponent); @@ -739,12 +739,12 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm) FixupBranch notNAN = J_CC(CC_Z); // Max exponent + mantissa: PPC_FPCLASS_QNAN - MOV(32, R(EAX), Imm32(MathUtil::PPC_FPCLASS_QNAN)); + MOV(32, R(RSCRATCH), Imm32(MathUtil::PPC_FPCLASS_QNAN)); continue2 = J(); // Max exponent + no mantissa: sign ? PPC_FPCLASS_NINF : PPC_FPCLASS_PINF; SetJumpTarget(notNAN); - LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NINF - MathUtil::PPC_FPCLASS_PINF, MathUtil::PPC_FPCLASS_NINF)); + LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_NINF - MathUtil::PPC_FPCLASS_PINF, MathUtil::PPC_FPCLASS_NINF)); continue3 = J(); SetJumpTarget(zeroExponent); @@ -752,55 +752,55 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm) FixupBranch zero = J_CC(CC_Z); // No exponent + mantissa: sign ? PPC_FPCLASS_ND : PPC_FPCLASS_PD; - LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_ND - MathUtil::PPC_FPCLASS_PD, MathUtil::PPC_FPCLASS_ND)); + LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_ND - MathUtil::PPC_FPCLASS_PD, MathUtil::PPC_FPCLASS_ND)); continue4 = J(); // Zero: sign ? PPC_FPCLASS_NZ : PPC_FPCLASS_PZ; SetJumpTarget(zero); - SHL(32, R(EAX), Imm8(4)); - ADD(32, R(EAX), Imm8(MathUtil::PPC_FPCLASS_PZ)); + SHL(32, R(RSCRATCH), Imm8(4)); + ADD(32, R(RSCRATCH), Imm8(MathUtil::PPC_FPCLASS_PZ)); } else { - MOVQ_xmm(R(RAX), xmm); - TEST(64, R(RAX), M((void*)psDoubleExp)); + MOVQ_xmm(R(RSCRATCH), xmm); + TEST(64, R(RSCRATCH), M((void*)psDoubleExp)); FixupBranch zeroExponent = J_CC(CC_Z); - AND(64, R(RAX), M((void*)psDoubleNoSign)); - CMP(64, R(RAX), M((void*)psDoubleExp)); - FixupBranch nan = J_CC(CC_G); // This works because if the sign bit is set, RAX is negative + AND(64, R(RSCRATCH), M((void*)psDoubleNoSign)); + CMP(64, R(RSCRATCH), M((void*)psDoubleExp)); + FixupBranch nan = J_CC(CC_G); // This works because if the sign bit is set, RSCRATCH is negative FixupBranch infinity = J_CC(CC_E); - MOVQ_xmm(R(RAX), xmm); - SHR(64, R(RAX), Imm8(63)); - LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NN - MathUtil::PPC_FPCLASS_PN, MathUtil::PPC_FPCLASS_PN)); + MOVQ_xmm(R(RSCRATCH), xmm); + SHR(64, R(RSCRATCH), Imm8(63)); + LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_NN - MathUtil::PPC_FPCLASS_PN, MathUtil::PPC_FPCLASS_PN)); continue1 = J(); SetJumpTarget(nan); - MOVQ_xmm(R(RAX), xmm); - SHR(64, R(RAX), Imm8(63)); - MOV(32, R(EAX), Imm32(MathUtil::PPC_FPCLASS_QNAN)); + MOVQ_xmm(R(RSCRATCH), xmm); + SHR(64, R(RSCRATCH), Imm8(63)); + MOV(32, R(RSCRATCH), Imm32(MathUtil::PPC_FPCLASS_QNAN)); continue2 = J(); SetJumpTarget(infinity); - MOVQ_xmm(R(RAX), xmm); - SHR(64, R(RAX), Imm8(63)); - LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NINF - MathUtil::PPC_FPCLASS_PINF, MathUtil::PPC_FPCLASS_NINF)); + MOVQ_xmm(R(RSCRATCH), xmm); + SHR(64, R(RSCRATCH), Imm8(63)); + LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_NINF - MathUtil::PPC_FPCLASS_PINF, MathUtil::PPC_FPCLASS_NINF)); continue3 = J(); SetJumpTarget(zeroExponent); - TEST(64, R(RAX), R(RAX)); + TEST(64, R(RSCRATCH), R(RSCRATCH)); FixupBranch zero = J_CC(CC_Z); - SHR(64, R(RAX), Imm8(63)); - LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_ND - MathUtil::PPC_FPCLASS_PD, MathUtil::PPC_FPCLASS_ND)); + SHR(64, R(RSCRATCH), Imm8(63)); + 
LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_ND - MathUtil::PPC_FPCLASS_PD, MathUtil::PPC_FPCLASS_ND)); continue4 = J(); SetJumpTarget(zero); - SHR(64, R(RAX), Imm8(63)); - SHL(32, R(EAX), Imm8(4)); - ADD(32, R(EAX), Imm8(MathUtil::PPC_FPCLASS_PZ)); + SHR(64, R(RSCRATCH), Imm8(63)); + SHL(32, R(RSCRATCH), Imm8(4)); + ADD(32, R(RSCRATCH), Imm8(MathUtil::PPC_FPCLASS_PZ)); } SetJumpTarget(continue1); SetJumpTarget(continue2); SetJumpTarget(continue3); SetJumpTarget(continue4); - SHL(32, R(EAX), Imm8(FPRF_SHIFT)); - OR(32, PPCSTATE(fpscr), R(EAX)); + SHL(32, R(RSCRATCH), Imm8(FPRF_SHIFT)); + OR(32, PPCSTATE(fpscr), R(RSCRATCH)); } diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h index ca073854eb..73eb9ebfe8 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h @@ -23,7 +23,7 @@ namespace MMIO { class Mapping; } // We offset by 0x80 because the range of one byte memory offsets is // -0x80..0x7f. -#define PPCSTATE(x) MDisp(RBP, \ +#define PPCSTATE(x) MDisp(RPPCSTATE, \ (int) ((char *) &PowerPC::ppcState.x - (char *) &PowerPC::ppcState) - 0x80) // In case you want to disable the ppcstate register: // #define PPCSTATE(x) M((void*) &PowerPC::ppcState.x) @@ -54,11 +54,11 @@ public: SAFE_LOADSTORE_NO_SWAP = 1, SAFE_LOADSTORE_NO_PROLOG = 2, SAFE_LOADSTORE_NO_FASTMEM = 4, - SAFE_LOADSTORE_CLOBBER_EAX_INSTEAD_OF_ADDR = 8 + SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8 }; void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags = 0); - // Clobbers EAX or reg_addr depending on the relevant flag. Preserves + // Clobbers RSCRATCH or reg_addr depending on the relevant flag. Preserves // reg_value if the load fails and js.memcheck is enabled. 
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags = 0); @@ -79,9 +79,8 @@ public: void ForceSinglePrecisionP(Gen::X64Reg xmm); void Force25BitPrecision(Gen::X64Reg xmm, Gen::X64Reg tmp); - // EAX might get trashed + // RSCRATCH might get trashed void ConvertSingleToDouble(Gen::X64Reg dst, Gen::X64Reg src, bool src_is_gpr = false); - // EAX might get trashed void ConvertDoubleToSingle(Gen::X64Reg dst, Gen::X64Reg src); void SetFPRF(Gen::X64Reg xmm); protected: diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp index 10449eebb9..c5fcfb1256 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp @@ -321,7 +321,7 @@ void JitILBase::divwux(UGeckoInstruction inst) #if 0 int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.FlushLockX(EDX); + gpr.FlushLockX(RSCRATCH2); gpr.Lock(a, b, d); if (d != a && d != b) @@ -333,11 +333,11 @@ void JitILBase::divwux(UGeckoInstruction inst) gpr.LoadToX64(d, true, true); } - MOV(32, R(EAX), gpr.R(a)); - XOR(32, R(EDX), R(EDX)); + MOV(32, R(RSCRATCH), gpr.R(a)); + XOR(32, R(RSCRATCH2), R(RSCRATCH2)); gpr.KillImmediate(b); DIV(32, gpr.R(b)); - MOV(32, gpr.R(d), R(EAX)); + MOV(32, gpr.R(d), R(RSCRATCH)); gpr.UnlockAll(); gpr.UnlockAllX(); diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStore.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStore.cpp index 3801ac7ba7..f889181dc9 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStore.cpp @@ -137,19 +137,13 @@ void JitILBase::dcbz(UGeckoInstruction inst) return; } INSTRUCTION_START; - MOV(32, R(EAX), gpr.R(inst.RB)); + MOV(32, R(RSCRATCH), gpr.R(inst.RB)); if (inst.RA) - ADD(32, R(EAX), gpr.R(inst.RA)); - AND(32, R(EAX), Imm32(~31)); + ADD(32, R(RSCRATCH), gpr.R(inst.RA)); + AND(32, R(RSCRATCH), Imm32(~31)); PXOR(XMM0, R(XMM0)); -#if _M_X86_64 - MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0); - MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0); -#else - AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); - MOVAPS(MDisp(EAX, (u32)Memory::base), XMM0); - MOVAPS(MDisp(EAX, (u32)Memory::base + 16), XMM0); -#endif + MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), XMM0); + MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 16), XMM0); #endif }
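To see why the biased base register pays off, recall that the disp8 form of an x86-64 memory operand is sign-extended over [-0x80, 0x7F]; with RPPCSTATE pointing at ppcState + 0x80, the PPCSTATE() macro reaches structure offsets 0x00 through 0xFF in the short encoding. A hypothetical usage sketch (the variable i and the surrounding code are illustrative, not part of this patch; gpr[] sits at the front of PowerPCState):

// Increment guest GPR i entirely through the reserved base register.
// For i < 32 the displacement (4*i - 0x80) fits in a signed byte, so these
// assemble to the short [rbp+disp8] form, where the old
// M(&PowerPC::ppcState.gpr[i]) operand always needed a 32-bit displacement.
MOV(32, R(RSCRATCH), PPCSTATE(gpr[i])); // load guest GPR i
ADD(32, R(RSCRATCH), Imm32(1));
MOV(32, PPCSTATE(gpr[i]), R(RSCRATCH)); // store it back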