From 8cfd8aa309aeeefff04f40b88828e1a3595a8a12 Mon Sep 17 00:00:00 2001 From: hrydgard Date: Sun, 10 Aug 2008 18:24:01 +0000 Subject: [PATCH] Latest round of JIT changes. Probably broke something as usual. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@170 8ced0084-cf51-0410-be5f-012b33b47a6e --- .../Interpreter/Interpreter_LoadStore.cpp | 4 +- .../Interpreter/Interpreter_Paired.cpp | 2 + .../Interpreter_SystemRegisters.cpp | 37 +++++++++++++------ Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp | 6 ++- .../Core/Src/PowerPC/Jit64/JitRegCache.cpp | 3 +- .../Core/Core/Src/PowerPC/Jit64/JitRegCache.h | 8 ++++ .../Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp | 21 +++++++++-- .../PowerPC/Jit64/Jit_LoadStoreFloating.cpp | 24 ++++++++---- .../Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp | 13 +++---- 9 files changed, 83 insertions(+), 35 deletions(-) diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp index 9d893c1fbe..952773c5bb 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp @@ -83,7 +83,7 @@ void CInterpreter::lfdx(UGeckoInstruction _inst) void CInterpreter::lfs(UGeckoInstruction _inst) { - u32 uTemp = Memory::Read_U32(Helper_Get_EA(_inst)); + u32 uTemp = Memory::Read_U32(Helper_Get_EA(_inst)); rPS0(_inst.FD) = *(float*)&uTemp; rPS1(_inst.FD) = rPS0(_inst.FD); } @@ -629,6 +629,8 @@ void CInterpreter::sync(UGeckoInstruction _inst) void CInterpreter::tlbia(UGeckoInstruction _inst) { + // Gekko does not support this instructions. + PanicAlert("The GC CPU does not support tlbia"); // invalid the whole TLB //MessageBox(0,"TLBIA","TLBIA",0); } diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Paired.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Paired.cpp index a6d3335440..4554308607 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Paired.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Paired.cpp @@ -642,6 +642,8 @@ void CInterpreter::ps_merge11(UGeckoInstruction _inst) void CInterpreter::dcbz_l(UGeckoInstruction _inst) { + // This is supposed to allocate a cache line in the locked cache. Not entirely sure how + // this is visible to the rest of the world. For now, we ignore it. /* addr_t ea = Helper_Get_EA(_inst); diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp index 98fe7665e9..aba334ddf8 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp @@ -221,10 +221,18 @@ void CInterpreter::mtsrin(UGeckoInstruction _inst) PowerPC::ppcState.sr[index] = m_GPR[_inst.RS]; } +void CInterpreter::mftb(UGeckoInstruction _inst) +{ + int iIndex = (_inst.TBR >> 5) | ((_inst.TBR&0x1F) << 5); + if (iIndex == 268) m_GPR[_inst.RD] = TL; + else if (iIndex == 269) m_GPR[_inst.RD] = TU; + else _dbg_assert_(GEKKO,0); +} + + void CInterpreter::mfspr(UGeckoInstruction _inst) { u32 iIndex = ((_inst.SPR & 0x1F) << 5) + ((_inst.SPR >> 5)&0x1F); - m_GPR[_inst.RD] = rSPR(iIndex); //TODO - check processor privilege level - many of these require privilege //XER LR CTR are the only ones available in user mode, time base can be read too. @@ -241,20 +249,17 @@ void CInterpreter::mfspr(UGeckoInstruction _inst) //(or if it's full, not sure) //MessageBox(NULL, "Read from SPR_WPAR", "????", MB_OK); //Paper Mario reads here, this should be investigated ... TODO(ector) + bool wpar_empty = false; + if (!wpar_empty) + rSPR(iIndex) |= 1; // BNE = buffer not empty + else + rSPR(iIndex) &= ~1; } break; } + m_GPR[_inst.RD] = rSPR(iIndex); } -void CInterpreter::mftb(UGeckoInstruction _inst) -{ - int iIndex = (_inst.TBR >> 5) | ((_inst.TBR&0x1F) << 5); - if (iIndex == 268) m_GPR[_inst.RD] = TL; - else if (iIndex == 269) m_GPR[_inst.RD] = TU; - else _dbg_assert_(GEKKO,0); -} - - void CInterpreter::mtspr(UGeckoInstruction _inst) { u32 iIndex = (_inst.SPRU << 5) | (_inst.SPRL & 0x1F); @@ -285,14 +290,20 @@ void CInterpreter::mtspr(UGeckoInstruction _inst) case SPR_HID2: // HID2 { + UReg_HID2 old_hid2; + old_hid2.Hex = oldValue; + if (HID2.PSE == 0) PanicAlert("WARNING: PSE in HID2 isnt set"); bool WriteGatherPipeEnable = (bool)HID2.WPE; //TODO? bool LockedCacheEnable = (bool)HID2.LCE; int DMAQueueLength = HID2.DMAQL; // Ignore - our DMA:s are instantaneous + bool PairedSingleEnable = HID2.PSE; + bool QuantizeEnable = HID2.LSQE; //TODO(ector): Protect LC memory if LCE is false. //TODO(ector): Honor PSE. + // //_assert_msg_(GEKKO, WriteGatherPipeEnable, "Write gather pipe not enabled!"); //if ((HID2.PSE == 0)) @@ -301,11 +312,13 @@ void CInterpreter::mtspr(UGeckoInstruction _inst) break; case SPR_WPAR: - _assert_msg_(GEKKO, m_GPR[_inst.RD] == 0x0C008000,"Gather pipe @ %08x", ); + _assert_msg_(GEKKO, m_GPR[_inst.RD] == 0x0C008000, "Gather pipe @ %08x"); GPFifo::ResetGatherPipe(); break; - case SPR_DMAL: //locked cache DMA + case SPR_DMAL: + // Locked cache<->Memory DMA + // Total fake, we ignore that DMAs take time. if (DMAL.DMA_T) { u32 dwMemAddress = DMAU.MEM_ADDR << 5; diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp index 45bcee2529..ed8d94b3d0 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp @@ -14,8 +14,10 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ -#include "Common.h" + #include + +#include "Common.h" #include "x64Emitter.h" #include "ABI.h" #include "../../HLE/HLE.h" @@ -235,7 +237,7 @@ namespace Jit64 if (js.isLastInstruction) { MOV(32, M(&PC), Imm32(js.compilerPC)); - MOV(32, M(&NPC), Imm32(js.compilerPC+4)); + MOV(32, M(&NPC), Imm32(js.compilerPC + 4)); } CInterpreter::_interpreterInstruction instr = GetInterpreterOp(_inst); ABI_CallFunctionC((void*)instr, _inst.hex); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp index c29b703df0..36021251d0 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp @@ -234,6 +234,7 @@ namespace Jit64 bool FPURegCache::IsXRegVolatile(X64Reg reg) const { #ifdef _WIN32 + // return true; if (reg < 6) return true; else @@ -263,7 +264,7 @@ namespace Jit64 R12, R13, R14, R8, R9, R10, R11, RSI, RDI //, RCX #endif #elif _M_IX86 - ESI, EDI, EBX, EBP, EDX //, RCX + ESI, EDI, EBX, EBP, EDX #endif }; count = sizeof(allocationOrder) / sizeof(const int); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h index b7df21e1e5..eb0e6b22dd 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h @@ -78,6 +78,14 @@ namespace Jit64 virtual void Start(PPCAnalyst::BlockRegStats &stats) = 0; void FlushR(X64Reg reg); void FlushR(X64Reg reg, X64Reg reg2) {FlushR(reg); FlushR(reg2);} + void FlushLockX(X64Reg reg) { + FlushR(reg); + LockX(reg); + } + void FlushLockX(X64Reg reg1, X64Reg reg2) { + FlushR(reg1); FlushR(reg2); + LockX(reg1); LockX(reg2); + } virtual void Flush(FlushMode mode); virtual void Flush(PPCAnalyst::CodeOp *op) {Flush(FLUSH_ALL);} void End() {Flush(FLUSH_ALL);} diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp index fa792f54b6..b5935b9694 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp @@ -120,6 +120,8 @@ namespace Jit64 void lbzx(UGeckoInstruction inst) { INSTRUCTION_START; + gpr.Flush(FLUSH_VOLATILE); + fpr.Flush(FLUSH_VOLATILE); int a = inst.RA, b = inst.RB, d = inst.RD; gpr.Lock(a, b, d); if (b == d || a == d) @@ -183,12 +185,14 @@ namespace Jit64 // Safe and boring gpr.Flush(FLUSH_VOLATILE); fpr.Flush(FLUSH_VOLATILE); + gpr.FlushLockX(ABI_PARAM1); gpr.Lock(d, a); MOV(32, R(ABI_PARAM1), gpr.R(a)); SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset); gpr.LoadToX64(d, false, true); MOV(32, gpr.R(d), R(EAX)); gpr.UnlockAll(); + gpr.UnlockAllX(); return; } @@ -219,12 +223,14 @@ namespace Jit64 // Safe and boring gpr.Flush(FLUSH_VOLATILE); fpr.Flush(FLUSH_VOLATILE); + gpr.FlushLockX(ABI_PARAM1); gpr.Lock(d, a); MOV(32, R(ABI_PARAM1), gpr.R(a)); SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true); - gpr.LoadToX64(d, false, true); + gpr.LoadToX64(d, d == a, true); MOV(32, gpr.R(d), R(EAX)); gpr.UnlockAll(); + gpr.UnlockAllX(); return; } @@ -232,14 +238,19 @@ namespace Jit64 void dcbz(UGeckoInstruction inst) { INSTRUCTION_START; - DISABLE_32BIT; MOV(32, R(EAX), gpr.R(inst.RB)); if (inst.RA) ADD(32, R(EAX), gpr.R(inst.RA)); AND(32, R(EAX), Imm32(~31)); XORPD(XMM0, R(XMM0)); +#ifdef _M_X64 MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0); MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0); +#else + AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); + MOVAPS(MDisp(EAX, (u32)Memory::base), XMM0); + MOVAPS(MDisp(EAX, (u32)Memory::base + 16), XMM0); +#endif } #ifndef _WIN32 @@ -262,7 +273,7 @@ namespace Jit64 if (a || update) { gpr.Flush(FLUSH_VOLATILE); - + fpr.Flush(FLUSH_VOLATILE); int accessSize; switch (inst.OPCD & ~1) { @@ -340,6 +351,7 @@ namespace Jit64 */ //Still here? Do regular path. gpr.Lock(s, a); + gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); MOV(32, R(ABI_PARAM2), gpr.R(a)); MOV(32, R(ABI_PARAM1), gpr.R(s)); if (offset) @@ -370,10 +382,11 @@ namespace Jit64 { case 32: ABI_CallFunctionRR((void *)&Memory::Write_U32, ABI_PARAM1, ABI_PARAM2); break; case 16: ABI_CallFunctionRR((void *)&Memory::Write_U16, ABI_PARAM1, ABI_PARAM2); break; - case 8: ABI_CallFunctionRR((void *)&Memory::Write_U8, ABI_PARAM1, ABI_PARAM2); break; + case 8: ABI_CallFunctionRR((void *)&Memory::Write_U8, ABI_PARAM1, ABI_PARAM2); break; } SetJumpTarget(arg2); gpr.UnlockAll(); + gpr.UnlockAllX(); } else { diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index 9d050ad7f6..d7e437dd08 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -47,6 +47,12 @@ namespace Jit64 { +// pshufb todo: MOVQ +const u8 GC_ALIGNED16(bswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; +const u8 GC_ALIGNED16(bswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15}; +const u8 GC_ALIGNED16(bswapShuffle1x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 8, 9, 10, 11, 12, 13, 14, 15}; +const u8 GC_ALIGNED16(bswapShuffle2x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}; + static u64 GC_ALIGNED16(temp64); static u32 GC_ALIGNED16(temp32); @@ -67,7 +73,12 @@ void lfs(UGeckoInstruction inst) } s32 offset = (s32)(s16)inst.SIMM_16; - gpr.Flush(FLUSH_VOLATILE); + if (jo.noAssumeFPLoadFromMem) { + // We might call a function. + gpr.Flush(FLUSH_VOLATILE); + fpr.Flush(FLUSH_VOLATILE); + gpr.FlushLockX(ABI_PARAM1); + } gpr.Lock(d, a); MOV(32, R(ABI_PARAM1), gpr.R(a)); @@ -86,6 +97,7 @@ void lfs(UGeckoInstruction inst) CVTSS2SD(fpr.RX(d), M(&temp32)); MOVDDUP(fpr.RX(d), fpr.R(d)); gpr.UnlockAll(); + gpr.UnlockAllX(); fpr.UnlockAll(); } @@ -127,6 +139,7 @@ void stfd(UGeckoInstruction inst) s32 offset = (s32)(s16)inst.SIMM_16; gpr.Lock(a); fpr.Lock(s); + gpr.FlushLockX(ABI_PARAM1); fpr.LoadToX64(s, true, false); MOVSD(M(&temp64), fpr.RX(s)); MOV(32, R(ABI_PARAM1), gpr.R(a)); @@ -134,25 +147,24 @@ void stfd(UGeckoInstruction inst) BSWAP(64, EAX); MOV(64, MComplex(RBX, ABI_PARAM1, SCALE_1, offset), R(EAX)); gpr.UnlockAll(); + gpr.UnlockAllX(); fpr.UnlockAll(); } void stfs(UGeckoInstruction inst) { INSTRUCTION_START; - DISABLE_32BIT; bool update = inst.OPCD & 1; int s = inst.RS; int a = inst.RA; s32 offset = (s32)(s16)inst.SIMM_16; - if (a && !update) { gpr.Flush(FLUSH_VOLATILE); -// fpr.Flush(FLUSH_VOLATILE); + fpr.Flush(FLUSH_VOLATILE); gpr.Lock(a); fpr.Lock(s); - gpr.LockX(ABI_PARAM1, ABI_PARAM2); + gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); MOV(32, R(ABI_PARAM2), gpr.R(a)); if (update && offset) { @@ -161,7 +173,6 @@ void stfs(UGeckoInstruction inst) CVTSD2SS(XMM0, fpr.R(s)); MOVSS(M(&temp32), XMM0); MOV(32, R(ABI_PARAM1), M(&temp32)); - SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, offset); gpr.UnlockAll(); gpr.UnlockAllX(); @@ -176,7 +187,6 @@ void stfs(UGeckoInstruction inst) void lfsx(UGeckoInstruction inst) { INSTRUCTION_START; - DISABLE_32BIT; fpr.Lock(inst.RS); fpr.LoadToX64(inst.RS, false, true); MOV(32, R(EAX), gpr.R(inst.RB)); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp index 88e2ff381b..18795b6a9b 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -125,6 +125,8 @@ void psq_st(UGeckoInstruction inst) { DISABLE_32BIT; gpr.Flush(FLUSH_VOLATILE); + fpr.Flush(FLUSH_VOLATILE); + gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); gpr.Lock(a); fpr.Lock(s); if (update) @@ -147,14 +149,12 @@ void psq_st(UGeckoInstruction inst) CALL((void *)&WriteDual32); SetJumpTarget(arg2); gpr.UnlockAll(); + gpr.UnlockAllX(); fpr.UnlockAll(); } else if (stType == QUANTIZE_U8) { - gpr.FlushR(ABI_PARAM1); - gpr.FlushR(ABI_PARAM2); - gpr.LockX(ABI_PARAM1); - gpr.LockX(ABI_PARAM2); + gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); gpr.Lock(a); fpr.Lock(s); if (update) @@ -187,10 +187,7 @@ void psq_st(UGeckoInstruction inst) } else if (stType == QUANTIZE_S16) { - gpr.FlushR(ABI_PARAM1); - gpr.FlushR(ABI_PARAM2); - gpr.LockX(ABI_PARAM1); - gpr.LockX(ABI_PARAM2); + gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); gpr.Lock(a); fpr.Lock(s); if (update)