diff --git a/Source/Core/Core/HW/DSP.cpp b/Source/Core/Core/HW/DSP.cpp index 5ad1f2d143..8935139cef 100644 --- a/Source/Core/Core/HW/DSP.cpp +++ b/Source/Core/Core/HW/DSP.cpp @@ -145,12 +145,12 @@ struct ARAMInfo // STATE_TO_SAVE static ARAMInfo g_ARAM; -static UDSPControl g_dspState; static AudioDMA g_audioDMA; static ARAM_DMA g_arDMA; static u32 last_mmaddr; static u32 last_aram_dma_count; static bool instant_dma; +UDSPControl g_dspState; union ARAM_Info { @@ -216,6 +216,22 @@ void EnableInstantDMA() instant_dma = true; } +void FlushInstantDMA(u32 address) +{ + u64 dma_in_progress = DSP::DMAInProgress(); + if (dma_in_progress != 0) + { + u32 start_addr = (dma_in_progress >> 32) & Memory::RAM_MASK; + u32 end_addr = (dma_in_progress & Memory::RAM_MASK) & 0xffffffff; + u32 invalidated_addr = (address & Memory::RAM_MASK) & ~0x1f; + + if (invalidated_addr >= start_addr && invalidated_addr <= end_addr) + { + DSP::EnableInstantDMA(); + } + } +} + DSPEmulator *GetDSPEmulator() { return dsp_emulator; diff --git a/Source/Core/Core/HW/DSP.h b/Source/Core/Core/HW/DSP.h index 6245c435e0..2152cb41c6 100644 --- a/Source/Core/Core/HW/DSP.h +++ b/Source/Core/Core/HW/DSP.h @@ -56,6 +56,8 @@ union UDSPControl UDSPControl(u16 _Hex = 0) : Hex(_Hex) {} }; +extern UDSPControl g_dspState; + void Init(bool hle); void Shutdown(); @@ -78,5 +80,6 @@ void UpdateAudioDMA(); void UpdateDSPSlice(int cycles); u64 DMAInProgress(); void EnableInstantDMA(); +void FlushInstantDMA(u32 address); }// end of namespace DSP diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp index 2bd39cafec..43e89a54e3 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp @@ -340,18 +340,7 @@ void Interpreter::dcbi(UGeckoInstruction _inst) // The following detects a situation where the game is writing to the dcache at the address being DMA'd. 
As we do not // have dcache emulation, invalid data is being DMA'd causing audio glitches. The following code detects this and // enables the DMA to complete instantly before the invalid data is written. Resident Evil 2 & 3 trigger this. - u64 dma_in_progress = DSP::DMAInProgress(); - if (dma_in_progress != 0) - { - u32 start_addr = (dma_in_progress >> 32) & Memory::RAM_MASK; - u32 end_addr = (dma_in_progress & Memory::RAM_MASK) & 0xffffffff; - u32 invalidated_addr = (address & Memory::RAM_MASK) & ~0x1f; - - if (invalidated_addr >= start_addr && invalidated_addr <= end_addr) - { - DSP::EnableInstantDMA(); - } - } + DSP::FlushInstantDMA(address); } void Interpreter::dcbst(UGeckoInstruction _inst) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index a4449ae256..235713108c 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -255,4 +255,6 @@ public: void lmw(UGeckoInstruction inst); void stmw(UGeckoInstruction inst); + + void dcbx(UGeckoInstruction inst); }; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp index 90d9405601..674c768f85 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp @@ -214,11 +214,11 @@ static GekkoOPTemplate table31[] = {824, &Jit64::srawix}, // srawix {24, &Jit64::slwx}, // slwx - {54, &Jit64::FallBackToInterpreter}, // dcbst - {86, &Jit64::FallBackToInterpreter}, // dcbf - {246, &Jit64::dcbt }, // dcbtst - {278, &Jit64::dcbt }, // dcbt - {470, &Jit64::FallBackToInterpreter}, // dcbi + {54, &Jit64::dcbx}, // dcbst + {86, &Jit64::dcbx}, // dcbf + {246, &Jit64::dcbt}, // dcbtst + {278, &Jit64::dcbt}, // dcbt + {470, &Jit64::dcbx}, // dcbi {758, &Jit64::DoNothing}, // dcba {1014, &Jit64::dcbz}, // dcbz diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index d3bd0aa588..8e3e1cff11 
100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -7,6 +7,8 @@ #include "Common/CommonTypes.h" +#include "Core/HW/DSP.h" +#include "Core/PowerPC/JitInterface.h" #include "Core/PowerPC/Jit64/Jit.h" #include "Core/PowerPC/Jit64/JitAsm.h" #include "Core/PowerPC/Jit64/JitRegCache.h" @@ -290,6 +292,70 @@ void Jit64::lXXx(UGeckoInstruction inst) gpr.UnlockAllX(); } +void Jit64::dcbx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITLoadStoreOff); + + X64Reg addr = RSCRATCH; + X64Reg value = RSCRATCH2; + X64Reg tmp = gpr.GetFreeXReg(); + gpr.FlushLockX(tmp); + + if (inst.RA && gpr.R(inst.RA).IsSimpleReg() && gpr.R(inst.RB).IsSimpleReg()) + { + LEA(32, addr, MRegSum(gpr.RX(inst.RA), gpr.RX(inst.RB))); + } + else + { + MOV(32, R(addr), gpr.R(inst.RB)); + if (inst.RA) + ADD(32, R(addr), gpr.R(inst.RA)); + } + + // Check whether a JIT cache line needs to be invalidated. + LEA(32, value, MScaled(addr, SCALE_8, 0)); // addr << 3 (masks the first 3 bits) + SHR(32, R(value), Imm8(3 + 5 + 5)); // >> 5 for cache line size, >> 5 for width of bitset + MOV(64, R(tmp), ImmPtr(jit->GetBlockCache()->GetBlockBitSet())); + MOV(32, R(value), MComplex(tmp, value, SCALE_4, 0)); + SHR(32, R(addr), Imm8(5)); + BT(32, R(value), R(addr)); + + FixupBranch c = J_CC(CC_C, true); + SwitchToFarCode(); + SetJumpTarget(c); + BitSet32 registersInUse = CallerSavedRegistersInUse(); + ABI_PushRegistersAndAdjustStack(registersInUse, 0); + MOV(32, R(ABI_PARAM1), R(addr)); + SHL(32, R(ABI_PARAM1), Imm8(5)); + MOV(32, R(ABI_PARAM2), Imm32(32)); + XOR(32, R(ABI_PARAM3), R(ABI_PARAM3)); + ABI_CallFunction((void*)JitInterface::InvalidateICache); + ABI_PopRegistersAndAdjustStack(registersInUse, 0); + c = J(true); + SwitchToNearCode(); + SetJumpTarget(c); + + // dcbi + if (inst.SUBOP10 == 470) + { + // Flush DSP DMA if DMAState bit is set + TEST(16, M(&DSP::g_dspState), Imm16(1 << 9)); + c = J_CC(CC_NZ, true); + SwitchToFarCode(); + 
SetJumpTarget(c); + ABI_PushRegistersAndAdjustStack(registersInUse, 0); + SHL(32, R(addr), Imm8(5)); + ABI_CallFunctionR((void*)DSP::FlushInstantDMA, addr); + ABI_PopRegistersAndAdjustStack(registersInUse, 0); + c = J(true); + SwitchToNearCode(); + SetJumpTarget(c); + } + + gpr.UnlockAllX(); +} + void Jit64::dcbt(UGeckoInstruction inst) { INSTRUCTION_START diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index 7e310f6a35..af0580ac85 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -60,14 +60,15 @@ typedef void (*CompiledCode)(); // implementation of std::bitset is slow. class ValidBlockBitSet final { +public: enum { VALID_BLOCK_MASK_SIZE = 0x20000000 / 32, VALID_BLOCK_ALLOC_ELEMENTS = VALID_BLOCK_MASK_SIZE / 32 }; + // Directly accessed by Jit64. std::unique_ptr<u32[]> m_valid_block; -public: ValidBlockBitSet() { m_valid_block.reset(new u32[VALID_BLOCK_ALLOC_ELEMENTS]); @@ -157,6 +158,11 @@ public: // DOES NOT WORK CORRECTLY WITH INLINING void InvalidateICache(u32 address, const u32 length, bool forced); + + u32* GetBlockBitSet() const + { + return valid_block.m_valid_block.get(); + } }; // x86 BlockCache