diff --git a/Source/Core/Common/x64ABI.cpp b/Source/Core/Common/x64ABI.cpp
index 6f423eaa6a..0116212485 100644
--- a/Source/Core/Common/x64ABI.cpp
+++ b/Source/Core/Common/x64ABI.cpp
@@ -176,6 +176,14 @@ void XEmitter::ABI_CallFunctionCC(void *func, u32 param1, u32 param2) {
 	ABI_RestoreStack(2 * 4);
 }
 
+void XEmitter::ABI_CallFunctionCP(void *func, u32 param1, void *param2) {
+	ABI_AlignStack(2 * 4);
+	PUSH(32, Imm32((u32)param2));
+	PUSH(32, Imm32(param1));
+	CALL(func);
+	ABI_RestoreStack(2 * 4);
+}
+
 void XEmitter::ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param3) {
 	ABI_AlignStack(3 * 4);
 	PUSH(32, Imm32(param3));
@@ -204,6 +212,14 @@ void XEmitter::ABI_CallFunctionCCCP(void *func, u32 param1, u32 param2,u32 param
 	ABI_RestoreStack(4 * 4);
 }
 
+void XEmitter::ABI_CallFunctionPC(void *func, void *param1, u32 param2) {
+	ABI_AlignStack(2 * 4);
+	PUSH(32, Imm32(param2));
+	PUSH(32, Imm32((u32)param1));
+	CALL(func);
+	ABI_RestoreStack(2 * 4);
+}
+
 void XEmitter::ABI_CallFunctionPPC(void *func, void *param1, void *param2,u32 param3) {
 	ABI_AlignStack(3 * 4);
 	PUSH(32, Imm32(param3));
@@ -344,6 +360,22 @@ void XEmitter::ABI_CallFunctionCC(void *func, u32 param1, u32 param2) {
 	ABI_RestoreStack(0);
 }
 
+void XEmitter::ABI_CallFunctionCP(void *func, u32 param1, void *param2) {
+	ABI_AlignStack(0);
+	MOV(32, R(ABI_PARAM1), Imm32(param1));
+	MOV(64, R(ABI_PARAM2), Imm64((u64)param2));
+	u64 distance = u64(func) - (u64(code) + 5);
+	if (distance >= 0x0000000080000000ULL
+	    && distance <  0xFFFFFFFF80000000ULL) {
+		// Far call
+		MOV(64, R(RAX), Imm64((u64)func));
+		CALLptr(R(RAX));
+	} else {
+		CALL(func);
+	}
+	ABI_RestoreStack(0);
+}
+
 void XEmitter::ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param3) {
 	ABI_AlignStack(0);
 	MOV(32, R(ABI_PARAM1), Imm32(param1));
@@ -396,6 +428,22 @@ void XEmitter::ABI_CallFunctionCCCP(void *func, u32 param1, u32 param2, u32 para
 	ABI_RestoreStack(0);
 }
 
+void XEmitter::ABI_CallFunctionPC(void *func, void *param1, u32 param2) {
+	ABI_AlignStack(0);
+	MOV(64, R(ABI_PARAM1), Imm64((u64)param1));
+	MOV(32, R(ABI_PARAM2), Imm32(param2));
+	u64 distance = u64(func) - (u64(code) + 5);
+	if (distance >= 0x0000000080000000ULL
+	    && distance <  0xFFFFFFFF80000000ULL) {
+		// Far call
+		MOV(64, R(RAX), Imm64((u64)func));
+		CALLptr(R(RAX));
+	} else {
+		CALL(func);
+	}
+	ABI_RestoreStack(0);
+}
+
 void XEmitter::ABI_CallFunctionPPC(void *func, void *param1, void *param2, u32 param3) {
 	ABI_AlignStack(0);
 	MOV(64, R(ABI_PARAM1), Imm64((u64)param1));
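Note on the two 64-bit variants above: a 5-byte CALL rel32 can only reach targets within a signed 32-bit displacement of the next instruction, which is what the `distance` comparison tests before falling back to MOV RAX, imm64 + CALLptr. A standalone sketch of the same range check (hypothetical helper, not part of the patch):

#include <cstdint>

// True when `target` is reachable by a CALL rel32 whose next instruction is
// at `next_instr` (call site + 5). The unsigned subtraction wraps, so this
// single range test covers both the forward and backward +/-2 GiB cases,
// mirroring the emitter's far-call condition.
static bool FitsInS32(uint64_t next_instr, uint64_t target)
{
	uint64_t distance = target - next_instr;
	return !(distance >= 0x0000000080000000ULL &&
	         distance <  0xFFFFFFFF80000000ULL);
}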
diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h
index f8acb6fe95..b35fff3319 100644
--- a/Source/Core/Common/x64Emitter.h
+++ b/Source/Core/Common/x64Emitter.h
@@ -8,6 +8,7 @@
 
 #include <cassert>
 #include <cstring>
+#include <functional>
 
 #include "Common/Common.h"
 #include "Common/MemoryUtil.h"
@@ -171,7 +172,7 @@ private:
 	u16 indexReg;
 };
 
-inline OpArg M(void *ptr) {return OpArg((u64)ptr, (int)SCALE_RIP);}
+inline OpArg M(const void *ptr) {return OpArg((u64)ptr, (int)SCALE_RIP);}
 inline OpArg R(X64Reg value) {return OpArg(0, SCALE_NONE, value);}
 inline OpArg MatR(X64Reg value) {return OpArg(0, SCALE_ATREG, value);}
 inline OpArg MDisp(X64Reg value, int offset) {
@@ -194,9 +195,9 @@ inline OpArg Imm16(u16 imm) {return OpArg(imm, SCALE_IMM16);} //rarely used
 inline OpArg Imm32(u32 imm) {return OpArg(imm, SCALE_IMM32);}
 inline OpArg Imm64(u64 imm) {return OpArg(imm, SCALE_IMM64);}
 #ifdef _ARCH_64
-inline OpArg ImmPtr(void* imm) {return Imm64((u64)imm);}
+inline OpArg ImmPtr(const void* imm) {return Imm64((u64)imm);}
 #else
-inline OpArg ImmPtr(void* imm) {return Imm32((u32)imm);}
+inline OpArg ImmPtr(const void* imm) {return Imm32((u32)imm);}
 #endif
 inline u32 PtrOffset(void* ptr, void* base) {
 #ifdef _ARCH_64
@@ -671,9 +672,11 @@ public:
 	// These will destroy the 1 or 2 first "parameter regs".
 	void ABI_CallFunctionC(void *func, u32 param1);
 	void ABI_CallFunctionCC(void *func, u32 param1, u32 param2);
+	void ABI_CallFunctionCP(void *func, u32 param1, void *param2);
 	void ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param3);
 	void ABI_CallFunctionCCP(void *func, u32 param1, u32 param2, void *param3);
 	void ABI_CallFunctionCCCP(void *func, u32 param1, u32 param2,u32 param3, void *param4);
+	void ABI_CallFunctionPC(void *func, void *param1, u32 param2);
 	void ABI_CallFunctionPPC(void *func, void *param1, void *param2,u32 param3);
 	void ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2);
 	void ABI_CallFunctionA(void *func, const Gen::OpArg &arg1);
@@ -737,6 +740,26 @@ public:
 #define DECLARE_IMPORT(x) extern "C" void *__imp_##x
 #endif
 
+
+	// Utility to generate a call to a std::function object.
+	//
+	// Unfortunately, calling operator() directly is undefined behavior in C++
+	// (this method might be a thunk in the case of multi-inheritance) so we
+	// have to go through a trampoline function.
+	template <typename T, typename... Args>
+	static void CallLambdaTrampoline(const std::function<T(Args...)>* f,
+	                                 Args... args)
+	{
+		(*f)(args...);
+	}
+
+	template <typename T>
+	void ABI_CallLambdaC(const std::function<T(u32)>* f, u32 p1)
+	{
+		// Double casting is required by VC++ for some reason.
+		auto trampoline = (void(*)())&XEmitter::CallLambdaTrampoline<T, u32>;
+		ABI_CallFunctionPC((void*)trampoline, const_cast<void*>((const void*)f), p1);
+	}
 }; // class XEmitter
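A usage sketch for ABI_CallLambdaC (hypothetical, not part of the patch). Only the address of the std::function object is baked into the instruction stream, so the object must outlive the generated code:

#include <functional>

#include "Common/x64Emitter.h"

// Must stay alive as long as the emitted code does: the emitter embeds its
// address, not a copy of it.
static std::function<u32(u32)> s_read_handler = [](u32 address) {
	return address >> 16;
};

void EmitHandlerCall(Gen::XEmitter& emit)
{
	// On x64 this emits two parameter moves plus a call to the trampoline,
	// which in turn invokes s_read_handler(0xCC006000).
	emit.ABI_CallLambdaC(&s_read_handler, 0xCC006000);
}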
diff --git a/Source/Core/Core/HW/MMIO.cpp b/Source/Core/Core/HW/MMIO.cpp
index 39ae3187c3..f8cbe9ee18 100644
--- a/Source/Core/Core/HW/MMIO.cpp
+++ b/Source/Core/Core/HW/MMIO.cpp
@@ -148,12 +148,12 @@ public:
 
 	virtual void AcceptReadVisitor(ReadHandlingMethodVisitor<T>& v) const
 	{
-		v.VisitComplex(read_lambda_);
+		v.VisitComplex(&read_lambda_);
 	}
 
 	virtual void AcceptWriteVisitor(WriteHandlingMethodVisitor<T>& v) const
 	{
-		v.VisitComplex(write_lambda_);
+		v.VisitComplex(&write_lambda_);
 	}
 
 private:
@@ -313,9 +313,9 @@ void ReadHandler<T>::ResetMethod(ReadHandlingMethod<T>* method)
 		ret = [addr, mask](u32) { return *addr & mask; };
 	}
 
-	virtual void VisitComplex(std::function<T(u32)> lambda)
+	virtual void VisitComplex(const std::function<T(u32)>* lambda)
 	{
-		ret = lambda;
+		ret = *lambda;
 	}
 };
 
@@ -367,9 +367,9 @@ void WriteHandler<T>::ResetMethod(WriteHandlingMethod<T>* method)
 		ret = [ptr, mask](u32, T val) { *ptr = val & mask; };
 	}
 
-	virtual void VisitComplex(std::function<void(u32, T)> lambda)
+	virtual void VisitComplex(const std::function<void(u32, T)>* lambda)
 	{
-		ret = lambda;
+		ret = *lambda;
 	}
 };
 
diff --git a/Source/Core/Core/HW/MMIO.h b/Source/Core/Core/HW/MMIO.h
index 447ae5285d..28fe19ba5a 100644
--- a/Source/Core/Core/HW/MMIO.h
+++ b/Source/Core/Core/HW/MMIO.h
@@ -31,6 +31,19 @@ enum Block
 const u32 BLOCK_SIZE = 0x10000;
 const u32 NUM_MMIOS = NUM_BLOCKS * BLOCK_SIZE;
 
+// Checks if a given physical memory address refers to the MMIO address range.
+// In practice, most games use a virtual memory mapping (via BATs set in the
+// IPL) that matches the physical memory mapping for MMIOs.
+//
+// We have a special exception here for FIFO writes: these are handled via a
+// different mechanism and should not go through the normal MMIO access
+// interface.
+inline bool IsMMIOAddress(u32 address)
+{
+	return ((address & 0xE0000000) == 0xC0000000) &&
+	       ((address & 0x0000FFFF) != 0x00008000);
+}
+
 // Compute the internal unique ID for a given MMIO address. This ID is computed
 // from a very simple formula: (block_id << 16) | lower_16_bits(address).
 //
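A few concrete values for IsMMIOAddress (a hypothetical sanity check, not part of the patch; 0xCC008000 is the FIFO write port excluded by the comment above):

#include <cassert>

#include "Core/HW/MMIO.h"

void SanityCheckIsMMIOAddress()
{
	assert(MMIO::IsMMIOAddress(0xCC006400));   // inside the MMIO range
	assert(!MMIO::IsMMIOAddress(0xCC008000));  // FIFO write port, excluded
	assert(!MMIO::IsMMIOAddress(0x80001234));  // regular RAM, not MMIO
}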
diff --git a/Source/Core/Core/HW/MMIOHandlers.h b/Source/Core/Core/HW/MMIOHandlers.h
index b0f41a4fef..7afe9f60b5 100644
--- a/Source/Core/Core/HW/MMIOHandlers.h
+++ b/Source/Core/Core/HW/MMIOHandlers.h
@@ -88,7 +88,7 @@ class ReadHandlingMethodVisitor
 public:
 	virtual void VisitConstant(T value) = 0;
 	virtual void VisitDirect(const T* addr, u32 mask) = 0;
-	virtual void VisitComplex(std::function<T(u32)> lambda) = 0;
+	virtual void VisitComplex(const std::function<T(u32)>* lambda) = 0;
 };
 
 template <typename T>
 class WriteHandlingMethodVisitor
@@ -96,7 +96,7 @@ class WriteHandlingMethodVisitor
 public:
 	virtual void VisitNop() = 0;
 	virtual void VisitDirect(T* addr, u32 mask) = 0;
-	virtual void VisitComplex(std::function<void(u32, T)> lambda) = 0;
+	virtual void VisitComplex(const std::function<void(u32, T)>* lambda) = 0;
 };
 
 // These classes are INTERNAL. Do not use outside of the MMIO implementation
diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
index 8140c235eb..ebe89b0e5b 100644
--- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
+++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
@@ -6,6 +6,8 @@
 
 #include "Common/Common.h"
 #include "Common/CPUDetect.h"
+
+#include "Core/HW/MMIO.h"
 #include "Core/PowerPC/JitCommon/Jit_Util.h"
 #include "Core/PowerPC/JitCommon/JitBase.h"
 
@@ -118,6 +120,122 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int ac
 	return result;
 }
 
+// Visitor that generates code to read an MMIO value to EAX.
+template <typename T>
+class MMIOReadCodeGenerator : public MMIO::ReadHandlingMethodVisitor<T>
+{
+public:
+	MMIOReadCodeGenerator(Gen::XCodeBlock* code, u32 registers_in_use,
+	                      Gen::X64Reg dst_reg, u32 address, bool sign_extend)
+		: m_code(code), m_registers_in_use(registers_in_use), m_dst_reg(dst_reg),
+		  m_address(address), m_sign_extend(sign_extend)
+	{
+	}
+
+	virtual void VisitConstant(T value)
+	{
+		LoadConstantToReg(8 * sizeof (T), value);
+	}
+	virtual void VisitDirect(const T* addr, u32 mask)
+	{
+		LoadAddrMaskToReg(8 * sizeof (T), addr, mask);
+	}
+	virtual void VisitComplex(const std::function<T(u32)>* lambda)
+	{
+		CallLambda(8 * sizeof (T), lambda);
+	}
+
+private:
+	// Generates code to load a constant to the destination register. In
+	// practice it would be better to avoid using a register for this, but it
+	// would require refactoring a lot of JIT code.
+	void LoadConstantToReg(int sbits, u32 value)
+	{
+		if (m_sign_extend)
+		{
+			u32 sign = !!(value & (1 << (sbits - 1)));
+			value |= sign * ((0xFFFFFFFF >> sbits) << sbits);
+		}
+		m_code->MOV(32, R(m_dst_reg), Gen::Imm32(value));
+	}
+
+	// Generate the proper MOV instruction depending on whether the read should
+	// be sign extended or zero extended.
+	void MoveOpArgToReg(int sbits, Gen::OpArg arg)
+	{
+		if (m_sign_extend)
+			m_code->MOVSX(32, sbits, m_dst_reg, arg);
+		else
+			m_code->MOVZX(32, sbits, m_dst_reg, arg);
+	}
+
+	void LoadAddrMaskToReg(int sbits, const void* ptr, u32 mask)
+	{
+#ifdef _ARCH_64
+		m_code->MOV(64, R(EAX), ImmPtr(ptr));
+#else
+		m_code->MOV(32, R(EAX), ImmPtr(ptr));
+#endif
+		// If we do not need to mask, we can do the sign extend while loading
+		// from memory. If masking is required, we have to first zero extend,
+		// then mask, then sign extend if needed (1 instr vs. 2/3).
+		u32 all_ones = (1ULL << sbits) - 1;
+		if ((all_ones & mask) == all_ones)
+			MoveOpArgToReg(sbits, MDisp(EAX, 0));
+		else
+		{
+			m_code->MOVZX(32, sbits, m_dst_reg, MDisp(EAX, 0));
+			m_code->AND(32, R(m_dst_reg), Imm32(mask));
+			if (m_sign_extend)
+				m_code->MOVSX(32, sbits, m_dst_reg, R(m_dst_reg));
+		}
+	}
+
+	void CallLambda(int sbits, const std::function<T(u32)>* lambda)
+	{
+		m_code->ABI_PushRegistersAndAdjustStack(m_registers_in_use, false);
+		m_code->ABI_CallLambdaC(lambda, m_address);
+		m_code->ABI_PopRegistersAndAdjustStack(m_registers_in_use, false);
+		MoveOpArgToReg(sbits, R(EAX));
+	}
+
+	Gen::XCodeBlock* m_code;
+	u32 m_registers_in_use;
+	Gen::X64Reg m_dst_reg;
+	u32 m_address;
+	bool m_sign_extend;
+};
+
+void EmuCodeBlock::MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value,
+                                 u32 registers_in_use, u32 address,
+                                 int access_size, bool sign_extend)
+{
+	switch (access_size)
+	{
+	case 8:
+	{
+		MMIOReadCodeGenerator<u8> gen(this, registers_in_use, reg_value,
+		                              address, sign_extend);
+		mmio->GetHandlerForRead8(address).Visit(gen);
+		break;
+	}
+	case 16:
+	{
+		MMIOReadCodeGenerator<u16> gen(this, registers_in_use, reg_value,
+		                               address, sign_extend);
+		mmio->GetHandlerForRead16(address).Visit(gen);
+		break;
+	}
+	case 32:
+	{
+		MMIOReadCodeGenerator<u32> gen(this, registers_in_use, reg_value,
+		                               address, sign_extend);
+		mmio->GetHandlerForRead32(address).Visit(gen);
+		break;
+	}
+	}
+}
+
 void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags)
 {
 	if (!jit->js.memcheck)
@@ -157,10 +275,25 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
 	if (opAddress.IsImm())
 	{
 		u32 address = (u32)opAddress.offset + offset;
+
+		// If we know the address, try the following loading methods in
+		// order:
+		//
+		// 1. If the address is in RAM, generate an unsafe load (directly
+		//    access the RAM buffer and load from there).
+		// 2. If the address is in the MMIO range, find the appropriate
+		//    MMIO handler and generate the code to load using the handler.
+		// 3. Otherwise, just generate a call to Memory::Read_* with the
+		//    address hardcoded.
 		if ((address & mem_mask) == 0)
 		{
 			UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend);
 		}
+		else if (!Core::g_CoreStartupParameter.bMMU && MMIO::IsMMIOAddress(address))
+		{
+			MMIOLoadToReg(Memory::mmio_mapping, reg_value, registersInUse,
+			              address, accessSize, signExtend);
+		}
 		else
 		{
 			ABI_PushRegistersAndAdjustStack(registersInUse, false);
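The masking fast path in LoadAddrMaskToReg may be easier to follow as plain C++. A hypothetical scalar equivalent of the emitted code for a 16-bit read (illustrative only, not part of the patch):

#include <cstdint>

// What the emitted code computes for a 16-bit handler, expressed as scalar
// C++. A full mask folds the extension into the load itself (one MOVSX or
// MOVZX); a partial mask costs MOVZX + AND, plus MOVSX when sign extending.
static uint32_t ReadDirect16(const uint16_t* ptr, uint32_t mask, bool sign_extend)
{
	if ((mask & 0xFFFF) == 0xFFFF)
		return sign_extend ? (uint32_t)(int32_t)(int16_t)*ptr : *ptr;

	uint32_t val = (uint32_t)*ptr & mask;
	if (sign_extend)
		val = (uint32_t)(int32_t)(int16_t)(uint16_t)val;
	return val;
}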
diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h
index 52d0b12d39..cb67a2efbc 100644
--- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h
+++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h
@@ -8,6 +8,8 @@
 
 #include "Common/x64Emitter.h"
 
+namespace MMIO { class Mapping; }
+
 #define MEMCHECK_START \
 	FixupBranch memException; \
 	if (jit->js.memcheck) \
@@ -28,6 +30,11 @@ public:
 	// these return the address of the MOV, for backpatching
 	u8 *UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true);
 	u8 *UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset, bool signExtend);
+
+	// Generate a load or write from the MMIO handler for a given address. Only
+	// call this for known addresses in the MMIO range (MMIO::IsMMIOAddress).
+	void MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value, u32 registers_in_use, u32 address, int access_size, bool sign_extend);
+
 	enum SafeLoadStoreFlags
 	{
 		SAFE_LOADSTORE_NO_SWAP = 1,
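For context, a hypothetical call-site sketch for the declaration above (SafeLoadToReg in Jit_Util.cpp performs this dispatch automatically for immediate addresses; Memmap.h as the home of Memory::mmio_mapping is an assumption):

#include "Common/x64Emitter.h"
#include "Core/HW/Memmap.h"
#include "Core/HW/MMIO.h"
#include "Core/PowerPC/JitCommon/Jit_Util.h"

// Emit a zero-extended 16-bit load from a known MMIO register into EAX,
// assuming no live GPRs need to be preserved across the call.
void EmitExampleMMIOLoad(EmuCodeBlock& block)
{
	const u32 address = 0xCC006400;  // illustrative register address
	if (MMIO::IsMMIOAddress(address))
		block.MMIOLoadToReg(Memory::mmio_mapping, Gen::EAX,
		                    /*registers_in_use=*/0, address,
		                    /*access_size=*/16, /*sign_extend=*/false);
}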