mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-07-29 00:59:44 -06:00
Refactor fastmem/trampoline code.
Simplification to avoid reading back the generated instructions, allowing us to handle all possible cases.
This commit is contained in:
@ -12,27 +12,12 @@
|
||||
#include "Common/CommonFuncs.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/MsgHandler.h"
|
||||
#include "Common/x64Analyzer.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/HW/Memmap.h"
|
||||
#include "Core/PowerPC/JitCommon/JitBase.h"
|
||||
|
||||
using namespace Gen;
|
||||
|
||||
// Reports a backpatching failure to the user through PanicAlert.
//   text      - description of what went wrong (printed first)
//   codePtr   - host address of the offending instruction; it is disassembled
//               into a local buffer and included in the message
//   emAddress - emulated address whose access triggered the failure
static void BackPatchError(const std::string& text, u8* codePtr, u32 emAddress)
|
||||
{
|
||||
u64 code_addr = (u64)codePtr;
|
||||
disassembler disasm;
|
||||
char disbuf[256];
|
||||
// Zero the buffer so it is a valid (empty) C string even if nothing is written to it.
|
||||
memset(disbuf, 0, 256);
|
||||
disasm.disasm64(0, code_addr, codePtr, disbuf);
|
||||
PanicAlert("%s\n\n"
|
||||
"Error encountered accessing emulated address %08x.\n"
|
||||
"Culprit instruction: \n%s\nat %#" PRIx64,
|
||||
text.c_str(), emAddress, disbuf, code_addr);
|
||||
return;
|
||||
}
|
||||
|
||||
// This generates some fairly heavy trampolines, but it doesn't really hurt.
|
||||
// Only instructions that access I/O will get these, and there won't be that
|
||||
// many of them in a typical program/game.
|
||||
@ -56,36 +41,14 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
|
||||
if (!IsInSpace(codePtr))
|
||||
return false; // this will become a regular crash real soon after this
|
||||
|
||||
InstructionInfo info = {};
|
||||
|
||||
if (!DisassembleMov(codePtr, &info))
|
||||
{
|
||||
BackPatchError("BackPatch - failed to disassemble MOV instruction", codePtr, emAddress);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (info.otherReg != RMEM)
|
||||
{
|
||||
PanicAlert("BackPatch : Base reg not RMEM."
|
||||
"\n\nAttempted to access %08x.",
|
||||
emAddress);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (info.byteSwap && info.instructionSize < BACKPATCH_SIZE)
|
||||
{
|
||||
PanicAlert("BackPatch: MOVBE is too small");
|
||||
return false;
|
||||
}
|
||||
|
||||
auto it = registersInUseAtLoc.find(codePtr);
|
||||
if (it == registersInUseAtLoc.end())
|
||||
auto it = backPatchInfo.find(codePtr);
|
||||
if (it == backPatchInfo.end())
|
||||
{
|
||||
PanicAlert("BackPatch: no register use entry for address %p", codePtr);
|
||||
return false;
|
||||
}
|
||||
|
||||
BitSet32 registersInUse = it->second;
|
||||
TrampolineInfo& info = it->second;
|
||||
|
||||
u8* exceptionHandler = nullptr;
|
||||
if (jit->jo.memcheck)
|
||||
@ -95,110 +58,67 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
|
||||
exceptionHandler = it2->second;
|
||||
}
|
||||
|
||||
// Compute the start and length of the memory operation, including
|
||||
// any byteswapping.
|
||||
int totalSize = info.instructionSize;
|
||||
u8* start = codePtr;
|
||||
if (!info.isMemoryWrite)
|
||||
{
|
||||
// MOVBE and single bytes don't need to be swapped.
|
||||
if (!info.byteSwap && info.operandSize > 1)
|
||||
{
|
||||
// REX
|
||||
if ((codePtr[totalSize] & 0xF0) == 0x40)
|
||||
totalSize++;
|
||||
|
||||
// BSWAP
|
||||
if (codePtr[totalSize] == 0x0F && (codePtr[totalSize + 1] & 0xF8) == 0xC8)
|
||||
totalSize += 2;
|
||||
|
||||
if (info.operandSize == 2)
|
||||
{
|
||||
// operand size override
|
||||
if (codePtr[totalSize] == 0x66)
|
||||
totalSize++;
|
||||
// REX
|
||||
if ((codePtr[totalSize] & 0xF0) == 0x40)
|
||||
totalSize++;
|
||||
// SAR/ROL
|
||||
_assert_(codePtr[totalSize] == 0xC1 &&
|
||||
(codePtr[totalSize + 2] == 0x10 || codePtr[totalSize + 2] == 0x08));
|
||||
info.signExtend = (codePtr[totalSize + 1] & 0x10) != 0;
|
||||
totalSize += 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (info.byteSwap || info.hasImmediate)
|
||||
{
|
||||
// The instruction is a MOVBE but it failed so the value is still in little-endian byte order.
|
||||
}
|
||||
else
|
||||
{
|
||||
// We entered here with a BSWAP-ed register. We'll have to swap it back.
|
||||
u64* ptr = ContextRN(ctx, info.regOperandReg);
|
||||
int bswapSize = 0;
|
||||
switch (info.operandSize)
|
||||
{
|
||||
case 1:
|
||||
bswapSize = 0;
|
||||
break;
|
||||
case 2:
|
||||
bswapSize = 4 + (info.regOperandReg >= 8 ? 1 : 0);
|
||||
*ptr = Common::swap16((u16)*ptr);
|
||||
break;
|
||||
case 4:
|
||||
bswapSize = 2 + (info.regOperandReg >= 8 ? 1 : 0);
|
||||
*ptr = Common::swap32((u32)*ptr);
|
||||
break;
|
||||
case 8:
|
||||
bswapSize = 3;
|
||||
*ptr = Common::swap64(*ptr);
|
||||
break;
|
||||
}
|
||||
start = codePtr - bswapSize;
|
||||
totalSize += bswapSize;
|
||||
}
|
||||
}
|
||||
|
||||
// In the trampoline code, we jump back into the block at the beginning
|
||||
// of the next instruction. The next instruction comes immediately
|
||||
// after the backpatched operation, or BACKPATCH_SIZE bytes after the start
|
||||
// of the backpatched operation, whichever comes last. (The JIT inserts NOPs
|
||||
// into the original code if necessary to ensure there is enough space
|
||||
// to insert the backpatch jump.)
|
||||
int padding = totalSize > BACKPATCH_SIZE ? totalSize - BACKPATCH_SIZE : 0;
|
||||
u8* returnPtr = start + 5 + padding;
|
||||
|
||||
jit->js.generatingTrampoline = true;
|
||||
jit->js.trampolineExceptionHandler = exceptionHandler;
|
||||
|
||||
// Generate the trampoline.
|
||||
const u8* trampoline;
|
||||
if (info.isMemoryWrite)
|
||||
{
|
||||
// TODO: special case FIFO writes.
|
||||
auto it3 = pcAtLoc.find(codePtr);
|
||||
if (it3 == pcAtLoc.end())
|
||||
{
|
||||
PanicAlert("BackPatch: no pc entry for address %p", codePtr);
|
||||
return false;
|
||||
}
|
||||
const u8* trampoline = trampolines.GenerateTrampoline(info);
|
||||
jit->js.generatingTrampoline = false;
|
||||
jit->js.trampolineExceptionHandler = nullptr;
|
||||
|
||||
u32 pc = it3->second;
|
||||
trampoline =
|
||||
trampolines.GenerateWriteTrampoline(info, registersInUse, exceptionHandler, returnPtr, pc);
|
||||
}
|
||||
else
|
||||
{
|
||||
trampoline =
|
||||
trampolines.GenerateReadTrampoline(info, registersInUse, exceptionHandler, returnPtr);
|
||||
}
|
||||
u8* start = info.start;
|
||||
|
||||
// Patch the original memory operation.
|
||||
XEmitter emitter(start);
|
||||
emitter.JMP(trampoline, true);
|
||||
for (int i = 0; i < padding; ++i)
|
||||
// NOPs become dead code
|
||||
const u8* end = info.start + info.len;
|
||||
for (const u8* i = emitter.GetCodePtr(); i < end; ++i)
|
||||
emitter.INT3();
|
||||
ctx->CTX_PC = (u64)start;
|
||||
|
||||
// Rewind time to just before the start of the write block. If we swapped memory
|
||||
// before faulting (eg: the store+swap was not an atomic op like MOVBE), let's
|
||||
// swap it back so that the swap can happen again (this double swap isn't ideal but
|
||||
// only happens the first time we fault).
|
||||
if (info.nonAtomicSwapStoreSrc != INVALID_REG)
|
||||
{
|
||||
u64* ptr = ContextRN(ctx, info.nonAtomicSwapStoreSrc);
|
||||
switch (info.accessSize << 3)
|
||||
{
|
||||
case 8:
|
||||
// No need to swap a byte
|
||||
break;
|
||||
case 16:
|
||||
*ptr = Common::swap16(static_cast<u16>(*ptr));
|
||||
break;
|
||||
case 32:
|
||||
*ptr = Common::swap32(static_cast<u32>(*ptr));
|
||||
break;
|
||||
case 64:
|
||||
*ptr = Common::swap64(static_cast<u64>(*ptr));
|
||||
break;
|
||||
default:
|
||||
_dbg_assert_(DYNA_REC, 0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// This is special code to undo the LEA in SafeLoadToReg if it clobbered the address
|
||||
// register in the case where reg_value shared the same location as opAddress.
|
||||
if (info.offsetAddedToAddress)
|
||||
{
|
||||
u64* ptr = ContextRN(ctx, info.op_arg.GetSimpleReg());
|
||||
*ptr -= static_cast<u32>(info.offset);
|
||||
}
|
||||
|
||||
ctx->CTX_PC = reinterpret_cast<u64>(trampoline);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -96,6 +96,9 @@ protected:
|
||||
bool carryFlagSet;
|
||||
bool carryFlagInverted;
|
||||
|
||||
bool generatingTrampoline;
|
||||
u8* trampolineExceptionHandler;
|
||||
|
||||
int fifoBytesThisBlock;
|
||||
|
||||
PPCAnalyst::BlockStats st;
|
||||
|
@ -18,6 +18,26 @@ using namespace Gen;
|
||||
|
||||
void EmuCodeBlock::MemoryExceptionCheck()
|
||||
{
|
||||
// TODO: We really should untangle the trampolines, exception handlers and
|
||||
// memory checks.
|
||||
|
||||
// If we are currently generating a trampoline for a failed fastmem
|
||||
// load/store, the trampoline generator will have stashed the exception
|
||||
// handler (that we previously generated after the fastmem instruction) in
|
||||
// trampolineExceptionHandler.
|
||||
if (jit->js.generatingTrampoline)
|
||||
{
|
||||
if (jit->js.trampolineExceptionHandler)
|
||||
{
|
||||
TEST(32, PPCSTATE(Exceptions), Gen::Imm32(EXCEPTION_DSI));
|
||||
J_CC(CC_NZ, jit->js.trampolineExceptionHandler);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// If memcheck (ie: MMU) mode is enabled and we haven't generated an
|
||||
// exception handler for this instruction yet, we will generate an
|
||||
// exception check.
|
||||
if (jit->jo.memcheck && !jit->js.fastmemLoadStore && !jit->js.fixupExceptionHandler)
|
||||
{
|
||||
TEST(32, PPCSTATE(Exceptions), Gen::Imm32(EXCEPTION_DSI));
|
||||
@ -42,10 +62,10 @@ void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, i
|
||||
MOVZX(32, accessSize, reg_value, MComplex(RMEM, reg_addr, SCALE_1, offset));
|
||||
}
|
||||
|
||||
u8* EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessSize, s32 offset,
|
||||
bool signExtend)
|
||||
bool EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessSize, s32 offset,
|
||||
bool signExtend, MovInfo* info)
|
||||
{
|
||||
u8* result;
|
||||
bool offsetAddedToAddress = false;
|
||||
OpArg memOperand;
|
||||
if (opAddress.IsSimpleReg())
|
||||
{
|
||||
@ -57,6 +77,11 @@ u8* EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessS
|
||||
// place to address the issue.)
|
||||
if ((u32)offset >= 0x1000)
|
||||
{
|
||||
// This method can potentially clobber the address if it shares a register
|
||||
// with the load target. In this case we can just subtract offset from the
|
||||
// register (see JitBackpatch for this implementation).
|
||||
offsetAddedToAddress = (reg_value == opAddress.GetSimpleReg());
|
||||
|
||||
LEA(32, reg_value, MDisp(opAddress.GetSimpleReg(), offset));
|
||||
opAddress = R(reg_value);
|
||||
offset = 0;
|
||||
@ -74,9 +99,8 @@ u8* EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessS
|
||||
memOperand = MComplex(RMEM, reg_value, SCALE_1, offset);
|
||||
}
|
||||
|
||||
result = GetWritableCodePtr();
|
||||
LoadAndSwap(accessSize, reg_value, memOperand, signExtend);
|
||||
return result;
|
||||
LoadAndSwap(accessSize, reg_value, memOperand, signExtend, info);
|
||||
return offsetAddedToAddress;
|
||||
}
|
||||
|
||||
// Visitor that generates code to read a MMIO value.
|
||||
@ -231,72 +255,43 @@ FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_
|
||||
void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize,
|
||||
s32 offset, BitSet32 registersInUse, bool signExtend, int flags)
|
||||
{
|
||||
registersInUse[reg_value] = false;
|
||||
if (jit->jo.fastmem && !opAddress.IsImm() &&
|
||||
!(flags & (SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_FASTMEM)))
|
||||
{
|
||||
u8* mov = UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend);
|
||||
bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0;
|
||||
|
||||
registersInUseAtLoc[mov] = registersInUse;
|
||||
jit->js.fastmemLoadStore = mov;
|
||||
registersInUse[reg_value] = false;
|
||||
if (jit->jo.fastmem && !(flags & SAFE_LOADSTORE_NO_FASTMEM) && !slowmem)
|
||||
{
|
||||
u8* backpatchStart = GetWritableCodePtr();
|
||||
MovInfo mov;
|
||||
bool offsetAddedToAddress =
|
||||
UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend, &mov);
|
||||
TrampolineInfo& info = backPatchInfo[mov.address];
|
||||
info.pc = jit->js.compilerPC;
|
||||
info.nonAtomicSwapStoreSrc = mov.nonAtomicSwapStore ? mov.nonAtomicSwapStoreSrc : INVALID_REG;
|
||||
info.start = backpatchStart;
|
||||
info.read = true;
|
||||
info.op_reg = reg_value;
|
||||
info.op_arg = opAddress;
|
||||
info.offsetAddedToAddress = offsetAddedToAddress;
|
||||
info.accessSize = accessSize >> 3;
|
||||
info.offset = offset;
|
||||
info.registersInUse = registersInUse;
|
||||
info.flags = flags;
|
||||
info.signExtend = signExtend;
|
||||
ptrdiff_t padding = BACKPATCH_SIZE - (GetCodePtr() - backpatchStart);
|
||||
if (padding > 0)
|
||||
{
|
||||
NOP(padding);
|
||||
}
|
||||
info.len = static_cast<u32>(GetCodePtr() - info.start);
|
||||
|
||||
jit->js.fastmemLoadStore = mov.address;
|
||||
return;
|
||||
}
|
||||
|
||||
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
|
||||
|
||||
// The following masks the region used by the GC/Wii virtual memory lib
|
||||
mem_mask |= Memory::ADDR_MASK_MEM1;
|
||||
|
||||
if (opAddress.IsImm())
|
||||
{
|
||||
u32 address = opAddress.Imm32() + offset;
|
||||
|
||||
// If the address is known to be RAM, just load it directly.
|
||||
if (PowerPC::IsOptimizableRAMAddress(address))
|
||||
{
|
||||
UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend);
|
||||
return;
|
||||
}
|
||||
|
||||
// If the address maps to an MMIO register, inline MMIO read code.
|
||||
u32 mmioAddress = PowerPC::IsOptimizableMMIOAccess(address, accessSize);
|
||||
if (accessSize != 64 && mmioAddress)
|
||||
{
|
||||
MMIOLoadToReg(Memory::mmio_mapping.get(), reg_value, registersInUse, mmioAddress, accessSize,
|
||||
signExtend);
|
||||
return;
|
||||
}
|
||||
|
||||
// Fall back to general-case code.
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||
switch (accessSize)
|
||||
{
|
||||
case 64:
|
||||
ABI_CallFunctionC((void*)&PowerPC::Read_U64, address);
|
||||
break;
|
||||
case 32:
|
||||
ABI_CallFunctionC((void*)&PowerPC::Read_U32, address);
|
||||
break;
|
||||
case 16:
|
||||
ABI_CallFunctionC((void*)&PowerPC::Read_U16_ZX, address);
|
||||
break;
|
||||
case 8:
|
||||
ABI_CallFunctionC((void*)&PowerPC::Read_U8_ZX, address);
|
||||
break;
|
||||
}
|
||||
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
||||
|
||||
MemoryExceptionCheck();
|
||||
if (signExtend && accessSize < 32)
|
||||
{
|
||||
// Need to sign extend values coming from the Read_U* functions.
|
||||
MOVSX(32, accessSize, reg_value, R(ABI_RETURN));
|
||||
}
|
||||
else if (reg_value != ABI_RETURN)
|
||||
{
|
||||
MOVZX(64, accessSize, reg_value, R(ABI_RETURN));
|
||||
}
|
||||
|
||||
SafeLoadToRegImmediate(reg_value, address, accessSize, registersInUse, signExtend);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -310,8 +305,13 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
|
||||
}
|
||||
|
||||
FixupBranch exit;
|
||||
if (!jit->jo.alwaysUseMemFuncs)
|
||||
if (!jit->jo.alwaysUseMemFuncs && !slowmem)
|
||||
{
|
||||
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
|
||||
|
||||
// The following masks the region used by the GC/Wii virtual memory lib
|
||||
mem_mask |= Memory::ADDR_MASK_MEM1;
|
||||
|
||||
FixupBranch slow = CheckIfSafeAddress(R(reg_value), reg_addr, registersInUse, mem_mask);
|
||||
UnsafeLoadToReg(reg_value, R(reg_addr), accessSize, 0, signExtend);
|
||||
if (farcode.Enabled())
|
||||
@ -350,7 +350,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
|
||||
MOVZX(64, accessSize, reg_value, R(ABI_RETURN));
|
||||
}
|
||||
|
||||
if (!jit->jo.alwaysUseMemFuncs)
|
||||
if (!jit->jo.alwaysUseMemFuncs && !slowmem)
|
||||
{
|
||||
if (farcode.Enabled())
|
||||
{
|
||||
@ -361,6 +361,56 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
|
||||
}
|
||||
}
|
||||
|
||||
// Emits code that loads a value from a compile-time-known emulated address into reg_value.
//   reg_value      - host register that receives the loaded value
//   address        - emulated address to read
//   accessSize     - access width in bits (8, 16, 32 or 64)
//   registersInUse - registers saved/restored around the MMIO read or the fallback call
//   signExtend     - sign-extend the result (only acted on here for widths below 32 bits)
// Three strategies are tried in order: direct RAM load, inlined MMIO read, and finally
// a call to the generic PowerPC::Read_* function.
void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int accessSize,
|
||||
BitSet32 registersInUse, bool signExtend)
|
||||
{
|
||||
// If the address is known to be RAM, just load it directly.
|
||||
if (PowerPC::IsOptimizableRAMAddress(address))
|
||||
{
|
||||
UnsafeLoadToReg(reg_value, Imm32(address), accessSize, 0, signExtend);
|
||||
return;
|
||||
}
|
||||
|
||||
// If the address maps to an MMIO register, inline MMIO read code.
// A zero result from IsOptimizableMMIOAccess means "not optimizable"; 64-bit accesses
// never take this path.
|
||||
u32 mmioAddress = PowerPC::IsOptimizableMMIOAccess(address, accessSize);
|
||||
if (accessSize != 64 && mmioAddress)
|
||||
{
|
||||
MMIOLoadToReg(Memory::mmio_mapping.get(), reg_value, registersInUse, mmioAddress, accessSize,
|
||||
signExtend);
|
||||
return;
|
||||
}
|
||||
|
||||
// Fall back to general-case code.
// Save the live registers, call the read function matching the access width with the
// constant address as its argument, then restore the registers.
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||
switch (accessSize)
|
||||
{
|
||||
case 64:
|
||||
ABI_CallFunctionC(reinterpret_cast<void*>(&PowerPC::Read_U64), address);
|
||||
break;
|
||||
case 32:
|
||||
ABI_CallFunctionC(reinterpret_cast<void*>(&PowerPC::Read_U32), address);
|
||||
break;
|
||||
case 16:
|
||||
ABI_CallFunctionC(reinterpret_cast<void*>(&PowerPC::Read_U16_ZX), address);
|
||||
break;
|
||||
case 8:
|
||||
ABI_CallFunctionC(reinterpret_cast<void*>(&PowerPC::Read_U8_ZX), address);
|
||||
break;
|
||||
}
|
||||
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
||||
|
||||
// The exception check is emitted before the result is copied into reg_value.
|
||||
MemoryExceptionCheck();
|
||||
if (signExtend && accessSize < 32)
|
||||
{
|
||||
// Need to sign extend values coming from the Read_U* functions.
|
||||
MOVSX(32, accessSize, reg_value, R(ABI_RETURN));
|
||||
}
|
||||
// No move is emitted when the result is already in the target register.
// NOTE(review): that also skips the zero-extension; presumably the Read_* helpers
// already return a zero-extended value -- confirm.
else if (reg_value != ABI_RETURN)
|
||||
{
|
||||
MOVZX(64, accessSize, reg_value, R(ABI_RETURN));
|
||||
}
|
||||
}
|
||||
|
||||
static OpArg SwapImmediate(int accessSize, const OpArg& reg_value)
|
||||
{
|
||||
if (accessSize == 32)
|
||||
@ -371,10 +421,15 @@ static OpArg SwapImmediate(int accessSize, const OpArg& reg_value)
|
||||
return Imm8(reg_value.Imm8());
|
||||
}
|
||||
|
||||
u8* EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset,
|
||||
bool swap)
|
||||
void EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset,
|
||||
bool swap, MovInfo* info)
|
||||
{
|
||||
u8* result = GetWritableCodePtr();
|
||||
if (info)
|
||||
{
|
||||
info->address = GetWritableCodePtr();
|
||||
info->nonAtomicSwapStore = false;
|
||||
}
|
||||
|
||||
OpArg dest = MComplex(RMEM, reg_addr, SCALE_1, offset);
|
||||
if (reg_value.IsImm())
|
||||
{
|
||||
@ -384,22 +439,19 @@ u8* EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acce
|
||||
}
|
||||
else if (swap)
|
||||
{
|
||||
result = SwapAndStore(accessSize, dest, reg_value.GetSimpleReg());
|
||||
SwapAndStore(accessSize, dest, reg_value.GetSimpleReg(), info);
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV(accessSize, dest, reg_value);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static OpArg FixImmediate(int accessSize, OpArg arg)
|
||||
{
|
||||
if (arg.IsImm())
|
||||
{
|
||||
arg = accessSize == 8 ? Imm8((u8)arg.Imm32()) : accessSize == 16 ? Imm16((u16)arg.Imm32()) :
|
||||
Imm32((u32)arg.Imm32());
|
||||
arg = accessSize == 8 ? arg.AsImm8() : accessSize == 16 ? arg.AsImm16() : arg.AsImm32();
|
||||
}
|
||||
return arg;
|
||||
}
|
||||
@ -475,25 +527,38 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address,
|
||||
void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset,
|
||||
BitSet32 registersInUse, int flags)
|
||||
{
|
||||
bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP);
|
||||
bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0;
|
||||
|
||||
// set the correct immediate format
|
||||
reg_value = FixImmediate(accessSize, reg_value);
|
||||
|
||||
// TODO: support byte-swapped non-immediate fastmem stores
|
||||
if (jit->jo.fastmem && !(flags & SAFE_LOADSTORE_NO_FASTMEM) &&
|
||||
(reg_value.IsImm() || !(flags & SAFE_LOADSTORE_NO_SWAP)))
|
||||
if (jit->jo.fastmem && !(flags & SAFE_LOADSTORE_NO_FASTMEM) && !slowmem)
|
||||
{
|
||||
const u8* backpatchStart = GetCodePtr();
|
||||
u8* mov = UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset,
|
||||
!(flags & SAFE_LOADSTORE_NO_SWAP));
|
||||
u8* backpatchStart = GetWritableCodePtr();
|
||||
MovInfo mov;
|
||||
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, swap, &mov);
|
||||
TrampolineInfo& info = backPatchInfo[mov.address];
|
||||
info.pc = jit->js.compilerPC;
|
||||
info.nonAtomicSwapStoreSrc = mov.nonAtomicSwapStore ? mov.nonAtomicSwapStoreSrc : INVALID_REG;
|
||||
info.start = backpatchStart;
|
||||
info.read = false;
|
||||
info.op_arg = reg_value;
|
||||
info.op_reg = reg_addr;
|
||||
info.offsetAddedToAddress = false;
|
||||
info.accessSize = accessSize >> 3;
|
||||
info.offset = offset;
|
||||
info.registersInUse = registersInUse;
|
||||
info.flags = flags;
|
||||
ptrdiff_t padding = BACKPATCH_SIZE - (GetCodePtr() - backpatchStart);
|
||||
if (padding > 0)
|
||||
{
|
||||
NOP(padding);
|
||||
}
|
||||
info.len = static_cast<u32>(GetCodePtr() - info.start);
|
||||
|
||||
jit->js.fastmemLoadStore = mov.address;
|
||||
|
||||
registersInUseAtLoc[mov] = registersInUse;
|
||||
pcAtLoc[mov] = jit->js.compilerPC;
|
||||
jit->js.fastmemLoadStore = mov;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -510,21 +575,22 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
|
||||
}
|
||||
}
|
||||
|
||||
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
|
||||
|
||||
// The following masks the region used by the GC/Wii virtual memory lib
|
||||
mem_mask |= Memory::ADDR_MASK_MEM1;
|
||||
|
||||
bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP);
|
||||
|
||||
FixupBranch slow, exit;
|
||||
slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse, mem_mask);
|
||||
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
|
||||
if (farcode.Enabled())
|
||||
SwitchToFarCode();
|
||||
else
|
||||
exit = J(true);
|
||||
SetJumpTarget(slow);
|
||||
if (!slowmem)
|
||||
{
|
||||
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
|
||||
|
||||
// The following masks the region used by the GC/Wii virtual memory lib
|
||||
mem_mask |= Memory::ADDR_MASK_MEM1;
|
||||
|
||||
slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse, mem_mask);
|
||||
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
|
||||
if (farcode.Enabled())
|
||||
SwitchToFarCode();
|
||||
else
|
||||
exit = J(true);
|
||||
SetJumpTarget(slow);
|
||||
}
|
||||
|
||||
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
|
||||
MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));
|
||||
@ -563,12 +629,18 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
|
||||
break;
|
||||
}
|
||||
ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment);
|
||||
if (farcode.Enabled())
|
||||
|
||||
MemoryExceptionCheck();
|
||||
|
||||
if (!slowmem)
|
||||
{
|
||||
exit = J(true);
|
||||
SwitchToNearCode();
|
||||
if (farcode.Enabled())
|
||||
{
|
||||
exit = J(true);
|
||||
SwitchToNearCode();
|
||||
}
|
||||
SetJumpTarget(exit);
|
||||
}
|
||||
SetJumpTarget(exit);
|
||||
}
|
||||
|
||||
void EmuCodeBlock::WriteToConstRamAddress(int accessSize, OpArg arg, u32 address, bool swap)
|
||||
@ -1055,7 +1127,6 @@ void EmuCodeBlock::JitClearCA()
|
||||
|
||||
void EmuCodeBlock::Clear()
|
||||
{
|
||||
registersInUseAtLoc.clear();
|
||||
pcAtLoc.clear();
|
||||
backPatchInfo.clear();
|
||||
exceptionHandlerAtLoc.clear();
|
||||
}
|
||||
|
@ -59,6 +59,47 @@ static const int FARCODE_SIZE_MMU = 1024 * 1024 * 48;
|
||||
static const int TRAMPOLINE_CODE_SIZE = 1024 * 1024 * 8;
|
||||
static const int TRAMPOLINE_CODE_SIZE_MMU = 1024 * 1024 * 32;
|
||||
|
||||
// Stores information we need to backpatch a MOV with a call to the slow read/write path after
|
||||
// it faults. There will be 10s of thousands of these structs live, so be wary of making this too
|
||||
// big.
|
||||
// Per-access record filled in by SafeLoadToReg / SafeWriteRegToReg when a fastmem
// load/store is emitted, and consumed by BackPatch and the trampoline generator.
struct TrampolineInfo final
|
||||
{
|
||||
// The start of the emitted load/store sequence that failed -- we will patch a JMP
// to the trampoline here
|
||||
u8* start;
|
||||
|
||||
// Length in bytes of the emitted sequence, including any NOP padding added to reach
// BACKPATCH_SIZE. start + len points to the next instruction, which is where the
// trampoline jumps back to.
|
||||
u32 len;
|
||||
|
||||
// The PPC PC for the current load/store block (taken from js.compilerPC at emit time)
|
||||
u32 pc;
|
||||
|
||||
// Saved because we need these to make the ABI call in the trampoline
|
||||
BitSet32 registersInUse;
|
||||
|
||||
// Source register of a non-atomic swap+store, or INVALID_REG if there is none.
// BackPatch byte-swaps this register back before redirecting to the trampoline.
|
||||
Gen::X64Reg nonAtomicSwapStoreSrc;
|
||||
|
||||
// src/dest for load/store.
// For loads: op_reg is the destination register and op_arg is the address operand.
// For stores: op_reg is the address register and op_arg is the value operand.
// offset is the displacement passed to the original SafeLoad/SafeWrite call.
|
||||
s32 offset;
|
||||
Gen::X64Reg op_reg;
|
||||
Gen::OpArg op_arg;
|
||||
|
||||
// Original SafeLoadXXX/SafeStoreXXX flags (the trampoline ORs in
// SAFE_LOADSTORE_FORCE_SLOWMEM when replaying the access)
|
||||
u8 flags;
|
||||
|
||||
// Memory access size (in bytes); stored as the bit width shifted right by 3
|
||||
u8 accessSize : 4;
|
||||
|
||||
// true if this is a read op vs a write
|
||||
bool read : 1;
|
||||
|
||||
// for read operations, true if needs sign-extension after load
|
||||
bool signExtend : 1;
|
||||
|
||||
// Set to true if we added the offset to the address and need to undo it
// (BackPatch subtracts offset from the address register in that case)
|
||||
bool offsetAddedToAddress : 1;
|
||||
};
|
||||
|
||||
// Like XCodeBlock but has some utilities for memory access.
|
||||
class EmuCodeBlock : public Gen::X64CodeBlock
|
||||
{
|
||||
@ -88,15 +129,15 @@ public:
|
||||
void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize,
|
||||
s32 offset, bool signExtend = false);
|
||||
// these return the address of the MOV, for backpatching
|
||||
u8* UnsafeWriteRegToReg(Gen::OpArg reg_value, Gen::X64Reg reg_addr, int accessSize,
|
||||
s32 offset = 0, bool swap = true);
|
||||
u8* UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize,
|
||||
s32 offset = 0, bool swap = true)
|
||||
void UnsafeWriteRegToReg(Gen::OpArg reg_value, Gen::X64Reg reg_addr, int accessSize,
|
||||
s32 offset = 0, bool swap = true, Gen::MovInfo* info = nullptr);
|
||||
void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize,
|
||||
s32 offset = 0, bool swap = true, Gen::MovInfo* info = nullptr)
|
||||
{
|
||||
return UnsafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, swap);
|
||||
UnsafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, swap, info);
|
||||
}
|
||||
u8* UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset,
|
||||
bool signExtend);
|
||||
bool UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset,
|
||||
bool signExtend, Gen::MovInfo* info = nullptr);
|
||||
void UnsafeWriteGatherPipe(int accessSize);
|
||||
|
||||
// Generate a load/write from the MMIO handler for a given address. Only
|
||||
@ -108,12 +149,18 @@ public:
|
||||
{
|
||||
SAFE_LOADSTORE_NO_SWAP = 1,
|
||||
SAFE_LOADSTORE_NO_PROLOG = 2,
|
||||
// This indicates that the write being generated cannot be patched (and thus can't use fastmem)
|
||||
SAFE_LOADSTORE_NO_FASTMEM = 4,
|
||||
SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8
|
||||
SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8,
|
||||
// Force slowmem (used when generating fallbacks in trampolines)
|
||||
SAFE_LOADSTORE_FORCE_SLOWMEM = 16,
|
||||
};
|
||||
|
||||
void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize, s32 offset,
|
||||
BitSet32 registersInUse, bool signExtend, int flags = 0);
|
||||
void SafeLoadToRegImmediate(Gen::X64Reg reg_value, u32 address, int accessSize,
|
||||
BitSet32 registersInUse, bool signExtend);
|
||||
|
||||
// Clobbers RSCRATCH or reg_addr depending on the relevant flag. Preserves
|
||||
// reg_value if the load fails and js.memcheck is enabled.
|
||||
// Works with immediate inputs and simple registers only.
|
||||
@ -158,7 +205,6 @@ public:
|
||||
void Clear();
|
||||
|
||||
protected:
|
||||
std::unordered_map<u8*, BitSet32> registersInUseAtLoc;
|
||||
std::unordered_map<u8*, u32> pcAtLoc;
|
||||
std::unordered_map<u8*, TrampolineInfo> backPatchInfo;
|
||||
std::unordered_map<u8*, u8*> exceptionHandlerAtLoc;
|
||||
};
|
||||
|
@ -9,7 +9,6 @@
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/JitRegister.h"
|
||||
#include "Common/x64ABI.h"
|
||||
#include "Common/x64Analyzer.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/PowerPC/JitCommon/JitBase.h"
|
||||
#include "Core/PowerPC/JitCommon/Jit_Util.h"
|
||||
@ -37,150 +36,50 @@ void TrampolineCache::Shutdown()
|
||||
FreeCodeSpace();
|
||||
}
|
||||
|
||||
const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo& info,
|
||||
BitSet32 registersInUse, u8* exceptionHandler,
|
||||
u8* returnPtr)
|
||||
// Generates the slow-path trampoline for a faulting fastmem access described by info
// and returns the address of the generated code. Dispatches on info.read to the read
// or write trampoline generator.
const u8* TrampolineCache::GenerateTrampoline(const TrampolineInfo& info)
|
||||
{
|
||||
if (info.read)
|
||||
{
|
||||
return GenerateReadTrampoline(info);
|
||||
}
|
||||
|
||||
return GenerateWriteTrampoline(info);
|
||||
}
|
||||
|
||||
const u8* TrampolineCache::GenerateReadTrampoline(const TrampolineInfo& info)
|
||||
{
|
||||
if (GetSpaceLeft() < 1024)
|
||||
PanicAlert("Trampoline cache full");
|
||||
|
||||
const u8* trampoline = GetCodePtr();
|
||||
X64Reg addrReg = (X64Reg)info.scaledReg;
|
||||
X64Reg dataReg = (X64Reg)info.regOperandReg;
|
||||
int stack_offset = 0;
|
||||
bool push_param1 = registersInUse[ABI_PARAM1];
|
||||
|
||||
if (push_param1)
|
||||
{
|
||||
PUSH(ABI_PARAM1);
|
||||
stack_offset = 8;
|
||||
registersInUse[ABI_PARAM1] = 0;
|
||||
}
|
||||
SafeLoadToReg(info.op_reg, info.op_arg, info.accessSize << 3, info.offset, info.registersInUse,
|
||||
info.signExtend, info.flags | SAFE_LOADSTORE_FORCE_SLOWMEM);
|
||||
|
||||
int dataRegSize = info.operandSize == 8 ? 64 : 32;
|
||||
if (addrReg != ABI_PARAM1 && info.displacement)
|
||||
LEA(32, ABI_PARAM1, MDisp(addrReg, info.displacement));
|
||||
else if (addrReg != ABI_PARAM1)
|
||||
MOV(32, R(ABI_PARAM1), R(addrReg));
|
||||
else if (info.displacement)
|
||||
ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
|
||||
JMP(info.start + info.len, true);
|
||||
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, stack_offset);
|
||||
|
||||
switch (info.operandSize)
|
||||
{
|
||||
case 8:
|
||||
CALL((void*)&PowerPC::Read_U64);
|
||||
break;
|
||||
case 4:
|
||||
CALL((void*)&PowerPC::Read_U32);
|
||||
break;
|
||||
case 2:
|
||||
CALL((void*)&PowerPC::Read_U16);
|
||||
break;
|
||||
case 1:
|
||||
CALL((void*)&PowerPC::Read_U8);
|
||||
break;
|
||||
}
|
||||
|
||||
ABI_PopRegistersAndAdjustStack(registersInUse, stack_offset);
|
||||
|
||||
if (push_param1)
|
||||
POP(ABI_PARAM1);
|
||||
|
||||
if (exceptionHandler)
|
||||
{
|
||||
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
|
||||
J_CC(CC_NZ, exceptionHandler);
|
||||
}
|
||||
|
||||
if (info.signExtend)
|
||||
MOVSX(dataRegSize, info.operandSize * 8, dataReg, R(ABI_RETURN));
|
||||
else if (dataReg != ABI_RETURN || info.operandSize < 4)
|
||||
MOVZX(dataRegSize, info.operandSize * 8, dataReg, R(ABI_RETURN));
|
||||
|
||||
JMP(returnPtr, true);
|
||||
|
||||
JitRegister::Register(trampoline, GetCodePtr(), "JIT_ReadTrampoline");
|
||||
JitRegister::Register(trampoline, GetCodePtr(), "JIT_ReadTrampoline_%x", info.pc);
|
||||
return trampoline;
|
||||
}
|
||||
|
||||
const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo& info,
|
||||
BitSet32 registersInUse, u8* exceptionHandler,
|
||||
u8* returnPtr, u32 pc)
|
||||
const u8* TrampolineCache::GenerateWriteTrampoline(const TrampolineInfo& info)
|
||||
{
|
||||
if (GetSpaceLeft() < 1024)
|
||||
PanicAlert("Trampoline cache full");
|
||||
|
||||
const u8* trampoline = GetCodePtr();
|
||||
|
||||
X64Reg dataReg = (X64Reg)info.regOperandReg;
|
||||
X64Reg addrReg = (X64Reg)info.scaledReg;
|
||||
|
||||
// Don't treat FIFO writes specially for now because they require a burst
|
||||
// check anyway.
|
||||
|
||||
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
|
||||
MOV(32, PPCSTATE(pc), Imm32(pc));
|
||||
MOV(32, PPCSTATE(pc), Imm32(info.pc));
|
||||
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||
SafeWriteRegToReg(info.op_arg, info.op_reg, info.accessSize << 3, info.offset,
|
||||
info.registersInUse, info.flags | SAFE_LOADSTORE_FORCE_SLOWMEM);
|
||||
|
||||
if (info.hasImmediate)
|
||||
{
|
||||
if (addrReg != ABI_PARAM2 && info.displacement)
|
||||
LEA(32, ABI_PARAM2, MDisp(addrReg, info.displacement));
|
||||
else if (addrReg != ABI_PARAM2)
|
||||
MOV(32, R(ABI_PARAM2), R(addrReg));
|
||||
else if (info.displacement)
|
||||
ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
|
||||
JMP(info.start + info.len, true);
|
||||
|
||||
// we have to swap back the immediate to pass it to the write functions
|
||||
switch (info.operandSize)
|
||||
{
|
||||
case 8:
|
||||
PanicAlert("Invalid 64-bit immediate!");
|
||||
break;
|
||||
case 4:
|
||||
MOV(32, R(ABI_PARAM1), Imm32(Common::swap32((u32)info.immediate)));
|
||||
break;
|
||||
case 2:
|
||||
MOV(16, R(ABI_PARAM1), Imm16(Common::swap16((u16)info.immediate)));
|
||||
break;
|
||||
case 1:
|
||||
MOV(8, R(ABI_PARAM1), Imm8((u8)info.immediate));
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int dataRegSize = info.operandSize == 8 ? 64 : 32;
|
||||
MOVTwo(dataRegSize, ABI_PARAM2, addrReg, info.displacement, ABI_PARAM1, dataReg);
|
||||
}
|
||||
|
||||
switch (info.operandSize)
|
||||
{
|
||||
case 8:
|
||||
CALL((void*)&PowerPC::Write_U64);
|
||||
break;
|
||||
case 4:
|
||||
CALL((void*)&PowerPC::Write_U32);
|
||||
break;
|
||||
case 2:
|
||||
CALL((void*)&PowerPC::Write_U16);
|
||||
break;
|
||||
case 1:
|
||||
CALL((void*)&PowerPC::Write_U8);
|
||||
break;
|
||||
}
|
||||
|
||||
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
||||
if (exceptionHandler)
|
||||
{
|
||||
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
|
||||
J_CC(CC_NZ, exceptionHandler);
|
||||
}
|
||||
JMP(returnPtr, true);
|
||||
|
||||
JitRegister::Register(trampoline, GetCodePtr(), "JIT_WriteTrampoline_%x", pc);
|
||||
JitRegister::Register(trampoline, GetCodePtr(), "JIT_WriteTrampoline_%x", info.pc);
|
||||
return trampoline;
|
||||
}
|
||||
|
@ -7,21 +7,21 @@
|
||||
#include "Common/BitSet.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/PowerPC/JitCommon/Jit_Util.h"
|
||||
|
||||
struct InstructionInfo;
|
||||
|
||||
// We need at least this many bytes for backpatching.
|
||||
const int BACKPATCH_SIZE = 5;
|
||||
|
||||
class TrampolineCache : public Gen::X64CodeBlock
|
||||
class TrampolineCache : public EmuCodeBlock
|
||||
{
|
||||
const u8* GenerateReadTrampoline(const TrampolineInfo& info);
|
||||
const u8* GenerateWriteTrampoline(const TrampolineInfo& info);
|
||||
|
||||
public:
|
||||
void Init(int size);
|
||||
void Shutdown();
|
||||
|
||||
const u8* GenerateReadTrampoline(const InstructionInfo& info, BitSet32 registersInUse,
|
||||
u8* exceptionHandler, u8* returnPtr);
|
||||
const u8* GenerateWriteTrampoline(const InstructionInfo& info, BitSet32 registersInUse,
|
||||
u8* exceptionHandler, u8* returnPtr, u32 pc);
|
||||
const u8* GenerateTrampoline(const TrampolineInfo& info);
|
||||
void ClearCodeSpace();
|
||||
};
|
||||
|
Reference in New Issue
Block a user