Reformat all the things. Have fun with merge conflicts.

Pierre Bourdon
2016-06-24 10:43:46 +02:00
parent 2115e8a4a6
commit 3570c7f03a
1116 changed files with 187405 additions and 180344 deletions


@@ -4,63 +4,77 @@
#include "Core/PowerPC/JitCommon/JitAsmCommon.h"
alignas(16) const u8 pbswapShuffle1x4[16] = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
alignas(16) const u8 pbswapShuffle2x4[16] = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};

alignas(16) const float m_quantizeTableS[] = {
    (1ULL << 0), (1ULL << 0), (1ULL << 1), (1ULL << 1),
    (1ULL << 2), (1ULL << 2), (1ULL << 3), (1ULL << 3),
    (1ULL << 4), (1ULL << 4), (1ULL << 5), (1ULL << 5),
    (1ULL << 6), (1ULL << 6), (1ULL << 7), (1ULL << 7),
    (1ULL << 8), (1ULL << 8), (1ULL << 9), (1ULL << 9),
    (1ULL << 10), (1ULL << 10), (1ULL << 11), (1ULL << 11),
    (1ULL << 12), (1ULL << 12), (1ULL << 13), (1ULL << 13),
    (1ULL << 14), (1ULL << 14), (1ULL << 15), (1ULL << 15),
    (1ULL << 16), (1ULL << 16), (1ULL << 17), (1ULL << 17),
    (1ULL << 18), (1ULL << 18), (1ULL << 19), (1ULL << 19),
    (1ULL << 20), (1ULL << 20), (1ULL << 21), (1ULL << 21),
    (1ULL << 22), (1ULL << 22), (1ULL << 23), (1ULL << 23),
    (1ULL << 24), (1ULL << 24), (1ULL << 25), (1ULL << 25),
    (1ULL << 26), (1ULL << 26), (1ULL << 27), (1ULL << 27),
    (1ULL << 28), (1ULL << 28), (1ULL << 29), (1ULL << 29),
    (1ULL << 30), (1ULL << 30), (1ULL << 31), (1ULL << 31),
    1.0 / (1ULL << 32), 1.0 / (1ULL << 32), 1.0 / (1ULL << 31), 1.0 / (1ULL << 31),
    1.0 / (1ULL << 30), 1.0 / (1ULL << 30), 1.0 / (1ULL << 29), 1.0 / (1ULL << 29),
    1.0 / (1ULL << 28), 1.0 / (1ULL << 28), 1.0 / (1ULL << 27), 1.0 / (1ULL << 27),
    1.0 / (1ULL << 26), 1.0 / (1ULL << 26), 1.0 / (1ULL << 25), 1.0 / (1ULL << 25),
    1.0 / (1ULL << 24), 1.0 / (1ULL << 24), 1.0 / (1ULL << 23), 1.0 / (1ULL << 23),
    1.0 / (1ULL << 22), 1.0 / (1ULL << 22), 1.0 / (1ULL << 21), 1.0 / (1ULL << 21),
    1.0 / (1ULL << 20), 1.0 / (1ULL << 20), 1.0 / (1ULL << 19), 1.0 / (1ULL << 19),
    1.0 / (1ULL << 18), 1.0 / (1ULL << 18), 1.0 / (1ULL << 17), 1.0 / (1ULL << 17),
    1.0 / (1ULL << 16), 1.0 / (1ULL << 16), 1.0 / (1ULL << 15), 1.0 / (1ULL << 15),
    1.0 / (1ULL << 14), 1.0 / (1ULL << 14), 1.0 / (1ULL << 13), 1.0 / (1ULL << 13),
    1.0 / (1ULL << 12), 1.0 / (1ULL << 12), 1.0 / (1ULL << 11), 1.0 / (1ULL << 11),
    1.0 / (1ULL << 10), 1.0 / (1ULL << 10), 1.0 / (1ULL << 9), 1.0 / (1ULL << 9),
    1.0 / (1ULL << 8), 1.0 / (1ULL << 8), 1.0 / (1ULL << 7), 1.0 / (1ULL << 7),
    1.0 / (1ULL << 6), 1.0 / (1ULL << 6), 1.0 / (1ULL << 5), 1.0 / (1ULL << 5),
    1.0 / (1ULL << 4), 1.0 / (1ULL << 4), 1.0 / (1ULL << 3), 1.0 / (1ULL << 3),
    1.0 / (1ULL << 2), 1.0 / (1ULL << 2), 1.0 / (1ULL << 1), 1.0 / (1ULL << 1),
};

alignas(16) const float m_dequantizeTableS[] = {
    1.0 / (1ULL << 0), 1.0 / (1ULL << 0), 1.0 / (1ULL << 1), 1.0 / (1ULL << 1),
    1.0 / (1ULL << 2), 1.0 / (1ULL << 2), 1.0 / (1ULL << 3), 1.0 / (1ULL << 3),
    1.0 / (1ULL << 4), 1.0 / (1ULL << 4), 1.0 / (1ULL << 5), 1.0 / (1ULL << 5),
    1.0 / (1ULL << 6), 1.0 / (1ULL << 6), 1.0 / (1ULL << 7), 1.0 / (1ULL << 7),
    1.0 / (1ULL << 8), 1.0 / (1ULL << 8), 1.0 / (1ULL << 9), 1.0 / (1ULL << 9),
    1.0 / (1ULL << 10), 1.0 / (1ULL << 10), 1.0 / (1ULL << 11), 1.0 / (1ULL << 11),
    1.0 / (1ULL << 12), 1.0 / (1ULL << 12), 1.0 / (1ULL << 13), 1.0 / (1ULL << 13),
    1.0 / (1ULL << 14), 1.0 / (1ULL << 14), 1.0 / (1ULL << 15), 1.0 / (1ULL << 15),
    1.0 / (1ULL << 16), 1.0 / (1ULL << 16), 1.0 / (1ULL << 17), 1.0 / (1ULL << 17),
    1.0 / (1ULL << 18), 1.0 / (1ULL << 18), 1.0 / (1ULL << 19), 1.0 / (1ULL << 19),
    1.0 / (1ULL << 20), 1.0 / (1ULL << 20), 1.0 / (1ULL << 21), 1.0 / (1ULL << 21),
    1.0 / (1ULL << 22), 1.0 / (1ULL << 22), 1.0 / (1ULL << 23), 1.0 / (1ULL << 23),
    1.0 / (1ULL << 24), 1.0 / (1ULL << 24), 1.0 / (1ULL << 25), 1.0 / (1ULL << 25),
    1.0 / (1ULL << 26), 1.0 / (1ULL << 26), 1.0 / (1ULL << 27), 1.0 / (1ULL << 27),
    1.0 / (1ULL << 28), 1.0 / (1ULL << 28), 1.0 / (1ULL << 29), 1.0 / (1ULL << 29),
    1.0 / (1ULL << 30), 1.0 / (1ULL << 30), 1.0 / (1ULL << 31), 1.0 / (1ULL << 31),
    (1ULL << 32), (1ULL << 32), (1ULL << 31), (1ULL << 31),
    (1ULL << 30), (1ULL << 30), (1ULL << 29), (1ULL << 29),
    (1ULL << 28), (1ULL << 28), (1ULL << 27), (1ULL << 27),
    (1ULL << 26), (1ULL << 26), (1ULL << 25), (1ULL << 25),
    (1ULL << 24), (1ULL << 24), (1ULL << 23), (1ULL << 23),
    (1ULL << 22), (1ULL << 22), (1ULL << 21), (1ULL << 21),
    (1ULL << 20), (1ULL << 20), (1ULL << 19), (1ULL << 19),
    (1ULL << 18), (1ULL << 18), (1ULL << 17), (1ULL << 17),
    (1ULL << 16), (1ULL << 16), (1ULL << 15), (1ULL << 15),
    (1ULL << 14), (1ULL << 14), (1ULL << 13), (1ULL << 13),
    (1ULL << 12), (1ULL << 12), (1ULL << 11), (1ULL << 11),
    (1ULL << 10), (1ULL << 10), (1ULL << 9), (1ULL << 9),
    (1ULL << 8), (1ULL << 8), (1ULL << 7), (1ULL << 7),
    (1ULL << 6), (1ULL << 6), (1ULL << 5), (1ULL << 5),
    (1ULL << 4), (1ULL << 4), (1ULL << 3), (1ULL << 3),
    (1ULL << 2), (1ULL << 2), (1ULL << 1), (1ULL << 1),
};

alignas(16) const float m_one[] = {1.0f, 0.0f, 0.0f, 0.0f};
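// These tables supply the 2^scale factors used by paired-single quantized
// loads and stores: each factor appears twice so both lanes of a float pair
// can be scaled with one SIMD multiply, and the upper half of each table
// covers negative exponents (the GQR scale field is a signed 6-bit quantity).
// A standalone scalar sketch of the same mapping, assuming a signed 16-bit
// target format:
#include <algorithm>
#include <cstdint>

int16_t QuantizeToS16_sketch(float value, unsigned scale /* 0..63 */)
{
  // Matches m_quantizeTableS[scale * 2]: scales 0..31 mean *2^scale,
  // scales 32..63 wrap to *2^(scale - 64).
  const float factor = (scale < 32) ? static_cast<float>(1ULL << scale) :
                                      1.0f / static_cast<float>(1ULL << (64 - scale));
  return static_cast<int16_t>(std::clamp(value * factor, -32768.0f, 32767.0f));
}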


@@ -15,40 +15,39 @@ alignas(16) extern const float m_dequantizeTableS[];
class CommonAsmRoutinesBase
{
public:
  const u8* fifoDirectWrite8;
  const u8* fifoDirectWrite16;
  const u8* fifoDirectWrite32;
  const u8* fifoDirectWrite64;

  const u8* enterCode;

  const u8* dispatcherMispredictedBLR;
  const u8* dispatcher;
  const u8* dispatcherNoCheck;

  const u8* doTiming;

  const u8* frsqrte;
  const u8* fres;
  const u8* mfcr;

  // In: array index: GQR to use.
  // In: ECX: Address to read from.
  // Out: XMM0: Bottom two 32-bit slots hold the read value,
  //            converted to a pair of floats.
  // Trashes: all three RSCRATCH
  const u8** pairedLoadQuantized;

  // In: array index: GQR to use.
  // In: ECX: Address to write to.
  // In: XMM0: Bottom two 32-bit slots hold the pair of floats to be written.
  // Out: Nothing.
  // Trashes: all three RSCRATCH
  const u8** pairedStoreQuantized;

  // In: array index: GQR to use.
  // In: ECX: Address to write to.
  // In: XMM0: Bottom 32-bit slot holds the float to be written.
  const u8** singleStoreQuantized;
};
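// A minimal model of how a routine-pointer table like pairedLoadQuantized is
// consumed (illustrative only: the real JIT emits an indirect CALL, and the
// routines use the bespoke register convention documented above, not the C
// calling convention).
#include <cstdint>

using QuantizedRoutine = void (*)();

void DispatchQuantized_sketch(const uint8_t* const* table, unsigned gqr_index)
{
  // One specialized routine per GQR configuration: selection is an indexed
  // load from the table followed by an indirect call.
  auto fn = reinterpret_cast<QuantizedRoutine>(table[gqr_index]);
  fn();
}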


@@ -19,18 +19,18 @@
using namespace Gen;
static void BackPatchError(const std::string& text, u8* codePtr, u32 emAddress)
{
  u64 code_addr = (u64)codePtr;
  disassembler disasm;
  char disbuf[256];
  memset(disbuf, 0, 256);
  disasm.disasm64(0, code_addr, codePtr, disbuf);
  PanicAlert("%s\n\n"
             "Error encountered accessing emulated address %08x.\n"
             "Culprit instruction: \n%s\nat %#" PRIx64,
             text.c_str(), emAddress, disbuf, code_addr);
  return;
}
// This generates some fairly heavy trampolines, but it doesn't really hurt.
@@ -38,163 +38,167 @@ static void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress)
// many of them in a typical program/game.
bool Jitx86Base::HandleFault(uintptr_t access_address, SContext* ctx)
{
  // TODO: do we properly handle off-the-end?
  if (access_address >= (uintptr_t)Memory::physical_base &&
      access_address < (uintptr_t)Memory::physical_base + 0x100010000)
    return BackPatch((u32)(access_address - (uintptr_t)Memory::physical_base), ctx);
  if (access_address >= (uintptr_t)Memory::logical_base &&
      access_address < (uintptr_t)Memory::logical_base + 0x100010000)
    return BackPatch((u32)(access_address - (uintptr_t)Memory::logical_base), ctx);

  return false;
}
bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
{
  u8* codePtr = (u8*)ctx->CTX_PC;

  if (!IsInSpace(codePtr))
    return false;  // this will become a regular crash real soon after this

  InstructionInfo info = {};

  if (!DisassembleMov(codePtr, &info))
  {
    BackPatchError("BackPatch - failed to disassemble MOV instruction", codePtr, emAddress);
    return false;
  }

  if (info.otherReg != RMEM)
  {
    PanicAlert("BackPatch : Base reg not RMEM."
               "\n\nAttempted to access %08x.",
               emAddress);
    return false;
  }

  if (info.byteSwap && info.instructionSize < BACKPATCH_SIZE)
  {
    PanicAlert("BackPatch: MOVBE is too small");
    return false;
  }

  auto it = registersInUseAtLoc.find(codePtr);
  if (it == registersInUseAtLoc.end())
  {
    PanicAlert("BackPatch: no register use entry for address %p", codePtr);
    return false;
  }

  BitSet32 registersInUse = it->second;

  u8* exceptionHandler = nullptr;
  if (jit->jo.memcheck)
  {
    auto it2 = exceptionHandlerAtLoc.find(codePtr);
    if (it2 != exceptionHandlerAtLoc.end())
      exceptionHandler = it2->second;
  }

  // Compute the start and length of the memory operation, including
  // any byteswapping.
  int totalSize = info.instructionSize;
  u8* start = codePtr;
  if (!info.isMemoryWrite)
  {
    // MOVBE and single bytes don't need to be swapped.
    if (!info.byteSwap && info.operandSize > 1)
    {
      // REX
      if ((codePtr[totalSize] & 0xF0) == 0x40)
        totalSize++;

      // BSWAP
      if (codePtr[totalSize] == 0x0F && (codePtr[totalSize + 1] & 0xF8) == 0xC8)
        totalSize += 2;

      if (info.operandSize == 2)
      {
        // operand size override
        if (codePtr[totalSize] == 0x66)
          totalSize++;
        // REX
        if ((codePtr[totalSize] & 0xF0) == 0x40)
          totalSize++;
        // SAR/ROL
        _assert_(codePtr[totalSize] == 0xC1 &&
                 (codePtr[totalSize + 2] == 0x10 || codePtr[totalSize + 2] == 0x08));
        info.signExtend = (codePtr[totalSize + 1] & 0x10) != 0;
        totalSize += 3;
      }
    }
  }
  else
  {
    if (info.byteSwap || info.hasImmediate)
    {
      // The instruction is a MOVBE but it failed so the value is still in little-endian byte
      // order.
    }
    else
    {
      // We entered here with a BSWAP-ed register. We'll have to swap it back.
      u64* ptr = ContextRN(ctx, info.regOperandReg);
      int bswapSize = 0;
      switch (info.operandSize)
      {
      case 1:
        bswapSize = 0;
        break;
      case 2:
        bswapSize = 4 + (info.regOperandReg >= 8 ? 1 : 0);
        *ptr = Common::swap16((u16)*ptr);
        break;
      case 4:
        bswapSize = 2 + (info.regOperandReg >= 8 ? 1 : 0);
        *ptr = Common::swap32((u32)*ptr);
        break;
      case 8:
        bswapSize = 3;
        *ptr = Common::swap64(*ptr);
        break;
      }
      start = codePtr - bswapSize;
      totalSize += bswapSize;
    }
  }

  // In the trampoline code, we jump back into the block at the beginning
  // of the next instruction. The next instruction comes immediately
  // after the backpatched operation, or BACKPATCH_SIZE bytes after the start
  // of the backpatched operation, whichever comes last. (The JIT inserts NOPs
  // into the original code if necessary to ensure there is enough space
  // to insert the backpatch jump.)
  int padding = totalSize > BACKPATCH_SIZE ? totalSize - BACKPATCH_SIZE : 0;
  u8* returnPtr = start + 5 + padding;

  // Generate the trampoline.
  const u8* trampoline;
  if (info.isMemoryWrite)
  {
    // TODO: special case FIFO writes.
    auto it3 = pcAtLoc.find(codePtr);
    if (it3 == pcAtLoc.end())
    {
      PanicAlert("BackPatch: no pc entry for address %p", codePtr);
      return false;
    }

    u32 pc = it3->second;
    trampoline =
        trampolines.GenerateWriteTrampoline(info, registersInUse, exceptionHandler, returnPtr, pc);
  }
  else
  {
    trampoline =
        trampolines.GenerateReadTrampoline(info, registersInUse, exceptionHandler, returnPtr);
  }

  // Patch the original memory operation.
  XEmitter emitter(start);
  emitter.JMP(trampoline, true);
  for (int i = 0; i < padding; ++i)
    emitter.INT3();
  ctx->CTX_PC = (u64)start;

  return true;
}
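// A worked instance of the padding arithmetic above, assuming BACKPATCH_SIZE
// is the 5 bytes of the JMP rel32 that overwrites the faulting access:
#include <cstdio>

int main()
{
  const int kBackpatchSize = 5;  // stand-in for BACKPATCH_SIZE (assumption)
  int totalSize = 7;             // e.g. a MOV plus its BSWAP, as measured above
  int padding = totalSize > kBackpatchSize ? totalSize - kBackpatchSize : 0;  // == 2
  // The trampoline re-enters the block at start + 5 + padding, i.e. just past
  // the JMP and the INT3 filler covering the rest of the original code.
  std::printf("padding = %d, re-entry at start + %d\n", padding, 5 + padding);
}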


@@ -9,86 +9,85 @@
#include "Common/CommonTypes.h"
#include "Common/GekkoDisassembler.h"
#include "Common/StringUtil.h"
#include "Common/Logging/Log.h"
#include "Common/StringUtil.h"
#include "Core/ConfigManager.h"
#include "Core/HW/CPU.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCAnalyst.h"
#include "Core/PowerPC/JitCommon/JitBase.h"
#include "Core/PowerPC/PPCAnalyst.h"
#include "Core/PowerPC/PowerPC.h"
JitBase *jit;
JitBase* jit;
void Jit(u32 em_address)
{
  jit->Jit(em_address);
}
u32 Helper_Mask(u8 mb, u8 me)
{
  u32 mask = ((u32)-1 >> mb) ^ (me >= 31 ? 0 : (u32)-1 >> (me + 1));
  return mb > me ? ~mask : mask;
}
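// Helper_Mask builds the rlwinm-style mask from mask-begin/mask-end bit
// indices in IBM numbering (bit 0 = most significant); when mb > me the mask
// wraps around the word. A self-contained check of a few cases:
#include <cassert>
#include <cstdint>

static uint32_t Mask_sketch(uint8_t mb, uint8_t me)
{
  uint32_t mask = ((uint32_t)-1 >> mb) ^ (me >= 31 ? 0 : (uint32_t)-1 >> (me + 1));
  return mb > me ? ~mask : mask;
}

int main()
{
  assert(Mask_sketch(0, 31) == 0xFFFFFFFF);   // full word
  assert(Mask_sketch(0, 7) == 0xFF000000);    // bits 0..7 = top byte
  assert(Mask_sketch(24, 31) == 0x000000FF);  // bits 24..31 = low byte
  assert(Mask_sketch(28, 3) == 0xF000000F);   // mb > me: wrapped mask
}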
void LogGeneratedX86(int size, PPCAnalyst::CodeBuffer* code_buffer, const u8* normalEntry,
                     JitBlock* b)
{
  for (int i = 0; i < size; i++)
  {
    const PPCAnalyst::CodeOp& op = code_buffer->codebuffer[i];
    std::string temp = StringFromFormat(
        "%08x %s", op.address, GekkoDisassembler::Disassemble(op.inst.hex, op.address).c_str());
    DEBUG_LOG(DYNA_REC, "IR_X86 PPC: %s\n", temp.c_str());
  }

  disassembler x64disasm;
  x64disasm.set_syntax_intel();

  u64 disasmPtr = (u64)normalEntry;
  const u8* end = normalEntry + b->codeSize;

  while ((u8*)disasmPtr < end)
  {
    char sptr[1000] = "";
    disasmPtr += x64disasm.disasm64(disasmPtr, disasmPtr, (u8*)disasmPtr, sptr);
    DEBUG_LOG(DYNA_REC, "IR_X86 x86: %s", sptr);
  }

  if (b->codeSize <= 250)
  {
    std::stringstream ss;
    ss << std::hex;
    for (u8 i = 0; i <= b->codeSize; i++)
    {
      ss.width(2);
      ss.fill('0');
      ss << (u32) * (normalEntry + i);
    }
    DEBUG_LOG(DYNA_REC, "IR_X86 bin: %s\n\n\n", ss.str().c_str());
  }
}
bool JitBase::MergeAllowedNextInstructions(int count)
{
  if (CPU::GetState() == CPU::CPU_STEPPING || js.instructionsLeft < count)
    return false;
  // Be careful: a breakpoint kills flags in between instructions
  for (int i = 1; i <= count; i++)
  {
    if (SConfig::GetInstance().bEnableDebugging &&
        PowerPC::breakpoints.IsAddressBreakPoint(js.op[i].address))
      return false;
    if (js.op[i].isBranchTarget)
      return false;
  }
  return true;
}
void JitBase::UpdateMemoryOptions()
{
  bool any_watchpoints = PowerPC::memchecks.HasAny();
  jo.fastmem = SConfig::GetInstance().bFastmem && !any_watchpoints;
  jo.memcheck = SConfig::GetInstance().bMMU || any_watchpoints;
  jo.alwaysUseMemFuncs = any_watchpoints;
}


@@ -15,11 +15,11 @@
#include "Core/ConfigManager.h"
#include "Core/MachineContext.h"
#include "Core/PowerPC/CPUCoreBase.h"
#include "Core/PowerPC/PPCAnalyst.h"
#include "Core/PowerPC/Jit64Common/Jit64AsmCommon.h"
#include "Core/PowerPC/JitCommon/Jit_Util.h"
#include "Core/PowerPC/JitCommon/JitCache.h"
#include "Core/PowerPC/JitCommon/Jit_Util.h"
#include "Core/PowerPC/JitCommon/TrampolineCache.h"
#include "Core/PowerPC/PPCAnalyst.h"
// TODO: find a better place for x86-specific stuff
// The following register assignments are common to Jit64 and Jit64IL:
@@ -41,101 +41,112 @@
// #define INSTRUCTION_START PPCTables::CountInstruction(inst);
#define INSTRUCTION_START
#define FALLBACK_IF(cond)                                                                          \
  do                                                                                               \
  {                                                                                                \
    if (cond)                                                                                      \
    {                                                                                              \
      FallBackToInterpreter(inst);                                                                 \
      return;                                                                                      \
    }                                                                                              \
  } while (0)

#define JITDISABLE(setting)                                                                        \
  FALLBACK_IF(SConfig::GetInstance().bJITOff || SConfig::GetInstance().setting)
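// The do { ... } while (0) wrapper is what lets FALLBACK_IF expand to a
// single statement, so it composes safely with if/else at every call site.
// A standalone illustration of the idiom (BAIL_IF is a stand-in, not Dolphin
// code):
#include <cstdio>

#define BAIL_IF(cond)                                                                              \
  do                                                                                               \
  {                                                                                                \
    if (cond)                                                                                      \
    {                                                                                              \
      std::puts("falling back");                                                                   \
      return;                                                                                      \
    }                                                                                              \
  } while (0)

void Handle_sketch(bool unsupported)
{
  if (unsupported)
    BAIL_IF(true);  // expands to exactly one statement: no dangling-else surprises
  else
    std::puts("fast path");
}

int main()
{
  Handle_sketch(true);
  Handle_sketch(false);
}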
class JitBase : public CPUCoreBase
{
protected:
  struct JitOptions
  {
    bool enableBlocklink;
    bool optimizeGatherPipe;
    bool accurateSinglePrecision;
    bool fastmem;
    bool memcheck;
    bool alwaysUseMemFuncs;
  };
  struct JitState
  {
    u32 compilerPC;
    u32 blockStart;
    int instructionNumber;
    int instructionsLeft;
    int downcountAmount;
    u32 numLoadStoreInst;
    u32 numFloatingPointInst;
    // If this is set, we need to generate an exception handler for the fastmem load.
    u8* fastmemLoadStore;
    // If this is set, a load or store already prepared a jump to the exception handler for us,
    // so just fixup that branch instead of testing for a DSI again.
    bool fixupExceptionHandler;
    Gen::FixupBranch exceptionHandler;
    // If these are set, we've stored the old value of a register which will be loaded in
    // revertLoad, which lets us revert it on the exception path.
    int revertGprLoad;
    int revertFprLoad;

    bool assumeNoPairedQuantize;
    bool firstFPInstructionFound;
    bool isLastInstruction;
    int skipInstructions;
    bool carryFlagSet;
    bool carryFlagInverted;

    int fifoBytesThisBlock;

    PPCAnalyst::BlockStats st;
    PPCAnalyst::BlockRegStats gpa;
    PPCAnalyst::BlockRegStats fpa;
    PPCAnalyst::CodeOp* op;
    u8* rewriteStart;

    JitBlock* curBlock;

    std::unordered_set<u32> fifoWriteAddresses;
    std::unordered_set<u32> pairedQuantizeAddresses;
  };

  PPCAnalyst::CodeBlock code_block;
  PPCAnalyst::PPCAnalyzer analyzer;

  bool MergeAllowedNextInstructions(int count);

  void UpdateMemoryOptions();

public:
  // This should probably be removed from public:
  JitOptions jo;
  JitState js;

  virtual JitBaseBlockCache* GetBlockCache() = 0;

  virtual void Jit(u32 em_address) = 0;

  virtual const CommonAsmRoutinesBase* GetAsmRoutines() = 0;

  virtual bool HandleFault(uintptr_t access_address, SContext* ctx) = 0;
  virtual bool HandleStackFault() { return false; }
};
class Jitx86Base : public JitBase, public EmuCodeBlock
{
protected:
  bool BackPatch(u32 emAddress, SContext* ctx);
  JitBlockCache blocks;
  TrampolineCache trampolines;

public:
  JitBlockCache* GetBlockCache() override { return &blocks; }
  bool HandleFault(uintptr_t access_address, SContext* ctx) override;
};
extern JitBase* jit;
void Jit(u32 em_address);
// Merged routines that should be moved somewhere better
u32 Helper_Mask(u8 mb, u8 me);
void LogGeneratedX86(int size, PPCAnalyst::CodeBuffer* code_buffer, const u8* normalEntry,
                     JitBlock* b);


@@ -17,9 +17,9 @@
#include "Common/JitRegister.h"
#include "Core/ConfigManager.h"
#include "Core/Core.h"
#include "Core/PowerPC/JitCommon/JitBase.h"
#include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/JitCommon/JitBase.h"
#ifdef _WIN32
#include <windows.h>
@@ -27,319 +27,322 @@
using namespace Gen;
bool JitBaseBlockCache::IsFull() const
{
  return GetNumBlocks() >= MAX_NUM_BLOCKS - 1;
}

void JitBaseBlockCache::Init()
{
  if (m_initialized)
  {
    PanicAlert("JitBaseBlockCache::Init() - iCache is already initialized");
    return;
  }

  JitRegister::Init(SConfig::GetInstance().m_perfDir);

  iCache.fill(JIT_ICACHE_INVALID_BYTE);
  iCacheEx.fill(JIT_ICACHE_INVALID_BYTE);
  iCacheVMEM.fill(JIT_ICACHE_INVALID_BYTE);
  Clear();

  m_initialized = true;
}

void JitBaseBlockCache::Shutdown()
{
  num_blocks = 0;
  m_initialized = false;

  JitRegister::Shutdown();
}

// This clears the JIT cache. It's called from JitCache.cpp when the JIT cache
// is full and when saving and loading states.
void JitBaseBlockCache::Clear()
{
#if defined(_DEBUG) || defined(DEBUGFAST)
  if (IsFull())
    Core::DisplayMessage("Clearing block cache.", 3000);
  else
    Core::DisplayMessage("Clearing code cache.", 3000);
#endif
  jit->js.fifoWriteAddresses.clear();
  jit->js.pairedQuantizeAddresses.clear();
  for (int i = 0; i < num_blocks; i++)
  {
    DestroyBlock(i, false);
  }
  links_to.clear();
  block_map.clear();

  valid_block.ClearAll();

  num_blocks = 0;
  blockCodePointers.fill(nullptr);
}
void JitBaseBlockCache::Reset()
{
  Shutdown();
  Init();
}

JitBlock* JitBaseBlockCache::GetBlock(int no)
{
  return &blocks[no];
}

int JitBaseBlockCache::GetNumBlocks() const
{
  return num_blocks;
}

int JitBaseBlockCache::AllocateBlock(u32 em_address)
{
  JitBlock& b = blocks[num_blocks];
  b.invalid = false;
  b.originalAddress = em_address;
  b.linkData.clear();
  num_blocks++;  // commit the current block
  return num_blocks - 1;
}

void JitBaseBlockCache::FinalizeBlock(int block_num, bool block_link, const u8* code_ptr)
{
  blockCodePointers[block_num] = code_ptr;
  JitBlock& b = blocks[block_num];

  std::memcpy(GetICachePtr(b.originalAddress), &block_num, sizeof(u32));

  // Convert the logical address to a physical address for the block map
  u32 pAddr = b.originalAddress & 0x1FFFFFFF;

  for (u32 block = pAddr / 32; block <= (pAddr + (b.originalSize - 1) * 4) / 32; ++block)
    valid_block.Set(block);

  block_map[std::make_pair(pAddr + 4 * b.originalSize - 1, pAddr)] = block_num;

  if (block_link)
  {
    for (const auto& e : b.linkData)
    {
      links_to.emplace(e.exitAddress, block_num);
    }

    LinkBlock(block_num);
    LinkBlockExits(block_num);
  }

  JitRegister::Register(blockCodePointers[block_num], b.codeSize, "JIT_PPC_%08x",
                        b.originalAddress);
}

const u8** JitBaseBlockCache::GetCodePointers()
{
  return blockCodePointers.data();
}

u8* JitBaseBlockCache::GetICachePtr(u32 addr)
{
  if (addr & JIT_ICACHE_VMEM_BIT)
    return &jit->GetBlockCache()->iCacheVMEM[addr & JIT_ICACHE_MASK];

  if (addr & JIT_ICACHE_EXRAM_BIT)
    return &jit->GetBlockCache()->iCacheEx[addr & JIT_ICACHEEX_MASK];

  return &jit->GetBlockCache()->iCache[addr & JIT_ICACHE_MASK];
}
int JitBaseBlockCache::GetBlockNumberFromStartAddress(u32 addr)
{
  u32 inst;
  std::memcpy(&inst, GetICachePtr(addr), sizeof(u32));

  if (inst & 0xfc000000)  // definitely not a JIT block
    return -1;

  if ((int)inst >= num_blocks)
    return -1;

  if (blocks[inst].originalAddress != addr)
    return -1;

  return inst;
}
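// The lookup above works because a block number stored in the iCache can
// never look like real instruction data: block numbers are small, so their
// top 6 bits (the PowerPC primary opcode field) are zero, while real
// instructions have a nonzero primary opcode and invalidated slots hold
// JIT_ICACHE_INVALID_WORD (0x80808080), which also fails the mask test.
// A standalone restatement of the filter:
#include <cstdint>

bool CouldBeBlockNumber_sketch(uint32_t inst, int num_blocks)
{
  if (inst & 0xfc000000)  // primary opcode bits set -> raw instruction data
    return false;
  return (int)inst < num_blocks;  // otherwise it may index the block array
}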
CompiledCode JitBaseBlockCache::GetCompiledCodeFromBlock(int block_num)
{
  return (CompiledCode)blockCodePointers[block_num];
}

// Block linker
// Make sure to have as many blocks as possible compiled before calling this
// It's O(N), so it's fast :)
// Can be faster by doing a queue for blocks to link up, and only process those
// Should probably be done

void JitBaseBlockCache::LinkBlockExits(int i)
{
  JitBlock& b = blocks[i];
  if (b.invalid)
  {
    // This block is dead. Don't relink it.
    return;
  }
  for (auto& e : b.linkData)
  {
    if (!e.linkStatus)
    {
      int destinationBlock = GetBlockNumberFromStartAddress(e.exitAddress);
      if (destinationBlock != -1)
      {
        WriteLinkBlock(e.exitPtrs, blocks[destinationBlock]);
        e.linkStatus = true;
      }
    }
  }
}
void JitBaseBlockCache::LinkBlock(int i)
{
  LinkBlockExits(i);
  JitBlock& b = blocks[i];
  // equal_range(b) returns pair<iterator,iterator> representing the range
  // of element with key b
  auto ppp = links_to.equal_range(b.originalAddress);

  if (ppp.first == ppp.second)
    return;

  for (auto iter = ppp.first; iter != ppp.second; ++iter)
  {
    // PanicAlert("Linking block %i to block %i", iter->second, i);
    LinkBlockExits(iter->second);
  }
}

void JitBaseBlockCache::UnlinkBlock(int i)
{
  JitBlock& b = blocks[i];
  auto ppp = links_to.equal_range(b.originalAddress);

  if (ppp.first == ppp.second)
    return;

  for (auto iter = ppp.first; iter != ppp.second; ++iter)
  {
    JitBlock& sourceBlock = blocks[iter->second];
    for (auto& e : sourceBlock.linkData)
    {
      if (e.exitAddress == b.originalAddress)
        e.linkStatus = false;
    }
  }
  links_to.erase(b.originalAddress);
}

void JitBaseBlockCache::DestroyBlock(int block_num, bool invalidate)
{
  if (block_num < 0 || block_num >= num_blocks)
  {
    PanicAlert("DestroyBlock: Invalid block number %d", block_num);
    return;
  }
  JitBlock& b = blocks[block_num];
  if (b.invalid)
  {
    if (invalidate)
      PanicAlert("Invalidating invalid block %d", block_num);
    return;
  }
  b.invalid = true;
  std::memcpy(GetICachePtr(b.originalAddress), &JIT_ICACHE_INVALID_WORD, sizeof(u32));

  UnlinkBlock(block_num);

  // Send anyone who tries to run this block back to the dispatcher.
  // Not entirely ideal, but .. pretty good.
  // Spurious entrances from previously linked blocks can only come through checkedEntry
  WriteDestroyBlock(b.checkedEntry, b.originalAddress);
}
void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool forced)
{
  // Convert the logical address to a physical address for the block map
  u32 pAddr = address & 0x1FFFFFFF;

  // Optimize the common case of length == 32 which is used by Interpreter::dcb*
  bool destroy_block = true;
  if (length == 32)
  {
    if (!valid_block.Test(pAddr / 32))
      destroy_block = false;
    else
      valid_block.Clear(pAddr / 32);
  }

  // destroy JIT blocks
  // !! this works correctly under the assumption that any two overlapping blocks end at the same
  // address
  if (destroy_block)
  {
    std::map<std::pair<u32, u32>, u32>::iterator it1 = block_map.lower_bound(std::make_pair(pAddr, 0)),
                                                 it2 = it1;
    while (it2 != block_map.end() && it2->first.second < pAddr + length)
    {
      JitBlock& b = blocks[it2->second];
      std::memcpy(GetICachePtr(b.originalAddress), &JIT_ICACHE_INVALID_WORD, sizeof(u32));

      DestroyBlock(it2->second, true);
      ++it2;
    }
    if (it1 != it2)
    {
      block_map.erase(it1, it2);
    }

    // If the code was actually modified, we need to clear the relevant entries from the
    // FIFO write address cache, so we don't end up with FIFO checks in places they shouldn't
    // be (this can clobber flags, and thus break any optimization that relies on flags
    // being in the right place between instructions).
    if (!forced)
    {
      for (u32 i = address; i < address + length; i += 4)
      {
        jit->js.fifoWriteAddresses.erase(i);
        jit->js.pairedQuantizeAddresses.erase(i);
      }
    }
  }
}
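// Tiny standalone model of the (end_addr, start_addr) -> block_num map used
// above: lower_bound({pAddr, 0}) finds the first block whose end address is
// >= pAddr, and the walk stops once a block's start address passes the end
// of the invalidated range, so exactly the overlapping blocks are visited.
#include <cstdint>
#include <cstdio>
#include <map>
#include <utility>

int main()
{
  std::map<std::pair<uint32_t, uint32_t>, uint32_t> block_map;
  block_map[{0x10FF, 0x1000}] = 0;  // block 0 spans [0x1000, 0x10FF]
  block_map[{0x20FF, 0x2000}] = 1;  // block 1 spans [0x2000, 0x20FF]

  uint32_t pAddr = 0x1080, length = 32;  // invalidate [0x1080, 0x109F]
  auto it = block_map.lower_bound({pAddr, 0});
  while (it != block_map.end() && it->first.second < pAddr + length)
  {
    std::printf("invalidate block %u\n", (unsigned)it->second);  // prints block 0 only
    ++it;
  }
}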
void JitBlockCache::WriteLinkBlock(u8* location, const JitBlock& block)
{
  const u8* address = block.checkedEntry;
  XEmitter emit(location);
  if (*location == 0xE8)
  {
    emit.CALL(address);
  }
  else
  {
    // If we're going to link with the next block, there is no need
    // to emit JMP. So just NOP out the gap to the next block.
    // Support up to 3 additional bytes because of alignment.
    s64 offset = address - emit.GetCodePtr();
    if (offset > 0 && offset <= 5 + 3)
      emit.NOP(offset);
    else
      emit.JMP(address, true);
  }
}
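// The decision above in isolation: when the destination block starts 1..8
// bytes ahead of the exit site (a 5-byte JMP rel32 plus up to 3 bytes of
// alignment padding), it is cheaper to pad with NOPs and fall through than
// to emit a jump.
#include <cstdint>

bool ShouldFallThrough_sketch(const uint8_t* exit_site, const uint8_t* destination)
{
  int64_t offset = destination - exit_site;
  return offset > 0 && offset <= 5 + 3;
}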
void JitBlockCache::WriteDestroyBlock(const u8* location, u32 address)
{
  XEmitter emit((u8*)location);
  emit.MOV(32, PPCSTATE(pc), Imm32(address));
  emit.JMP(jit->GetAsmRoutines()->dispatcher, true);
}


@@ -17,7 +17,7 @@ static const u32 JIT_ICACHE_MASK = 0x1ffffff;
static const u32 JIT_ICACHEEX_SIZE = 0x4000000;
static const u32 JIT_ICACHEEX_MASK = 0x3ffffff;
static const u32 JIT_ICACHE_EXRAM_BIT = 0x10000000;
static const u32 JIT_ICACHE_VMEM_BIT = 0x20000000;
// This corresponds to opcode 5 which is invalid in PowerPC
static const u32 JIT_ICACHE_INVALID_BYTE = 0x80;
@@ -25,29 +25,29 @@ static const u32 JIT_ICACHE_INVALID_WORD = 0x80808080;
struct JitBlock
{
  const u8* checkedEntry;
  const u8* normalEntry;

  u32 originalAddress;
  u32 codeSize;
  u32 originalSize;
  int runCount;  // for profiling.

  bool invalid;

  struct LinkData
  {
    u8* exitPtrs;  // to be able to rewrite the exit jump
    u32 exitAddress;
    bool linkStatus;  // is it already linked?
  };
  std::vector<LinkData> linkData;

  // we don't really need to save start and stop
  // TODO (mb2): ticStart and ticStop -> "local var" mean "in block" ... low priority ;)
  u64 ticStart;    // for profiling - time.
  u64 ticStop;     // for profiling - time.
  u64 ticCounter;  // for profiling - time.
};
typedef void (*CompiledCode)();
@@ -57,113 +57,89 @@ typedef void (*CompiledCode)();
class ValidBlockBitSet final
{
public:
  enum
  {
    VALID_BLOCK_MASK_SIZE = 0x20000000 / 32,
    VALID_BLOCK_ALLOC_ELEMENTS = VALID_BLOCK_MASK_SIZE / 32
  };
  // Directly accessed by Jit64.
  std::unique_ptr<u32[]> m_valid_block;

  ValidBlockBitSet()
  {
    m_valid_block.reset(new u32[VALID_BLOCK_ALLOC_ELEMENTS]);
    ClearAll();
  }

  void Set(u32 bit) { m_valid_block[bit / 32] |= 1u << (bit % 32); }
  void Clear(u32 bit) { m_valid_block[bit / 32] &= ~(1u << (bit % 32)); }
  void ClearAll() { memset(m_valid_block.get(), 0, sizeof(u32) * VALID_BLOCK_ALLOC_ELEMENTS); }
  bool Test(u32 bit) { return (m_valid_block[bit / 32] & (1u << (bit % 32))) != 0; }
};
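// How this bitset is indexed by FinalizeBlock and InvalidateICache earlier in
// this commit: one bit per 32-byte line of the 0x20000000-byte physical
// address space.
#include <cstdint>

uint32_t ValidBlockBitIndex_sketch(uint32_t address)
{
  uint32_t pAddr = address & 0x1FFFFFFF;  // strip the mirror bits, as the callers do
  return pAddr / 32;                      // argument for Set/Clear/Test
}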
class JitBaseBlockCache
{
  enum
  {
    MAX_NUM_BLOCKS = 65536 * 2,
  };

  std::array<const u8*, MAX_NUM_BLOCKS> blockCodePointers;
  std::array<JitBlock, MAX_NUM_BLOCKS> blocks;
  int num_blocks;
  std::multimap<u32, int> links_to;
  std::map<std::pair<u32, u32>, u32> block_map;  // (end_addr, start_addr) -> number
  ValidBlockBitSet valid_block;

  bool m_initialized;

  void LinkBlockExits(int i);
  void LinkBlock(int i);
  void UnlinkBlock(int i);

  u8* GetICachePtr(u32 addr);
  void DestroyBlock(int block_num, bool invalidate);

  // Virtual for overloaded
  virtual void WriteLinkBlock(u8* location, const JitBlock& block) = 0;
  virtual void WriteDestroyBlock(const u8* location, u32 address) = 0;

public:
  JitBaseBlockCache() : num_blocks(0), m_initialized(false) {}
  virtual ~JitBaseBlockCache() {}
  int AllocateBlock(u32 em_address);
  void FinalizeBlock(int block_num, bool block_link, const u8* code_ptr);

  void Clear();
  void Init();
  void Shutdown();
  void Reset();

  bool IsFull() const;

  // Code Cache
  JitBlock* GetBlock(int block_num);
  int GetNumBlocks() const;
  const u8** GetCodePointers();
  std::array<u8, JIT_ICACHE_SIZE> iCache;
  std::array<u8, JIT_ICACHEEX_SIZE> iCacheEx;
  std::array<u8, JIT_ICACHE_SIZE> iCacheVMEM;

  // Fast way to get a block. Only works on the first ppc instruction of a block.
  int GetBlockNumberFromStartAddress(u32 em_address);

  CompiledCode GetCompiledCodeFromBlock(int block_num);

  // DOES NOT WORK CORRECTLY WITH INLINING
  void InvalidateICache(u32 address, const u32 length, bool forced);

  u32* GetBlockBitSet() const { return valid_block.m_valid_block.get(); }
};
// x86 BlockCache
class JitBlockCache : public JitBaseBlockCache
{
private:
  void WriteLinkBlock(u8* location, const JitBlock& block) override;
  void WriteDestroyBlock(const u8* location, u32 address) override;
};

File diff suppressed because it is too large.


@@ -11,12 +11,15 @@
#include "Common/x64Emitter.h"
#include "Core/PowerPC/PowerPC.h"
namespace MMIO
{
class Mapping;
}
// We offset by 0x80 because the range of one byte memory offsets is
// -0x80..0x7f.
#define PPCSTATE(x)                                                                                \
  MDisp(RPPCSTATE, (int)((char*)&PowerPC::ppcState.x - (char*)&PowerPC::ppcState) - 0x80)
// In case you want to disable the ppcstate register:
// #define PPCSTATE(x) M(&PowerPC::ppcState.x)
#define PPCSTATE_LR PPCSTATE(spr[SPR_LR])
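// Why the -0x80 bias: with RPPCSTATE pointing 0x80 bytes into ppcState, any
// field whose raw offset is under 0x100 encodes with a one-byte signed x86
// displacement (-0x80..0x7f). A self-contained illustration with a stand-in
// layout (the real ppcState differs):
#include <cstdio>

struct FakePPCState
{
  unsigned gpr[32];  // 128 bytes
  unsigned pc;
};

int main()
{
  FakePPCState st{};
  int raw = (int)((char*)&st.pc - (char*)&st);  // 128
  int disp = raw - 0x80;                        // 0: fits in a signed byte
  std::printf("raw offset %d -> biased displacement %d\n", raw, disp);
}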
@@ -29,11 +32,20 @@ namespace MMIO { class Mapping; }
class FarCodeCache : public Gen::X64CodeBlock
{
private:
  bool m_enabled = false;

public:
  bool Enabled() const { return m_enabled; }
  void Init(int size)
  {
    AllocCodeSpace(size);
    m_enabled = true;
  }

  void Shutdown()
  {
    FreeCodeSpace();
    m_enabled = false;
  }
};
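// FarCodeCache holds a second emission buffer for rarely-taken slow paths;
// EmuCodeBlock below switches between the near and far streams (see
// SwitchToFarCode/SwitchToNearCode). A minimal stand-in for that idea (the
// real code retargets an X64CodeBlock emitter; byte vectors here are just
// for illustration): keeping slow paths out of line keeps the hot path dense
// in the instruction cache.
#include <cstdint>
#include <vector>

struct TwoStreamEmitter_sketch
{
  std::vector<uint8_t> near_stream, far_stream;
  std::vector<uint8_t>* current = &near_stream;

  void SwitchToFarCode() { current = &far_stream; }
  void SwitchToNearCode() { current = &near_stream; }
  void Emit(uint8_t byte) { current->push_back(byte); }
};

int main()
{
  TwoStreamEmitter_sketch e;
  e.Emit(0x90);  // fast path (NOP placeholder)
  e.SwitchToFarCode();
  e.Emit(0xCC);  // rarely-taken slow path (INT3 placeholder)
  e.SwitchToNearCode();
  e.Emit(0xC3);  // fast path continues (RET placeholder)
}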
static const int CODE_SIZE = 1024 * 1024 * 32;
@@ -51,87 +63,102 @@ static const int TRAMPOLINE_CODE_SIZE_MMU = 1024 * 1024 * 32;
class EmuCodeBlock : public Gen::X64CodeBlock
{
public:
  FarCodeCache farcode;
  u8* nearcode;  // Backed up when we switch to far code.

  void MemoryExceptionCheck();

  // Simple functions to switch between near and far code emitting
  void SwitchToFarCode()
  {
    nearcode = GetWritableCodePtr();
    SetCodePtr(farcode.GetWritableCodePtr());
  }

  void SwitchToNearCode()
  {
    farcode.SetCodePtr(GetWritableCodePtr());
    SetCodePtr(nearcode);
  }
Gen::FixupBranch CheckIfSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr, BitSet32 registers_in_use, u32 mem_mask);
void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false);
void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset, bool signExtend = false);
// these return the address of the MOV, for backpatching
u8 *UnsafeWriteRegToReg(Gen::OpArg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true);
u8 *UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true)
{
return UnsafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, swap);
}
u8 *UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset, bool signExtend);
void UnsafeWriteGatherPipe(int accessSize);
Gen::FixupBranch CheckIfSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr,
BitSet32 registers_in_use, u32 mem_mask);
void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize,
s32 offset = 0, bool signExtend = false);
void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize,
s32 offset, bool signExtend = false);
// these return the address of the MOV, for backpatching
u8* UnsafeWriteRegToReg(Gen::OpArg reg_value, Gen::X64Reg reg_addr, int accessSize,
s32 offset = 0, bool swap = true);
u8* UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize,
s32 offset = 0, bool swap = true)
{
return UnsafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, swap);
}
u8* UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset,
bool signExtend);
void UnsafeWriteGatherPipe(int accessSize);
// Generate a load/write from the MMIO handler for a given address. Only
// call for known addresses in MMIO range (MMIO::IsMMIOAddress).
void MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value, BitSet32 registers_in_use, u32 address, int access_size, bool sign_extend);
// Generate a load/write from the MMIO handler for a given address. Only
// call for known addresses in MMIO range (MMIO::IsMMIOAddress).
void MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value, BitSet32 registers_in_use,
u32 address, int access_size, bool sign_extend);
enum SafeLoadStoreFlags
{
SAFE_LOADSTORE_NO_SWAP = 1,
SAFE_LOADSTORE_NO_PROLOG = 2,
SAFE_LOADSTORE_NO_FASTMEM = 4,
SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8
};
enum SafeLoadStoreFlags
{
SAFE_LOADSTORE_NO_SWAP = 1,
SAFE_LOADSTORE_NO_PROLOG = 2,
SAFE_LOADSTORE_NO_FASTMEM = 4,
SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8
};
void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize, s32 offset,
BitSet32 registersInUse, bool signExtend, int flags = 0);
// Clobbers RSCRATCH or reg_addr depending on the relevant flag. Preserves
// reg_value if the load fails and js.memcheck is enabled.
// Works with immediate inputs and simple registers only.
void SafeWriteRegToReg(Gen::OpArg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset,
BitSet32 registersInUse, int flags = 0);
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset,
BitSet32 registersInUse, int flags = 0)
{
SafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, registersInUse, flags);
}
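// Flag usage sketch (operands illustrative): a store that skips the
// big-endian byte swap, never takes the fastmem path, and clobbers RSCRATCH
// rather than the address register.
//
//   SafeWriteRegToReg(value_reg, addr_reg, 32, 0, registersInUse,
//                     SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_FASTMEM |
//                     SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR);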
// applies to safe and unsafe WriteRegToReg
bool WriteClobbersRegValue(int accessSize, bool swap)
{
return swap && !cpu_info.bMOVBE && accessSize > 8;
}
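// Caller-side sketch (register names assumed): without MOVBE, a swapped
// store BSWAPs the value register in place, so copy it first if it is
// still live afterwards.
//
//   Gen::X64Reg store_reg = value_reg;
//   if (WriteClobbersRegValue(32, /*swap*/ true))
//   {
//     MOV(32, R(RSCRATCH2), R(value_reg));  // keep value_reg intact
//     store_reg = RSCRATCH2;
//   }
//   SafeWriteRegToReg(store_reg, addr_reg, 32, 0, registersInUse);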
void WriteToConstRamAddress(int accessSize, Gen::OpArg arg, u32 address, bool swap = true);
// returns true if an exception could have been caused
bool WriteToConstAddress(int accessSize, Gen::OpArg arg, u32 address, BitSet32 registersInUse);
void JitGetAndClearCAOV(bool oe);
void JitSetCA();
void JitSetCAIf(Gen::CCFlags conditionCode);
void JitClearCA();
void avx_op(void (Gen::XEmitter::*avxOp)(Gen::X64Reg, Gen::X64Reg, const Gen::OpArg&),
void (Gen::XEmitter::*sseOp)(Gen::X64Reg, const Gen::OpArg&), Gen::X64Reg regOp,
const Gen::OpArg& arg1, const Gen::OpArg& arg2, bool packed = true,
bool reversible = false);
void avx_op(void (Gen::XEmitter::*avxOp)(Gen::X64Reg, Gen::X64Reg, const Gen::OpArg&, u8),
void (Gen::XEmitter::*sseOp)(Gen::X64Reg, const Gen::OpArg&, u8), Gen::X64Reg regOp,
const Gen::OpArg& arg1, const Gen::OpArg& arg2, u8 imm);
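// Wrapper sketch (opcode pair illustrative): with AVX this emits the
// non-destructive three-operand form; otherwise it falls back to a register
// move plus the destructive two-operand SSE form.
//
//   avx_op(&Gen::XEmitter::VPAND, &Gen::XEmitter::PAND, dst, R(a), R(b));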
void ForceSinglePrecision(Gen::X64Reg output, const Gen::OpArg& input, bool packed = true,
bool duplicate = false);
void Force25BitPrecision(Gen::X64Reg output, const Gen::OpArg& input, Gen::X64Reg tmp);
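// Note (behavior summarized from how the JIT uses it, not stated here): the
// GC/Wii FPU effectively rounds one multiplicand to 25 mantissa bits before
// paired-single multiplies; Force25BitPrecision reproduces that rounding,
// using `tmp` as scratch.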
// RSCRATCH might get trashed
void ConvertSingleToDouble(Gen::X64Reg dst, Gen::X64Reg src, bool src_is_gpr = false);
void ConvertDoubleToSingle(Gen::X64Reg dst, Gen::X64Reg src);
void SetFPRF(Gen::X64Reg xmm);
void Clear();
protected:
std::unordered_map<u8*, BitSet32> registersInUseAtLoc;
std::unordered_map<u8*, u32> pcAtLoc;
std::unordered_map<u8*, u8*> exceptionHandlerAtLoc;
};

View File

@ -11,173 +11,176 @@
#include "Common/x64ABI.h"
#include "Common/x64Analyzer.h"
#include "Common/x64Emitter.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/JitCommon/Jit_Util.h"
#include "Core/PowerPC/JitCommon/JitBase.h"
#include "Core/PowerPC/JitCommon/Jit_Util.h"
#include "Core/PowerPC/JitCommon/TrampolineCache.h"
#include "Core/PowerPC/PowerPC.h"
#ifdef _WIN32
#include <windows.h>
#endif
using namespace Gen;
void TrampolineCache::Init(int size)
{
AllocCodeSpace(size);
}
void TrampolineCache::ClearCodeSpace()
{
X64CodeBlock::ClearCodeSpace();
}
void TrampolineCache::Shutdown()
{
FreeCodeSpace();
}
const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo& info,
BitSet32 registersInUse, u8* exceptionHandler,
u8* returnPtr)
{
if (GetSpaceLeft() < 1024)
PanicAlert("Trampoline cache full");
const u8* trampoline = GetCodePtr();
X64Reg addrReg = (X64Reg)info.scaledReg;
X64Reg dataReg = (X64Reg)info.regOperandReg;
int stack_offset = 0;
bool push_param1 = registersInUse[ABI_PARAM1];
if (push_param1)
{
PUSH(ABI_PARAM1);
stack_offset = 8;
registersInUse[ABI_PARAM1] = 0;
}
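// Materialize the guest effective address in ABI_PARAM1, choosing the
// cheapest encoding: LEA when the address register differs and there is a
// displacement, a plain MOV when it merely differs, or an in-place ADD when
// it is already ABI_PARAM1.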
int dataRegSize = info.operandSize == 8 ? 64 : 32;
if (addrReg != ABI_PARAM1 && info.displacement)
LEA(32, ABI_PARAM1, MDisp(addrReg, info.displacement));
else if (addrReg != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R(addrReg));
else if (info.displacement)
ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
ABI_PushRegistersAndAdjustStack(registersInUse, stack_offset);
switch (info.operandSize)
{
case 8:
CALL((void*)&PowerPC::Read_U64);
break;
case 4:
CALL((void*)&PowerPC::Read_U32);
break;
case 2:
CALL((void*)&PowerPC::Read_U16);
break;
case 1:
CALL((void*)&PowerPC::Read_U8);
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, stack_offset);
if (push_param1)
POP(ABI_PARAM1);
if (exceptionHandler)
{
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
J_CC(CC_NZ, exceptionHandler);
}
if (info.signExtend)
MOVSX(dataRegSize, info.operandSize * 8, dataReg, R(ABI_RETURN));
else if (dataReg != ABI_RETURN || info.operandSize < 4)
MOVZX(dataRegSize, info.operandSize * 8, dataReg, R(ABI_RETURN));
JMP(returnPtr, true);
JitRegister::Register(trampoline, GetCodePtr(), "JIT_ReadTrampoline");
return trampoline;
}
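// Call-site sketch (caller names assumed): the backpatcher replaces the
// faulting fastmem access with a jump to a freshly generated trampoline.
//
//   const u8* tramp = trampolines.GenerateReadTrampoline(info, registers_in_use,
//                                                        exception_handler, return_ptr);
//   XEmitter patcher(code_start);  // start of the faulting instruction
//   patcher.JMP(tramp, true);      // force a 5-byte jump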
const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo& info,
BitSet32 registersInUse, u8* exceptionHandler,
u8* returnPtr, u32 pc)
{
if (GetSpaceLeft() < 1024)
PanicAlert("Trampoline cache full");
const u8* trampoline = GetCodePtr();
X64Reg dataReg = (X64Reg)info.regOperandReg;
X64Reg addrReg = (X64Reg)info.scaledReg;
// Don't treat FIFO writes specially for now because they require a burst
// check anyway.
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
MOV(32, PPCSTATE(pc), Imm32(pc));
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
if (info.hasImmediate)
{
if (addrReg != ABI_PARAM2 && info.displacement)
LEA(32, ABI_PARAM2, MDisp(addrReg, info.displacement));
else if (addrReg != ABI_PARAM2)
MOV(32, R(ABI_PARAM2), R(addrReg));
else if (info.displacement)
ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
// we have to swap back the immediate to pass it to the write functions
switch (info.operandSize)
{
case 8:
PanicAlert("Invalid 64-bit immediate!");
break;
case 4:
MOV(32, R(ABI_PARAM1), Imm32(Common::swap32((u32)info.immediate)));
break;
case 2:
MOV(16, R(ABI_PARAM1), Imm16(Common::swap16((u16)info.immediate)));
break;
case 1:
MOV(8, R(ABI_PARAM1), Imm8((u8)info.immediate));
break;
}
}
else
{
int dataRegSize = info.operandSize == 8 ? 64 : 32;
MOVTwo(dataRegSize, ABI_PARAM2, addrReg, info.displacement, ABI_PARAM1, dataReg);
}
switch (info.operandSize)
{
case 8:
CALL((void*)&PowerPC::Write_U64);
break;
case 4:
CALL((void*)&PowerPC::Write_U32);
break;
case 2:
CALL((void*)&PowerPC::Write_U16);
break;
case 1:
CALL((void*)&PowerPC::Write_U8);
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
if (exceptionHandler)
{
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
J_CC(CC_NZ, exceptionHandler);
}
JMP(returnPtr, true);
JitRegister::Register(trampoline, GetCodePtr(), "JIT_WriteTrampoline_%x", pc);
return trampoline;
}

View File

@ -16,10 +16,12 @@ const int BACKPATCH_SIZE = 5;
class TrampolineCache : public Gen::X64CodeBlock
{
public:
void Init(int size);
void Shutdown();
const u8* GenerateReadTrampoline(const InstructionInfo& info, BitSet32 registersInUse,
u8* exceptionHandler, u8* returnPtr);
const u8* GenerateWriteTrampoline(const InstructionInfo& info, BitSet32 registersInUse,
u8* exceptionHandler, u8* returnPtr, u32 pc);
void ClearCodeSpace();
};
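// Lifecycle sketch (size and call sites assumed): the JIT owns one cache.
//
//   TrampolineCache trampolines;
//   trampolines.Init(8 * 1024 * 1024);  // reserve an executable region
//   // ... GenerateReadTrampoline / GenerateWriteTrampoline as faults occur ...
//   trampolines.ClearCodeSpace();       // when the JIT cache is flushed
//   trampolines.Shutdown();             // on emulator shutdown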