diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp index f402f3542d..8db6e94bbf 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp @@ -158,7 +158,7 @@ ps_adds1 */ -static int CODE_SIZE = 1024*1024*16; +static int CODE_SIZE = 1024*1024*32; namespace CPUCompare { diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp index 949fca58c8..aef25e9779 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp @@ -81,36 +81,39 @@ void Jit64::bx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(Branch) + // We must always execute the following statement, + // even if the blocks are merged by PPCAnalyst::Flatten(). if (inst.LK) MOV(32, M(&LR), Imm32(js.compilerPC + 4)); + + // If this is not the last instruction of a block, + // we skip the rest of the processing + // because PPCAnalyst::Flatten() merged the blocks. 
+ if (!js.isLastInstruction) { + return; + } + gpr.Flush(FLUSH_ALL); fpr.Flush(FLUSH_ALL); - if (js.isLastInstruction) - { - u32 destination; - if (inst.AA) - destination = SignExt26(inst.LI << 2); - else - destination = js.compilerPC + SignExt26(inst.LI << 2); + u32 destination; + if (inst.AA) + destination = SignExt26(inst.LI << 2); + else + destination = js.compilerPC + SignExt26(inst.LI << 2); #ifdef ACID_TEST - if (inst.LK) - AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xFF000000))); + if (inst.LK) + AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xFF000000))); #endif - if (destination == js.compilerPC) - { - //PanicAlert("Idle loop detected at %08x", destination); + if (destination == js.compilerPC) + { + //PanicAlert("Idle loop detected at %08x", destination); // CALL(ProtectFunction(&CoreTiming::Idle, 0)); // JMP(Asm::testExceptions, true); - // make idle loops go faster - js.downcountAmount += 8; - } - WriteExit(destination, 0); - } - else { - // TODO: investigate the good old method of merging blocks here. 
- PanicAlert("bx not last instruction of block"); // this should not happen + // make idle loops go faster + js.downcountAmount += 8; } + WriteExit(destination, 0); } // TODO - optimize to hell and beyond diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp index 71c96c3a6a..42b82a532e 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp @@ -152,7 +152,7 @@ ps_adds1 */ -static int CODE_SIZE = 1024*1024*16; +static int CODE_SIZE = 1024*1024*32; namespace CPUCompare { diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp index e022489f9c..608d362032 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp @@ -59,9 +59,18 @@ void JitIL::bx(UGeckoInstruction inst) NORMALBRANCH_START INSTRUCTION_START; + // We must always execute the following statement, + // even if the blocks are merged by PPCAnalyst::Flatten(). if (inst.LK) ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4)); + // If this is not the last instruction of a block, + // we skip the rest of the processing + // because PPCAnalyst::Flatten() merged the blocks. 
+ if (!js.isLastInstruction) { + return; + } + u32 destination; if (inst.AA) destination = SignExt26(inst.LI << 2); diff --git a/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp index 0f5744b516..55a322a5e0 100644 --- a/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp +++ b/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp @@ -40,10 +40,9 @@ namespace PPCAnalyst { using namespace std; -enum -{ - CODEBUFFER_SIZE = 32000, -}; +static const int CODEBUFFER_SIZE = 32000; +// A value of 0 disables block merging. +static const int FUNCTION_FOLLOWING_THRESHOLD = 0; CodeBuffer::CodeBuffer(int size) { @@ -446,9 +445,15 @@ u32 Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Bloc } if (follow) numFollows++; - if (numFollows > 1) + // TODO: Find the optimal value for FUNCTION_FOLLOWING_THRESHOLD. + // If it is too small, performance suffers. + // If it is too big, the generated code grows and + // cache clearing will happen many times. + // TODO: Investigate the reason why + // "0" is fastest in some games, MP2 for example. + if (numFollows > FUNCTION_FOLLOWING_THRESHOLD) follow = false; - follow = false; + if (!follow) { if (opinfo->flags & FL_ENDBLOCK) //right now we stop early @@ -460,7 +465,9 @@ u32 Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Bloc } else { - code[i].skip = true; + // We don't set "code[i].skip = true" here + // because bx may store a certain value to the link register. + // Instead, we skip a part of bx in Jit**::bx(). address = destination; } }