From a16c0a6e167821155bd8ed1cf40c809b2eb4a499 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sun, 13 May 2018 21:29:19 -0400 Subject: [PATCH 1/3] Jit64: Clean up code buffer accesses in DoJit() Moves the codebuffer access variables closer to their first use, and gets rid of multiple indexing expressions. We already know which op we're accessing in particular, so just make a reference to it and access it instead of duplicating the expression all over the place. --- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 48 +++++++++++++------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 315a819840..b266962324 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -651,8 +651,6 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc js.numLoadStoreInst = 0; js.numFloatingPointInst = 0; - PPCAnalyst::CodeOp* ops = code_buf->codebuffer; - const u8* start = AlignCode4(); // TODO: Test if this or AlignCode16 make a difference from GetCodePtr b->checkedEntry = start; @@ -740,13 +738,16 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc } // Translate instructions + PPCAnalyst::CodeOp* const ops = code_buf->codebuffer; for (u32 i = 0; i < code_block.m_num_instructions; i++) { - js.compilerPC = ops[i].address; - js.op = &ops[i]; + PPCAnalyst::CodeOp& op = ops[i]; + + js.compilerPC = op.address; + js.op = &op; js.instructionNumber = i; js.instructionsLeft = (code_block.m_num_instructions - 1) - i; - const GekkoOPInfo* opinfo = ops[i].opinfo; + const GekkoOPInfo* opinfo = op.opinfo; js.downcountAmount += opinfo->numCycles; js.fastmemLoadStore = nullptr; js.fixupExceptionHandler = false; @@ -762,8 +763,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc } // Gather pipe writes using a non-immediate address are discovered by profiling. - bool gatherPipeIntCheck = - js.fifoWriteAddresses.find(ops[i].address) != js.fifoWriteAddresses.end(); + bool gatherPipeIntCheck = js.fifoWriteAddresses.find(op.address) != js.fifoWriteAddresses.end(); // Gather pipe writes using an immediate address are explicitly tracked. if (jo.optimizeGatherPipe && (js.fifoBytesSinceCheck >= 32 || js.mustCheckFifo)) @@ -798,7 +798,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc gpr.Flush(RegCache::FlushMode::MaintainState); fpr.Flush(RegCache::FlushMode::MaintainState); - MOV(32, PPCSTATE(pc), Imm32(ops[i].address)); + MOV(32, PPCSTATE(pc), Imm32(op.address)); WriteExternalExceptionExit(); SwitchToNearCode(); @@ -806,7 +806,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc SetJumpTarget(noExtIntEnable); } - u32 function = HLE::GetFirstFunctionIndex(ops[i].address); + u32 function = HLE::GetFirstFunctionIndex(op.address); if (function != 0) { HLE::HookType type = HLE::GetFunctionTypeByIndex(function); @@ -827,7 +827,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc } } - if (!ops[i].skip) + if (!op.skip) { if ((opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound) { @@ -842,7 +842,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc // If a FPU exception occurs, the exception handler will read // from PC. Update PC with the latest value in case that happens. - MOV(32, PPCSTATE(pc), Imm32(ops[i].address)); + MOV(32, PPCSTATE(pc), Imm32(op.address)); OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); WriteExceptionExit(); SwitchToNearCode(); @@ -850,8 +850,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc js.firstFPInstructionFound = true; } - if (SConfig::GetInstance().bEnableDebugging && - breakpoints.IsAddressBreakPoint(ops[i].address) && !CPU::IsStepping()) + if (SConfig::GetInstance().bEnableDebugging && breakpoints.IsAddressBreakPoint(op.address) && + !CPU::IsStepping()) { // Turn off block linking if there are breakpoints so that the Step Over command does not // link this block. @@ -860,7 +860,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc gpr.Flush(); fpr.Flush(); - MOV(32, PPCSTATE(pc), Imm32(ops[i].address)); + MOV(32, PPCSTATE(pc), Imm32(op.address)); ABI_PushRegistersAndAdjustStack({}, 0); ABI_CallFunction(PowerPC::CheckBreakPoints); ABI_PopRegistersAndAdjustStack({}, 0); @@ -868,7 +868,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc TEST(32, MatR(RSCRATCH), Imm32(0xFFFFFFFF)); FixupBranch noBreakpoint = J_CC(CC_Z); - WriteExit(ops[i].address); + WriteExit(op.address); SetJumpTarget(noBreakpoint); } @@ -879,22 +879,22 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc // output, which needs to be bound in the actual instruction compilation. // TODO: make this smarter in the case that we're actually register-starved, i.e. // prioritize the more important registers. - for (int reg : ops[i].regsIn) + for (int reg : op.regsIn) { if (gpr.NumFreeRegisters() < 2) break; - if (ops[i].gprInReg[reg] && !gpr.R(reg).IsImm()) + if (op.gprInReg[reg] && !gpr.R(reg).IsImm()) gpr.BindToRegister(reg, true, false); } - for (int reg : ops[i].fregsIn) + for (int reg : op.fregsIn) { if (fpr.NumFreeRegisters() < 2) break; - if (ops[i].fprInXmm[reg]) + if (op.fprInXmm[reg]) fpr.BindToRegister(reg, true, false); } - CompileInstruction(ops[i]); + CompileInstruction(op); if (jo.memcheck && (opinfo->flags & FL_LOADSTORE)) { @@ -903,7 +903,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc FixupBranch memException; ASSERT_MSG(DYNA_REC, !(js.fastmemLoadStore && js.fixupExceptionHandler), "Fastmem loadstores shouldn't have exception handler fixups (PC=%x)!", - ops[i].address); + op.address); if (!js.fastmemLoadStore && !js.fixupExceptionHandler) { TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI)); @@ -934,9 +934,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc } // If we have a register that will never be used again, flush it. - for (int j : ~ops[i].gprInUse) + for (int j : ~op.gprInUse) gpr.StoreFromRegister(j); - for (int j : ~ops[i].fprInUse) + for (int j : ~op.fprInUse) fpr.StoreFromRegister(j); if (opinfo->flags & FL_LOADSTORE) @@ -949,7 +949,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc #if defined(_DEBUG) || defined(DEBUGFAST) if (gpr.SanityCheck() || fpr.SanityCheck()) { - std::string ppc_inst = GekkoDisassembler::Disassemble(ops[i].inst.hex, em_address); + std::string ppc_inst = GekkoDisassembler::Disassemble(op.inst.hex, em_address); // NOTICE_LOG(DYNA_REC, "Unflushed register: %s", ppc_inst.c_str()); } #endif From f4affa3d1d4d944af5f31a26588818a16617a463 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sun, 13 May 2018 21:38:50 -0400 Subject: [PATCH 2/3] CachedInterpreter: Clean up code buffer accesses in Jit() Done similarly for the reasons Jit64 was done. Localizes indexing to one place instead of repeatedly indexing the same place. --- .../CachedInterpreter/CachedInterpreter.cpp | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp b/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp index 3d489ca9ca..d6d85f727d 100644 --- a/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp +++ b/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp @@ -198,16 +198,17 @@ void CachedInterpreter::Jit(u32 address) js.downcountAmount = 0; js.curBlock = b; - PPCAnalyst::CodeOp* ops = code_buffer.codebuffer; - b->checkedEntry = GetCodePtr(); b->normalEntry = GetCodePtr(); + PPCAnalyst::CodeOp* const ops = code_buffer.codebuffer; for (u32 i = 0; i < code_block.m_num_instructions; i++) { - js.downcountAmount += ops[i].opinfo->numCycles; + PPCAnalyst::CodeOp& op = ops[i]; - u32 function = HLE::GetFirstFunctionIndex(ops[i].address); + js.downcountAmount += op.opinfo->numCycles; + + u32 function = HLE::GetFirstFunctionIndex(op.address); if (function != 0) { HLE::HookType type = HLE::GetFunctionTypeByIndex(function); @@ -216,7 +217,7 @@ void CachedInterpreter::Jit(u32 address) HLE::HookFlag flags = HLE::GetFunctionFlagsByIndex(function); if (HLE::IsEnabled(flags)) { - m_code.emplace_back(WritePC, ops[i].address); + m_code.emplace_back(WritePC, op.address); m_code.emplace_back(Interpreter::HLEFunction, function); if (type == HLE::HookType::Replace) { @@ -228,22 +229,22 @@ void CachedInterpreter::Jit(u32 address) } } - if (!ops[i].skip) + if (!op.skip) { - bool check_fpu = (ops[i].opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound; - bool endblock = (ops[i].opinfo->flags & FL_ENDBLOCK) != 0; - bool memcheck = (ops[i].opinfo->flags & FL_LOADSTORE) && jo.memcheck; + const bool check_fpu = (op.opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound; + const bool endblock = (op.opinfo->flags & FL_ENDBLOCK) != 0; + const bool memcheck = (op.opinfo->flags & FL_LOADSTORE) && jo.memcheck; if (check_fpu) { - m_code.emplace_back(WritePC, ops[i].address); + m_code.emplace_back(WritePC, op.address); m_code.emplace_back(CheckFPU, js.downcountAmount); js.firstFPInstructionFound = true; } if (endblock || memcheck) - m_code.emplace_back(WritePC, ops[i].address); - m_code.emplace_back(PPCTables::GetInterpreterOp(ops[i].inst), ops[i].inst); + m_code.emplace_back(WritePC, op.address); + m_code.emplace_back(PPCTables::GetInterpreterOp(op.inst), op.inst); if (memcheck) m_code.emplace_back(CheckDSI, js.downcountAmount); if (endblock) From 7437f2efdc2e98d237d3b08658041a9529e89749 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sun, 13 May 2018 21:42:36 -0400 Subject: [PATCH 3/3] JitArm64: Clean up code buffer accesses in DoJit() Done for the same reason this was done for Jit64. Avoids constantly indexing for the same known object instance over and over. --- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 3d6c8c1cf9..2848c5aaf0 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -601,8 +601,6 @@ void JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock* js.curBlock = b; js.carryFlagSet = false; - PPCAnalyst::CodeOp* ops = code_buf->codebuffer; - const u8* start = GetCodePtr(); b->checkedEntry = start; @@ -651,13 +649,16 @@ void JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock* fpr.Start(js.fpa); // Translate instructions + PPCAnalyst::CodeOp* const ops = code_buf->codebuffer; for (u32 i = 0; i < code_block.m_num_instructions; i++) { - js.compilerPC = ops[i].address; - js.op = &ops[i]; + PPCAnalyst::CodeOp& op = ops[i]; + + js.compilerPC = op.address; + js.op = &op; js.instructionNumber = i; js.instructionsLeft = (code_block.m_num_instructions - 1) - i; - const GekkoOPInfo* opinfo = ops[i].opinfo; + const GekkoOPInfo* opinfo = op.opinfo; js.downcountAmount += opinfo->numCycles; js.isLastInstruction = i == (code_block.m_num_instructions - 1); @@ -665,8 +666,7 @@ void JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock* js.downcountAmount += PatchEngine::GetSpeedhackCycles(js.compilerPC); // Gather pipe writes using a non-immediate address are discovered by profiling. - bool gatherPipeIntCheck = - js.fifoWriteAddresses.find(ops[i].address) != js.fifoWriteAddresses.end(); + bool gatherPipeIntCheck = js.fifoWriteAddresses.find(op.address) != js.fifoWriteAddresses.end(); if (jo.optimizeGatherPipe && (js.fifoBytesSinceCheck >= 32 || js.mustCheckFifo)) { @@ -740,7 +740,7 @@ void JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock* SetJumpTarget(exit); } - if (!ops[i].skip) + if (!op.skip) { if ((opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound) { @@ -771,13 +771,13 @@ void JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock* js.firstFPInstructionFound = true; } - CompileInstruction(ops[i]); + CompileInstruction(op); if (!CanMergeNextInstructions(1) || js.op[1].opinfo->type != ::OpType::Integer) FlushCarry(); // If we have a register that will never be used again, flush it. - gpr.StoreRegisters(~ops[i].gprInUse); - fpr.StoreRegisters(~ops[i].fprInUse); + gpr.StoreRegisters(~op.gprInUse); + fpr.StoreRegisters(~op.fprInUse); } i += js.skipInstructions;