JIT: flush a register if it won't be used for the rest of the block

This should dramatically reduce code size for blocks with lots of branches, and it certainly doesn't hurt elsewhere either. It can probably be improved a good bit through smarter tracking of register usage, e.g. discarding registers that are going to be overwritten, but this is a good start and should help reduce both code size and register pressure. Unlike that kind of change, this is a "safe" patch: it only flushes registers, which can't affect correctness, whereas actually discarding data could. As part of this, refactor PPCAnalyst to distinguish between float and integer registers (to properly handle instructions that access both, such as floating-point loads and stores). Also update every instruction I could find in the interpreter flags table that didn't have all the correct flags.
2025-09-24 04:12:48 -06:00 · 2014-09-13 05:34:38 -07:00
parent 7f1185b941
commit f103234e2b
5 changed files with 191 additions and 127 deletions
--- a/Source/Core/Core/PowerPC/PPCAnalyst.cpp
+++ b/Source/Core/Core/PowerPC/PPCAnalyst.cpp
@@ -527,6 +527,7 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInf

 	int numOut = 0;
 	int numIn = 0;
+	int numFloatIn = 0;
 	if (opinfo->flags & FL_OUT_A)
 	{
 		code->regsOut[numOut++] = code->inst.RA;
@@ -563,14 +564,29 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInf
 		block->m_gpa->SetInputRegister(code->inst.RS, index);
 	}

+	code->fregOut = -1;
+	if (opinfo->flags & FL_OUT_FLOAT_D)
+		code->fregOut = code->inst.FD;
+	else if (opinfo->flags & FL_OUT_FLOAT_S)
+		code->fregOut = code->inst.FS;
+	if (opinfo->flags & FL_IN_FLOAT_A)
+		code->fregsIn[numFloatIn++] = code->inst.FA;
+	if (opinfo->flags & FL_IN_FLOAT_B)
+		code->fregsIn[numFloatIn++] = code->inst.FB;
+	if (opinfo->flags & FL_IN_FLOAT_C)
+		code->fregsIn[numFloatIn++] = code->inst.FC;
+	if (opinfo->flags & FL_IN_FLOAT_D)
+		code->fregsIn[numFloatIn++] = code->inst.FD;
+	if (opinfo->flags & FL_IN_FLOAT_S)
+		code->fregsIn[numFloatIn++] = code->inst.FS;
+
 	// Set remaining register slots as unused (-1)
 	for (int j = numIn; j < 3; j++)
 		code->regsIn[j] = -1;
 	for (int j = numOut; j < 2; j++)
 		code->regsOut[j] = -1;
-	for (int j = 0; j < 3; j++)
+	for (int j = numFloatIn; j < 4; j++)
 		code->fregsIn[j] = -1;
-	code->fregOut = -1;

 	switch (opinfo->type)
 	{
@@ -580,7 +596,8 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInf
 	case OPTYPE_LOADFP:
 	case OPTYPE_STOREFP:
 		break;
-	case OPTYPE_FPU:
+	case OPTYPE_SINGLEFP:
+	case OPTYPE_DOUBLEFP:
 		break;
 	case OPTYPE_BRANCH:
 		if (code->inst.hex == 0x4e800020)
@@ -788,6 +805,8 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
 	bool wantsCR1 = true;
 	bool wantsFPRF = true;
 	bool wantsCA = true;
+	u32 fregInUse = 0;
+	u32 regInUse = 0;
 	for (int i = block->m_num_instructions - 1; i >= 0; i--)
 	{
 		bool opWantsCR0  = code[i].wantsCR0;
@ -806,6 +825,24 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
 		wantsCR1  &= !code[i].outputCR1  || opWantsCR1;
 		wantsFPRF &= !code[i].outputFPRF || opWantsFPRF;
 		wantsCA   &= !code[i].outputCA   || opWantsCA;
+		code[i].gprInUse = regInUse;
+		code[i].fprInUse = fregInUse;
+		// TODO: if there's no possible endblocks or exceptions in between, tell the regcache
+		// we can throw away a register if it's going to be overwritten later.
+		for (int j = 0; j < 3; j++)
+			if (code[i].regsIn[j] >= 0)
+				regInUse |= 1 << code[i].regsIn[j];
+		for (int j = 0; j < 4; j++)
+			if (code[i].fregsIn[j] >= 0)
+				fregInUse |= 1 << code[i].fregsIn[j];
+		// For now, we need to count output registers as "used" though; otherwise the flush
+		// will result in a redundant store (e.g. store to regcache, then store again to
+		// the same location later).
+		for (int j = 0; j < 2; j++)
+			if (code[i].regsOut[j] >= 0)
+				regInUse |= 1 << code[i].regsOut[j];
+		if (code[i].fregOut >= 0)
+			fregInUse |= 1 << code[i].fregOut;
 	}
 	return address;
 }