GX: add fastpath for single parameter cmds

This commit is contained in:
RSDuck
2020-11-03 21:13:49 +01:00
parent 1649c0e089
commit d2c04c5c51

View File

@ -99,7 +99,7 @@
namespace GPU3D
{
const u32 CmdNumParams[256] =
const u8 CmdNumParams[256] =
{
// 0x00
0,
@ -1798,7 +1798,37 @@ CmdFIFOEntry CmdFIFORead()
return ret;
}
// Timing bookkeeping for vertex submission commands: 0x24, 0x25, 0x26, 0x27, 0x28
// (single-parameter path) and 0x23 (when its first parameter is received).
inline void VertexPipelineSubmitCmd()
{
    // bit 0 of VertexSlotsFree selects between charging one cycle
    // and advancing to the next vertex slot
    const bool lowBitSet = (VertexSlotsFree & 0x1) != 0;

    if (lowBitSet)
        AddCycles(1);
    else
        NextVertexSlot();

    NormalPipeline = 0;
}
// Timing bookkeeping for commands 0x20, 0x30, 0x31, 0x72,
// which can run 6 cycles after a vertex.
inline void VertexPipelineCmdDelayed6()
{
    if (VertexPipeline <= 2)
    {
        // nothing to wait for on the vertex side: charge the normal pipeline plus one cycle
        AddCycles(NormalPipeline + 1);
    }
    else
    {
        // charge whatever exceeds 2 on the vertex pipeline, plus one cycle
        AddCycles((VertexPipeline - 2) + 1);
    }

    NormalPipeline = 0;
}
// Timing bookkeeping for commands 0x29, 0x2A, 0x2B, 0x33, 0x34, 0x41, 0x60, 0x71,
// which can run 8 cycles after a vertex.
inline void VertexPipelineCmdDelayed8()
{
    if (VertexPipeline <= 0)
    {
        // vertex pipeline is empty: charge the normal pipeline plus one cycle
        AddCycles(NormalPipeline + 1);
    }
    else
    {
        // charge the full remaining vertex pipeline, plus one cycle
        AddCycles(VertexPipeline + 1);
    }

    NormalPipeline = 0;
}
// Timing bookkeeping for every remaining command. These can run 4 cycles
// after a vertex; as that is the minimum, the vertex pipeline is not consulted.
inline void VertexPipelineCmdDelayed4()
{
    // charge the normal pipeline plus one cycle, then clear it
    AddCycles(1 + NormalPipeline);
    NormalPipeline = 0;
}
void ExecuteCommand()
{
@ -1809,81 +1839,23 @@ void ExecuteCommand()
// each FIFO entry takes 1 cycle to be processed
// commands (presumably) run when all the needed parameters have been read
// which is where we add the remaining cycles if any
if (ExecParamCount == 0)
u32 paramsRequiredCount = CmdNumParams[entry.Command];
if (paramsRequiredCount <= 1)
{
// delay the first command entry as needed
switch (entry.Command)
{
// commands that stall the polygon pipeline
case 0x32: StallPolygonPipeline(8 + 1, 2); break; // 32 can run 6 cycles after a vertex
case 0x40: StallPolygonPipeline(1, 0); break;
case 0x70: StallPolygonPipeline(10 + 1, 0); break;
// fast path for commands which only have a single parameter
case 0x23:
case 0x24:
case 0x25:
case 0x26:
case 0x27:
case 0x28:
// vertex
if (!(VertexSlotsFree & 0x1)) NextVertexSlot();
else AddCycles(1);
NormalPipeline = 0;
break;
case 0x20:
case 0x30:
case 0x31:
case 0x72:
// commands that can run 6 cycles after a vertex
if (VertexPipeline > 2) AddCycles((VertexPipeline - 2) + 1);
else AddCycles(NormalPipeline + 1);
NormalPipeline = 0;
break;
case 0x29:
case 0x2A:
case 0x2B:
case 0x33:
case 0x34:
case 0x41:
case 0x60:
case 0x71:
// command that can run 8 cycles after a vertex
if (VertexPipeline > 0) AddCycles(VertexPipeline + 1);
else AddCycles(NormalPipeline + 1);
NormalPipeline = 0;
break;
default:
// all other commands can run 4 cycles after a vertex
// no need to do much here since that is the minimum
AddCycles(NormalPipeline + 1);
NormalPipeline = 0;
break;
}
}
else
AddCycles(1);
ExecParams[ExecParamCount] = entry.Param;
ExecParamCount++;
if (ExecParamCount >= CmdNumParams[entry.Command])
{
/*printf("[GXS:%08X] 0x%02X, ", GXStat, entry.Command);
for (int k = 0; k < ExecParamCount; k++) printf("0x%08X, ", ExecParams[k]);
printf("\n");*/
ExecParamCount = 0;
/*printf("[GXS:%08X] 0x%02X, 0x%08X", GXStat, entry.Command, entry.Param);*/
switch (entry.Command)
{
case 0x10: // matrix mode
MatrixMode = ExecParams[0] & 0x3;
VertexPipelineCmdDelayed4();
MatrixMode = entry.Param & 0x3;
break;
case 0x11: // push matrix
VertexPipelineCmdDelayed4();
NumPushPopCommands--;
if (MatrixMode == 0)
{
@ -1914,6 +1886,7 @@ void ExecuteCommand()
break;
case 0x12: // pop matrix
VertexPipelineCmdDelayed4();
NumPushPopCommands--;
if (MatrixMode == 0)
{
@ -1936,7 +1909,7 @@ void ExecuteCommand()
}
else
{
s32 offset = (s32)(ExecParams[0] << 26) >> 26;
s32 offset = (s32)(entry.Param << 26) >> 26;
PosMatrixStackPointer -= offset;
PosMatrixStackPointer &= 0x3F;
@ -1950,6 +1923,7 @@ void ExecuteCommand()
break;
case 0x13: // store matrix
VertexPipelineCmdDelayed4();
if (MatrixMode == 0)
{
memcpy(ProjMatrixStack, ProjMatrix, 16*4);
@ -1960,7 +1934,7 @@ void ExecuteCommand()
}
else
{
u32 addr = ExecParams[0] & 0x1F;
u32 addr = entry.Param & 0x1F;
if (addr > 30) GXStat |= (1<<15);
memcpy(PosMatrixStack[addr], PosMatrix, 16*4);
@ -1970,6 +1944,7 @@ void ExecuteCommand()
break;
case 0x14: // restore matrix
VertexPipelineCmdDelayed4();
if (MatrixMode == 0)
{
memcpy(ProjMatrix, ProjMatrixStack, 16*4);
@ -1983,7 +1958,7 @@ void ExecuteCommand()
}
else
{
u32 addr = ExecParams[0] & 0x1F;
u32 addr = entry.Param & 0x1F;
if (addr > 30) GXStat |= (1<<15);
memcpy(PosMatrix, PosMatrixStack[addr], 16*4);
@ -1994,6 +1969,7 @@ void ExecuteCommand()
break;
case 0x15: // identity
VertexPipelineCmdDelayed4();
if (MatrixMode == 0)
{
MatrixLoadIdentity(ProjMatrix);
@ -2012,6 +1988,236 @@ void ExecuteCommand()
}
break;
case 0x20: // vertex color
VertexPipelineCmdDelayed6();
{
u32 c = entry.Param;
u32 r = c & 0x1F;
u32 g = (c >> 5) & 0x1F;
u32 b = (c >> 10) & 0x1F;
VertexColor[0] = r;
VertexColor[1] = g;
VertexColor[2] = b;
}
break;
case 0x21: // normal
VertexPipelineCmdDelayed4();
Normal[0] = (s16)((entry.Param & 0x000003FF) << 6) >> 6;
Normal[1] = (s16)((entry.Param & 0x000FFC00) >> 4) >> 6;
Normal[2] = (s16)((entry.Param & 0x3FF00000) >> 14) >> 6;
CalculateLighting();
break;
case 0x22: // texcoord
VertexPipelineCmdDelayed4();
RawTexCoords[0] = entry.Param & 0xFFFF;
RawTexCoords[1] = entry.Param >> 16;
if ((TexParam >> 30) == 1)
{
TexCoords[0] = (RawTexCoords[0]*TexMatrix[0] + RawTexCoords[1]*TexMatrix[4] + TexMatrix[8] + TexMatrix[12]) >> 12;
TexCoords[1] = (RawTexCoords[0]*TexMatrix[1] + RawTexCoords[1]*TexMatrix[5] + TexMatrix[9] + TexMatrix[13]) >> 12;
}
else
{
TexCoords[0] = RawTexCoords[0];
TexCoords[1] = RawTexCoords[1];
}
break;
case 0x24: // 10-bit vertex
VertexPipelineSubmitCmd();
CurVertex[0] = (entry.Param & 0x000003FF) << 6;
CurVertex[1] = (entry.Param & 0x000FFC00) >> 4;
CurVertex[2] = (entry.Param & 0x3FF00000) >> 14;
SubmitVertex();
break;
case 0x25: // vertex XY
VertexPipelineSubmitCmd();
CurVertex[0] = entry.Param & 0xFFFF;
CurVertex[1] = entry.Param >> 16;
SubmitVertex();
break;
case 0x26: // vertex XZ
VertexPipelineSubmitCmd();
CurVertex[0] = entry.Param & 0xFFFF;
CurVertex[2] = entry.Param >> 16;
SubmitVertex();
break;
case 0x27: // vertex YZ
VertexPipelineSubmitCmd();
CurVertex[1] = entry.Param & 0xFFFF;
CurVertex[2] = entry.Param >> 16;
SubmitVertex();
break;
case 0x28: // 10-bit delta vertex
VertexPipelineSubmitCmd();
CurVertex[0] += (s16)((entry.Param & 0x000003FF) << 6) >> 6;
CurVertex[1] += (s16)((entry.Param & 0x000FFC00) >> 4) >> 6;
CurVertex[2] += (s16)((entry.Param & 0x3FF00000) >> 14) >> 6;
SubmitVertex();
break;
case 0x29: // polygon attributes
VertexPipelineCmdDelayed8();
PolygonAttr = entry.Param;
break;
case 0x2A: // texture param
VertexPipelineCmdDelayed8();
TexParam = entry.Param;
break;
case 0x2B: // texture palette
VertexPipelineCmdDelayed8();
TexPalette = entry.Param & 0x1FFF;
break;
case 0x30: // diffuse/ambient material
VertexPipelineCmdDelayed6();
MatDiffuse[0] = entry.Param & 0x1F;
MatDiffuse[1] = (entry.Param >> 5) & 0x1F;
MatDiffuse[2] = (entry.Param >> 10) & 0x1F;
MatAmbient[0] = (entry.Param >> 16) & 0x1F;
MatAmbient[1] = (entry.Param >> 21) & 0x1F;
MatAmbient[2] = (entry.Param >> 26) & 0x1F;
if (entry.Param & 0x8000)
{
VertexColor[0] = MatDiffuse[0];
VertexColor[1] = MatDiffuse[1];
VertexColor[2] = MatDiffuse[2];
}
AddCycles(3);
break;
case 0x31: // specular/emission material
VertexPipelineCmdDelayed6();
MatSpecular[0] = entry.Param & 0x1F;
MatSpecular[1] = (entry.Param >> 5) & 0x1F;
MatSpecular[2] = (entry.Param >> 10) & 0x1F;
MatEmission[0] = (entry.Param >> 16) & 0x1F;
MatEmission[1] = (entry.Param >> 21) & 0x1F;
MatEmission[2] = (entry.Param >> 26) & 0x1F;
UseShininessTable = (entry.Param & 0x8000) != 0;
AddCycles(3);
break;
case 0x32: // light direction
StallPolygonPipeline(8 + 1, 2); // 0x32 can run 6 cycles after a vertex
{
u32 l = entry.Param >> 30;
s16 dir[3];
dir[0] = (s16)((entry.Param & 0x000003FF) << 6) >> 6;
dir[1] = (s16)((entry.Param & 0x000FFC00) >> 4) >> 6;
dir[2] = (s16)((entry.Param & 0x3FF00000) >> 14) >> 6;
LightDirection[l][0] = (dir[0]*VecMatrix[0] + dir[1]*VecMatrix[4] + dir[2]*VecMatrix[8]) >> 12;
LightDirection[l][1] = (dir[0]*VecMatrix[1] + dir[1]*VecMatrix[5] + dir[2]*VecMatrix[9]) >> 12;
LightDirection[l][2] = (dir[0]*VecMatrix[2] + dir[1]*VecMatrix[6] + dir[2]*VecMatrix[10]) >> 12;
}
AddCycles(5);
break;
case 0x33: // light color
VertexPipelineCmdDelayed8();
{
u32 l = entry.Param >> 30;
LightColor[l][0] = entry.Param & 0x1F;
LightColor[l][1] = (entry.Param >> 5) & 0x1F;
LightColor[l][2] = (entry.Param >> 10) & 0x1F;
}
AddCycles(1);
break;
case 0x40: // begin polygons
StallPolygonPipeline(1, 0);
// TODO: check if there was a polygon being defined but incomplete
// such cases seem to freeze the GPU
PolygonMode = entry.Param & 0x3;
VertexNum = 0;
VertexNumInPoly = 0;
NumConsecutivePolygons = 0;
LastStripPolygon = NULL;
CurPolygonAttr = PolygonAttr;
break;
case 0x41: // end polygons
VertexPipelineCmdDelayed8();
// TODO: research this?
// it doesn't seem to have any effect whatsoever, but
// its timing characteristics are different from those of other
// no-op commands
break;
case 0x50: // flush
VertexPipelineCmdDelayed4();
FlushRequest = 1;
FlushAttributes = entry.Param & 0x3;
CycleCount = 325;
// probably safe to just reset all pipelines
// but needs checked
VertexPipeline = 0;
NormalPipeline = 0;
PolygonPipeline = 0;
VertexSlotCounter = 0;
VertexSlotsFree = 1;
break;
case 0x60: // viewport x1,y1,x2,y2
VertexPipelineCmdDelayed8();
// note: viewport Y coordinates are upside-down
Viewport[0] = entry.Param & 0xFF; // x0
Viewport[1] = (191 - ((entry.Param >> 8) & 0xFF)) & 0xFF; // y0
Viewport[2] = (entry.Param >> 16) & 0xFF; // x1
Viewport[3] = (191 - (entry.Param >> 24)) & 0xFF; // y1
Viewport[4] = (Viewport[2] - Viewport[0] + 1) & 0x1FF; // width
Viewport[5] = (Viewport[1] - Viewport[3] + 1) & 0xFF; // height
break;
default:
VertexPipelineCmdDelayed4();
//printf("!! UNKNOWN GX COMMAND %02X %08X\n", entry.Command, entry.Param);
break;
}
}
else
{
ExecParams[ExecParamCount] = entry.Param;
ExecParamCount++;
if (ExecParamCount == 1)
{
// delay the first command entry as needed
switch (entry.Command)
{
// commands that stall the polygon pipeline
case 0x23: VertexPipelineSubmitCmd(); break;
case 0x34:
case 0x71:
VertexPipelineCmdDelayed8();
break;
case 0x70: StallPolygonPipeline(10 + 1, 0); break;
case 0x72: VertexPipelineCmdDelayed6(); break;
default: VertexPipelineCmdDelayed4(); break;
}
}
else
{
AddCycles(1);
if (ExecParamCount >= paramsRequiredCount)
{
/*printf("[GXS:%08X] 0x%02X, ", GXStat, entry.Command);
for (int k = 0; k < ExecParamCount; k++) printf("0x%08X, ", ExecParams[k]);
printf("\n");*/
ExecParamCount = 0;
switch (entry.Command)
{
case 0x16: // load 4x4
if (MatrixMode == 0)
{
@ -2176,40 +2382,6 @@ void ExecuteCommand()
}
break;
case 0x20: // vertex color
{
u32 c = ExecParams[0];
u32 r = c & 0x1F;
u32 g = (c >> 5) & 0x1F;
u32 b = (c >> 10) & 0x1F;
VertexColor[0] = r;
VertexColor[1] = g;
VertexColor[2] = b;
}
break;
case 0x21: // normal
Normal[0] = (s16)((ExecParams[0] & 0x000003FF) << 6) >> 6;
Normal[1] = (s16)((ExecParams[0] & 0x000FFC00) >> 4) >> 6;
Normal[2] = (s16)((ExecParams[0] & 0x3FF00000) >> 14) >> 6;
CalculateLighting();
break;
case 0x22: // texcoord
RawTexCoords[0] = ExecParams[0] & 0xFFFF;
RawTexCoords[1] = ExecParams[0] >> 16;
if ((TexParam >> 30) == 1)
{
TexCoords[0] = (RawTexCoords[0]*TexMatrix[0] + RawTexCoords[1]*TexMatrix[4] + TexMatrix[8] + TexMatrix[12]) >> 12;
TexCoords[1] = (RawTexCoords[0]*TexMatrix[1] + RawTexCoords[1]*TexMatrix[5] + TexMatrix[9] + TexMatrix[13]) >> 12;
}
else
{
TexCoords[0] = RawTexCoords[0];
TexCoords[1] = RawTexCoords[1];
}
break;
case 0x23: // full vertex
CurVertex[0] = ExecParams[0] & 0xFFFF;
CurVertex[1] = ExecParams[0] >> 16;
@ -2217,101 +2389,6 @@ void ExecuteCommand()
SubmitVertex();
break;
case 0x24: // 10-bit vertex
CurVertex[0] = (ExecParams[0] & 0x000003FF) << 6;
CurVertex[1] = (ExecParams[0] & 0x000FFC00) >> 4;
CurVertex[2] = (ExecParams[0] & 0x3FF00000) >> 14;
SubmitVertex();
break;
case 0x25: // vertex XY
CurVertex[0] = ExecParams[0] & 0xFFFF;
CurVertex[1] = ExecParams[0] >> 16;
SubmitVertex();
break;
case 0x26: // vertex XZ
CurVertex[0] = ExecParams[0] & 0xFFFF;
CurVertex[2] = ExecParams[0] >> 16;
SubmitVertex();
break;
case 0x27: // vertex YZ
CurVertex[1] = ExecParams[0] & 0xFFFF;
CurVertex[2] = ExecParams[0] >> 16;
SubmitVertex();
break;
case 0x28: // 10-bit delta vertex
CurVertex[0] += (s16)((ExecParams[0] & 0x000003FF) << 6) >> 6;
CurVertex[1] += (s16)((ExecParams[0] & 0x000FFC00) >> 4) >> 6;
CurVertex[2] += (s16)((ExecParams[0] & 0x3FF00000) >> 14) >> 6;
SubmitVertex();
break;
case 0x29: // polygon attributes
PolygonAttr = ExecParams[0];
break;
case 0x2A: // texture param
TexParam = ExecParams[0];
break;
case 0x2B: // texture palette
TexPalette = ExecParams[0] & 0x1FFF;
break;
case 0x30: // diffuse/ambient material
MatDiffuse[0] = ExecParams[0] & 0x1F;
MatDiffuse[1] = (ExecParams[0] >> 5) & 0x1F;
MatDiffuse[2] = (ExecParams[0] >> 10) & 0x1F;
MatAmbient[0] = (ExecParams[0] >> 16) & 0x1F;
MatAmbient[1] = (ExecParams[0] >> 21) & 0x1F;
MatAmbient[2] = (ExecParams[0] >> 26) & 0x1F;
if (ExecParams[0] & 0x8000)
{
VertexColor[0] = MatDiffuse[0];
VertexColor[1] = MatDiffuse[1];
VertexColor[2] = MatDiffuse[2];
}
AddCycles(3);
break;
case 0x31: // specular/emission material
MatSpecular[0] = ExecParams[0] & 0x1F;
MatSpecular[1] = (ExecParams[0] >> 5) & 0x1F;
MatSpecular[2] = (ExecParams[0] >> 10) & 0x1F;
MatEmission[0] = (ExecParams[0] >> 16) & 0x1F;
MatEmission[1] = (ExecParams[0] >> 21) & 0x1F;
MatEmission[2] = (ExecParams[0] >> 26) & 0x1F;
UseShininessTable = (ExecParams[0] & 0x8000) != 0;
AddCycles(3);
break;
case 0x32: // light direction
{
u32 l = ExecParams[0] >> 30;
s16 dir[3];
dir[0] = (s16)((ExecParams[0] & 0x000003FF) << 6) >> 6;
dir[1] = (s16)((ExecParams[0] & 0x000FFC00) >> 4) >> 6;
dir[2] = (s16)((ExecParams[0] & 0x3FF00000) >> 14) >> 6;
LightDirection[l][0] = (dir[0]*VecMatrix[0] + dir[1]*VecMatrix[4] + dir[2]*VecMatrix[8]) >> 12;
LightDirection[l][1] = (dir[0]*VecMatrix[1] + dir[1]*VecMatrix[5] + dir[2]*VecMatrix[9]) >> 12;
LightDirection[l][2] = (dir[0]*VecMatrix[2] + dir[1]*VecMatrix[6] + dir[2]*VecMatrix[10]) >> 12;
}
AddCycles(5);
break;
case 0x33: // light color
{
u32 l = ExecParams[0] >> 30;
LightColor[l][0] = ExecParams[0] & 0x1F;
LightColor[l][1] = (ExecParams[0] >> 5) & 0x1F;
LightColor[l][2] = (ExecParams[0] >> 10) & 0x1F;
}
AddCycles(1);
break;
case 0x34: // shininess table
{
for (int i = 0; i < 128; i += 4)
@ -2325,52 +2402,6 @@ void ExecuteCommand()
}
break;
case 0x40: // begin polygons
// TODO: check if there was a polygon being defined but incomplete
// such cases seem to freeze the GPU
PolygonMode = ExecParams[0] & 0x3;
VertexNum = 0;
VertexNumInPoly = 0;
NumConsecutivePolygons = 0;
LastStripPolygon = NULL;
CurPolygonAttr = PolygonAttr;
break;
case 0x41: // end polygons
// TODO: research this?
// it doesn't seem to have any effect whatsoever, but
// its timing characteristics are different from those of other
// no-op commands
break;
case 0x50: // flush
FlushRequest = 1;
FlushAttributes = ExecParams[0] & 0x3;
CycleCount = 325;
// probably safe to just reset all pipelines
// but needs checked
VertexPipeline = 0;
NormalPipeline = 0;
PolygonPipeline = 0;
VertexSlotCounter = 0;
VertexSlotsFree = 1;
break;
case 0x60: // viewport x1,y1,x2,y2
// note: viewport Y coordinates are upside-down
Viewport[0] = ExecParams[0] & 0xFF; // x0
Viewport[1] = (191 - ((ExecParams[0] >> 8) & 0xFF)) & 0xFF; // y0
Viewport[2] = (ExecParams[0] >> 16) & 0xFF; // x1
Viewport[3] = (191 - (ExecParams[0] >> 24)) & 0xFF; // y1
Viewport[4] = (Viewport[2] - Viewport[0] + 1) & 0x1FF; // width
Viewport[5] = (Viewport[1] - Viewport[3] + 1) & 0xFF; // height
break;
case 0x70: // box test
NumTestCommands -= 3;
BoxTest(ExecParams);
break;
case 0x71: // pos test
NumTestCommands -= 2;
CurVertex[0] = ExecParams[0] & 0xFFFF;
@ -2379,14 +2410,20 @@ void ExecuteCommand()
PosTest();
break;
case 0x70: // box test
NumTestCommands -= 3;
BoxTest(ExecParams);
break;
case 0x72: // vec test
NumTestCommands--;
VecTest(ExecParams);
break;
default:
//printf("!! UNKNOWN GX COMMAND %02X %08X\n", entry.Command, entry.Param);
break;
__builtin_unreachable();
}
}
}
}
}