Rewrite the TEV stages generator to make it more hardware-like; thanks to godisgovernment for the documentation. Also make it faster using lookup tables.

I think I am going to get a lot of -1s for this one, but it is the only way to really test it. Hope this fixes something and doesn't break too much ;) git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4475 8ced0084-cf51-0410-be5f-012b33b47a6e
2025-07-25 07:09:48 -06:00 · 2009-10-29 03:28:38 +00:00
parent 2a99dfb261
commit 7602f7ab3e
1 changed files with 72 additions and 139 deletions
--- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp
+++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp
@ -356,7 +356,8 @@ static const char *alphaRef[2] =
 static const char *tevCOutputTable[]  = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" };
 static const char *tevAOutputTable[]  = { "prev.a", "c0.a", "c1.a", "c2.a" };
 static const char *tevIndAlphaSel[]   = {"", "x", "y", "z"};
-static const char *tevIndAlphaScale[] = {"", "*32","*16","*8"};
+//static const char *tevIndAlphaScale[] = {"", "*32","*16","*8"};
+static const char *tevIndAlphaScale[] = {"*(248.0f/255.0f)", "*(224.0f/255.0f)","*(240.0f/255.0f)","*(248.0f/255.0f)"};
 static const char *tevIndBiasField[]  = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias
 static const char *tevIndBiasAdd[]    = {"-128.0f", "1.0f", "1.0f", "1.0f" }; // indexed by fmt
 static const char *tevIndWrapStart[]  = {"0", "256", "128", "64", "32", "16", "0.001" };
@ -556,6 +557,52 @@ const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, bool HLSL
    return text;
 }

+
+
+// Format strings for the TEV color compare ops. WriteStage indexes this with cmp = (cc.shift<<1)|cc.op|8 and passes four %s arguments in the order d, a, b, c; each used entry emits "d + (compare(a, b) ? c : 0)". comp16/comp24 are names in the generated shader, not defined here (presumably declared by the shader prologue - confirm).
+static const char *TEVCMPColorOPTable[16] =
+{
+	"float3(0.0f,0.0f,0.0f)",// 0: not selected by WriteStage (its index always has bit 3 set); consumes no arguments
+	"float3(0.0f,0.0f,0.0f)",// 1: not selected by WriteStage
+	"float3(0.0f,0.0f,0.0f)",// 2: not selected by WriteStage
+	"float3(0.0f,0.0f,0.0f)",// 3: not selected by WriteStage
+	"float3(0.0f,0.0f,0.0f)",// 4: not selected by WriteStage
+	"float3(0.0f,0.0f,0.0f)",// 5: not selected by WriteStage
+	"float3(0.0f,0.0f,0.0f)",// 6: not selected by WriteStage
+	"float3(0.0f,0.0f,0.0f)",// 7: not selected by WriteStage
+	"   %s + ((%s.r > %s.r + (1.0f/510.0f)) ? %s : float3(0.0f,0.0f,0.0f))",// 8 TEVCMP_R8_GT: red channel, a > b + 1/510 (half of 1/255). NOTE(review): strict '>' here while the other scalar GT entries use '>=' - confirm intent
+	"   %s + ((abs(%s.r - %s.r) < (1.0f/255.0f)) ? %s : float3(0.0f,0.0f,0.0f))",// 9 TEVCMP_R8_EQ: red channel, |a - b| < 1/255
+	"   %s + (( dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (1.0f/510.0f))) ? %s : float3(0.0f,0.0f,0.0f))",// 10 TEVCMP_GR16_GT: dot(a.rgb, comp16) >= dot(b.rgb, comp16) + 1/510
+	"   %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (1.0f/255.0f) ? %s : float3(0.0f,0.0f,0.0f))",// 11 TEVCMP_GR16_EQ: the two comp16 dot products differ by less than 1/255
+	"   %s + (( dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (1.0f/510.0f))) ? %s : float3(0.0f,0.0f,0.0f))",// 12 TEVCMP_BGR24_GT: dot(a.rgb, comp24) >= dot(b.rgb, comp24) + 1/510
+	"   %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (1.0f/255.0f) ? %s : float3(0.0f,0.0f,0.0f))",// 13 TEVCMP_BGR24_EQ: the two comp24 dot products differ by less than 1/255
+	"   %s + (max(sign(%s.rgb - %s.rgb - (1.0f/510.0f)),float3(0.0f,0.0f,0.0f)) * %s)",// 14 TEVCMP_RGB8_GT: per-component compare; sign() clamped at 0 builds a mask that multiplies c instead of selecting with ?:
+	"   %s + ((float3(1.0f,1.0f,1.0f) - max(sign(abs(%s.rgb - %s.rgb) - (1.0f/255.0f)),float3(0.0f,0.0f,0.0f))) * %s)"// 15 TEVCMP_RGB8_EQ: per-component compare; mask is one minus the clamped sign of (|a - b| - 1/255)
+};
+
+// Format strings for the TEV alpha compare ops. WriteStage indexes this with cmp = (ac.shift<<1)|ac.op|8 and passes four %s arguments in the order d, a, b, c; each used entry emits "d + (compare(a, b) ? c : 0.0f)". comp16/comp24 are names in the generated shader, not defined here (presumably declared by the shader prologue - confirm).
+static const char *TEVCMPAlphaOPTable[16] =
+{
+	"0.0f",// 0: not selected by WriteStage (its index always has bit 3 set); consumes no arguments
+	"0.0f",// 1: not selected by WriteStage
+	"0.0f",// 2: not selected by WriteStage
+	"0.0f",// 3: not selected by WriteStage
+	"0.0f",// 4: not selected by WriteStage
+	"0.0f",// 5: not selected by WriteStage
+	"0.0f",// 6: not selected by WriteStage
+	"0.0f",// 7: not selected by WriteStage
+	"   %s + ((%s.r >= (%s.r + (1.0f/510.0f))) ? %s : 0.0f)",// 8 TEVCMP_R8_GT: red channel, a >= b + 1/510 (half of 1/255)
+	"   %s + (abs(%s.r - %s.r) < (1.0f/255.0f) ? %s : 0.0f)",// 9 TEVCMP_R8_EQ: red channel, |a - b| < 1/255
+	"   %s + ((dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (1.0f/510.0f))) ? %s : 0.0f)",// 10 TEVCMP_GR16_GT: dot(a.rgb, comp16) >= dot(b.rgb, comp16) + 1/510
+	"   %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (1.0f/255.0f) ? %s : 0.0f)",// 11 TEVCMP_GR16_EQ: the two comp16 dot products differ by less than 1/255
+	"   %s + ((dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (1.0f/510.0f))) ? %s : 0.0f)",// 12 TEVCMP_BGR24_GT: dot(a.rgb, comp24) >= dot(b.rgb, comp24) + 1/510
+	"   %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (1.0f/255.0f) ? %s : 0.0f)",// 13 TEVCMP_BGR24_EQ: the two comp24 dot products differ by less than 1/255
+	"   %s + ((%s.a >= (%s.a + (1.0f/510.0f))) ? %s : 0.0f)",// 14 TEVCMP_A8_GT: alpha channel, a >= b + 1/510
+	"   %s + (abs(%s.a - %s.a) < (1.0f/255.0f) ? %s : 0.0f)"// 15 TEVCMP_A8_EQ: alpha channel, |a - b| < 1/255
+
+};
+
+
 static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL)
 {
    char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap];
@ -575,33 +622,11 @@ static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL)
        // perform the indirect op on the incoming regular coordinates using indtex%d as the offset coords
 		if (bpmem.tevind[n].bs != ITBA_OFF) 
 		{
-            // write the bump alpha
-			if (bpmem.tevind[n].fmt == ITF_8) 
-				WRITE(p, "alphabump = indtex%d.%s %s;\n", 
-				bpmem.tevind[n].bt, 
-				tevIndAlphaSel[bpmem.tevind[n].bs], 
-				tevIndAlphaScale[bpmem.tevind[n].fmt]);
-			else 
-			{			
-				// donkopunchstania: really bad way to do this
-				// cannot always use fract because fract(1.0) is 0.0 when it needs to be 1.0
-				// omitting fract seems to work as well
-				WRITE(p, "if (indtex%d.%s >= 1.0f )\n", bpmem.tevind[n].bt,	tevIndAlphaSel[bpmem.tevind[n].bs]);
-				WRITE(p, "   alphabump = 1.0f;\n");
-				WRITE(p, "else\n");
-				WRITE(p, "   alphabump = fract ( indtex%d.%s %s );\n", 
-					bpmem.tevind[n].bt,
-					tevIndAlphaSel[bpmem.tevind[n].bs], 
-					tevIndAlphaScale[bpmem.tevind[n].fmt]);
-				/*WRITE(p, "   alphabump = indtex%d.%s %s;\n", 
-					bpmem.tevind[n].bt,
-					tevIndAlphaSel[bpmem.tevind[n].bs], 
-					tevIndAlphaScale[bpmem.tevind[n].fmt]);
-				WRITE(p, "if (alphabump > 1.0f ){ alphabump = fract ( alphabump );if (alphabump == 0.0f ) alphabump = 1.0f;}\n");*/
-				
-			}
-		}
-
+			WRITE(p, "alphabump = indtex%d.%s %s;\n", 
+			bpmem.tevind[n].bt, 
+			tevIndAlphaSel[bpmem.tevind[n].bs], 
+			tevIndAlphaScale[bpmem.tevind[n].fmt]);			
+		}		
        // format
        WRITE(p, "float3 indtevcrd%d = indtex%d * %s;\n", n, bpmem.tevind[n].bt, tevIndFmtScale[bpmem.tevind[n].fmt]);

@ -677,7 +702,7 @@ static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL)
        SampleTexture(p, "textemp", "tevcoord", texswap, texmap, texture_mask, HLSL);
    }
    else
-        WRITE(p, "textemp=float4(1.0,1.0,1.0,1.0);\n");
+        WRITE(p, "textemp=float4(1.0f,1.0f,1.0f,1.0f);\n");

    int kc = bpmem.tevksel[n / 2].getKC(n & 1);
    int ka = bpmem.tevksel[n / 2].getKA(n & 1);
@ -723,51 +748,11 @@ static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL)
    else 
 	{
        int cmp = (cc.shift<<1)|cc.op|8; // comparemode stored here
-        switch(cmp) 
-		{
-        case TEVCMP_R8_GT:
-        case TEVCMP_RGB8_GT: // per component compares
-            WRITE(p, "   %s + ((%s.%s > %s.%s) ? %s : float3(0.0f,0.0f,0.0f))",
+		WRITE(p, TEVCMPColorOPTable[cmp],//lockup the function from the op table
                tevCInputTable[cc.d], 
-				tevCInputTable2[cc.a], 
-				cmp==TEVCMP_R8_GT?"r":"rgb", 
-				tevCInputTable2[cc.b], 
-				cmp==TEVCMP_R8_GT?"r":"rgb", 
-				tevCInputTable[cc.c]);
-            break;
-        case TEVCMP_R8_EQ:
-        case TEVCMP_RGB8_EQ:
-            WRITE(p, "   %s + (abs(%s.r - %s.r)<%f ? %s : float3(0.0f,0.0f,0.0f))",
-                tevCInputTable[cc.d], 
-				tevCInputTable2[cc.a], 
-				tevCInputTable2[cc.b], 
-				epsilon8bit, 
-				tevCInputTable[cc.c]);
-            break;
-        
-        case TEVCMP_GR16_GT: // 16 bit compares: 255*g+r (probably used for ztextures, so make sure in ztextures, g is the most significant byte)
-        case TEVCMP_BGR24_GT: // 24 bit compares: 255*255*b+255*g+r
-            WRITE(p, "   %s + (( dot(%s.rgb-%s.rgb, comp%s) > 0) ? %s : float3(0.0f,0.0f,0.0f))",
-                tevCInputTable[cc.d], 
-				tevCInputTable2[cc.a], 
-				tevCInputTable2[cc.b], 
-				cmp==TEVCMP_GR16_GT?"16":"24", 
-				tevCInputTable[cc.c]);
-            break;
-        case TEVCMP_GR16_EQ:
-        case TEVCMP_BGR24_EQ:
-            WRITE(p, "   %s + (abs(dot(%s.rgb - %s.rgb, comp%s))<%f ? %s : float3(0.0f,0.0f,0.0f))",
-                tevCInputTable[cc.d], 
-				tevCInputTable2[cc.a], 
-				tevCInputTable2[cc.b], 
-				cmp==TEVCMP_GR16_EQ?"16":"24", 
-				epsilon8bit, 
-				tevCInputTable[cc.c]);
-            break;
-        default:
-            WRITE(p, "float3(0.0f,0.0f,0.0f)");
-            break;
-        }
+				tevCInputTable2[cc.a],
+				tevCInputTable2[cc.b],
+				tevCInputTable[cc.c]);       
    }

 	WRITE(p,");\n");
@ -806,51 +791,11 @@ static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL)
 	{
        //compare alpha combiner goes here
        int cmp = (ac.shift<<1)|ac.op|8; // comparemode stored here
-        switch(cmp) 
-		{
-        case TEVCMP_R8_GT:
-        case TEVCMP_A8_GT:
-            WRITE(p, "   %s + ((%s.%s > %s.%s) ? %s : 0)",
+		WRITE(p, TEVCMPAlphaOPTable[cmp],
                tevAInputTable[ac.d],
-				tevAInputTable2[ac.a], 
-				cmp==TEVCMP_R8_GT?"r":"a", 
-				tevAInputTable2[ac.b], 
-				cmp==TEVCMP_R8_GT?"r":"a", 
-				tevAInputTable[ac.c]);
-            break;
-        case TEVCMP_R8_EQ:
-        case TEVCMP_A8_EQ:
-            WRITE(p, "   %s + (abs(%s.r - %s.r)<= %f ? %s : 0)",
-                tevAInputTable[ac.d],
-				tevAInputTable2[ac.a], 
+				tevAInputTable2[ac.a],
 				tevAInputTable2[ac.b],
-				epsilon8bit,
-				tevAInputTable[ac.c]);
-            break;
-        
-        case TEVCMP_GR16_GT: // 16 bit compares: 255*g+r (probably used for ztextures, so make sure in ztextures, g is the most significant byte)
-        case TEVCMP_BGR24_GT: // 24 bit compares: 255*255*b+255*g+r
-            WRITE(p, "   %s + (( dot(%s.rgb-%s.rgb, comp%s) > 0) ? %s : 0)",
-                tevAInputTable[ac.d],
-				tevAInputTable2[ac.a], 
-				tevAInputTable2[ac.b], 
-				cmp==TEVCMP_GR16_GT?"16":"24", 
-				tevAInputTable[ac.c]);
-            break;
-        case TEVCMP_GR16_EQ:
-        case TEVCMP_BGR24_EQ:
-            WRITE(p, "   %s + (abs(dot(%s.rgb - %s.rgb, comp%s))<=%f ? %s : 0)",
-                tevAInputTable[ac.d],
-				tevAInputTable2[ac.a], 
-				tevAInputTable2[ac.b],
-				cmp==TEVCMP_GR16_EQ?"16":"24",
-				epsilon8bit,
-				tevAInputTable[ac.c]);
-            break;
-        default:
-            WRITE(p, "0)");
-            break;
-        }
+				tevAInputTable[ac.c]);       		
    }

    WRITE(p, ");\n\n");
@ -897,26 +842,14 @@ void SampleTexture(char *&p, const char *destination, const char *texcoords, con

 static const char *tevAlphaFuncsTable[] = 
 {
-    "(false)",						//ALPHACMP_NEVER 0
-	"(prev.a <= %s - %f)",			//ALPHACMP_LESS 1
-	"(abs( prev.a - %s ) < %f)",	//ALPHACMP_EQUAL 2
-	"(prev.a < %s + %f)",			//ALPHACMP_LEQUAL 3
-	"(prev.a >= %s + %f)",			//ALPHACMP_GREATER 4
-	"(abs( prev.a - %s ) >= %f)",	//ALPHACMP_NEQUAL 5
-	"(prev.a > %s - %f)",			//ALPHACMP_GEQUAL 6
-	"(true)"						//ALPHACMP_ALWAYS 7
-};
-
-static const float tevAlphaDeltas[] = 
-{
-    0.0f,				//ALPHACMP_NEVER 0
-	epsilon8bit*0.5f,	//ALPHACMP_LESS 1
-	epsilon8bit,		//ALPHACMP_EQUAL 2
-	epsilon8bit*0.5f,	//ALPHACMP_LEQUAL 3
-	epsilon8bit*0.5f,	//ALPHACMP_GREATER 4
-	epsilon8bit,		//ALPHACMP_NEQUAL 5
-	epsilon8bit*0.5f,	//ALPHACMP_GEQUAL 6
-	0.0f				//ALPHACMP_ALWAYS 7
+    "(false)",									//ALPHACMP_NEVER 0
+	"(prev.a <= %s - (1.0f/510.0f))",			//ALPHACMP_LESS 1
+	"(abs( prev.a - %s ) < (1.0f/255.0f))",		//ALPHACMP_EQUAL 2
+	"(prev.a < %s + (1.0f/510.0f))",			//ALPHACMP_LEQUAL 3
+	"(prev.a >= %s + (1.0f/510.0f))",			//ALPHACMP_GREATER 4
+	"(abs( prev.a - %s ) >= (1.0f/255.0f))",	//ALPHACMP_NEQUAL 5
+	"(prev.a > %s - (1.0f/510.0f))",			//ALPHACMP_GEQUAL 6
+	"(true)"									//ALPHACMP_ALWAYS 7
 };

 static const char *tevAlphaFunclogicTable[] =
@ -965,12 +898,12 @@ static bool WriteAlphaTest(char *&p, bool HLSL)
 		WRITE(p, "discard(!( ");

 	int compindex = bpmem.alphaFunc.comp0 % 8;
-	WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[0],tevAlphaDeltas[compindex]);
+	WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[0]);//lockup the first component from the alpha function table
    
-	WRITE(p, tevAlphaFunclogicTable[bpmem.alphaFunc.logic % 4]);
+	WRITE(p, tevAlphaFunclogicTable[bpmem.alphaFunc.logic % 4]);//lockup the logic op
   
    compindex = bpmem.alphaFunc.comp1 % 8;
-	WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[1],tevAlphaDeltas[compindex]);    
+	WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[1]);//lockup the second component from the alpha function table    

 	if (HLSL) {
 		// clip works differently than discard - discard takes a bool, clip takes a value that kills the pixel on negative