Rewrite the TEV stages generator to make it more hardware-like, thanks to godisgovernment for the documentation. Also make it faster using lookup tables.

I think I am going to get a lot of -1s for this one, but it is the only way to really test it. I hope this fixes something and doesn't break too much ;) git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4475 8ced0084-cf51-0410-be5f-012b33b47a6e
2025-07-25 15:19:42 -06:00 · 2009-10-29 03:28:38 +00:00
parent 2a99dfb261
commit 7602f7ab3e
1 changed files with 72 additions and 139 deletions
--- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp
+++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp
@ -356,7 +356,8 @@ static const char *alphaRef[2] =
 static const char *tevCOutputTable[]  = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" };
 static const char *tevAOutputTable[]  = { "prev.a", "c0.a", "c1.a", "c2.a" };
 static const char *tevIndAlphaSel[]   = {"", "x", "y", "z"};
-static const char *tevIndAlphaScale[] = {"", "*32","*16","*8"};
+//static const char *tevIndAlphaScale[] = {"", "*32","*16","*8"};
 static const char *tevIndAlphaScale[] = {"*(248.0f/255.0f)", "*(224.0f/255.0f)","*(240.0f/255.0f)","*(248.0f/255.0f)"};
 static const char *tevIndBiasField[]  = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias
 static const char *tevIndBiasAdd[]    = {"-128.0f", "1.0f", "1.0f", "1.0f" }; // indexed by fmt
 static const char *tevIndWrapStart[]  = {"0", "256", "128", "64", "32", "16", "0.001" };
@ -556,6 +557,52 @@ const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, bool HLSL
    return text;
 }
 // Table with the color compare operations.
 // Each entry is a printf-style template for the shader expression written for
 // one color compare mode; the table is indexed by the compare mode number.
 //  - Entries 0-7 are a constant zero float3 and contain no placeholders.
 //  - Entries 8-15 each contain four %s placeholders: the first operand is added
 //    to the result, the second and third are the two values being compared, and
 //    the fourth is the value contributed when the comparison holds (zero
 //    otherwise). Presumably these are the d, a, b and c inputs in that order,
 //    as in the switch-based code this table replaces -- TODO confirm at the
 //    WRITE call site.
 //  - Entries 8-13 use a ternary select; entries 14 and 15 compare per component
 //    and multiply the fourth operand by a 0/1 mask built from sign() and max().
 //  - comp16 and comp24 are names that must exist in the generated shader; they
 //    are not defined by this table.
 //  - The GT entries use a margin of (1.0f/510.0f), which is half of the
 //    (1.0f/255.0f) tolerance used by the EQ entries.
 // NOTE(review): TEVCMP_R8_GT uses '>' while the GR16/BGR24 GT entries use '>='
 // -- confirm this difference is intended.
 static const char *TEVCMPColorOPTable[16] =
 {
 	"float3(0.0f,0.0f,0.0f)",//0
 	"float3(0.0f,0.0f,0.0f)",//1
 	"float3(0.0f,0.0f,0.0f)",//2
 	"float3(0.0f,0.0f,0.0f)",//3
 	"float3(0.0f,0.0f,0.0f)",//4
 	"float3(0.0f,0.0f,0.0f)",//5
 	"float3(0.0f,0.0f,0.0f)",//6
 	"float3(0.0f,0.0f,0.0f)",//7
 	"   %s + ((%s.r > %s.r + (1.0f/510.0f)) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_R8_GT 8
 	"   %s + ((abs(%s.r - %s.r) < (1.0f/255.0f)) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_R8_EQ 9
 	"   %s + (( dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (1.0f/510.0f))) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_GR16_GT 10
 	"   %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (1.0f/255.0f) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_GR16_EQ 11
 	"   %s + (( dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (1.0f/510.0f))) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_BGR24_GT 12
 	"   %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (1.0f/255.0f) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_BGR24_EQ 13
 	"   %s + (max(sign(%s.rgb - %s.rgb - (1.0f/510.0f)),float3(0.0f,0.0f,0.0f)) * %s)",//#define TEVCMP_RGB8_GT  14
 	"   %s + ((float3(1.0f,1.0f,1.0f) - max(sign(abs(%s.rgb - %s.rgb) - (1.0f/255.0f)),float3(0.0f,0.0f,0.0f))) * %s)"//#define TEVCMP_RGB8_EQ  15
 };
 // Table with the alpha compare operations.
 // Each entry is a printf-style template for the scalar shader expression
 // written for one alpha compare mode; the table is indexed by the compare mode
 // number.
 //  - Entries 0-7 are the constant "0.0f" and contain no placeholders.
 //  - Entries 8-15 each contain four %s placeholders: the first operand is added
 //    to the result, the second and third are the two values being compared, and
 //    the fourth is the value selected when the comparison holds (0.0f
 //    otherwise). Presumably these are the d, a, b and c inputs in that order,
 //    as in the switch-based code this table replaces -- TODO confirm at the
 //    WRITE call site.
 //  - Entries 8-13 compare the .r channel or a dot product of .rgb with
 //    comp16/comp24 (names that must exist in the generated shader; not defined
 //    here); entries 14 and 15 compare the .a channel.
 //  - The GT entries use a margin of (1.0f/510.0f), which is half of the
 //    (1.0f/255.0f) tolerance used by the EQ entries.
 static const char *TEVCMPAlphaOPTable[16] =
 {
 	"0.0f",//0
 	"0.0f",//1
 	"0.0f",//2
 	"0.0f",//3
 	"0.0f",//4
 	"0.0f",//5
 	"0.0f",//6
 	"0.0f",//7
 	"   %s + ((%s.r >= (%s.r + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_R8_GT 8
 	"   %s + (abs(%s.r - %s.r) < (1.0f/255.0f) ? %s : 0.0f)",//#define TEVCMP_R8_EQ 9
 	"   %s + ((dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_GR16_GT 10
 	"   %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (1.0f/255.0f) ? %s : 0.0f)",//#define TEVCMP_GR16_EQ 11
 	"   %s + ((dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_BGR24_GT 12
 	"   %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (1.0f/255.0f) ? %s : 0.0f)",//#define TEVCMP_BGR24_EQ 13	
 	"   %s + ((%s.a >= (%s.a + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_A8_GT 14
 	"   %s + (abs(%s.a - %s.a) < (1.0f/255.0f) ? %s : 0.0f)"//#define TEVCMP_A8_EQ 15
 };
 static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL)
 {
    char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap];
@ -575,33 +622,11 @@ static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL)
        // perform the indirect op on the incoming regular coordinates using indtex%d as the offset coords
 		if (bpmem.tevind[n].bs != ITBA_OFF) 
 		{
            // write the bump alpha
 			if (bpmem.tevind[n].fmt == ITF_8) 
 			WRITE(p, "alphabump = indtex%d.%s %s;\n", 
 			bpmem.tevind[n].bt, 
 			tevIndAlphaSel[bpmem.tevind[n].bs], 
 			tevIndAlphaScale[bpmem.tevind[n].fmt]);			
 			else 
 			{			
 				// donkopunchstania: really bad way to do this
 				// cannot always use fract because fract(1.0) is 0.0 when it needs to be 1.0
 				// omitting fract seems to work as well
 				WRITE(p, "if (indtex%d.%s >= 1.0f )\n", bpmem.tevind[n].bt,	tevIndAlphaSel[bpmem.tevind[n].bs]);
 				WRITE(p, "   alphabump = 1.0f;\n");
 				WRITE(p, "else\n");
 				WRITE(p, "   alphabump = fract ( indtex%d.%s %s );\n", 
 					bpmem.tevind[n].bt,
 					tevIndAlphaSel[bpmem.tevind[n].bs], 
 					tevIndAlphaScale[bpmem.tevind[n].fmt]);
 				/*WRITE(p, "   alphabump = indtex%d.%s %s;\n", 
 					bpmem.tevind[n].bt,
 					tevIndAlphaSel[bpmem.tevind[n].bs], 
 					tevIndAlphaScale[bpmem.tevind[n].fmt]);
 				WRITE(p, "if (alphabump > 1.0f ){ alphabump = fract ( alphabump );if (alphabump == 0.0f ) alphabump = 1.0f;}\n");*/
 		}		
 		}
        // format
        WRITE(p, "float3 indtevcrd%d = indtex%d * %s;\n", n, bpmem.tevind[n].bt, tevIndFmtScale[bpmem.tevind[n].fmt]);
@ -677,7 +702,7 @@ static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL)
        SampleTexture(p, "textemp", "tevcoord", texswap, texmap, texture_mask, HLSL);
    }
    else
-        WRITE(p, "textemp=float4(1.0,1.0,1.0,1.0);\n");
+        WRITE(p, "textemp=float4(1.0f,1.0f,1.0f,1.0f);\n");
    int kc = bpmem.tevksel[n / 2].getKC(n & 1);
    int ka = bpmem.tevksel[n / 2].getKA(n & 1);
@ -723,51 +748,11 @@ static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL)
    else 
 	{
        int cmp = (cc.shift<<1)|cc.op|8; // comparemode stored here
-        switch(cmp) 
+		WRITE(p, TEVCMPColorOPTable[cmp],//lockup the function from the op table
 		{
        case TEVCMP_R8_GT:
        case TEVCMP_RGB8_GT: // per component compares
            WRITE(p, "   %s + ((%s.%s > %s.%s) ? %s : float3(0.0f,0.0f,0.0f))",
                tevCInputTable[cc.d], 
 				tevCInputTable2[cc.a], 
 				cmp==TEVCMP_R8_GT?"r":"rgb", 
 				tevCInputTable2[cc.b], 
 				cmp==TEVCMP_R8_GT?"r":"rgb", 
 				tevCInputTable[cc.c]);
            break;
        case TEVCMP_R8_EQ:
        case TEVCMP_RGB8_EQ:
            WRITE(p, "   %s + (abs(%s.r - %s.r)<%f ? %s : float3(0.0f,0.0f,0.0f))",
                tevCInputTable[cc.d], 
 				tevCInputTable2[cc.a],
 				tevCInputTable2[cc.b],
 				epsilon8bit, 
 				tevCInputTable[cc.c]);       
            break;
        case TEVCMP_GR16_GT: // 16 bit compares: 255*g+r (probably used for ztextures, so make sure in ztextures, g is the most significant byte)
        case TEVCMP_BGR24_GT: // 24 bit compares: 255*255*b+255*g+r
            WRITE(p, "   %s + (( dot(%s.rgb-%s.rgb, comp%s) > 0) ? %s : float3(0.0f,0.0f,0.0f))",
                tevCInputTable[cc.d], 
 				tevCInputTable2[cc.a], 
 				tevCInputTable2[cc.b], 
 				cmp==TEVCMP_GR16_GT?"16":"24", 
 				tevCInputTable[cc.c]);
            break;
        case TEVCMP_GR16_EQ:
        case TEVCMP_BGR24_EQ:
            WRITE(p, "   %s + (abs(dot(%s.rgb - %s.rgb, comp%s))<%f ? %s : float3(0.0f,0.0f,0.0f))",
                tevCInputTable[cc.d], 
 				tevCInputTable2[cc.a], 
 				tevCInputTable2[cc.b], 
 				cmp==TEVCMP_GR16_EQ?"16":"24", 
 				epsilon8bit, 
 				tevCInputTable[cc.c]);
            break;
        default:
            WRITE(p, "float3(0.0f,0.0f,0.0f)");
            break;
        }
    }
 	WRITE(p,");\n");
@ -806,51 +791,11 @@ static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL)
 	{
        //compare alpha combiner goes here
        int cmp = (ac.shift<<1)|ac.op|8; // comparemode stored here
-        switch(cmp) 
+		WRITE(p, TEVCMPAlphaOPTable[cmp],
 		{
        case TEVCMP_R8_GT:
        case TEVCMP_A8_GT:
            WRITE(p, "   %s + ((%s.%s > %s.%s) ? %s : 0)",
                tevAInputTable[ac.d],
 				tevAInputTable2[ac.a], 
 				cmp==TEVCMP_R8_GT?"r":"a", 
 				tevAInputTable2[ac.b], 
 				cmp==TEVCMP_R8_GT?"r":"a", 
 				tevAInputTable[ac.c]);
            break;
        case TEVCMP_R8_EQ:
        case TEVCMP_A8_EQ:
            WRITE(p, "   %s + (abs(%s.r - %s.r)<= %f ? %s : 0)",
                tevAInputTable[ac.d],
 				tevAInputTable2[ac.a],
 				tevAInputTable2[ac.b],
 				epsilon8bit,
 				tevAInputTable[ac.c]);       		
            break;
        case TEVCMP_GR16_GT: // 16 bit compares: 255*g+r (probably used for ztextures, so make sure in ztextures, g is the most significant byte)
        case TEVCMP_BGR24_GT: // 24 bit compares: 255*255*b+255*g+r
            WRITE(p, "   %s + (( dot(%s.rgb-%s.rgb, comp%s) > 0) ? %s : 0)",
                tevAInputTable[ac.d],
 				tevAInputTable2[ac.a], 
 				tevAInputTable2[ac.b], 
 				cmp==TEVCMP_GR16_GT?"16":"24", 
 				tevAInputTable[ac.c]);
            break;
        case TEVCMP_GR16_EQ:
        case TEVCMP_BGR24_EQ:
            WRITE(p, "   %s + (abs(dot(%s.rgb - %s.rgb, comp%s))<=%f ? %s : 0)",
                tevAInputTable[ac.d],
 				tevAInputTable2[ac.a], 
 				tevAInputTable2[ac.b],
 				cmp==TEVCMP_GR16_EQ?"16":"24",
 				epsilon8bit,
 				tevAInputTable[ac.c]);
            break;
        default:
            WRITE(p, "0)");
            break;
        }
    }
    WRITE(p, ");\n\n");
@ -898,27 +843,15 @@ void SampleTexture(char *&p, const char *destination, const char *texcoords, con
 static const char *tevAlphaFuncsTable[] = 
 {
    "(false)",									//ALPHACMP_NEVER 0
-	"(prev.a <= %s - %f)",			//ALPHACMP_LESS 1
+	"(prev.a <= %s - (1.0f/510.0f))",			//ALPHACMP_LESS 1
-	"(abs( prev.a - %s ) < %f)",	//ALPHACMP_EQUAL 2
+	"(abs( prev.a - %s ) < (1.0f/255.0f))",		//ALPHACMP_EQUAL 2
-	"(prev.a < %s + %f)",			//ALPHACMP_LEQUAL 3
+	"(prev.a < %s + (1.0f/510.0f))",			//ALPHACMP_LEQUAL 3
-	"(prev.a >= %s + %f)",			//ALPHACMP_GREATER 4
+	"(prev.a >= %s + (1.0f/510.0f))",			//ALPHACMP_GREATER 4
-	"(abs( prev.a - %s ) >= %f)",	//ALPHACMP_NEQUAL 5
+	"(abs( prev.a - %s ) >= (1.0f/255.0f))",	//ALPHACMP_NEQUAL 5
-	"(prev.a > %s - %f)",			//ALPHACMP_GEQUAL 6
+	"(prev.a > %s - (1.0f/510.0f))",			//ALPHACMP_GEQUAL 6
 	"(true)"									//ALPHACMP_ALWAYS 7
 };
 static const float tevAlphaDeltas[] = 
 {
    0.0f,				//ALPHACMP_NEVER 0
 	epsilon8bit*0.5f,	//ALPHACMP_LESS 1
 	epsilon8bit,		//ALPHACMP_EQUAL 2
 	epsilon8bit*0.5f,	//ALPHACMP_LEQUAL 3
 	epsilon8bit*0.5f,	//ALPHACMP_GREATER 4
 	epsilon8bit,		//ALPHACMP_NEQUAL 5
 	epsilon8bit*0.5f,	//ALPHACMP_GEQUAL 6
 	0.0f				//ALPHACMP_ALWAYS 7
 };
 static const char *tevAlphaFunclogicTable[] =
 {
 	" && ", // and
@ -965,12 +898,12 @@ static bool WriteAlphaTest(char *&p, bool HLSL)
 		WRITE(p, "discard(!( ");
 	int compindex = bpmem.alphaFunc.comp0 % 8;
-	WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[0],tevAlphaDeltas[compindex]);
+	WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[0]);//lockup the first component from the alpha function table
-	WRITE(p, tevAlphaFunclogicTable[bpmem.alphaFunc.logic % 4]);
+	WRITE(p, tevAlphaFunclogicTable[bpmem.alphaFunc.logic % 4]);//lockup the logic op
    compindex = bpmem.alphaFunc.comp1 % 8;
-	WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[1],tevAlphaDeltas[compindex]);    
+	WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[1]);//lockup the second component from the alpha function table    
 	if (HLSL) {
 		// clip works differently than discard - discard takes a bool, clip takes a value that kills the pixel on negative