rewrite the tev stages generator to make it more hardware like, thanks to godisgovernment for he documentation. Also make it faster using lockup tables.

thinking going to get a lot of -1 for this one but is the only for of really test it.
hope this fix something and don't break to much ;)

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4475 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Rodolfo Osvaldo Bogado 2009-10-29 03:28:38 +00:00
parent 2a99dfb261
commit 7602f7ab3e

View File

@ -356,7 +356,8 @@ static const char *alphaRef[2] =
static const char *tevCOutputTable[] = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" };
static const char *tevAOutputTable[] = { "prev.a", "c0.a", "c1.a", "c2.a" };
static const char *tevIndAlphaSel[] = {"", "x", "y", "z"};
static const char *tevIndAlphaScale[] = {"", "*32","*16","*8"};
//static const char *tevIndAlphaScale[] = {"", "*32","*16","*8"};
static const char *tevIndAlphaScale[] = {"*(248.0f/255.0f)", "*(224.0f/255.0f)","*(240.0f/255.0f)","*(248.0f/255.0f)"};
static const char *tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias
static const char *tevIndBiasAdd[] = {"-128.0f", "1.0f", "1.0f", "1.0f" }; // indexed by fmt
static const char *tevIndWrapStart[] = {"0", "256", "128", "64", "32", "16", "0.001" };
@ -556,6 +557,52 @@ const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, bool HLSL
return text;
}
//table with the color compare operations
static const char *TEVCMPColorOPTable[16] =
{
"float3(0.0f,0.0f,0.0f)",//0
"float3(0.0f,0.0f,0.0f)",//1
"float3(0.0f,0.0f,0.0f)",//2
"float3(0.0f,0.0f,0.0f)",//3
"float3(0.0f,0.0f,0.0f)",//4
"float3(0.0f,0.0f,0.0f)",//5
"float3(0.0f,0.0f,0.0f)",//6
"float3(0.0f,0.0f,0.0f)",//7
" %s + ((%s.r > %s.r + (1.0f/510.0f)) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_R8_GT 8
" %s + ((abs(%s.r - %s.r) < (1.0f/255.0f)) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_R8_EQ 9
" %s + (( dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (1.0f/510.0f))) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_GR16_GT 10
" %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (1.0f/255.0f) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_GR16_EQ 11
" %s + (( dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (1.0f/510.0f))) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_BGR24_GT 12
" %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (1.0f/255.0f) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_BGR24_EQ 13
" %s + (max(sign(%s.rgb - %s.rgb - (1.0f/510.0f)),float3(0.0f,0.0f,0.0f)) * %s)",//#define TEVCMP_RGB8_GT 14
" %s + ((float3(1.0f,1.0f,1.0f) - max(sign(abs(%s.rgb - %s.rgb) - (1.0f/255.0f)),float3(0.0f,0.0f,0.0f))) * %s)"//#define TEVCMP_RGB8_EQ 15
};
//table with the alpha compare operations
static const char *TEVCMPAlphaOPTable[16] =
{
"0.0f",//0
"0.0f",//1
"0.0f",//2
"0.0f",//3
"0.0f",//4
"0.0f",//5
"0.0f",//6
"0.0f",//7
" %s + ((%s.r >= (%s.r + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_R8_GT 8
" %s + (abs(%s.r - %s.r) < (1.0f/255.0f) ? %s : 0.0f)",//#define TEVCMP_R8_EQ 9
" %s + ((dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_GR16_GT 10
" %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (1.0f/255.0f) ? %s : 0.0f)",//#define TEVCMP_GR16_EQ 11
" %s + ((dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_BGR24_GT 12
" %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (1.0f/255.0f) ? %s : 0.0f)",//#define TEVCMP_BGR24_EQ 13
" %s + ((%s.a >= (%s.a + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_A8_GT 14
" %s + (abs(%s.a - %s.a) < (1.0f/255.0f) ? %s : 0.0f)"//#define TEVCMP_A8_EQ 15
};
static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL)
{
char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap];
@ -575,33 +622,11 @@ static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL)
// perform the indirect op on the incoming regular coordinates using indtex%d as the offset coords
if (bpmem.tevind[n].bs != ITBA_OFF)
{
// write the bump alpha
if (bpmem.tevind[n].fmt == ITF_8)
WRITE(p, "alphabump = indtex%d.%s %s;\n",
bpmem.tevind[n].bt,
tevIndAlphaSel[bpmem.tevind[n].bs],
tevIndAlphaScale[bpmem.tevind[n].fmt]);
else
{
// donkopunchstania: really bad way to do this
// cannot always use fract because fract(1.0) is 0.0 when it needs to be 1.0
// omitting fract seems to work as well
WRITE(p, "if (indtex%d.%s >= 1.0f )\n", bpmem.tevind[n].bt, tevIndAlphaSel[bpmem.tevind[n].bs]);
WRITE(p, " alphabump = 1.0f;\n");
WRITE(p, "else\n");
WRITE(p, " alphabump = fract ( indtex%d.%s %s );\n",
bpmem.tevind[n].bt,
tevIndAlphaSel[bpmem.tevind[n].bs],
tevIndAlphaScale[bpmem.tevind[n].fmt]);
/*WRITE(p, " alphabump = indtex%d.%s %s;\n",
bpmem.tevind[n].bt,
tevIndAlphaSel[bpmem.tevind[n].bs],
tevIndAlphaScale[bpmem.tevind[n].fmt]);
WRITE(p, "if (alphabump > 1.0f ){ alphabump = fract ( alphabump );if (alphabump == 0.0f ) alphabump = 1.0f;}\n");*/
}
}
// format
WRITE(p, "float3 indtevcrd%d = indtex%d * %s;\n", n, bpmem.tevind[n].bt, tevIndFmtScale[bpmem.tevind[n].fmt]);
@ -677,7 +702,7 @@ static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL)
SampleTexture(p, "textemp", "tevcoord", texswap, texmap, texture_mask, HLSL);
}
else
WRITE(p, "textemp=float4(1.0,1.0,1.0,1.0);\n");
WRITE(p, "textemp=float4(1.0f,1.0f,1.0f,1.0f);\n");
int kc = bpmem.tevksel[n / 2].getKC(n & 1);
int ka = bpmem.tevksel[n / 2].getKA(n & 1);
@ -723,51 +748,11 @@ static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL)
else
{
int cmp = (cc.shift<<1)|cc.op|8; // comparemode stored here
switch(cmp)
{
case TEVCMP_R8_GT:
case TEVCMP_RGB8_GT: // per component compares
WRITE(p, " %s + ((%s.%s > %s.%s) ? %s : float3(0.0f,0.0f,0.0f))",
tevCInputTable[cc.d],
tevCInputTable2[cc.a],
cmp==TEVCMP_R8_GT?"r":"rgb",
tevCInputTable2[cc.b],
cmp==TEVCMP_R8_GT?"r":"rgb",
tevCInputTable[cc.c]);
break;
case TEVCMP_R8_EQ:
case TEVCMP_RGB8_EQ:
WRITE(p, " %s + (abs(%s.r - %s.r)<%f ? %s : float3(0.0f,0.0f,0.0f))",
WRITE(p, TEVCMPColorOPTable[cmp],//lockup the function from the op table
tevCInputTable[cc.d],
tevCInputTable2[cc.a],
tevCInputTable2[cc.b],
epsilon8bit,
tevCInputTable[cc.c]);
break;
case TEVCMP_GR16_GT: // 16 bit compares: 255*g+r (probably used for ztextures, so make sure in ztextures, g is the most significant byte)
case TEVCMP_BGR24_GT: // 24 bit compares: 255*255*b+255*g+r
WRITE(p, " %s + (( dot(%s.rgb-%s.rgb, comp%s) > 0) ? %s : float3(0.0f,0.0f,0.0f))",
tevCInputTable[cc.d],
tevCInputTable2[cc.a],
tevCInputTable2[cc.b],
cmp==TEVCMP_GR16_GT?"16":"24",
tevCInputTable[cc.c]);
break;
case TEVCMP_GR16_EQ:
case TEVCMP_BGR24_EQ:
WRITE(p, " %s + (abs(dot(%s.rgb - %s.rgb, comp%s))<%f ? %s : float3(0.0f,0.0f,0.0f))",
tevCInputTable[cc.d],
tevCInputTable2[cc.a],
tevCInputTable2[cc.b],
cmp==TEVCMP_GR16_EQ?"16":"24",
epsilon8bit,
tevCInputTable[cc.c]);
break;
default:
WRITE(p, "float3(0.0f,0.0f,0.0f)");
break;
}
}
WRITE(p,");\n");
@ -806,51 +791,11 @@ static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL)
{
//compare alpha combiner goes here
int cmp = (ac.shift<<1)|ac.op|8; // comparemode stored here
switch(cmp)
{
case TEVCMP_R8_GT:
case TEVCMP_A8_GT:
WRITE(p, " %s + ((%s.%s > %s.%s) ? %s : 0)",
tevAInputTable[ac.d],
tevAInputTable2[ac.a],
cmp==TEVCMP_R8_GT?"r":"a",
tevAInputTable2[ac.b],
cmp==TEVCMP_R8_GT?"r":"a",
tevAInputTable[ac.c]);
break;
case TEVCMP_R8_EQ:
case TEVCMP_A8_EQ:
WRITE(p, " %s + (abs(%s.r - %s.r)<= %f ? %s : 0)",
WRITE(p, TEVCMPAlphaOPTable[cmp],
tevAInputTable[ac.d],
tevAInputTable2[ac.a],
tevAInputTable2[ac.b],
epsilon8bit,
tevAInputTable[ac.c]);
break;
case TEVCMP_GR16_GT: // 16 bit compares: 255*g+r (probably used for ztextures, so make sure in ztextures, g is the most significant byte)
case TEVCMP_BGR24_GT: // 24 bit compares: 255*255*b+255*g+r
WRITE(p, " %s + (( dot(%s.rgb-%s.rgb, comp%s) > 0) ? %s : 0)",
tevAInputTable[ac.d],
tevAInputTable2[ac.a],
tevAInputTable2[ac.b],
cmp==TEVCMP_GR16_GT?"16":"24",
tevAInputTable[ac.c]);
break;
case TEVCMP_GR16_EQ:
case TEVCMP_BGR24_EQ:
WRITE(p, " %s + (abs(dot(%s.rgb - %s.rgb, comp%s))<=%f ? %s : 0)",
tevAInputTable[ac.d],
tevAInputTable2[ac.a],
tevAInputTable2[ac.b],
cmp==TEVCMP_GR16_EQ?"16":"24",
epsilon8bit,
tevAInputTable[ac.c]);
break;
default:
WRITE(p, "0)");
break;
}
}
WRITE(p, ");\n\n");
@ -898,27 +843,15 @@ void SampleTexture(char *&p, const char *destination, const char *texcoords, con
static const char *tevAlphaFuncsTable[] =
{
"(false)", //ALPHACMP_NEVER 0
"(prev.a <= %s - %f)", //ALPHACMP_LESS 1
"(abs( prev.a - %s ) < %f)", //ALPHACMP_EQUAL 2
"(prev.a < %s + %f)", //ALPHACMP_LEQUAL 3
"(prev.a >= %s + %f)", //ALPHACMP_GREATER 4
"(abs( prev.a - %s ) >= %f)", //ALPHACMP_NEQUAL 5
"(prev.a > %s - %f)", //ALPHACMP_GEQUAL 6
"(prev.a <= %s - (1.0f/510.0f))", //ALPHACMP_LESS 1
"(abs( prev.a - %s ) < (1.0f/255.0f))", //ALPHACMP_EQUAL 2
"(prev.a < %s + (1.0f/510.0f))", //ALPHACMP_LEQUAL 3
"(prev.a >= %s + (1.0f/510.0f))", //ALPHACMP_GREATER 4
"(abs( prev.a - %s ) >= (1.0f/255.0f))", //ALPHACMP_NEQUAL 5
"(prev.a > %s - (1.0f/510.0f))", //ALPHACMP_GEQUAL 6
"(true)" //ALPHACMP_ALWAYS 7
};
static const float tevAlphaDeltas[] =
{
0.0f, //ALPHACMP_NEVER 0
epsilon8bit*0.5f, //ALPHACMP_LESS 1
epsilon8bit, //ALPHACMP_EQUAL 2
epsilon8bit*0.5f, //ALPHACMP_LEQUAL 3
epsilon8bit*0.5f, //ALPHACMP_GREATER 4
epsilon8bit, //ALPHACMP_NEQUAL 5
epsilon8bit*0.5f, //ALPHACMP_GEQUAL 6
0.0f //ALPHACMP_ALWAYS 7
};
static const char *tevAlphaFunclogicTable[] =
{
" && ", // and
@ -965,12 +898,12 @@ static bool WriteAlphaTest(char *&p, bool HLSL)
WRITE(p, "discard(!( ");
int compindex = bpmem.alphaFunc.comp0 % 8;
WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[0],tevAlphaDeltas[compindex]);
WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[0]);//lockup the first component from the alpha function table
WRITE(p, tevAlphaFunclogicTable[bpmem.alphaFunc.logic % 4]);
WRITE(p, tevAlphaFunclogicTable[bpmem.alphaFunc.logic % 4]);//lockup the logic op
compindex = bpmem.alphaFunc.comp1 % 8;
WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[1],tevAlphaDeltas[compindex]);
WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[1]);//lockup the second component from the alpha function table
if (HLSL) {
// clip works differently than discard - discard takes a bool, clip takes a value that kills the pixel on negative