Revert the recent zcomploc changes including the Graphic_Fixes merge.

Reason:
- It's wrong, zcomploc can't be emulated perfectly in HW backends without severely impacting performance.
- It provides virtually no advantages over the previous hack while introducing lots of code.
- There is a better alternative: If people insist on having some sort of valid zcomploc emulation, I suggest rendering each primitive separately while using a _clean_ dual-pass approach to emulate zcomploc.

This reverts commit 0efd4e5c29.
This reverts commit b4ec836aca.
This reverts commit bb4c9e2205.
This reverts commit 146b02615c.
This commit is contained in:
NeoBrainX
2012-08-10 18:57:37 +02:00
parent 6e02ad55bc
commit 08a9c66037
22 changed files with 211 additions and 358 deletions

View File

@ -26,7 +26,8 @@
#include "BPMemory.h"
#include "VideoConfig.h"
#include "NativeVertexFormat.h"
#include "PixelShaderManager.h"
static int AlphaPreTest();
static void StageHash(int stage, u32* out)
{
@ -101,25 +102,26 @@ static void StageHash(int stage, u32* out)
// a unique identifier, basically containing all the bits. Yup, it's a lot ....
// It would likely be a lot more efficient to build this incrementally as the attributes
// are set...
void GetPixelShaderId(PIXELSHADERUID *uid, PSGRENDER_MODE PSGRenderMode, u32 components)
void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 components)
{
memset(uid->values, 0, sizeof(uid->values));
uid->values[0] |= bpmem.genMode.numtevstages; // 4
uid->values[0] |= bpmem.genMode.numtexgens << 4; // 4
uid->values[0] |= ((u32)PSGRenderMode) << 8; // 2
bool PixelLightingEnabled = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting;
bool DepthTextureEnable = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && (bpmem.zmode.testenable || bpmem.zmode.updateenable));
bool MustWriteToDepth = DepthTextureEnable || g_ActiveConfig.bEnablePerPixelDepth;
bool ZCompLocEnabled = bpmem.zcontrol.zcomploc && bpmem.zmode.updateenable;
uid->values[0] |= dstAlphaMode << 8; // 2
bool DepthTextureEnable = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable) || g_ActiveConfig.bEnablePerPixelDepth;
uid->values[0] |= DepthTextureEnable << 10; // 1
uid->values[0] |= MustWriteToDepth << 11; // 1
uid->values[0] |= PixelLightingEnabled << 12; // 1
if (!PixelLightingEnabled) uid->values[0] |= xfregs.numTexGen.numTexGens << 13; // 4
ALPHA_PRETEST_RESULT alphaPreTest = PixelShaderManager::AlphaPreTest();
uid->values[0] |= ((u32)alphaPreTest) << 17; // 2
uid->values[0] |= ZCompLocEnabled << 18; // 2
if (((alphaPreTest == ALPHAPT_ALWAYSFAIL) || (alphaPreTest == ALPHAPT_ALWAYSPASS && PSGRenderMode == PSGRENDER_DSTALPHA_ALPHA_PASS)) && !DepthTextureEnable)
bool enablePL = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting;
uid->values[0] |= enablePL << 11; // 1
if (!enablePL) uid->values[0] |= xfregs.numTexGen.numTexGens << 12; // 4
u32 alphaPreTest = AlphaPreTest()+1;
uid->values[0] |= alphaPreTest << 16; // 2
if (alphaPreTest == 1 || (alphaPreTest && !DepthTextureEnable && dstAlphaMode == DSTALPHA_ALPHA_PASS))
{
// Courtesy of PreAlphaTest, we're done already ;)
// NOTE: The comment header of generated shaders depends on the value of bpmem.genmode.numindstages.. shouldnt really bother about that though.
@ -129,13 +131,13 @@ void GetPixelShaderId(PIXELSHADERUID *uid, PSGRENDER_MODE PSGRenderMode, u32 com
for (unsigned int i = 0; i < bpmem.genMode.numtexgens; ++i)
{
if (19+i < 32)
uid->values[0] |= xfregs.texMtxInfo[i].projection << (19+i); // 1
if (18+i < 32)
uid->values[0] |= xfregs.texMtxInfo[i].projection << (18+i); // 1
else
uid->values[1] |= xfregs.texMtxInfo[i].projection << (i - 13); // 1
uid->values[1] |= xfregs.texMtxInfo[i].projection << (i - 14); // 1
}
uid->values[1] = bpmem.genMode.numindstages << 3; // 3
uid->values[1] = bpmem.genMode.numindstages << 2; // 3
u32 indirectStagesUsed = 0;
for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i)
if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages)
@ -143,13 +145,13 @@ void GetPixelShaderId(PIXELSHADERUID *uid, PSGRENDER_MODE PSGRenderMode, u32 com
assert(indirectStagesUsed == (indirectStagesUsed & 0xF));
uid->values[1] |= indirectStagesUsed << 6; // 4;
uid->values[1] |= indirectStagesUsed << 5; // 4;
for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i)
{
if (indirectStagesUsed & (1 << i))
{
uid->values[1] |= (bpmem.tevindref.getTexCoord(i) < bpmem.genMode.numtexgens) << (10 + 3*i); // 1
uid->values[1] |= (bpmem.tevindref.getTexCoord(i) < bpmem.genMode.numtexgens) << (9 + 3*i); // 1
if (bpmem.tevindref.getTexCoord(i) < bpmem.genMode.numtexgens)
uid->values[1] |= bpmem.tevindref.getTexCoord(i) << (10 + 3*i); // 2
}
@ -178,7 +180,7 @@ void GetPixelShaderId(PIXELSHADERUID *uid, PSGRENDER_MODE PSGRenderMode, u32 com
}
}
if (PSGRenderMode != PSGRENDER_DSTALPHA_ALPHA_PASS)
if (dstAlphaMode != DSTALPHA_ALPHA_PASS)
{
if (bpmem.fog.c_proj_fsel.fsel != 0)
{
@ -188,7 +190,7 @@ void GetPixelShaderId(PIXELSHADERUID *uid, PSGRENDER_MODE PSGRenderMode, u32 com
}
++ptr;
if (PixelLightingEnabled)
if (enablePL)
{
ptr += GetLightingShaderId(ptr);
*ptr++ = components;
@ -197,26 +199,20 @@ void GetPixelShaderId(PIXELSHADERUID *uid, PSGRENDER_MODE PSGRenderMode, u32 com
uid->num_values = ptr - uid->values;
}
void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, PSGRENDER_MODE PSGRenderMode, u32 components)
void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u32 components)
{
memset(uid->values, 0, sizeof(uid->values));
bool PixelLightingEnabled = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting;
bool DepthTextureEnable = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && (bpmem.zmode.testenable || bpmem.zmode.updateenable));
bool MustWriteToDepth = DepthTextureEnable || g_ActiveConfig.bEnablePerPixelDepth;
bool ZCompLocEnabled = bpmem.zcontrol.zcomploc && bpmem.zmode.updateenable;
u32* ptr = uid->values;
*ptr++ = ((u32)PSGRenderMode); // 0
*ptr++ = dstAlphaMode; // 0
*ptr++ = bpmem.genMode.hex; // 1
*ptr++ = bpmem.ztex2.hex; // 2
*ptr++ = bpmem.zcontrol.hex; // 3
*ptr++ = bpmem.zmode.hex; // 4
*ptr++ = MustWriteToDepth; // 5
*ptr++ = PixelLightingEnabled; // 6
*ptr++ = g_ActiveConfig.bEnablePerPixelDepth; // 5
*ptr++ = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; // 6
*ptr++ = xfregs.numTexGen.hex; // 7
*ptr++ = DepthTextureEnable; // 8
*ptr++ = ZCompLocEnabled; // 9
if (PixelLightingEnabled)
if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
*ptr++ = xfregs.color[0].hex;
*ptr++ = xfregs.alpha[0].hex;
@ -226,43 +222,43 @@ void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, PSGRENDER_MODE PSGRenderMode,
}
for (unsigned int i = 0; i < 8; ++i)
*ptr++ = xfregs.texMtxInfo[i].hex; // 9-17
*ptr++ = xfregs.texMtxInfo[i].hex; // 8-15
for (unsigned int i = 0; i < 16; ++i)
*ptr++ = bpmem.tevind[i].hex; // 18-33
*ptr++ = bpmem.tevind[i].hex; // 16-31
*ptr++ = bpmem.tevindref.hex; // 34
*ptr++ = bpmem.tevindref.hex; // 32
for (int i = 0; i < bpmem.genMode.numtevstages+1; ++i) // up to 16 times
{
*ptr++ = bpmem.combiners[i].colorC.hex; // 35+5*i
*ptr++ = bpmem.combiners[i].alphaC.hex; // 36+5*i
*ptr++ = bpmem.tevind[i].hex; // 37+5*i
*ptr++ = bpmem.tevksel[i/2].hex; // 38+5*i
*ptr++ = bpmem.tevorders[i/2].hex; // 39+5*i
*ptr++ = bpmem.combiners[i].colorC.hex; // 33+5*i
*ptr++ = bpmem.combiners[i].alphaC.hex; // 34+5*i
*ptr++ = bpmem.tevind[i].hex; // 35+5*i
*ptr++ = bpmem.tevksel[i/2].hex; // 36+5*i
*ptr++ = bpmem.tevorders[i/2].hex; // 37+5*i
}
ptr = &uid->values[115];
ptr = &uid->values[113];
*ptr++ = bpmem.alphaFunc.hex; // 116
*ptr++ = bpmem.alphaFunc.hex; // 113
*ptr++ = bpmem.fog.c_proj_fsel.hex; // 117
*ptr++ = bpmem.fogRange.Base.hex; // 118
*ptr++ = bpmem.fog.c_proj_fsel.hex; // 114
*ptr++ = bpmem.fogRange.Base.hex; // 115
_assert_((ptr - uid->values) == uid->GetNumValues());
}
void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::string& old_code, PSGRENDER_MODE PSGRenderMode, u32 components)
void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::string& old_code, DSTALPHA_MODE dstAlphaMode, u32 components)
{
if (!g_ActiveConfig.bEnableShaderDebugging)
return;
PIXELSHADERUIDSAFE new_id;
GetSafePixelShaderId(&new_id, PSGRenderMode, components);
GetSafePixelShaderId(&new_id, dstAlphaMode, components);
if (!(old_id == new_id))
{
std::string new_code(GeneratePixelShaderCode(PSGRenderMode, api, components));
std::string new_code(GeneratePixelShaderCode(dstAlphaMode, api, components));
if (old_code != new_code)
{
_assert_(old_id.GetNumValues() == new_id.GetNumValues());
@ -303,7 +299,7 @@ void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::
static void WriteStage(char *&p, int n, API_TYPE ApiType);
static void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType);
// static void WriteAlphaCompare(char *&p, int num, int comp);
static void WriteAlphaTest(char *&p, API_TYPE ApiType,PSGRENDER_MODE PSGRenderMode);
static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode);
static void WriteFog(char *&p);
static const char *tevKSelTableC[] = // KCSEL
@ -489,10 +485,7 @@ static const char *tevIndFmtScale[] = {"255.0f", "31.0f", "15.0f", "7.0f" };
static char swapModeTable[4][5];
static char text[16384];
static bool PixelLightingEnabled;
static bool DepthTextureEnable;
static bool MustWriteToDepth;
static bool ZCompLocEnabled;
static void BuildSwapModeTable()
{
@ -507,7 +500,7 @@ static void BuildSwapModeTable()
}
}
const char *GeneratePixelShaderCode(PSGRENDER_MODE PSGRenderMode, API_TYPE ApiType, u32 components)
const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components)
{
setlocale(LC_NUMERIC, "C"); // Reset locale for compilation
text[sizeof(text) - 1] = 0x7C; // canary
@ -530,10 +523,7 @@ const char *GeneratePixelShaderCode(PSGRENDER_MODE PSGRenderMode, API_TYPE ApiTy
nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt;
}
}
PixelLightingEnabled = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting;
DepthTextureEnable = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && (bpmem.zmode.testenable || bpmem.zmode.updateenable));
MustWriteToDepth = DepthTextureEnable || g_ActiveConfig.bEnablePerPixelDepth;
ZCompLocEnabled = bpmem.zcontrol.zcomploc && bpmem.zmode.updateenable;
DepthTextureEnable = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable) || g_ActiveConfig.bEnablePerPixelDepth ;
// Declare samplers
if(ApiType != API_D3D11)
@ -575,7 +565,7 @@ const char *GeneratePixelShaderCode(PSGRENDER_MODE PSGRenderMode, API_TYPE ApiTy
WRITE(p, "uniform float4 " I_INDTEXMTX"[6] : register(c%d);\n", C_INDTEXMTX);
WRITE(p, "uniform float4 " I_FOG"[3] : register(c%d);\n", C_FOG);
if(PixelLightingEnabled)
if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
WRITE(p,"typedef struct { float4 col; float4 cosatt; float4 distatt; float4 pos; float4 dir; } Light;\n");
WRITE(p,"typedef struct { Light lights[8]; } s_" I_PLIGHTS";\n");
@ -588,15 +578,15 @@ const char *GeneratePixelShaderCode(PSGRENDER_MODE PSGRenderMode, API_TYPE ApiTy
if(ApiType != API_D3D11)
{
WRITE(p, " out float4 ocol0 : COLOR0,%s%s\n in float4 rawpos : %s,\n",
PSGRenderMode == PSGRENDER_DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : COLOR1," : "",
MustWriteToDepth ? "\n out float depth : DEPTH," : "",
dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : COLOR1," : "",
DepthTextureEnable ? "\n out float depth : DEPTH," : "",
ApiType & API_OPENGL ? "WPOS" : ApiType & API_D3D9_SM20 ? "POSITION" : "VPOS");
}
else
{
WRITE(p, " out float4 ocol0 : SV_Target0,%s%s\n in float4 rawpos : SV_Position,\n",
PSGRenderMode == PSGRENDER_DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : SV_Target1," : "",
MustWriteToDepth ? "\n out float depth : SV_Depth," : "");
dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : SV_Target1," : "",
DepthTextureEnable ? "\n out float depth : SV_Depth," : "");
}
WRITE(p, " in float4 colors_0 : COLOR0,\n");
@ -608,75 +598,51 @@ const char *GeneratePixelShaderCode(PSGRENDER_MODE PSGRenderMode, API_TYPE ApiTy
for (int i = 0; i < numTexgen; ++i)
WRITE(p, ",\n in float3 uv%d : TEXCOORD%d", i, i);
WRITE(p, ",\n in float4 clipPos : TEXCOORD%d", numTexgen);
if(PixelLightingEnabled)
if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
WRITE(p, ",\n in float4 Normal : TEXCOORD%d", numTexgen + 1);
}
else
{
// wpos is in w of first 4 texcoords
if(PixelLightingEnabled)
if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
for (int i = 0; i < (numTexgen + 1); ++i)
for (int i = 0; i < 8; ++i)
WRITE(p, ",\n in float4 uv%d : TEXCOORD%d", i, i);
}
else
{
for (unsigned int i = 0; i < numTexgen; ++i)
for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i)
WRITE(p, ",\n in float%d uv%d : TEXCOORD%d", i < 4 ? 4 : 3 , i, i);
}
}
WRITE(p, " ) {\n");
if((bpmem.fog.c_proj_fsel.fsel != 0) || MustWriteToDepth)
{
// the screen space depth value = far z + (clip z / clip w) * z range
if (numTexgen < 7)
WRITE(p, "float zCoord = " I_ZBIAS "[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS "[1].y;\n");
else
WRITE(p, "float zCoord = " I_ZBIAS "[1].x + (uv2.w / uv3.w) * " I_ZBIAS "[1].y;\n");
}
char* pmainstart = p;
if(PSGRenderMode == PSGRENDER_ZCOMPLOC && !DepthTextureEnable)
int Pretest = AlphaPreTest();
if(Pretest >= 0 && !DepthTextureEnable)
{
// Within ZCompLoc pass, make this an empty function
WRITE(p, "ocol0 = 0;\n");
// only write to depth if shader Mode requires it
if(MustWriteToDepth)
WRITE(p, "depth = zCoord;\n");
WRITE(p, "}\n");
return text;
}
ALPHA_PRETEST_RESULT Pretest = PixelShaderManager::AlphaPreTest();
// Test if we can predict the alpha test result or if we are in depth only mode
if((Pretest != ALPHAPT_UNDEFINED) && !DepthTextureEnable)
{
if (Pretest == ALPHAPT_ALWAYSFAIL)
if (!Pretest)
{
// alpha test will always fail, just make this an empty function
// alpha test will always fail, so restart the shader and just make it an empty function
WRITE(p, "ocol0 = 0;\n");
// only write to depth if the shader mode requires it
if(MustWriteToDepth)
WRITE(p, "depth = zCoord;\n");
// if we are in dual source blend mode initialize the secondary color o the shader will fail
if(PSGRenderMode == PSGRENDER_DSTALPHA_DUAL_SOURCE_BLEND)
if(DepthTextureEnable)
WRITE(p, "depth = 1.f;\n");
if(dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
WRITE(p, "ocol1 = 0;\n");
WRITE(p, "discard;\n");
if(ApiType != API_D3D11)
WRITE(p, "return;\n");
}
else if (PSGRenderMode == PSGRENDER_DSTALPHA_ALPHA_PASS)
else if (dstAlphaMode == DSTALPHA_ALPHA_PASS)
{
//write constant alpha value
WRITE(p, " ocol0 = " I_ALPHA"[0].aaaa;\n");
// only write to depth if the shader mode requires it
if(MustWriteToDepth)
WRITE(p, "depth = zCoord;\n");
}
if((Pretest == ALPHAPT_ALWAYSFAIL) || (PSGRenderMode == PSGRENDER_DSTALPHA_ALPHA_PASS))
if(!Pretest || dstAlphaMode == DSTALPHA_ALPHA_PASS)
{
WRITE(p, "}\n");
return text;
}
}
}
WRITE(p, " float4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = float4(0.0f, 0.0f, 0.0f, 0.0f), textemp = float4(0.0f, 0.0f, 0.0f, 0.0f), rastemp = float4(0.0f, 0.0f, 0.0f, 0.0f), konsttemp = float4(0.0f, 0.0f, 0.0f, 0.0f);\n"
" float3 comp16 = float3(1.0f, 255.0f, 0.0f), comp24 = float3(1.0f, 255.0f, 255.0f*255.0f);\n"
@ -687,9 +653,9 @@ const char *GeneratePixelShaderCode(PSGRENDER_MODE PSGRenderMode, API_TYPE ApiTy
" float4 cc2=float4(0.0f,0.0f,0.0f,0.0f), cprev=float4(0.0f,0.0f,0.0f,0.0f);\n"
" float4 crastemp=float4(0.0f,0.0f,0.0f,0.0f),ckonsttemp=float4(0.0f,0.0f,0.0f,0.0f);\n\n");
if(PixelLightingEnabled)
if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
if (numTexgen < 7)
if (xfregs.numTexGen.numTexGens < 7)
{
WRITE(p,"float3 _norm0 = normalize(Normal.xyz);\n\n");
WRITE(p,"float3 pos = float3(clipPos.x,clipPos.y,Normal.w);\n");
@ -764,10 +730,21 @@ const char *GeneratePixelShaderCode(PSGRENDER_MODE PSGRenderMode, API_TYPE ApiTy
// emulation of unsigned 8 overflow when casting
WRITE(p, "prev = frac(4.0f + prev * (255.0f/256.0f)) * (256.0f/255.0f);\n");
if (MustWriteToDepth)
if(Pretest == -1)
{
WriteAlphaTest(p, ApiType, dstAlphaMode);
}
if((bpmem.fog.c_proj_fsel.fsel != 0) || DepthTextureEnable)
{
// the screen space depth value = far z + (clip z / clip w) * z range
WRITE(p, "float zCoord = " I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS"[1].y;\n");
}
if (DepthTextureEnable)
{
// use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format...
if (DepthTextureEnable)
if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable)
{
if (bpmem.ztex2.op == ZTEXTURE_ADD)
WRITE(p, "zCoord = dot(" I_ZBIAS"[0].xyzw, textemp.xyzw) + " I_ZBIAS"[1].w + zCoord;\n");
@ -780,9 +757,9 @@ const char *GeneratePixelShaderCode(PSGRENDER_MODE PSGRenderMode, API_TYPE ApiTy
WRITE(p, "zCoord = zCoord * (16777216.0f/16777215.0f);\n");
}
WRITE(p, "depth = zCoord;\n");
}
}
if (PSGRenderMode == PSGRENDER_DSTALPHA_ALPHA_PASS)
if (dstAlphaMode == DSTALPHA_ALPHA_PASS)
WRITE(p, " ocol0 = float4(prev.rgb, " I_ALPHA"[0].a);\n");
else
{
@ -792,25 +769,13 @@ const char *GeneratePixelShaderCode(PSGRENDER_MODE PSGRenderMode, API_TYPE ApiTy
// On D3D11, use dual-source color blending to perform dst alpha in a
// single pass
if (PSGRenderMode == PSGRENDER_DSTALPHA_DUAL_SOURCE_BLEND)
if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
{
// Colors will be blended against the alpha from ocol1...
WRITE(p, " ocol1 = ocol0;\n");
// ...and the alpha from ocol0 will be written to the framebuffer.
WRITE(p, " ocol0.a = " I_ALPHA"[0].a;\n");
}
// Alpha test could fail here if depth texture is enabled
if (Pretest == ALPHAPT_ALWAYSFAIL)
{
WRITE(p, "discard;\n");
if(ApiType != API_D3D11)
WRITE(p, "return;\n");
}
else if(Pretest == ALPHAPT_UNDEFINED)
{
WriteAlphaTest(p, ApiType, PSGRenderMode);
}
WRITE(p, "}\n");
if (text[sizeof(text) - 1] != 0x7C)
@ -1146,21 +1111,6 @@ static const char *tevAlphaFuncsTable[] =
"(true)" //ALPHACMP_ALWAYS 7
};
// THPS3 does not calculate ZCompLoc correctly if there is a margin
// of error included. This table removes that margin for ALPHACMP_LESS
// and ALPHACMP_GREATER. The other functions are to be confirmed.
static const char *tevAlphaFuncsTableZCompLoc[] =
{
"(false)", //ALPHACMP_NEVER 0
"(prev.a <= %s)", //ALPHACMP_LESS 1
"(abs( prev.a - %s ) < (0.5f/255.0f))", //ALPHACMP_EQUAL 2
"(prev.a < %s + (0.25f/255.0f))", //ALPHACMP_LEQUAL 3
"(prev.a >= %s)", //ALPHACMP_GREATER 4
"(abs( prev.a - %s ) >= (0.5f/255.0f))", //ALPHACMP_NEQUAL 5
"(prev.a > %s - (0.25f/255.0f))", //ALPHACMP_GEQUAL 6
"(true)" //ALPHACMP_ALWAYS 7
};
static const char *tevAlphaFunclogicTable[] =
{
" && ", // and
@ -1168,8 +1118,41 @@ static const char *tevAlphaFunclogicTable[] =
" != ", // xor
" == " // xnor
};
static int AlphaPreTest()
{
u32 op = bpmem.alphaFunc.logic;
u32 comp[2] = {bpmem.alphaFunc.comp0, bpmem.alphaFunc.comp1};
static void WriteAlphaTest(char *&p, API_TYPE ApiType,PSGRENDER_MODE PSGRenderMode)
// First kill all the simple cases
switch(op)
{
case 0: // AND
if (comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) return true;
if (comp[0] == ALPHACMP_NEVER || comp[1] == ALPHACMP_NEVER) return false;
break;
case 1: // OR
if (comp[0] == ALPHACMP_ALWAYS || comp[1] == ALPHACMP_ALWAYS) return true;
if (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER)return false;
break;
case 2: // XOR
if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_NEVER) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_ALWAYS))
return true;
if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER))
return false;
break;
case 3: // XNOR
if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_NEVER) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_ALWAYS))
return false;
if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER))
return true;
break;
default: PanicAlert("bad logic for alpha test? %08x", op);
}
return -1;
}
static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode)
{
static const char *alphaRef[2] =
{
@ -1181,28 +1164,38 @@ static void WriteAlphaTest(char *&p, API_TYPE ApiType,PSGRENDER_MODE PSGRenderMo
WRITE(p, "if(!( ");
int compindex = bpmem.alphaFunc.comp0 % 8;
// Lookup the first component from the alpha function table
WRITE(p, ZCompLocEnabled ? tevAlphaFuncsTableZCompLoc[compindex] : tevAlphaFuncsTable[compindex], alphaRef[0]);
WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[0]);//lookup the first component from the alpha function table
WRITE(p, "%s", tevAlphaFunclogicTable[bpmem.alphaFunc.logic % 4]);//lookup the logic op
compindex = bpmem.alphaFunc.comp1 % 8;
// Lookup the second component from the alpha function table
WRITE(p, ZCompLocEnabled ? tevAlphaFuncsTableZCompLoc[compindex] : tevAlphaFuncsTable[compindex], alphaRef[1]);
WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[1]);//lookup the second component from the alpha function table
WRITE(p, ")) {\n");
// ZCompLoc is a way to control whether depth test is done before
WRITE(p, "ocol0 = 0;\n");
if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
WRITE(p, "ocol1 = 0;\n");
if (DepthTextureEnable)
WRITE(p, "depth = 1.f;\n");
// HAXX: zcomploc is a way to control whether depth test is done before
// or after texturing and alpha test. PC GPU does depth test before texturing ONLY if depth value is
// not updated during shader execution.
// 1 - if ZCompLoc is enabled make a first pass, with color channel write disabled updating only
// We implement "depth test before texturing" by discarding the fragment
// when the alpha test fail. This is not a correct implementation because
// even if the depth test fails the fragment could be alpha blended.
// this implemnetation is a trick to keep speed.
// the correct, but slow, way to implement a correct zComploc is :
// 1 - if zcomplock is enebled make a first pass, with color channel write disabled updating only
// depth channel.
// 2 - in the next pass disable depth chanel update, but proccess the color data normally
// this way is the only CORRECT way to emulate perfectly the ZCompLoc behaviour
WRITE(p, "discard;\n");
if (ApiType != API_D3D11)
WRITE(p, "return;\n");
// this way is the only CORRECT way to emulate perfectly the zcomplock behaviour
if (!(bpmem.zcontrol.zcomploc && bpmem.zmode.updateenable))
{
WRITE(p, "discard;\n");
if (ApiType != API_D3D11)
WRITE(p, "return;\n");
}
WRITE(p, "}\n");