big changes here:

- Remove the useless "check for CPU modifications" option from EFB to RAM, as that check must always be enabled
- Use constant names in DX11 for the buffer length calculation instead, to make the code easier to read
- Implemented scaled EFB copy in OpenGL; it is still buggy in some games. The option is not in the GUI yet, but I will add it once it works perfectly
- Change the depth calculation behavior:
If the game uses Z textures, the behavior is exactly the same as before.
If the game does not use Z textures, Z values are calculated in the vertex shader. The advantage of this approach is that early Z culling is applied,
improving fill rate. This should speed things up, even with SSAA and MSAA enabled.
Please test for regressions and enjoy.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5896 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Rodolfo Osvaldo Bogado
2010-07-18 00:18:31 +00:00
parent 4b1a3152b6
commit f78133f261
23 changed files with 128 additions and 149 deletions

View File

@ -342,6 +342,7 @@ static const char *swapColors = "rgba";
static char swapModeTable[4][5];
static char text[16384];
static bool DepthTextureEnable;
struct RegisterState
{
@ -388,7 +389,7 @@ const char *GeneratePixelShaderCode(u32 texture_mask, bool dstAlphaEnable, API_T
nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt;
}
}
DepthTextureEnable = bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable;
// Declare samplers
if (texture_mask && ApiType == API_OPENGL)
{
@ -454,10 +455,10 @@ const char *GeneratePixelShaderCode(u32 texture_mask, bool dstAlphaEnable, API_T
WRITE(p, "void main(\n");
if(ApiType != API_D3D11)
WRITE(p, " out float4 ocol0 : COLOR0,\n out float depth : DEPTH,\n in float4 rawpos : POSITION,\n");
WRITE(p, " out float4 ocol0 : COLOR0,%s\n in float4 rawpos : POSITION,\n",DepthTextureEnable ? "\n out float depth : DEPTH," : "");
else
WRITE(p, " out float4 ocol0 : SV_Target,\n out float depth : SV_Depth,\n in float4 rawpos : SV_Position,\n");
WRITE(p, " out float4 ocol0 : SV_Target,%s\n in float4 rawpos : SV_Position,\n",DepthTextureEnable ? "\n out float depth : SV_Depth," : "");
WRITE(p, " in float4 colors_0 : COLOR0,\n");
WRITE(p, " in float4 colors_1 : COLOR1");
@ -477,7 +478,8 @@ const char *GeneratePixelShaderCode(u32 texture_mask, bool dstAlphaEnable, API_T
}
WRITE(p, " ) {\n");
char* pmainstart = p;
char* pmainstart = p;
WRITE(p, " float4 c0 = "I_COLORS"[1], c1 = "I_COLORS"[2], c2 = "I_COLORS"[3], prev = float4(0.0f, 0.0f, 0.0f, 0.0f), textemp = float4(0.0f, 0.0f, 0.0f, 0.0f), rastemp = float4(0.0f, 0.0f, 0.0f, 0.0f), konsttemp = float4(0.0f, 0.0f, 0.0f, 0.0f);\n"
" float3 comp16 = float3(1.0f, 255.0f, 0.0f), comp24 = float3(1.0f, 255.0f, 255.0f*255.0f);\n"
@ -561,20 +563,23 @@ const char *GeneratePixelShaderCode(u32 texture_mask, bool dstAlphaEnable, API_T
// alpha test will always fail, so restart the shader and just make it an empty function
p = pmainstart;
WRITE(p, "ocol0 = 0;\n");
WRITE(p, "depth = 1.f;\n");
if(DepthTextureEnable)
WRITE(p, "depth = 1.f;\n");
WRITE(p, "discard;\n");
if(ApiType != API_D3D11)
WRITE(p, "return;\n");
}
else
{
if (numTexgen >= 7)
WRITE(p, "float4 clipPos = float4(uv0.w, uv1.w, uv2.w, uv3.w);\n");
if((bpmem.fog.c_proj_fsel.fsel != 0) || DepthTextureEnable)
{
if (numTexgen >= 7)
WRITE(p, "float4 clipPos = float4(uv0.w, uv1.w, uv2.w, uv3.w);\n");
// the screen space depth value = far z + (clip z / clip w) * z range
WRITE(p, "float zCoord = "I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * "I_ZBIAS"[1].y;\n");
}
// the screen space depth value = far z + (clip z / clip w) * z range
WRITE(p, "float zCoord = "I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * "I_ZBIAS"[1].y;\n");
if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable)
if (DepthTextureEnable)
{
// use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format...
if (bpmem.ztex2.op == ZTEXTURE_ADD)
@ -586,8 +591,8 @@ const char *GeneratePixelShaderCode(u32 texture_mask, bool dstAlphaEnable, API_T
WRITE(p, "zCoord = zCoord * (16777215.0f/16777216.0f);\n");
WRITE(p, "zCoord = frac(zCoord);\n");
WRITE(p, "zCoord = zCoord * (16777216.0f/16777215.0f);\n");
WRITE(p, "depth = zCoord;\n");
}
WRITE(p, "depth = zCoord;\n");
if (dstAlphaEnable)
WRITE(p, " ocol0 = float4(prev.rgb, "I_ALPHA"[0].a);\n");
@ -954,7 +959,7 @@ static void WriteStage(char *&p, int n, u32 texture_mask, API_TYPE ApiType)
void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, u32 texture_mask, API_TYPE ApiType)
{
if (texture_mask & (1<<texmap)) {
if (texture_mask & (1<<texmap)) {// opengl only
// non pow 2
bool bwraps = (texture_mask & (0x100<<texmap)) ? true : false;
bool bwrapt = (texture_mask & (0x10000<<texmap)) ? true : false;
@ -973,21 +978,11 @@ void SampleTexture(char *&p, const char *destination, const char *texcoords, con
else {
WRITE(p, "tempcoord.y = %s.y;\n", texcoords);
}
if (ApiType == API_D3D11)
WRITE(p, "%s= Tex%d.Sample(samp%d,tempcoord.xy).%s;\n", destination, texmap,texmap, texswap);
else if (ApiType == API_D3D9)
WRITE(p, "%s=tex2D(samp%d,tempcoord.xy).%s;\n", destination, texmap, texswap);
else
WRITE(p, "%s=texRECT(samp%d,tempcoord.xy).%s;\n", destination, texmap, texswap);
WRITE(p, "%s=texRECT(samp%d,tempcoord.xy).%s;\n", destination, texmap, texswap);
}
else
{
if (ApiType == API_D3D11)
WRITE(p, "%s=Tex%d.Sample(samp%d,%s.xy).%s;\n", destination,texmap,texmap, texcoords, texswap);
else if (ApiType == API_D3D9)
WRITE(p, "%s=tex2D(samp%d,%s.xy).%s;\n", destination, texmap, texcoords, texswap);
else
WRITE(p, "%s=texRECT(samp%d,%s.xy).%s;\n", destination, texmap, texcoords, texswap);
WRITE(p, "%s=texRECT(samp%d,%s.xy).%s;\n", destination, texmap, texcoords, texswap);
}
}
else
@ -1061,9 +1056,7 @@ static bool WriteAlphaTest(char *&p, API_TYPE ApiType)
compindex = bpmem.alphaFunc.comp1 % 8;
WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[1]);//lookup the second component from the alpha function table
WRITE(p, ")){ocol0 = 0;depth = 1.f;discard;%s}\n",(ApiType != API_D3D11)? "return;" : "");
WRITE(p, ")){ocol0 = 0;%sdiscard;%s}\n",DepthTextureEnable ? "depth = 1.f;" : "",(ApiType != API_D3D11)? "return;" : "");
return true;
}
@ -1087,13 +1080,13 @@ static void WriteFog(char *&p)
{
// perspective
// ze = A/(B - Zs)
WRITE (p, " float ze = "I_FOG"[1].x / ("I_FOG"[1].y - depth);\n");
WRITE (p, " float ze = "I_FOG"[1].x / ("I_FOG"[1].y - zCoord);\n");
}
else
{
// orthographic
// ze = a*Zs
WRITE (p, " float ze = "I_FOG"[1].x * depth;\n");
WRITE (p, " float ze = "I_FOG"[1].x * zCoord;\n");
}
WRITE (p, " float fog = saturate(ze - "I_FOG"[1].z);\n");

View File

@ -37,9 +37,8 @@
#define C_INDTEXSCALE (C_ZBIAS + 2)
#define C_INDTEXMTX (C_INDTEXSCALE + 2)
#define C_FOG (C_INDTEXMTX + 6)
#define C_ENVCONST_END (C_FOG + 2)
#define C_COLORMATRIX (C_FOG + 2)
#define C_PENVCONST_END (C_COLORMATRIX + 16)
#define PIXELSHADERUID_MAX_VALUES (5 + 32 + 6 + 11)
// DO NOT make anything in this class virtual.

View File

@ -135,6 +135,7 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE api_type)
WRITE(p, "uniform s_"I_LIGHTS" "I_LIGHTS" : register(c%d);\n", C_LIGHTS);
WRITE(p, "uniform s_"I_MATERIALS" "I_MATERIALS" : register(c%d);\n", C_MATERIALS);
WRITE(p, "uniform s_"I_PROJECTION" "I_PROJECTION" : register(c%d);\n", C_PROJECTION);
WRITE(p, "uniform float4 "I_DEPTHPARAMS" : register(c%d);\n", C_DEPTHPARAMS);
WRITE(p, "VS_OUTPUT main(\n");
@ -462,14 +463,11 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE api_type)
WRITE(p, "o.tex3.w = o.pos.w;\n");
}
//write the true depth value, if the game uses depth textures pixel shaders will override with the correct values
//if not early z culling will improve speed
if (is_d3d)
{
WRITE(p, "o.pos.z = o.pos.z + o.pos.w;\n");
}
else
{
// scale to gl clip space - which is -o.pos.w to o.pos.w, hence the addition.
WRITE(p, "o.pos.z = (o.pos.z * 2.0f) + o.pos.w;\n");
WRITE(p, "o.pos.z = "I_DEPTHPARAMS".x * o.pos.w + o.pos.z * "I_DEPTHPARAMS".y;\n");
}
WRITE(p, "return o;\n}\n");

View File

@ -35,7 +35,7 @@
#define I_TRANSFORMMATRICES "ctrmtx"
#define I_NORMALMATRICES "cnmtx"
#define I_POSTTRANSFORMMATRICES "cpostmtx"
#define I_FOGPARAMS "cfog"
#define I_DEPTHPARAMS "cDepth"
#define C_POSNORMALMATRIX 0
#define C_PROJECTION (C_POSNORMALMATRIX + 6)
@ -45,8 +45,8 @@
#define C_TRANSFORMMATRICES (C_TEXMATRICES + 24)
#define C_NORMALMATRICES (C_TRANSFORMMATRICES + 64)
#define C_POSTTRANSFORMMATRICES (C_NORMALMATRICES + 32)
#define C_FOGPARAMS (C_POSTTRANSFORMMATRICES + 64)
#define C_DEPTHPARAMS (C_POSTTRANSFORMMATRICES + 64)
#define C_VENVCONST_END (C_DEPTHPARAMS + 4)
class VERTEXSHADERUID
{

View File

@ -276,7 +276,7 @@ void VertexShaderManager::SetConstants()
if (bViewportChanged)
{
bViewportChanged = false;
SetVSConstant4f(C_DEPTHPARAMS,xfregs.rawViewport[5]/ 16777216.0f,xfregs.rawViewport[2]/ 16777216.0f,0.0f,0.0f);
// This is so implementation-dependent that we can't have it here.
UpdateViewport();
}
@ -521,14 +521,6 @@ void VertexShaderManager::SetViewport(float* _Viewport, int constantIndex)
{
xfregs.rawViewport[constantIndex] = _Viewport[0];
}
/*//Tino: i think this is not needed so leave this commented till confirmed
// Workaround for paper mario, yep this is bizarre.
for (size_t i = 0; i < ARRAYSIZE(xfregs.rawViewport); ++i)
{
if (*(u32*)(_Viewport + i) == 0x7f800000) // invalid fp number
return;
}
memcpy(xfregs.rawViewport, _Viewport, sizeof(xfregs.rawViewport));*/
bViewportChanged = true;
}

View File

@ -93,7 +93,6 @@ void VideoConfig::Load(const char *ini_file)
iniFile.Get("Hacks", "EFBCopyDisable", &bEFBCopyDisable, false);
iniFile.Get("Hacks", "EFBCopyDisableHotKey", &bOSDHotKey, 0);
iniFile.Get("Hacks", "EFBToTextureEnable", &bCopyEFBToTexture, false);
iniFile.Get("Hacks", "EFBVerifyTextureModificationsByCPU",&bVerifyTextureModificationsByCPU,false);
iniFile.Get("Hacks", "EFBScaledCopy", &bCopyEFBScaled, true);
iniFile.Get("Hacks", "FIFOBPHack", &bFIFOBPhack, false);
iniFile.Get("Hacks", "ProjectionHack", &iPhackvalue, 0);
@ -125,9 +124,7 @@ void VideoConfig::GameIniLoad(const char *ini_file)
if (iniFile.Exists("Video", "EFBCopyDisableHotKey"))
iniFile.Get("Video", "EFBCopyDisableHotKey", &bOSDHotKey);
if (iniFile.Exists("Video", "EFBToTextureEnable"))
iniFile.Get("Video", "EFBToTextureEnable", &bCopyEFBToTexture);
if (iniFile.Exists("Video", "EFBVerifyTextureModificationsByCPU"))
iniFile.Get("Video", "EFBVerifyTextureModificationsByCPU", &bVerifyTextureModificationsByCPU);
iniFile.Get("Video", "EFBToTextureEnable", &bCopyEFBToTexture);
if (iniFile.Exists("Video", "EFBScaledCopy"))
iniFile.Get("Video", "EFBScaledCopy", &bCopyEFBScaled);
if (iniFile.Exists("Video", "SafeTextureCache"))
@ -202,8 +199,7 @@ void VideoConfig::Save(const char *ini_file)
iniFile.Set("Hacks", "EFBAccessEnable", bEFBAccessEnable);
iniFile.Set("Hacks", "EFBCopyDisable", bEFBCopyDisable);
iniFile.Set("Hacks", "EFBCopyDisableHotKey", bOSDHotKey);
iniFile.Set("Hacks", "EFBToTextureEnable", bCopyEFBToTexture);
iniFile.Set("Hacks", "EFBVerifyTextureModificationsByCPU", bVerifyTextureModificationsByCPU);
iniFile.Set("Hacks", "EFBToTextureEnable", bCopyEFBToTexture);
iniFile.Set("Hacks", "EFBScaledCopy", bCopyEFBScaled);
iniFile.Set("Hacks", "FIFOBPHack", bFIFOBPhack);
iniFile.Set("Hacks", "ProjectionHack", iPhackvalue);

View File

@ -115,8 +115,7 @@ struct VideoConfig
bool bEFBCopyDisable; // should reverse polarity of this one :) true=disabled can be confusing
bool bOSDHotKey;
bool bHack;
bool bCopyEFBToTexture;
bool bVerifyTextureModificationsByCPU;
bool bCopyEFBToTexture;
bool bCopyEFBScaled;
bool bSafeTextureCache;
int iSafeTextureCache_ColorSamples;