From f5f99e8f044a8c6ef8bc3b73b6c1f5540ac9f185 Mon Sep 17 00:00:00 2001 From: donkopunchstania Date: Sun, 8 Feb 2009 22:08:20 +0000 Subject: [PATCH] clip space coordinates are now available in fragment shader because depth needs to be computed there instead of the vertex shader. computing it in the vertex shader causes incorrect results sometimes. worked on z textures but 8 bit z texture still is not correct because it breaks SSBM. RE0 now looks ok. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@2163 8ced0084-cf51-0410-be5f-012b33b47a6e --- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 48 +++++++------- .../VideoCommon/Src/PixelShaderManager.cpp | 62 ++++++++++++++----- .../Core/VideoCommon/Src/PixelShaderManager.h | 2 + .../Core/VideoCommon/Src/VertexShaderGen.cpp | 38 +++++++----- .../VideoCommon/Src/VertexShaderManager.cpp | 6 +- .../Plugins/Plugin_VideoOGL/Src/BPStructs.cpp | 3 + .../Plugins/Plugin_VideoOGL/Src/XFStructs.cpp | 2 + 7 files changed, 101 insertions(+), 60 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index d409d1bacc..b7b51b5e4c 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -385,10 +385,6 @@ const char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool // bool bRenderZToCol0 = ; // output z and alpha to color0 assert( !bRenderZToCol0 || bRenderZ ); - int ztexcoord = -1; - if (bInputZ) - ztexcoord = numTexgen == 0 ? 0 : numTexgen-1; - int nIndirectStagesUsed = 0; if (bpmem.genMode.numindstages > 0) { for (int i = 0; i < numStages; ++i) { @@ -442,23 +438,16 @@ const char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool if (bOutputZ ) WRITE(p, " out float depth : DEPTH,\n"); - // if zcoord might come from vertex shader in texcoord - if (bInputZ) { - if (numTexgen) { - for (int i = 0; i < numTexgen; ++i) - WRITE(p, " in float%d uv%d : TEXCOORD%d, \n", i==ztexcoord?4:3, i,i); - } - else - WRITE(p, " in float4 uv0 : TEXCOORD0,"); //HACK - } - else { - if (numTexgen) { - for (int i = 0; i < numTexgen; ++i) - WRITE(p, " in float3 uv%d : TEXCOORD%d,\n",i,i); - } - else - WRITE(p, " in float3 uv0 : TEXCOORD0,\n"); //HACK - } + // compute window position if needed because binding semantic WPOS is not widely supported + if (numTexgen < 7) { + for (int i = 0; i < numTexgen; ++i) + WRITE(p, " in float3 uv%d : TEXCOORD%d, \n", i, i); + WRITE(p, " in float4 clipPos : TEXCOORD%d, \n", numTexgen); + } else { + // wpos is in w of first 4 texcoords + for (int i = 0; i < numTexgen; ++i) + WRITE(p, " in float%d uv%d : TEXCOORD%d, \n", i<4?4:3, i, i); + } WRITE(p, " in float4 colors[2] : COLOR0){\n"); @@ -499,14 +488,23 @@ const char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool for (int i = 0; i < numStages; i++) WriteStage(p, i, texture_mask); //build the equation for this stage + if (numTexgen >= 7) { + WRITE(p, "float4 clipPos = float4(uv0.w, uv1.w, uv2.w, uv3.w);\n"); + } + + if (bInputZ) { + // the screen space depth value = far z + (clip z / clip w) * z range + WRITE(p, "float zCoord = "I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * "I_ZBIAS"[1].y;\n"); + } + if (bOutputZ) { // use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format... if (bpmem.ztex2.op == ZTEXTURE_ADD) { - WRITE(p, "depth = frac(dot("I_ZBIAS"[0].xyzw, textemp.xyzw) + "I_ZBIAS"[1].w + uv%d.w);\n", ztexcoord); + WRITE(p, "depth = frac(dot("I_ZBIAS"[0].xyzw, textemp.xyzw) + "I_ZBIAS"[1].w + zCoord);\n"); } else { _assert_(bpmem.ztex2.op == ZTEXTURE_REPLACE); - WRITE(p, "depth = frac(dot("I_ZBIAS"[0].xyz, textemp.xyz) + "I_ZBIAS"[0].w);\n"); + WRITE(p, "depth = frac(dot("I_ZBIAS"[0].xyzw, textemp.xyzw) + "I_ZBIAS"[1].w);\n"); } } @@ -539,14 +537,14 @@ const char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool if (bOutputZ ) WRITE(p, "ocol0.xyz = frac(float3(256.0f*256.0f, 256.0f, 1.0f) * depth);\n"); else - WRITE(p, "ocol0.xyz = frac(float3(256.0f*256.0f, 256.0f, 1.0f) * uv%d.w);\n", ztexcoord); + WRITE(p, "ocol0.xyz = frac(float3(256.0f*256.0f, 256.0f, 1.0f) * zCoord);\n"); WRITE(p, "ocol0.w = prev.w;\n"); } else { if (bOutputZ) WRITE(p, "ocol1 = frac(float4(256.0f*256.0f, 256.0f, 1.0f, 0.0f) * depth);\n"); else - WRITE(p, "ocol1 = frac(float4(256.0f*256.0f, 256.0f, 1.0f, 0.0f) * uv%d.w);\n", ztexcoord); + WRITE(p, "ocol1 = frac(float4(256.0f*256.0f, 256.0f, 1.0f, 0.0f) * zCoord);\n"); } } WRITE(p, "}\n"); diff --git a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp index ba2c528e11..18c85a9dcf 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp @@ -30,6 +30,9 @@ static int s_nIndTexMtxChanged = 0; static bool s_bAlphaChanged; static bool s_bZBiasChanged; static bool s_bIndTexScaleChanged; +static bool s_bZTextureTypeChanged; +static bool s_bDepthRangeChanged; +static float lastDepthRange[2] = {0}; // 0 = far z, 1 = far - near static float lastRGBAfull[2][4][4]; static u8 s_nTexDimsChanged; static u32 lastAlpha = 0; @@ -49,7 +52,7 @@ void PixelShaderManager::Init() s_nColorsChanged[0] = s_nColorsChanged[1] = 0; s_nTexDimsChanged = 0; s_nIndTexMtxChanged = 15; - s_bAlphaChanged = s_bZBiasChanged = s_bIndTexScaleChanged = true; + s_bAlphaChanged = s_bZBiasChanged = s_bIndTexScaleChanged = s_bZTextureTypeChanged = s_bDepthRangeChanged = true; for (int i = 0; i < 8; ++i) maptocoord[i] = -1; maptocoord_mask = 0; @@ -109,29 +112,36 @@ void PixelShaderManager::SetConstants() if (s_bAlphaChanged) { SetPSConstant4f(C_ALPHA, (lastAlpha&0xff)/255.0f, ((lastAlpha>>8)&0xff)/255.0f, 0, ((lastAlpha>>16)&0xff)/255.0f); + s_bAlphaChanged = false; } - if (s_bZBiasChanged) { - u32 bits; - float ffrac = 255.0f/256.0f; + if (s_bZTextureTypeChanged) { + static float ffrac = 255.0f/256.0f; float ftemp[4]; switch (bpmem.ztex2.type) { case 0: - bits = 8; - ftemp[0] = ffrac/(256.0f*256.0f); ftemp[1] = ffrac/256.0f; ftemp[2] = ffrac; ftemp[3] = 0; + // 8 bits + // this breaks the menu in SSBM when it is set correctly to + //ftemp[0] = ffrac/(65536.0f); ftemp[1] = 0; ftemp[2] = 0; ftemp[3] = 0; + ftemp[0] = ffrac/65536.0f; ftemp[1] = ffrac/256.0f; ftemp[2] = ffrac; ftemp[3] = 0; break; case 1: - bits = 16; - ftemp[0] = 0; ftemp[1] = ffrac/(256.0f*256.0f); ftemp[2] = ffrac/256.0f; ftemp[3] = ffrac; + // 16 bits + ftemp[0] = ffrac/65536.0f; ftemp[1] = 0; ftemp[2] = 0; ftemp[3] = ffrac/256.0f; break; case 2: - bits = 24; - ftemp[0] = ffrac/(256.0f*256.0f); ftemp[1] = ffrac/256.0f; ftemp[2] = ffrac; ftemp[3] = 0; + // 24 bits + ftemp[0] = ffrac; ftemp[1] = ffrac/256.0f; ftemp[2] = ffrac/65536.0f; ftemp[3] = 0; break; } - //ERROR_LOG("pixel=%x,%x, bias=%x\n", bpmem.zcontrol.pixel_format, bpmem.ztex2.type, lastZBias); - SetPSConstant4fv(C_ZBIAS, ftemp); - SetPSConstant4f(C_ZBIAS+1, 0, 0, 0, (float)( (((int)lastZBias<<8)>>8))/16777216.0f); + SetPSConstant4fv(C_ZBIAS, ftemp); + s_bZTextureTypeChanged = false; + } + + if (s_bZBiasChanged || s_bDepthRangeChanged) { + //ERROR_LOG("pixel=%x,%x, bias=%x\n", bpmem.zcontrol.pixel_format, bpmem.ztex2.type, lastZBias); + SetPSConstant4f(C_ZBIAS+1, lastDepthRange[0] / 16777215.0f, lastDepthRange[1] / 16777215.0f, 0, (float)( (((int)lastZBias<<8)>>8))/16777216.0f); + s_bZBiasChanged = s_bDepthRangeChanged = false; } // indirect incoming texture scales, update all! @@ -276,6 +286,24 @@ void PixelShaderManager::SetZTextureBias(u32 bias) } } +void PixelShaderManager::SetViewport(float* viewport) +{ + // reversed gxsetviewport(xorig, yorig, width, height, nearz, farz) + // [0] = width/2 + // [1] = height/2 + // [2] = 16777215 * (farz - nearz) + // [3] = xorig + width/2 + 342 + // [4] = yorig + height/2 + 342 + // [5] = 16777215 * farz + + if(lastDepthRange[0] != viewport[5] || lastDepthRange[1] != viewport[2]) { + lastDepthRange[0] = viewport[5]; + lastDepthRange[1] = viewport[2]; + + s_bDepthRangeChanged = true; + } +} + void PixelShaderManager::SetIndTexScaleChanged() { s_bIndTexScaleChanged = true; @@ -308,7 +336,11 @@ void PixelShaderManager::SetTevIndirectChanged(int id) void PixelShaderManager::SetZTextureOpChanged() { - s_bZBiasChanged = true; +} + +void PixelShaderManager::SetZTextureTypeChanged() +{ + s_bZTextureTypeChanged = true; } void PixelShaderManager::SetTexturesUsed(u32 nonpow2tex) @@ -318,7 +350,7 @@ void PixelShaderManager::SetTexturesUsed(u32 nonpow2tex) if (nonpow2tex & (0x10101 << i)) { // this check was previously implicit, but should it be here? if (s_nTexDimsChanged ) - s_nTexDimsChanged |= 1 << i; + s_nTexDimsChanged |= 1 << i; } } s_texturemask = nonpow2tex; diff --git a/Source/Core/VideoCommon/Src/PixelShaderManager.h b/Source/Core/VideoCommon/Src/PixelShaderManager.h index 4a8a597e03..afe694cd4e 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderManager.h +++ b/Source/Core/VideoCommon/Src/PixelShaderManager.h @@ -40,6 +40,7 @@ public: static void SetDestAlpha(const ConstantAlpha& alpha); static void SetTexDims(int texmapid, u32 width, u32 height, u32 wraps, u32 wrapt); static void SetZTextureBias(u32 bias); + static void SetViewport(float* viewport); static void SetIndTexScaleChanged(); static void SetIndMatrixChanged(int matrixidx); @@ -49,6 +50,7 @@ public: static void SetTevOrderChanged(int id); static void SetTevIndirectChanged(int id); static void SetZTextureOpChanged(); + static void SetZTextureTypeChanged(); static void SetTexturesUsed(u32 nonpow2tex); static void SetTexDimsChanged(int texmapid); diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index a3fdfc13f0..93cc52b3f8 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -91,9 +91,6 @@ const char *GenerateVertexShader(u32 components, bool has_zbuffer_target) if (xfregs.nNumChans > 1) lightMask |= xfregs.colChans[1].color.GetFullLightMask() | xfregs.colChans[1].alpha.GetFullLightMask(); - bool bOutputZ = bpmem.ztex2.op==ZTEXTURE_ADD || has_zbuffer_target; - int ztexcoord = -1; - char *p = text; WRITE(p, "//Vertex Shader: comp:%x, \n", components); WRITE(p, "typedef struct {\n" @@ -139,18 +136,15 @@ const char *GenerateVertexShader(u32 components, bool has_zbuffer_target) WRITE(p, " float4 pos : POSITION;\n"); WRITE(p, " float4 colors[2] : COLOR0;\n"); - // if outputting Z, embed the Z coordinate in the w component of a texture coordinate - // if number of tex gens occupies all the texture coordinates, use the last tex coord - // otherwise use the next available tex coord - for (int i = 0; i < xfregs.numTexGens; ++i) { - WRITE(p, " float%d tex%d : TEXCOORD%d;\n", (i==(xfregs.numTexGens-1)&&bOutputZ)?4:3, i, i); - } - if (bOutputZ && xfregs.numTexGens == 0) { - ztexcoord = 0; - WRITE(p, " float4 tex%d : TEXCOORD%d;\n", ztexcoord, ztexcoord); - } - else if (bOutputZ) - ztexcoord = xfregs.numTexGens - 1; + if (xfregs.numTexGens < 7) { + for (int i = 0; i < xfregs.numTexGens; ++i) + WRITE(p, " float3 tex%d : TEXCOORD%d;\n", i, i); + WRITE(p, " float4 clipPos : TEXCOORD%d;\n", xfregs.numTexGens); + } else { + // clip position is in w of first 4 texcoords + for (int i = 0; i < xfregs.numTexGens; ++i) + WRITE(p, " float%d tex%d : TEXCOORD%d;\n", i<4?4:3, i, i); + } WRITE(p, "};\n"); WRITE(p, "\n"); @@ -429,8 +423,15 @@ const char *GenerateVertexShader(u32 components, bool has_zbuffer_target) WRITE(p, "}\n"); } - if (ztexcoord >= 0 ) - WRITE(p, "o.tex%d.w = o.pos.z/o.pos.w;\n", ztexcoord); + // clipPos/w needs to be done in pixel shader, not here + if (xfregs.numTexGens < 7) { + WRITE(p, "o.clipPos = o.pos;\n"); + } else { + WRITE(p, "o.tex0.w = o.pos.x;\n"); + WRITE(p, "o.tex1.w = o.pos.y;\n"); + WRITE(p, "o.tex2.w = o.pos.z;\n"); + WRITE(p, "o.tex3.w = o.pos.w;\n"); + } // if (bpmem.fog.c_proj_fsel.fsel != 0) { // switch (bpmem.fog.c_proj_fsel.fsel) { @@ -449,6 +450,9 @@ const char *GenerateVertexShader(u32 components, bool has_zbuffer_target) // WRITE(p, "o.fog = o.pos.z/o.pos.w;\n"); // } + // scale to gl clip space + WRITE(p, "o.pos.z = (o.pos.z * 2.0f) + o.pos.w;\n"); + WRITE(p, "return o;\n}\n"); if (text[sizeof(text) - 1] != 0x7C) diff --git a/Source/Core/VideoCommon/Src/VertexShaderManager.cpp b/Source/Core/VideoCommon/Src/VertexShaderManager.cpp index 51c373f93c..9ee50ccb5e 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderManager.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderManager.cpp @@ -252,7 +252,7 @@ void VertexShaderManager::SetConstants(bool proj_hax_1, bool proj_hax_2) //---------Projection[11]--------- // No hacks if ((!proj_hax_1 && !proj_hax_2) || (proj_hax_1 && proj_hax_2)) - g_fProjectionMatrix[11] = -(-1.0f - xfregs.rawProjection[5]); + g_fProjectionMatrix[11] = -(-0.5f - xfregs.rawProjection[5]); // Before R945 Hack if (proj_hax_1 && !proj_hax_2) @@ -260,7 +260,7 @@ void VertexShaderManager::SetConstants(bool proj_hax_1, bool proj_hax_2) // R844 Hack if (!proj_hax_1 && proj_hax_2) - g_fProjectionMatrix[11] = -xfregs.rawProjection[5]; + g_fProjectionMatrix[11] = xfregs.rawProjection[5]; //-------------------------------- @@ -431,4 +431,4 @@ void VertexShaderManager::SetMaterialColor(int index, u32 data) s_fMaterials[15] = ((data)&0xFF)/255.0f; break; } -} \ No newline at end of file +} diff --git a/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp b/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp index c7b29e6f12..d41de5d10e 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp @@ -341,6 +341,9 @@ void BPWritten(int addr, int changes, int newval) if (changes) { VertexManager::Flush(); ((u32*)&bpmem)[addr] = newval; + if (changes & 3) { + PixelShaderManager::SetZTextureTypeChanged(); + } #if defined(_DEBUG) || defined(DEBUGFAST) const char* pzop[] = {"DISABLE", "ADD", "REPLACE", "?"}; const char* pztype[] = {"Z8", "Z16", "Z24", "?"}; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/XFStructs.cpp b/Source/Plugins/Plugin_VideoOGL/Src/XFStructs.cpp index 31d2ee5d7f..63c7c51eca 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/XFStructs.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/XFStructs.cpp @@ -20,6 +20,7 @@ #include "XFMemory.h" #include "VertexManager.h" #include "VertexShaderManager.h" +#include "PixelShaderManager.h" // LoadXFReg 0x10 void LoadXFReg(u32 transferSize, u32 baseAddress, u32 *pData) @@ -153,6 +154,7 @@ void LoadXFReg(u32 transferSize, u32 baseAddress, u32 *pData) case 0x101a: VertexManager::Flush(); VertexShaderManager::SetViewport((float*)&pData[i]); + PixelShaderManager::SetViewport((float*)&pData[i]); i += 6; break;