From 3a742e99bbf223bd268024a32ad6d1a5362be47f Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Tue, 30 Nov 2021 17:51:43 -0800 Subject: [PATCH 1/3] Software: Remove config to disable ZComploc and ZFreeze These aren't particularly useful, and make the code a bit more confusing. If for some reason someone wants to test what happens when these functions are disabled, it's easier to just edit the code that implements them. They aren't exposed in the UI, so one would need to restart Dolphin to do it anyways. --- Source/Core/Core/Config/GraphicsSettings.cpp | 2 -- Source/Core/Core/Config/GraphicsSettings.h | 2 -- Source/Core/VideoBackends/Software/Rasterizer.cpp | 4 ++-- Source/Core/VideoBackends/Software/Tev.cpp | 3 +-- Source/Core/VideoCommon/VideoConfig.cpp | 2 -- Source/Core/VideoCommon/VideoConfig.h | 2 -- 6 files changed, 3 insertions(+), 12 deletions(-) diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp index 18a9e693ff..b1d7e879ea 100644 --- a/Source/Core/Core/Config/GraphicsSettings.cpp +++ b/Source/Core/Core/Config/GraphicsSettings.cpp @@ -90,8 +90,6 @@ const Info GFX_SHADER_PRECOMPILER_THREADS{ const Info GFX_SAVE_TEXTURE_CACHE_TO_STATE{ {System::GFX, "Settings", "SaveTextureCacheToState"}, true}; -const Info GFX_SW_ZCOMPLOC{{System::GFX, "Settings", "SWZComploc"}, true}; -const Info GFX_SW_ZFREEZE{{System::GFX, "Settings", "SWZFreeze"}, true}; const Info GFX_SW_DUMP_OBJECTS{{System::GFX, "Settings", "SWDumpObjects"}, false}; const Info GFX_SW_DUMP_TEV_STAGES{{System::GFX, "Settings", "SWDumpTevStages"}, false}; const Info GFX_SW_DUMP_TEV_TEX_FETCHES{{System::GFX, "Settings", "SWDumpTevTexFetches"}, diff --git a/Source/Core/Core/Config/GraphicsSettings.h b/Source/Core/Core/Config/GraphicsSettings.h index 710b6ca220..387233e27d 100644 --- a/Source/Core/Core/Config/GraphicsSettings.h +++ b/Source/Core/Core/Config/GraphicsSettings.h @@ -72,8 +72,6 @@ extern const Info GFX_SHADER_COMPILER_THREADS; extern const 
Info GFX_SHADER_PRECOMPILER_THREADS; extern const Info GFX_SAVE_TEXTURE_CACHE_TO_STATE; -extern const Info GFX_SW_ZCOMPLOC; -extern const Info GFX_SW_ZFREEZE; extern const Info GFX_SW_DUMP_OBJECTS; extern const Info GFX_SW_DUMP_TEV_STAGES; extern const Info GFX_SW_DUMP_TEV_TEX_FETCHES; diff --git a/Source/Core/VideoBackends/Software/Rasterizer.cpp b/Source/Core/VideoBackends/Software/Rasterizer.cpp index f886858124..d29e4954f2 100644 --- a/Source/Core/VideoBackends/Software/Rasterizer.cpp +++ b/Source/Core/VideoBackends/Software/Rasterizer.cpp @@ -80,7 +80,7 @@ static void Draw(s32 x, s32 y, s32 xi, s32 yi) s32 z = (s32)std::clamp(ZSlope.GetValue(dx, dy), 0.0f, 16777215.0f); - if (bpmem.UseEarlyDepthTest() && g_ActiveConfig.bZComploc) + if (bpmem.UseEarlyDepthTest()) { // TODO: Test if perf regs are incremented even if test is disabled EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT_ZCOMPLOC); @@ -354,7 +354,7 @@ void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v // rejected during clipping! // We're currently sloppy at this since we abort early if any of the culling/clipping/scissoring // tests fail. 
- if (!bpmem.genMode.zfreeze || !g_ActiveConfig.bZFreeze) + if (!bpmem.genMode.zfreeze) InitSlope(&ZSlope, v0->screenPosition[2], v1->screenPosition[2], v2->screenPosition[2], fltdx31, fltdx12, fltdy12, fltdy31); diff --git a/Source/Core/VideoBackends/Software/Tev.cpp b/Source/Core/VideoBackends/Software/Tev.cpp index 6905920405..64e0f7774b 100644 --- a/Source/Core/VideoBackends/Software/Tev.cpp +++ b/Source/Core/VideoBackends/Software/Tev.cpp @@ -840,8 +840,7 @@ void Tev::Draw() output[BLU_C] = (output[BLU_C] * invFog + fogInt * bpmem.fog.color.b) >> 8; } - const bool late_ztest = !bpmem.zcontrol.early_ztest || !g_ActiveConfig.bZComploc; - if (late_ztest && bpmem.zmode.testenable) + if (bpmem.UseLateDepthTest()) { // TODO: Check against hw if these values get incremented even if depth testing is disabled EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT); diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index 861cd7281c..aa6880e887 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -97,8 +97,6 @@ void VideoConfig::Refresh() iShaderCompilerThreads = Config::Get(Config::GFX_SHADER_COMPILER_THREADS); iShaderPrecompilerThreads = Config::Get(Config::GFX_SHADER_PRECOMPILER_THREADS); - bZComploc = Config::Get(Config::GFX_SW_ZCOMPLOC); - bZFreeze = Config::Get(Config::GFX_SW_ZFREEZE); bDumpObjects = Config::Get(Config::GFX_SW_DUMP_OBJECTS); bDumpTevStages = Config::Get(Config::GFX_SW_DUMP_TEV_STAGES); bDumpTevTextureFetches = Config::Get(Config::GFX_SW_DUMP_TEV_TEX_FETCHES); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index 659611a170..1389a385fa 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -152,8 +152,6 @@ struct VideoConfig final // VideoSW Debugging int drawStart = 0; int drawEnd = 0; - bool bZComploc = false; - bool bZFreeze = false; bool bDumpObjects = false; bool 
bDumpTevStages = false; bool bDumpTevTextureFetches = false; From 164e0f742d544f369f7a45e20815f22056a7d866 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Tue, 30 Nov 2021 22:33:31 -0800 Subject: [PATCH 2/3] Software: Store offset in Slope This is needed since we need a separate offset for zfreeze to work correctly. It also makes the code a bit less jank. --- .../VideoBackends/Software/Rasterizer.cpp | 153 +++++++++++------- .../Core/VideoBackends/Software/Rasterizer.h | 9 -- 2 files changed, 91 insertions(+), 71 deletions(-) diff --git a/Source/Core/VideoBackends/Software/Rasterizer.cpp b/Source/Core/VideoBackends/Software/Rasterizer.cpp index d29e4954f2..b3d0dd3fdb 100644 --- a/Source/Core/VideoBackends/Software/Rasterizer.cpp +++ b/Source/Core/VideoBackends/Software/Rasterizer.cpp @@ -20,16 +20,82 @@ namespace Rasterizer { static constexpr int BLOCK_SIZE = 2; +struct SlopeContext +{ + SlopeContext(const OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2, + s32 x0, s32 y0) + : x0(x0), y0(y0) + { + // adjust a little less than 0.5 + const float adjust = 0.495f; + + xOff = ((float)x0 - v0->screenPosition.x) + adjust; + yOff = ((float)y0 - v0->screenPosition.y) + adjust; + + dx10 = v1->screenPosition.x - v0->screenPosition.x; + dx20 = v2->screenPosition.x - v0->screenPosition.x; + dy10 = v1->screenPosition.y - v0->screenPosition.y; + dy20 = v2->screenPosition.y - v0->screenPosition.y; + } + s32 x0; + s32 y0; + float xOff; + float yOff; + float dx10; + float dx20; + float dy10; + float dy20; +}; + +struct Slope +{ + Slope() = default; + Slope(float f0, float f1, float f2, const SlopeContext& ctx) : f0(f0) + { + float delta_20 = f2 - f0; + float delta_10 = f1 - f0; + + // x2 - x0 y1 - y0 x1 - x0 y2 - y0 + float a = delta_20 * ctx.dy10 - delta_10 * ctx.dy20; + float b = ctx.dx20 * delta_10 - ctx.dx10 * delta_20; + float c = ctx.dx20 * ctx.dy10 - ctx.dx10 * ctx.dy20; + + dfdx = a / c; + dfdy = b / c; + + x0 = ctx.x0; + y0 = ctx.y0; + xOff = 
ctx.xOff; + yOff = ctx.yOff; + } + + // These default values are used in the unlikely case that zfreeze is enabled when drawing the + // first primitive. + // TODO: This is just a guess! + float dfdx = 0.0f; + float dfdy = 0.0f; + float f0 = 1.0f; + + // Both an s32 value and a float value are used to minimize rounding error + // TODO: is this really needed? + s32 x0 = 0; + s32 y0 = 0; + float xOff = 0.0f; + float yOff = 0.0f; + + float GetValue(s32 x, s32 y) const + { + float dx = xOff + (float)(x - x0); + float dy = yOff + (float)(y - y0); + return f0 + (dfdx * dx) + (dfdy * dy); + } +}; + static Slope ZSlope; static Slope WSlope; static Slope ColorSlopes[2][4]; static Slope TexSlopes[8][3]; -static s32 vertex0X; -static s32 vertex0Y; -static float vertexOffsetX; -static float vertexOffsetY; - static Tev tev; static RasterBlock rasterBlock; @@ -37,11 +103,9 @@ void Init() { tev.Init(); - // Set initial z reference plane in the unlikely case that zfreeze is enabled when drawing the - // first primitive. - // TODO: This is just a guess! - ZSlope.dfdx = ZSlope.dfdy = 0.f; - ZSlope.f0 = 1.f; + // The other slopes are set for each primitive drawn, but zfreeze means that the z slope + // needs to be set to an (untested) default value. 
+ ZSlope = Slope(); } // Returns approximation of log2(f) in s28.4 @@ -75,10 +139,7 @@ static void Draw(s32 x, s32 y, s32 xi, s32 yi) { INCSTAT(g_stats.this_frame.rasterized_pixels); - float dx = vertexOffsetX + (float)(x - vertex0X); - float dy = vertexOffsetY + (float)(y - vertex0Y); - - s32 z = (s32)std::clamp(ZSlope.GetValue(dx, dy), 0.0f, 16777215.0f); + s32 z = (s32)std::clamp(ZSlope.GetValue(x, y), 0.0f, 16777215.0f); if (bpmem.UseEarlyDepthTest()) { @@ -104,7 +165,7 @@ static void Draw(s32 x, s32 y, s32 xi, s32 yi) { for (int comp = 0; comp < 4; comp++) { - u16 color = (u16)ColorSlopes[i][comp].GetValue(dx, dy); + u16 color = (u16)ColorSlopes[i][comp].GetValue(x, y); // clamp color value to 0 u16 mask = ~(color >> 8); @@ -136,31 +197,6 @@ static void Draw(s32 x, s32 y, s32 xi, s32 yi) tev.Draw(); } -static void InitTriangle(float X1, float Y1, s32 xi, s32 yi) -{ - vertex0X = xi; - vertex0Y = yi; - - // adjust a little less than 0.5 - const float adjust = 0.495f; - - vertexOffsetX = ((float)xi - X1) + adjust; - vertexOffsetY = ((float)yi - Y1) + adjust; -} - -static void InitSlope(Slope* slope, float f1, float f2, float f3, float DX31, float DX12, - float DY12, float DY31) -{ - float DF31 = f3 - f1; - float DF21 = f2 - f1; - float a = DF31 * -DY12 - DF21 * DY31; - float b = DX31 * DF21 + DX12 * DF31; - float c = -DX12 * DY31 - DX31 * -DY12; - slope->dfdx = -a / c; - slope->dfdy = -b / c; - slope->f0 = f1; -} - static inline void CalculateLOD(s32* lodp, bool* linear, u32 texmap, u32 texcoord) { auto texUnit = bpmem.tex.GetUnit(texmap); @@ -220,22 +256,22 @@ static void BuildBlock(s32 blockX, s32 blockY) { RasterBlockPixel& pixel = rasterBlock.Pixel[xi][yi]; - float dx = vertexOffsetX + (float)(xi + blockX - vertex0X); - float dy = vertexOffsetY + (float)(yi + blockY - vertex0Y); + s32 x = xi + blockX; + s32 y = yi + blockY; - float invW = 1.0f / WSlope.GetValue(dx, dy); + float invW = 1.0f / WSlope.GetValue(x, y); pixel.InvW = invW; // tex coords for 
(unsigned int i = 0; i < bpmem.genMode.numtexgens; i++) { float projection = invW; - float q = TexSlopes[i][2].GetValue(dx, dy) * invW; + float q = TexSlopes[i][2].GetValue(x, y) * invW; if (q != 0.0f) projection = invW / q; - pixel.Uv[i][0] = TexSlopes[i][0].GetValue(dx, dy) * projection; - pixel.Uv[i][1] = TexSlopes[i][1].GetValue(dx, dy) * projection; + pixel.Uv[i][0] = TexSlopes[i][0].GetValue(x, y) * projection; + pixel.Uv[i][1] = TexSlopes[i][1].GetValue(x, y) * projection; } } } @@ -334,19 +370,12 @@ void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v if (minx >= maxx || miny >= maxy) return; - // Setup slopes - float fltx1 = v0->screenPosition.x; - float flty1 = v0->screenPosition.y; - float fltdx31 = v2->screenPosition.x - fltx1; - float fltdx12 = fltx1 - v1->screenPosition.x; - float fltdy12 = flty1 - v1->screenPosition.y; - float fltdy31 = v2->screenPosition.y - flty1; - - InitTriangle(fltx1, flty1, (X1 + 0xF) >> 4, (Y1 + 0xF) >> 4); + // Set up slopes + const SlopeContext ctx(v0, v1, v2, (X1 + 0xF) >> 4, (Y1 + 0xF) >> 4); float w[3] = {1.0f / v0->projectedPosition.w, 1.0f / v1->projectedPosition.w, 1.0f / v2->projectedPosition.w}; - InitSlope(&WSlope, w[0], w[1], w[2], fltdx31, fltdx12, fltdy12, fltdy31); + WSlope = Slope(w[0], w[1], w[2], ctx); // TODO: The zfreeze emulation is not quite correct, yet! // Many things might prevent us from reaching this line (culling, clipping, scissoring). @@ -355,21 +384,21 @@ void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v // We're currently sloppy at this since we abort early if any of the culling/clipping/scissoring // tests fail. 
if (!bpmem.genMode.zfreeze) - InitSlope(&ZSlope, v0->screenPosition[2], v1->screenPosition[2], v2->screenPosition[2], fltdx31, - fltdx12, fltdy12, fltdy31); + ZSlope = Slope(v0->screenPosition.z, v1->screenPosition.z, v2->screenPosition.z, ctx); for (unsigned int i = 0; i < bpmem.genMode.numcolchans; i++) { for (int comp = 0; comp < 4; comp++) - InitSlope(&ColorSlopes[i][comp], v0->color[i][comp], v1->color[i][comp], v2->color[i][comp], - fltdx31, fltdx12, fltdy12, fltdy31); + ColorSlopes[i][comp] = Slope(v0->color[i][comp], v1->color[i][comp], v2->color[i][comp], ctx); } for (unsigned int i = 0; i < bpmem.genMode.numtexgens; i++) { for (int comp = 0; comp < 3; comp++) - InitSlope(&TexSlopes[i][comp], v0->texCoords[i][comp] * w[0], v1->texCoords[i][comp] * w[1], - v2->texCoords[i][comp] * w[2], fltdx31, fltdx12, fltdy12, fltdy31); + { + TexSlopes[i][comp] = Slope(v0->texCoords[i][comp] * w[0], v1->texCoords[i][comp] * w[1], + v2->texCoords[i][comp] * w[2], ctx); + } } // Half-edge constants diff --git a/Source/Core/VideoBackends/Software/Rasterizer.h b/Source/Core/VideoBackends/Software/Rasterizer.h index bae35f7cd8..97cbce4d63 100644 --- a/Source/Core/VideoBackends/Software/Rasterizer.h +++ b/Source/Core/VideoBackends/Software/Rasterizer.h @@ -16,15 +16,6 @@ void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v void SetTevReg(int reg, int comp, s16 color); -struct Slope -{ - float dfdx; - float dfdy; - float f0; - - float GetValue(float dx, float dy) const { return f0 + (dfdx * dx) + (dfdy * dy); } -}; - struct RasterBlockPixel { float InvW; From 59f299d5d6cabddacf8e7d80f6f53987e4767c22 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Mon, 29 Nov 2021 17:51:02 -0800 Subject: [PATCH 3/3] Software: Fix zfreeze with CullMode::All --- .../Core/VideoBackends/Software/Clipper.cpp | 70 ++++++++++++------- Source/Core/VideoBackends/Software/Clipper.h | 6 +- .../VideoBackends/Software/Rasterizer.cpp | 27 ++++--- 
.../Core/VideoBackends/Software/Rasterizer.h | 2 + .../VideoBackends/Software/SWVertexLoader.cpp | 7 ++ .../VideoBackends/Software/SWVertexLoader.h | 3 + Source/Core/VideoCommon/VertexManagerBase.h | 4 +- 7 files changed, 78 insertions(+), 41 deletions(-) diff --git a/Source/Core/VideoBackends/Software/Clipper.cpp b/Source/Core/VideoBackends/Software/Clipper.cpp index dc22a6a201..f13ebb90b7 100644 --- a/Source/Core/VideoBackends/Software/Clipper.cpp +++ b/Source/Core/VideoBackends/Software/Clipper.cpp @@ -289,10 +289,42 @@ void ProcessTriangle(OutputVertexData* v0, OutputVertexData* v1, OutputVertexDat { INCSTAT(g_stats.this_frame.num_triangles_in) - bool backface; - - if (!CullTest(v0, v1, v2, backface)) + if (IsTriviallyRejected(v0, v1, v2)) + { + INCSTAT(g_stats.this_frame.num_triangles_rejected) + // NOTE: The slope used by zfreeze shouldn't be updated if the triangle is + // trivially rejected during clipping return; + } + + bool backface = IsBackface(v0, v1, v2); + + if (!backface) + { + if (bpmem.genMode.cullmode == CullMode::Back || bpmem.genMode.cullmode == CullMode::All) + { + // cull frontfacing - we still need to update the slope for zfreeze + PerspectiveDivide(v0); + PerspectiveDivide(v1); + PerspectiveDivide(v2); + Rasterizer::UpdateZSlope(v0, v1, v2); + INCSTAT(g_stats.this_frame.num_triangles_culled) + return; + } + } + else + { + if (bpmem.genMode.cullmode == CullMode::Front || bpmem.genMode.cullmode == CullMode::All) + { + // cull backfacing - we still need to update the slope for zfreeze + PerspectiveDivide(v0); + PerspectiveDivide(v2); + PerspectiveDivide(v1); + Rasterizer::UpdateZSlope(v0, v2, v1); + INCSTAT(g_stats.this_frame.num_triangles_culled) + return; + } + } int indices[NUM_INDICES] = {0, 1, 2, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, @@ -461,19 +493,18 @@ void ProcessPoint(OutputVertexData* center) Rasterizer::DrawTriangleFrontFace(&ur, &lr, &ul); } -bool CullTest(const 
OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2, - bool& backface) +bool IsTriviallyRejected(const OutputVertexData* v0, const OutputVertexData* v1, + const OutputVertexData* v2) { int mask = CalcClipMask(v0); mask &= CalcClipMask(v1); mask &= CalcClipMask(v2); - if (mask) - { - INCSTAT(g_stats.this_frame.num_triangles_rejected) - return false; - } + return mask != 0; +} +bool IsBackface(const OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2) +{ float x0 = v0->projectedPosition.x; float x1 = v1->projectedPosition.x; float x2 = v2->projectedPosition.x; @@ -486,29 +517,14 @@ bool CullTest(const OutputVertexData* v0, const OutputVertexData* v1, const Outp float normalZDir = (x0 * w2 - x2 * w0) * y1 + (x2 * y0 - x0 * y2) * w1 + (y2 * w0 - y0 * w2) * x1; - backface = normalZDir <= 0.0f; + bool backface = normalZDir <= 0.0f; // Jimmie Johnson's Anything with an Engine has a positive viewport, while other games have a // negative viewport. The positive viewport does not require vertices to be vertically mirrored, // but the backface test does need to be inverted for things to be drawn. if (xfmem.viewport.ht > 0) backface = !backface; - // TODO: Are these tests / the definition of backface above backwards? 
- if ((bpmem.genMode.cullmode == CullMode::Back || bpmem.genMode.cullmode == CullMode::All) && - !backface) // cull frontfacing - { - INCSTAT(g_stats.this_frame.num_triangles_culled) - return false; - } - - if ((bpmem.genMode.cullmode == CullMode::Front || bpmem.genMode.cullmode == CullMode::All) && - backface) // cull backfacing - { - INCSTAT(g_stats.this_frame.num_triangles_culled) - return false; - } - - return true; + return backface; } void PerspectiveDivide(OutputVertexData* vertex) diff --git a/Source/Core/VideoBackends/Software/Clipper.h b/Source/Core/VideoBackends/Software/Clipper.h index 4b18023696..21be39c4fe 100644 --- a/Source/Core/VideoBackends/Software/Clipper.h +++ b/Source/Core/VideoBackends/Software/Clipper.h @@ -15,8 +15,10 @@ void ProcessLine(OutputVertexData* v0, OutputVertexData* v1); void ProcessPoint(OutputVertexData* v); -bool CullTest(const OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2, - bool& backface); +bool IsTriviallyRejected(const OutputVertexData* v0, const OutputVertexData* v1, + const OutputVertexData* v2); + +bool IsBackface(const OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2); void PerspectiveDivide(OutputVertexData* vertex); } // namespace Clipper diff --git a/Source/Core/VideoBackends/Software/Rasterizer.cpp b/Source/Core/VideoBackends/Software/Rasterizer.cpp index b3d0dd3fdb..726692138c 100644 --- a/Source/Core/VideoBackends/Software/Rasterizer.cpp +++ b/Source/Core/VideoBackends/Software/Rasterizer.cpp @@ -301,11 +301,27 @@ static void BuildBlock(s32 blockX, s32 blockY) } } +void UpdateZSlope(const OutputVertexData* v0, const OutputVertexData* v1, + const OutputVertexData* v2) +{ + if (!bpmem.genMode.zfreeze) + { + const s32 X1 = iround(16.0f * v0->screenPosition[0]) - 9; + const s32 Y1 = iround(16.0f * v0->screenPosition[1]) - 9; + const SlopeContext ctx(v0, v1, v2, (X1 + 0xF) >> 4, (Y1 + 0xF) >> 4); + ZSlope = Slope(v0->screenPosition.z, v1->screenPosition.z, 
v2->screenPosition.z, ctx); + } +} + void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2) { INCSTAT(g_stats.this_frame.num_triangles_drawn); + // The zslope should be updated now, even if the triangle is rejected by the scissor test, as + // zfreeze depends on it + UpdateZSlope(v0, v1, v2); + // adapted from http://devmaster.net/posts/6145/advanced-rasterization // 28.4 fixed-pou32 coordinates. rounded to nearest and adjusted to match hardware output @@ -370,22 +386,13 @@ void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v if (minx >= maxx || miny >= maxy) return; - // Set up slopes + // Set up the remaining slopes const SlopeContext ctx(v0, v1, v2, (X1 + 0xF) >> 4, (Y1 + 0xF) >> 4); float w[3] = {1.0f / v0->projectedPosition.w, 1.0f / v1->projectedPosition.w, 1.0f / v2->projectedPosition.w}; WSlope = Slope(w[0], w[1], w[2], ctx); - // TODO: The zfreeze emulation is not quite correct, yet! - // Many things might prevent us from reaching this line (culling, clipping, scissoring). - // However, the zslope is always guaranteed to be calculated unless all vertices are trivially - // rejected during clipping! - // We're currently sloppy at this since we abort early if any of the culling/clipping/scissoring - // tests fail. 
- if (!bpmem.genMode.zfreeze) - ZSlope = Slope(v0->screenPosition.z, v1->screenPosition.z, v2->screenPosition.z, ctx); - for (unsigned int i = 0; i < bpmem.genMode.numcolchans; i++) { for (int comp = 0; comp < 4; comp++) diff --git a/Source/Core/VideoBackends/Software/Rasterizer.h b/Source/Core/VideoBackends/Software/Rasterizer.h index 97cbce4d63..c278809966 100644 --- a/Source/Core/VideoBackends/Software/Rasterizer.h +++ b/Source/Core/VideoBackends/Software/Rasterizer.h @@ -11,6 +11,8 @@ namespace Rasterizer { void Init(); +void UpdateZSlope(const OutputVertexData* v0, const OutputVertexData* v1, + const OutputVertexData* v2); void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2); diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp index af99e5ba85..287fcf4a4b 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp @@ -32,6 +32,13 @@ SWVertexLoader::SWVertexLoader() = default; SWVertexLoader::~SWVertexLoader() = default; +DataReader SWVertexLoader::PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count, + u32 stride, bool cullall) +{ + // The software renderer needs cullall to be false for zfreeze to work + return VertexManagerBase::PrepareForAdditionalData(primitive, count, stride, false); +} + void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) { DebugUtil::OnObjectBegin(); diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.h b/Source/Core/VideoBackends/Software/SWVertexLoader.h index bbda8da037..59b6ca65fd 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.h +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.h @@ -19,6 +19,9 @@ public: SWVertexLoader(); ~SWVertexLoader(); + DataReader PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count, u32 stride, + bool cullall) 
override; + protected: void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override; diff --git a/Source/Core/VideoCommon/VertexManagerBase.h b/Source/Core/VideoCommon/VertexManagerBase.h index b3dd49aa61..c413889713 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.h +++ b/Source/Core/VideoCommon/VertexManagerBase.h @@ -99,8 +99,8 @@ public: PrimitiveType GetCurrentPrimitiveType() const { return m_current_primitive_type; } void AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices); - DataReader PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count, u32 stride, - bool cullall); + virtual DataReader PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count, + u32 stride, bool cullall); void FlushData(u32 count, u32 stride); void Flush();