From 2d4b7e3f3f67f48ec2fb3d8eb8b2af3b70e4afcc Mon Sep 17 00:00:00 2001 From: crudelios Date: Sun, 14 Sep 2014 17:52:51 +0100 Subject: [PATCH] Reimplement Bounding Box calculation using the software renderer. --- .../VideoBackends/Software/BPMemLoader.cpp | 9 +- .../VideoBackends/Software/EfbInterface.cpp | 6 - .../VideoBackends/Software/Rasterizer.cpp | 301 ++++++++--- Source/Core/VideoBackends/Software/Tev.cpp | 232 +++++---- Source/Core/VideoCommon/BPStructs.cpp | 11 +- Source/Core/VideoCommon/BoundingBox.cpp | 150 ++++++ Source/Core/VideoCommon/BoundingBox.h | 41 ++ Source/Core/VideoCommon/PixelEngine.cpp | 18 +- Source/Core/VideoCommon/PixelEngine.h | 12 +- Source/Core/VideoCommon/VertexLoader.cpp | 473 +----------------- Source/Core/VideoCommon/VideoCommon.vcxproj | 6 +- .../VideoCommon/VideoCommon.vcxproj.filters | 10 +- Source/Core/VideoCommon/VideoState.cpp | 5 + 13 files changed, 602 insertions(+), 672 deletions(-) create mode 100644 Source/Core/VideoCommon/BoundingBox.cpp create mode 100644 Source/Core/VideoCommon/BoundingBox.h diff --git a/Source/Core/VideoBackends/Software/BPMemLoader.cpp b/Source/Core/VideoBackends/Software/BPMemLoader.cpp index f2aa429c49..bf11258cb0 100644 --- a/Source/Core/VideoBackends/Software/BPMemLoader.cpp +++ b/Source/Core/VideoBackends/Software/BPMemLoader.cpp @@ -14,6 +14,7 @@ #include "VideoCommon/PixelEngine.h" #include "VideoCommon/TextureDecoder.h" +#include "VideoCommon/BoundingBox.h" #include "VideoCommon/VideoCommon.h" @@ -73,12 +74,12 @@ void SWBPWritten(int address, int newvalue) EfbCopy::CopyEfb(); break; case BPMEM_CLEARBBOX1: - PixelEngine::bbox[0] = newvalue >> 10; - PixelEngine::bbox[1] = newvalue & 0x3ff; + BoundingBox::coords[BoundingBox::LEFT] = newvalue >> 10; + BoundingBox::coords[BoundingBox::RIGHT] = newvalue & 0x3ff; break; case BPMEM_CLEARBBOX2: - PixelEngine::bbox[2] = newvalue >> 10; - PixelEngine::bbox[3] = newvalue & 0x3ff; + BoundingBox::coords[BoundingBox::TOP] = newvalue >> 10; + 
BoundingBox::coords[BoundingBox::BOTTOM] = newvalue & 0x3ff; break; case BPMEM_CLEAR_PIXEL_PERF: // TODO: I didn't test if the value written to this register affects the amount of cleared registers diff --git a/Source/Core/VideoBackends/Software/EfbInterface.cpp b/Source/Core/VideoBackends/Software/EfbInterface.cpp index 8afae35bd8..172dd5ae23 100644 --- a/Source/Core/VideoBackends/Software/EfbInterface.cpp +++ b/Source/Core/VideoBackends/Software/EfbInterface.cpp @@ -437,12 +437,6 @@ namespace EfbInterface { SetPixelAlphaOnly(offset, dstClrPtr[ALP_C]); } - - // branchless bounding box update - PixelEngine::bbox[0] = std::min(x, PixelEngine::bbox[0]); - PixelEngine::bbox[1] = std::max(x, PixelEngine::bbox[1]); - PixelEngine::bbox[2] = std::min(y, PixelEngine::bbox[2]); - PixelEngine::bbox[3] = std::max(y, PixelEngine::bbox[3]); } void SetColor(u16 x, u16 y, u8 *color) diff --git a/Source/Core/VideoBackends/Software/Rasterizer.cpp b/Source/Core/VideoBackends/Software/Rasterizer.cpp index 5e8fdffd50..d797b9158e 100644 --- a/Source/Core/VideoBackends/Software/Rasterizer.cpp +++ b/Source/Core/VideoBackends/Software/Rasterizer.cpp @@ -14,6 +14,7 @@ #include "VideoBackends/Software/SWVideoConfig.h" #include "VideoBackends/Software/Tev.h" #include "VideoBackends/Software/XFMemLoader.h" +#include "VideoCommon/BoundingBox.h" #define BLOCK_SIZE 2 @@ -130,7 +131,7 @@ inline void Draw(s32 x, s32 y, s32 xi, s32 yi) if (z < 0 || z > 0x00ffffff) return; - if (bpmem.UseEarlyDepthTest() && g_SWVideoConfig.bZComploc) + if (!BoundingBox::active && bpmem.UseEarlyDepthTest() && g_SWVideoConfig.bZComploc) { // TODO: Test if perf regs are incremented even if test is disabled EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT_ZCOMPLOC); @@ -317,7 +318,7 @@ void DrawTriangleFrontFace(OutputVertexData *v0, OutputVertexData *v1, OutputVer { INCSTAT(swstats.thisFrame.numTrianglesDrawn); - if (g_SWVideoConfig.bHwRasterizer) + if (g_SWVideoConfig.bHwRasterizer && !BoundingBox::active) { 
HwRasterizer::DrawTriangleFrontFace(v0, v1, v2); return; @@ -414,84 +415,254 @@ void DrawTriangleFrontFace(OutputVertexData *v0, OutputVertexData *v1, OutputVer if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; - // Loop through blocks - for (s32 y = miny; y < maxy; y += BLOCK_SIZE) + // If drawing, rasterize every block + if (!BoundingBox::active) { - for (s32 x = minx; x < maxx; x += BLOCK_SIZE) + // Loop through blocks + for (s32 y = miny; y < maxy; y += BLOCK_SIZE) { - // Corners of block - s32 x0 = x << 4; - s32 x1 = (x + BLOCK_SIZE - 1) << 4; - s32 y0 = y << 4; - s32 y1 = (y + BLOCK_SIZE - 1) << 4; - - // Evaluate half-space functions - bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; - bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; - bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; - bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; - int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); - - bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; - bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; - bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; - bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; - int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); - - bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; - bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; - bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; - bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; - int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); - - // Skip block when outside an edge - if (a == 0x0 || b == 0x0 || c == 0x0) - continue; - - BuildBlock(x, y); - - // Accept whole block when totally covered - if (a == 0xF && b == 0xF && c == 0xF) + for (s32 x = minx; x < maxx; x += BLOCK_SIZE) { - for (s32 iy = 0; iy < BLOCK_SIZE; iy++) - { - for (s32 ix = 0; ix < BLOCK_SIZE; ix++) - { - Draw(x + ix, y + iy, ix, iy); - } - } - } - else // Partially covered block - { - s32 CY1 = C1 + DX12 * y0 - DY12 * x0; - s32 CY2 = C2 + DX23 * y0 - DY23 * x0; - s32 CY3 = C3 + DX31 * y0 - DY31 * x0; + // Corners of block + s32 x0 = x 
<< 4; + s32 x1 = (x + BLOCK_SIZE - 1) << 4; + s32 y0 = y << 4; + s32 y1 = (y + BLOCK_SIZE - 1) << 4; - for (s32 iy = 0; iy < BLOCK_SIZE; iy++) - { - s32 CX1 = CY1; - s32 CX2 = CY2; - s32 CX3 = CY3; + // Evaluate half-space functions + bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; + bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; + bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; + bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; + int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); - for (s32 ix = 0; ix < BLOCK_SIZE; ix++) + bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; + bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; + bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; + bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; + int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); + + bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; + bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; + bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; + bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; + int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); + + // Skip block when outside an edge + if (a == 0x0 || b == 0x0 || c == 0x0) + continue; + + BuildBlock(x, y); + + // Accept whole block when totally covered + if (a == 0xF && b == 0xF && c == 0xF) + { + for (s32 iy = 0; iy < BLOCK_SIZE; iy++) { - if (CX1 > 0 && CX2 > 0 && CX3 > 0) + for (s32 ix = 0; ix < BLOCK_SIZE; ix++) { Draw(x + ix, y + iy, ix, iy); } - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; } + } + else // Partially covered block + { + s32 CY1 = C1 + DX12 * y0 - DY12 * x0; + s32 CY2 = C2 + DX23 * y0 - DY23 * x0; + s32 CY3 = C3 + DX31 * y0 - DY31 * x0; - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; + for (s32 iy = 0; iy < BLOCK_SIZE; iy++) + { + s32 CX1 = CY1; + s32 CX2 = CY2; + s32 CX3 = CY3; + + for (s32 ix = 0; ix < BLOCK_SIZE; ix++) + { + if (CX1 > 0 && CX2 > 0 && CX3 > 0) + { + Draw(x + ix, y + iy, ix, iy); + } + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + } } } } } + else + { + // If we are only 
calculating bbox, we only need to find the topmost, + // leftmost, bottommost and rightmost pixels to be drawn. + // So instead of drawing every single one of the triangle's pixels, + // four loops are run: one for the top pixel, one for the left, one for + // the bottom and one for the right. As soon as a pixel that is to be + // drawn is found, the loop breaks. This enables a ~150% speed boost in + // bbox calculation, albeit at the cost of some ugly repetitive code. + const s32 FTOP = miny << 4; + const s32 FLEFT = minx << 4; + const s32 FBOTTOM = maxy << 4; + const s32 FRIGHT = maxx << 4; + + // Start checking for bbox top + s32 CY1 = C1 + DX12 * FTOP - DY12 * FLEFT; + s32 CY2 = C2 + DX23 * FTOP - DY23 * FLEFT; + s32 CY3 = C3 + DX31 * FTOP - DY31 * FLEFT; + + // Loop + for (s32 y = miny; y <= maxy; ++y) + { + if (y >= BoundingBox::coords[BoundingBox::TOP]) + break; + + s32 CX1 = CY1; + s32 CX2 = CY2; + s32 CX3 = CY3; + + for (s32 x = minx; x <= maxx; ++x) + { + if (CX1 > 0 && CX2 > 0 && CX3 > 0) + { + // Build the new raster block every other pixel + BuildBlock((x & ~(BLOCK_SIZE - 1)), y & ~(BLOCK_SIZE - 1)); + + Draw(x, y, x & 1, y & 1); + + if (y >= BoundingBox::coords[BoundingBox::TOP]) + break; + } + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + } + + // Checking for bbox left + s32 CX1 = C1 + DX12 * FTOP - DY12 * FLEFT; + s32 CX2 = C2 + DX23 * FTOP - DY23 * FLEFT; + s32 CX3 = C3 + DX31 * FTOP - DY31 * FLEFT; + + // Loop + for (s32 x = minx; x <= maxx; ++x) + { + if (x >= BoundingBox::coords[BoundingBox::LEFT]) + break; + + s32 CY1 = CX1; + s32 CY2 = CX2; + s32 CY3 = CX3; + + for (s32 y = miny; y <= maxy; ++y) + { + if (CY1 > 0 && CY2 > 0 && CY3 > 0) + { + // Build the new raster block every other pixel + BuildBlock((x & ~(BLOCK_SIZE - 1)), y & ~(BLOCK_SIZE - 1)); + + Draw(x, y, x & 1, y & 1); + + if (x >= BoundingBox::coords[BoundingBox::LEFT]) + break; + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += 
FDX31; + } + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + + // Checking for bbox bottom + CY1 = C1 + DX12 * FBOTTOM - DY12 * FRIGHT; + CY2 = C2 + DX23 * FBOTTOM - DY23 * FRIGHT; + CY3 = C3 + DX31 * FBOTTOM - DY31 * FRIGHT; + + // Loop + for (s32 y = maxy; y >= miny; --y) + { + s32 CX1 = CY1; + s32 CX2 = CY2; + s32 CX3 = CY3; + + if (y <= BoundingBox::coords[BoundingBox::BOTTOM]) + break; + + for (s32 x = maxx; x >= minx; --x) + { + if (CX1 > 0 && CX2 > 0 && CX3 > 0) + { + // Build the new raster block every other pixel + BuildBlock((x & ~(BLOCK_SIZE - 1)), y & ~(BLOCK_SIZE - 1)); + + Draw(x, y, x & 1, y & 1); + + if (y <= BoundingBox::coords[BoundingBox::BOTTOM]) + break; + } + + CX1 += FDY12; + CX2 += FDY23; + CX3 += FDY31; + } + + CY1 -= FDX12; + CY2 -= FDX23; + CY3 -= FDX31; + } + + // Checking for bbox right + CX1 = C1 + DX12 * FBOTTOM - DY12 * FRIGHT; + CX2 = C2 + DX23 * FBOTTOM - DY23 * FRIGHT; + CX3 = C3 + DX31 * FBOTTOM - DY31 * FRIGHT; + + // Loop + for (s32 x = maxx; x >= minx; --x) + { + if (x <= BoundingBox::coords[BoundingBox::RIGHT]) + break; + + s32 CY1 = CX1; + s32 CY2 = CX2; + s32 CY3 = CX3; + + for (s32 y = maxy; y >= miny; --y) + { + if (CY1 > 0 && CY2 > 0 && CY3 > 0) + { + // Build the new raster block every other pixel + BuildBlock((x & ~(BLOCK_SIZE - 1)), y & ~(BLOCK_SIZE - 1)); + + Draw(x, y, x & 1, y & 1); + + if (x <= BoundingBox::coords[BoundingBox::RIGHT]) + break; + } + + CY1 -= FDX12; + CY2 -= FDX23; + CY3 -= FDX31; + } + + CX1 += FDY12; + CX2 += FDY23; + CX3 += FDY31; + } + } } diff --git a/Source/Core/VideoBackends/Software/Tev.cpp b/Source/Core/VideoBackends/Software/Tev.cpp index 7d13fd7bb2..122e1293da 100644 --- a/Source/Core/VideoBackends/Software/Tev.cpp +++ b/Source/Core/VideoBackends/Software/Tev.cpp @@ -13,6 +13,7 @@ #include "VideoBackends/Software/Tev.h" #include "VideoBackends/Software/TextureSampler.h" #include "VideoBackends/Software/XFMemLoader.h" +#include "VideoCommon/BoundingBox.h" #ifdef _DEBUG #define 
ALLOW_TEV_DUMPS 1 @@ -157,7 +158,7 @@ void Tev::SetRasColor(int colorChan, int swaptable) RasColor[ALP_C] = color[bpmem.tevksel[swaptable].swap2]; } break; - case 5: // alpha bump + case 5: // alpha bump { for (s16& comp : RasColor) { @@ -649,121 +650,138 @@ void Tev::Draw() if (!TevAlphaTest(output[ALP_C])) return; - // z texture - if (bpmem.ztex2.op) + // This part is only needed if we are not simply computing bbox + // (i. e., only needed when using the SW renderer) + if (!BoundingBox::active) { - u32 ztex = bpmem.ztex1.bias; - switch (bpmem.ztex2.type) + // z texture + if (bpmem.ztex2.op) { - case 0: // 8 bit - ztex += TexColor[ALP_C]; - break; - case 1: // 16 bit - ztex += TexColor[ALP_C] << 8 | TexColor[RED_C]; - break; - case 2: // 24 bit - ztex += TexColor[RED_C] << 16 | TexColor[GRN_C] << 8 | TexColor[BLU_C]; - break; + u32 ztex = bpmem.ztex1.bias; + switch (bpmem.ztex2.type) + { + case 0: // 8 bit + ztex += TexColor[ALP_C]; + break; + case 1: // 16 bit + ztex += TexColor[ALP_C] << 8 | TexColor[RED_C]; + break; + case 2: // 24 bit + ztex += TexColor[RED_C] << 16 | TexColor[GRN_C] << 8 | TexColor[BLU_C]; + break; + } + + if (bpmem.ztex2.op == ZTEXTURE_ADD) + ztex += Position[2]; + + Position[2] = ztex & 0x00ffffff; } - if (bpmem.ztex2.op == ZTEXTURE_ADD) - ztex += Position[2]; + // fog + if (bpmem.fog.c_proj_fsel.fsel) + { + float ze; - Position[2] = ztex & 0x00ffffff; + if (bpmem.fog.c_proj_fsel.proj == 0) + { + // perspective + // ze = A/(B - (Zs >> B_SHF)) + s32 denom = bpmem.fog.b_magnitude - (Position[2] >> bpmem.fog.b_shift); + //in addition downscale magnitude and zs to 0.24 bits + ze = (bpmem.fog.a.GetA() * 16777215.0f) / (float)denom; + } + else + { + // orthographic + // ze = a*Zs + //in addition downscale zs to 0.24 bits + ze = bpmem.fog.a.GetA() * ((float)Position[2] / 16777215.0f); + + } + + if (bpmem.fogRange.Base.Enabled) + { + // TODO: This is untested and should definitely be checked against real hw. 
+ // - No idea if offset is really normalized against the viewport width or against the projection matrix or yet something else + // - scaling of the "k" coefficient isn't clear either. + + // First, calculate the offset from the viewport center (normalized to 0..1) + float offset = (Position[0] - (bpmem.fogRange.Base.Center - 342)) / (float)xfmem.viewport.wd; + + // Based on that, choose the index such that points which are far away from the z-axis use the 10th "k" value and such that central points use the first value. + float floatindex = 9.f - std::abs(offset) * 9.f; + floatindex = (floatindex < 0.f) ? 0.f : (floatindex > 9.f) ? 9.f : floatindex; // TODO: This shouldn't be necessary! + + // Get the two closest integer indices, look up the corresponding samples + int indexlower = (int)floor(floatindex); + int indexupper = indexlower + 1; + // Look up coefficient... Seems like multiplying by 4 makes Fortune Street work properly (fog is too strong without the factor) + float klower = bpmem.fogRange.K[indexlower/2].GetValue(indexlower%2) * 4.f; + float kupper = bpmem.fogRange.K[indexupper/2].GetValue(indexupper%2) * 4.f; + + // linearly interpolate the samples and multiply ze by the resulting adjustment factor + float factor = indexupper - floatindex; + float k = klower * factor + kupper * (1.f - factor); + float x_adjust = sqrt(offset*offset + k*k)/k; + ze *= x_adjust; // NOTE: This is basically dividing by a cosine (hidden behind GXInitFogAdjTable): 1/cos = c/b = sqrt(a^2+b^2)/b + } + + ze -= bpmem.fog.c_proj_fsel.GetC(); + + // clamp 0 to 1 + float fog = (ze<0.0f) ? 0.0f : ((ze>1.0f) ? 
1.0f : ze); + + switch (bpmem.fog.c_proj_fsel.fsel) + { + case 4: // exp + fog = 1.0f - pow(2.0f, -8.0f * fog); + break; + case 5: // exp2 + fog = 1.0f - pow(2.0f, -8.0f * fog * fog); + break; + case 6: // backward exp + fog = 1.0f - fog; + fog = pow(2.0f, -8.0f * fog); + break; + case 7: // backward exp2 + fog = 1.0f - fog; + fog = pow(2.0f, -8.0f * fog * fog); + break; + } + + // lerp from output to fog color + u32 fogInt = (u32)(fog * 256); + u32 invFog = 256 - fogInt; + + output[RED_C] = (output[RED_C] * invFog + fogInt * bpmem.fog.color.r) >> 8; + output[GRN_C] = (output[GRN_C] * invFog + fogInt * bpmem.fog.color.g) >> 8; + output[BLU_C] = (output[BLU_C] * invFog + fogInt * bpmem.fog.color.b) >> 8; + } + + bool late_ztest = !bpmem.zcontrol.early_ztest || !g_SWVideoConfig.bZComploc; + if (late_ztest && bpmem.zmode.testenable) + { + // TODO: Check against hw if these values get incremented even if depth testing is disabled + EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT); + + if (!EfbInterface::ZCompare(Position[0], Position[1], Position[2])) + return; + + EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_OUTPUT); + } } - // fog - if (bpmem.fog.c_proj_fsel.fsel) - { - float ze; + // branchless bounding box update + BoundingBox::coords[BoundingBox::LEFT] = std::min((u16)Position[0], BoundingBox::coords[BoundingBox::LEFT]); + BoundingBox::coords[BoundingBox::RIGHT] = std::max((u16)Position[0], BoundingBox::coords[BoundingBox::RIGHT]); + BoundingBox::coords[BoundingBox::TOP] = std::min((u16)Position[1], BoundingBox::coords[BoundingBox::TOP]); + BoundingBox::coords[BoundingBox::BOTTOM] = std::max((u16)Position[1], BoundingBox::coords[BoundingBox::BOTTOM]); - if (bpmem.fog.c_proj_fsel.proj == 0) - { - // perspective - // ze = A/(B - (Zs >> B_SHF)) - s32 denom = bpmem.fog.b_magnitude - (Position[2] >> bpmem.fog.b_shift); - //in addition downscale magnitude and zs to 0.24 bits - ze = (bpmem.fog.a.GetA() * 16777215.0f) / (float)denom; - } - else - { - // 
orthographic - // ze = a*Zs - //in addition downscale zs to 0.24 bits - ze = bpmem.fog.a.GetA() * ((float)Position[2] / 16777215.0f); + // if we are only calculating the bounding box, + // there's no need to actually draw anything + if (BoundingBox::active) + return; - } - - if (bpmem.fogRange.Base.Enabled) - { - // TODO: This is untested and should definitely be checked against real hw. - // - No idea if offset is really normalized against the viewport width or against the projection matrix or yet something else - // - scaling of the "k" coefficient isn't clear either. - - // First, calculate the offset from the viewport center (normalized to 0..1) - float offset = (Position[0] - (bpmem.fogRange.Base.Center - 342)) / (float)xfmem.viewport.wd; - - // Based on that, choose the index such that points which are far away from the z-axis use the 10th "k" value and such that central points use the first value. - float floatindex = 9.f - std::abs(offset) * 9.f; - floatindex = (floatindex < 0.f) ? 0.f : (floatindex > 9.f) ? 9.f : floatindex; // TODO: This shouldn't be necessary! - - // Get the two closest integer indices, look up the corresponding samples - int indexlower = (int)floor(floatindex); - int indexupper = indexlower + 1; - // Look up coefficient... Seems like multiplying by 4 makes Fortune Street work properly (fog is too strong without the factor) - float klower = bpmem.fogRange.K[indexlower/2].GetValue(indexlower%2) * 4.f; - float kupper = bpmem.fogRange.K[indexupper/2].GetValue(indexupper%2) * 4.f; - - // linearly interpolate the samples and multiple ze by the resulting adjustment factor - float factor = indexupper - floatindex; - float k = klower * factor + kupper * (1.f - factor); - float x_adjust = sqrt(offset*offset + k*k)/k; - ze *= x_adjust; // NOTE: This is basically dividing by a cosine (hidden behind GXInitFogAdjTable): 1/cos = c/b = sqrt(a^2+b^2)/b - } - - ze -= bpmem.fog.c_proj_fsel.GetC(); - - // clamp 0 to 1 - float fog = (ze<0.0f) ? 
0.0f : ((ze>1.0f) ? 1.0f : ze); - - switch (bpmem.fog.c_proj_fsel.fsel) - { - case 4: // exp - fog = 1.0f - pow(2.0f, -8.0f * fog); - break; - case 5: // exp2 - fog = 1.0f - pow(2.0f, -8.0f * fog * fog); - break; - case 6: // backward exp - fog = 1.0f - fog; - fog = pow(2.0f, -8.0f * fog); - break; - case 7: // backward exp2 - fog = 1.0f - fog; - fog = pow(2.0f, -8.0f * fog * fog); - break; - } - - // lerp from output to fog color - u32 fogInt = (u32)(fog * 256); - u32 invFog = 256 - fogInt; - - output[RED_C] = (output[RED_C] * invFog + fogInt * bpmem.fog.color.r) >> 8; - output[GRN_C] = (output[GRN_C] * invFog + fogInt * bpmem.fog.color.g) >> 8; - output[BLU_C] = (output[BLU_C] * invFog + fogInt * bpmem.fog.color.b) >> 8; - } - - bool late_ztest = !bpmem.zcontrol.early_ztest || !g_SWVideoConfig.bZComploc; - if (late_ztest && bpmem.zmode.testenable) - { - // TODO: Check against hw if these values get incremented even if depth testing is disabled - EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT); - - if (!EfbInterface::ZCompare(Position[0], Position[1], Position[2])) - return; - - EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_OUTPUT); - } #if ALLOW_TEV_DUMPS if (g_SWVideoConfig.bDumpTevStages) diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index 152d15a7d5..b307cbf2f4 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -19,6 +19,7 @@ #include "VideoCommon/RenderBase.h" #include "VideoCommon/Statistics.h" #include "VideoCommon/TextureDecoder.h" +#include "VideoCommon/BoundingBox.h" #include "VideoCommon/VertexLoader.h" #include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VideoCommon.h" @@ -231,7 +232,7 @@ static void BPWritten(const BPCmd& bp) // here. Not sure if there's a better spot to put this. 
// the number of lines copied is determined by the y scale * source efb height - PixelEngine::bbox_active = false; + BoundingBox::active = false; float yScale; if (PE_copy.scale_invert) @@ -378,13 +379,13 @@ static void BPWritten(const BPCmd& bp) case BPMEM_CLEARBBOX2: // Don't compute bounding box if this frame is being skipped! // Wrong but valid values are better than bogus values... - if (g_ActiveConfig.bUseBBox && !g_bSkipCurrentFrame) + if (!g_bSkipCurrentFrame) { u8 offset = bp.address & 2; - PixelEngine::bbox[offset] = bp.newvalue & 0x3ff; - PixelEngine::bbox[offset | 1] = bp.newvalue >> 10; - PixelEngine::bbox_active = true; + BoundingBox::coords[offset] = bp.newvalue & 0x3ff; + BoundingBox::coords[offset + 1] = bp.newvalue >> 10; + BoundingBox::active = true; } return; case BPMEM_TEXINVALIDATE: diff --git a/Source/Core/VideoCommon/BoundingBox.cpp b/Source/Core/VideoCommon/BoundingBox.cpp new file mode 100644 index 0000000000..b76db60dab --- /dev/null +++ b/Source/Core/VideoCommon/BoundingBox.cpp @@ -0,0 +1,150 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. 
+ +#include "VideoCommon/PixelShaderManager.h" +#include "VideoBackends/Software/SetupUnit.h" +#include "VideoBackends/Software/TransformUnit.h" +#include "VideoBackends/Software/Clipper.h" +#include "VideoBackends/Software/Rasterizer.h" + +#include "VideoCommon/BoundingBox.h" + +namespace BoundingBox +{ + +// External vars +bool active = false; +u16 coords[4] = { 0x80, 0xA0, 0x80, 0xA0 }; +u8 posMtxIdx; +u8 texMtxIdx[8]; + + +// Internal vars +SetupUnit vtxUnit; +VAT myVat; +u8 * bufferPos; +TVtxDesc vertexDesc; +PortableVertexDeclaration vertexDecl; + +// Gets the pointer to the current buffer position +void LOADERDECL SetVertexBufferPosition() +{ + bufferPos = VertexManager::s_pCurBufferPointer; +} + +// Prepares the bounding box for new primitive data +void Prepare(const VAT & vat, int primitive, const TVtxDesc & vtxDesc, const PortableVertexDeclaration & vtxDecl) +{ + if (!active) + return; + + myVat = vat; + vertexDesc = vtxDesc; + vertexDecl = vtxDecl; + + vtxUnit.Init(primitive); + + // Initialize the SW renderer + static bool SWinit = false; + + if (!SWinit) + { + Clipper::Init(); + Rasterizer::Init(); + SWinit = true; + } + + // Update SW renderer values + Clipper::SetViewOffset(); + Rasterizer::SetScissor(); + + for (u8 i = 0; i < 4; ++i) + { + Rasterizer::SetTevReg(i, 0, true, (s16)PixelShaderManager::constants.kcolors[i][0]); + Rasterizer::SetTevReg(i, 1, true, (s16)PixelShaderManager::constants.kcolors[i][1]); + Rasterizer::SetTevReg(i, 2, true, (s16)PixelShaderManager::constants.kcolors[i][2]); + Rasterizer::SetTevReg(i, 3, true, (s16)PixelShaderManager::constants.kcolors[i][3]); + + Rasterizer::SetTevReg(i, 0, false, (s16)PixelShaderManager::constants.colors[i][0]); + Rasterizer::SetTevReg(i, 1, false, (s16)PixelShaderManager::constants.colors[i][1]); + Rasterizer::SetTevReg(i, 2, false, (s16)PixelShaderManager::constants.colors[i][2]); + Rasterizer::SetTevReg(i, 3, false, (s16)PixelShaderManager::constants.colors[i][3]); + } +} + +// Updates the 
bounding box +void LOADERDECL Update() +{ + if (!active) + return; + + // Grab vertex input data and transform to output vertex + InputVertexData myVertex; + OutputVertexData * outVertex = vtxUnit.GetVertex(); + + // Feed vertex position and matrix + myVertex.position = Vec3((const float *)bufferPos); + myVertex.posMtx = vertexDesc.PosMatIdx ? posMtxIdx : MatrixIndexA.PosNormalMtxIdx; + + // Transform position + TransformUnit::TransformPosition(&myVertex, outVertex); + + if (g_VtxDesc.Normal != NOT_PRESENT) + { + // Feed normal input data and transform + myVat.g0.NormalIndex3; + + memcpy((u8 *)myVertex.normal, bufferPos + vertexDecl.normals[0].offset, sizeof(float) * 3 * ((myVat.g0.NormalElements) ? 3 : 1)); + + TransformUnit::TransformNormal(&myVertex, myVat.g0.NormalElements, outVertex); + } + + // Feed color input data + for (int i = 0; i < 2; ++i) + { + if (vertexDecl.colors[i].enable) + { + u32 color; + memcpy((u8 *)&color, bufferPos + vertexDecl.colors[i].offset, sizeof(u32)); + *(u32*)myVertex.color[i] = Common::swap32(color); + } + } + + // Transform color + TransformUnit::TransformColor(&myVertex, outVertex); + + // Feed texture matrices + int idx = 0; + + myVertex.texMtx[0] = (vertexDesc.Tex0MatIdx) ? texMtxIdx[idx++] : MatrixIndexA.Tex0MtxIdx; + myVertex.texMtx[1] = (vertexDesc.Tex1MatIdx) ? texMtxIdx[idx++] : MatrixIndexA.Tex1MtxIdx; + myVertex.texMtx[2] = (vertexDesc.Tex2MatIdx) ? texMtxIdx[idx++] : MatrixIndexA.Tex2MtxIdx; + myVertex.texMtx[3] = (vertexDesc.Tex3MatIdx) ? texMtxIdx[idx++] : MatrixIndexA.Tex3MtxIdx; + myVertex.texMtx[4] = (vertexDesc.Tex4MatIdx) ? texMtxIdx[idx++] : MatrixIndexB.Tex4MtxIdx; + myVertex.texMtx[5] = (vertexDesc.Tex5MatIdx) ? texMtxIdx[idx++] : MatrixIndexB.Tex5MtxIdx; + myVertex.texMtx[6] = (vertexDesc.Tex6MatIdx) ? texMtxIdx[idx++] : MatrixIndexB.Tex6MtxIdx; + myVertex.texMtx[7] = (vertexDesc.Tex7MatIdx) ? 
texMtxIdx[idx++] : MatrixIndexB.Tex7MtxIdx; + + // Feed texture coordinate data + for (int i = 0; i < 8; ++i) + { + if (vertexDecl.texcoords[i].enable) + memcpy((u8 *)&myVertex.texCoords[i], bufferPos + vertexDecl.texcoords[i].offset, sizeof(float) * 2); + } + + // Transform texture coordinate + TransformUnit::TransformTexCoord(&myVertex, outVertex, false); + + // Render the vertex in SW to calculate bbox + vtxUnit.SetupVertex(); +} + +// Save state +void DoState(PointerWrap &p) +{ + p.Do(active); + p.Do(coords); +} + +} // namespace BoundingBox diff --git a/Source/Core/VideoCommon/BoundingBox.h b/Source/Core/VideoCommon/BoundingBox.h new file mode 100644 index 0000000000..d6952b184e --- /dev/null +++ b/Source/Core/VideoCommon/BoundingBox.h @@ -0,0 +1,41 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#pragma once + +#include "VideoCommon/VertexLoader.h" + +// Bounding Box manager + +namespace BoundingBox +{ + +// Determines if bounding box is active +extern bool active; + +// Bounding box current coordinates +extern u16 coords[4]; + +enum +{ + LEFT = 0, + RIGHT = 1, + TOP = 2, + BOTTOM = 3 +}; + +// Current position matrix index +extern u8 posMtxIdx; + +// Texture matrix indexes +extern u8 texMtxIdx[8]; + +void LOADERDECL SetVertexBufferPosition(); +void LOADERDECL Update(); +void Prepare(const VAT & vat, int primitive, const TVtxDesc & vtxDesc, const PortableVertexDeclaration & vtxDecl); + +// Save state +void DoState(PointerWrap &p); + +}; // end of namespace BoundingBox diff --git a/Source/Core/VideoCommon/PixelEngine.cpp b/Source/Core/VideoCommon/PixelEngine.cpp index da36e2d019..877acf916c 100644 --- a/Source/Core/VideoCommon/PixelEngine.cpp +++ b/Source/Core/VideoCommon/PixelEngine.cpp @@ -16,6 +16,7 @@ #include "Core/HW/ProcessorInterface.h" #include "VideoCommon/CommandProcessor.h" #include "VideoCommon/PixelEngine.h" +#include "VideoCommon/BoundingBox.h" #include 
"VideoCommon/RenderBase.h" #include "VideoCommon/VideoCommon.h" @@ -106,9 +107,6 @@ static int et_SetFinishOnMainThread; static volatile u32 interruptSetToken = 0; static volatile u32 interruptSetFinish = 0; -u16 bbox[4]; -bool bbox_active; - enum { INT_CAUSE_PE_TOKEN = 0x200, // GP Token @@ -128,9 +126,6 @@ void DoState(PointerWrap &p) p.Do(g_bSignalFinishInterrupt); p.Do(interruptSetToken); p.Do(interruptSetFinish); - - p.Do(bbox); - p.Do(bbox_active); } void UpdateInterrupts(); @@ -155,13 +150,6 @@ void Init() et_SetTokenOnMainThread = CoreTiming::RegisterEvent("SetToken", SetToken_OnMainThread); et_SetFinishOnMainThread = CoreTiming::RegisterEvent("SetFinish", SetFinish_OnMainThread); - - bbox[0] = 0x80; - bbox[1] = 0xA0; - bbox[2] = 0x80; - bbox[3] = 0xA0; - - bbox_active = false; } void RegisterMMIO(MMIO::Mapping* mmio, u32 base) @@ -244,8 +232,8 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base) { mmio->Register(base | (PE_BBOX_LEFT + 2 * i), MMIO::ComplexRead([i](u32) { - bbox_active = false; - return bbox[i]; + BoundingBox::active = false; + return BoundingBox::coords[i]; }), MMIO::InvalidWrite() ); diff --git a/Source/Core/VideoCommon/PixelEngine.h b/Source/Core/VideoCommon/PixelEngine.h index 28df0ddb6e..b79449f5c8 100644 --- a/Source/Core/VideoCommon/PixelEngine.h +++ b/Source/Core/VideoCommon/PixelEngine.h @@ -18,10 +18,10 @@ enum PE_ALPHAREAD = 0x08, // Alpha Read PE_CTRL_REGISTER = 0x0a, // Control PE_TOKEN_REG = 0x0e, // Token - PE_BBOX_LEFT = 0x10, // Flip Left - PE_BBOX_RIGHT = 0x12, // Flip Right - PE_BBOX_TOP = 0x14, // Flip Top - PE_BBOX_BOTTOM = 0x16, // Flip Bottom + PE_BBOX_LEFT = 0x10, // Bounding Box Left Pixel + PE_BBOX_RIGHT = 0x12, // Bounding Box Right Pixel + PE_BBOX_TOP = 0x14, // Bounding Box Top Pixel + PE_BBOX_BOTTOM = 0x16, // Bounding Box Bottom Pixel // NOTE: Order not verified // These indicate the number of quads that are being used as input/output for each particular stage @@ -63,8 +63,4 @@ void SetToken(const u16 _token, 
const int _bSetTokenAcknowledge); void SetFinish(); UPEAlphaReadReg GetAlphaReadMode(); -// Bounding box functionality. Paper Mario (both) are a couple of the few games that use it. -extern u16 bbox[4]; -extern bool bbox_active; - } // end of namespace PixelEngine diff --git a/Source/Core/VideoCommon/VertexLoader.cpp b/Source/Core/VideoCommon/VertexLoader.cpp index b2f3cd7a4e..8bef28cc90 100644 --- a/Source/Core/VideoCommon/VertexLoader.cpp +++ b/Source/Core/VideoCommon/VertexLoader.cpp @@ -14,6 +14,7 @@ #include "VideoCommon/LookUpTables.h" #include "VideoCommon/PixelEngine.h" #include "VideoCommon/VertexLoader.h" +#include "VideoCommon/BoundingBox.h" #include "VideoCommon/VertexLoader_Color.h" #include "VideoCommon/VertexLoader_Normal.h" #include "VideoCommon/VertexLoader_Position.h" @@ -21,8 +22,6 @@ #include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" -//BBox -#include "VideoCommon/XFMemory.h" #define COMPILED_CODE_SIZE 4096 @@ -45,21 +44,6 @@ int colElements[2]; float posScale; float tcScale[8]; -// bbox variables -// bbox must read vertex position, so convert it to this buffer -static float s_bbox_vertex_buffer[3]; -static u8 *s_bbox_pCurBufferPointer_orig; -static int s_bbox_primitive; -static struct Point -{ - s32 x; - s32 y; - float z; -} s_bbox_points[3]; -static u8 s_bbox_currPoint; -static u8 s_bbox_loadedPoints; -static const u8 s_bbox_primitivePoints[8] = { 3, 0, 3, 3, 3, 2, 2, 1 }; - static const float fractionTable[32] = { 1.0f / (1U << 0), 1.0f / (1U << 1), 1.0f / (1U << 2), 1.0f / (1U << 3), 1.0f / (1U << 4), 1.0f / (1U << 5), 1.0f / (1U << 6), 1.0f / (1U << 7), @@ -75,7 +59,7 @@ using namespace Gen; static void LOADERDECL PosMtx_ReadDirect_UByte() { - s_curposmtx = DataReadU8() & 0x3f; + BoundingBox::posMtxIdx = s_curposmtx = DataReadU8() & 0x3f; PRIM_LOG("posmtx: %d, ", s_curposmtx); } @@ -85,437 +69,12 @@ static void LOADERDECL PosMtx_Write() DataWrite(0); DataWrite(0); DataWrite(0); - - // Resetting current position matrix 
to default is needed for bbox to behave - s_curposmtx = (u8) g_main_cp_state.matrix_index_a.PosNormalMtxIdx; -} - -static void LOADERDECL UpdateBoundingBoxPrepare() -{ - if (!PixelEngine::bbox_active) - return; - - // set our buffer as videodata buffer, so we will get a copy of the vertex positions - // this is a big hack, but so we can use the same converting function then without bbox - s_bbox_pCurBufferPointer_orig = VertexManager::s_pCurBufferPointer; - VertexManager::s_pCurBufferPointer = (u8*)s_bbox_vertex_buffer; -} - -static inline bool UpdateBoundingBoxVars() -{ - switch (s_bbox_primitive) - { - // Quads: fill 0,1,2 (check),1 (check, clear, repeat) - case 0: - ++s_bbox_loadedPoints; - if (s_bbox_loadedPoints == 3) - { - s_bbox_currPoint = 1; - return true; - } - if (s_bbox_loadedPoints == 4) - { - s_bbox_loadedPoints = 0; - s_bbox_currPoint = 0; - return true; - } - ++s_bbox_currPoint; - return false; - - // Triangles: 0,1,2 (check, clear, repeat) - case 2: - ++s_bbox_loadedPoints; - if (s_bbox_loadedPoints == 3) - { - s_bbox_loadedPoints = 0; - s_bbox_currPoint = 0; - return true; - } - ++s_bbox_currPoint; - return false; - - // Triangle strip: 0, 1, 2 (check), 0 (check), 1, (check), 2 (check, repeat checking 0, 1, 2) - case 3: - if (++s_bbox_currPoint == 3) - s_bbox_currPoint = 0; - - if (s_bbox_loadedPoints == 2) - return true; - - ++s_bbox_loadedPoints; - return false; - - // Triangle fan: 0,1,2 (check), 1 (check), 2 (check, repeat checking 1,2) - case 4: - s_bbox_currPoint ^= s_bbox_currPoint ? 
3 : 1; - - if (s_bbox_loadedPoints == 2) - return true; - - ++s_bbox_loadedPoints; - return false; - - // Lines: 0,1 (check, clear, repeat) - case 5: - ++s_bbox_loadedPoints; - if (s_bbox_loadedPoints == 2) - { - s_bbox_loadedPoints = 0; - s_bbox_currPoint = 0; - return true; - } - ++s_bbox_currPoint; - return false; - - // Line strip: 0,1 (check), 0 (check), 1 (check, repeat checking 0,1) - case 6: - s_bbox_currPoint ^= 1; - - if (s_bbox_loadedPoints == 1) - return true; - - ++s_bbox_loadedPoints; - return false; - - // Points: 0 (check, clear, repeat) - case 7: - return true; - - // This should not happen! - default: - return false; - } -} - -static void LOADERDECL UpdateBoundingBox() -{ - if (!PixelEngine::bbox_active) - return; - - // Reset videodata pointer - VertexManager::s_pCurBufferPointer = s_bbox_pCurBufferPointer_orig; - - // Copy vertex pointers - memcpy(VertexManager::s_pCurBufferPointer, s_bbox_vertex_buffer, 12); - VertexManager::s_pCurBufferPointer += 12; - - // We must transform the just loaded point by the current world and projection matrix - in software - float transformed[3]; - float screenPoint[3]; - - // We need to get the raw projection values for the bounding box calculation - // to work properly. That means, no projection hacks! 
- const float * const orig_point = s_bbox_vertex_buffer; - const float * const world_matrix = (float*)xfmem.posMatrices + s_curposmtx * 4; - const float * const proj_matrix = xfmem.projection.rawProjection; - - // Transform by world matrix - // Only calculate what we need, discard the rest - transformed[0] = orig_point[0] * world_matrix[0] + orig_point[1] * world_matrix[1] + orig_point[2] * world_matrix[2] + world_matrix[3]; - transformed[1] = orig_point[0] * world_matrix[4] + orig_point[1] * world_matrix[5] + orig_point[2] * world_matrix[6] + world_matrix[7]; - - // Transform by projection matrix - switch (xfmem.projection.type) - { - // Perspective projection, we must divide by w - case GX_PERSPECTIVE: - transformed[2] = orig_point[0] * world_matrix[8] + orig_point[1] * world_matrix[9] + orig_point[2] * world_matrix[10] + world_matrix[11]; - screenPoint[0] = (transformed[0] * proj_matrix[0] + transformed[2] * proj_matrix[1]) / (-transformed[2]); - screenPoint[1] = (transformed[1] * proj_matrix[2] + transformed[2] * proj_matrix[3]) / (-transformed[2]); - screenPoint[2] = ((transformed[2] * proj_matrix[4] + proj_matrix[5]) * (1.0f - (float) 1e-7)) / (-transformed[2]); - break; - - // Orthographic projection - case GX_ORTHOGRAPHIC: - screenPoint[0] = transformed[0] * proj_matrix[0] + proj_matrix[1]; - screenPoint[1] = transformed[1] * proj_matrix[2] + proj_matrix[3]; - - // We don't really have to care about z here - screenPoint[2] = -0.2f; - break; - - default: - ERROR_LOG(VIDEO, "Unknown projection type: %d", xfmem.projection.type); - screenPoint[0] = screenPoint[1] = screenPoint[2] = 1; - } - - // Convert to screen space and add the point to the list - round like the real hardware - s_bbox_points[s_bbox_currPoint].x = (((s32) (0.5f + (16.0f * (screenPoint[0] * xfmem.viewport.wd + (xfmem.viewport.xOrig - 342.0f))))) + 3) >> 4; - s_bbox_points[s_bbox_currPoint].y = (((s32) (0.5f + (16.0f * (screenPoint[1] * xfmem.viewport.ht + (xfmem.viewport.yOrig - 342.0f))))) + 
3) >> 4; - s_bbox_points[s_bbox_currPoint].z = screenPoint[2]; - - // Update point list for primitive - bool check_bbox = UpdateBoundingBoxVars(); - - // If we do not have enough points to check the bounding box yet, we are done for now - if (!check_bbox) - return; - - // How many points does our primitive have? - const u8 numPoints = s_bbox_primitivePoints[s_bbox_primitive]; - - // If the primitive is a point, update the bounding box now - if (numPoints == 1) - { - Point & p = s_bbox_points[0]; - - // Point is out of bounds - if (p.x < 0 || p.x > 607 || p.y < 0 || p.y > 479 || p.z >= 0.0f) - return; - - // Point is in bounds. Update bounding box if necessary and return - PixelEngine::bbox[0] = (p.x < PixelEngine::bbox[0]) ? p.x : PixelEngine::bbox[0]; - PixelEngine::bbox[1] = (p.x > PixelEngine::bbox[1]) ? p.x : PixelEngine::bbox[1]; - PixelEngine::bbox[2] = (p.y < PixelEngine::bbox[2]) ? p.y : PixelEngine::bbox[2]; - PixelEngine::bbox[3] = (p.y > PixelEngine::bbox[3]) ? p.y : PixelEngine::bbox[3]; - - return; - } - - // Now comes the fun part. We must clip the triangles/lines to the viewport - also in software - Point & p0 = s_bbox_points[0], &p1 = s_bbox_points[1], &p2 = s_bbox_points[2]; - - // Check for z-clip. This crude method is required for Mickey's Magical Mirror, at least - if ((p0.z > 0.0f) || (p1.z > 0.0f) || ((numPoints == 3) && (p2.z > 0.0f))) - return; - - // Check points for bounds - u8 b0 = ((p0.x > 0) ? 1 : 0) | ((p0.y > 0) ? 2 : 0) | ((p0.x > 607) ? 4 : 0) | ((p0.y > 479) ? 8 : 0); - u8 b1 = ((p1.x > 0) ? 1 : 0) | ((p1.y > 0) ? 2 : 0) | ((p1.x > 607) ? 4 : 0) | ((p1.y > 479) ? 8 : 0); - - // Let's be practical... If we only have a line, setting b2 to 3 saves an "if"-clause later on - u8 b2 = 3; - - // Otherwise if we have a triangle, we need to check the third point - if (numPoints == 3) - b2 = ((p2.x > 0) ? 1 : 0) | ((p2.y > 0) ? 2 : 0) | ((p2.x > 607) ? 4 : 0) | ((p2.y > 479) ? 
8 : 0); - - // These are the internal bbox vars - s32 left = 608, right = -1, top = 480, bottom = -1; - - // If the polygon is inside viewport, let's update the bounding box and be done with it - if ((b0 == 3) && (b0 == b1) && (b0 == b2)) - { - left = std::min(p0.x, p1.x); - top = std::min(p0.y, p1.y); - right = std::max(p0.x, p1.x); - bottom = std::max(p0.y, p1.y); - - // Triangle - if (numPoints == 3) - { - left = std::min(left, p2.x); - top = std::min(top, p2.y); - right = std::max(right, p2.x); - bottom = std::max(bottom, p2.y); - } - - // Update bounding box - PixelEngine::bbox[0] = (left < PixelEngine::bbox[0]) ? left : PixelEngine::bbox[0]; - PixelEngine::bbox[1] = (right > PixelEngine::bbox[1]) ? right : PixelEngine::bbox[1]; - PixelEngine::bbox[2] = (top < PixelEngine::bbox[2]) ? top : PixelEngine::bbox[2]; - PixelEngine::bbox[3] = (bottom > PixelEngine::bbox[3]) ? bottom : PixelEngine::bbox[3]; - - return; - } - - // If it is not inside, then either it is completely outside, or it needs clipping. - // Check the primitive's lines - u8 i0 = b0 ^ b1; - u8 i1 = (numPoints == 3) ? (b1 ^ b2) : i0; - u8 i2 = (numPoints == 3) ? 
(b0 ^ b2) : i0; - - // Primitive out of bounds - return - if (!(i0 | i1 | i2)) - return; - - // First point inside viewport - update internal bbox - if (b0 == 3) - { - left = p0.x; - top = p0.y; - right = p0.x; - bottom = p0.y; - } - - // Second point inside - if (b1 == 3) - { - left = std::min(p1.x, left); - top = std::min(p1.y, top); - right = std::max(p1.x, right); - bottom = std::max(p1.y, bottom); - } - - // Third point inside - if ((b2 == 3) && (numPoints == 3)) - { - left = std::min(p2.x, left); - top = std::min(p2.y, top); - right = std::max(p2.x, right); - bottom = std::max(p2.y, bottom); - } - - // Triangle equation vars - float m, c; - - // Some definitions to help with rounding later on - const float highNum = 89374289734.0f; - const float roundUp = 0.001f; - - // Intersection result - s32 s; - - // First line intersects - if (i0) - { - m = (p1.x - p0.x) ? ((p1.y - p0.y) / (p1.x - p0.x)) : highNum; - c = p0.y - (m * p0.x); - if (i0 & 1) - { - s = (s32)(c + roundUp); - if (s >= 0 && s <= 479) - left = 0; - top = std::min(s, top); - bottom = std::max(s, bottom); - } - - if (i0 & 2) - { - s = (s32)((-c / m) + roundUp); - if (s >= 0 && s <= 607) - top = 0; - left = std::min(s, left); - right = std::max(s, right); - } - - if (i0 & 4) - { - s = (s32)((m * 607) + c + roundUp); - if (s >= 0 && s <= 479) - right = 607; - top = std::min(s, top); - bottom = std::max(s, bottom); - } - - if (i0 & 8) - { - s = (s32)(((479 - c) / m) + roundUp); - if (s >= 0 && s <= 607) - bottom = 479; - left = std::min(s, left); - right = std::max(s, right); - } - } - - // Only check other lines if we are dealing with a triangle - if (numPoints == 3) - { - // Second line intersects - if (i1) - { - m = (p2.x - p1.x) ? 
((p2.y - p1.y) / (p2.x - p1.x)) : highNum; - c = p1.y - (m * p1.x); - if (i1 & 1) - { - s = (s32)(c + roundUp); - if (s >= 0 && s <= 479) - left = 0; - top = std::min(s, top); - bottom = std::max(s, bottom); - } - - if (i1 & 2) - { - s = (s32)((-c / m) + roundUp); - if (s >= 0 && s <= 607) - top = 0; - left = std::min(s, left); - right = std::max(s, right); - } - - if (i1 & 4) - { - s = (s32)((m * 607) + c + roundUp); - if (s >= 0 && s <= 479) - right = 607; - top = std::min(s, top); - bottom = std::max(s, bottom); - } - - if (i1 & 8) - { - s = (s32)(((479 - c) / m) + roundUp); - if (s >= 0 && s <= 607) - bottom = 479; - left = std::min(s, left); - right = std::max(s, right); - } - } - - // Third line intersects - if (i2) - { - m = (p2.x - p0.x) ? ((p2.y - p0.y) / (p2.x - p0.x)) : highNum; - c = p0.y - (m * p0.x); - if (i2 & 1) - { - s = (s32)(c + roundUp); - if (s >= 0 && s <= 479) - left = 0; - top = std::min(s, top); - bottom = std::max(s, bottom); - } - - if (i2 & 2) - { - s = (s32)((-c / m) + roundUp); - if (s >= 0 && s <= 607) - top = 0; - left = std::min(s, left); - right = std::max(s, right); - } - - if (i2 & 4) - { - s = (s32)((m * 607) + c + roundUp); - if (s >= 0 && s <= 479) - right = 607; - top = std::min(s, top); - bottom = std::max(s, bottom); - } - - if (i2 & 8) - { - s = (s32)(((479 - c) / m) + roundUp); - if (s >= 0 && s <= 607) - bottom = 479; - left = std::min(s, left); - right = std::max(s, right); - } - } - } - - // Wrong bounding box values, discard this polygon (it is outside) - if (left > 607 || top > 479 || right < 0 || bottom < 0) - return; - - // Trim bounding box to viewport - left = (left < 0) ? 0 : left; - top = (top < 0) ? 0 : top; - right = (right > 607) ? 607 : right; - bottom = (bottom > 479) ? 479 : bottom; - - // Update bounding box - PixelEngine::bbox[0] = (left < PixelEngine::bbox[0]) ? left : PixelEngine::bbox[0]; - PixelEngine::bbox[1] = (right > PixelEngine::bbox[1]) ? 
right : PixelEngine::bbox[1]; - PixelEngine::bbox[2] = (top < PixelEngine::bbox[2]) ? top : PixelEngine::bbox[2]; - PixelEngine::bbox[3] = (bottom > PixelEngine::bbox[3]) ? bottom : PixelEngine::bbox[3]; } static void LOADERDECL TexMtx_ReadDirect_UByte() { - s_curtexmtx[s_texmtxread] = DataReadU8() & 0x3f; + BoundingBox::texMtxIdx[s_texmtxread] = s_curtexmtx[s_texmtxread] = DataReadU8() & 0x3f; + PRIM_LOG("texmtx%d: %d, ", s_texmtxread, s_curtexmtx[s_texmtxread]); s_texmtxread++; } @@ -611,6 +170,10 @@ void VertexLoader::CompileVertexTranslator() m_numPipelineStages = 0; #endif + // Get the pointer to this vertex's buffer data for the bounding box + if (g_ActiveConfig.bUseBBox) + WriteCall(BoundingBox::SetVertexBufferPosition); + // Colors const u64 col[2] = {m_VtxDesc.Color0, m_VtxDesc.Color1}; // TextureCoord @@ -643,16 +206,8 @@ void VertexLoader::CompileVertexTranslator() if (m_VtxDesc.Tex7MatIdx) {m_VertexSize += 1; components |= VB_HAS_TEXMTXIDX7; WriteCall(TexMtx_ReadDirect_UByte); } // Write vertex position loader - if (g_ActiveConfig.bUseBBox) - { - WriteCall(UpdateBoundingBoxPrepare); - WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements)); - WriteCall(UpdateBoundingBox); - } - else - { - WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements)); - } + WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements)); + m_VertexSize += VertexLoader_Position::GetSize(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements); nat_offset += 12; m_native_vtx_decl.position.components = 3; @@ -826,6 +381,10 @@ void VertexLoader::CompileVertexTranslator() } } + // Update the bounding box + if (g_ActiveConfig.bUseBBox) + WriteCall(BoundingBox::Update); + if (m_VtxDesc.PosMatIdx) { WriteCall(PosMtx_Write); @@ -901,9 +460,7 @@ void VertexLoader::SetupRunVertices(const VAT& vat, int primitive, int const cou 
colElements[i] = m_VtxAttr.color[i].Elements; // Prepare bounding box - s_bbox_primitive = primitive; - s_bbox_currPoint = 0; - s_bbox_loadedPoints = 0; + BoundingBox::Prepare(vat, primitive, m_VtxDesc, m_native_vtx_decl); } void VertexLoader::ConvertVertices ( int count ) diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj b/Source/Core/VideoCommon/VideoCommon.vcxproj index 7f7de6b040..bda73f396c 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj @@ -1,4 +1,4 @@ - + @@ -36,6 +36,7 @@ + @@ -80,6 +81,7 @@ + @@ -147,4 +149,4 @@ - + \ No newline at end of file diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters index 24dcf3b610..c662422200 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters @@ -1,4 +1,4 @@ - + @@ -140,6 +140,9 @@ Decoding + + Util + @@ -269,8 +272,11 @@ Vertex Loading + + Util + - + \ No newline at end of file diff --git a/Source/Core/VideoCommon/VideoState.cpp b/Source/Core/VideoCommon/VideoState.cpp index dd0eb2fb88..1f23d5318d 100644 --- a/Source/Core/VideoCommon/VideoState.cpp +++ b/Source/Core/VideoCommon/VideoState.cpp @@ -14,6 +14,7 @@ #include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VideoState.h" #include "VideoCommon/XFMemory.h" +#include "VideoCommon/BoundingBox.h" static void DoState(PointerWrap &p) { @@ -52,6 +53,10 @@ static void DoState(PointerWrap &p) VertexManager::DoState(p); p.DoMarker("VertexManager"); + BoundingBox::DoState(p); + p.DoMarker("BoundingBox"); + + // TODO: search for more data that should be saved and add it here }