// Mirror of https://github.com/melonDS-emu/melonDS.git
// (synced 2025-07-14 17:59:46 -06:00; 1876 lines, 59 KiB, C++)
/*
|
|
Copyright 2016-2023 melonDS team
|
|
|
|
This file is part of melonDS.
|
|
|
|
melonDS is free software: you can redistribute it and/or modify it under
|
|
the terms of the GNU General Public License as published by the Free
|
|
Software Foundation, either version 3 of the License, or (at your option)
|
|
any later version.
|
|
|
|
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License along
|
|
with melonDS. If not, see http://www.gnu.org/licenses/.
|
|
*/
|
|
|
|
#include "GPU3D_Soft.h"
|
|
|
|
#include <algorithm>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include "NDS.h"
|
|
#include "GPU.h"
|
|
|
|
namespace melonDS
|
|
{
|
|
|
|
// Forward declaration of a free function named RenderThreadFunc.
// NOTE(review): SetupRenderThread() starts the thread with the member
// SoftRenderer::RenderThreadFunc, so this free declaration looks unused in the
// visible part of the file -- confirm before relying on or removing it.
void RenderThreadFunc();
|
|
|
|
|
|
void SoftRenderer::StopRenderThread()
|
|
{
|
|
if (RenderThreadRunning.load(std::memory_order_relaxed))
|
|
{
|
|
RenderThreadRunning = false;
|
|
Platform::Semaphore_Post(Sema_RenderStart);
|
|
Platform::Thread_Wait(RenderThread);
|
|
Platform::Thread_Free(RenderThread);
|
|
RenderThread = nullptr;
|
|
}
|
|
}
|
|
|
|
void SoftRenderer::SetupRenderThread()
|
|
{
|
|
if (Threaded)
|
|
{
|
|
if (!RenderThreadRunning.load(std::memory_order_relaxed))
|
|
{
|
|
RenderThreadRunning = true;
|
|
RenderThread = Platform::Thread_Create(std::bind(&SoftRenderer::RenderThreadFunc, this));
|
|
}
|
|
|
|
// otherwise more than one frame can be queued up at once
|
|
Platform::Semaphore_Reset(Sema_RenderStart);
|
|
|
|
if (RenderThreadRendering)
|
|
Platform::Semaphore_Wait(Sema_RenderDone);
|
|
|
|
Platform::Semaphore_Reset(Sema_RenderDone);
|
|
Platform::Semaphore_Reset(Sema_RenderStart);
|
|
Platform::Semaphore_Reset(Sema_ScanlineCount);
|
|
|
|
Platform::Semaphore_Post(Sema_RenderStart);
|
|
}
|
|
else
|
|
{
|
|
StopRenderThread();
|
|
}
|
|
}
|
|
|
|
|
|
// Constructs the software renderer.
// gpu:      GPU instance this renderer is bound to (stored by reference).
// threaded: initial value of the Threaded setting.
// No thread is started here: the thread state is initialised to "not running"
// and only the three semaphores are created.
SoftRenderer::SoftRenderer(melonDS::GPU& gpu, bool threaded) noexcept
    : Renderer3D(false), GPU(gpu), Threaded(threaded)
{
    // thread bookkeeping starts out idle
    RenderThread = nullptr;
    RenderThreadRunning = false;
    RenderThreadRendering = false;

    Sema_RenderStart = Platform::Semaphore_Create();
    Sema_RenderDone = Platform::Semaphore_Create();
    Sema_ScanlineCount = Platform::Semaphore_Create();
}
|
|
|
|
// Destroys the renderer: the render thread (if any) is stopped before the
// semaphores created by the constructor are freed.
SoftRenderer::~SoftRenderer()
{
    // must come first: stopping the thread posts Sema_RenderStart
    StopRenderThread();

    Platform::Semaphore_Free(Sema_RenderStart);
    Platform::Semaphore_Free(Sema_RenderDone);
    Platform::Semaphore_Free(Sema_ScanlineCount);
}
|
|
|
|
void SoftRenderer::Reset()
|
|
{
|
|
memset(ColorBuffer, 0, BufferSize * 2 * 4);
|
|
memset(DepthBuffer, 0, BufferSize * 2 * 4);
|
|
memset(AttrBuffer, 0, BufferSize * 2 * 4);
|
|
|
|
PrevIsShadowMask = false;
|
|
|
|
SetupRenderThread();
|
|
}
|
|
|
|
void SoftRenderer::SetThreaded(bool threaded) noexcept
|
|
{
|
|
if (Threaded != threaded)
|
|
{
|
|
Threaded = threaded;
|
|
SetupRenderThread();
|
|
}
|
|
}
|
|
|
|
// Adds `cycles` to the raster timing counter of the selected scanline parity
// (odd or even) and checks it against the timing cap.
// Returns false while counter + RasterTimingCounterPrev is still below
// RasterTimingCap. Once the cap is reached, bit 12 of GPU3D.DispCnt is set and
// true is returned.
// NOTE(review): bit 12 is presumably the rendering-underflow flag -- confirm
// against the DISP3DCNT register description.
bool SoftRenderer::DoTimings(s32 cycles, bool odd)
{
    auto& counter = odd ? RasterTimingCounterOdd : RasterTimingCounterEven;
    counter += cycles;

    if ((counter + RasterTimingCounterPrev) >= RasterTimingCap)
    {
        GPU.GPU3D.DispCnt |= (1<<12);
        return true;
    }

    return false;
}
|
|
|
|
// Checks whether a span of `pixels` pixels fits into the remaining raster time
// of the selected scanline parity and returns the difference between the old
// span and the new span (0 when nothing has to be cut).
// Spans of 4 pixels or fewer always return 0. When a non-zero difference is
// computed, bit 12 of GPU3D.DispCnt is set as well.
//
// NOTE(review): `remaining` is unsigned, so `remaining > 0` is true for every
// value except exactly 0 -- a span that overruns the budget wraps around to a
// large positive value and is reported as fitting. This looks unintended, but
// the behaviour is kept as-is here; confirm the intended arithmetic.
s32 SoftRenderer::DoTimingsPixels(u32 pixels, bool odd)
{
    if (pixels <= 4) return 0;

    // cost of the span; the first 4 pixels are not charged
    const u32 spancost = (pixels - 4) * RasterFrac;

    // time left on this scanline before the cap is hit
    const u32 budget = RasterTimingCap - ((odd ? RasterTimingCounterOdd : RasterTimingCounterEven) + RasterTimingCounterPrev);

    const u32 remaining = budget - spancost;
    if (remaining > 0) return 0;

    GPU.GPU3D.DispCnt |= (1<<12);
    return pixels - (((remaining + (RasterFrac-1)) / RasterFrac) + 4);
}
|
|
|
|
void SoftRenderer::EndScanline(bool odd)
|
|
{
|
|
if (!odd)
|
|
{
|
|
RasterTimingCounterPrev += std::max(RasterTimingCounterOdd, RasterTimingCounterEven);
|
|
RasterTimingCounterPrev -= PerScanlineRecup; // wip
|
|
if (RasterTimingCounterPrev < 0) RasterTimingCounterPrev = 0;
|
|
// calc is wrong, seems to round up...?
|
|
GPU.GPU3D.RDLines = (RasterTimingCap - RasterTimingCounterPrev) / PerScanlineTiming;
|
|
// seems to display the lowest scanline buffer count reached during the current frame.
|
|
// we also caps it to 46 here, because this reg does that too for some reason.
|
|
if (GPU.GPU3D.RDLines > GPU.GPU3D.RDLinesMin) GPU.GPU3D.RDLines = GPU.GPU3D.RDLinesMin;
|
|
else if (GPU.GPU3D.RDLines < GPU.GPU3D.RDLinesMin) GPU.GPU3D.RDLinesMin = GPU.GPU3D.RDLines;
|
|
RasterTimingCounterOdd = 0;
|
|
RasterTimingCounterEven = 0;
|
|
}
|
|
}
|
|
|
|
// Fetches one texel.
// texparam: texture parameters as decoded below -- bits 0-15 base address
//           (in units of 8 bytes), bit 16/17 repeat in S/T, bit 18/19 flip in
//           S/T (only used together with repeat), bits 20-22 / 23-25 width and
//           height as 8 << n, bits 26-28 texture format, bit 29 makes palette
//           index 0 transparent for the 4/16/256-colour formats.
// texpal:   palette base; shifted left by 3 (4-colour format) or 4 (others).
// s, t:     texture coordinates; the low 4 bits are discarded (presumably a
//           fractional part -- confirm).
// color:    receives the 15-bit texel colour.
// alpha:    receives the texel alpha (0..31).
// Format 0 is not handled: *color and *alpha are left untouched in that case.
void SoftRenderer::TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha)
{
    u32 vramaddr = (texparam & 0xFFFF) << 3;

    const s32 width = 8 << ((texparam >> 20) & 0x7);
    const s32 height = 8 << ((texparam >> 23) & 0x7);

    // texture wrapping
    // TODO: optimize this somehow
    // testing shows that it's hardly worth optimizing, actually
    const auto wrap = [](s16 coord, s32 size, bool repeat, bool flip) -> s16
    {
        if (!repeat)
        {
            // clamp to the texture
            if (coord < 0) return 0;
            if (coord >= size) return static_cast<s16>(size-1);
            return coord;
        }

        // mirrored repeat: every other tile is flipped
        if (flip && (coord & size))
            return static_cast<s16>((size-1) - (coord & (size-1)));

        return static_cast<s16>(coord & (size-1));
    };

    s = wrap(static_cast<s16>(s >> 4), width, (texparam & (1<<16)) != 0, (texparam & (1<<18)) != 0);
    t = wrap(static_cast<s16>(t >> 4), height, (texparam & (1<<17)) != 0, (texparam & (1<<19)) != 0);

    // alpha used for palette index 0 in the paletted formats
    const u8 alpha0 = (texparam & (1<<29)) ? 0 : 31;

    // linear texel index, shared by every format except the compressed one
    const s32 texel = (t * width) + s;

    switch ((texparam >> 26) & 0x7)
    {
    case 1: // A3I5
        {
            vramaddr += texel;
            const u8 pixel = ReadVRAM_Texture<u8>(vramaddr);

            texpal <<= 4;
            *color = ReadVRAM_TexPal<u16>(texpal + ((pixel&0x1F)<<1));
            *alpha = ((pixel >> 3) & 0x1C) + (pixel >> 6);
        }
        break;

    case 2: // 4-color
        {
            vramaddr += (texel >> 2);
            const u8 packed = ReadVRAM_Texture<u8>(vramaddr);
            const u8 pixel = (packed >> ((s & 0x3) << 1)) & 0x3;

            texpal <<= 3;
            *color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
            *alpha = (pixel==0) ? alpha0 : 31;
        }
        break;

    case 3: // 16-color
        {
            vramaddr += (texel >> 1);
            const u8 packed = ReadVRAM_Texture<u8>(vramaddr);
            const u8 pixel = (s & 0x1) ? (packed >> 4) : (packed & 0xF);

            texpal <<= 4;
            *color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
            *alpha = (pixel==0) ? alpha0 : 31;
        }
        break;

    case 4: // 256-color
        {
            vramaddr += texel;
            const u8 pixel = ReadVRAM_Texture<u8>(vramaddr);

            texpal <<= 4;
            *color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
            *alpha = (pixel==0) ? alpha0 : 31;
        }
        break;

    case 5: // compressed
        {
            vramaddr += ((t & 0x3FC) * (width>>2)) + (s & 0x3FC);
            vramaddr += (t & 0x3);

            // address of the per-block palette info word
            u32 slot1addr = 0x20000 + ((vramaddr & 0x1FFFC) >> 1);
            if (vramaddr >= 0x40000)
                slot1addr += 0x10000;

            u8 val = ReadVRAM_Texture<u8>(vramaddr);
            val >>= (2 * (s & 0x3));

            const u16 palinfo = ReadVRAM_Texture<u16>(slot1addr);
            const u32 paloffset = (palinfo & 0x3FFF) << 2;
            texpal <<= 4;

            const u32 palbase = texpal + paloffset;
            const u32 mode = palinfo >> 14;

            // mixes palette colours 0 and 1 per channel:
            // (c0*w0 + c1*w1) >> shift
            const auto mix = [&](u32 w0, u32 w1, u32 shift) -> u16
            {
                const u16 color0 = ReadVRAM_TexPal<u16>(palbase);
                const u16 color1 = ReadVRAM_TexPal<u16>(palbase + 2);

                const u32 r0 = color0 & 0x001F;
                const u32 g0 = color0 & 0x03E0;
                const u32 b0 = color0 & 0x7C00;
                const u32 r1 = color1 & 0x001F;
                const u32 g1 = color1 & 0x03E0;
                const u32 b1 = color1 & 0x7C00;

                const u32 r = (r0*w0 + r1*w1) >> shift;
                const u32 g = ((g0*w0 + g1*w1) >> shift) & 0x03E0;
                const u32 b = ((b0*w0 + b1*w1) >> shift) & 0x7C00;

                return r | g | b;
            };

            switch (val & 0x3)
            {
            case 0:
                *color = ReadVRAM_TexPal<u16>(palbase);
                *alpha = 31;
                break;

            case 1:
                *color = ReadVRAM_TexPal<u16>(palbase + 2);
                *alpha = 31;
                break;

            case 2:
                if (mode == 1)
                    *color = mix(1, 1, 1);
                else if (mode == 3)
                    *color = mix(5, 3, 3);
                else
                    *color = ReadVRAM_TexPal<u16>(palbase + 4);
                *alpha = 31;
                break;

            case 3:
                if (mode == 2)
                {
                    *color = ReadVRAM_TexPal<u16>(palbase + 6);
                    *alpha = 31;
                }
                else if (mode == 3)
                {
                    *color = mix(3, 5, 3);
                    *alpha = 31;
                }
                else
                {
                    // fully transparent texel
                    *color = 0;
                    *alpha = 0;
                }
                break;
            }
        }
        break;

    case 6: // A5I3
        {
            vramaddr += texel;
            const u8 pixel = ReadVRAM_Texture<u8>(vramaddr);

            texpal <<= 4;
            *color = ReadVRAM_TexPal<u16>(texpal + ((pixel&0x7)<<1));
            *alpha = (pixel >> 3);
        }
        break;

    case 7: // direct color
        {
            vramaddr += (texel << 1);
            *color = ReadVRAM_Texture<u16>(vramaddr);
            *alpha = (*color & 0x8000) ? 31 : 0;
        }
        break;
    }
}
|
|
|
|
// depth test is 'less or equal' instead of 'less than' under the following conditions:
|
|
// * when drawing a front-facing pixel over an opaque back-facing pixel
|
|
// * when drawing wireframe edges, under certain conditions (TODO)
|
|
//
|
|
// range is different based on depth-buffering mode
|
|
// Z-buffering: +-0x200
|
|
// W-buffering: +-0xFF
|
|
|
|
// "Equal" depth test for Z-buffering: passes when dstz - z lies within
// [-0x200, +0x200]. dstattr is not used.
bool DepthTest_Equal_Z(s32 dstz, s32 z, u32 dstattr)
{
    // biasing by 0x200 turns the two-sided range check into a single unsigned compare
    const s32 biased = (dstz - z) + 0x200;
    return static_cast<u32>(biased) <= 0x400;
}
|
|
|
|
// "Equal" depth test for W-buffering: passes when dstz - z lies within
// [-0xFF, +0xFF]. dstattr is not used.
bool DepthTest_Equal_W(s32 dstz, s32 z, u32 dstattr)
{
    // biasing by 0xFF turns the two-sided range check into a single unsigned compare
    const s32 biased = (dstz - z) + 0xFF;
    return static_cast<u32>(biased) <= 0x1FE;
}
|
|
|
|
// Strict "less than" depth test: passes only when the incoming depth z is
// smaller than the stored depth dstz. dstattr is not used.
bool DepthTest_LessThan(s32 dstz, s32 z, u32 dstattr)
{
    return z < dstz;
}
|
|
|
|
// Depth test used for front-facing polygons: "less or equal" when the
// destination pixel is opaque and back facing (attribute bit 4 set, bit 22
// clear), strict "less than" otherwise.
bool DepthTest_LessThan_FrontFacing(s32 dstz, s32 z, u32 dstattr)
{
    const bool overOpaqueBackFace = (dstattr & 0x00400010) == 0x00000010;
    return overOpaqueBackFace ? (z <= dstz) : (z < dstz);
}
|
|
|
|
// Blends a source pixel over a destination pixel.
// srccolor / dstcolor: packed as R | G<<8 | B<<16 | A<<24 with 6-bit colour
//                      channels (the code masks each channel with 0x3F).
// alpha:               source alpha used as the blend weight.
// Returns srccolor unchanged when the destination alpha is 0. Otherwise the
// colour channels are mixed only if bit 3 of RenderDispCnt is set, and the
// resulting alpha is the larger of `alpha` and the destination alpha.
u32 SoftRenderer::AlphaBlend(u32 srccolor, u32 dstcolor, u32 alpha) noexcept
{
    const u32 dstalpha = dstcolor >> 24;

    // nothing underneath to blend with
    if (dstalpha == 0)
        return srccolor;

    u32 outR = srccolor & 0x3F;
    u32 outG = (srccolor >> 8) & 0x3F;
    u32 outB = (srccolor >> 16) & 0x3F;

    if (GPU.GPU3D.RenderDispCnt & (1<<3))
    {
        // weights are (alpha+1)/32 for the source and the remainder for the destination
        const u32 srcweight = alpha + 1;
        const u32 dstweight = 32 - srcweight;

        outR = ((outR * srcweight) + ((dstcolor & 0x3F) * dstweight)) >> 5;
        outG = ((outG * srcweight) + (((dstcolor >> 8) & 0x3F) * dstweight)) >> 5;
        outB = ((outB * srcweight) + (((dstcolor >> 16) & 0x3F) * dstweight)) >> 5;
    }

    const u32 outalpha = (alpha > dstalpha) ? alpha : dstalpha;

    return outR | (outG << 8) | (outB << 16) | (outalpha << 24);
}
|
|
|
|
// Computes the final colour of one polygon pixel.
// polygon:    polygon being drawn (Attr, TexParam and TexPalette are read).
// vr, vg, vb: interpolated vertex colour.
// s, t:       interpolated texture coordinates.
// Returns the colour packed as R | G<<8 | B<<16 | A<<24.
// Polygon mode 2 selects toon or highlight shading depending on bit 1 of
// RenderDispCnt; texturing is applied when bit 0 of RenderDispCnt is set and
// the texture format is non-zero.
u32 SoftRenderer::RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t)
{
    // splits a 15-bit colour into three channels, each expanded from 5 to 6 bits
    // (non-zero channels get their lowest bit set)
    const auto unpack = [](u16 packed, u8& cr, u8& cg, u8& cb)
    {
        cr = (packed << 1) & 0x3E; if (cr) cr++;
        cg = (packed >> 4) & 0x3E; if (cg) cg++;
        cb = (packed >> 9) & 0x3E; if (cb) cb++;
    };

    u8 r, g, b, a;

    const u32 blendmode = (polygon->Attr >> 4) & 0x3;
    const u32 polyalpha = (polygon->Attr >> 16) & 0x1F;
    const bool wireframe = (polyalpha == 0);

    const bool highlight = (blendmode == 2) && (GPU.GPU3D.RenderDispCnt & (1<<1));

    if (highlight)
    {
        // highlight mode: color is calculated normally
        // except all vertex color components are set
        // to the red component
        // the toon color is added to the final color
        vg = vr;
        vb = vr;
    }
    else if (blendmode == 2)
    {
        // toon mode: vertex color is replaced by toon color
        const u16 tooncolor = GPU.GPU3D.RenderToonTable[vr >> 1];
        unpack(tooncolor, vr, vg, vb);
    }

    const bool textured = (GPU.GPU3D.RenderDispCnt & (1<<0)) && (((polygon->TexParam >> 26) & 0x7) != 0);

    if (!textured)
    {
        r = vr;
        g = vg;
        b = vb;
        a = polyalpha;
    }
    else
    {
        u16 tcolor; u8 talpha;
        TextureLookup(polygon->TexParam, polygon->TexPalette, s, t, &tcolor, &talpha);

        u8 tr, tg, tb;
        unpack(tcolor, tr, tg, tb);

        if (blendmode & 0x1)
        {
            // decal: the texel alpha selects between vertex and texture colour
            if (talpha == 0)
            {
                r = vr;
                g = vg;
                b = vb;
            }
            else if (talpha == 31)
            {
                r = tr;
                g = tg;
                b = tb;
            }
            else
            {
                r = ((tr * talpha) + (vr * (31-talpha))) >> 5;
                g = ((tg * talpha) + (vg * (31-talpha))) >> 5;
                b = ((tb * talpha) + (vb * (31-talpha))) >> 5;
            }
            a = polyalpha;
        }
        else
        {
            // modulate
            r = ((tr+1) * (vr+1) - 1) >> 6;
            g = ((tg+1) * (vg+1) - 1) >> 6;
            b = ((tb+1) * (vb+1) - 1) >> 6;
            a = ((talpha+1) * (polyalpha+1) - 1) >> 5;
        }
    }

    if (highlight)
    {
        // add the toon colour on top, saturating each channel at 63
        const u16 tooncolor = GPU.GPU3D.RenderToonTable[vr >> 1];

        u8 hr, hg, hb;
        unpack(tooncolor, hr, hg, hb);

        r += hr;
        g += hg;
        b += hb;

        if (r > 63) r = 63;
        if (g > 63) g = 63;
        if (b > 63) b = 63;
    }

    // checkme: can wireframe polygons use texture alpha?
    if (wireframe) a = 31;

    return r | (g << 8) | (b << 16) | (a << 24);
}
|
|
|
|
// Blends a translucent pixel into the colour buffer and updates the depth and
// attribute buffers.
// pixeladdr: index into ColorBuffer / DepthBuffer / AttrBuffer.
// color:     source colour; its top byte is used as the blend alpha.
// z:         depth to store, or -1 to leave the depth buffer untouched.
// polyattr:  attribute bits of the polygon being drawn.
// shadow:    non-zero when drawing a shadow polygon.
// The pixel is skipped when the destination already carries the same polygon
// ID (see the comparisons below).
void SoftRenderer::PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow)
{
    const u32 dstattr = AttrBuffer[pixeladdr];
    u32 attr = (polyattr & 0xE0F0) | ((polyattr >> 8) & 0xFF0000) | (1<<22) | (dstattr & 0xFF001F0F);

    // translucent polygon IDs are compared by default;
    // for shadows, opaque pixels are also checked (against the opaque ID)
    const bool dstopaque = !(dstattr & (1<<22));
    const bool samepoly = (shadow && dstopaque)
        ? ((dstattr & 0x3F000000) == (polyattr & 0x3F000000))
        : ((dstattr & 0x007F0000) == (attr & 0x007F0000));

    if (samepoly)
        return;

    // fog flag: only kept when the destination had it set too
    if (!(dstattr & (1<<15)))
        attr &= ~(1<<15);

    const u32 blended = AlphaBlend(color, ColorBuffer[pixeladdr], color>>24);

    if (z != static_cast<u32>(-1))
        DepthBuffer[pixeladdr] = z;

    ColorBuffer[pixeladdr] = blended;
    AttrBuffer[pixeladdr] = attr;
}
|
|
|
|
// Advances the left edge of a polygon to the edge that covers scanline y and
// sets up its slope.
// Vertices are walked forwards (index + 1, wrapping to 0) for polygons with
// FacingView set and backwards otherwise, until the next vertex lies below y
// or the bottom vertex has been reached. rp->XL receives the X position
// returned by the slope setup.
void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y)
{
    Polygon* polygon = rp->PolyData;

    for (;;)
    {
        if (y < polygon->Vertices[rp->NextVL]->FinalPosition[1]) break;
        if (rp->CurVL == polygon->VBottom) break;

        rp->CurVL = rp->NextVL;

        if (polygon->FacingView)
        {
            rp->NextVL = rp->CurVL + 1;
            if (rp->NextVL >= polygon->NumVertices)
                rp->NextVL = 0;
        }
        else
        {
            rp->NextVL = rp->CurVL - 1;
            if ((s32)rp->NextVL < 0)
                rp->NextVL = polygon->NumVertices - 1;
        }
    }

    Vertex* from = polygon->Vertices[rp->CurVL];
    Vertex* to = polygon->Vertices[rp->NextVL];

    rp->XL = rp->SlopeL.Setup(from->FinalPosition[0], to->FinalPosition[0],
                              from->FinalPosition[1], to->FinalPosition[1],
                              polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y);
}
|
|
|
|
// Advances the right edge of a polygon to the edge that covers scanline y and
// sets up its slope.
// Vertices are walked backwards (index - 1, wrapping to the last vertex) for
// polygons with FacingView set and forwards otherwise, until the next vertex
// lies below y or the bottom vertex has been reached. rp->XR receives the X
// position returned by the slope setup.
void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 y)
{
    Polygon* polygon = rp->PolyData;

    for (;;)
    {
        if (y < polygon->Vertices[rp->NextVR]->FinalPosition[1]) break;
        if (rp->CurVR == polygon->VBottom) break;

        rp->CurVR = rp->NextVR;

        if (polygon->FacingView)
        {
            rp->NextVR = rp->CurVR - 1;
            if ((s32)rp->NextVR < 0)
                rp->NextVR = polygon->NumVertices - 1;
        }
        else
        {
            rp->NextVR = rp->CurVR + 1;
            if (rp->NextVR >= polygon->NumVertices)
                rp->NextVR = 0;
        }
    }

    Vertex* from = polygon->Vertices[rp->CurVR];
    Vertex* to = polygon->Vertices[rp->NextVR];

    rp->XR = rp->SlopeR.Setup(from->FinalPosition[0], to->FinalPosition[0],
                              from->FinalPosition[1], to->FinalPosition[1],
                              polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y);
}
|
|
|
|
// Prepares the per-polygon rasteriser state in `rp` for `polygon`.
// * Polygons whose top and bottom Y are equal get dummy slopes: the left edge
//   sits at the smallest and the right edge at the largest X among vertices
//   0, 1 and NumVertices-1.
// * Otherwise both edges start at the top vertex and are set up for the
//   polygon's top scanline.
void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* polygon)
{
    const u32 nverts = polygon->NumVertices;
    const s32 ytop = polygon->YTop, ybot = polygon->YBottom;

    rp->PolyData = polygon;

    if (ybot == ytop)
    {
        // zero-height polygon: find the horizontal extent among the candidate vertices
        u32 leftmost = 0, rightmost = 0;

        const auto consider = [&](u32 i)
        {
            if (polygon->Vertices[i]->FinalPosition[0] < polygon->Vertices[leftmost]->FinalPosition[0]) leftmost = i;
            if (polygon->Vertices[i]->FinalPosition[0] > polygon->Vertices[rightmost]->FinalPosition[0]) rightmost = i;
        };

        consider(1);
        consider(nverts - 1);

        rp->CurVL = leftmost; rp->NextVL = leftmost;
        rp->CurVR = rightmost; rp->NextVR = rightmost;

        rp->XL = rp->SlopeL.SetupDummy(polygon->Vertices[rp->CurVL]->FinalPosition[0]);
        rp->XR = rp->SlopeR.SetupDummy(polygon->Vertices[rp->CurVR]->FinalPosition[0]);
        return;
    }

    // both edges start out at the top vertex
    const u32 vtop = polygon->VTop;
    rp->CurVL = vtop;
    rp->CurVR = vtop;

    if (polygon->FacingView)
    {
        rp->NextVL = rp->CurVL + 1;
        if (rp->NextVL >= nverts) rp->NextVL = 0;
        rp->NextVR = rp->CurVR - 1;
        if ((s32)rp->NextVR < 0) rp->NextVR = nverts - 1;
    }
    else
    {
        rp->NextVL = rp->CurVL - 1;
        if ((s32)rp->NextVL < 0) rp->NextVL = nverts - 1;
        rp->NextVR = rp->CurVR + 1;
        if (rp->NextVR >= nverts) rp->NextVR = 0;
    }

    SetupPolygonLeftEdge(rp, ytop);
    SetupPolygonRightEdge(rp, ytop);
}
|
|
|
|
// Steps both edge slopes of the polygon once (left first, then right) and
// stores the X positions they return in rp->XL / rp->XR.
void SoftRenderer::Step(RendererPolygon* rp)
{
    const auto nextleft = rp->SlopeL.Step();
    rp->XL = nextleft;

    const auto nextright = rp->SlopeR.Step();
    rp->XR = nextright;
}
|
|
|
|
// Moves the left and/or right edge on to the next polygon edge when scanline y
// has reached the end of the current one. Polygons whose top and bottom Y are
// equal are left alone.
void SoftRenderer::CheckSlope(RendererPolygon* rp, s32 y)
{
    Polygon* polygon = rp->PolyData;

    if (polygon->YTop == polygon->YBottom)
        return;

    const bool leftEdgeEnded = (y >= polygon->Vertices[rp->NextVL]->FinalPosition[1]) && (rp->CurVL != polygon->VBottom);
    if (leftEdgeEnded)
        SetupPolygonLeftEdge(rp, y);

    const bool rightEdgeEnded = (y >= polygon->Vertices[rp->NextVR]->FinalPosition[1]) && (rp->CurVR != polygon->VBottom);
    if (rightEdgeEnded)
        SetupPolygonRightEdge(rp, y);
}
|
|
|
|
// Processes one scanline of a shadow mask polygon.
// Nothing is drawn: for every covered pixel the stencil buffer gets bit 0 set
// when the depth test against the top layer fails and, if the low attribute
// nibble of that pixel is non-zero, bit 1 when the test against the second
// layer (offset by BufferSize) fails.
// rp: rasteriser state of the polygon; its edges are advanced by one step.
// y:  scanline being processed; y & 1 selects the 256-entry stencil row.
void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
{
    Polygon* polygon = rp->PolyData;

    u32 polyalpha = (polygon->Attr >> 16) & 0x1F;
    const bool wireframe = (polyalpha == 0);

    bool (*fnDepthTest)(s32 dstz, s32 z, u32 dstattr);
    if (polygon->Attr & (1<<14))
        fnDepthTest = polygon->WBuffer ? DepthTest_Equal_W : DepthTest_Equal_Z;
    else if (polygon->FacingView)
        fnDepthTest = DepthTest_LessThan_FrontFacing;
    else
        fnDepthTest = DepthTest_LessThan;

    // the stencil row is only cleared for the first mask of a run of shadow masks
    if (!PrevIsShadowMask)
        memset(&StencilBuffer[256 * (y&0x1)], 0, 256);

    PrevIsShadowMask = true;

    // switch to the next polygon edge(s) if this scanline starts one
    CheckSlope(rp, y);

    s32 xstart = rp->XL;
    s32 xend = rp->XR;

    s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]);
    s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]);

    s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer);
    s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer);

    // right vertical edges are pushed 1px to the left as long as either:
    // the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen
    if (rp->SlopeR.Increment==0 && (rp->SlopeL.Increment!=0 || xstart != xend) && (xend != 0))
        xend--;

    // edges are always filled if antialiasing/edgemarking are enabled,
    // if the pixels are translucent and alpha blending is enabled, or if the polygon is wireframe
    const bool forcefill = (GPU.GPU3D.RenderDispCnt & ((1<<4)|(1<<5)))
                        || ((polyalpha < 31) && (GPU.GPU3D.RenderDispCnt & (1<<3)))
                        || wireframe;

    bool l_filledge, r_filledge;
    s32 l_edgelen, r_edgelen;
    s32 l_edgecov, r_edgecov; // filled in by EdgeParams, not needed for shadow masks

    // if the left and right edges are swapped, render backwards.
    if (xstart > xend)
    {
        Vertex* vlnext = polygon->Vertices[rp->NextVR];
        Vertex* vrnext = polygon->Vertices[rp->NextVL];

        rp->SlopeR.EdgeParams<true>(&l_edgelen, &l_edgecov);
        rp->SlopeL.EdgeParams<true>(&r_edgelen, &r_edgecov);

        std::swap(xstart, xend);
        std::swap(wl, wr);
        std::swap(zl, zr);

        // CHECKME: edge fill rules for swapped opaque shadow mask polygons
        if (forcefill)
        {
            l_filledge = true;
            r_filledge = true;
        }
        else
        {
            const bool bottomdiffers = (y == polygon->YBottom-1) && (vlnext->FinalPosition[0] != vrnext->FinalPosition[0]);

            l_filledge = (rp->SlopeR.Negative || !rp->SlopeR.XMajor)
                || (bottomdiffers && rp->SlopeR.XMajor);
            r_filledge = (!rp->SlopeL.Negative && rp->SlopeL.XMajor)
                || (!(rp->SlopeL.Negative && rp->SlopeL.XMajor) && rp->SlopeR.Increment==0)
                || (bottomdiffers && rp->SlopeL.XMajor);
        }
    }
    else
    {
        Vertex* vlnext = polygon->Vertices[rp->NextVL];
        Vertex* vrnext = polygon->Vertices[rp->NextVR];

        rp->SlopeL.EdgeParams<false>(&l_edgelen, &l_edgecov);
        rp->SlopeR.EdgeParams<false>(&r_edgelen, &r_edgecov);

        // CHECKME: edge fill rules for unswapped opaque shadow mask polygons
        if (forcefill)
        {
            l_filledge = true;
            r_filledge = true;
        }
        else
        {
            const bool bottomdiffers = (y == polygon->YBottom-1) && (vlnext->FinalPosition[0] != vrnext->FinalPosition[0]);

            l_filledge = (rp->SlopeL.Negative || !rp->SlopeL.XMajor)
                || (bottomdiffers && rp->SlopeL.XMajor)
                || ((rp->SlopeL.Increment == rp->SlopeR.Increment) && (xstart+l_edgelen == xend+1));
            r_filledge = (!rp->SlopeR.Negative && rp->SlopeR.XMajor)
                || (rp->SlopeR.Increment==0)
                || (bottomdiffers && rp->SlopeR.XMajor);
        }
    }

    // color/texcoord attributes aren't needed for shadow masks
    // all the pixels are guaranteed to have the same alpha
    // even if a texture is used (decal blending is used for shadows)
    // similarly, we can perform alpha test early (checkme)
    // NOTE(review): this early return leaves the function before the edge
    // slopes are stepped at the end -- confirm that is intended.
    if (wireframe) polyalpha = 31;
    if (polyalpha <= GPU.GPU3D.RenderAlphaRef) return;

    // in wireframe mode, there are special rules for equal Z (TODO)

    // non-zero on the polygon's first (0x4) and last (0x8) scanline
    int yedge = 0;
    if (y == polygon->YTop) yedge = 0x4;
    else if (y == polygon->YBottom-1) yedge = 0x8;

    s32 x = xstart;
    Interpolator<0> interpX(xstart, xend+1, wl, wr);

    if (x < 0) x = 0;

    // for shadow masks: set stencil bits where the depth test fails.
    // draw nothing.
    // runs the stencil update from the current x up to (excluding) `limit`
    const auto markSpan = [&](s32 limit)
    {
        for (; x < limit; x++)
        {
            u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x;

            interpX.SetX(x);

            const s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer);
            const u32 dstattr = AttrBuffer[pixeladdr];

            if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
                StencilBuffer[256*(y&0x1) + x] = 1;

            if (dstattr & 0xF)
            {
                // second layer of the same pixel
                pixeladdr += BufferSize;
                if (!fnDepthTest(DepthBuffer[pixeladdr], z, AttrBuffer[pixeladdr]))
                    StencilBuffer[256*(y&0x1) + x] |= 0x2;
            }
        }
    };

    s32 xlimit;

    // part 1: left edge
    xlimit = xstart+l_edgelen;
    if (xlimit > xend+1) xlimit = xend+1;
    if (xlimit > 256) xlimit = 256;

    if (l_filledge) markSpan(xlimit);
    else x = xlimit;

    // part 2: polygon inside
    xlimit = xend-r_edgelen+1;
    if (xlimit > xend+1) xlimit = xend+1;
    if (xlimit > 256) xlimit = 256;

    // wireframe polygons only have an interior on their first/last scanline
    if (wireframe && !yedge) x = std::max(x, xlimit);
    else markSpan(xlimit);

    // part 3: right edge
    xlimit = xend+1;
    if (xlimit > 256) xlimit = 256;

    if (r_filledge) markSpan(xlimit);

    Step(rp);
}
|
|
|
|
bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
|
|
{
|
|
Polygon* polygon = rp->PolyData;
|
|
u32 polyattr = (polygon->Attr & 0x3F008000);
|
|
if (!polygon->FacingView) polyattr |= (1<<4);
|
|
|
|
u32 polyalpha = (polygon->Attr >> 16) & 0x1F;
|
|
bool wireframe = (polyalpha == 0);
|
|
|
|
bool (*fnDepthTest)(s32 dstz, s32 z, u32 dstattr);
|
|
if (polygon->Attr & (1<<14))
|
|
fnDepthTest = polygon->WBuffer ? DepthTest_Equal_W : DepthTest_Equal_Z;
|
|
else if (polygon->FacingView)
|
|
fnDepthTest = DepthTest_LessThan_FrontFacing;
|
|
else
|
|
fnDepthTest = DepthTest_LessThan;
|
|
|
|
PrevIsShadowMask = false;
|
|
|
|
CheckSlope(rp, y);
|
|
|
|
if (DoTimings(PerPolyTiming, odd))
|
|
{
|
|
Step(rp);
|
|
return true;
|
|
}
|
|
|
|
Vertex *vlcur, *vlnext, *vrcur, *vrnext;
|
|
s32 xstart, xend;
|
|
bool l_filledge, r_filledge;
|
|
s32 l_edgelen, r_edgelen;
|
|
s32 l_edgecov, r_edgecov;
|
|
Interpolator<1>* interp_start;
|
|
Interpolator<1>* interp_end;
|
|
bool abortscanline = false; // to abort the rest of the scanline after finishing this polygon
|
|
|
|
xstart = rp->XL;
|
|
xend = rp->XR;
|
|
|
|
s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]);
|
|
s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]);
|
|
|
|
s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer);
|
|
s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer);
|
|
|
|
// right vertical edges are pushed 1px to the left as long as either:
|
|
// the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen
|
|
if (rp->SlopeR.Increment==0 && (rp->SlopeL.Increment!=0 || xstart != xend) && (xend != 0))
|
|
xend--;
|
|
|
|
// if the left and right edges are swapped, render backwards.
|
|
// on hardware, swapped edges seem to break edge length calculation,
|
|
// causing X-major edges to be rendered wrong when filled,
|
|
// and resulting in buggy looking anti-aliasing on X-major edges
|
|
|
|
if (xstart > xend)
|
|
{
|
|
vlcur = polygon->Vertices[rp->CurVR];
|
|
vlnext = polygon->Vertices[rp->NextVR];
|
|
vrcur = polygon->Vertices[rp->CurVL];
|
|
vrnext = polygon->Vertices[rp->NextVL];
|
|
|
|
interp_start = &rp->SlopeR.Interp;
|
|
interp_end = &rp->SlopeL.Interp;
|
|
|
|
rp->SlopeR.EdgeParams<true>(&l_edgelen, &l_edgecov);
|
|
rp->SlopeL.EdgeParams<true>(&r_edgelen, &r_edgecov);
|
|
|
|
std::swap(xstart, xend);
|
|
std::swap(wl, wr);
|
|
std::swap(zl, zr);
|
|
|
|
// edge fill rules for swapped opaque edges:
|
|
// * right edge is filled if slope > 1, or if the left edge = 0, but is never filled if it is < -1
|
|
// * left edge is filled if slope <= 1
|
|
// * the bottom-most pixel of negative x-major slopes are filled if they are next to a flat bottom edge
|
|
// edges are always filled if antialiasing/edgemarking are enabled,
|
|
// if the pixels are translucent and alpha blending is enabled, or if the polygon is wireframe
|
|
// checkme: do swapped line polygons exist?
|
|
if ((GPU.GPU3D.RenderDispCnt & ((1<<4)|(1<<5))) || ((polyalpha < 31) && (GPU.GPU3D.RenderDispCnt & (1<<3))) || wireframe)
|
|
{
|
|
l_filledge = true;
|
|
r_filledge = true;
|
|
}
|
|
else
|
|
{
|
|
l_filledge = (rp->SlopeR.Negative || !rp->SlopeR.XMajor)
|
|
|| (y == polygon->YBottom-1) && rp->SlopeR.XMajor && (vlnext->FinalPosition[0] != vrnext->FinalPosition[0]);
|
|
r_filledge = (!rp->SlopeL.Negative && rp->SlopeL.XMajor)
|
|
|| (!(rp->SlopeL.Negative && rp->SlopeL.XMajor) && rp->SlopeR.Increment==0)
|
|
|| (y == polygon->YBottom-1) && rp->SlopeL.XMajor && (vlnext->FinalPosition[0] != vrnext->FinalPosition[0]);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
vlcur = polygon->Vertices[rp->CurVL];
|
|
vlnext = polygon->Vertices[rp->NextVL];
|
|
vrcur = polygon->Vertices[rp->CurVR];
|
|
vrnext = polygon->Vertices[rp->NextVR];
|
|
|
|
interp_start = &rp->SlopeL.Interp;
|
|
interp_end = &rp->SlopeR.Interp;
|
|
|
|
rp->SlopeL.EdgeParams<false>(&l_edgelen, &l_edgecov);
|
|
rp->SlopeR.EdgeParams<false>(&r_edgelen, &r_edgecov);
|
|
|
|
// edge fill rules for unswapped opaque edges:
|
|
// * right edge is filled if slope > 1
|
|
// * left edge is filled if slope <= 1
|
|
// * edges with slope = 0 are always filled
|
|
// * the bottom-most pixel of negative x-major slopes are filled if they are next to a flat bottom edge
|
|
// * edges are filled if both sides are identical and fully overlapping
|
|
// edges are always filled if antialiasing/edgemarking are enabled,
|
|
// if the pixels are translucent and alpha blending is enabled, or if the polygon is wireframe
|
|
if ((GPU.GPU3D.RenderDispCnt & ((1<<4)|(1<<5))) || ((polyalpha < 31) && (GPU.GPU3D.RenderDispCnt & (1<<3))) || wireframe)
|
|
{
|
|
l_filledge = true;
|
|
r_filledge = true;
|
|
}
|
|
else
|
|
{
|
|
l_filledge = ((rp->SlopeL.Negative || !rp->SlopeL.XMajor)
|
|
|| (y == polygon->YBottom-1) && rp->SlopeL.XMajor && (vlnext->FinalPosition[0] != vrnext->FinalPosition[0]))
|
|
|| (rp->SlopeL.Increment == rp->SlopeR.Increment) && (xstart+l_edgelen == xend+1);
|
|
r_filledge = (!rp->SlopeR.Negative && rp->SlopeR.XMajor) || (rp->SlopeR.Increment==0)
|
|
|| (y == polygon->YBottom-1) && rp->SlopeR.XMajor && (vlnext->FinalPosition[0] != vrnext->FinalPosition[0]);
|
|
}
|
|
}
|
|
|
|
// interpolate attributes along Y
|
|
|
|
s32 rl = interp_start->Interpolate(vlcur->FinalColor[0], vlnext->FinalColor[0]);
|
|
s32 gl = interp_start->Interpolate(vlcur->FinalColor[1], vlnext->FinalColor[1]);
|
|
s32 bl = interp_start->Interpolate(vlcur->FinalColor[2], vlnext->FinalColor[2]);
|
|
|
|
s32 sl = interp_start->Interpolate(vlcur->TexCoords[0], vlnext->TexCoords[0]);
|
|
s32 tl = interp_start->Interpolate(vlcur->TexCoords[1], vlnext->TexCoords[1]);
|
|
|
|
s32 rr = interp_end->Interpolate(vrcur->FinalColor[0], vrnext->FinalColor[0]);
|
|
s32 gr = interp_end->Interpolate(vrcur->FinalColor[1], vrnext->FinalColor[1]);
|
|
s32 br = interp_end->Interpolate(vrcur->FinalColor[2], vrnext->FinalColor[2]);
|
|
|
|
s32 sr = interp_end->Interpolate(vrcur->TexCoords[0], vrnext->TexCoords[0]);
|
|
s32 tr = interp_end->Interpolate(vrcur->TexCoords[1], vrnext->TexCoords[1]);
|
|
|
|
// in wireframe mode, there are special rules for equal Z (TODO)
|
|
|
|
int yedge = 0;
|
|
if (y == polygon->YTop) yedge = 0x4;
|
|
else if (y == polygon->YBottom-1) yedge = 0x8;
|
|
int edge;
|
|
|
|
s32 x = xstart;
|
|
xend += 1;
|
|
Interpolator<0> interpX(xstart, xend, wl, wr);
|
|
|
|
if (x < 0) x = 0;
|
|
s32 xlimit;
|
|
|
|
s32 xcov = 0;
|
|
if (xend > 256) xend = 256;
|
|
|
|
// determine if the span can be rendered within the time allotted to the scanline
|
|
s32 diff = DoTimingsPixels(xend-x, odd);
|
|
if (diff != 0)
|
|
{
|
|
xend -= diff;
|
|
r_edgelen -= diff;
|
|
abortscanline = true;
|
|
}
|
|
|
|
// part 1: left edge
|
|
edge = yedge | 0x1;
|
|
xlimit = xstart+l_edgelen;
|
|
if (xlimit > xend) xlimit = xend;
|
|
if (l_edgecov & (1<<31))
|
|
{
|
|
xcov = (l_edgecov >> 12) & 0x3FF;
|
|
if (xcov == 0x3FF) xcov = 0;
|
|
}
|
|
|
|
|
|
if (!l_filledge) x = xlimit;
|
|
else for (; x < xlimit; x++)
|
|
{
|
|
|
|
u32 pixeladdr = (BufferOffset * ScanlineWidth) + x;
|
|
u32 dstattr = AttrBuffer[pixeladdr];
|
|
|
|
// check stencil buffer for shadows
|
|
if (polygon->IsShadow)
|
|
{
|
|
u8 stencil = StencilBuffer[256*(y&0x1) + x];
|
|
if (!stencil)
|
|
continue;
|
|
if (!(stencil & 0x1))
|
|
pixeladdr += BufferSize;
|
|
if (!(stencil & 0x2))
|
|
dstattr &= ~0xF; // quick way to prevent drawing the shadow under antialiased edges
|
|
}
|
|
|
|
interpX.SetX(x);
|
|
|
|
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer);
|
|
|
|
// if depth test against the topmost pixel fails, test
|
|
// against the pixel underneath
|
|
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
|
|
{
|
|
if (!(dstattr & 0xF) || pixeladdr >= BufferSize) continue;
|
|
|
|
pixeladdr += BufferSize;
|
|
dstattr = AttrBuffer[pixeladdr];
|
|
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
|
|
continue;
|
|
}
|
|
|
|
u32 vr = interpX.Interpolate(rl, rr);
|
|
u32 vg = interpX.Interpolate(gl, gr);
|
|
u32 vb = interpX.Interpolate(bl, br);
|
|
|
|
s16 s = interpX.Interpolate(sl, sr);
|
|
s16 t = interpX.Interpolate(tl, tr);
|
|
|
|
u32 color = RenderPixel(polygon, vr>>3, vg>>3, vb>>3, s, t);
|
|
u8 alpha = color >> 24;
|
|
|
|
// alpha test
|
|
if (alpha <= GPU.GPU3D.RenderAlphaRef) continue;
|
|
|
|
if (alpha == 31)
|
|
{
|
|
u32 attr = polyattr | edge;
|
|
|
|
if (GPU.GPU3D.RenderDispCnt & (1<<4))
|
|
{
|
|
// anti-aliasing: all edges are rendered
|
|
|
|
// calculate coverage
|
|
s32 cov = l_edgecov;
|
|
if (cov & (1<<31))
|
|
{
|
|
cov = xcov >> 5;
|
|
if (cov > 31) cov = 31;
|
|
xcov += (l_edgecov & 0x3FF);
|
|
}
|
|
attr |= (cov << 8);
|
|
|
|
// push old pixel down if needed
|
|
if (pixeladdr < BufferSize)
|
|
{
|
|
ColorBuffer[pixeladdr+BufferSize] = ColorBuffer[pixeladdr];
|
|
DepthBuffer[pixeladdr+BufferSize] = DepthBuffer[pixeladdr];
|
|
AttrBuffer[pixeladdr+BufferSize] = AttrBuffer[pixeladdr];
|
|
}
|
|
}
|
|
|
|
DepthBuffer[pixeladdr] = z;
|
|
ColorBuffer[pixeladdr] = color;
|
|
AttrBuffer[pixeladdr] = attr;
|
|
}
|
|
else
|
|
{
|
|
if (!(polygon->Attr & (1<<11))) z = -1;
|
|
PlotTranslucentPixel(pixeladdr, color, z, polyattr, polygon->IsShadow);
|
|
|
|
// blend with bottom pixel too, if needed
|
|
if ((dstattr & 0xF) && (pixeladdr < BufferSize))
|
|
PlotTranslucentPixel(pixeladdr+BufferSize, color, z, polyattr, polygon->IsShadow);
|
|
}
|
|
}
|
|
|
|
// part 2: polygon inside
|
|
edge = yedge;
|
|
xlimit = xend-r_edgelen;
|
|
if (xlimit > xend) xlimit = xend;
|
|
|
|
if (wireframe && !edge) x = std::max(x, xlimit);
|
|
else for (; x < xlimit; x++)
|
|
{
|
|
|
|
u32 pixeladdr = (BufferOffset * ScanlineWidth) + x;
|
|
u32 dstattr = AttrBuffer[pixeladdr];
|
|
|
|
// check stencil buffer for shadows
|
|
if (polygon->IsShadow)
|
|
{
|
|
u8 stencil = StencilBuffer[256*(y&0x1) + x];
|
|
if (!stencil)
|
|
continue;
|
|
if (!(stencil & 0x1))
|
|
pixeladdr += BufferSize;
|
|
if (!(stencil & 0x2))
|
|
dstattr &= ~0xF; // quick way to prevent drawing the shadow under antialiased edges
|
|
}
|
|
|
|
interpX.SetX(x);
|
|
|
|
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer);
|
|
|
|
// if depth test against the topmost pixel fails, test
|
|
// against the pixel underneath
|
|
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
|
|
{
|
|
if (!(dstattr & 0xF) || pixeladdr >= BufferSize) continue;
|
|
|
|
pixeladdr += BufferSize;
|
|
dstattr = AttrBuffer[pixeladdr];
|
|
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
|
|
continue;
|
|
}
|
|
|
|
u32 vr = interpX.Interpolate(rl, rr);
|
|
u32 vg = interpX.Interpolate(gl, gr);
|
|
u32 vb = interpX.Interpolate(bl, br);
|
|
|
|
s16 s = interpX.Interpolate(sl, sr);
|
|
s16 t = interpX.Interpolate(tl, tr);
|
|
|
|
u32 color = RenderPixel(polygon, vr>>3, vg>>3, vb>>3, s, t);
|
|
u8 alpha = color >> 24;
|
|
|
|
// alpha test
|
|
if (alpha <= GPU.GPU3D.RenderAlphaRef) continue;
|
|
|
|
if (alpha == 31)
|
|
{
|
|
u32 attr = polyattr | edge;
|
|
|
|
if ((GPU.GPU3D.RenderDispCnt & (1<<4)) && (attr & 0xF))
|
|
{
|
|
// anti-aliasing: all edges are rendered
|
|
|
|
// set coverage to avoid black lines from anti-aliasing
|
|
attr |= (0x1F << 8);
|
|
|
|
// push old pixel down if needed
|
|
if (pixeladdr < BufferSize)
|
|
{
|
|
ColorBuffer[pixeladdr+BufferSize] = ColorBuffer[pixeladdr];
|
|
DepthBuffer[pixeladdr+BufferSize] = DepthBuffer[pixeladdr];
|
|
AttrBuffer[pixeladdr+BufferSize] = AttrBuffer[pixeladdr];
|
|
}
|
|
}
|
|
|
|
DepthBuffer[pixeladdr] = z;
|
|
ColorBuffer[pixeladdr] = color;
|
|
AttrBuffer[pixeladdr] = attr;
|
|
}
|
|
else
|
|
{
|
|
if (!(polygon->Attr & (1<<11))) z = -1;
|
|
PlotTranslucentPixel(pixeladdr, color, z, polyattr, polygon->IsShadow);
|
|
|
|
// blend with bottom pixel too, if needed
|
|
if ((dstattr & 0xF) && (pixeladdr < BufferSize))
|
|
PlotTranslucentPixel(pixeladdr+BufferSize, color, z, polyattr, polygon->IsShadow);
|
|
}
|
|
}
|
|
|
|
// part 3: right edge
|
|
edge = yedge | 0x2;
|
|
xlimit = xend;
|
|
if (r_edgecov & (1<<31))
|
|
{
|
|
xcov = (r_edgecov >> 12) & 0x3FF;
|
|
if (xcov == 0x3FF) xcov = 0;
|
|
}
|
|
|
|
if (r_filledge)
|
|
for (; x < xlimit; x++)
|
|
{
|
|
|
|
u32 pixeladdr = (BufferOffset * ScanlineWidth) + x;
|
|
u32 dstattr = AttrBuffer[pixeladdr];
|
|
|
|
// check stencil buffer for shadows
|
|
if (polygon->IsShadow)
|
|
{
|
|
u8 stencil = StencilBuffer[256*(y&0x1) + x];
|
|
if (!stencil)
|
|
continue;
|
|
if (!(stencil & 0x1))
|
|
pixeladdr += BufferSize;
|
|
if (!(stencil & 0x2))
|
|
dstattr &= ~0xF; // quick way to prevent drawing the shadow under antialiased edges
|
|
}
|
|
|
|
interpX.SetX(x);
|
|
|
|
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer);
|
|
|
|
// if depth test against the topmost pixel fails, test
|
|
// against the pixel underneath
|
|
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
|
|
{
|
|
if (!(dstattr & 0xF) || pixeladdr >= BufferSize) continue;
|
|
|
|
pixeladdr += BufferSize;
|
|
dstattr = AttrBuffer[pixeladdr];
|
|
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
|
|
continue;
|
|
}
|
|
|
|
u32 vr = interpX.Interpolate(rl, rr);
|
|
u32 vg = interpX.Interpolate(gl, gr);
|
|
u32 vb = interpX.Interpolate(bl, br);
|
|
|
|
s16 s = interpX.Interpolate(sl, sr);
|
|
s16 t = interpX.Interpolate(tl, tr);
|
|
|
|
u32 color = RenderPixel(polygon, vr>>3, vg>>3, vb>>3, s, t);
|
|
u8 alpha = color >> 24;
|
|
|
|
// alpha test
|
|
if (alpha <= GPU.GPU3D.RenderAlphaRef) continue;
|
|
|
|
if (alpha == 31)
|
|
{
|
|
u32 attr = polyattr | edge;
|
|
|
|
if (GPU.GPU3D.RenderDispCnt & (1<<4))
|
|
{
|
|
// anti-aliasing: all edges are rendered
|
|
|
|
// calculate coverage
|
|
s32 cov = r_edgecov;
|
|
if (cov & (1<<31))
|
|
{
|
|
cov = 0x1F - (xcov >> 5);
|
|
if (cov < 0) cov = 0;
|
|
xcov += (r_edgecov & 0x3FF);
|
|
}
|
|
attr |= (cov << 8);
|
|
|
|
// push old pixel down if needed
|
|
if (pixeladdr < BufferSize)
|
|
{
|
|
ColorBuffer[pixeladdr+BufferSize] = ColorBuffer[pixeladdr];
|
|
DepthBuffer[pixeladdr+BufferSize] = DepthBuffer[pixeladdr];
|
|
AttrBuffer[pixeladdr+BufferSize] = AttrBuffer[pixeladdr];
|
|
}
|
|
}
|
|
|
|
DepthBuffer[pixeladdr] = z;
|
|
ColorBuffer[pixeladdr] = color;
|
|
AttrBuffer[pixeladdr] = attr;
|
|
}
|
|
else
|
|
{
|
|
if (!(polygon->Attr & (1<<11))) z = -1;
|
|
PlotTranslucentPixel(pixeladdr, color, z, polyattr, polygon->IsShadow);
|
|
|
|
// blend with bottom pixel too, if needed
|
|
if ((dstattr & 0xF) && (pixeladdr < BufferSize))
|
|
PlotTranslucentPixel(pixeladdr+BufferSize, color, z, polyattr, polygon->IsShadow);
|
|
}
|
|
}
|
|
Step(rp);
|
|
return abortscanline;
|
|
}
|
|
|
|
// Renders scanline y for the first npolys entries of PolygonList.
// Returns true if rendering of this scanline was cut short, i.e. DoTimings() or
// RenderPolygonScanline() reported an abort. Once that happens, the remaining
// polygons are not drawn on this line; only their slopes are advanced.
// Always advances BufferOffset (wrapping at 8) and calls EndScanline(odd).
bool SoftRenderer::RenderScanline(s32 y, int npolys, bool odd)
{
    bool aborted = false;

    for (int idx = 0; idx < npolys; idx++)
    {
        RendererPolygon* rp = &PolygonList[idx];
        Polygon* poly = rp->PolyData;

        if (aborted)
        {
            // scanline was aborted by an earlier polygon: keep the edge walkers in sync only
            CheckSlope(rp, y);
            Step(rp);
            continue;
        }

        const bool atTop = (y == poly->YTop);
        const bool atBottom = (y == poly->YBottom);

        if (atBottom && !atTop)
        {
            // line just below the polygon: nothing is drawn, but it is still charged via DoTimings
            if (DoTimings(EmptyPolyScanline, odd))
                aborted = true;
        }
        else if (y >= poly->YTop && (y < poly->YBottom || (atTop && poly->YBottom == poly->YTop)))
        {
            // shadow mask polygons are currently not rendered here
            // (RenderShadowMaskScanline(rp, y) is disabled)
            if (!poly->IsShadowMask)
            {
                if (RenderPolygonScanline(rp, y, odd))
                    aborted = true;
            }
        }
    }

    BufferOffset = (BufferOffset + 1) & 0x7; // loop if == 8
    EndScanline(odd);
    return aborted;
}
|
|
|
|
// Returns the fog density for the pixel at pixeladdr, derived from its value in
// DepthBuffer and the fog offset/shift/density table held in GPU.GPU3D.
// The result is linearly interpolated between two adjacent density table entries;
// results of 127 or more are returned as 128.
u32 SoftRenderer::CalculateFogDensity(u32 pixeladdr)
{
    const u32 depth = DepthBuffer[pixeladdr];

    // pixels in front of the fog offset use table entry 0 with no fractional part
    u32 tableIndex = 0;
    u32 tableFrac = 0;

    if (!(depth < GPU.GPU3D.RenderFogOffset))
    {
        // technically: Z difference is shifted right by two, then shifted left by fog shift
        // then bit 0-16 are the fractional part and bit 17-31 are the density index
        // on hardware, the final value can overflow the 32-bit range with a shift big enough,
        // causing fog to 'wrap around' and accidentally apply to larger Z ranges
        u32 delta = depth;
        delta -= GPU.GPU3D.RenderFogOffset;
        const u32 scaled = (delta >> 2) << GPU.GPU3D.RenderFogShift;

        tableIndex = scaled >> 17;
        if (tableIndex >= 32)
            tableIndex = 32; // clamp to the last interval; fraction stays 0
        else
            tableFrac = scaled & 0x1FFFF;
    }

    // checkme (may be too precise?)
    const u32 lower = GPU.GPU3D.RenderFogDensityTable[tableIndex] * (0x20000 - tableFrac);
    const u32 upper = GPU.GPU3D.RenderFogDensityTable[tableIndex + 1] * tableFrac;

    u32 density = (lower + upper) >> 17;
    if (density >= 127) density = 128;

    return density;
}
|
|
|
|
// Applies the per-scanline post-processing (fog, then anti-aliasing) to one line of
// ColorBuffer and moves the result through RDBuffer into FinalBuffer.
//   y              - destination line index in FinalBuffer
//   rdbufferoffset - line index in RDBuffer used for this scanline
//   late           - if set, FinalBuffer receives what RDBuffer held before this
//                    scanline is copied into it; otherwise it receives this scanline
//   odd, finish    - select the ColorBuffer line to process: BufferOffset-2 when
//                    finish is set, BufferOffset-4 otherwise, plus one when odd is false
void SoftRenderer::ScanlineFinalPass(s32 y, u8 rdbufferoffset, bool late, bool odd, bool finish)
{
    // to consider:
    // clearing all polygon fog flags if the master flag isn't set?
    // merging all final pass loops into one?

    u8 tempoffset = BufferOffset - (finish ? 2 : 4) + (!odd);
    if (tempoffset > 7) tempoffset -= 0xF8; // handle underflows

    // address of the first pixel of the line being processed
    const u32 rowbase = tempoffset * ScanlineWidth;

    /*if (GPU.GPU3D.RenderDispCnt & (1<<5))
    {
        // edge marking
        // only applied to topmost pixels

        for (int x = 0; x < 256; x++)
        {
            u32 pixeladdr = rowbase + x;

            u32 attr = AttrBuffer[pixeladdr];
            if (!(attr & 0xF)) continue;

            u32 polyid = attr >> 24; // opaque polygon IDs are used for edgemarking
            u32 z = DepthBuffer[pixeladdr];

            if (((polyid != (AttrBuffer[pixeladdr-1] >> 24)) && (z < DepthBuffer[pixeladdr-1])) ||
                ((polyid != (AttrBuffer[pixeladdr+1] >> 24)) && (z < DepthBuffer[pixeladdr+1])) ||
                ((polyid != (AttrBuffer[pixeladdr-ScanlineWidth] >> 24)) && (z < DepthBuffer[pixeladdr-ScanlineWidth])) ||
                ((polyid != (AttrBuffer[pixeladdr+ScanlineWidth] >> 24)) && (z < DepthBuffer[pixeladdr+ScanlineWidth])))
            {
                u16 edgecolor = GPU.GPU3D.RenderEdgeTable[polyid >> 3];
                u32 edgeR = (edgecolor << 1) & 0x3E; if (edgeR) edgeR++;
                u32 edgeG = (edgecolor >> 4) & 0x3E; if (edgeG) edgeG++;
                u32 edgeB = (edgecolor >> 9) & 0x3E; if (edgeB) edgeB++;

                ColorBuffer[pixeladdr] = edgeR | (edgeG << 8) | (edgeB << 16) | (ColorBuffer[pixeladdr] & 0xFF000000);

                // break antialiasing coverage (checkme)
                AttrBuffer[pixeladdr] = (AttrBuffer[pixeladdr] & 0xFFFFE0FF) | 0x00001000;
            }
        }
    }*/

    if (GPU.GPU3D.RenderDispCnt & (1<<7))
    {
        // fog

        // hardware testing shows that the fog step is 0x80000>>SHIFT
        // basically, the depth values used in GBAtek need to be
        // multiplied by 0x200 to match Z-buffer values

        // fog is applied to the topmost two pixels, which is required for
        // proper antialiasing

        // TODO: check the 'fog alpha glitch with small Z' GBAtek talks about

        // when bit 6 is set, only the alpha channel is fogged
        const bool blendRGB = !(GPU.GPU3D.RenderDispCnt & (1<<6));

        u32 fogR = (GPU.GPU3D.RenderFogColor << 1) & 0x3E; if (fogR) fogR++;
        u32 fogG = (GPU.GPU3D.RenderFogColor >> 4) & 0x3E; if (fogG) fogG++;
        u32 fogB = (GPU.GPU3D.RenderFogColor >> 9) & 0x3E; if (fogB) fogB++;
        u32 fogA = (GPU.GPU3D.RenderFogColor >> 16) & 0x1F;

        // blends the fog color into the pixel stored at addr, weighted by its fog density
        auto fogPixel = [&](u32 addr)
        {
            const u32 density = CalculateFogDensity(addr);
            const u32 keep = 128 - density;

            const u32 src = ColorBuffer[addr];
            u32 r = src & 0x3F;
            u32 g = (src >> 8) & 0x3F;
            u32 b = (src >> 16) & 0x3F;
            u32 a = (src >> 24) & 0x1F;

            if (blendRGB)
            {
                r = ((fogR * density) + (r * keep)) >> 7;
                g = ((fogG * density) + (g * keep)) >> 7;
                b = ((fogB * density) + (b * keep)) >> 7;
            }

            a = ((fogA * density) + (a * keep)) >> 7;

            ColorBuffer[addr] = r | (g << 8) | (b << 16) | (a << 24);
        };

        for (int x = 0; x < 256; x++)
        {
            u32 pixeladdr = rowbase + x;

            // top pixel: fogged if its fog flag (attr bit 15) is set
            const u32 topattr = AttrBuffer[pixeladdr];
            if (topattr & (1<<15))
                fogPixel(pixeladdr);

            // fog for lower pixel: only looked at when the top pixel has edge flags set
            if (!(topattr & 0xF)) continue;
            pixeladdr += BufferSize;

            if (AttrBuffer[pixeladdr] & (1<<15))
                fogPixel(pixeladdr);
        }
    }

    if (GPU.GPU3D.RenderDispCnt & (1<<4))
    {
        // anti-aliasing

        // edges were flagged and their coverages calculated during rendering
        // this is where such edge pixels are blended with the pixels underneath

        for (int x = 0; x < 256; x++)
        {
            const u32 top = rowbase + x;

            const u32 attr = AttrBuffer[top];
            if (!(attr & 0xF)) continue;

            u32 coverage = (attr >> 8) & 0x1F;

            // fully covered: the top pixel is kept untouched
            if (coverage == 0x1F) continue;

            // not covered at all: the bottom pixel shows through entirely
            if (coverage == 0)
            {
                ColorBuffer[top] = ColorBuffer[top+BufferSize];
                continue;
            }

            const u32 topcolor = ColorBuffer[top];
            u32 topR = topcolor & 0x3F;
            u32 topG = (topcolor >> 8) & 0x3F;
            u32 topB = (topcolor >> 16) & 0x3F;
            u32 topA = (topcolor >> 24) & 0x1F;

            const u32 botcolor = ColorBuffer[top+BufferSize];
            const u32 botR = botcolor & 0x3F;
            const u32 botG = (botcolor >> 8) & 0x3F;
            const u32 botB = (botcolor >> 16) & 0x3F;
            const u32 botA = (botcolor >> 24) & 0x1F;

            coverage++;
            const u32 remainder = 32 - coverage;

            // only blend color if the bottom pixel isn't fully transparent
            if (botA > 0)
            {
                topR = ((topR * coverage) + (botR * remainder)) >> 5;
                topG = ((topG * coverage) + (botG * remainder)) >> 5;
                topB = ((topB * coverage) + (botB * remainder)) >> 5;
            }

            // alpha is always blended
            topA = ((topA * coverage) + (botA * remainder)) >> 5;

            ColorBuffer[top] = topR | (topG << 8) | (topB << 16) | (topA << 24);
        }
    }

    // hand the finished line over: ColorBuffer -> RDBuffer -> FinalBuffer
    auto* rdline = &RDBuffer[rdbufferoffset*ScanlineWidth];
    auto* outline = &FinalBuffer[y*ScanlineWidth];
    auto* srcline = &ColorBuffer[tempoffset*ScanlineWidth];

    // late line: output the previous RDBuffer contents before they get replaced
    if (late)
        memcpy(outline, rdline, 4 * ScanlineWidth);

    memcpy(rdline, srcline, 4 * ScanlineWidth);

    // on-time line: output the line that was just stored in RDBuffer
    if (!late)
        memcpy(outline, rdline, 4 * ScanlineWidth);
}
|
|
|
|
// Clears the two buffer lines starting at BufferOffset (color, depth and attributes)
// in preparation for rendering scanlines y and y+1.
// If RenderDispCnt bit 14 is set, color/depth/fog flag are read per pixel from
// texture VRAM (0x40000 for color, 0x60000 for depth), scrolled by the offsets
// stored in RenderClearAttr2; otherwise the flat values from RenderClearAttr1/2 are used.
void SoftRenderer::ClearBuffers(s32 y)
{
    const u32 clearz = ((GPU.GPU3D.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF;
    u32 polyid = GPU.GPU3D.RenderClearAttr1 & 0x3F000000; // this sets the opaque polygonID

    // expands a color channel already positioned in bits 1-5 to 6 bits
    // TODO: confirm color conversion
    auto expand = [](u32 chan) -> u32
    {
        chan &= 0x3E;
        return chan ? chan + 1 : 0;
    };

    // clear the screen

    if (GPU.GPU3D.RenderDispCnt & (1<<14))
    {
        // 8-bit offsets: they wrap around on overflow
        u8 xoff = (GPU.GPU3D.RenderClearAttr2 >> 16) & 0xFF;
        u8 yoff = ((GPU.GPU3D.RenderClearAttr2 >> 24) & 0xFF) + y;

        for (int line = 0; line < 2; line++, yoff++)
        {
            const u32 rowbase = (BufferOffset+line) * ScanlineWidth;

            for (int x = 0; x < 256; x++, xoff++)
            {
                const u16 colorval = ReadVRAM_Texture<u16>(0x40000 + (yoff << 9) + (xoff << 1));
                const u16 depthval = ReadVRAM_Texture<u16>(0x60000 + (yoff << 9) + (xoff << 1));

                const u32 r = expand(colorval << 1);
                const u32 g = expand(colorval >> 4);
                const u32 b = expand(colorval >> 9);
                const u32 a = (colorval & 0x8000) ? 0x1F000000 : 0;

                const u32 pixeladdr = rowbase + x;
                ColorBuffer[pixeladdr] = r | (g << 8) | (b << 16) | a;
                DepthBuffer[pixeladdr] = ((depthval & 0x7FFF) * 0x200) + 0x1FF;
                AttrBuffer[pixeladdr] = polyid | (depthval & 0x8000);
            }
        }
    }
    else
    {
        const u32 r = expand(GPU.GPU3D.RenderClearAttr1 << 1);
        const u32 g = expand(GPU.GPU3D.RenderClearAttr1 >> 4);
        const u32 b = expand(GPU.GPU3D.RenderClearAttr1 >> 9);
        const u32 a = (GPU.GPU3D.RenderClearAttr1 >> 16) & 0x1F;
        const u32 color = r | (g << 8) | (b << 16) | (a << 24);

        polyid |= (GPU.GPU3D.RenderClearAttr1 & 0x8000);

        for (int line = 0; line < 2; line++)
        {
            const u32 rowbase = (BufferOffset+line) * ScanlineWidth;

            for (int x = 0; x < 256; x++)
            {
                const u32 pixeladdr = rowbase + x;
                ColorBuffer[pixeladdr] = color;
                DepthBuffer[pixeladdr] = clearz;
                AttrBuffer[pixeladdr] = polyid;
            }
        }
    }
}
|
|
|
|
void SoftRenderer::RenderPolygons(bool threaded, Polygon** polygons, int npolys)
|
|
{
|
|
int j = 0;
|
|
for (int i = 0; i < npolys; i++)
|
|
{
|
|
if (polygons[i]->Degenerate) continue;
|
|
SetupPolygon(&PolygonList[j++], polygons[i]);
|
|
}
|
|
|
|
s32 y = 0;
|
|
s8 prevbufferline = -2;
|
|
bool latebuffer[192] = {};
|
|
|
|
for (u8 quarter = 0; quarter < 4; quarter++)
|
|
for (u8 bufferline = 0; bufferline < 48; bufferline += 2)
|
|
{
|
|
ClearBuffers(y);
|
|
latebuffer[y] = RenderScanline(y, j, true);
|
|
latebuffer[y+1] = RenderScanline(y+1, j, false);
|
|
|
|
|
|
if (prevbufferline >= 0)
|
|
{
|
|
if (!latebuffer[y-2]) latebuffer[y] = false;
|
|
|
|
if (!latebuffer[y-1]) latebuffer[y+1] = false;
|
|
|
|
ScanlineFinalPass(y-2, prevbufferline, latebuffer[y-2], true, false);
|
|
ScanlineFinalPass(y-1, prevbufferline+1, latebuffer[y-1], false, false);
|
|
}
|
|
|
|
y += 2;
|
|
prevbufferline = bufferline;
|
|
|
|
if (threaded)
|
|
Platform::Semaphore_Post(Sema_ScanlineCount);
|
|
}
|
|
|
|
ScanlineFinalPass(190, prevbufferline, latebuffer[190], true, true);
|
|
ScanlineFinalPass(191, prevbufferline+1, latebuffer[191], false, true);
|
|
|
|
if (threaded)
|
|
Platform::Semaphore_Post(Sema_ScanlineCount);
|
|
}
|
|
|
|
void SoftRenderer::VCount144()
|
|
{
|
|
if (RenderThreadRunning.load(std::memory_order_relaxed) && !GPU.GPU3D.AbortFrame)
|
|
Platform::Semaphore_Wait(Sema_RenderDone);
|
|
|
|
RasterTimingCounterPrev = 0;
|
|
RasterTimingCounterOdd = 0;
|
|
RasterTimingCounterEven = 0;
|
|
}
|
|
|
|
void SoftRenderer::RenderFrame()
|
|
{
|
|
auto textureDirty = GPU.VRAMDirty_Texture.DeriveState(GPU.VRAMMap_Texture, GPU);
|
|
auto texPalDirty = GPU.VRAMDirty_TexPal.DeriveState(GPU.VRAMMap_TexPal, GPU);
|
|
|
|
bool textureChanged = GPU.MakeVRAMFlat_TextureCoherent(textureDirty);
|
|
bool texPalChanged = GPU.MakeVRAMFlat_TexPalCoherent(texPalDirty);
|
|
|
|
FrameIdentical = !(textureChanged || texPalChanged) && GPU.GPU3D.RenderFrameIdentical;
|
|
|
|
if (RenderThreadRunning.load(std::memory_order_relaxed))
|
|
{
|
|
Platform::Semaphore_Post(Sema_RenderStart);
|
|
}
|
|
else if (!FrameIdentical) RenderPolygons(false, &GPU.GPU3D.RenderPolygonRAM[0], GPU.GPU3D.RenderNumPolygons);
|
|
}
|
|
|
|
void SoftRenderer::RestartFrame()
|
|
{
|
|
SetupRenderThread();
|
|
}
|
|
|
|
void SoftRenderer::RenderThreadFunc()
|
|
{
|
|
for (;;)
|
|
{
|
|
Platform::Semaphore_Wait(Sema_RenderStart);
|
|
if (!RenderThreadRunning) return;
|
|
|
|
RenderThreadRendering = true;
|
|
if (FrameIdentical)
|
|
{
|
|
Platform::Semaphore_Post(Sema_ScanlineCount, 192);
|
|
}
|
|
else RenderPolygons(true, &GPU.GPU3D.RenderPolygonRAM[0], GPU.GPU3D.RenderNumPolygons);
|
|
|
|
Platform::Semaphore_Post(Sema_RenderDone);
|
|
RenderThreadRendering = false;
|
|
}
|
|
}
|
|
|
|
// Returns a pointer to the first visible pixel of the given line in FinalBuffer.
// While the render thread is running, requests for lines below 192 first wait on
// Sema_ScanlineCount, i.e. until the renderer has signalled another finished scanline.
u32* SoftRenderer::GetLine(int line)
{
    const bool threadActive = RenderThreadRunning.load(std::memory_order_relaxed);
    if (threadActive && line < 192)
    {
        Platform::Semaphore_Wait(Sema_ScanlineCount);
    }

    const int firstPixel = (line * ScanlineWidth) + FirstPixelOffset;
    return &FinalBuffer[firstPixel];
}
|
|
|
|
}
|